setup kontor-schema
This commit is contained in:
@@ -1,8 +0,0 @@
|
|||||||
from uuid import UUID
|
|
||||||
|
|
||||||
from pydantic import BaseModel
|
|
||||||
|
|
||||||
|
|
||||||
class SportResponse(BaseModel):
    """Pydantic model carrying the ``id`` and ``name`` of a sport."""

    # Identifier of the sport.
    id: UUID
    # Name of the sport.
    name: str
|
|
||||||
@@ -1,4 +1,5 @@
|
|||||||
import json
|
import json
|
||||||
|
import logging
|
||||||
import uuid
|
import uuid
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
from enum import Enum, auto
|
from enum import Enum, auto
|
||||||
@@ -43,11 +44,10 @@ class ExportType(Enum):
|
|||||||
|
|
||||||
class KontorDB:
|
class KontorDB:
|
||||||
|
|
||||||
def __init__(self, db_engine: Any, log: Logger):
|
def __init__(self, db_engine: Any):
|
||||||
self.engine = db_engine
|
self.engine = db_engine
|
||||||
self.registry = {}
|
self.registry = {}
|
||||||
self.init_registry()
|
self.init_registry()
|
||||||
self.log = log
|
|
||||||
|
|
||||||
def init_registry(self):
|
def init_registry(self):
|
||||||
self.registry[Card.__tablename__] = Card
|
self.registry[Card.__tablename__] = Card
|
||||||
@@ -131,7 +131,6 @@ class KontorDB:
|
|||||||
|
|
||||||
def get_columns(self, table_name: str) -> dict:
|
def get_columns(self, table_name: str) -> dict:
|
||||||
columns = {}
|
columns = {}
|
||||||
order = 0
|
|
||||||
__session__ = sessionmaker(self.engine)
|
__session__ = sessionmaker(self.engine)
|
||||||
table_info = self.get_table_by_name(table_name)
|
table_info = self.get_table_by_name(table_name)
|
||||||
_filters = {'table_id': table_info['id']}
|
_filters = {'table_id': table_info['id']}
|
||||||
@@ -192,7 +191,7 @@ class KontorDB:
|
|||||||
if table in self.registry:
|
if table in self.registry:
|
||||||
model = self.registry[table]
|
model = self.registry[table]
|
||||||
else:
|
else:
|
||||||
self.log.info(f"table {table} is not registered")
|
logging.info(f"table {table} is not registered")
|
||||||
continue
|
continue
|
||||||
__session__ = sessionmaker(self.engine)
|
__session__ = sessionmaker(self.engine)
|
||||||
with __session__() as session:
|
with __session__() as session:
|
||||||
@@ -222,17 +221,17 @@ class KontorDB:
|
|||||||
with open(export_file_name, "w") as dump_file:
|
with open(export_file_name, "w") as dump_file:
|
||||||
dump_file.write(json_dump)
|
dump_file.write(json_dump)
|
||||||
case "YAML":
|
case "YAML":
|
||||||
export_file = Path(export_file_name)
|
pass
|
||||||
case "SQLite":
|
case "SQLite":
|
||||||
export_file = Path(export_file_name)
|
pass
|
||||||
self.log.info(f"{len(results)} tables exported")
|
logging.info(f"{len(results)} tables exported")
|
||||||
return results
|
return results
|
||||||
|
|
||||||
def import_db(self, import_file_name: str) -> dict:
|
def import_db(self, import_file_name: str) -> dict:
|
||||||
result = {}
|
result = {}
|
||||||
import_file = Path(import_file_name)
|
import_file = Path(import_file_name)
|
||||||
if not import_file.exists():
|
if not import_file.exists():
|
||||||
self.log.info(f"File {import_file_name} does not exist. Do nothing.")
|
logging.info(f"File {import_file_name} does not exist. Do nothing.")
|
||||||
return result
|
return result
|
||||||
match import_file.suffix:
|
match import_file.suffix:
|
||||||
case '.json':
|
case '.json':
|
||||||
@@ -240,7 +239,7 @@ class KontorDB:
|
|||||||
with open(import_file_name, 'r') as json_file:
|
with open(import_file_name, 'r') as json_file:
|
||||||
json_load = json.load(json_file)
|
json_load = json.load(json_file)
|
||||||
for table in json_load:
|
for table in json_load:
|
||||||
self.log.info(f"{table}: {len(json_load[table])}")
|
logging.info(f"{table}: {len(json_load[table])}")
|
||||||
result[table] = self.import_table(table, json_load[table])
|
result[table] = self.import_table(table, json_load[table])
|
||||||
case '.yml':
|
case '.yml':
|
||||||
print("read yaml file")
|
print("read yaml file")
|
||||||
@@ -256,7 +255,7 @@ class KontorDB:
|
|||||||
added = []
|
added = []
|
||||||
remaining = []
|
remaining = []
|
||||||
existing_ids = self.get_ids(table_name)
|
existing_ids = self.get_ids(table_name)
|
||||||
self.log.info(f"found {len(existing_ids)} existing ids for table {table_name}")
|
logging.info(f"found {len(existing_ids)} existing ids for table {table_name}")
|
||||||
for item in items:
|
for item in items:
|
||||||
current_id = item['id']
|
current_id = item['id']
|
||||||
# print(f"import item: {item}")
|
# print(f"import item: {item}")
|
||||||
@@ -269,7 +268,7 @@ class KontorDB:
|
|||||||
changed = self.update_entry(table_name, current_id, item)
|
changed = self.update_entry(table_name, current_id, item)
|
||||||
updated.append(item)
|
updated.append(item)
|
||||||
if changed:
|
if changed:
|
||||||
self.log.info(f"{current_id} has changed")
|
logging.info(f"{current_id} has changed")
|
||||||
updated.append(item)
|
updated.append(item)
|
||||||
existing_ids.remove(current_id)
|
existing_ids.remove(current_id)
|
||||||
else:
|
else:
|
||||||
@@ -277,7 +276,7 @@ class KontorDB:
|
|||||||
self.add_entry(table_name, item)
|
self.add_entry(table_name, item)
|
||||||
added.append(item)
|
added.append(item)
|
||||||
except IntegrityError as error:
|
except IntegrityError as error:
|
||||||
self.log.info(f"Could not add item, due to: {error.detail}")
|
logging.info(f"Could not add item, due to: {error.detail}")
|
||||||
if len(existing_ids) > 0:
|
if len(existing_ids) > 0:
|
||||||
print(f"remaining items for {table_name}: {existing_ids}")
|
print(f"remaining items for {table_name}: {existing_ids}")
|
||||||
remaining.extend(existing_ids)
|
remaining.extend(existing_ids)
|
||||||
@@ -296,7 +295,7 @@ class KontorDB:
|
|||||||
return existing_ids
|
return existing_ids
|
||||||
|
|
||||||
def add_entry(self, table_name: str, update_item: dict):
|
def add_entry(self, table_name: str, update_item: dict):
|
||||||
self.log.debug(f"add entry to table {table_name} with {update_item}")
|
logging.debug(f"add entry to table {table_name} with {update_item}")
|
||||||
__session__ = sessionmaker(self.engine)
|
__session__ = sessionmaker(self.engine)
|
||||||
with __session__() as session:
|
with __session__() as session:
|
||||||
add_item = self.registry[table_name]()
|
add_item = self.registry[table_name]()
|
||||||
@@ -318,11 +317,11 @@ class KontorDB:
|
|||||||
if type(existing_value) is not type(update_value):
|
if type(existing_value) is not type(update_value):
|
||||||
existing_value = str(existing_value)
|
existing_value = str(existing_value)
|
||||||
if existing_value != update_value:
|
if existing_value != update_value:
|
||||||
self.log.info(f"{key} has changed: {existing_value} != {update_value}")
|
logging.info(f"{key} has changed: {existing_value} != {update_value}")
|
||||||
setattr(existing_item, key, update_value)
|
setattr(existing_item, key, update_value)
|
||||||
session.commit()
|
session.commit()
|
||||||
changed = True
|
changed = True
|
||||||
self.log.info(f"update {key} with {update_value}")
|
logging.info(f"update {key} with {update_value}")
|
||||||
return changed
|
return changed
|
||||||
|
|
||||||
def add_link(self, link: str) -> dict:
|
def add_link(self, link: str) -> dict:
|
||||||
|
|||||||
@@ -1,3 +1,4 @@
|
|||||||
|
import logging
|
||||||
import re
|
import re
|
||||||
import subprocess
|
import subprocess
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
@@ -23,7 +24,7 @@ class MediaFile(Base, BaseMixin, BaseVideoMixin):
|
|||||||
return f'{self.title}({self.id})'
|
return f'{self.title}({self.id})'
|
||||||
|
|
||||||
def update_title(self) -> None:
|
def update_title(self) -> None:
|
||||||
print(f"update title for {self.url}")
|
logging.info(f"update title for {self.url}")
|
||||||
try:
|
try:
|
||||||
r = requests.get(self.url)
|
r = requests.get(self.url)
|
||||||
soup = BeautifulSoup(r.content, "html.parser")
|
soup = BeautifulSoup(r.content, "html.parser")
|
||||||
@@ -36,7 +37,7 @@ class MediaFile(Base, BaseMixin, BaseVideoMixin):
|
|||||||
self.last_modified_date = datetime.now()
|
self.last_modified_date = datetime.now()
|
||||||
|
|
||||||
def download_file(self, download_dir: str, dl_tool: str):
|
def download_file(self, download_dir: str, dl_tool: str):
|
||||||
print(f"download file for {self.url} to {download_dir}")
|
logging.info(f"download file for {self.url} to {download_dir}")
|
||||||
result = subprocess.run([dl_tool, self.url], cwd=download_dir, capture_output=True, text=True)
|
result = subprocess.run([dl_tool, self.url], cwd=download_dir, capture_output=True, text=True)
|
||||||
if result.returncode == 0:
|
if result.returncode == 0:
|
||||||
output = result.stdout
|
output = result.stdout
|
||||||
|
|||||||
@@ -1,366 +0,0 @@
|
|||||||
import json
|
|
||||||
import re
|
|
||||||
import subprocess
|
|
||||||
import uuid
|
|
||||||
from datetime import datetime
|
|
||||||
from pathlib import Path
|
|
||||||
|
|
||||||
import requests
|
|
||||||
from bs4 import BeautifulSoup
|
|
||||||
from cement.core.config import ConfigHandler
|
|
||||||
from sqlalchemy import Engine
|
|
||||||
from sqlalchemy.exc import IntegrityError
|
|
||||||
from sqlalchemy.orm import sessionmaker
|
|
||||||
|
|
||||||
from .bookshelf import Article, Book, Author, BookshelfPublisher, ArticleAuthor, BookAuthor
|
|
||||||
from .comic import Comic, Artist, Publisher, Issue, StoryArc, TradePaperback, Volume, ComicWork, WorkType
|
|
||||||
from .metadata import MetaDataTable, MetaDataColumn
|
|
||||||
from .tysc import Card, CardSet, Sport, Team, FieldPosition, Rooster, Player, Vendor
|
|
||||||
from .media import MediaFile, MediaArticle, MediaVideo
|
|
||||||
|
|
||||||
|
|
||||||
class KontorDB:
|
|
||||||
|
|
||||||
def __init__(self, db_engine: Engine, config: ConfigHandler, log):
|
|
||||||
self.engine = db_engine
|
|
||||||
self.config = config
|
|
||||||
self.log = log
|
|
||||||
self.registry = {}
|
|
||||||
self.init_registry()
|
|
||||||
|
|
||||||
def init_registry(self):
|
|
||||||
self.registry['card'] = Card
|
|
||||||
self.registry['card_set'] = CardSet
|
|
||||||
self.registry['sport'] = Sport
|
|
||||||
self.registry['team'] = Team
|
|
||||||
self.registry['field_position'] = FieldPosition
|
|
||||||
self.registry['rooster'] = Rooster
|
|
||||||
self.registry['player'] = Player
|
|
||||||
self.registry['vendor'] = Vendor
|
|
||||||
self.registry['artist'] = Artist
|
|
||||||
self.registry['publisher'] = Publisher
|
|
||||||
self.registry['comic'] = Comic
|
|
||||||
self.registry['issue'] = Issue
|
|
||||||
self.registry['story_arc'] = StoryArc
|
|
||||||
self.registry['trade_paperback'] = TradePaperback
|
|
||||||
self.registry['volume'] = Volume
|
|
||||||
self.registry['comic_work'] = ComicWork
|
|
||||||
self.registry['worktype'] = WorkType
|
|
||||||
self.registry['article'] = Article
|
|
||||||
self.registry['book'] = Book
|
|
||||||
self.registry['author'] = Author
|
|
||||||
self.registry['bookshelf_publisher'] = BookshelfPublisher
|
|
||||||
self.registry['article_author'] = ArticleAuthor
|
|
||||||
self.registry['book_author'] = BookAuthor
|
|
||||||
self.registry['media_file'] = MediaFile
|
|
||||||
self.registry['media_article'] = MediaArticle
|
|
||||||
self.registry['media_video'] = MediaVideo
|
|
||||||
self.registry['meta_data_table'] = MetaDataTable
|
|
||||||
self.registry[MetaDataColumn.__tablename__] = MetaDataColumn
|
|
||||||
|
|
||||||
def get_table_names(self) -> list:
|
|
||||||
result = []
|
|
||||||
__session__ = sessionmaker(self.engine)
|
|
||||||
with __session__() as session:
|
|
||||||
tables = session.query(MetaDataTable).all()
|
|
||||||
result = [table.table_name for table in tables]
|
|
||||||
return result
|
|
||||||
|
|
||||||
def get_column_meta_data(self, table_name: str, view_only=True) -> dict:
|
|
||||||
meta_data = {}
|
|
||||||
order = 0
|
|
||||||
__session__ = sessionmaker(self.engine)
|
|
||||||
with __session__() as session:
|
|
||||||
if view_only:
|
|
||||||
for (_, column) in (session.query(MetaDataTable, MetaDataColumn).
|
|
||||||
filter(MetaDataTable.id == MetaDataColumn.table_id).
|
|
||||||
filter(MetaDataTable.table_name == table_name).
|
|
||||||
filter(MetaDataColumn.is_shown == 1).all()):
|
|
||||||
meta_data[order] = {'column': column.column_name, 'label': column.column_label,
|
|
||||||
'order': column.column_order, 'ref_column': column.ref_column}
|
|
||||||
order += 1
|
|
||||||
else:
|
|
||||||
for (_, column) in (session.query(MetaDataTable, MetaDataColumn).
|
|
||||||
filter(MetaDataTable.id == MetaDataColumn.table_id).
|
|
||||||
filter(MetaDataTable.table_name == table_name).all()):
|
|
||||||
meta_data[order] = {
|
|
||||||
'column': column.column_name,
|
|
||||||
'order': column.column_order,
|
|
||||||
'ref_column': column.ref_column
|
|
||||||
}
|
|
||||||
order += 1
|
|
||||||
return meta_data
|
|
||||||
|
|
||||||
def get_filters(self, table_name):
|
|
||||||
_filter_map = {}
|
|
||||||
__session__ = sessionmaker(self.engine)
|
|
||||||
with __session__() as session:
|
|
||||||
for (_, column) in (session.query(MetaDataTable, MetaDataColumn).
|
|
||||||
filter(MetaDataTable.id == MetaDataColumn.table_id).
|
|
||||||
filter(MetaDataTable.table_name == table_name).
|
|
||||||
filter(MetaDataColumn.show_filter == 1).all()):
|
|
||||||
_filter_map[column.column_name] = {'label': column.filter_label, 'widget': None}
|
|
||||||
self.log.debug(f"retrieved {len(_filter_map)} filters: {_filter_map}")
|
|
||||||
return _filter_map
|
|
||||||
|
|
||||||
def data(self, table, columns: dict, filters) -> list:
|
|
||||||
data = []
|
|
||||||
__session__ = sessionmaker(self.engine)
|
|
||||||
with __session__() as session:
|
|
||||||
entries = []
|
|
||||||
if len(filters) == 0:
|
|
||||||
entries = session.query(table).all()
|
|
||||||
else:
|
|
||||||
entries = session.query(table).filter_by(**filters)
|
|
||||||
for entry in entries:
|
|
||||||
row = []
|
|
||||||
for order in columns.keys():
|
|
||||||
column_name = columns[order]['column']
|
|
||||||
if str(column_name).endswith("_id"):
|
|
||||||
ref_table = column_name[:-3]
|
|
||||||
# print(f"{ref_table=}")
|
|
||||||
ref = getattr(entry, ref_table)
|
|
||||||
value = getattr(ref, "name")
|
|
||||||
# print(f"{value=}")
|
|
||||||
row.append(value)
|
|
||||||
else:
|
|
||||||
row.append(getattr(entry, column_name))
|
|
||||||
# print(repr(row))
|
|
||||||
data.append(row)
|
|
||||||
return data
|
|
||||||
|
|
||||||
def export_db(self, export_type: str, export_file_name: str):
|
|
||||||
self.log.info(f"export DB to {export_file_name} as {export_type}")
|
|
||||||
db = {}
|
|
||||||
export_table_list = self.get_table_names()
|
|
||||||
for table in export_table_list:
|
|
||||||
columns = self.get_column_meta_data(table, view_only=False)
|
|
||||||
if table in self.registry:
|
|
||||||
model = self.registry[table]
|
|
||||||
else:
|
|
||||||
print(f"table {table} is not registered")
|
|
||||||
continue
|
|
||||||
__session__ = sessionmaker(self.engine)
|
|
||||||
with __session__() as session:
|
|
||||||
rows = session.query(model).all()
|
|
||||||
entries = []
|
|
||||||
self.log.debug(f"found {len(rows)} entries")
|
|
||||||
self.log.debug(f"found {len(columns)} columns")
|
|
||||||
for row in rows:
|
|
||||||
# print(row)
|
|
||||||
entry = {}
|
|
||||||
for order in columns:
|
|
||||||
# print(columns[order])
|
|
||||||
column_name = columns[order]['column']
|
|
||||||
# print(f"get value {column_name} from {row} of table {table}")
|
|
||||||
try:
|
|
||||||
value = getattr(row, column_name)
|
|
||||||
if isinstance(value, datetime):
|
|
||||||
entry[column_name] = str(value)
|
|
||||||
else:
|
|
||||||
entry[column_name] = value
|
|
||||||
except AttributeError:
|
|
||||||
self.log.debug("could not get value")
|
|
||||||
entries.append(entry)
|
|
||||||
db[table] = entries
|
|
||||||
export_file = Path(export_file_name)
|
|
||||||
match export_type:
|
|
||||||
case "JSON":
|
|
||||||
json_dump = json.dumps(db, indent=4)
|
|
||||||
with open(export_file_name, "w") as dump_file:
|
|
||||||
dump_file.write(json_dump)
|
|
||||||
case "YAML":
|
|
||||||
export_file = Path(export_file_name)
|
|
||||||
case "SQLite":
|
|
||||||
export_file = Path(export_file_name)
|
|
||||||
case _:
|
|
||||||
self.log.debug("unknown export type")
|
|
||||||
if export_file.exists():
|
|
||||||
self.log.debug(f"{export_file} exists")
|
|
||||||
|
|
||||||
def import_db(self, import_file_name: str, dry_run: bool):
|
|
||||||
import_file = Path(import_file_name)
|
|
||||||
if not import_file.exists():
|
|
||||||
print(f"File {import_file_name} does not exist. Do nothing.")
|
|
||||||
return
|
|
||||||
self.log.debug(f"evaluate type from file extension: {import_file.suffix}")
|
|
||||||
match import_file.suffix:
|
|
||||||
case '.json':
|
|
||||||
print("read json file")
|
|
||||||
with open(import_file_name, 'r') as json_file:
|
|
||||||
json_load = json.load(json_file)
|
|
||||||
for table in json_load:
|
|
||||||
print(f"{table}: {len(json_load[table])}")
|
|
||||||
self.import_table(table, json_load[table], dry_run)
|
|
||||||
case '.yml':
|
|
||||||
print("read yaml file")
|
|
||||||
case '.yaml':
|
|
||||||
print("read yaml file")
|
|
||||||
case '.db':
|
|
||||||
print("read sqlite file")
|
|
||||||
|
|
||||||
def import_table(self, table_name, items, dry_run: bool):
|
|
||||||
existing_ids = self.get_ids(table_name)
|
|
||||||
for item in items:
|
|
||||||
# self.log.debug(f"{item}")
|
|
||||||
current_id = item['id']
|
|
||||||
found_item = None
|
|
||||||
__session__ = sessionmaker(self.engine)
|
|
||||||
with __session__() as session:
|
|
||||||
found_item = session.query(self.registry[table_name]).get(current_id)
|
|
||||||
self.log.debug(f"found: {found_item}")
|
|
||||||
if found_item is not None:
|
|
||||||
changed = self.update_entry(found_item, item, dry_run)
|
|
||||||
if changed:
|
|
||||||
print(f"{current_id} has changed")
|
|
||||||
existing_ids.remove(current_id)
|
|
||||||
else:
|
|
||||||
self.log.info("item to import not found in database, add new one...")
|
|
||||||
self.add_entry(table_name, item, session, dry_run)
|
|
||||||
if len(existing_ids) > 0:
|
|
||||||
print("remaining items")
|
|
||||||
|
|
||||||
def get_ids(self, table_name: str) -> list:
|
|
||||||
existing_ids = []
|
|
||||||
__session__ = sessionmaker(self.engine)
|
|
||||||
with __session__() as session:
|
|
||||||
items = session.query(self.registry[table_name]).all()
|
|
||||||
for item in items:
|
|
||||||
existing_ids.append(getattr(item, 'id'))
|
|
||||||
return existing_ids
|
|
||||||
|
|
||||||
def add_entry(self, table_name: str, update_item: dict, session, dry_run: bool):
|
|
||||||
add_item = self.registry[table_name]()
|
|
||||||
for key in update_item.keys():
|
|
||||||
update_value = update_item[key]
|
|
||||||
setattr(add_item, key, update_value)
|
|
||||||
if dry_run:
|
|
||||||
self.log.info(f"add item {type(add_item)} with id {update_item['id']}")
|
|
||||||
else:
|
|
||||||
session.add(add_item)
|
|
||||||
session.commit()
|
|
||||||
|
|
||||||
def update_entry(self, existing_item, update_item: dict, dry_run: bool) -> bool:
|
|
||||||
changed = False
|
|
||||||
for key in update_item.keys():
|
|
||||||
update_value = update_item[key]
|
|
||||||
existing_value = getattr(existing_item, key)
|
|
||||||
if type(existing_value) is not type(update_value):
|
|
||||||
# self.log.debug(f"compare {type(existing_value)} with {type(update_value)}")
|
|
||||||
existing_value = str(existing_value)
|
|
||||||
if existing_value != update_value:
|
|
||||||
print(f"{key} has changed: {existing_value} != {update_value}")
|
|
||||||
if not dry_run:
|
|
||||||
setattr(existing_item, key, update_value)
|
|
||||||
# existing_item[key] = update_value
|
|
||||||
changed = True
|
|
||||||
self.log.info(f"update {key} with {update_value}")
|
|
||||||
return changed
|
|
||||||
|
|
||||||
def add_link(self, link: str, dry_run: bool):
|
|
||||||
self.log.info(f"add link {link} to media_file")
|
|
||||||
__session__ = sessionmaker(self.engine)
|
|
||||||
with __session__() as session:
|
|
||||||
media_file = MediaFile()
|
|
||||||
media_file.id = str(uuid.uuid4())
|
|
||||||
media_file.created_date = datetime.now()
|
|
||||||
media_file.last_modified_date = datetime.now()
|
|
||||||
media_file.version = 0
|
|
||||||
media_file.url = link
|
|
||||||
media_file.review = 1
|
|
||||||
media_file.should_download = 1
|
|
||||||
try:
|
|
||||||
session.add(media_file)
|
|
||||||
session.commit()
|
|
||||||
self.log.info(f"entry {media_file} successfully added")
|
|
||||||
except IntegrityError as error:
|
|
||||||
session.rollback()
|
|
||||||
self.log.info(error.orig)
|
|
||||||
|
|
||||||
def update_title(self, dry_run=False):
|
|
||||||
self.log.info("get links to review of media_file")
|
|
||||||
__session__ = sessionmaker(self.engine)
|
|
||||||
with __session__() as session:
|
|
||||||
links = session.query(MediaFile).filter(MediaFile.review == 1).all()
|
|
||||||
self.log.info(f"try to update {len(links)} items")
|
|
||||||
for link in links:
|
|
||||||
url = link.url
|
|
||||||
if url is None:
|
|
||||||
self.log.info(f"url has not been set for {link.id}")
|
|
||||||
continue
|
|
||||||
self.log.info('get title for url {}'.format(url))
|
|
||||||
if dry_run:
|
|
||||||
continue
|
|
||||||
try:
|
|
||||||
r = requests.get(url)
|
|
||||||
soup = BeautifulSoup(r.content, "html.parser")
|
|
||||||
title = soup.title.string
|
|
||||||
except:
|
|
||||||
self.log.info("Sorry, could not retrieve title")
|
|
||||||
continue
|
|
||||||
self.log.info('ID {} has title {}'.format(link.id, title))
|
|
||||||
link.title = title
|
|
||||||
link.review = 0
|
|
||||||
session.commit()
|
|
||||||
|
|
||||||
def download_file(self, dry_run=False):
|
|
||||||
self.log.info("download marked files of media_file")
|
|
||||||
__session__ = sessionmaker(self.engine)
|
|
||||||
with __session__() as session:
|
|
||||||
links = session.query(MediaFile).filter(MediaFile.should_download == 1).all()
|
|
||||||
self.log.info(f"try to download {len(links)} items")
|
|
||||||
for link in links:
|
|
||||||
url = link.url
|
|
||||||
if url is None:
|
|
||||||
self.log.info(f"url has not been set for {link.id}")
|
|
||||||
continue
|
|
||||||
if dry_run:
|
|
||||||
self.log.info(f"download {link.url} to {self.config.get('media', 'dir')}")
|
|
||||||
continue
|
|
||||||
filename = self.download_url(link)
|
|
||||||
if filename is None:
|
|
||||||
link.file_name = filename
|
|
||||||
link.should_download = 1
|
|
||||||
else:
|
|
||||||
download_file = Path(filename)
|
|
||||||
download_file.with_name(f"{link.id}{download_file.suffix}")
|
|
||||||
link.file_name = download_file.name
|
|
||||||
link.should_download = 0
|
|
||||||
link.cloud_link = download_file.absolute()
|
|
||||||
session.commit()
|
|
||||||
|
|
||||||
def parse_output(self, lines_list):
|
|
||||||
file_name = ""
|
|
||||||
for line in lines_list:
|
|
||||||
if 'has already been downloaded' in line:
|
|
||||||
end_len = len(' has already been downloaded')
|
|
||||||
file_name = line[11:-end_len]
|
|
||||||
self.log.info('found file: "%s"', file_name)
|
|
||||||
if 'Destination' in line:
|
|
||||||
line_len = len(line)
|
|
||||||
start_len = len('[download] Destination: ')
|
|
||||||
file_len = line_len - start_len
|
|
||||||
file_name = line[-file_len:]
|
|
||||||
self.log.info('new file: "%s"', file_name)
|
|
||||||
return file_name
|
|
||||||
|
|
||||||
def download_url(self, video_url):
|
|
||||||
media_dir = Path(self.config.get('media', 'dir'))
|
|
||||||
if not media_dir.exists():
|
|
||||||
media_dir = Path().absolute()
|
|
||||||
self.log.info(f"download video to {media_dir}")
|
|
||||||
result = subprocess.run([self.config.get('media', 'yt-dlp'), video_url], cwd=media_dir, capture_output=True,
|
|
||||||
text=True)
|
|
||||||
if result.returncode == 0:
|
|
||||||
output = result.stdout
|
|
||||||
output = re.sub(' +', ' ', output)
|
|
||||||
lines_list = output.splitlines()
|
|
||||||
return self.parse_output(lines_list)
|
|
||||||
else:
|
|
||||||
return None
|
|
||||||
|
|
||||||
def check_files(self):
|
|
||||||
media_dir = Path(self.config.get('media', 'dir'))
|
|
||||||
if not media_dir.exists():
|
|
||||||
return
|
|
||||||
self.log.info(f"check files in {media_dir}")
|
|
||||||
@@ -1,20 +0,0 @@
|
|||||||
import uuid
|
|
||||||
from datetime import datetime
|
|
||||||
|
|
||||||
from sqlalchemy import func
|
|
||||||
from sqlalchemy.orm import DeclarativeBase, Mapped, mapped_column
|
|
||||||
|
|
||||||
|
|
||||||
class Base(DeclarativeBase):
    """Declarative base class that the mapped model classes derive from."""
|
|
||||||
|
|
||||||
|
|
||||||
class BaseMixin:
    """Mixin adding the ``id``, timestamp and ``version`` columns to a model."""

    # Primary key. The default must be a callable: ``uuid.uuid4()`` written
    # inline is evaluated once when the class is defined, so every row would
    # receive the same value (and a UUID object rather than the mapped str).
    id: Mapped[str] = mapped_column(primary_key=True, default=lambda: str(uuid.uuid4()))
    # Set to the database's now() when the row is inserted.
    created_date: Mapped[datetime] = mapped_column(default=func.now())
    # Also defaulted to now() on insert only; no update hook is declared here.
    last_modified_date: Mapped[datetime] = mapped_column(default=func.now())
    # Integer version of the row, 0 for a new row.
    version: Mapped[int] = mapped_column(default=0)
|
|
||||||
@@ -1,39 +0,0 @@
|
|||||||
from sqlalchemy import Column, String
|
|
||||||
from sqlalchemy.dialects.mysql import BIT
|
|
||||||
|
|
||||||
from .base import Base, BaseMixin
|
|
||||||
|
|
||||||
|
|
||||||
class MediaFile(Base, BaseMixin):
    """Row of the ``media_file`` table."""

    __tablename__ = 'media_file'

    cloud_link = Column(String(255))
    file_name = Column(String(255))
    path = Column(String(255))
    # Single-bit flag; compared against 1 when selecting entries to review.
    review = Column(BIT(1))
    title = Column(String(255))
    # Unique per row, so the same URL cannot be stored twice.
    url = Column(String(255), unique=True)
    # Single-bit flag; compared against 1 when selecting entries to download.
    should_download = Column(BIT(1))

    def __repr__(self):
        """Return a debug representation with id, title and URL."""
        # The title used to be printed twice; the URL is shown instead.
        return f'MediaFile({self.id} {self.title} {self.url})'

    def __str__(self):
        """Return ``title(id)``."""
        return f'{self.title}({self.id})'
|
|
||||||
|
|
||||||
|
|
||||||
class MediaArticle(Base, BaseMixin):
    """Row of the ``media_article`` table."""

    __tablename__ = 'media_article'

    # Single-bit flag column.
    review = Column(BIT(1))
    title = Column(String(255))
    # Unique per row, so the same URL cannot be stored twice.
    url = Column(String(255), unique=True)
|
|
||||||
|
|
||||||
|
|
||||||
class MediaVideo(Base, BaseMixin):
    """Row of the ``media_video`` table; declares the same columns as ``media_file``."""

    __tablename__ = 'media_video'

    cloud_link = Column(String(255))
    file_name = Column(String(255))
    path = Column(String(255))
    # Single-bit flag column.
    review = Column(BIT(1))
    title = Column(String(255))
    # Unique per row, so the same URL cannot be stored twice.
    url = Column(String(255), unique=True)
    # Single-bit flag column.
    should_download = Column(BIT(1))
|
|
||||||
@@ -61,19 +61,19 @@ def __parse_output__(lines_list: list[str]) -> str | None:
|
|||||||
return file_name
|
return file_name
|
||||||
|
|
||||||
|
|
||||||
def is_file_downloaded(item: dict, dir: Path) -> FileStatus:
|
def is_file_downloaded(media_file: dict, dir: Path) -> FileStatus:
|
||||||
file_name_as_title = f"{item['file_name']}"
|
file_name_as_title = f"{media_file['file_name']}"
|
||||||
file_title = Path(dir, file_name_as_title, ".mp4")
|
file_title = Path(dir, f"{file_name_as_title}.mp4")
|
||||||
if file_title.exists():
|
if file_title.exists():
|
||||||
log.info(f"{file_name_as_title} has been downloaded")
|
log.info(f"{file_name_as_title} has been downloaded")
|
||||||
item['should_download'] = 0
|
media_file['should_download'] = False
|
||||||
return FileStatus.DOWNLOADED
|
return FileStatus.DOWNLOADED
|
||||||
file_name_as_id = f"{item['id']}"
|
file_name_as_id = f"{media_file['id']}"
|
||||||
file_with_id_as_name = Path(dir, file_name_as_id, ".mp4")
|
file_with_id_as_name = Path(dir, f"{file_name_as_id}.mp4")
|
||||||
if file_with_id_as_name.exists():
|
if file_with_id_as_name.exists():
|
||||||
log.info(f"{file_with_id_as_name} has been downloaded and renamed")
|
log.info(f"{file_with_id_as_name} has been downloaded and renamed")
|
||||||
item['cloud_link'] = file_with_id_as_name
|
media_file['cloud_link'] = file_with_id_as_name
|
||||||
item['should_download'] = 0
|
media_file['should_download'] = False
|
||||||
return FileStatus.RENAMED
|
return FileStatus.RENAMED
|
||||||
log.info("could not find file - start download")
|
log.info("could not find file - start download")
|
||||||
return FileStatus.UNKNOWN
|
return FileStatus.UNKNOWN
|
||||||
|
|||||||
@@ -11,12 +11,12 @@ from pathlib import Path
|
|||||||
|
|
||||||
from schema import Base, KontorDB
|
from schema import Base, KontorDB
|
||||||
from config import get_logger
|
from config import get_logger
|
||||||
|
from schema.database import ExportType
|
||||||
|
|
||||||
parser = ArgumentParser(formatter_class=ArgumentDefaultsHelpFormatter)
|
parser = ArgumentParser(formatter_class=ArgumentDefaultsHelpFormatter)
|
||||||
parser.add_argument('--verbose', '-v', action='count', default=0)
|
parser.add_argument('--verbose', '-v', action='count', default=0)
|
||||||
parser.add_argument('--config', '-c', default='kontor-docker')
|
parser.add_argument('--config', '-c', default='kontor-docker')
|
||||||
parser.add_argument('--recreate-db', action='store_true')
|
parser.add_argument('--file', '-f', default='data.json')
|
||||||
parser.add_argument('--file', '-f', default='~/data.json')
|
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
|
|
||||||
|
|
||||||
@@ -38,5 +38,5 @@ if __name__ == '__main__':
|
|||||||
Base.metadata.create_all(bind=engine, checkfirst=True)
|
Base.metadata.create_all(bind=engine, checkfirst=True)
|
||||||
__session__ = sessionmaker(bind=engine)
|
__session__ = sessionmaker(bind=engine)
|
||||||
kontor_db = KontorDB(engine, logger)
|
kontor_db = KontorDB(engine, logger)
|
||||||
kontor_db.export_db("JSON", args.file)
|
kontor_db.export_db(ExportType.JSON, args.file)
|
||||||
logger.info('kontor.export finished')
|
logger.info('kontor.export finished')
|
||||||
|
|||||||
@@ -15,7 +15,6 @@ args = parser.parse_args()
|
|||||||
|
|
||||||
def copy_data(mariadb_conn, data_file: Path, log):
|
def copy_data(mariadb_conn, data_file: Path, log):
|
||||||
mariadb_cursor = mariadb_conn.cursor()
|
mariadb_cursor = mariadb_conn.cursor()
|
||||||
result = {}
|
|
||||||
import_file = Path(data_file)
|
import_file = Path(data_file)
|
||||||
if not import_file.exists():
|
if not import_file.exists():
|
||||||
log.info(f"File {data_file} does not exist. Do nothing.")
|
log.info(f"File {data_file} does not exist. Do nothing.")
|
||||||
|
|||||||
@@ -1,8 +0,0 @@
|
|||||||
-r requirements.txt
|
|
||||||
|
|
||||||
pytest
|
|
||||||
pytest-cov
|
|
||||||
coverage
|
|
||||||
twine>=1.11.0
|
|
||||||
setuptools>=38.6.0
|
|
||||||
wheel>=0.31.0
|
|
||||||
@@ -1,9 +0,0 @@
|
|||||||
from .admin import User, Token, Role, AuthorizationMatrix, ModuleData, MailAccount, Mail
|
|
||||||
from .bookshelf import Article, Book, Author, BookshelfPublisher, ArticleAuthor, BookAuthor
|
|
||||||
from .comic import Comic, Artist, Publisher, Issue, StoryArc, TradePaperback, Volume, ComicWork, WorkType
|
|
||||||
from .metadata import MetaDataTable, MetaDataColumn
|
|
||||||
from .tysc import Card, CardSet, Sport, Team, FieldPosition, Rooster, Player, Vendor
|
|
||||||
from .media import MediaFile, MediaArticle, MediaVideo
|
|
||||||
from .base import Base
|
|
||||||
from .database import KontorDB, ColumnEntry
|
|
||||||
|
|
||||||
|
|||||||
@@ -35,6 +35,7 @@ class StatusType(Enum):
|
|||||||
CLOUD_LINK = auto()
|
CLOUD_LINK = auto()
|
||||||
CLOUD_LINK_ID = auto()
|
CLOUD_LINK_ID = auto()
|
||||||
|
|
||||||
|
|
||||||
class ExportType(Enum):
|
class ExportType(Enum):
|
||||||
JSON = "JSON"
|
JSON = "JSON"
|
||||||
YAML = "YAML"
|
YAML = "YAML"
|
||||||
@@ -131,7 +132,6 @@ class KontorDB:
|
|||||||
|
|
||||||
def get_columns(self, table_name: str) -> dict:
|
def get_columns(self, table_name: str) -> dict:
|
||||||
columns = {}
|
columns = {}
|
||||||
order = 0
|
|
||||||
__session__ = sessionmaker(self.engine)
|
__session__ = sessionmaker(self.engine)
|
||||||
table_info = self.get_table_by_name(table_name)
|
table_info = self.get_table_by_name(table_name)
|
||||||
_filters = {'table_id': table_info['id']}
|
_filters = {'table_id': table_info['id']}
|
||||||
@@ -183,7 +183,7 @@ class KontorDB:
|
|||||||
# self.log.info("data: %s", data)
|
# self.log.info("data: %s", data)
|
||||||
return data
|
return data
|
||||||
|
|
||||||
def export_db(self, export_type: str, export_file_name: str) -> dict:
|
def export_db(self, export_type: ExportType, export_file_name: str) -> dict:
|
||||||
results = {}
|
results = {}
|
||||||
db = {}
|
db = {}
|
||||||
export_table_list = self.get_table_names()
|
export_table_list = self.get_table_names()
|
||||||
@@ -217,14 +217,14 @@ class KontorDB:
|
|||||||
db[table] = entries
|
db[table] = entries
|
||||||
results[table] = len(entries)
|
results[table] = len(entries)
|
||||||
match export_type:
|
match export_type:
|
||||||
case "JSON":
|
case ExportType.JSON:
|
||||||
json_dump = json.dumps(db, indent=4)
|
json_dump = json.dumps(db, indent=4)
|
||||||
with open(export_file_name, "w") as dump_file:
|
with open(export_file_name, "w") as dump_file:
|
||||||
dump_file.write(json_dump)
|
dump_file.write(json_dump)
|
||||||
case "YAML":
|
case ExportType.YAML:
|
||||||
export_file = Path(export_file_name)
|
pass
|
||||||
case "SQLite":
|
case ExportType.SQLITE:
|
||||||
export_file = Path(export_file_name)
|
pass
|
||||||
self.log.info(f"{len(results)} tables exported")
|
self.log.info(f"{len(results)} tables exported")
|
||||||
return results
|
return results
|
||||||
|
|
||||||
@@ -340,7 +340,8 @@ class KontorDB:
|
|||||||
try:
|
try:
|
||||||
session.add(media_file)
|
session.add(media_file)
|
||||||
session.commit()
|
session.commit()
|
||||||
result['added'] = {'url': media_file.url, 'title': media_file.title, 'review': media_file.review, 'download': media_file.should_download}
|
result['added'] = {'url': media_file.url, 'title': media_file.title, 'review': media_file.review,
|
||||||
|
'download': media_file.should_download}
|
||||||
except IntegrityError as error:
|
except IntegrityError as error:
|
||||||
session.rollback()
|
session.rollback()
|
||||||
result['error'] = error.orig
|
result['error'] = error.orig
|
||||||
|
|||||||
@@ -5,8 +5,7 @@ from pathlib import Path
|
|||||||
|
|
||||||
import requests
|
import requests
|
||||||
from bs4 import BeautifulSoup
|
from bs4 import BeautifulSoup
|
||||||
from sqlalchemy import Column, String, ForeignKey
|
from sqlalchemy import Boolean, Column, False_, String, ForeignKey
|
||||||
from sqlalchemy.dialects.mysql import BIT
|
|
||||||
from sqlalchemy.orm import relationship
|
from sqlalchemy.orm import relationship
|
||||||
|
|
||||||
from .base import Base, BaseMixin, BaseVideoMixin
|
from .base import Base, BaseMixin, BaseVideoMixin
|
||||||
@@ -29,10 +28,10 @@ class MediaFile(Base, BaseMixin, BaseVideoMixin):
|
|||||||
soup = BeautifulSoup(r.content, "html.parser")
|
soup = BeautifulSoup(r.content, "html.parser")
|
||||||
title = soup.title.string
|
title = soup.title.string
|
||||||
self.title = title
|
self.title = title
|
||||||
self.review = 0
|
self.review = False_
|
||||||
except:
|
except:
|
||||||
self.title = None
|
self.title = None
|
||||||
self.review = 1
|
self.review = True
|
||||||
self.last_modified_date = datetime.now()
|
self.last_modified_date = datetime.now()
|
||||||
|
|
||||||
def download_file(self, download_dir: str, dl_tool: str):
|
def download_file(self, download_dir: str, dl_tool: str):
|
||||||
@@ -44,12 +43,12 @@ class MediaFile(Base, BaseMixin, BaseVideoMixin):
|
|||||||
lines_list = output.splitlines()
|
lines_list = output.splitlines()
|
||||||
file_name = self.__parse_output__(lines_list)
|
file_name = self.__parse_output__(lines_list)
|
||||||
if file_name is None:
|
if file_name is None:
|
||||||
self.review = 1
|
self.review = True
|
||||||
self.should_download = 1
|
self.should_download = True
|
||||||
self.file_name = None
|
self.file_name = None
|
||||||
else:
|
else:
|
||||||
download_file = Path(file_name)
|
download_file = Path(file_name)
|
||||||
self.should_download = 0
|
self.should_download = False_
|
||||||
self.file_name = download_file.name
|
self.file_name = download_file.name
|
||||||
self.cloud_link = str(download_file.absolute())
|
self.cloud_link = str(download_file.absolute())
|
||||||
self.last_modified_date = datetime.now()
|
self.last_modified_date = datetime.now()
|
||||||
@@ -84,7 +83,7 @@ class MediaActorFile(Base, BaseMixin):
|
|||||||
|
|
||||||
class MediaArticle(Base, BaseMixin):
|
class MediaArticle(Base, BaseMixin):
|
||||||
__tablename__ = 'media_article'
|
__tablename__ = 'media_article'
|
||||||
review = Column(BIT(1))
|
review = Column(Boolean)
|
||||||
title = Column(String(255))
|
title = Column(String(255))
|
||||||
url = Column(String(255), unique=True)
|
url = Column(String(255), unique=True)
|
||||||
|
|
||||||
@@ -94,7 +93,7 @@ class MediaVideo(Base, BaseMixin):
|
|||||||
cloud_link = Column(String(255))
|
cloud_link = Column(String(255))
|
||||||
file_name = Column(String(255))
|
file_name = Column(String(255))
|
||||||
path = Column(String(255))
|
path = Column(String(255))
|
||||||
review = Column(BIT(1))
|
review = Column(Boolean)
|
||||||
title = Column(String(255))
|
title = Column(String(255))
|
||||||
url = Column(String(255), unique=True)
|
url = Column(String(255), unique=True)
|
||||||
should_download = Column(BIT(1))
|
should_download = Column(Boolean)
|
||||||
|
|||||||
@@ -1,41 +0,0 @@
|
|||||||
|
|
||||||
services:
|
|
||||||
mariadb:
|
|
||||||
image: mariadb
|
|
||||||
restart: unless-stopped
|
|
||||||
environment:
|
|
||||||
MYSQL_ROOT_PASSWORD: kontor
|
|
||||||
MYSQL_USER: kontor
|
|
||||||
MYSQL_PASSWORD: kontor
|
|
||||||
MYSQL_DATABASE: kontor
|
|
||||||
ports:
|
|
||||||
- 3316:3306
|
|
||||||
networks:
|
|
||||||
- database
|
|
||||||
volumes:
|
|
||||||
- mariadb-storage:/var/lib/mysql:rw
|
|
||||||
kontor:
|
|
||||||
image: kontor
|
|
||||||
restart: unless-stopped
|
|
||||||
networks:
|
|
||||||
- database
|
|
||||||
- frontend
|
|
||||||
ports:
|
|
||||||
- 8000:8000
|
|
||||||
kontor-api:
|
|
||||||
image: kontor-api
|
|
||||||
restart: unless-stopped
|
|
||||||
networks:
|
|
||||||
- database
|
|
||||||
- frontend
|
|
||||||
ports:
|
|
||||||
- 8800:8800
|
|
||||||
|
|
||||||
|
|
||||||
networks:
|
|
||||||
database:
|
|
||||||
frontend:
|
|
||||||
|
|
||||||
volumes:
|
|
||||||
mariadb-storage:
|
|
||||||
|
|
||||||
Reference in New Issue
Block a user