Files
kontor/python/kontor/database/__init__.py
T
2025-01-19 17:49:13 +01:00

370 lines
16 KiB
Python

import json
import re
import subprocess
import uuid
from datetime import datetime
from pathlib import Path
from typing import Any
import requests
from bs4 import BeautifulSoup
from cement.core.config import ConfigHandler
from sqlalchemy import Engine
from sqlalchemy.exc import IntegrityError
from sqlalchemy.orm import sessionmaker
from .base import Base
from .bookshelf import Article, Book, Author, BookshelfPublisher, ArticleAuthor, BookAuthor
from .comic import Comic, Artist, Publisher, Issue, StoryArc, TradePaperback, Volume, ComicWork, WorkType
from .metadata import MetaDataTable, MetaDataColumn
from .tysc import Card, CardSet, Sport, Team, FieldPosition, Rooster, Player, Vendor
from .media import MediaFile, MediaArticle, MediaVideo
from ..gui.progress import ProgressUpdate
class KontorDB:
    """Database facade: metadata lookup, import/export and media-file handling."""

    def __init__(self, db_engine: Engine, config: ConfigHandler, log):
        """Keep the collaborators and fill the model registry.

        Args:
            db_engine: engine that the methods hand to ``sessionmaker``.
            config: configuration handler, read via ``config.get(section, key)``.
            log: logger object used for ``info``/``debug`` messages.
        """
        self.engine, self.config, self.log = db_engine, config, log
        # Maps a table name to its model class; populated by init_registry().
        self.registry = {}
        self.init_registry()
def init_registry(self):
    """Register every known model class under its lookup key in ``self.registry``."""
    self.registry.update({
        # models imported from .tysc
        'card': Card,
        'card_set': CardSet,
        'sport': Sport,
        'team': Team,
        'field_position': FieldPosition,
        'rooster': Rooster,
        'player': Player,
        'vendor': Vendor,
        # models imported from .comic
        'artist': Artist,
        'publisher': Publisher,
        'comic': Comic,
        'issue': Issue,
        'story_arc': StoryArc,
        'trade_paperback': TradePaperback,
        'volume': Volume,
        'comic_work': ComicWork,
        'worktype': WorkType,
        # models imported from .bookshelf
        'article': Article,
        'book': Book,
        'author': Author,
        'bookshelf_publisher': BookshelfPublisher,
        'article_author': ArticleAuthor,
        'book_author': BookAuthor,
        # models imported from .media
        'media_file': MediaFile,
        'media_article': MediaArticle,
        'media_video': MediaVideo,
        # models imported from .metadata
        'meta_data_table': MetaDataTable,
        MetaDataColumn.__tablename__: MetaDataColumn,
    })
def get_table_names(self) -> list:
    """Return the ``table_name`` of every ``MetaDataTable`` row."""
    session_factory = sessionmaker(self.engine)
    with session_factory() as session:
        return [entry.table_name for entry in session.query(MetaDataTable).all()]
def get_column_meta_data(self, table_name: str, view_only=True) -> dict:
    """Describe the columns recorded in the metadata tables for ``table_name``.

    Args:
        table_name: value matched against ``MetaDataTable.table_name``.
        view_only: when true, only columns with ``is_shown == 1`` are returned
            and each description additionally carries a ``'label'`` entry.

    Returns:
        A dict keyed by a running index (0, 1, ...) in query-result order; each
        value holds ``'column'``, ``'order'`` and ``'ref_column'`` (plus
        ``'label'`` in view-only mode).
    """
    session_factory = sessionmaker(self.engine)
    with session_factory() as session:
        query = (
            session.query(MetaDataTable, MetaDataColumn)
            .filter(MetaDataTable.id == MetaDataColumn.table_id)
            .filter(MetaDataTable.table_name == table_name)
        )
        if view_only:
            query = query.filter(MetaDataColumn.is_shown == 1)

        meta_data = {}
        for position, (_, column) in enumerate(query.all()):
            description = {'column': column.column_name}
            if view_only:
                description['label'] = column.column_label
            description['order'] = column.column_order
            description['ref_column'] = column.ref_column
            meta_data[position] = description
    return meta_data
def get_filters(self, table_name):
    """Collect the filterable columns of ``table_name``.

    Returns:
        A dict mapping each column name with ``show_filter == 1`` to
        ``{'label': <filter_label>, 'widget': None}``.
    """
    session_factory = sessionmaker(self.engine)
    with session_factory() as session:
        matches = (
            session.query(MetaDataTable, MetaDataColumn)
            .filter(MetaDataTable.id == MetaDataColumn.table_id)
            .filter(MetaDataTable.table_name == table_name)
            .filter(MetaDataColumn.show_filter == 1)
            .all()
        )
        _filter_map = {
            column.column_name: {'label': column.filter_label, 'widget': None}
            for _, column in matches
        }
    self.log.debug(f"retrieved {len(_filter_map)} filters: {_filter_map}")
    return _filter_map
def data(self, table, columns: dict, filters) -> list:
    """Load the rows of ``table`` as lists of display values.

    Args:
        table: mapped model class to query.
        columns: column descriptions; each value must provide the attribute
            name under the ``'column'`` key. Values are emitted in the
            iteration order of this dict.
        filters: keyword criteria passed to ``filter_by``; an empty mapping or
            ``None`` selects all rows.

    Returns:
        One list per row. For a column whose name ends in ``_id`` the ``name``
        of the related object (attribute named like the column without the
        ``_id`` suffix) is used; when no related object is set the value is
        ``None``.
    """
    rows = []
    session_factory = sessionmaker(self.engine)
    with session_factory() as session:
        query = session.query(table)
        if filters:
            query = query.filter_by(**filters)
        # Rows are processed while the session is open because the related
        # objects are read through attribute access on each entry.
        for entry in query.all():
            row = []
            for description in columns.values():
                column_name = description['column']
                if str(column_name).endswith("_id"):
                    related = getattr(entry, column_name[:-3])
                    # An unset reference yields None instead of failing on
                    # ``None.name``.
                    row.append(None if related is None else getattr(related, "name"))
                else:
                    row.append(getattr(entry, column_name))
            rows.append(row)
    return rows
def export_db(self, export_type: str, export_file_name: str):
self.log.info(f"export DB to {export_file_name} as {export_type}")
db = {}
export_table_list = self.get_table_names()
for table in export_table_list:
columns = self.get_column_meta_data(table, view_only=False)
if table in self.registry:
model = self.registry[table]
else:
print(f"table {table} is not registered")
continue
__session__ = sessionmaker(self.engine)
with __session__() as session:
rows = session.query(model).all()
entries = []
self.log.debug(f"found {len(rows)} entries")
self.log.debug(f"found {len(columns)} columns")
for row in rows:
# print(row)
entry = {}
for order in columns:
# print(columns[order])
column_name = columns[order]['column']
# print(f"get value {column_name} from {row} of table {table}")
try:
value = getattr(row, column_name)
if isinstance(value, datetime):
entry[column_name] = str(value)
else:
entry[column_name] = value
except AttributeError as error:
self.log.debug("could not get value")
entries.append(entry)
db[table] = entries
export_file = Path(export_file_name)
match export_type:
case "JSON":
json_dump = json.dumps(db, indent=4)
with open(export_file_name, "w") as dump_file:
dump_file.write(json_dump)
case "YAML":
export_file = Path(export_file_name)
case "SQLite":
export_file = Path(export_file_name)
case _:
self.log.debug("unknown export type")
if export_file.exists():
self.log.debug(f"{export_file} exists")
def import_db(self, import_file_name: str, dry_run: bool):
    """Import a database dump, choosing the reader by file extension.

    Only ``.json`` files are actually imported: every top-level key is treated
    as a table name and handed to ``import_table`` together with its items.
    ``.yml``/``.yaml`` and ``.db`` files are recognised but only announced.
    A missing file is reported on stdout and nothing else happens.
    """
    import_file = Path(import_file_name)
    if not import_file.exists():
        print(f"File {import_file_name} does not exist. Do nothing.")
        return

    suffix = import_file.suffix
    self.log.debug(f"evaluate type from file extension: {suffix}")
    if suffix == '.json':
        print("read json file")
        with open(import_file_name, 'r') as json_file:
            json_load = json.load(json_file)
        for table in json_load:
            items = json_load[table]
            print(f"{table}: {len(items)}")
            self.import_table(table, items, dry_run)
    elif suffix in ('.yml', '.yaml'):
        print("read yaml file")
    elif suffix == '.db':
        print("read sqlite file")
def import_table(self, table_name, items, dry_run: bool):
    """Merge ``items`` into the table registered as ``table_name``.

    Each item must carry an ``'id'`` key. Items whose id already exists are
    compared and updated through ``update_entry``; unknown ids are inserted
    through ``add_entry``. Ids present in the database but absent from
    ``items`` are only reported, never deleted.

    Args:
        table_name: key into ``self.registry``.
        items: iterable of dicts (column name -> value).
        dry_run: when true, nothing is committed.

    Raises:
        KeyError: if ``table_name`` is not registered or an item has no ``'id'``.
    """
    model = self.registry[table_name]
    # A set makes the bookkeeping robust against ids that occur twice in items.
    remaining_ids = set(self.get_ids(table_name))
    session_factory = sessionmaker(self.engine)
    for item in items:
        current_id = item['id']
        with session_factory() as session:
            found_item = session.get(model, current_id)
            self.log.debug(f"found: {found_item}")
            if found_item is None:
                self.log.info("item to import not found in database, add new one...")
                self.add_entry(table_name, item, session, dry_run)
                continue
            changed = self.update_entry(found_item, item, dry_run)
            if changed:
                print(f"{current_id} has changed")
                if not dry_run:
                    # Without this commit the attribute changes are discarded
                    # when the session is closed at the end of the block.
                    session.commit()
            remaining_ids.discard(current_id)
    if remaining_ids:
        print("remaining items")
def get_ids(self, table_name: str) -> list:
    """Return the ``id`` of every row of the model registered as ``table_name``."""
    session_factory = sessionmaker(self.engine)
    with session_factory() as session:
        return [row.id for row in session.query(self.registry[table_name]).all()]
def add_entry(self, table_name: str, update_item: dict, session, dry_run: bool):
    """Create a new row of ``table_name`` from ``update_item``.

    Every key of ``update_item`` is set as an attribute on a fresh model
    instance. In dry-run mode the instance is only logged; otherwise it is
    added to ``session`` and committed.
    """
    add_item = self.registry[table_name]()
    for key, value in update_item.items():
        setattr(add_item, key, value)

    if not dry_run:
        session.add(add_item)
        session.commit()
        return
    self.log.info(f"add item {type(add_item)} with id {update_item['id']}")
def update_entry(self, existing_item, update_item: dict, dry_run: bool) -> bool:
    """Compare ``existing_item`` with ``update_item`` and apply differences.

    For every key the current attribute value is compared with the new value;
    when their types differ, the current value is compared in its ``str()``
    form. Differences are printed and logged; attributes are only written
    when ``dry_run`` is false.

    Returns:
        True if at least one difference was found, otherwise False.
    """
    # NOTE(review): the flattened source does not show whether the "changed"
    # flag was also meant to be set in dry-run mode - confirm against the
    # original file.
    changed = False
    for key, update_value in update_item.items():
        current = getattr(existing_item, key)
        comparable = current if type(current) is type(update_value) else str(current)
        if comparable != update_value:
            print(f"{key} has changed: {comparable} != {update_value}")
            if not dry_run:
                setattr(existing_item, key, update_value)
            changed = True
            self.log.info(f"update {key} with {update_value}")
    return changed
def add_link(self, link: str, dry_run: bool):
    """Store ``link`` as a new ``MediaFile`` flagged for review and download.

    Args:
        link: URL to store.
        dry_run: when true, the request is only logged and the database is
            not touched.

    An ``IntegrityError`` raised on commit is rolled back and logged instead
    of being propagated.
    """
    self.log.info(f"add link {link} to media_file")
    if dry_run:
        self.log.info(f"dry run: link {link} is not stored")
        return

    # One timestamp for both fields so a new entry starts with identical
    # creation and modification dates.
    now = datetime.now()
    session_factory = sessionmaker(self.engine)
    with session_factory() as session:
        media_file = MediaFile()
        media_file.id = str(uuid.uuid4())
        media_file.created_date = now
        media_file.last_modified_date = now
        media_file.version = 0
        media_file.url = link
        media_file.review = 1
        media_file.should_download = 1
        try:
            session.add(media_file)
            session.commit()
        except IntegrityError as error:
            session.rollback()
            self.log.info(error.orig)
        else:
            self.log.info(f"entry {media_file} successfully added")
def update_title(self, dry_run=False):
    """Fetch the page title for every ``MediaFile`` that is flagged for review.

    For each entry with ``review == 1`` and a URL, the page is downloaded and
    the content of its ``<title>`` element is stored in ``title``; the review
    flag is then cleared. Entries whose title cannot be retrieved keep their
    review flag so they are picked up again on the next run.

    Args:
        dry_run: when true, the URLs are only logged; nothing is requested
            or stored.
    """
    request_timeout = 30  # seconds; keeps an unresponsive server from blocking the run

    self.log.info("get links to review of media_file")
    session_factory = sessionmaker(self.engine)
    with session_factory() as session:
        links = session.query(MediaFile).filter(MediaFile.review == 1).all()
        self.log.info(f"try to update {len(links)} items")
        for link in links:
            url = link.url
            if url is None:
                self.log.info(f"url has not been set for {link.id}")
                continue
            self.log.info('get title for url {}'.format(url))
            if dry_run:
                continue
            try:
                response = requests.get(url, timeout=request_timeout)
                soup = BeautifulSoup(response.content, "html.parser")
                # Raises AttributeError when the page has no <title> element.
                title = soup.title.string
            except Exception as error:
                # Deliberately best effort: one broken link must not stop the
                # remaining ones. Unlike a bare ``except`` this lets
                # KeyboardInterrupt/SystemExit through.
                self.log.info("Sorry, could not retrieve title")
                self.log.debug(f"title lookup for {url} failed: {error!r}")
                continue
            self.log.info('ID {} has title {}'.format(link.id, title))
            link.title = title
            link.review = 0
            # Commit per entry so finished work survives a later failure.
            session.commit()
def download_file(self, dry_run=False, update: ProgressUpdate=None):
    """Download every ``MediaFile`` that is flagged with ``should_download == 1``.

    On success the reported file name is stored, the download flag is cleared
    and ``cloud_link`` is set to the absolute path string of the file name.
    On failure (``download_url`` returns ``None`` or an empty name) the entry
    keeps its download flag and ``file_name`` is reset to ``None``.

    Args:
        dry_run: when true, the planned downloads are only logged.
        update: currently unused by this method.
    """
    self.log.info("download marked files of media_file")
    session_factory = sessionmaker(self.engine)
    with session_factory() as session:
        links = session.query(MediaFile).filter(MediaFile.should_download == 1).all()
        self.log.info(f"try to download {len(links)} items")
        for link in links:
            url = link.url
            if url is None:
                self.log.info(f"url has not been set for {link.id}")
                continue
            if dry_run:
                self.log.info(f"download {link.url} to {self.config.get('media', 'dir')}")
                continue

            # The downloader is started with the URL as a command-line
            # argument, so pass the URL string, not the mapped object.
            filename = self.download_url(url)
            if not filename:
                link.file_name = None
                link.should_download = 1
            else:
                downloaded = Path(filename)
                # NOTE(review): the original built an id-based name with
                # Path.with_name() but discarded the result, so the stored
                # name has always been the one reported by the downloader.
                link.file_name = downloaded.name
                link.should_download = 0
                # Store a plain string rather than a Path object.
                # NOTE(review): absolute() resolves against the current
                # working directory of this process - confirm that this
                # matches the directory the file was downloaded to.
                link.cloud_link = str(downloaded.absolute())
            # Commit per entry so finished downloads survive a later failure.
            session.commit()
def parse_output(self, lines_list):
    """Extract the downloaded file name from the downloader's output lines.

    Three kinds of lines are recognised:

    * ``[download] <name> has already been downloaded``
    * ``[<tag>] Destination: <name>`` (any tag, e.g. ``[download]``)
    * ``[Merger] Merging formats into "<name>"``

    Args:
        lines_list: iterable of output lines (without line endings).

    Returns:
        The name taken from the last recognised line, or ``""`` when no line
        matched.
    """
    already_marker = ' has already been downloaded'
    destination_marker = 'Destination: '
    merger_prefix = '[Merger] Merging formats into '

    file_name = ""
    for line in lines_list:
        if already_marker in line:
            # Checked first so that a title containing the word
            # "Destination" is not mistaken for a destination line.
            file_name = line.rpartition(already_marker)[0].removeprefix('[download] ')
            self.log.info('found file: "%s"', file_name)
        elif line.startswith(merger_prefix):
            # The merged file supersedes the per-format destination files
            # announced earlier in the output.
            file_name = line[len(merger_prefix):].strip('"')
            self.log.info('new file: "%s"', file_name)
        elif destination_marker in line:
            # Split on the marker instead of a fixed offset so tags of any
            # length work and an empty name stays empty.
            file_name = line.partition(destination_marker)[2]
            self.log.info('new file: "%s"', file_name)
    return file_name
def download_url(self, video_url):
    """Run the configured downloader for ``video_url`` and return the file name.

    The executable is read from the ``media``/``yt-dlp`` configuration entry
    and started inside the ``media``/``dir`` directory; if that directory
    does not exist the current working directory is used instead.

    Args:
        video_url: URL passed to the downloader as its only argument.

    Returns:
        The file name reported by ``parse_output``, or ``None`` when the
        downloader could not be started or exited with a non-zero status.
    """
    media_dir = Path(self.config.get('media', 'dir'))
    if not media_dir.exists():
        media_dir = Path().absolute()
    self.log.info(f"download video to {media_dir}")

    command = [self.config.get('media', 'yt-dlp'), video_url]
    try:
        result = subprocess.run(command, cwd=media_dir, capture_output=True, text=True)
    except OSError as error:
        # Raised e.g. when the configured executable does not exist; report
        # it like any other failed download so the caller can carry on.
        self.log.info(f"could not start download of {video_url}: {error}")
        return None

    if result.returncode != 0:
        self.log.info(
            f"download of {video_url} failed with exit code {result.returncode}: "
            f"{result.stderr.strip()}"
        )
        return None

    # Runs of spaces are collapsed before the lines are parsed.
    output = re.sub(' +', ' ', result.stdout)
    return self.parse_output(output.splitlines())
def check_files(self):
    """Log the configured media directory if it exists; otherwise do nothing."""
    media_dir = Path(self.config.get('media', 'dir'))
    if media_dir.exists():
        self.log.info(f"check files in {media_dir}")