From 10834df92b7fef8f1d758fe5ef8a9209edee6c21 Mon Sep 17 00:00:00 2001 From: Thomas Peetz Date: Tue, 8 Apr 2025 11:49:52 +0200 Subject: [PATCH] add script for import data --- scripts/import.py | 43 ++ scripts/schema/__init__.py | 8 + scripts/schema/admin.py | 78 ++++ scripts/schema/base.py | 31 ++ scripts/schema/bookshelf.py | 51 +++ scripts/schema/comic.py | 100 +++++ scripts/schema/database.py | 391 ++++++++++++++++++ scripts/schema/media.py | 100 +++++ scripts/schema/metadata.py | 42 ++ scripts/schema/tysc.py | 100 +++++ .../admin/services/MetaDataService.java | 18 +- .../data/services/DataManagementService.java | 7 +- .../de/thpeetz/kontor/tysc/data/Sport.java | 11 + .../kontor/tysc/services/SportService.java | 48 +++ 14 files changed, 1024 insertions(+), 4 deletions(-) create mode 100644 scripts/import.py create mode 100644 scripts/schema/__init__.py create mode 100644 scripts/schema/admin.py create mode 100644 scripts/schema/base.py create mode 100644 scripts/schema/bookshelf.py create mode 100644 scripts/schema/comic.py create mode 100644 scripts/schema/database.py create mode 100644 scripts/schema/media.py create mode 100644 scripts/schema/metadata.py create mode 100644 scripts/schema/tysc.py diff --git a/scripts/import.py b/scripts/import.py new file mode 100644 index 0000000..dda286f --- /dev/null +++ b/scripts/import.py @@ -0,0 +1,43 @@ +""" +import data from json file to MariaDB +""" +from argparse import ArgumentParser, ArgumentDefaultsHelpFormatter + +import yaml +from sqlalchemy import create_engine +from sqlalchemy.orm import sessionmaker +from platformdirs import PlatformDirs +from pathlib import Path + +from schema import Base, KontorDB +from setup import get_database_cursors, create_tables, get_logger, get_scripts, get_meta_data + +parser = ArgumentParser(formatter_class=ArgumentDefaultsHelpFormatter) +parser.add_argument('--recreate-db', action='store_true') +parser.add_argument('--verbose', '-v', action='count', default=0) 
+parser.add_argument('--file', '-f', default='~/data.json') +args = parser.parse_args() + + +if __name__ == '__main__': + logger = get_logger(args.verbose) + logger.info('kontor.import started') + dirs = PlatformDirs("kontor") + database_config = Path(dirs.user_config_dir, 'database-config.yaml') + with open(database_config, 'rt') as f: + db_config = yaml.safe_load(f.read()) + connect_string = ('mariadb+mariadbconnector://{}:{}@{}:{}/{}'.format( + db_config['mariadb']['user'], + db_config['mariadb']['password'], + db_config['mariadb']['host'], + db_config['mariadb']['port'], + db_config['mariadb']['database'] + )) + engine = create_engine(connect_string) + Base.metadata.create_all(bind=engine, checkfirst=True) + __session__ = sessionmaker(bind=engine) + kontor_db = KontorDB(engine, logger) + if args.recreate_db: + kontor_db.delete_entries() + kontor_db.import_db(args.file) + logger.info('kontor.import finished') diff --git a/scripts/schema/__init__.py b/scripts/schema/__init__.py new file mode 100644 index 0000000..66fabbc --- /dev/null +++ b/scripts/schema/__init__.py @@ -0,0 +1,8 @@ +from .admin import User, Token, Role, AuthorizationMatrix, ModuleData, MailAccount, Mail +from .bookshelf import Article, Book, Author, BookshelfPublisher, ArticleAuthor, BookAuthor +from .comic import Comic, Artist, Publisher, Issue, StoryArc, TradePaperback, Volume, ComicWork, WorkType +from .metadata import MetaDataTable, MetaDataColumn +from .tysc import Card, CardSet, Sport, Team, FieldPosition, Rooster, Player, Vendor +from .media import MediaFile, MediaArticle, MediaVideo +from .base import Base +from .database import KontorDB, ColumnEntry diff --git a/scripts/schema/admin.py b/scripts/schema/admin.py new file mode 100644 index 0000000..8dd33bd --- /dev/null +++ b/scripts/schema/admin.py @@ -0,0 +1,78 @@ +from datetime import datetime + +from sqlalchemy import Column, DateTime, ForeignKey, Integer, String +from sqlalchemy.dialects.mysql import BIT +from sqlalchemy.orm import 
relationship, mapped_column, Mapped + +from .base import Base, BaseMixin + + +class User(Base, BaseMixin): + __tablename__ = 'user' + first_name = Column(String(255)) + last_name = Column(String(255)) + user_name = Column(String(255), nullable=False) + email = Column(String(255)) + password = Column(String(255)) + enabled = Column(BIT(1)) + matrix = relationship("AuthorizationMatrix") + tokens = relationship("Token") + + def get_full_name(self) -> str: + full_name = "" + if self.first_name is not None: + full_name += self.first_name + if self.last_name is not None: + if len(full_name) > 0: + full_name += " " + full_name += self.last_name + return full_name + + +class Token(Base, BaseMixin): + __tablename__ = "token" + token = Column(String(255), nullable=False, unique=True) + name = Column(String(255)) + last_used_date: Mapped[datetime] = mapped_column() + enabled = Column(BIT(1)) + user_id = Column(String(255), ForeignKey("user.id"), nullable=False) + user = relationship("User", back_populates="tokens") + + +class Role(Base, BaseMixin): + __tablename__ = "role" + name = Column(String(255), nullable=False) + matrix = relationship("AuthorizationMatrix") + + +class AuthorizationMatrix(Base, BaseMixin): + __tablename__ = "authorization_matrix" + user_id = Column(String, ForeignKey("user.id"), nullable=False) + user = relationship("User", back_populates="matrix") + role_id = Column(String, ForeignKey("role.id"), nullable=False) + role = relationship("Role", back_populates="matrix") + + +class ModuleData(Base, BaseMixin): + __tablename__ = "module_data" + module_name = Column(String(255), nullable=False) + import_data = Column(BIT(1)) + + +class MailAccount(Base, BaseMixin): + __tablename__ = "mail_account" + host = Column(String(255)) + port = Column(Integer) + protocol = Column(String(255)) + user_name = Column(String(255)) + password = Column(String(255)) + start_tls = Column(BIT(1)) + + +class Mail(Base, BaseMixin): + __tablename__ = "mail" + folder: Mapped[str] = 
mapped_column() + subject: Mapped[str] = mapped_column() + body: Mapped[str] = mapped_column() + sent_date: Mapped[datetime] = mapped_column() + received_date: Mapped[datetime] = mapped_column() diff --git a/scripts/schema/base.py b/scripts/schema/base.py new file mode 100644 index 0000000..4a354e7 --- /dev/null +++ b/scripts/schema/base.py @@ -0,0 +1,31 @@ +import uuid +from datetime import datetime + +from sqlalchemy import func, Column, String +from sqlalchemy.dialects.mysql import BIT +from sqlalchemy.orm import DeclarativeBase, Mapped, mapped_column + + +class Base(DeclarativeBase): + pass + + +class BaseMixin: + id = Column(String(255), primary_key=True, default=uuid.uuid4()) + # id: Mapped[str] = mapped_column(primary_key=True, default=uuid.uuid4()) + # created_date = Column(DateTime) + created_date: Mapped[datetime] = mapped_column(default=func.now()) + # last_modified_date = Column(DateTime) + last_modified_date: Mapped[datetime] = mapped_column(default=func.now()) + # version = Column(Integer) + version: Mapped[int] = mapped_column(default=0) + + +class BaseVideoMixin: + cloud_link = Column(String(255)) + file_name = Column(String(255)) + path = Column(String(255)) + review = Column(BIT(1)) + title = Column(String(255)) + url = Column(String(255), unique=True) + should_download = Column(BIT(1)) diff --git a/scripts/schema/bookshelf.py b/scripts/schema/bookshelf.py new file mode 100644 index 0000000..ab0fe5a --- /dev/null +++ b/scripts/schema/bookshelf.py @@ -0,0 +1,51 @@ +from sqlalchemy import Column, DateTime, ForeignKey, Integer, String +from sqlalchemy.dialects.mysql import BIT +from sqlalchemy.orm import relationship + +from .base import Base, BaseMixin + + +class Article(Base, BaseMixin): + __tablename__ = 'article' + title = Column(String(length=255), unique=True) + article_authors = relationship("ArticleAuthor") + + +class Author(Base, BaseMixin): + __tablename__ = 'author' + first_name = Column(String(255)) + last_name = Column(String(255)) + 
article_authors = relationship("ArticleAuthor") + book_authors = relationship("BookAuthor") + + +class BookshelfPublisher(Base, BaseMixin): + __tablename__ = 'bookshelf_publisher' + name = Column(String(length=255), unique=True) + books = relationship("Book") + + +class Book(Base, BaseMixin): + __tablename__ = 'book' + isbn = Column(String(255), unique=True) + title = Column(String(255)) + year = Column(Integer, nullable=False) + publisher_id = Column(String, ForeignKey('bookshelf_publisher.id'), nullable=False) + publisher = relationship('BookshelfPublisher', back_populates="books") + book_authors = relationship("BookAuthor") + + +class ArticleAuthor(Base, BaseMixin): + __tablename__ = 'article_author' + article_id = Column(String, ForeignKey('article.id'), nullable=False) + article = relationship('Article', back_populates="article_authors") + author_id = Column(String, ForeignKey('author.id'), nullable=False) + author = relationship('Author', back_populates="article_authors") + + +class BookAuthor(Base, BaseMixin): + __tablename__ = 'book_author' + author_id = Column(String, ForeignKey('author.id'), nullable=False) + author = relationship('Author', back_populates="book_authors") + book_id = Column(String, ForeignKey('book.id'), nullable=False) + book = relationship('Book', back_populates="book_authors") diff --git a/scripts/schema/comic.py b/scripts/schema/comic.py new file mode 100644 index 0000000..fe6ec19 --- /dev/null +++ b/scripts/schema/comic.py @@ -0,0 +1,100 @@ +from sqlalchemy import Column, DateTime, ForeignKey, Integer, String +from sqlalchemy.dialects.mysql import BIT +from sqlalchemy.orm import relationship + +from .base import Base, BaseMixin + + +class Publisher(Base, BaseMixin): + __tablename__ = "publisher" + name = Column(String(length=255), unique=True) + comics = relationship("Comic") + + def __repr__(self): + return f'Publisher({self.id} {self.name})' + + def __str__(self): + return self.__repr__() + + +class Comic(Base, BaseMixin): + 
__tablename__ = 'comic' + title = Column(String(length=255), unique=True) + publisher_id = Column(String, ForeignKey('publisher.id'), nullable=False) + publisher = relationship("Publisher", back_populates="comics") + current_order = Column(BIT(1)) + completed = Column(BIT(1)) + issues = relationship("Issue") + story_arcs = relationship("StoryArc") + trade_paperbacks = relationship("TradePaperback") + volumes = relationship("Volume") + comic_works = relationship("ComicWork") + + def __repr__(self): + return f'Comic({self.id} {self.version} {self.title} {self.publisher.name})' + + def __str__(self): + return f'{self.title}({self.id})' + + +class Volume(Base, BaseMixin): + __tablename__ = "volume" + name = Column(String(length=255), nullable=False) + comic_id = Column(String, ForeignKey("comic.id"), nullable=False) + comic = relationship("Comic", back_populates="volumes") + issues = relationship("Issue") + + +class TradePaperback(Base, BaseMixin): + __tablename__ = "trade_paperback" + name = Column(String(length=255), nullable=False) + issue_start = Column(Integer) + issue_end = Column(Integer) + comic_id = Column(String, ForeignKey("comic.id"), nullable=False) + comic = relationship("Comic", back_populates="trade_paperbacks") + + +class StoryArc(Base, BaseMixin): + __tablename__ = "story_arc" + name = Column(String(length=255), nullable=False) + comic_id = Column(String, ForeignKey("comic.id"), nullable=False) + comic = relationship("Comic", back_populates="story_arcs") + + +class Issue(Base, BaseMixin): + __tablename__ = "issue" + issue_number = Column(String(255)) + in_stock = Column(BIT(1)) + is_read = Column(BIT(1)) + comic_id = Column(String, ForeignKey("comic.id"), nullable=False) + comic = relationship("Comic", back_populates="issues") + volume_id = Column(String, ForeignKey("volume.id"), nullable=True) + volume = relationship("Volume", back_populates="issues") + + +class Artist(Base, BaseMixin): + __tablename__ = "artist" + name = Column(String(length=255), 
nullable=False) + comic_works = relationship("ComicWork") + + +class WorkType(Base, BaseMixin): + __tablename__ = "worktype" + name = Column(String(length=255), nullable=False, unique=True) + comic_works = relationship("ComicWork") + + def __repr__(self): + return f'Worktype({self.id} {self.version} {self.name} {len(self.comic_works)})' + + def __str__(self): + return f'{self.name}({self.id})' + + +class ComicWork(Base, BaseMixin): + __tablename__ = "comic_work" + comic_id = Column(String, ForeignKey("comic.id"), nullable=False) + comic = relationship("Comic", back_populates="comic_works") + artist_id = Column(String, ForeignKey("artist.id"), nullable=False) + artist = relationship("Artist", back_populates="comic_works") + work_type_id = Column(String, ForeignKey("worktype.id"), nullable=False) + work_type = relationship("WorkType", back_populates="comic_works") diff --git a/scripts/schema/database.py b/scripts/schema/database.py new file mode 100644 index 0000000..d8b6618 --- /dev/null +++ b/scripts/schema/database.py @@ -0,0 +1,391 @@ +import json +import uuid +from datetime import datetime +from enum import Enum, auto +from logging import Logger +from pathlib import Path + +from sqlalchemy import Engine, select +from sqlalchemy.exc import IntegrityError +from sqlalchemy.orm import sessionmaker + +from .tysc import Card, CardSet, Rooster, Team, FieldPosition, Player, Vendor, Sport +from .comic import Issue, TradePaperback, StoryArc, Volume, ComicWork, Artist, Comic, Publisher, WorkType +from .bookshelf import ArticleAuthor, BookAuthor, BookshelfPublisher, Article, Book, Author +from .admin import Mail, MailAccount, ModuleData, Role, User, Token, AuthorizationMatrix +from .metadata import MetaDataTable, MetaDataColumn +from .media import MediaVideo, MediaArticle, MediaFile, MediaActor, MediaActorFile + + +class ColumnEntry(Enum): + COLUMN_NAME = 'column' + COLUMN_LABEL = 'label' + COLUMN_ORDER = 'order' + COLUMN_REF_COLUMN = 'ref_column' + COLUMN_TYPE = 'type' + 
COLUMN_WIDGET = 'widget' + + +class StatusType(Enum): + UNKNOWN = auto() + FILE_NAME = auto() + FILE_ID = auto() + DUPLICATE = auto() + CLOUD_LINK = auto() + CLOUD_LINK_ID = auto() + + +class KontorDB: + + def __init__(self, db_engine: Engine, log: Logger): + self.engine = db_engine + self.registry = {} + self.init_registry() + self.log = log + + def init_registry(self): + self.registry[Card.__tablename__] = Card + self.registry[CardSet.__tablename__] = CardSet + self.registry[Rooster.__tablename__] = Rooster + self.registry[Team.__tablename__] = Team + self.registry[FieldPosition.__tablename__] = FieldPosition + self.registry[Player.__tablename__] = Player + self.registry[Vendor.__tablename__] = Vendor + self.registry[Sport.__tablename__] = Sport + self.registry[Issue.__tablename__] = Issue + self.registry[TradePaperback.__tablename__] = TradePaperback + self.registry[StoryArc.__tablename__] = StoryArc + self.registry[Volume.__tablename__] = Volume + self.registry[ComicWork.__tablename__] = ComicWork + self.registry[Artist.__tablename__] = Artist + self.registry[Comic.__tablename__] = Comic + self.registry[Publisher.__tablename__] = Publisher + self.registry[WorkType.__tablename__] = WorkType + self.registry[ArticleAuthor.__tablename__] = ArticleAuthor + self.registry[BookAuthor.__tablename__] = BookAuthor + self.registry[BookshelfPublisher.__tablename__] = BookshelfPublisher + self.registry[Article.__tablename__] = Article + self.registry[Book.__tablename__] = Book + self.registry[Author.__tablename__] = Author + self.registry[MediaFile.__tablename__] = MediaFile + self.registry[MediaActor.__tablename__] = MediaActor + self.registry[MediaActorFile.__tablename__] = MediaActorFile + self.registry[MediaArticle.__tablename__] = MediaArticle + self.registry[MediaVideo.__tablename__] = MediaVideo + self.registry[MetaDataColumn.__tablename__] = MetaDataColumn + self.registry[MetaDataTable.__tablename__] = MetaDataTable + self.registry[AuthorizationMatrix.__tablename__] 
= AuthorizationMatrix + self.registry[Token.__tablename__] = Token + self.registry[User.__tablename__] = User + self.registry[Role.__tablename__] = Role + self.registry[ModuleData.__tablename__] = ModuleData + self.registry[MailAccount.__tablename__] = MailAccount + self.registry[Mail.__tablename__] = Mail + + def get_table_names(self) -> list: + result = [] + __session__ = sessionmaker(self.engine) + with __session__() as session: + tables = session.scalars(select(MetaDataTable)).all() + result = [table.table_name for table in tables] + return result + + def get_table_by_name(self, table_name: str) -> dict: + result = {} + __session__ = sessionmaker(self.engine) + _filter = {'table_name': table_name} + with __session__() as session: + table = session.query(MetaDataTable).filter_by(**_filter).one() + result['id'] = table.id + result['table_name'] = table.table_name + return result + + def get_column_meta_data(self, table_name: str, view_only=True) -> dict: + meta_data = {} + order = 0 + __session__ = sessionmaker(self.engine) + columns = list() + table_info = self.get_table_by_name(table_name) + _filters = {'table_id': table_info['id']} + if view_only: + _filters['is_shown'] = True + with __session__() as session: + columns = session.query(MetaDataColumn).filter_by(**_filters).all() + for column in columns: + # self.log.info("get_column_meta_data: %s %s %d", column.column_name, column.column_label, column.column_order) + meta_data[order] = { + ColumnEntry.COLUMN_NAME: column.column_name, + ColumnEntry.COLUMN_LABEL: column.column_label, + ColumnEntry.COLUMN_ORDER: column.column_order, + ColumnEntry.COLUMN_REF_COLUMN: column.ref_column, + ColumnEntry.COLUMN_TYPE: column.column_type + } + order += 1 + return meta_data + + def get_columns(self, table_name: str) -> dict: + columns = {} + order = 0 + __session__ = sessionmaker(self.engine) + table_info = self.get_table_by_name(table_name) + _filters = {'table_id': table_info['id']} + with __session__() as session: + for 
column in session.query(MetaDataColumn).filter_by(**_filters).all(): + columns[column.column_name] = { + ColumnEntry.COLUMN_ORDER: column.column_order, + ColumnEntry.COLUMN_TYPE: column.column_type + } + return columns + + def get_filters(self, table_name: str) -> dict: + _filter_map = {} + __session__ = sessionmaker(self.engine) + table_info = self.get_table_by_name(table_name) + _filters = {'table_id': table_info['id'], 'show_filter': True} + with __session__() as session: + for column in session.query(MetaDataColumn).filter_by(**_filters).all(): + _filter_map[column.column_name] = { + ColumnEntry.COLUMN_LABEL: column.filter_label, + ColumnEntry.COLUMN_WIDGET: None + } + return _filter_map + + def data(self, table_name: str, columns: dict, filters: dict) -> list: + data = [] + __session__ = sessionmaker(self.engine) + table = self.registry[table_name] + with __session__() as session: + entries = [] + if len(filters) == 0: + entries = session.scalars(select(table)).all() + else: + entries = session.scalars(select(table).filter_by(**filters)).all() + for entry in entries: + # self.log.info("data: %s", entry) + row = [] + for order in columns.keys(): + column_name = columns[order][ColumnEntry.COLUMN_NAME] + ref_column = columns[order][ColumnEntry.COLUMN_REF_COLUMN] + if str(column_name).endswith("_id"): + ref_table = column_name[:-3] + ref = getattr(entry, ref_table) + value = getattr(ref, ref_column) + row.append(value) + else: + row.append(getattr(entry, column_name)) + data.append(row) + # self.log.info("data: %s", data) + return data + + def export_db(self, export_type: str, export_file_name: str) -> dict: + results = {} + db = {} + export_table_list = self.get_table_names() + for table in export_table_list: + columns = self.get_column_meta_data(table, view_only=False) + if table in self.registry: + model = self.registry[table] + else: + self.log.info(f"table {table} is not registered") + continue + __session__ = sessionmaker(self.engine) + with __session__() as 
session: + rows = session.query(model).all() + entries = [] + for row in rows: + # print(row) + entry = {} + for order in columns: + # print(columns[order]) + column_name = columns[order][ColumnEntry.COLUMN_NAME] + # print(f"get value {column_name} from {row} of table {table}") + try: + value = getattr(row, column_name) + if isinstance(value, datetime): + entry[column_name] = str(value) + else: + entry[column_name] = value + except AttributeError: + pass + entries.append(entry) + db[table] = entries + results[table] = len(entries) + match export_type: + case "JSON": + json_dump = json.dumps(db, indent=4) + with open(export_file_name, "w") as dump_file: + dump_file.write(json_dump) + case "YAML": + export_file = Path(export_file_name) + case "SQLite": + export_file = Path(export_file_name) + self.log.info(f"{len(results)} tables exported") + return results + + def import_db(self, import_file_name: str) -> dict: + result = {} + import_file = Path(import_file_name) + if not import_file.exists(): + self.log.info(f"File {import_file_name} does not exist. 
Do nothing.") + return result + match import_file.suffix: + case '.json': + print("read json file") + with open(import_file_name, 'r') as json_file: + json_load = json.load(json_file) + for table in json_load: + self.log.info(f"{table}: {len(json_load[table])}") + result[table] = self.import_table(table, json_load[table]) + case '.yml': + print("read yaml file") + case '.yaml': + print("read yaml file") + case '.db': + print("read sqlite file") + return result + + def import_table(self, table_name: str, items:list) -> dict: + result = {} + updated = [] + added = [] + remaining = [] + existing_ids = self.get_ids(table_name) + self.log.info(f"found {len(existing_ids)} existing ids for table {table_name}") + for item in items: + current_id = item['id'] + # print(f"import item: {item}") + found_item = None + __session__ = sessionmaker(self.engine) + with __session__() as session: + found_item = session.get(self.registry[table_name], current_id) + # print(f"found item: {found_item}") + if found_item is not None: + changed = self.update_entry(table_name, current_id, item) + updated.append(item) + if changed: + self.log.info(f"{current_id} has changed") + updated.append(item) + existing_ids.remove(current_id) + else: + try: + self.add_entry(table_name, item) + added.append(item) + except IntegrityError as error: + self.log.info(f"Could not add item, due to: {error.detail}") + if len(existing_ids) > 0: + print(f"remaining items for {table_name}: {existing_ids}") + remaining.extend(existing_ids) + result['updated'] = updated + result['added'] = added + result['remaining'] = remaining + return result + + def get_ids(self, table_name: str) -> list: + existing_ids = [] + __session__ = sessionmaker(self.engine) + with __session__() as session: + items = session.query(self.registry[table_name]).all() + for item in items: + existing_ids.append(getattr(item, 'id')) + return existing_ids + + def add_entry(self, table_name: str, update_item: dict): + self.log.debug(f"add entry to 
table {table_name} with {update_item}") + __session__ = sessionmaker(self.engine) + with __session__() as session: + add_item = self.registry[table_name]() + for key in update_item.keys(): + update_value = update_item[key] + setattr(add_item, key, update_value) + session.add(add_item) + session.commit() + + def update_entry(self, table_name, current_id, update_item: dict) -> bool: + # self.log.info("update entry to table %s", table_name) + __session__ = sessionmaker(self.engine) + with __session__() as session: + existing_item = session.query(self.registry[table_name]).get(current_id) + changed = False + for key in update_item.keys(): + update_value = update_item[key] + existing_value = getattr(existing_item, key) + if type(existing_value) is not type(update_value): + existing_value = str(existing_value) + if existing_value != update_value: + self.log.info(f"{key} has changed: {existing_value} != {update_value}") + setattr(existing_item, key, update_value) + session.commit() + changed = True + self.log.info(f"update {key} with {update_value}") + return changed + + def add_link(self, link: str) -> dict: + result = {} + __session__ = sessionmaker(self.engine) + with __session__() as session: + media_file = MediaFile() + media_file.id = str(uuid.uuid4()) + media_file.created_date = datetime.now() + media_file.last_modified_date = datetime.now() + media_file.version = 0 + media_file.url = link + media_file.review = 1 + media_file.should_download = 1 + try: + session.add(media_file) + session.commit() + result['added'] = {'url': media_file.url, 'title': media_file.title, 'review': media_file.review, 'download': media_file.should_download} + except IntegrityError as error: + session.rollback() + result['error'] = error.orig + return result + + def update_titles(self) -> dict: + update_list = {} + __session__ = sessionmaker(self.engine) + _filter = { 'review': True} + with __session__() as session: + links = session.query(MediaFile).filter_by(**_filter).all() + for link 
# ---------------------------------------------------------------------------
# scripts/schema/database.py (continued) -- methods of KontorDB
# ---------------------------------------------------------------------------
# The visible range opens in the middle of a method whose ``def`` line lies
# above it.  That tail is kept verbatim in the comment below and was not
# reviewed, because its beginning is not visible:
#
#   in links: + url = link.url + if url is None: + continue + link.update_title() + session.commit() + update_list[link.id] = link.title + return update_list
#
# The ``class KontorDB`` statement lies above the visible range as well.  The
# four definitions that follow are its methods; indent them one level when
# merging them back into the class body.

def get_download_list(self) -> list:
    """Return the ids of all media files that are flagged for download.

    Rows without a URL are skipped.
    """
    __session__ = sessionmaker(self.engine)
    with __session__() as session:
        links = session.query(MediaFile).filter_by(should_download=True).all()
        return [link.id for link in links if link.url is not None]


def download_file(self, entry_id: str, download_dir="/data/media", dl_tool="yt-dlp") -> str:
    """Download the media file stored under *entry_id* and persist the outcome.

    Args:
        entry_id: primary key of the ``media_file`` row.
        download_dir: directory the download tool is started in.
        dl_tool: executable used for the download.

    Returns:
        The file name recorded on the row; ``None`` when the tool failed or
        its output named no file.

    Raises:
        LookupError: if no row with *entry_id* exists (this used to surface
            as an ``AttributeError`` on ``None``).
    """
    __session__ = sessionmaker(self.engine)
    with __session__() as session:
        # Session.get() is the replacement for the legacy Query.get().
        link = session.get(MediaFile, entry_id)
        if link is None:
            raise LookupError(f"no media_file entry with id {entry_id!r}")
        link.download_file(download_dir, dl_tool)
        session.commit()
        # Read the value while the session is open: commit() expires loaded
        # attributes by default and a detached instance cannot reload them.
        file_name = link.file_name
    return file_name


def delete_entries(self):
    """Delete all rows of every mapped class in ``self.registry``.

    Each class is emptied and committed in its own session.
    """
    __session__ = sessionmaker(self.engine)  # one factory serves every table
    for table in self.registry.values():
        with __session__() as session:
            # Rows are deleted one by one through the ORM, as before; a bulk
            # DELETE would bypass the ORM's relationship handling.
            for item in session.query(table).all():
                session.delete(item)
            session.commit()


def check_files(self):
    """Placeholder, not implemented yet."""
    pass


# ---------------------------------------------------------------------------
# diff --git a/scripts/schema/media.py b/scripts/schema/media.py
# new file mode 100644
# index 0000000..266fe48
# --- /dev/null
# +++ b/scripts/schema/media.py
# @@ -0,0 +1,100 @@
# ---------------------------------------------------------------------------
import re
import subprocess
from datetime import datetime
from pathlib import Path

import requests
from bs4 import BeautifulSoup
from sqlalchemy import Column, DateTime, Integer, String, ForeignKey
from sqlalchemy.dialects.mysql import BIT
from sqlalchemy.orm import relationship

from .base import Base, BaseMixin, BaseVideoMixin

# Output lines of the download tool that name the resulting file.  Blanks are
# tolerated in the prefix only; the file name itself is captured unchanged.
_DESTINATION_RE = re.compile(r'^\[download\]\s+Destination:\s+(?P<name>.+)$')
_ALREADY_THERE_RE = re.compile(
    r'^\[download\]\s+(?P<name>.+) has already been downloaded')


class MediaFile(Base, BaseMixin, BaseVideoMixin):
    """Row of ``media_file``: a URL whose page title and file can be fetched.

    The columns used below (``url``, ``title``, ``review``, ``file_name``,
    ``cloud_link``, ``should_download``, ``last_modified_date``) are not
    declared here; presumably they come from the mixins in ``base.py``.
    """
    __tablename__ = 'media_file'
    # back_populates on both sides keeps parent and child in sync in memory.
    media_actor_files = relationship("MediaActorFile", back_populates="media_file")

    def __repr__(self):
        # The second field used to repeat the title; the URL is assumed to be
        # what was meant.
        return f'MediaFile({self.id} {self.title} {self.url})'

    def __str__(self):
        return f'{self.title}({self.id})'

    def update_title(self) -> None:
        """Fetch ``self.url`` and store the page's ``<title>`` text.

        Best effort: on any failure, or when the page has no usable title,
        ``title`` becomes ``None`` and ``review`` is set to 1; otherwise
        ``review`` is set to 0.  ``last_modified_date`` is always refreshed.
        """
        print(f"update title for {self.url}")
        title = None
        try:
            # The timeout keeps one unresponsive host from blocking a batch.
            response = requests.get(self.url, timeout=30)
            tag = BeautifulSoup(response.content, "html.parser").title
            if tag is not None and tag.string is not None:
                title = str(tag.string).strip() or None
        except Exception:
            # Still best effort, but unlike the former bare ``except`` this
            # lets KeyboardInterrupt and SystemExit through.
            title = None
        self.title = title
        self.review = 0 if title is not None else 1
        self.last_modified_date = datetime.now()

    def download_file(self, download_dir: str, dl_tool: str):
        """Run *dl_tool* on ``self.url`` inside *download_dir* and record the result.

        * tool fails (non-zero exit): ``review`` is set to 1, everything else
          is left alone so the download can be retried;
        * tool succeeds but names no file: ``review`` and ``should_download``
          are set to 1 and ``file_name`` is cleared;
        * tool succeeds and names a file: ``file_name`` and ``cloud_link``
          are stored and ``should_download`` is cleared.
        """
        print(f"download file for {self.url} to {download_dir}")
        result = subprocess.run([dl_tool, self.url], cwd=download_dir, capture_output=True, text=True)
        if result.returncode != 0:
            # A failed run used to go unnoticed.
            self.review = 1
        else:
            # The output is no longer whitespace-collapsed: doing so altered
            # file names that contain consecutive blanks.
            file_name = self.__parse_output__(result.stdout.splitlines())
            if file_name is None:
                self.review = 1
                self.should_download = 1
                self.file_name = None
            else:
                # The tool ran in download_dir, so a relative name is relative
                # to that directory, not to this process's working directory.
                download_file = Path(download_dir, file_name)
                self.should_download = 0
                self.file_name = download_file.name
                self.cloud_link = str(download_file.absolute())
        self.last_modified_date = datetime.now()

    def __parse_output__(self, lines_list):
        """Return the file name reported in the download tool's output lines.

        Recognises ``[download] Destination: <name>`` and
        ``[download] <name> has already been downloaded``; the last matching
        line wins.  As before, the result (or ``None``) is also stored in
        ``self.file_name``.
        """
        found = None
        for line in lines_list:
            match = _DESTINATION_RE.match(line) or _ALREADY_THERE_RE.match(line)
            if match:
                found = match.group('name')
        self.file_name = found
        return found


class MediaActor(Base, BaseMixin):
    """Row of ``media_actor``."""
    __tablename__ = 'media_actor'
    name = Column(String(255))
    media_actor_files = relationship("MediaActorFile", back_populates="media_actor")


class MediaActorFile(Base, BaseMixin):
    """Association row linking a ``MediaActor`` to a ``MediaFile``."""
    __tablename__ = 'media_actor_file'
    media_actor_id = Column(String(255), ForeignKey("media_actor.id"), nullable=False)
    media_actor = relationship("MediaActor", back_populates="media_actor_files")
    media_file_id = Column(String(255), ForeignKey("media_file.id"), nullable=True)
    media_file = relationship("MediaFile", back_populates="media_actor_files")


class MediaArticle(Base, BaseMixin):
    """Row of ``media_article``."""
    __tablename__ = 'media_article'
    review = Column(BIT(1))
    title = Column(String(255))
    url = Column(String(255), unique=True)


class MediaVideo(Base, BaseMixin):
    """Row of ``media_video``."""
    __tablename__ = 'media_video'
    cloud_link = Column(String(255))
    file_name = Column(String(255))
    path = Column(String(255))
    review = Column(BIT(1))
    title = Column(String(255))
    url = Column(String(255), unique=True)
    should_download = Column(BIT(1))


# ---------------------------------------------------------------------------
# diff --git a/scripts/schema/metadata.py b/scripts/schema/metadata.py
# new file mode 100644
# index 0000000..f9538bb
# --- /dev/null
# +++ b/scripts/schema/metadata.py
# @@ -0,0 +1,42 @@
# ---------------------------------------------------------------------------
from sqlalchemy import Column, String, ForeignKey, DateTime, Integer, Boolean
from sqlalchemy.dialects.mysql import BIT
from sqlalchemy.orm import relationship

from .base import Base, BaseMixin


class MetaDataTable(Base, BaseMixin):
    """Row of ``meta_data_table``: one described table and its columns."""
    __tablename__ = 'meta_data_table'
    table_name = Column(String(255), unique=True)
    table_columns = relationship("MetaDataColumn", back_populates="table")

    def __repr__(self):
        return f'MetaDataTable({self.id} {self.table_name})'

    def __str__(self):
        return f'{self.table_name}({self.id})'


class MetaDataColumn(Base, BaseMixin):
    """Row of ``meta_data_column``: one described column of a ``MetaDataTable``."""
    __tablename__ = 'meta_data_column'
    column_name = Column(String(255), nullable=False)
    column_sync_name = Column(String(255))
    column_type = Column(String(255))
    column_modifier = Column(String(255), nullable=True)
    column_order = Column(Integer)
    # VARCHAR needs an explicit length on MySQL/MariaDB; 255 matches the
    # foreign keys declared in media.py.
    table_id = Column(String(255), ForeignKey('meta_data_table.id'))
    table = relationship("MetaDataTable", back_populates="table_columns")
    column_label = Column(String(255))
    filter_label = Column(String(255))
    is_shown = Column(BIT(1))
    show_filter = Column(BIT(1))
    ref_column = Column(String(255), nullable=True)

    def __repr__(self):
        # An object that is not attached to a table yet has ``table is None``;
        # repr() must not raise for it.
        table_name = self.table.table_name if self.table is not None else '__'
        column_name = self.column_name if self.column_name is not None else '__'
        return f'MetaDataColumn({self.id} {table_name}.{column_name})'

    def __str__(self):
        return f'{self.column_name}({self.id})'


# ---------------------------------------------------------------------------
# diff --git a/scripts/schema/tysc.py b/scripts/schema/tysc.py
# new file mode 100644
# index 0000000..ef8bc5d
# --- /dev/null
# +++ b/scripts/schema/tysc.py
# @@ -0,0 +1,100 @@
# ---------------------------------------------------------------------------
from sqlalchemy import Column, DateTime, Integer, String, ForeignKey, UniqueConstraint
from sqlalchemy.dialects.mysql import BIT
from sqlalchemy.orm import relationship

from .base import Base, BaseMixin

# All foreign keys below are String(255): VARCHAR needs an explicit length on
# MySQL/MariaDB, and 255 matches the foreign keys declared in media.py.


class Sport(Base, BaseMixin):
    """Row of ``sport``."""
    __tablename__ = "sport"
    # ``unique=True`` already makes the name unique; the additional
    # UniqueConstraint("name") only created a second, redundant unique key.
    name = Column(String(255), nullable=False, index=True, unique=True)
    teams = relationship("Team", back_populates="sport")
    positions = relationship("FieldPosition", back_populates="sport")


class Team(Base, BaseMixin):
    """Row of ``team``; belongs to one ``Sport``."""
    __tablename__ = "team"
    name = Column(String(255), nullable=False, index=True, unique=True)
    short_name = Column(String(255), nullable=False)
    sport_id = Column(String(255), ForeignKey("sport.id"), nullable=False)
    sport = relationship("Sport", back_populates="teams")
    roosters = relationship("Rooster", back_populates="team")


class FieldPosition(Base, BaseMixin):
    """Row of ``field_position``; name and short name are unique per sport."""
    __tablename__ = "field_position"
    __table_args__ = (
        UniqueConstraint("name", "sport_id"),
        UniqueConstraint("short_name", "sport_id"),
    )
    name = Column(String(255), nullable=False, index=True)
    short_name = Column(String(255), nullable=False)
    sport_id = Column(String(255), ForeignKey("sport.id"), nullable=False, index=True)
    sport = relationship("Sport", back_populates="positions")
    roosters = relationship("Rooster", back_populates="position")


class Player(Base, BaseMixin):
    """Row of ``player``; the first/last name pair is unique."""
    __tablename__ = "player"
    __table_args__ = (
        UniqueConstraint("first_name", "last_name"),
    )
    first_name = Column(String(255), nullable=False, index=True)
    last_name = Column(String(255), nullable=False, index=True)
    roosters = relationship("Rooster", back_populates="player")

    def get_full_name(self) -> str:
        """Return the name formatted as ``"<last name>, <first name>"``."""
        return f"{self.last_name}, {self.first_name}"


class Rooster(Base, BaseMixin):
    """Row of ``rooster``: a player at a position in a team for one year."""
    __tablename__ = "rooster"
    __table_args__ = (
        UniqueConstraint("year", "team_id", "player_id", "position_id"),
    )
    year = Column(Integer)
    team_id = Column(String(255), ForeignKey("team.id"), nullable=False, index=True)
    team = relationship("Team", back_populates="roosters")
    player_id = Column(String(255), ForeignKey("player.id"), nullable=False, index=True)
    player = relationship("Player", back_populates="roosters")
    position_id = Column(String(255), ForeignKey("field_position.id"), nullable=False, index=True)
    position = relationship("FieldPosition", back_populates="roosters")
    cards = relationship("Card", back_populates="rooster")


class Vendor(Base, BaseMixin):
    """Row of ``vendor``."""
    __tablename__ = "vendor"
    name = Column(String(255), nullable=False, unique=True, index=True)
    card_sets = relationship("CardSet", back_populates="vendor")
    cards = relationship("Card", back_populates="vendor")


class CardSet(Base, BaseMixin):
    """Row of ``card_set``; the name is unique per vendor."""
    __tablename__ = "card_set"
    __table_args__ = (
        UniqueConstraint("name", "vendor_id"),
    )
    name = Column(String(255), index=True)
    parallel_set = Column(BIT(1))
    insert_set = Column(BIT(1))
    vendor_id = Column(String(255), ForeignKey("vendor.id"), nullable=False, index=True)
    vendor = relationship("Vendor", back_populates="card_sets")
    cards = relationship("Card", back_populates="card_set")


class Card(Base, BaseMixin):
    """Row of ``card``; unique per number, year, vendor and card set."""
    __tablename__ = "card"
    __table_args__ = (
        UniqueConstraint("card_number", "year", "vendor_id", "card_set_id"),
    )
    card_number = Column(Integer, index=True)
    year = Column(Integer, index=True)
    card_set_id = Column(String(255), ForeignKey("card_set.id"), nullable=False)
    card_set = relationship("CardSet", back_populates="cards")
    rooster_id = Column(String(255), ForeignKey("rooster.id"), nullable=False)
    rooster = relationship("Rooster", back_populates="cards")
    vendor_id = Column(String(255), ForeignKey("vendor.id"), nullable=False)
    vendor = relationship("Vendor", back_populates="cards")


# ---------------------------------------------------------------------------
# diff --git a/springboot/src/main/java/de/thpeetz/kontor/admin/services/MetaDataService.java b/springboot/src/main/java/de/thpeetz/kontor/admin/services/MetaDataService.java
# index
# ---------------------------------------------------------------------------
edc173e..6c45feb 100644
--- a/springboot/src/main/java/de/thpeetz/kontor/admin/services/MetaDataService.java
+++ b/springboot/src/main/java/de/thpeetz/kontor/admin/services/MetaDataService.java
@@ -8,6 +8,7 @@ import de.thpeetz.kontor.admin.data.MetaDataTable;
 import de.thpeetz.kontor.admin.repository.MetaDataTableRepository;
 import lombok.extern.slf4j.Slf4j;
 
+import java.util.LinkedList;
 import java.util.List;
 import java.util.Map;
 import java.util.Optional;
@@ -43,10 +44,24 @@ public class MetaDataService {
 
     private void deleteTable(MetaDataTable metaDataTable) {
         List<MetaDataColumn> columns = metaDataTable.getTableColumns();
+        List<MetaDataColumn> columnsToDelete = new LinkedList<>();
         for (MetaDataColumn column: columns) {
-            metaDataColumnRepository.delete(column);
+            try {
+                metaDataColumnRepository.delete(column);
+                // queue the column for removal only after its delete succeeded
+                columnsToDelete.add(column);
+            } catch (Exception e) {
+                log.info("Exception {} thrown, just go on", e.getMessage());
+            }
+        }
+        for (MetaDataColumn column: columnsToDelete) {
+            metaDataTable.getTableColumns().remove(column);
+        }
+        try {
+            metaDataTableRepository.delete(metaDataTable);
+        } catch (Exception e) {
+            log.info("could not delete MetaDataTable: {}", e.getMessage());
         }
-        metaDataTableRepository.delete(metaDataTable);
     }
 
     public void getColumn(MetaDataTable table, String columnName, String columnSyncName, String columnType, String columnModifier, Integer columnOrder, Boolean isShown, String columnLabel, Boolean showFilter, String filterLabel) {
diff --git a/springboot/src/main/java/de/thpeetz/kontor/data/services/DataManagementService.java b/springboot/src/main/java/de/thpeetz/kontor/data/services/DataManagementService.java
index 113d750..4b950ac 100644
--- a/springboot/src/main/java/de/thpeetz/kontor/data/services/DataManagementService.java
+++ b/springboot/src/main/java/de/thpeetz/kontor/data/services/DataManagementService.java
@@ -3,6 +3,7 @@ package de.thpeetz.kontor.data.services;
 import de.thpeetz.kontor.admin.data.MetaDataTable;
 import de.thpeetz.kontor.admin.repository.MetaDataTableRepository;
 import de.thpeetz.kontor.admin.services.MetaDataService;
+import de.thpeetz.kontor.tysc.services.SportService;
 import lombok.extern.slf4j.Slf4j;
 import org.springframework.beans.factory.annotation.Autowired;
 import org.springframework.stereotype.Service;
@@ -16,10 +17,10 @@ import java.util.concurrent.atomic.AtomicReference;
 public class DataManagementService {
 
     @Autowired
-    MetaDataTableRepository metaDataTableRepository;
+    MetaDataService metaDataService;
 
     @Autowired
-    MetaDataService metaDataService;
+    SportService sportService;
 
     public DataManagementService() {
 
@@ -34,6 +35,9 @@ public class DataManagementService {
                 case "meta_data_column":
                     status.set(metaDataService.importColumnData(nodeName, fields));
                     break;
+                case "sport":
+                    status.set(sportService.importData(fields));
+                    break;
                 default:
                     log.debug("import for {} not implemented", nodeName);
                     break;
diff --git a/springboot/src/main/java/de/thpeetz/kontor/tysc/data/Sport.java b/springboot/src/main/java/de/thpeetz/kontor/tysc/data/Sport.java
index 39333fd..72635e1 100644
--- a/springboot/src/main/java/de/thpeetz/kontor/tysc/data/Sport.java
+++ b/springboot/src/main/java/de/thpeetz/kontor/tysc/data/Sport.java
@@ -17,7 +17,9 @@ import lombok.EqualsAndHashCode;
 import lombok.Getter;
 import lombok.Setter;
 import lombok.ToString;
+import lombok.extern.slf4j.Slf4j;
 
+@Slf4j
 @Getter
 @Setter
 @ToString
@@ -39,4 +41,20 @@ public class Sport extends AbstractEntity {
     @OneToMany(fetch = FetchType.EAGER, mappedBy = "sport")
     @Nullable
     private List positions = new LinkedList<>();
+
+    /**
+     * Set the name if it differs from the current one.
+     *
+     * @param value the name to store
+     * @return a status text saying whether this entity was changed
+     */
+    public String updateName(String value) {
+        // Objects.equals: getName() is null on an entity that has no name yet
+        if (!java.util.Objects.equals(this.getName(), value)) {
+            this.setName(value);
+            log.info("update name");
+            return "updated " + this.getId() + " with " + value;
+        }
+        return "no changes for " + this.getId();
+    }
 }
diff --git a/springboot/src/main/java/de/thpeetz/kontor/tysc/services/SportService.java b/springboot/src/main/java/de/thpeetz/kontor/tysc/services/SportService.java
index
index 120bbfe..9536ec5 100644 --- a/springboot/src/main/java/de/thpeetz/kontor/tysc/services/SportService.java +++ b/springboot/src/main/java/de/thpeetz/kontor/tysc/services/SportService.java @@ -1,7 +1,11 @@ package de.thpeetz.kontor.tysc.services; import java.util.List; +import java.util.Map; +import java.util.Optional; +import java.util.concurrent.atomic.AtomicReference; +import de.thpeetz.kontor.admin.data.MetaDataTable; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.stereotype.Service; @@ -144,4 +148,48 @@ public class SportService { public void deleteRooster(Rooster rooster) { roosterRepository.delete(rooster); } + + public String importData(Map fields) { + AtomicReference status = new AtomicReference<>("unknown"); + String id = fields.get("id"); + Optional optional = sportRepository.findById(id); + if (optional.isEmpty()) { + log.info(" not found: {} with {}", id, fields); + status.set(id + "not found"); + Sport checkExisting = sportRepository.findByName(fields.get("name")); + if (checkExisting != null) { + log.info("entry already there with different id ({}), will be deleted", checkExisting.getId()); + deleteSport(checkExisting); + } + Sport sport = new Sport(); + sport.setId(id); + sport.setName(fields.get("name")); + sportRepository.save(sport); + } else { + optional.ifPresent( entry -> { + log.info(" found: {}", entry.getName()); + String updateStatus = updateSportFields(entry, fields); + sportRepository.save(entry); + status.set(updateStatus); + }); + } + return status.get(); + } + + private String updateSportFields(Sport sport, Map fields) { + String status = ""; + for (Map.Entry entry : fields.entrySet()) { + String key = entry.getKey(); + String value = entry.getValue(); + switch (key) { + case "id", "created_date", "last_modified_date", "version": + break; + case "table_name": + status += sport.updateName(value); + default: + log.info("field {} is unknown for table {}", key, sport.getClass().getName()); + } 
+ } + return status; + } }