diff --git a/python/kontor-cli/kontor/controllers/media.py b/python/kontor-cli/kontor/controllers/media.py index 0f5e610..135366e 100644 --- a/python/kontor-cli/kontor/controllers/media.py +++ b/python/kontor-cli/kontor/controllers/media.py @@ -25,11 +25,6 @@ class Media(Controller): db = self.app.kontor_db updates = db.get_update_list() self.app.log.info(f"found {len(updates)} links for update") - for file_id, url in updates.items(): - link = MediaVideo(url) - title = link.get_title() - if title is not None: - db.update_entry('media_file', file_id, {'title': title, 'review': 0,}) @ex( label='download', @@ -48,25 +43,25 @@ class Media(Controller): if self.app.pargs.media_dir is not None: data['media_dir'] = self.app.pargs.media_dir db = self.app.kontor_db - downloads = db.get_download_list() + downloads = db.get_download_list(data['media_dir']) self.app.log.info(f"found {len(downloads)} links for download") - for file_id, url in downloads.items(): - link = VideoLink(url, VideoType.MEDIA_FILE) - file_name = link.download(download_dir=data['media_dir']) - if file_name is None: - db.update_entry('media_file', file_id, {'file_name': None, 'should_download': 1}) - else: - download_file = Path(file_name) - download_file.with_name(f"{file_id}{download_file.suffix}") - link.file_name = download_file.name - link.should_download = 0 - link.cloud_link = download_file.absolute() - db.update_entry('media_file', file_id, - { - 'file_name': download_file.name, - 'should_download': 0, - 'cloud_link': download_file.absolute()} - ) + #for file_id, url in downloads.items(): + # link = VideoLink(url, VideoType.MEDIA_FILE) + # file_name = link.download(download_dir=data['media_dir']) + # if file_name is None: + # db.update_entry('media_file', file_id, {'file_name': None, 'should_download': 1}) + # else: + # download_file = Path(file_name) + # download_file.with_name(f"{file_id}{download_file.suffix}") + # link.file_name = download_file.name + # link.should_download = 0 + # link.cloud_link = download_file.absolute() + # db.update_entry('media_file', file_id, + # { + # 'file_name': download_file.name, + # 'should_download': 0, + # 'cloud_link': download_file.absolute()} + # ) @ex( help='add url to database', diff --git a/python/kontor-gui/requirements.txt b/python/kontor-gui/requirements.txt index 057d486..6c37bb1 100644 --- a/python/kontor-gui/requirements.txt +++ b/python/kontor-gui/requirements.txt @@ -4,3 +4,6 @@ platformdirs pyyaml PySide6 +beautifulsoup4 +requests + diff --git a/python/kontor-schema/kontor_schema/__init__.py b/python/kontor-schema/kontor_schema/__init__.py index 74f672a..f14b16a 100644 --- a/python/kontor-schema/kontor_schema/__init__.py +++ b/python/kontor-schema/kontor_schema/__init__.py @@ -199,7 +199,7 @@ class KontorDB: self.delete_entries() import_file = Path(import_file_name) if not import_file.exists(): - self.log.info("File %s does not exist. Do nothing.", import_file_name) + self.log.info(f"File {import_file_name} does not exist. Do nothing.") return result match import_file.suffix: case '.json': @@ -207,7 +207,7 @@ class KontorDB: with open(import_file_name, 'r') as json_file: json_load = json.load(json_file) for table in json_load: - self.log.info("%s: %d", table, len(json_load[table])) + self.log.info(f"{table}: {len(json_load[table])}") result[table] = self.import_table(table, json_load[table]) case '.yml': print("read yaml file") @@ -223,7 +223,7 @@ class KontorDB: added = [] remaining = [] existing_ids = self.get_ids(table_name) - self.log.info("found %d existing ids for table %s", len(existing_ids), table_name) + self.log.info(f"found {len(existing_ids)} existing ids for table {table_name}") for item in items: current_id = item['id'] # print(f"import item: {item}") @@ -236,7 +236,7 @@ class KontorDB: changed = self.update_entry(table_name, current_id, item) updated.append(item) if changed: - self.log.info("%s has changed", current_id) + self.log.info(f"{current_id} has changed") updated.append(item) existing_ids.remove(current_id) else: @@ -244,7 +244,7 @@ class KontorDB: self.add_entry(table_name, item) added.append(item) except IntegrityError as error: - self.log.info("Could not add item, due to: %s", error.detail) + self.log.info(f"Could not add item, due to: {error.detail}") if len(existing_ids) > 0: print(f"remaining items: {existing_ids}") remaining.extend(existing_ids) @@ -263,7 +263,7 @@ class KontorDB: return existing_ids def add_entry(self, table_name: str, update_item: dict): - self.log.debug("add entry to table %s with %s", table_name, update_item) + self.log.debug(f"add entry to table {table_name} with {update_item}") __session__ = sessionmaker(self.engine) with __session__() as session: add_item = self.registry[table_name]() @@ -289,7 +289,7 @@ class KontorDB: setattr(existing_item, key, update_value) session.commit() changed = True - self.log.info("update {key} with {update_value}", (key, update_value)) + self.log.info(f"update {key} with {update_value}") return changed def add_link(self, link: str) -> dict: @@ -322,10 +322,12 @@ class KontorDB: url = link.url if url is None: continue - update_list[link.id] = url + link.update_title() + session.commit() + update_list[link.id] = link.title return update_list - def get_download_list(self) -> dict: + def get_download_list(self, download_dir: str) -> dict: download_list = {} __session__ = sessionmaker(self.engine) with __session__() as session: @@ -334,7 +336,8 @@ class KontorDB: url = link.url if url is None: continue - download_list[link.id] = url + link.download_file(download_dir) + download_list[link.id] = link.file_name return download_list def delete_entries(self): diff --git a/python/kontor-schema/kontor_schema/media.py b/python/kontor-schema/kontor_schema/media.py index c259669..ab1e8b4 100644 --- a/python/kontor-schema/kontor_schema/media.py +++ b/python/kontor-schema/kontor_schema/media.py @@ -1,3 +1,5 @@ +import requests +from bs4 import BeautifulSoup from sqlalchemy import Column, DateTime, Integer, String from sqlalchemy.dialects.mysql import BIT @@ -13,6 +15,21 @@ class MediaFile(Base, BaseMixin, BaseVideoMixin): def __str__(self): return f'{self.title}({self.id})' + def update_title(self): + print(f"update title for {self.url}") + try: + r = requests.get(self.url) + soup = BeautifulSoup(r.content, "html.parser") + title = soup.title.string + self.title = title + self.review = 0 + except: + self.title = None + self.review = 1 + + def download_file(self, download_dir: str): + print(f"download file for {self.url}") + class MediaArticle(Base, BaseMixin): __tablename__ = 'media_article' diff --git a/python/kontor-schema/requirements.txt b/python/kontor-schema/requirements.txt index d08ec81..4f0e48d 100644 --- a/python/kontor-schema/requirements.txt +++ b/python/kontor-schema/requirements.txt @@ -1,2 +1,4 @@ mariadb sqlalchemy +beautifulsoup4 +requests