diff --git a/fastapi/Makefile b/fastapi/Makefile index 5bb3fcd..2031118 100644 --- a/fastapi/Makefile +++ b/fastapi/Makefile @@ -4,7 +4,7 @@ clean: find . -name '*.py[co]' -delete virtualenv: - virtualenv --prompt '|> kontor <| ' env + virtualenv --prompt '|> kontor-fastapi <| ' env env/bin/pip install -r requirements.txt env/bin/python setup.py develop @echo diff --git a/fastapi/app/main.py b/fastapi/app/main.py index 3da5c27..d4ff75c 100644 --- a/fastapi/app/main.py +++ b/fastapi/app/main.py @@ -1,5 +1,3 @@ -from typing import Union - from fastapi import FastAPI from .routers import comic, media @@ -12,8 +10,3 @@ app.include_router(media.router) @app.get("/") def read_root(): return {"Hello": "World"} - - -@app.get("/items/{item_id}") -def read_item(item_id: int, q: Union[str, None] = None): - return {"item_id": item_id, "q": q} diff --git a/fastapi/app/routers/comic.py b/fastapi/app/routers/comic.py index f82cd15..217e9f9 100644 --- a/fastapi/app/routers/comic.py +++ b/fastapi/app/routers/comic.py @@ -1,10 +1,11 @@ from uuid import UUID -from fastapi import APIRouter +from fastapi import APIRouter, Depends, HTTPException from sqlalchemy import select +from sqlalchemy.orm import Session from app.models.comic import ComicResponse -from app.schema import Comic, __session__ +from app.schema import Comic, get_db, SessionDep router = APIRouter( prefix="/comic", @@ -14,15 +15,18 @@ router = APIRouter( @router.get("/comics") -def get_all_comics() -> list[ComicResponse]: +def get_all_comics(db: SessionDep) -> list[ComicResponse]: results: list[ComicResponse] = [] - with __session__() as session: - comics = session.scalars(select(Comic)).all() - for comic in comics: - results.append(ComicResponse(id=comic.id, title=comic.title, completed=(comic.completed == 1))) + comics = db.scalars(select(Comic)).all() + for comic in comics: + results.append(ComicResponse(id=comic.id, title=comic.title, completed=(comic.completed == 1))) return results @router.get("/comics/{comic_id}") -def get_comic(comic_id: UUID) -> ComicResponse: - return ComicResponse(id=comic_id, title="Comic2", completed=False) +def get_comic(comic_id: UUID, db: SessionDep) -> ComicResponse: + comic = db.get(Comic, comic_id) + if comic is None: + raise HTTPException(status_code=404, detail="Comic could not be found") + response: ComicResponse = ComicResponse(id=comic_id, title=comic.title, completed=comic.completed) + return response diff --git a/fastapi/app/routers/media.py b/fastapi/app/routers/media.py index 286774a..ec3a450 100644 --- a/fastapi/app/routers/media.py +++ b/fastapi/app/routers/media.py @@ -1,12 +1,12 @@ -from uuid import uuid4 +from datetime import datetime +from typing import List +from uuid import uuid4, UUID -import mariadb from fastapi import APIRouter, status, HTTPException -from fastapi.openapi.utils import status_code_ranges -from sqlalchemy import select +from sqlalchemy import select, Sequence from app.models.media import MediaFileResponse, Link -from app.schema import MediaFile, __session__ +from app.schema import MediaFile, SessionDep router = APIRouter( prefix="/media", @@ -14,52 +14,73 @@ router = APIRouter( ) @router.get("/update-titles") -def update_titles() -> list[MediaFileResponse]: +def update_titles(db: SessionDep) -> list[MediaFileResponse]: results: list[MediaFileResponse] = [] - with __session__() as session: - files = session.query(MediaFile).filter(MediaFile.review == 1).all() - for mediafile in files: - mediafile.update_title() - session.add(mediafile) - response = MediaFileResponse(id=mediafile.id, - title=mediafile.title, - file_name=mediafile.file_name, - cloud_link= mediafile.cloud_link, - url=str(mediafile.url), - review=(mediafile.review == 1), - should_download=(mediafile.should_download == 1)) - results.append(response) - session.commit() + files = db.query(MediaFile).filter(MediaFile.review == 1).all() + for mediafile in files: + mediafile.update_title() + db.add(mediafile) + response = MediaFileResponse(id=mediafile.id, + title=mediafile.title, + file_name=mediafile.file_name, + cloud_link=mediafile.cloud_link, + url=str(mediafile.url), + review=(mediafile.review == 1), + should_download=(mediafile.should_download == 1)) + results.append(response) + db.commit() return results -@router.get("/files") -def get_files() -> list[MediaFileResponse]: + +@router.get("/files", response_model=List[MediaFileResponse]) +def get_files(db: SessionDep, review: bool = False, download: bool = False) -> List[MediaFileResponse]: results: list[MediaFileResponse] = [] - with __session__() as session: - files = session.scalars(select(MediaFile)).all() - for mediafile in files: - response = MediaFileResponse(id=mediafile.id, - title=mediafile.title, - file_name=mediafile.file_name, - cloud_link= mediafile.cloud_link, - url=str(mediafile.url), - review=(mediafile.review == 1), - should_download=(mediafile.should_download == 1)) - results.append(response) + files: Sequence[MediaFile] + if review: + files = db.query(MediaFile).filter(MediaFile.review == 1).all() + elif download: + files = db.query(MediaFile).filter(MediaFile.should_download == 1).all() + else: + files = db.scalars(select(MediaFile)).all() + for mediafile in files: + response = MediaFileResponse(id=mediafile.id, + title=mediafile.title, + file_name=mediafile.file_name, + cloud_link=mediafile.cloud_link, + url=str(mediafile.url), + review=(mediafile.review == 1), + should_download=(mediafile.should_download == 1)) + results.append(response) return results +@router.put("/files/{file_id}", response_model=MediaFileResponse) +def update_file(file_id: UUID, db: SessionDep, info: MediaFileResponse) -> MediaFileResponse: + mediaFile = db.get(MediaFile, file_id) + if not mediaFile: + raise HTTPException(status_code=404, detail="MediaFile could not be found") + mediaFile.file_name = info.file_name + mediaFile.cloud_link = info.cloud_link + mediaFile.url = info.url + mediaFile.title = info.title + mediaFile.last_modified_date = datetime.now() + mediaFile.review = info.review + mediaFile.should_download = info.should_download + db.add(mediaFile) + db.commit() + return info + + @router.post("/files", status_code=status.HTTP_201_CREATED) -def add_file(new_link: Link) -> MediaFileResponse: +def add_file(new_link: Link, db: SessionDep) -> MediaFileResponse: print(new_link.url) try: - with __session__() as session: - mediaFile: MediaFile = MediaFile() - setattr(mediaFile, "url", new_link.url) - setattr(mediaFile, "review", True) - setattr(mediaFile, "should_download", True) - session.add(mediaFile) - session.commit() - except : + mediaFile: MediaFile = MediaFile() + setattr(mediaFile, "url", new_link.url) + setattr(mediaFile, "review", True) + setattr(mediaFile, "should_download", True) + db.add(mediaFile) + db.commit() + except: raise HTTPException(status_code=409, detail="Link duplicate") response = MediaFileResponse(id=uuid4(), title=mediaFile.title, @@ -67,5 +88,5 @@ def add_file(new_link: Link) -> MediaFileResponse: cloud_link=mediaFile.cloud_link, url=new_link.url, review=(mediaFile.review == 1), - shoud_download=(mediaFile.should_download==1)) + should_download=(mediaFile.should_download == 1)) return response diff --git a/fastapi/app/schema/__init__.py b/fastapi/app/schema/__init__.py index a6b41e3..9131d11 100644 --- a/fastapi/app/schema/__init__.py +++ b/fastapi/app/schema/__init__.py @@ -1,10 +1,12 @@ -import logging +import logging.config from pathlib import Path +from typing import Annotated import yaml +from fastapi import Depends from platformdirs import PlatformDirs from sqlalchemy import create_engine -from sqlalchemy.orm import sessionmaker +from sqlalchemy.orm import sessionmaker, Session from .admin import User, Token, Role, AuthorizationMatrix, ModuleData, MailAccount, Mail from .bookshelf import Article, Book, Author, BookshelfPublisher, ArticleAuthor, BookAuthor @@ -34,5 +36,12 @@ connect_string = ('mariadb+mariadbconnector://{}:{}@{}:{}/{}'.format( db_config['mariadb']['database'] )) engine = create_engine(connect_string) +SessionLocal = sessionmaker(bind=engine) Base.metadata.create_all(bind=engine, checkfirst=True) -__session__ = sessionmaker(engine) + +def get_db(): + logger.info("get_db") + with SessionLocal() as db: + yield db + +SessionDep = Annotated[Session, Depends(get_db)] diff --git a/scripts/check_kontor.py b/scripts/check_kontor.py index 81ea1cb..72b26ae 100644 --- a/scripts/check_kontor.py +++ b/scripts/check_kontor.py @@ -1,12 +1,13 @@ """ Checks the database kontor """ +from dataclasses import dataclass from enum import Enum, auto -import mariadb from pathlib import Path from argparse import ArgumentParser, ArgumentDefaultsHelpFormatter +import requests from config import get_logger, get_database_cursors parser = ArgumentParser(formatter_class=ArgumentDefaultsHelpFormatter) @@ -25,71 +26,63 @@ class StatusType(Enum): CLOUD_LINK = auto() CLOUD_LINK_ID = auto() +@dataclass +class FileStatus: + id: str + status_type: StatusType -def get_status_of_file(found_file, cursor): - status = StatusType.UNKNOWN - file_id = '' - try: - cursor.execute(f'SELECT id, cloud_link FROM media_file WHERE file_name="{found_file.name}"') - rows = cursor.fetchall() - if len(rows) == 1: - status = StatusType.FILE_NAME - file_id = rows[0][0] - except mariadb.Error as error: - logger.debug(f'select failed with {error}') - try: - cursor.execute(f'SELECT id FROM media_file WHERE id="{found_file.stem}"') - rows = cursor.fetchall() - if len(rows) == 1: - status = StatusType.FILE_ID - file_id = rows[0][0] - if len(rows) > 1: - status = StatusType.DUPLICATE - for row in rows: - logger.info(f"found {row[0]} with {found_file}") - except mariadb.Error as error: - logger.debug(f'select failed with {error}') - try: - cursor.execute(f'SELECT id FROM media_file WHERE cloud_link LIKE "%{found_file.stem}%"') - rows = cursor.fetchall() - if len(rows) == 1: - file_id = rows[0][0] - if rows[0][0] == found_file.stem: - status = StatusType.CLOUD_LINK_ID - else: - status = StatusType.CLOUD_LINK - except mariadb.Error as error: - logger.debug(f'select failed with {error}') - return status, file_id -def rename_files_to_id(media_dir, conn, dry_run): +def get_status_of_file(found_file: Path, log) -> FileStatus: + status = FileStatus() + response = requests.post("http://127.0.0.1:8800/media/search") + log.info(f"Status: {response.status_code}") + data = response.json() + status.import(data) + if len(data) == 1: + status = StatusType.FILE_NAME + status.id = data['id'] + response = requests.get(f"http://127.0.0.1:8800/media/files/{found_file.stem}") + log.info(f"Status: {response.status_code}") + data = response.json() + if len(data) == 1: + status = StatusType.FILE_ID + file_id = data['id'] + response = requests.get(f"http://127.0.0.1:8800/media/files?cloud_link=true") + log.info(f"Status: {response.status_code}") + data = response.json() + if len(data) == 1: + status = StatusType.CLOUD_LINK_ID + file_id = data['id'] + return status + +def rename_files_to_id(media_dir, dry_run, log): media_path = Path(media_dir) - cursor = conn.cursor() for file in media_path.iterdir(): - logger.debug('found file: {}'.format(file.name)) - (status, file_id) = get_status_of_file(file, cursor) - new_file_path = file.with_name(f"{file_id}{file.suffix}") - match status: + log.debug('found file: {}'.format(file.name)) + status = get_status_of_file(file, log) + new_file_path = file.with_name(f"{status.id}{file.suffix}") + file_id = status.id + match status.status_type: case StatusType.FILE_NAME: - logger.info(f'status of {file.name} is file_name') + log.info(f'status of {file.name} is file_name') rename_file(file, new_file_path, dry_run) - update_cloud_link(file_id, new_file_path, conn, dry_run) + update_cloud_link(file_id, new_file_path, dry_run) case StatusType.FILE_ID: - logger.info(f'status of {file.name} is file_id') - update_cloud_link(file_id, new_file_path, conn, dry_run) + log.info(f'status of {file.name} is file_id') + update_cloud_link(file_id, new_file_path, dry_run) case StatusType.CLOUD_LINK: - logger.info(f'status of {file.name} is cloud_link') + log.info(f'status of {file.name} is cloud_link') rename_file(file, new_file_path, dry_run) - update_cloud_link(file_id, new_file_path, conn, dry_run) + update_cloud_link(file_id, new_file_path, dry_run) case StatusType.CLOUD_LINK_ID: - logger.debug(f'status of {file.name} is cloud_link_id') - update_cloud_link(file_id, new_file_path, conn, dry_run) + log.debug(f'status of {file.name} is cloud_link_id') + update_cloud_link(file_id, new_file_path, dry_run) case StatusType.DUPLICATE: - logger.info(f'status of {file.name} is duplicate') + log.info(f'status of {file.name} is duplicate') case StatusType.UNKNOWN: - logger.info(f'status of {file.name} is unknown') + log.info(f'status of {file.name} is unknown') case _: - logger.info(f'status of {file.name} is not defined') + log.info(f'status of {file.name} is not defined') def rename_file(current_file, new_file_path, dry_run): if dry_run: @@ -118,14 +111,8 @@ def reset_cloud_link(conn, dry_run): if __name__ == '__main__': logger = get_logger(args.verbose, args.config) logger.info("kontor.check_kontor started") - _, mariadb_conn = get_database_cursors(logger, args.config) - mariadb_cursor = mariadb_conn.cursor() - if args.reset_cloud_link: - reset_cloud_link(mariadb_conn, args.dry_run) - link_list = [] - data_dir = args.dir logger.info("kontor.check_kontor.rename_files_to_id") - rename_files_to_id(data_dir, mariadb_conn, args.dry_run) + rename_files_to_id(args.dir, args.dry_run, logger) #logger.info("kontor.check_kontor.update_cloud_link_with_found_files") #update_cloud_link_with_found_files(data_dir, mariadb_conn, args.dry_run) #logger.info("kontor.check_kontor.get_ids_from_column_cloud_link") @@ -133,5 +120,5 @@ if __name__ == '__main__': #logger.info('found {} ids in column cloud_link'.format(len(link_list))) #logger.info("kontor.check_kontor.checking_ids_from_cloud_link") #checking_ids_from_cloud_link(link_list, mariadb_cursor) - mariadb_conn.close() logger.info("kontor.check_kontor finished") + diff --git a/scripts/download.py b/scripts/download.py index 1d774f6..d74d9f9 100644 --- a/scripts/download.py +++ b/scripts/download.py @@ -1,13 +1,15 @@ """ download files with URLs from DB """ +import re +import subprocess from argparse import ArgumentParser, ArgumentDefaultsHelpFormatter -from platformdirs import PlatformDirs +from datetime import datetime +from enum import Enum, auto from pathlib import Path -import yaml -from sqlalchemy import create_engine, select -from sqlalchemy.orm import sessionmaker -from schema import Base, KontorDB, MediaFile +from uuid import UUID + +import requests from config import get_logger @@ -17,42 +19,104 @@ parser.add_argument('--config', '-c', default='kontor-docker') parser.add_argument('--dir', '-d', default='/data/media') parser.add_argument('--tool', '-t', default='yt-dlp') parser.add_argument('--dry-run', '-m', action='store_true') -parser.add_argument('--rename', '-r', action='store_true') args = parser.parse_args() +class FileStatus(Enum): + DOWNLOADED = auto() + RENAMED = auto() + UNKNOWN = auto() + +def download_file(url: str, file_info: dict, download_dir: str = "/data/media", dl_tool: str = "yt-dlp") -> dict: + print(f"download file for {url} to {download_dir}") + result = subprocess.run([dl_tool, url], cwd=download_dir, capture_output=True, text=True) + if result.returncode == 0: + output = result.stdout + output = re.sub(' +', ' ', output) + lines_list = output.splitlines() + file_name = __parse_output__(lines_list) + if file_name is None: + file_info['review'] = True + file_info['should_download'] = True + file_info['file_name'] = None + else: + download_file_name = Path(download_dir, file_name) + file_info['should_download'] = False + file_info['file_name'] = download_file_name.name + file_info['cloud_link'] = str(download_file_name.absolute()) + file_info['last_modified_date'] = datetime.now().strftime("%Y-%m-%d %H:%M:%S") + return file_info + + +def __parse_output__(lines_list: list[str]) -> str | None: + file_name = None + for line in lines_list: + if 'has already been downloaded' in line: + end_len = len(' has already been downloaded') + file_name = line[11:-end_len] + if 'Destination' in line: + line_len = len(line) + start_len = len('[download] Destination: ') + file_len = line_len - start_len + file_name = line[-file_len:] + return file_name + + +def is_file_downloaded(item: dict, dir: Path) -> FileStatus: + file_name_as_title = f"{item['file_name']}" + file_title = Path(dir, file_name_as_title, ".mp4") + if file_title.exists(): + log.info(f"{file_name_as_title} has been downloaded") + item['should_download'] = 0 + return FileStatus.DOWNLOADED + file_name_as_id = f"{item['id']}" + file_with_id_as_name = Path(dir, file_name_as_id, ".mp4") + if file_with_id_as_name.exists(): + log.info(f"{file_with_id_as_name} has been downloaded and renamed") + item['cloud_link'] = file_with_id_as_name + item['should_download'] = 0 + return FileStatus.RENAMED + log.info("could not find file - start download") + return FileStatus.UNKNOWN + + +def update_status(item_id: UUID, file_info: dict): + update = requests.put(f"http://127.0.0.1:8800/media/files/{item_id}", json=file_info) + log.info(f"update status: {update.status_code}") + log.info(f"update result: {update.json()}") + + +def rename_file(file_info: dict): + item_id = file_info['id'] + file = Path(args.dir, file_info['file_name']) + new_file_path = file.with_name(f"{item_id}{file.suffix}") + log.info(f"rename {file} to {new_file_path}") + file.rename(Path(new_file_path)) + file_info['cloud_link'] = str(new_file_path) + if __name__ == '__main__': log = get_logger(args.verbose, args.config) log.info('kontor.download started') - dirs = PlatformDirs(args.config) - database_config = Path(dirs.user_config_dir, 'database-config.yaml') - with open(database_config, 'rt') as f: - db_config = yaml.safe_load(f.read()) - print(db_config) - connect_string = ('mariadb+mariadbconnector://{}:{}@{}:{}/{}'.format( - db_config['mariadb']['user'], - db_config['mariadb']['password'], - db_config['mariadb']['host'], - db_config['mariadb']['port'], - db_config['mariadb']['database'] - )) - engine = create_engine(connect_string) - Base.metadata.create_all(bind=engine, checkfirst=True) - __session__ = sessionmaker(bind=engine) - _filter = {'should_download': 1} - with __session__() as session: - files = session.query(MediaFile).filter_by(**_filter).all() - log.info("found %d entries", len(files)) - files2 = session.query(MediaFile).filter(MediaFile.should_download == 1).all() - log.info("found %d entries", len(files2)) - for mediafile in files2: - mediafile.download_file(download_dir=args.dir, dl_tool="yt-dlp") - log.info("Datei {} erfolgreich heruntergeladen".format(mediafile.file_name)) - if args.rename: - current_file = Path(mediafile.file_name) - new_file_path = current_file.with_name(f"{mediafile.id}{current_file.suffix}") - current_file.rename(Path(new_file_path)) - mediafile.cloud_link = new_file_path - session.add(mediafile) - session.commit() + response = requests.get("http://127.0.0.1:8800/media/files?download=true") + log.info(f"Status: {response.status_code}") + data = response.json() + log.info(f"data: {len(data)}") + for item in data: + link = item['url'] + file_id = item['id'] + log.info(f"{file_id} - {link}") + download_status: FileStatus = is_file_downloaded(item, args.dir) + match download_status: + case FileStatus.DOWNLOADED: + rename_file(item) + update_status(file_id, item) + case FileStatus.RENAMED: + log.info("update status") + update_status(file_id, item) + case FileStatus.UNKNOWN: + download_file(link, item) + rename_file(item) + log.info(f'{item}') + update_status(file_id, item) log.info('kontor.download finished') + diff --git a/scripts/update_title.py b/scripts/update_title.py index a91df45..a410eac 100644 --- a/scripts/update_title.py +++ b/scripts/update_title.py @@ -2,14 +2,14 @@ download files with URLs from DB """ import logging.config + +import requests import yaml from argparse import ArgumentParser, ArgumentDefaultsHelpFormatter from pathlib import Path -from platformdirs import PlatformDirs -from sqlalchemy import create_engine, select -from sqlalchemy.orm import sessionmaker -from schema import MediaFile, Base +from bs4 import BeautifulSoup +from platformdirs import PlatformDirs parser = ArgumentParser(formatter_class=ArgumentDefaultsHelpFormatter) parser.add_argument('--verbose', '-v', action='count', default=0) @@ -37,30 +37,23 @@ def get_logger(level: int, config: str): if __name__ == '__main__': log = get_logger(args.verbose, args.config) log.info('kontor.update_titles started') - dirs = PlatformDirs(args.config) - database_config = Path(dirs.user_config_dir, 'database-config.yaml') - with open(database_config, 'rt') as f: - db_config = yaml.safe_load(f.read()) - print(db_config) - connect_string = ('mariadb+mariadbconnector://{}:{}@{}:{}/{}'.format( - db_config['mariadb']['user'], - db_config['mariadb']['password'], - db_config['mariadb']['host'], - db_config['mariadb']['port'], - db_config['mariadb']['database'] - )) - engine = create_engine(connect_string) - Base.metadata.create_all(bind=engine, checkfirst=True) - __session__ = sessionmaker(engine) - _filter = {'review': 1} - with __session__() as session: - files = session.query(MediaFile).filter_by(**_filter).all() - log.info("found %d entries", len(files)) - files2 = session.query(MediaFile).filter(MediaFile.review ==1).all - log.info("found %d entries", len(files2)) - for mediafile in files: - mediafile.update_title() - session.add(mediafile) - session.commit() - log.info("found %d entries", len(files)) + response = requests.get("http://127.0.0.1:8800/media/files?review=true") + log.info(f"Status: {response.status_code}") + data = response.json() + log.info(f"data: {len(data)}") + for item in data: + link = item['url'] + log.info(f"{item['id']} - {link}") + try: + r = requests.get(link) + soup = BeautifulSoup(r.content, "html.parser") + title = soup.title.string + item['title'] = title + item['review'] = 0 + except: + item['title'] = None + item['review'] = 1 + update = requests.put(f"http://127.0.0.1:8800/media/files/{item['id']}", json=item) + log.info(f"update status: {update.status_code}") + log.info(f"update result: {update.json()}") log.info('kontor.update_titles finished')