refactor scripts to work with the API

This commit is contained in:
Thomas Peetz
2025-04-16 05:08:59 +02:00
parent 98e3d91edd
commit 4a61d6a727
8 changed files with 262 additions and 191 deletions
+1 -1
View File
@@ -4,7 +4,7 @@ clean:
find . -name '*.py[co]' -delete
virtualenv:
virtualenv --prompt '|> kontor <| ' env
virtualenv --prompt '|> kontor-fastapi <| ' env
env/bin/pip install -r requirements.txt
env/bin/python setup.py develop
@echo
-7
View File
@@ -1,5 +1,3 @@
from typing import Union
from fastapi import FastAPI
from .routers import comic, media
@@ -12,8 +10,3 @@ app.include_router(media.router)
@app.get("/")
def read_root():
    """Serve the API root with a fixed greeting payload."""
    greeting = {"Hello": "World"}
    return greeting
@app.get("/items/{item_id}")
def read_item(item_id: int, q: Union[str, None] = None):
return {"item_id": item_id, "q": q}
+13 -9
View File
@@ -1,10 +1,11 @@
from uuid import UUID
from fastapi import APIRouter
from fastapi import APIRouter, Depends, HTTPException
from sqlalchemy import select
from sqlalchemy.orm import Session
from app.models.comic import ComicResponse
from app.schema import Comic, __session__
from app.schema import Comic, get_db, SessionDep
router = APIRouter(
prefix="/comic",
@@ -14,15 +15,18 @@ router = APIRouter(
@router.get("/comics")
def get_all_comics() -> list[ComicResponse]:
def get_all_comics(db: SessionDep) -> list[ComicResponse]:
results: list[ComicResponse] = []
with __session__() as session:
comics = session.scalars(select(Comic)).all()
for comic in comics:
results.append(ComicResponse(id=comic.id, title=comic.title, completed=(comic.completed == 1)))
comics = db.scalars(select(Comic)).all()
for comic in comics:
results.append(ComicResponse(id=comic.id, title=comic.title, completed=(comic.completed == 1)))
return results
@router.get("/comics/{comic_id}")
def get_comic(comic_id: UUID) -> ComicResponse:
return ComicResponse(id=comic_id, title="Comic2", completed=False)
def get_comic(comic_id: UUID, db: SessionDep) -> ComicResponse:
comic = db.get(Comic, comic_id)
if comic is None:
raise HTTPException(status_code=404, detail="Comic could not be found")
response: ComicResponse = ComicResponse(id=comic_id, title=comic.title, completed=comic.completed)
return response
+64 -43
View File
@@ -1,12 +1,12 @@
from uuid import uuid4
from datetime import datetime
from typing import List
from uuid import uuid4, UUID
import mariadb
from fastapi import APIRouter, status, HTTPException
from fastapi.openapi.utils import status_code_ranges
from sqlalchemy import select
from sqlalchemy import select, Sequence
from app.models.media import MediaFileResponse, Link
from app.schema import MediaFile, __session__
from app.schema import MediaFile, SessionDep
router = APIRouter(
prefix="/media",
@@ -14,52 +14,73 @@ router = APIRouter(
)
@router.get("/update-titles")
def update_titles() -> list[MediaFileResponse]:
def update_titles(db: SessionDep) -> list[MediaFileResponse]:
results: list[MediaFileResponse] = []
with __session__() as session:
files = session.query(MediaFile).filter(MediaFile.review == 1).all()
for mediafile in files:
mediafile.update_title()
session.add(mediafile)
response = MediaFileResponse(id=mediafile.id,
title=mediafile.title,
file_name=mediafile.file_name,
cloud_link= mediafile.cloud_link,
url=str(mediafile.url),
review=(mediafile.review == 1),
should_download=(mediafile.should_download == 1))
results.append(response)
session.commit()
files = db.query(MediaFile).filter(MediaFile.review == 1).all()
for mediafile in files:
mediafile.update_title()
db.add(mediafile)
response = MediaFileResponse(id=mediafile.id,
title=mediafile.title,
file_name=mediafile.file_name,
cloud_link=mediafile.cloud_link,
url=str(mediafile.url),
review=(mediafile.review == 1),
should_download=(mediafile.should_download == 1))
results.append(response)
db.commit()
return results
@router.get("/files")
def get_files() -> list[MediaFileResponse]:
@router.get("/files", response_model=List[MediaFileResponse])
def get_files(db: SessionDep, review: bool = False, download: bool = False) -> List[MediaFileResponse]:
results: list[MediaFileResponse] = []
with __session__() as session:
files = session.scalars(select(MediaFile)).all()
for mediafile in files:
response = MediaFileResponse(id=mediafile.id,
title=mediafile.title,
file_name=mediafile.file_name,
cloud_link= mediafile.cloud_link,
url=str(mediafile.url),
review=(mediafile.review == 1),
should_download=(mediafile.should_download == 1))
results.append(response)
files: Sequence[MediaFile]
if review:
files = db.query(MediaFile).filter(MediaFile.review == 1).all()
elif download:
files = db.query(MediaFile).filter(MediaFile.should_download == 1).all()
else:
files = db.scalars(select(MediaFile)).all()
for mediafile in files:
response = MediaFileResponse(id=mediafile.id,
title=mediafile.title,
file_name=mediafile.file_name,
cloud_link=mediafile.cloud_link,
url=str(mediafile.url),
review=(mediafile.review == 1),
should_download=(mediafile.should_download == 1))
results.append(response)
return results
@router.put("/files/{file_id}", response_model=MediaFileResponse)
def update_file(file_id: UUID, db: SessionDep, info: MediaFileResponse) -> MediaFileResponse:
    """Overwrite the stored media file ``file_id`` with the values from ``info``.

    The row is looked up by primary key, every editable column is copied from
    the request body, ``last_modified_date`` is stamped with the current local
    time, and the change is committed.

    Returns:
        The request body ``info`` as received (the row is not re-read).

    Raises:
        HTTPException: 404 when no MediaFile with ``file_id`` exists.
    """
    record = db.get(MediaFile, file_id)
    if not record:
        raise HTTPException(status_code=404, detail="MediaFile could not be found")

    # Copy the client-supplied fields onto the persistent object.
    record.file_name = info.file_name
    record.cloud_link = info.cloud_link
    record.url = info.url
    record.title = info.title
    record.last_modified_date = datetime.now()
    record.review = info.review
    record.should_download = info.should_download

    db.add(record)
    db.commit()
    return info
@router.post("/files", status_code=status.HTTP_201_CREATED)
def add_file(new_link: Link) -> MediaFileResponse:
def add_file(new_link: Link, db: SessionDep) -> MediaFileResponse:
print(new_link.url)
try:
with __session__() as session:
mediaFile: MediaFile = MediaFile()
setattr(mediaFile, "url", new_link.url)
setattr(mediaFile, "review", True)
setattr(mediaFile, "should_download", True)
session.add(mediaFile)
session.commit()
except :
mediaFile: MediaFile = MediaFile()
setattr(mediaFile, "url", new_link.url)
setattr(mediaFile, "review", True)
setattr(mediaFile, "should_download", True)
db.add(mediaFile)
db.commit()
except:
raise HTTPException(status_code=409, detail="Link duplicate")
response = MediaFileResponse(id=uuid4(),
title=mediaFile.title,
@@ -67,5 +88,5 @@ def add_file(new_link: Link) -> MediaFileResponse:
cloud_link=mediaFile.cloud_link,
url=new_link.url,
review=(mediaFile.review == 1),
shoud_download=(mediaFile.should_download==1))
should_download=(mediaFile.should_download == 1))
return response
+12 -3
View File
@@ -1,10 +1,12 @@
import logging
import logging.config
from pathlib import Path
from typing import Annotated
import yaml
from fastapi import Depends
from platformdirs import PlatformDirs
from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker
from sqlalchemy.orm import sessionmaker, Session
from .admin import User, Token, Role, AuthorizationMatrix, ModuleData, MailAccount, Mail
from .bookshelf import Article, Book, Author, BookshelfPublisher, ArticleAuthor, BookAuthor
@@ -34,5 +36,12 @@ connect_string = ('mariadb+mariadbconnector://{}:{}@{}:{}/{}'.format(
db_config['mariadb']['database']
))
engine = create_engine(connect_string)
SessionLocal = sessionmaker(bind=engine)
Base.metadata.create_all(bind=engine, checkfirst=True)
__session__ = sessionmaker(engine)
def get_db():
    """Yield one database session and close it when the consumer is done.

    Written as a generator so it can be used as a FastAPI dependency: the
    code after ``yield`` runs once the request that received the session has
    finished, whether it succeeded or raised.
    """
    logger.info("get_db")
    db = SessionLocal()
    try:
        yield db
    finally:
        # Same cleanup the session's context manager performs on exit.
        db.close()
SessionDep = Annotated[Session, Depends(get_db)]
+48 -61
View File
@@ -1,12 +1,13 @@
"""
Checks the database kontor
"""
from dataclasses import dataclass
from enum import Enum, auto
import mariadb
from pathlib import Path
from argparse import ArgumentParser, ArgumentDefaultsHelpFormatter
import requests
from config import get_logger, get_database_cursors
parser = ArgumentParser(formatter_class=ArgumentDefaultsHelpFormatter)
@@ -25,71 +26,63 @@ class StatusType(Enum):
CLOUD_LINK = auto()
CLOUD_LINK_ID = auto()
@dataclass
class FileStatus:
    """Outcome of matching one file found on disk against the media API.

    Both fields default to the "nothing matched" state, so a bare
    ``FileStatus()`` is valid and can be filled in as lookups succeed.
    """
    # Identifier reported by the API for the matching entry; empty if none.
    id: str = ''
    # How the file was matched; stays UNKNOWN until a lookup succeeds.
    status_type: StatusType = StatusType.UNKNOWN
def get_status_of_file(found_file, cursor):
status = StatusType.UNKNOWN
file_id = ''
try:
cursor.execute(f'SELECT id, cloud_link FROM media_file WHERE file_name="{found_file.name}"')
rows = cursor.fetchall()
if len(rows) == 1:
status = StatusType.FILE_NAME
file_id = rows[0][0]
except mariadb.Error as error:
logger.debug(f'select failed with {error}')
try:
cursor.execute(f'SELECT id FROM media_file WHERE id="{found_file.stem}"')
rows = cursor.fetchall()
if len(rows) == 1:
status = StatusType.FILE_ID
file_id = rows[0][0]
if len(rows) > 1:
status = StatusType.DUPLICATE
for row in rows:
logger.info(f"found {row[0]} with {found_file}")
except mariadb.Error as error:
logger.debug(f'select failed with {error}')
try:
cursor.execute(f'SELECT id FROM media_file WHERE cloud_link LIKE "%{found_file.stem}%"')
rows = cursor.fetchall()
if len(rows) == 1:
file_id = rows[0][0]
if rows[0][0] == found_file.stem:
status = StatusType.CLOUD_LINK_ID
else:
status = StatusType.CLOUD_LINK
except mariadb.Error as error:
logger.debug(f'select failed with {error}')
return status, file_id
def rename_files_to_id(media_dir, conn, dry_run):
def get_status_of_file(found_file: Path, log) -> FileStatus:
    """Ask the media API how ``found_file`` relates to the known media files.

    Three lookups run in order and a later match overrides an earlier one:
    a search request (FILE_NAME), a lookup by the file's stem used as id
    (FILE_ID), and a cloud-link query (CLOUD_LINK_ID).

    Args:
        found_file: File found in the media directory.
        log: Logger used to report the HTTP status of every request.

    Returns:
        A ``FileStatus`` carrying the matched id and the kind of match;
        ``StatusType.UNKNOWN`` with an empty id when nothing matched.
    """
    # Build the result explicitly and only ever update its fields. Rebinding
    # the name to a StatusType member would hand the caller an enum instead
    # of a FileStatus.
    status = FileStatus(id='', status_type=StatusType.UNKNOWN)

    # NOTE(review): no search criteria are sent with this request; presumably
    # the file name belongs in the payload - confirm against the API.
    response = requests.post("http://127.0.0.1:8800/media/search")
    log.info(f"Status: {response.status_code}")
    data = response.json()
    # NOTE(review): assumes a match is a JSON object with a single 'id' key -
    # TODO confirm the response schema.
    if len(data) == 1:
        status.status_type = StatusType.FILE_NAME
        status.id = data['id']

    response = requests.get(f"http://127.0.0.1:8800/media/files/{found_file.stem}")
    log.info(f"Status: {response.status_code}")
    data = response.json()
    if len(data) == 1:
        status.status_type = StatusType.FILE_ID
        status.id = data['id']

    response = requests.get("http://127.0.0.1:8800/media/files?cloud_link=true")
    log.info(f"Status: {response.status_code}")
    data = response.json()
    if len(data) == 1:
        status.status_type = StatusType.CLOUD_LINK_ID
        status.id = data['id']

    return status
def rename_files_to_id(media_dir, dry_run, log):
media_path = Path(media_dir)
cursor = conn.cursor()
for file in media_path.iterdir():
logger.debug('found file: {}'.format(file.name))
(status, file_id) = get_status_of_file(file, cursor)
new_file_path = file.with_name(f"{file_id}{file.suffix}")
match status:
log.debug('found file: {}'.format(file.name))
status = get_status_of_file(file, log)
new_file_path = file.with_name(f"{status.id}{file.suffix}")
file_id = status.id
match status.status_type:
case StatusType.FILE_NAME:
logger.info(f'status of {file.name} is file_name')
log.info(f'status of {file.name} is file_name')
rename_file(file, new_file_path, dry_run)
update_cloud_link(file_id, new_file_path, conn, dry_run)
update_cloud_link(file_id, new_file_path, dry_run)
case StatusType.FILE_ID:
logger.info(f'status of {file.name} is file_id')
update_cloud_link(file_id, new_file_path, conn, dry_run)
log.info(f'status of {file.name} is file_id')
update_cloud_link(file_id, new_file_path, dry_run)
case StatusType.CLOUD_LINK:
logger.info(f'status of {file.name} is cloud_link')
log.info(f'status of {file.name} is cloud_link')
rename_file(file, new_file_path, dry_run)
update_cloud_link(file_id, new_file_path, conn, dry_run)
update_cloud_link(file_id, new_file_path, dry_run)
case StatusType.CLOUD_LINK_ID:
logger.debug(f'status of {file.name} is cloud_link_id')
update_cloud_link(file_id, new_file_path, conn, dry_run)
log.debug(f'status of {file.name} is cloud_link_id')
update_cloud_link(file_id, new_file_path, dry_run)
case StatusType.DUPLICATE:
logger.info(f'status of {file.name} is duplicate')
log.info(f'status of {file.name} is duplicate')
case StatusType.UNKNOWN:
logger.info(f'status of {file.name} is unknown')
log.info(f'status of {file.name} is unknown')
case _:
logger.info(f'status of {file.name} is not defined')
log.info(f'status of {file.name} is not defined')
def rename_file(current_file, new_file_path, dry_run):
if dry_run:
@@ -118,14 +111,8 @@ def reset_cloud_link(conn, dry_run):
if __name__ == '__main__':
logger = get_logger(args.verbose, args.config)
logger.info("kontor.check_kontor started")
_, mariadb_conn = get_database_cursors(logger, args.config)
mariadb_cursor = mariadb_conn.cursor()
if args.reset_cloud_link:
reset_cloud_link(mariadb_conn, args.dry_run)
link_list = []
data_dir = args.dir
logger.info("kontor.check_kontor.rename_files_to_id")
rename_files_to_id(data_dir, mariadb_conn, args.dry_run)
rename_files_to_id(args.dir, args.dry_run, logger)
#logger.info("kontor.check_kontor.update_cloud_link_with_found_files")
#update_cloud_link_with_found_files(data_dir, mariadb_conn, args.dry_run)
#logger.info("kontor.check_kontor.get_ids_from_column_cloud_link")
@@ -133,5 +120,5 @@ if __name__ == '__main__':
#logger.info('found {} ids in column cloud_link'.format(len(link_list)))
#logger.info("kontor.check_kontor.checking_ids_from_cloud_link")
#checking_ids_from_cloud_link(link_list, mariadb_cursor)
mariadb_conn.close()
logger.info("kontor.check_kontor finished")
+101 -37
View File
@@ -1,13 +1,15 @@
"""
download files with URLs from DB
"""
import re
import subprocess
from argparse import ArgumentParser, ArgumentDefaultsHelpFormatter
from platformdirs import PlatformDirs
from datetime import datetime
from enum import Enum, auto
from pathlib import Path
import yaml
from sqlalchemy import create_engine, select
from sqlalchemy.orm import sessionmaker
from schema import Base, KontorDB, MediaFile
from uuid import UUID
import requests
from config import get_logger
@@ -17,42 +19,104 @@ parser.add_argument('--config', '-c', default='kontor-docker')
parser.add_argument('--dir', '-d', default='/data/media')
parser.add_argument('--tool', '-t', default='yt-dlp')
parser.add_argument('--dry-run', '-m', action='store_true')
parser.add_argument('--rename', '-r', action='store_true')
args = parser.parse_args()
class FileStatus(Enum):
    """Where a media file stands relative to the download directory."""
    # The file exists under its recorded file name.
    DOWNLOADED = 1
    # The file exists under its id-based name.
    RENAMED = 2
    # Neither name was found on disk.
    UNKNOWN = 3
def download_file(url: str, file_info: dict, download_dir: str = "/data/media", dl_tool: str = "yt-dlp") -> dict:
    """Run the download tool for ``url`` and record the outcome in ``file_info``.

    Args:
        url: Address handed to the download tool.
        file_info: Media-file record; updated in place.
        download_dir: Working directory in which the tool is started.
        dl_tool: Executable to run.

    Returns:
        The same ``file_info`` dict. It is left untouched when the tool exits
        with a non-zero code, flagged for review when no file name can be
        parsed from the tool's output, and otherwise filled with the file
        name, its absolute path and a modification timestamp.
    """
    # NOTE(review): block nesting was reconstructed from the statements
    # themselves - confirm against the original indentation.
    print(f"download file for {url} to {download_dir}")
    completed = subprocess.run([dl_tool, url], cwd=download_dir, capture_output=True, text=True)
    if completed.returncode != 0:
        return file_info

    # Collapse runs of spaces before handing the lines to the parser.
    collapsed = re.sub(' +', ' ', completed.stdout)
    parsed_name = __parse_output__(collapsed.splitlines())

    if parsed_name is None:
        file_info['review'] = True
        file_info['should_download'] = True
        file_info['file_name'] = None
        return file_info

    target = Path(download_dir, parsed_name)
    file_info['should_download'] = False
    file_info['file_name'] = target.name
    file_info['cloud_link'] = str(target.absolute())
    file_info['last_modified_date'] = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    return file_info
def __parse_output__(lines_list: list[str]) -> str | None:
file_name = None
for line in lines_list:
if 'has already been downloaded' in line:
end_len = len(' has already been downloaded')
file_name = line[11:-end_len]
if 'Destination' in line:
line_len = len(line)
start_len = len('[download] Destination: ')
file_len = line_len - start_len
file_name = line[-file_len:]
return file_name
def is_file_downloaded(item: dict, dir: Path) -> FileStatus:
    """Check whether the media file described by ``item`` already exists in ``dir``.

    The file is looked for first under its recorded ``file_name`` and then
    under its id with any extension (``<id>.*``), which is the name pattern
    ``rename_file`` produces.

    Args:
        item: Media-file record with at least an ``id`` key; updated in place
            when the file is found (``should_download`` is cleared, and
            ``cloud_link`` is set for an id-named file).
        dir: Directory that holds the downloaded files.

    Returns:
        ``FileStatus.DOWNLOADED`` when the file exists under its file name,
        ``FileStatus.RENAMED`` when it exists under its id, and
        ``FileStatus.UNKNOWN`` otherwise.
    """
    media_dir = Path(dir)

    # Skip the name check when no file name is recorded; formatting None would
    # otherwise probe for a file literally called "None".
    file_name = item.get('file_name')
    if file_name and (media_dir / str(file_name)).is_file():
        log.info(f"{file_name} has been downloaded")
        item['should_download'] = 0
        return FileStatus.DOWNLOADED

    # The extension is part of the file name, not a further path component.
    renamed = sorted(path for path in media_dir.glob(f"{item['id']}.*") if path.is_file())
    if renamed:
        file_with_id_as_name = renamed[0]
        log.info(f"{file_with_id_as_name} has been downloaded and renamed")
        # Store a string: the record is later sent as JSON.
        item['cloud_link'] = str(file_with_id_as_name)
        item['should_download'] = 0
        return FileStatus.RENAMED

    log.info("could not find file - start download")
    return FileStatus.UNKNOWN
def update_status(item_id: UUID, file_info: dict):
    """PUT ``file_info`` to the media API entry ``item_id`` and log the reply.

    Args:
        item_id: Id of the media file to update.
        file_info: Record sent as the JSON request body.
    """
    endpoint = f"http://127.0.0.1:8800/media/files/{item_id}"
    reply = requests.put(endpoint, json=file_info)
    log.info(f"update status: {reply.status_code}")
    log.info(f"update result: {reply.json()}")
def rename_file(file_info: dict):
    """Rename the downloaded file to ``<id><suffix>`` and record its new path.

    The file is looked up under ``file_info['file_name']`` inside the
    directory given by the ``--dir`` argument. After a successful rename,
    ``file_info['cloud_link']`` holds the new path as a string.

    Args:
        file_info: Media-file record with ``id`` and ``file_name``; updated
            in place.

    Raises:
        OSError: If the rename itself fails (e.g. the file does not exist).
    """
    item_id = file_info['id']
    file_name = file_info.get('file_name')
    if not file_name:
        # No file name is recorded, so there is nothing to rename; building a
        # path from None would raise TypeError.
        log.warning(f"no file name known for {item_id} - skip rename")
        return

    file = Path(args.dir, file_name)
    new_file_path = file.with_name(f"{item_id}{file.suffix}")
    log.info(f"rename {file} to {new_file_path}")
    file.rename(new_file_path)
    file_info['cloud_link'] = str(new_file_path)
if __name__ == '__main__':
log = get_logger(args.verbose, args.config)
log.info('kontor.download started')
dirs = PlatformDirs(args.config)
database_config = Path(dirs.user_config_dir, 'database-config.yaml')
with open(database_config, 'rt') as f:
db_config = yaml.safe_load(f.read())
print(db_config)
connect_string = ('mariadb+mariadbconnector://{}:{}@{}:{}/{}'.format(
db_config['mariadb']['user'],
db_config['mariadb']['password'],
db_config['mariadb']['host'],
db_config['mariadb']['port'],
db_config['mariadb']['database']
))
engine = create_engine(connect_string)
Base.metadata.create_all(bind=engine, checkfirst=True)
__session__ = sessionmaker(bind=engine)
_filter = {'should_download': 1}
with __session__() as session:
files = session.query(MediaFile).filter_by(**_filter).all()
log.info("found %d entries", len(files))
files2 = session.query(MediaFile).filter(MediaFile.should_download == 1).all()
log.info("found %d entries", len(files2))
for mediafile in files2:
mediafile.download_file(download_dir=args.dir, dl_tool="yt-dlp")
log.info("Datei {} erfolgreich heruntergeladen".format(mediafile.file_name))
if args.rename:
current_file = Path(mediafile.file_name)
new_file_path = current_file.with_name(f"{mediafile.id}{current_file.suffix}")
current_file.rename(Path(new_file_path))
mediafile.cloud_link = new_file_path
session.add(mediafile)
session.commit()
response = requests.get("http://127.0.0.1:8800/media/files?download=true")
log.info(f"Status: {response.status_code}")
data = response.json()
log.info(f"data: {len(data)}")
for item in data:
link = item['url']
file_id = item['id']
log.info(f"{file_id} - {link}")
download_status: FileStatus = is_file_downloaded(item, args.dir)
match download_status:
case FileStatus.DOWNLOADED:
rename_file(item)
update_status(file_id, item)
case FileStatus.RENAMED:
log.info("update status")
update_status(file_id, item)
case FileStatus.UNKNOWN:
download_file(link, item)
rename_file(item)
log.info(f'{item}')
update_status(file_id, item)
log.info('kontor.download finished')
+23 -30
View File
@@ -2,14 +2,14 @@
download files with URLs from DB
"""
import logging.config
import requests
import yaml
from argparse import ArgumentParser, ArgumentDefaultsHelpFormatter
from pathlib import Path
from platformdirs import PlatformDirs
from sqlalchemy import create_engine, select
from sqlalchemy.orm import sessionmaker
from schema import MediaFile, Base
from bs4 import BeautifulSoup
from platformdirs import PlatformDirs
parser = ArgumentParser(formatter_class=ArgumentDefaultsHelpFormatter)
parser.add_argument('--verbose', '-v', action='count', default=0)
@@ -37,30 +37,23 @@ def get_logger(level: int, config: str):
if __name__ == '__main__':
log = get_logger(args.verbose, args.config)
log.info('kontor.update_titles started')
dirs = PlatformDirs(args.config)
database_config = Path(dirs.user_config_dir, 'database-config.yaml')
with open(database_config, 'rt') as f:
db_config = yaml.safe_load(f.read())
print(db_config)
connect_string = ('mariadb+mariadbconnector://{}:{}@{}:{}/{}'.format(
db_config['mariadb']['user'],
db_config['mariadb']['password'],
db_config['mariadb']['host'],
db_config['mariadb']['port'],
db_config['mariadb']['database']
))
engine = create_engine(connect_string)
Base.metadata.create_all(bind=engine, checkfirst=True)
__session__ = sessionmaker(engine)
_filter = {'review': 1}
with __session__() as session:
files = session.query(MediaFile).filter_by(**_filter).all()
log.info("found %d entries", len(files))
files2 = session.query(MediaFile).filter(MediaFile.review ==1).all
log.info("found %d entries", len(files2))
for mediafile in files:
mediafile.update_title()
session.add(mediafile)
session.commit()
log.info("found %d entries", len(files))
response = requests.get("http://127.0.0.1:8800/media/files?review=true")
log.info(f"Status: {response.status_code}")
data = response.json()
log.info(f"data: {len(data)}")
for item in data:
link = item['url']
log.info(f"{item['id']} - {link}")
try:
r = requests.get(link)
soup = BeautifulSoup(r.content, "html.parser")
title = soup.title.string
item['title'] = title
item['review'] = 0
except:
item['title'] = None
item['review'] = 1
update = requests.put(f"http://127.0.0.1:8800/media/files/{item['id']}", json=item)
log.info(f"update status: {update.status_code}")
log.info(f"update result: {update.json()}")
log.info('kontor.update_titles finished')