diff --git a/kontor-api/src/apis/version1/media/mediafile.py b/kontor-api/src/apis/version1/media/mediafile.py index 493cde4..fca4147 100644 --- a/kontor-api/src/apis/version1/media/mediafile.py +++ b/kontor-api/src/apis/version1/media/mediafile.py @@ -10,7 +10,12 @@ from src.db.repository.media import ( from src.db.session import SessionDep from src.schema.media.actor import MediaActorResponse, actor_to_response from src.schema.media.actorfile import MediaActorFileResponse, actorfile_to_response -from src.schema.media.file import MediaFileResponse, Link, file_to_response, set_file +from src.schema.media.file import ( + MediaFileResponse, + Link, + file_to_response, + file_to_model, +) from src.db.models.media import MediaFile router = APIRouter() @@ -128,11 +133,14 @@ def update_file_actors( def update_file( file_id: str, db: SessionDep, info: MediaFileResponse ) -> MediaFileResponse: - mediaFile = db.get(MediaFile, file_id) - if not mediaFile: + """ + Update MediaFile with given id and data. 
+ """ + media_file = db.get(MediaFile, file_id) + if not media_file: raise HTTPException(status_code=404, detail="MediaFile could not be found") - set_file(info, mediaFile) - db.add(mediaFile) + file_to_model(info, media_file) + db.add(media_file) db.commit() mediafile = db.get(MediaFile, file_id) if not mediafile: @@ -143,7 +151,7 @@ def update_file( @router.post("/files", status_code=status.HTTP_201_CREATED) def add_file(new_link: Link, db: SessionDep) -> MediaFileResponse: # type: ignore - logger.info(f"add url {new_link.url}") + logger.info("add url %s", new_link.url) try: mediaFile: MediaFile = create_new_mediafile(new_link.url, db) except: diff --git a/kontor-api/src/schema/media/file.py b/kontor-api/src/schema/media/file.py index c981da5..50e72c6 100644 --- a/kontor-api/src/schema/media/file.py +++ b/kontor-api/src/schema/media/file.py @@ -17,7 +17,11 @@ class MediaFileResponse(BaseModel): review: bool = False should_download: bool = False + def file_to_response(mediafile: MediaFile) -> MediaFileResponse: + """ + Create MediaFileResponse from model. + """ response: MediaFileResponse = MediaFileResponse( id=mediafile.id, created_date=mediafile.created_date, @@ -28,21 +32,34 @@ def file_to_response(mediafile: MediaFile) -> MediaFileResponse: cloud_link=mediafile.cloud_link, url=mediafile.url, review=mediafile.review, - should_download=mediafile.should_download + should_download=mediafile.should_download, ) return response -class Link(BaseModel): - url: str - -def set_file(model: MediaFileResponse, mediafile: MediaFile) -> None: +def file_to_model(model: MediaFileResponse, mediafile: MediaFile) -> MediaFile: + """ + Set data of response to model. 
+ """ mediafile.file_name = model.file_name mediafile.cloud_link = model.cloud_link if model.url is not None: mediafile.url = model.url + else: + mediafile.url = "" if model.title is not None: mediafile.title = model.title + else: + mediafile.title = "" mediafile.last_modified_date = datetime.now() mediafile.review = model.review mediafile.should_download = model.should_download + return mediafile + + +class Link(BaseModel): + """ + Pydantic model for uploading a URL. + """ + + url: str diff --git a/kontor-scripts/add_links.py b/kontor-scripts/add_links.py index 2b8bbbd..35772dd 100644 --- a/kontor-scripts/add_links.py +++ b/kontor-scripts/add_links.py @@ -1,6 +1,7 @@ """ read file with links and store it in DB """ + from datetime import datetime import logging import re @@ -10,52 +11,46 @@ from bs4 import BeautifulSoup import requests from argparse import ArgumentParser, ArgumentDefaultsHelpFormatter from pathlib import Path -from sqlalchemy import create_engine -from sqlalchemy.orm import sessionmaker, Session from api import Server, get_api_config, get_logger -from db.models.base import Base -import os - from db.models.media import MediaActor, MediaActorFile, MediaFile parser = ArgumentParser(formatter_class=ArgumentDefaultsHelpFormatter) -parser.add_argument('--file', '-f', help='file with links', default='~/.sync/media/list.txt') -parser.add_argument('--video', help='store Url as VideoFile', action="store_true") -parser.add_argument('--config', '-c', default='kontor-api') +parser.add_argument( + "--file", "-f", help="file with links", default="~/.sync/media/list.txt" +) +parser.add_argument("--video", help="store Url as VideoFile", action="store_true") +parser.add_argument("--config", "-c", default="kontor-api") parser.add_argument("--server", "-s") -parser.add_argument('--verbose', '-v', action='count', default=0) -parser.add_argument('--limit', '-l', type=int, help='maximum number of links to check') -parser.add_argument('--dry-run', '-m', help='excute script
without storing', action="store_true") +parser.add_argument("--verbose", "-v", action="count", default=0) +parser.add_argument("--limit", "-l", type=int, help="maximum number of links to check") +parser.add_argument( + "--dry-run", "-m", help="excute script without storing", action="store_true" +) args = parser.parse_args() -DB_USER: str = os.getenv("DB_USER", "kontor") -DB_PASSWORD: str = os.getenv("DB_PASSWORD", "kontor") -DB_SERVER: str = os.getenv("DB_SERVER", "127.0.0.1") -DB_PORT: int = int(os.getenv("DB_PORT", 5432)) -DB_DBNAME: str = os.getenv("DB_DBNAME", "kontor") -DATABASE_URL: str = f"postgresql://{DB_USER}:{DB_PASSWORD}@{DB_SERVER}:{DB_PORT}/{DB_DBNAME}" - -def get_session() -> Session: - engine = create_engine(DATABASE_URL) - Base.metadata.create_all(bind=engine, checkfirst=True) - SessionLocal = sessionmaker(bind=engine) - return SessionLocal() def load_data(filename: str, log) -> List[str]: - links: List[str] = [] + """ + Read list of links from file. + """ + link_list: List[str] = [] log.debug("load_data") import_file = Path(filename) if not import_file.exists(): log.info(f"File {filename} does not exist. Do nothing.") raise FileNotFoundError() log.info("read txt file") - with open(filename, 'r') as txt_file: + with open(filename, "r", encoding="utf-8") as txt_file: while line := txt_file.readline(): # log.info(line.rstrip()) - links.append(line.rstrip()) - return links + link_list.append(line.rstrip()) + return link_list + def get_actors_mapping(actor_list: List[MediaActor]) -> Dict[str, MediaActor]: + """ + Create dictionary with actor links as key and MediaActor objects as values. 
+ """ mapping: Dict[str, MediaActor] = {} for actor in actor_list: if isinstance(actor, dict): @@ -65,7 +60,11 @@ def get_actors_mapping(actor_list: List[MediaActor]) -> Dict[str, MediaActor]: mapping[url] = actor return mapping + def get_actornames_mapping(actor_list: List[MediaActor]) -> Dict[str, MediaActor]: + """ + Create dictionary with actor names as key and MediaActor objects as values. + """ mapping: Dict[str, MediaActor] = {} for actor in actor_list: if isinstance(actor, dict): @@ -75,42 +74,52 @@ def get_actornames_mapping(actor_list: List[MediaActor]) -> Dict[str, MediaActor mapping[name] = actor return mapping -def get_meta_info(media_file: MediaFile, log) -> List[str]: + +def get_meta_info(media_file_obj: MediaFile, log) -> List[str]: + """ + Get meta info for MediaFile from link. + """ actor_links: List[str] = [] try: - r = requests.get(media_file.url) + r = requests.get(media_file_obj.url, timeout=5) soup = BeautifulSoup(r.content, "html.parser") - error404 = soup.css.select_one('.error404-title') + error404 = soup.css.select_one(".error404-title") if error404 and error404.get_text() == "Video nicht gefunden": log.warning(f"{error404.get_text()}") - media_file.url = None - media_file.review = False + media_file_obj.url = None + media_file_obj.review = False return actor_links - title_tag = soup.find('title') + title_tag = soup.find("title") if title_tag: - media_file.title = title_tag.get_text() - media_file.review = False - anchors = soup.find_all('a', attrs={'href': re.compile("^https://.*pornstars/.*")}) + media_file_obj.title = title_tag.get_text() + media_file_obj.review = False + anchors = soup.find_all( + "a", attrs={"href": re.compile("^https://.*pornstars/.*")} + ) for anchor in anchors: - link_url = str(anchor.get("href")) # type: ignore - if link_url.endswith('all/countries'): + link_url = str(anchor.get("href")) # type: ignore + if link_url.endswith("all/countries"): continue if link_url in actor_links: continue 
actor_links.append(link_url) except Exception as error: log.info(f"something went wrong: {error}") - media_file.title = None - media_file.review = True - log.info(f"update MediaFile with MetaInfos to {repr(media_file)}") + media_file_obj.title = None + media_file_obj.review = True + log.info(f"update MediaFile with MetaInfos to {repr(media_file_obj)}") log.info(f"links({len(actor_links)}): {actor_links}") return actor_links -def get_actor_name(actor_url: str, log: logging.Logger) -> str | None: + +def get_actor_name(actor_link: str, log: logging.Logger) -> str | None: + """ + Get actor name from link url. + """ try: - r = requests.get(actor_url) + r = requests.get(actor_link, timeout=5) soup = BeautifulSoup(r.content, "html.parser") - titles = soup.find_all('h1') + titles = soup.find_all("h1") for title in titles: log.info(f"title: {title.get_text()}") return title.get_text() @@ -119,31 +128,33 @@ def get_actor_name(actor_url: str, log: logging.Logger) -> str | None: return None -if __name__ == '__main__': +if __name__ == "__main__": logger = get_logger(args.verbose, args.config) - logger.info('kontor.add_links started') + logger.info("kontor.add_links started") if args.limit: - logger.warning(f"check the first {args.limit} links") - apiConfig = get_api_config(logger, args.config) + logger.warning("check the first %s links", args.limit) + APICONFIG = get_api_config(logger, args.config) server_list: List[Server] = [] server: Optional[Server] = None if args.server: - server = apiConfig.get_server(args.server) + server = APICONFIG.get_server(args.server) if not server: - server = apiConfig.server[0] + server = APICONFIG.server[0] else: - server = apiConfig.server[0] + server = APICONFIG.server[0] links_index = 1 links = load_data(args.file, logger) all_media_files = server.request(logger, table="media_file") media_actors: List[MediaActor] = server.request(log=logger, table="media_actor") + actor_mapping = get_actors_mapping(media_actors) + actorname_mapping = 
get_actornames_mapping(media_actors) for link in links: - logger.info(f"process {link}") - media_files = [media_file for media_file in all_media_files if media_file["url"] == link] - actor_mapping = get_actors_mapping(media_actors) - actorname_mapping = get_actornames_mapping(media_actors) + logger.info("process %s", link) + media_files = [ + media_file for media_file in all_media_files if media_file["url"] == link + ] if len(media_files) == 0: - logger.info(f"MediaFile for link {link} not found") + logger.info("MediaFile for link %s not found", link) media_file = MediaFile() media_file.id = str(uuid.uuid4()) media_file.created_date = datetime.now() @@ -169,7 +180,7 @@ if __name__ == '__main__': media_actor_file.version = 0 media_actor_file.media_file_id = media_file.id media_actor_file.media_actor_id = media_actor.id - logger.info(f"create mapping with {media_actor_file}") + logger.info("create mapping with %s", media_actor_file) if not args.dry_run: logger.info("add MediaFile Actor mapping %s", media_actor_file) else: @@ -184,7 +195,7 @@ if __name__ == '__main__': media_actor.version = 0 media_actor.name = get_actor_name(actor_url, logger) media_actor.url = actor_url - logger.info(f"update MediaActor with {repr(media_actor)}") + logger.info("update MediaActor with %s", repr(media_actor)) if not args.dry_run: logger.info("Update MediaActor %s", media_actor) media_actor_file = MediaActorFile() @@ -194,13 +205,13 @@ if __name__ == '__main__': media_actor_file.version = 0 media_actor_file.media_file_id = media_file.id media_actor_file.media_actor_id = media_actor.id - logger.info(f"create mapping with {media_actor_file}") + logger.info("create mapping with %s", media_actor_file) if not args.dry_run: logger.info("Add MediaFile Actor mapping") else: for media_file in media_files: - logger.info(f"MediaFile with {media_file["id"]} is found") + logger.info("MediaFile with %s is found", media_file["id"]) links_index += 1 if args.limit and args.limit < links_index: break - 
logger.info('kontor.add_link finished') + logger.info("kontor.add_link finished") diff --git a/kontor-scripts/api.py b/kontor-scripts/api.py index b3d5305..295fac0 100644 --- a/kontor-scripts/api.py +++ b/kontor-scripts/api.py @@ -48,13 +48,25 @@ MAPPING: Dict[str, str] = { "mail_account": "api/admin/mailaccounts", } + class OptionType(Enum): + """ + OptionType defines the type of param for REST API call. + The type PARAM indicates a query parameter. + The type ID indicates the option is an identifier as part of the path. + """ + PARAM = auto() ID = auto() URL = auto() class Option: + """ + Option is a utility class to simplify options for the REST API call. + The type defines how to handle the value. + """ + def __init__(self, option_type: OptionType, value: str) -> None: self.type: Optional[OptionType] = option_type self.value: Optional[str] = value @@ -71,8 +83,6 @@ class EndPointNotAvailableException(Exception): Raised when calling an not existing endpoint. """ - pass - @dataclass class Login: @@ -120,6 +130,9 @@ class Server: self.token_type = str(token_type) def request(self, log: Logger, table: str, param: Optional[Option] = None): + """ + Requests data from Kontor-API instance by given table and optional parameters. + """ if not param: url: str = f"{self.url}/{MAPPING[table]}" else: @@ -133,13 +146,19 @@ class Server: return data def update(self, log: Logger, table: str, item_id: UUID, file_info: dict): + """ + Updates data to the Kontor-API instance.
+ """ url: str = f"{self.url}/{MAPPING[table]}/{item_id}" headers: Dict[str, str] = {"Authorization": f"Bearer {self.token}"} update = requests.put( url, headers=headers, json=file_info, timeout=self.timeout ) log.info(f"Status: {update.status_code}") - return update + if update.status_code == 404: + raise EndPointNotAvailableException + data = update.json() + return data @dataclass @@ -152,7 +171,9 @@ class ApiConfig: server: List[Server] def get_server(self, server_name: str) -> Optional[Server]: - """ """ + """ + Return server instance by given name or None. + """ found_server = None for server in self.server: if server.name == server_name: @@ -189,19 +210,22 @@ def get_logger(level, config: str): def get_api_config(log: Logger, config: str) -> ApiConfig: + """ + Load configuration from file. + """ dirs = PlatformDirs(config) api_config = Path(dirs.user_config_dir, "api.yaml") - with open(api_config, "rt") as f: + with open(api_config, "rt", encoding="utf-8") as f: api_data = yaml.safe_load(f.read()) servers = [Server(**server) for server in api_data["server"]] login = Login(**(api_data["login"])) - apiConfig = ApiConfig(server=servers, login=login) - log.debug(apiConfig) + api_config_data = ApiConfig(server=servers, login=login) + log.debug(api_config_data) if not api_data: log.fatal("API configuration is missing") - return apiConfig - for server in apiConfig.server: - server.login(apiConfig.login, log) - with open(api_config, "w") as f: + return api_config_data + for server in api_config_data.server: + server.login(api_config_data.login, log) + with open(api_config, "w", encoding="utf-8") as f: yaml.dump(api_data, f) - return apiConfig + return api_config_data diff --git a/kontor-scripts/download.py b/kontor-scripts/download.py index 8fb1805..9bf5304 100644 --- a/kontor-scripts/download.py +++ b/kontor-scripts/download.py @@ -10,6 +10,7 @@ from datetime import datetime from enum import Enum, auto from pathlib import Path from logging import Logger +from typing 
import Any, Dict, Optional from uuid import UUID from api import Option, OptionType, Server, get_api_config, get_logger @@ -25,6 +26,10 @@ args = parser.parse_args() class FileStatus(Enum): + """ + Status of video file. + """ + DOWNLOADED = auto() RENAMED = auto() UNKNOWN = auto() @@ -35,7 +40,10 @@ def download_file( file_info: dict, download_dir: str = "/data/media", dl_tool: str = "yt-dlp", -) -> dict: +) -> Dict[str, Any]: + """ + Download file from url. + """ print(f"download file for {url} to {download_dir}") result = subprocess.run( [dl_tool, url], cwd=download_dir, capture_output=True, text=True @@ -45,7 +53,7 @@ def download_file( output = re.sub(" +", " ", output) lines_list = output.splitlines() file_name = __parse_output__(lines_list) - log.info(f"found file: {file_name}") + logger.info("found file: %s", file_name) if file_name is None or not file_name.strip(): file_info["review"] = True file_info["should_download"] = True @@ -60,14 +68,14 @@ def download_file( return file_info -def __parse_output__(lines_list: list[str]) -> str | None: +def __parse_output__(lines_list: list[str]) -> Optional[str]: file_name = None for line in lines_list: - log.debug(f"parse line: {line}") + logger.debug("parse line: %s", line) if "has already been downloaded" in line: end_len = len(" has already been downloaded") file_name = line[11:-end_len] - log.info(f"file_name: {file_name}") + logger.info("file_name: %s", file_name) break if "Destination" in line: line_len = len(line) @@ -80,24 +88,27 @@ def __parse_output__(lines_list: list[str]) -> str | None: return file_name -def is_file_downloaded(media_file: dict, dir: Path) -> FileStatus: +def is_file_downloaded(media_file: dict, path: Path) -> FileStatus: + """ + Check, if file is already downloaded. 
+ """ file_name_as_title = f"{media_file['file_name']}" if not file_name_as_title: - log.info("title has not been set - start download") + logger.info("title has not been set - start download") return FileStatus.UNKNOWN - file_title = Path(dir, f"{file_name_as_title}.mp4") + file_title = Path(path, f"{file_name_as_title}.mp4") if file_title.exists(): - log.info(f"{file_name_as_title} has been downloaded") + logger.info("%s has been downloaded", file_name_as_title) media_file["should_download"] = False return FileStatus.DOWNLOADED file_name_as_id = f"{media_file['id']}" - file_with_id_as_name = Path(dir, f"{file_name_as_id}.mp4") + file_with_id_as_name = Path(path, f"{file_name_as_id}.mp4") if file_with_id_as_name.exists(): log.info(f"{file_with_id_as_name} has been downloaded and renamed") media_file['cloud_link'] = str(file_with_id_as_name) media_file['should_download'] = False return FileStatus.RENAMED - log.info("could not find file - start download") + logger.info("could not find file - start download") return FileStatus.UNKNOWN @@ -108,16 +119,19 @@ def update_status(item_id: UUID, file_info: dict): def rename_file(file_info: dict): + """ + Rename file. 
+ """ item_id = file_info["id"] file_name = file_info["file_name"] if file_name is None or not file_name.strip(): - log.info("file_name is not set, rename is not executed") + logger.info("file_name is not set, rename is not executed") file_info["review"] = True file_info["should_download"] = True return file = Path(args.dir, file_name) new_file_path = file.with_name(f"{item_id}{file.suffix}") - log.info(f"rename {file} to {new_file_path}") + logger.info("rename %s to %s", file, new_file_path) file.rename(Path(new_file_path)) file_info["cloud_link"] = str(new_file_path) @@ -129,33 +143,33 @@ if __name__ == "__main__": log.info(f"Status: {response.status_code}") data = response.json() entries_count = len(data) - log.info(f"data: {entries_count}") + logger.info("data: %s", entries_count) mediafile_index = 1 - log.debug(f"data: {data}") + logger.debug("data: %s", data) missing_actors = {} if args.dry_run: sys.exit(0) if args.limit: - log.warning(f"check the first {args.limit} links") + logger.warning("check the first %s links", args.limit) for item in data: link = item["url"] file_id = item["id"] - log.info(f"{file_id} - {link}") + logger.info("%s - %s", file_id, link) download_status: FileStatus = is_file_downloaded(item, args.dir) match download_status: case FileStatus.DOWNLOADED: rename_file(item) - update_status(file_id, item, server=server, log=log) + update_status(file_id, item, api_server=server, log=logger) case FileStatus.RENAMED: - log.info("update status") - update_status(file_id, item, server=server, log=log) + logger.info("update status") + update_status(file_id, item, api_server=server, log=logger) case FileStatus.UNKNOWN: download_file(link, item, args.dir) rename_file(item) - log.info(f"{item}") - update_status(file_id, item, server=server, log=log) - log.warning(f"processed {mediafile_index}/{entries_count}") + logger.info(item) + update_status(file_id, item, api_server=server, log=logger) + logger.warning("processed %s/%s", mediafile_index, 
entries_count) if args.limit and args.limit <= mediafile_index: break mediafile_index += 1 - log.info("kontor.download finished") + logger.info("kontor.download finished") diff --git a/kontor-scripts/sync.py b/kontor-scripts/sync.py index 3f14264..2afcaf7 100644 --- a/kontor-scripts/sync.py +++ b/kontor-scripts/sync.py @@ -1,12 +1,15 @@ +""" +Synchronize Kontor data between configured servers. +""" + import json from argparse import ArgumentDefaultsHelpFormatter, ArgumentParser -from typing import List +from logging import Logger +from typing import Dict, List from api import ( MAPPING, EndPointNotAvailableException, - Option, - OptionType, Server, get_api_config, get_logger, @@ -22,17 +25,43 @@ parser.add_argument("--cleanup", "-d", action="store_true") args = parser.parse_args() +def create_item_id_mapping(data_list: List[dict]) -> Dict[str, dict]: + """ + Create dictionary with id as key and dictionary as value. + """ + item_id_mapping: Dict[str, dict] = {} + for data_item in data_list: + item_id_mapping[data_item["id"]] = data_item + return item_id_mapping + + +def is_different(log: Logger, first_item, second_item: dict) -> bool: + """ + Check dicts for differences and return True if any values are not equal, except for last_modified_date.
+ """ + check_result = False + for key, value in first_item.items(): + if key in second_item.keys(): + if value != second_item[key]: + log.info("%s: %s != %s", key, value, second_item[key]) + if key == "last_modified_date": + continue + if not check_result: + check_result = True + return check_result + + if __name__ == "__main__": logger = get_logger(args.verbose, "kontor") logger.info("kontor.sync started") - apiConfig = get_api_config(logger, args.config) + APICONFIG = get_api_config(logger, args.config) server_list: List[Server] = [] if args.server: - server = apiConfig.get_server(args.server) + server = APICONFIG.get_server(args.server) if server: server_list.append(server) else: - server_list.extend(apiConfig.server) + server_list.extend(APICONFIG.server) export_data = {} for server in server_list: export_data[server.name] = {} @@ -48,20 +77,44 @@ if __name__ == "__main__": try: json_dump = json.dumps(export_data[server.name], indent=4) file_name = f"{server.name}-data.json" - with open(file_name, "w") as dump_file: + with open(file_name, "w", encoding="utf-8") as dump_file: dump_file.write(json_dump) except TypeError as error: - logger.info(f"{error}") + logger.info(error) for server in server_list: - logger.info(f"{server.name}: {len(export_data[server.name])} tables exported") + logger.info( + "%s: %s tables exported", server.name, len(export_data[server.name]) + ) if len(server_list) > 1: for table, path in MAPPING.items(): + mapping = create_item_id_mapping(export_data[server_list[1].name][table]) for item in export_data[server_list[0].name][table]: - item_data = server_list[1].request( - logger, table=table, param=Option(OptionType.ID, item["id"]) - ) - if item != item_data: - logger.debug("diff: %s\n%s", item, item_data) + logger.debug("checking %s:%s", table, item["id"]) + check_item_id = item["id"] + if check_item_id in mapping: + check_item = mapping[check_item_id] + if is_different(logger, item, check_item): + logger.info( + "checking values for %s 
!= %s", item["id"], check_item["id"] + ) + logger.debug("diff: %s\n%s", item, check_item) + result = server_list[1].update( + logger, table, check_item_id, item + ) + logger.info("update result: %s", result) + else: + logger.debug( + "no changes for: %s(%s - %s)", + table, + item["id"], + check_item["id"], + ) else: - logger.debug("no changes for: %s(%s)", table, item["id"]) + logger.info( + "item %s in %s missing", check_item_id, server_list[1].name + ) + logger.info("synchronization of %s finished", table) + logger.info("all tables synchronized") + else: + logger.info("not enough server configured for sync") logger.info("kontor.sync finished")