From c9a350bc584fb1d53332c8144fa73829c06710c4 Mon Sep 17 00:00:00 2001
From: Thomas Peetz
Date: Sun, 22 Feb 2026 18:45:20 +0100
Subject: [PATCH] change update_title to add authorization to REST calls

---
 kontor-scripts/update_title.py | 177 ++++++++++++++++++++++++++++++++-
 1 file changed, 173 insertions(+), 4 deletions(-)

diff --git a/kontor-scripts/update_title.py b/kontor-scripts/update_title.py
index 7031180..bec4544 100644
--- a/kontor-scripts/update_title.py
+++ b/kontor-scripts/update_title.py
@@ -2,19 +2,46 @@
 download files with URLs from DB
 """
 import logging.config
+import os
+import re
+from typing import Any, Dict, List
+from urllib.parse import quote_plus
 import requests
+from sqlalchemy import create_engine
+from sqlalchemy.orm import sessionmaker, Session
+from db.models.base import Base
 import yaml
 from argparse import ArgumentParser, ArgumentDefaultsHelpFormatter
 from pathlib import Path
 from bs4 import BeautifulSoup
 from platformdirs import PlatformDirs
+from config import get_api_config
+from db.models.media import MediaFile
 
 parser = ArgumentParser(formatter_class=ArgumentDefaultsHelpFormatter)
 parser.add_argument('--verbose', '-v', action='count', default=0)
 parser.add_argument('--config', '-c', default='kontor-docker')
+parser.add_argument('--dry-run', '-m', help='execute script without storing', action="store_true")
 args = parser.parse_args()
 
+# Database connection settings; every value can be overridden through the
+# environment, the defaults point at 127.0.0.1:5432.
+DB_USER: str = os.getenv("DB_USER", "kontor")
+DB_PASSWORD: str = os.getenv("DB_PASSWORD", "kontor")
+DB_SERVER: str = os.getenv("DB_SERVER", "127.0.0.1")
+DB_PORT: int = int(os.getenv("DB_PORT", "5432"))
+DB_DBNAME: str = os.getenv("DB_DBNAME", "kontor")
+# Credentials are percent-encoded so characters such as '@', ':' or '/' in a
+# user name or password cannot corrupt the connection URL.
+DATABASE_URL: str = (
+    f"postgresql://{quote_plus(DB_USER)}:{quote_plus(DB_PASSWORD)}"
+    f"@{DB_SERVER}:{DB_PORT}/{DB_DBNAME}"
+)
+# Upper bound in seconds for every outgoing HTTP request so a stalled server
+# cannot block the script forever.
+REQUEST_TIMEOUT: float = 30.0
+
 
 def get_logger(level: int, config: str):
     dirs = PlatformDirs(config)
     logging_config = Path(dirs.user_config_dir, 'logging-config.yaml')
@@ -32,13 +59,156 @@ def get_logger(level: int, config: str):
         logger.setLevel(logging.CRITICAL)
     return logger
 
+
+def get_session() -> Session:
+    """Open a new SQLAlchemy session on the configured database.
+
+    Every table registered on ``Base.metadata`` that does not exist yet is
+    created before the session is returned.
+
+    Returns:
+        A new session bound to an engine for ``DATABASE_URL``.
+    """
+    engine = create_engine(DATABASE_URL)
+    Base.metadata.create_all(bind=engine, checkfirst=True)
+    SessionLocal = sessionmaker(bind=engine)
+    return SessionLocal()
+
+
+def _api_base_url(api_data: Dict[str, Any]) -> str:
+    """Return the root URL of the media-files REST resource."""
+    return f"http://{api_data['host']}:{api_data['port']}/api/media/files"
+
+
+def _auth_headers(api_data: Dict[str, Any]) -> Dict[str, str]:
+    """Return the bearer-token authorization header for the REST API."""
+    return {"Authorization": f"Bearer {api_data['token']}"}
+
+
+def get_media_files(all_files: bool, log: logging.Logger, api_data: Dict[str, Any]) -> Any:
+    """Fetch media file records from the REST API.
+
+    Args:
+        all_files: When true every record is requested, otherwise only the
+            records flagged for review (``review=true``).
+        log: Logger receiving the HTTP status at debug level.
+        api_data: API settings; must provide ``host``, ``port`` and ``token``.
+
+    Returns:
+        The decoded JSON body of the response.
+
+    Raises:
+        requests.HTTPError: The API answered with a 4xx/5xx status, e.g.
+            because the token was rejected.
+        requests.RequestException: The request failed or timed out.
+    """
+    params: Dict[str, str] = {} if all_files else {"review": "true"}
+    response = requests.get(
+        _api_base_url(api_data),
+        headers=_auth_headers(api_data),
+        params=params,
+        timeout=REQUEST_TIMEOUT,
+    )
+    log.debug(f"Status: {response.status_code}")
+    # Fail loudly instead of handing an error payload to the caller, which
+    # iterates the result as a list of records.
+    response.raise_for_status()
+    return response.json()
+
+
+def update_media_file(media_file: MediaFile, log: logging.Logger, api_data: Dict[str, Any]) -> Any:
+    """Store the current state of a media file through the REST API.
+
+    Args:
+        media_file: Record whose fields are sent as the JSON body of the PUT.
+        log: Logger receiving the status and the response body.
+        api_data: API settings; must provide ``host``, ``port`` and ``token``.
+
+    Returns:
+        The decoded JSON body of the response.
+
+    Raises:
+        requests.RequestException: The request failed or timed out.
+    """
+    item: Dict[str, Any] = {
+        'id': media_file.id,
+        'title': media_file.title,
+        'file_name': media_file.file_name,
+        'cloud_link': media_file.cloud_link,
+        'url': media_file.url,
+        'review': media_file.review,
+        'should_download': media_file.should_download,
+    }
+    update = requests.put(
+        f"{_api_base_url(api_data)}/{media_file.id}",
+        headers=_auth_headers(api_data),
+        json=item,
+        timeout=REQUEST_TIMEOUT,
+    )
+    log.debug(f"update status: {update.status_code}")
+    if not update.ok:
+        log.warning(f"update of media file {media_file.id} was rejected: {update.status_code}")
+    # Decode the body once; it is both logged and returned.
+    result = update.json()
+    log.debug(f"update result: {result}")
+    return result
+
+
+def get_meta_info(media_file: MediaFile, log: logging.Logger) -> List[str]:
+    """Scrape title and actor links from the page behind ``media_file.url``.
+
+    ``media_file`` is modified in place: on success the page title is stored
+    and ``review`` is cleared; when the page reports that the video no longer
+    exists the ``url`` is dropped; on any failure the title is reset and the
+    record is flagged for review again.
+
+    Args:
+        media_file: Record to look up and update.
+        log: Logger for progress and error messages.
+
+    Returns:
+        The distinct actor page links found on the page, in page order; empty
+        when the page could not be evaluated.
+    """
+    actor_links: List[str] = []
+    try:
+        r = requests.get(media_file.url, timeout=REQUEST_TIMEOUT)
+        soup = BeautifulSoup(r.content, "html.parser")
+        error404 = soup.css.select_one('.error404-title')
+        if error404 and error404.get_text() == "Video nicht gefunden":
+            log.warning(f"{error404.get_text()}")
+            media_file.url = None
+            media_file.review = False
+            return actor_links
+        # Checked after the "not found" marker because that page may itself be
+        # served with an error status; any other error page must not supply
+        # the title.
+        r.raise_for_status()
+        title_tag = soup.find('title')
+        if title_tag:
+            media_file.title = title_tag.get_text()
+            media_file.review = False
+        anchors = soup.find_all('a', attrs={'href': re.compile("^https://.*pornstars/.*")})
+        for anchor in anchors:
+            link_url = str(anchor.get("href"))  # type: ignore
+            if link_url.endswith('all/countries') or link_url in actor_links:
+                continue
+            actor_links.append(link_url)
+    except Exception as error:
+        # Best effort: a failed lookup only flags the record for review.
+        log.warning(f"something went wrong: {error}")
+        media_file.title = None
+        media_file.review = True
+    log.info(f"update MediaFile with MetaInfos to {repr(media_file)}")
+    log.info(f"links({len(actor_links)}): {actor_links}")
+    return actor_links
+
 
 if __name__ == '__main__':
     log = get_logger(args.verbose, args.config)
     log.info('kontor.update_titles started')
-    response = requests.get("http://127.0.0.1:8800/api/media/files?review=true")
-    log.info(f"Status: {response.status_code}")
-    data = response.json()
+    api_data = get_api_config(log, args.config)
+    data = get_media_files(False, log, api_data=api_data)
     log.info(f"data: {len(data)}")
     for item in data:
         link = item['url']
@@ -57,4 +227,3 @@ if __name__ == '__main__':
         log.info(f"update status: {update.status_code}")
         log.info(f"update result: {update.json()}")
     log.info('kontor.update_titles finished')
-