change update_title to add authorization to REST calls

This commit is contained in:
2026-02-22 18:45:20 +01:00
parent b458f3b6d4
commit 0951cc74ff
+103 -28
View File
@@ -2,19 +2,35 @@
download files with URLs from DB
"""
import logging.config
import os
import re
from typing import Any, Dict, List
import requests
from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker, Session
from db.models.base import Base
import yaml
from argparse import ArgumentParser, ArgumentDefaultsHelpFormatter
from pathlib import Path
from bs4 import BeautifulSoup
from platformdirs import PlatformDirs
from config import get_api_config
from db.models.media import MediaFile
# Command-line interface. Arguments are parsed at import time, so `args`
# is available to everything below in this module.
parser = ArgumentParser(formatter_class=ArgumentDefaultsHelpFormatter)
# -v may be repeated; the resulting count is handed to get_logger().
parser.add_argument('--verbose', '-v', action='count', default=0)
parser.add_argument('--config', '-c', default='kontor-docker')
parser.add_argument(
    '--dry-run', '-m',
    help='execute script without storing',
    action="store_true",
)
args = parser.parse_args()
# Database connection settings. Every value can be overridden through the
# environment variable of the same name; the literals are the fallbacks.
DB_USER: str = os.getenv("DB_USER", "kontor")
DB_PASSWORD: str = os.getenv("DB_PASSWORD", "kontor")
DB_SERVER: str = os.getenv("DB_SERVER", "127.0.0.1")
# Environment values are strings, so the fallback is a string as well and
# the conversion to int happens exactly once, here.
DB_PORT: int = int(os.getenv("DB_PORT", "5432"))
DB_DBNAME: str = os.getenv("DB_DBNAME", "kontor")
DATABASE_URL: str = (
    f"postgresql://{DB_USER}:{DB_PASSWORD}@{DB_SERVER}:{DB_PORT}/{DB_DBNAME}"
)
def get_logger(level: int, config: str):
dirs = PlatformDirs(config)
logging_config = Path(dirs.user_config_dir, 'logging-config.yaml')
@@ -32,37 +48,96 @@ def get_logger(level: int, config: str):
logger.setLevel(logging.CRITICAL)
return logger
def get_session() -> Session:
    """Return a new database session bound to ``DATABASE_URL``.

    An engine is created for the configured URL and the tables registered
    on ``Base`` are created before the session is handed out.
    """
    db_engine = create_engine(DATABASE_URL)
    # checkfirst=True: only tables that do not exist yet are created.
    Base.metadata.create_all(bind=db_engine, checkfirst=True)
    session_factory = sessionmaker(bind=db_engine)
    return session_factory()
def get_media_files(all_files: bool, log: logging.Logger, api_data: Dict[str, Any]) -> Any:
    """Fetch media file records from the REST API.

    Args:
        all_files: When true, request every media file; otherwise only the
            ones selected by the ``review=true`` query parameter.
        log: Logger that receives the HTTP status code at debug level.
        api_data: API settings; must contain ``host``, ``port`` and ``token``.

    Returns:
        The decoded JSON body of the response.

    Raises:
        KeyError: if ``api_data`` lacks one of the required keys.
        requests.RequestException: if the request fails or times out.
        ValueError: if the response body is not valid JSON.
    """
    timeout_seconds = 30
    host = api_data["host"]
    port = api_data["port"]
    token = api_data['token']
    headers: Dict[str, str] = {"Authorization": f"Bearer {token}"}

    files_url = f"http://{host}:{port}/api/media/files"
    if not all_files:
        files_url += "?review=true"

    # requests has no default timeout; without one an unresponsive server
    # would block the script forever.
    response = requests.get(files_url, headers=headers, timeout=timeout_seconds)
    log.debug(f"Status: {response.status_code}")
    return response.json()
def update_media_file(media_file: MediaFile, log: logging.Logger, api_data: Dict[str, Any]) -> Any:
    """Send the current state of ``media_file`` to the REST API via PUT.

    Args:
        media_file: Record to publish; its ``id`` selects the API resource.
        log: Logger that receives status code and response at debug level.
        api_data: API settings; must contain ``host``, ``port`` and ``token``.

    Returns:
        The decoded JSON body of the PUT response.

    Raises:
        KeyError: if ``api_data`` lacks one of the required keys.
        requests.RequestException: if the request fails or times out.
        ValueError: if the response body is not valid JSON.
    """
    timeout_seconds = 30
    host = api_data["host"]
    port = api_data["port"]
    token = api_data['token']
    url: str = f"http://{host}:{port}/api/media/files/{media_file.id}"
    headers: Dict[str, str] = {"Authorization": f"Bearer {token}"}

    item: Dict[str, Any] = {
        'id': media_file.id,
        'title': media_file.title,
        'file_name': media_file.file_name,
        'cloud_link': media_file.cloud_link,
        'url': media_file.url,
        'review': media_file.review,
        'should_download': media_file.should_download,
    }

    # requests has no default timeout; without one an unresponsive server
    # would block the script forever.
    update = requests.put(url, headers=headers, json=item, timeout=timeout_seconds)
    log.debug(f"update status: {update.status_code}")
    # Decode the body once and reuse it for logging and the return value.
    result = update.json()
    log.debug(f"update result: {result}")
    return result
# Download the page behind media_file.url, update media_file in place
# (title, review, and url when the page reports a missing video) and return
# the unique anchor hrefs that match the link pattern below.
# Any exception raised while fetching or parsing is caught and logged; in
# that case title is reset to None and review is set to True.
def get_meta_info(media_file: MediaFile, log) -> List[str]:
actor_links: List[str] = []
try:
# NOTE(review): no timeout is passed, so this call can block indefinitely
# on an unresponsive host - consider adding one.
r = requests.get(media_file.url)
soup = BeautifulSoup(r.content, "html.parser")
# The site's "video not found" page is recognised by this element and text.
error404 = soup.css.select_one('.error404-title')
if error404 and error404.get_text() == "Video nicht gefunden":
log.warning(f"{error404.get_text()}")
media_file.url = None
media_file.review = False
return actor_links
title_tag = soup.find('title')
if title_tag:
media_file.title = title_tag.get_text()
# NOTE(review): indentation is not visible in this view, so it cannot be
# told whether the next assignment belongs to the `if title_tag:` body or
# runs unconditionally - confirm against the real file.
media_file.review = False
anchors = soup.find_all('a', attrs={'href': re.compile("^https://.*pornstars/.*")})
for anchor in anchors:
link_url = str(anchor.get("href")) # type: ignore
# Skip hrefs ending in 'all/countries'.
if link_url.endswith('all/countries'):
continue
# Keep each link only once, in first-seen order.
if link_url in actor_links:
continue
actor_links.append(link_url)
except Exception as error:
log.info(f"something went wrong: {error}")
media_file.title = None
media_file.review = True
log.info(f"update MediaFile with MetaInfos to {repr(media_file)}")
log.info(f"links({len(actor_links)}): {actor_links}")
return actor_links
# Script entry point: refresh titles of media files that are flagged for review.
if __name__ == '__main__':
log = get_logger(args.verbose, args.config)
log.info('kontor.update_titles started')
# NOTE(review): this request uses a hard-coded address and sends no
# Authorization header; its result is overwritten by get_media_files()
# a few lines below. It looks like the pre-change implementation shown next
# to its replacement - confirm which lines belong to the current file.
response = requests.get("http://127.0.0.1:8800/api/media/files?review=true")
log.info(f"Status: {response.status_code}")
data = response.json()
# Load the API settings for the selected config and fetch the files flagged
# for review through the authenticated helper.
api_data = get_api_config(log, args.config)
data = get_media_files(False, log, api_data=api_data)
log.info(f"data: {len(data)}")
# NOTE(review): the loop over `data` below fetches each page and PUTs the
# item back to a hard-coded address without Authorization header. It
# presumably is the old code path superseded by the MediaFile loop further
# down - confirm before relying on it.
for item in data:
link = item['url']
log.info(f"{item['id']} - {str(link)}")
if not link:
continue
# The string "None" is treated as a missing URL and replaced by None.
if link == "None":
item['url'] = None
else:
try:
r = requests.get(link)
soup = BeautifulSoup(r.content, "html.parser")
title_tag = soup.find('title')
if title_tag:
title= title_tag.get_text()
title = soup.title.string
item['title'] = title
item['review'] = False
except Exception as error:
log.info(f"something went wrong: {error}")
item['title'] = None
item['review'] = True
update = requests.put(f"http://127.0.0.1:8800/api/media/files/{item['id']}", json=item)
log.info(f"update status: {update.status_code}")
log.info(f"update result: {update.json()}")
# Database-driven path: query MediaFile rows with review == True, refresh
# their metadata via get_meta_info() and send them to the REST API via
# update_media_file(), which is skipped when --dry-run is set.
session = get_session()
with session as db:
media_files = db.query(MediaFile).filter(MediaFile.review == True).all()
for media_file in media_files:
link = media_file.url
log.info(f"{media_file.id} - {str(link)}")
if not link:
continue
# The string "None" is treated as a missing URL and replaced by None.
if link == "None":
media_file.url = None
else:
get_meta_info(media_file, log)
if not args.dry_run:
update_media_file(media_file, log, api_data=api_data)
log.info('kontor.update_titles finished')