synchronize data between configured servers
Gitea Actions Demo / Explore-Gitea-Actions (push) Successful in 4s

This commit is contained in:
2026-05-23 20:32:04 +02:00
parent 8d684908e6
commit 0f9c90b883
6 changed files with 264 additions and 133 deletions
+72 -61
View File
@@ -1,6 +1,7 @@
"""
read file with links and store it in DB
"""
from datetime import datetime
import logging
import re
@@ -10,52 +11,46 @@ from bs4 import BeautifulSoup
import requests
from argparse import ArgumentParser, ArgumentDefaultsHelpFormatter
from pathlib import Path
from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker, Session
from api import Server, get_api_config, get_logger
from db.models.base import Base
import os
from db.models.media import MediaActor, MediaActorFile, MediaFile
# Command-line interface of the link importer.
# Every option is registered exactly once: adding the same option strings a
# second time makes argparse raise an ArgumentError (conflicting option string).
parser = ArgumentParser(formatter_class=ArgumentDefaultsHelpFormatter)
parser.add_argument(
    "--file", "-f", help="file with links", default="~/.sync/media/list.txt"
)
parser.add_argument("--video", help="store Url as VideoFile", action="store_true")
parser.add_argument("--config", "-c", default="kontor-api")
parser.add_argument("--server", "-s")
# "count" action: each repetition of -v increases the value by one.
parser.add_argument("--verbose", "-v", action="count", default=0)
parser.add_argument("--limit", "-l", type=int, help="maximum number of links to check")
parser.add_argument(
    "--dry-run", "-m", help="execute script without storing", action="store_true"
)
# Parsed at import time; the rest of the script reads the options from `args`.
args = parser.parse_args()
# Database connection settings. Each value is taken from the environment
# variable of the same name; the second argument is the fallback that is used
# when that variable is not set.
DB_USER: str = os.environ.get("DB_USER", "kontor")
DB_PASSWORD: str = os.environ.get("DB_PASSWORD", "kontor")
DB_SERVER: str = os.environ.get("DB_SERVER", "127.0.0.1")
DB_PORT: int = int(os.environ.get("DB_PORT", 5432))
DB_DBNAME: str = os.environ.get("DB_DBNAME", "kontor")
# Connection URL assembled from the settings above.
DATABASE_URL: str = (
    f"postgresql://{DB_USER}:{DB_PASSWORD}"
    f"@{DB_SERVER}:{DB_PORT}/{DB_DBNAME}"
)
def get_session() -> Session:
    """
    Open a new database session.

    Builds an engine for DATABASE_URL, runs create_all for the tables
    registered on Base.metadata and returns a session bound to that engine.
    A new engine is created on every call.
    """
    db_engine = create_engine(DATABASE_URL)
    # checkfirst=True asks SQLAlchemy to skip tables that already exist.
    Base.metadata.create_all(bind=db_engine, checkfirst=True)
    session_factory = sessionmaker(bind=db_engine)
    return session_factory()
def load_data(filename: str, log) -> List[str]:
    """
    Read list of links from file.

    A leading "~" in filename is expanded to the user's home directory, so a
    path such as "~/.sync/media/list.txt" resolves to a real location.
    Trailing whitespace is removed from every line and lines that are empty
    afterwards are skipped.

    :param filename: path of the text file holding one link per line
    :param log: logger used for progress messages
    :return: the links in file order
    :raises FileNotFoundError: if the file does not exist
    """
    log.debug("load_data")
    # Without expanduser() a path starting with "~" is looked up literally
    # and is reported as missing.
    import_file = Path(filename).expanduser()
    if not import_file.exists():
        log.info("File %s does not exist. Do nothing.", filename)
        raise FileNotFoundError(f"File {filename} does not exist")
    log.info("read txt file")
    with import_file.open("r", encoding="utf-8") as txt_file:
        stripped_lines = (line.rstrip() for line in txt_file)
        # A blank line carries no link, so it is left out of the result.
        return [link for link in stripped_lines if link]
def get_actors_mapping(actor_list: List[MediaActor]) -> Dict[str, MediaActor]:
"""
Create dictionary with actor links as key and MediaActor objects as values.
"""
mapping: Dict[str, MediaActor] = {}
for actor in actor_list:
if isinstance(actor, dict):
@@ -65,7 +60,11 @@ def get_actors_mapping(actor_list: List[MediaActor]) -> Dict[str, MediaActor]:
mapping[url] = actor
return mapping
def get_actornames_mapping(actor_list: List[MediaActor]) -> Dict[str, MediaActor]:
"""
Create dictionary with actor names as key and MediaActor objects as values.
"""
mapping: Dict[str, MediaActor] = {}
for actor in actor_list:
if isinstance(actor, dict):
@@ -75,42 +74,52 @@ def get_actornames_mapping(actor_list: List[MediaActor]) -> Dict[str, MediaActor
mapping[name] = actor
return mapping
def get_meta_info(media_file_obj: MediaFile, log) -> List[str]:
    """
    Get meta info for MediaFile from link.

    Downloads the page behind media_file_obj.url and updates the object in
    place:

    * the page contains the "Video nicht gefunden" error block: url is set to
      None, review to False and an empty list is returned
    * the page has a <title> tag: title is set to its text, review to False
    * the request or the parsing raises: title is set to None, review to True

    :param media_file_obj: MediaFile whose url is fetched; modified in place
    :param log: logger used for progress and error messages
    :return: actor page links found on the page, without duplicates, in the
        order they were found
    """
    actor_links: List[str] = []
    try:
        # The timeout keeps an unresponsive host from blocking the whole run.
        r = requests.get(media_file_obj.url, timeout=5)
        soup = BeautifulSoup(r.content, "html.parser")
        error404 = soup.css.select_one(".error404-title")
        if error404 and error404.get_text() == "Video nicht gefunden":
            log.warning("%s", error404.get_text())
            media_file_obj.url = None
            media_file_obj.review = False
            return actor_links
        title_tag = soup.find("title")
        if title_tag:
            media_file_obj.title = title_tag.get_text()
            media_file_obj.review = False
        anchors = soup.find_all(
            "a", attrs={"href": re.compile("^https://.*pornstars/.*")}
        )
        for anchor in anchors:
            link_url = str(anchor.get("href"))  # type: ignore
            # Links ending in "all/countries" match the pattern but are
            # filtered out explicitly.
            if link_url.endswith("all/countries"):
                continue
            if link_url in actor_links:
                continue
            actor_links.append(link_url)
    except Exception as error:
        # Best effort: a failing link is flagged for review instead of
        # aborting the processing of the remaining links.
        log.info("something went wrong: %s", error)
        media_file_obj.title = None
        media_file_obj.review = True
    log.info("update MediaFile with MetaInfos to %r", media_file_obj)
    log.info("links(%s): %s", len(actor_links), actor_links)
    return actor_links
def get_actor_name(actor_url: str, log: logging.Logger) -> str | None:
def get_actor_name(actor_link: str, log: logging.Logger) -> str | None:
"""
Get actor name from link url.
"""
try:
r = requests.get(actor_url)
r = requests.get(actor_link, timeout=5)
soup = BeautifulSoup(r.content, "html.parser")
titles = soup.find_all('h1')
titles = soup.find_all("h1")
for title in titles:
log.info(f"title: {title.get_text()}")
return title.get_text()
@@ -119,31 +128,33 @@ def get_actor_name(actor_url: str, log: logging.Logger) -> str | None:
return None
if __name__ == '__main__':
if __name__ == "__main__":
logger = get_logger(args.verbose, args.config)
logger.info('kontor.add_links started')
logger.info("kontor.add_links started")
if args.limit:
logger.warning(f"check the first {args.limit} links")
apiConfig = get_api_config(logger, args.config)
logger.warning("check the first %s links", args.limit)
APICONFIG = get_api_config(logger, args.config)
server_list: List[Server] = []
server: Optional[Server] = None
if args.server:
server = apiConfig.get_server(args.server)
server = APICONFIG.get_server(args.server)
if not server:
server = apiConfig.server[0]
server = APICONFIG.server[0]
else:
server = apiConfig.server[0]
server = APICONFIG.server[0]
links_index = 1
links = load_data(args.file, logger)
all_media_files = server.request(logger, table="media_file")
media_actors: List[MediaActor] = server.request(log=logger, table="media_actor")
actor_mapping = get_actors_mapping(media_actors)
actorname_mapping = get_actornames_mapping(media_actors)
for link in links:
logger.info(f"process {link}")
media_files = [media_file for media_file in all_media_files if media_file["url"] == link]
actor_mapping = get_actors_mapping(media_actors)
actorname_mapping = get_actornames_mapping(media_actors)
logger.info("process %s", link)
media_files = [
media_file for media_file in all_media_files if media_file["url"] == link
]
if len(media_files) == 0:
logger.info(f"MediaFile for link {link} not found")
logger.info("MediaFile for link %s not found", link)
media_file = MediaFile()
media_file.id = str(uuid.uuid4())
media_file.created_date = datetime.now()
@@ -169,7 +180,7 @@ if __name__ == '__main__':
media_actor_file.version = 0
media_actor_file.media_file_id = media_file.id
media_actor_file.media_actor_id = media_actor.id
logger.info(f"create mapping with {media_actor_file}")
logger.info("create mapping with %s", media_actor_file)
if not args.dry_run:
logger.info("add MediaFile Actor mapping %s", media_actor_file)
else:
@@ -184,7 +195,7 @@ if __name__ == '__main__':
media_actor.version = 0
media_actor.name = get_actor_name(actor_url, logger)
media_actor.url = actor_url
logger.info(f"update MediaActor with {repr(media_actor)}")
logger.info("update MediaActor with %s", repr(media_actor))
if not args.dry_run:
logger.info("Update MediaActor %s", media_actor)
media_actor_file = MediaActorFile()
@@ -194,13 +205,13 @@ if __name__ == '__main__':
media_actor_file.version = 0
media_actor_file.media_file_id = media_file.id
media_actor_file.media_actor_id = media_actor.id
logger.info(f"create mapping with {media_actor_file}")
logger.info("create mapping with %s", media_actor_file)
if not args.dry_run:
logger.info("Add MediaFile Actor mapping")
else:
for media_file in media_files:
logger.info(f"MediaFile with {media_file["id"]} is found")
logger.info("MediaFile with %s is found", media_file["id"])
links_index += 1
if args.limit and args.limit < links_index:
break
logger.info('kontor.add_link finished')
logger.info("kontor.add_link finished")