synchronize data between configured servers
Gitea Actions Demo / Explore-Gitea-Actions (push) Successful in 4s

This commit is contained in:
2026-05-23 20:32:04 +02:00
parent 8d684908e6
commit 0f9c90b883
6 changed files with 264 additions and 133 deletions
@@ -10,7 +10,12 @@ from src.db.repository.media import (
from src.db.session import SessionDep from src.db.session import SessionDep
from src.schema.media.actor import MediaActorResponse, actor_to_response from src.schema.media.actor import MediaActorResponse, actor_to_response
from src.schema.media.actorfile import MediaActorFileResponse, actorfile_to_response from src.schema.media.actorfile import MediaActorFileResponse, actorfile_to_response
from src.schema.media.file import MediaFileResponse, Link, file_to_response, set_file from src.schema.media.file import (
MediaFileResponse,
Link,
file_to_response,
file_to_model,
)
from src.db.models.media import MediaFile from src.db.models.media import MediaFile
router = APIRouter() router = APIRouter()
@@ -128,11 +133,14 @@ def update_file_actors(
def update_file( def update_file(
file_id: str, db: SessionDep, info: MediaFileResponse file_id: str, db: SessionDep, info: MediaFileResponse
) -> MediaFileResponse: ) -> MediaFileResponse:
mediaFile = db.get(MediaFile, file_id) """
if not mediaFile: Update MediaFile with given id and data.
"""
media_file = db.get(MediaFile, file_id)
if not media_file:
raise HTTPException(status_code=404, detail="MediaFile could not be found") raise HTTPException(status_code=404, detail="MediaFile could not be found")
set_file(info, mediaFile) file_to_model(info, media_file)
db.add(mediaFile) db.add(media_file)
db.commit() db.commit()
mediafile = db.get(MediaFile, file_id) mediafile = db.get(MediaFile, file_id)
if not mediafile: if not mediafile:
@@ -143,7 +151,7 @@ def update_file(
@router.post("/files", status_code=status.HTTP_201_CREATED) @router.post("/files", status_code=status.HTTP_201_CREATED)
def add_file(new_link: Link, db: SessionDep) -> MediaFileResponse: # type: ignore def add_file(new_link: Link, db: SessionDep) -> MediaFileResponse: # type: ignore
logger.info(f"add url {new_link.url}") logger.info("add url %s", new_link.url)
try: try:
mediaFile: MediaFile = create_new_mediafile(new_link.url, db) mediaFile: MediaFile = create_new_mediafile(new_link.url, db)
except: except:
+22 -5
View File
@@ -17,7 +17,11 @@ class MediaFileResponse(BaseModel):
review: bool = False review: bool = False
should_download: bool = False should_download: bool = False
def file_to_response(mediafile: MediaFile) -> MediaFileResponse: def file_to_response(mediafile: MediaFile) -> MediaFileResponse:
"""
Create MediaFileResponse from model.
"""
response: MediaFileResponse = MediaFileResponse( response: MediaFileResponse = MediaFileResponse(
id=mediafile.id, id=mediafile.id,
created_date=mediafile.created_date, created_date=mediafile.created_date,
@@ -28,21 +32,34 @@ def file_to_response(mediafile: MediaFile) -> MediaFileResponse:
cloud_link=mediafile.cloud_link, cloud_link=mediafile.cloud_link,
url=mediafile.url, url=mediafile.url,
review=mediafile.review, review=mediafile.review,
should_download=mediafile.should_download should_download=mediafile.should_download,
) )
return response return response
class Link(BaseModel): def file_to_model(model: MediaFileResponse, mediafile: MediaFile) -> MediaFile:
url: str """
Set data of response to model.
def set_file(model: MediaFileResponse, mediafile: MediaFile) -> None: """
mediafile.file_name = model.file_name mediafile.file_name = model.file_name
mediafile.cloud_link = model.cloud_link mediafile.cloud_link = model.cloud_link
if model.url is not None: if model.url is not None:
mediafile.url = model.url mediafile.url = model.url
else:
mediafile.url = ""
if model.title is not None: if model.title is not None:
mediafile.title = model.title mediafile.title = model.title
else:
mediafile.title = ""
mediafile.last_modified_date = datetime.now() mediafile.last_modified_date = datetime.now()
mediafile.review = model.review mediafile.review = model.review
mediafile.should_download = model.should_download mediafile.should_download = model.should_download
return mediafile
class Link(BaseModel):
    """
    Pydantic model for uploading a url.
    """
    # The url string; the only field of this model.
    url: str
+70 -59
View File
@@ -1,6 +1,7 @@
""" """
read file with links and store it in DB read file with links and store it in DB
""" """
from datetime import datetime from datetime import datetime
import logging import logging
import re import re
@@ -10,52 +11,46 @@ from bs4 import BeautifulSoup
import requests import requests
from argparse import ArgumentParser, ArgumentDefaultsHelpFormatter from argparse import ArgumentParser, ArgumentDefaultsHelpFormatter
from pathlib import Path from pathlib import Path
from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker, Session
from api import Server, get_api_config, get_logger from api import Server, get_api_config, get_logger
from db.models.base import Base
import os
from db.models.media import MediaActor, MediaActorFile, MediaFile from db.models.media import MediaActor, MediaActorFile, MediaFile
parser = ArgumentParser(formatter_class=ArgumentDefaultsHelpFormatter) parser = ArgumentParser(formatter_class=ArgumentDefaultsHelpFormatter)
parser.add_argument('--file', '-f', help='file with links', default='~/.sync/media/list.txt') parser.add_argument(
parser.add_argument('--video', help='store Url as VideoFile', action="store_true") "--file", "-f", help="file with links", default="~/.sync/media/list.txt"
parser.add_argument('--config', '-c', default='kontor-api') )
parser.add_argument("--video", help="store Url as VideoFile", action="store_true")
parser.add_argument("--config", "-c", default="kontor-api")
parser.add_argument("--server", "-s") parser.add_argument("--server", "-s")
parser.add_argument('--verbose', '-v', action='count', default=0) parser.add_argument("--verbose", "-v", action="count", default=0)
parser.add_argument('--limit', '-l', type=int, help='maximum number of links to check') parser.add_argument("--limit", "-l", type=int, help="maximum number of links to check")
parser.add_argument('--dry-run', '-m', help='excute script without storing', action="store_true") parser.add_argument(
"--dry-run", "-m", help="excute script without storing", action="store_true"
)
args = parser.parse_args() args = parser.parse_args()
DB_USER: str = os.getenv("DB_USER", "kontor")
DB_PASSWORD: str = os.getenv("DB_PASSWORD", "kontor")
DB_SERVER: str = os.getenv("DB_SERVER", "127.0.0.1")
DB_PORT: int = int(os.getenv("DB_PORT", 5432))
DB_DBNAME: str = os.getenv("DB_DBNAME", "kontor")
DATABASE_URL: str = f"postgresql://{DB_USER}:{DB_PASSWORD}@{DB_SERVER}:{DB_PORT}/{DB_DBNAME}"
def get_session() -> Session:
engine = create_engine(DATABASE_URL)
Base.metadata.create_all(bind=engine, checkfirst=True)
SessionLocal = sessionmaker(bind=engine)
return SessionLocal()
def load_data(filename: str, log) -> List[str]: def load_data(filename: str, log) -> List[str]:
links: List[str] = [] """
Read list of links from file.
"""
link_list: List[str] = []
log.debug("load_data") log.debug("load_data")
import_file = Path(filename) import_file = Path(filename)
if not import_file.exists(): if not import_file.exists():
log.info(f"File {filename} does not exist. Do nothing.") log.info(f"File {filename} does not exist. Do nothing.")
raise FileNotFoundError() raise FileNotFoundError()
log.info("read txt file") log.info("read txt file")
with open(filename, 'r') as txt_file: with open(filename, "r", encoding="utf-8") as txt_file:
while line := txt_file.readline(): while line := txt_file.readline():
# log.info(line.rstrip()) # log.info(line.rstrip())
links.append(line.rstrip()) link_list.append(line.rstrip())
return links return link_list
def get_actors_mapping(actor_list: List[MediaActor]) -> Dict[str, MediaActor]: def get_actors_mapping(actor_list: List[MediaActor]) -> Dict[str, MediaActor]:
"""
Create dictionary with actor links as key and MediaActor objects as values.
"""
mapping: Dict[str, MediaActor] = {} mapping: Dict[str, MediaActor] = {}
for actor in actor_list: for actor in actor_list:
if isinstance(actor, dict): if isinstance(actor, dict):
@@ -65,7 +60,11 @@ def get_actors_mapping(actor_list: List[MediaActor]) -> Dict[str, MediaActor]:
mapping[url] = actor mapping[url] = actor
return mapping return mapping
def get_actornames_mapping(actor_list: List[MediaActor]) -> Dict[str, MediaActor]: def get_actornames_mapping(actor_list: List[MediaActor]) -> Dict[str, MediaActor]:
"""
Create dictionary with actor names as key and MediaActor objects as values.
"""
mapping: Dict[str, MediaActor] = {} mapping: Dict[str, MediaActor] = {}
for actor in actor_list: for actor in actor_list:
if isinstance(actor, dict): if isinstance(actor, dict):
@@ -75,42 +74,52 @@ def get_actornames_mapping(actor_list: List[MediaActor]) -> Dict[str, MediaActor
mapping[name] = actor mapping[name] = actor
return mapping return mapping
def get_meta_info(media_file: MediaFile, log) -> List[str]:
def get_meta_info(media_file_obj: MediaFile, log) -> List[str]:
"""
Get meta info for MediaFile from link.
"""
actor_links: List[str] = [] actor_links: List[str] = []
try: try:
r = requests.get(media_file.url) r = requests.get(media_file_obj.url, timeout=5)
soup = BeautifulSoup(r.content, "html.parser") soup = BeautifulSoup(r.content, "html.parser")
error404 = soup.css.select_one('.error404-title') error404 = soup.css.select_one(".error404-title")
if error404 and error404.get_text() == "Video nicht gefunden": if error404 and error404.get_text() == "Video nicht gefunden":
log.warning(f"{error404.get_text()}") log.warning(f"{error404.get_text()}")
media_file.url = None media_file_obj.url = None
media_file.review = False media_file_obj.review = False
return actor_links return actor_links
title_tag = soup.find('title') title_tag = soup.find("title")
if title_tag: if title_tag:
media_file.title = title_tag.get_text() media_file_obj.title = title_tag.get_text()
media_file.review = False media_file_obj.review = False
anchors = soup.find_all('a', attrs={'href': re.compile("^https://.*pornstars/.*")}) anchors = soup.find_all(
"a", attrs={"href": re.compile("^https://.*pornstars/.*")}
)
for anchor in anchors: for anchor in anchors:
link_url = str(anchor.get("href")) # type: ignore link_url = str(anchor.get("href")) # type: ignore
if link_url.endswith('all/countries'): if link_url.endswith("all/countries"):
continue continue
if link_url in actor_links: if link_url in actor_links:
continue continue
actor_links.append(link_url) actor_links.append(link_url)
except Exception as error: except Exception as error:
log.info(f"something went wrong: {error}") log.info(f"something went wrong: {error}")
media_file.title = None media_file_obj.title = None
media_file.review = True media_file_obj.review = True
log.info(f"update MediaFile with MetaInfos to {repr(media_file)}") log.info(f"update MediaFile with MetaInfos to {repr(media_file_obj)}")
log.info(f"links({len(actor_links)}): {actor_links}") log.info(f"links({len(actor_links)}): {actor_links}")
return actor_links return actor_links
def get_actor_name(actor_url: str, log: logging.Logger) -> str | None:
def get_actor_name(actor_link: str, log: logging.Logger) -> str | None:
"""
Get actor name from link url.
"""
try: try:
r = requests.get(actor_url) r = requests.get(actor_link, timeout=5)
soup = BeautifulSoup(r.content, "html.parser") soup = BeautifulSoup(r.content, "html.parser")
titles = soup.find_all('h1') titles = soup.find_all("h1")
for title in titles: for title in titles:
log.info(f"title: {title.get_text()}") log.info(f"title: {title.get_text()}")
return title.get_text() return title.get_text()
@@ -119,31 +128,33 @@ def get_actor_name(actor_url: str, log: logging.Logger) -> str | None:
return None return None
if __name__ == '__main__': if __name__ == "__main__":
logger = get_logger(args.verbose, args.config) logger = get_logger(args.verbose, args.config)
logger.info('kontor.add_links started') logger.info("kontor.add_links started")
if args.limit: if args.limit:
logger.warning(f"check the first {args.limit} links") logger.warning("check the first %s links", args.limit)
apiConfig = get_api_config(logger, args.config) APICONFIG = get_api_config(logger, args.config)
server_list: List[Server] = [] server_list: List[Server] = []
server: Optional[Server] = None server: Optional[Server] = None
if args.server: if args.server:
server = apiConfig.get_server(args.server) server = APICONFIG.get_server(args.server)
if not server: if not server:
server = apiConfig.server[0] server = APICONFIG.server[0]
else: else:
server = apiConfig.server[0] server = APICONFIG.server[0]
links_index = 1 links_index = 1
links = load_data(args.file, logger) links = load_data(args.file, logger)
all_media_files = server.request(logger, table="media_file") all_media_files = server.request(logger, table="media_file")
media_actors: List[MediaActor] = server.request(log=logger, table="media_actor") media_actors: List[MediaActor] = server.request(log=logger, table="media_actor")
for link in links:
logger.info(f"process {link}")
media_files = [media_file for media_file in all_media_files if media_file["url"] == link]
actor_mapping = get_actors_mapping(media_actors) actor_mapping = get_actors_mapping(media_actors)
actorname_mapping = get_actornames_mapping(media_actors) actorname_mapping = get_actornames_mapping(media_actors)
for link in links:
logger.info("process %s", link)
media_files = [
media_file for media_file in all_media_files if media_file["url"] == link
]
if len(media_files) == 0: if len(media_files) == 0:
logger.info(f"MediaFile for link {link} not found") logger.info("MediaFile for link %s not found", link)
media_file = MediaFile() media_file = MediaFile()
media_file.id = str(uuid.uuid4()) media_file.id = str(uuid.uuid4())
media_file.created_date = datetime.now() media_file.created_date = datetime.now()
@@ -169,7 +180,7 @@ if __name__ == '__main__':
media_actor_file.version = 0 media_actor_file.version = 0
media_actor_file.media_file_id = media_file.id media_actor_file.media_file_id = media_file.id
media_actor_file.media_actor_id = media_actor.id media_actor_file.media_actor_id = media_actor.id
logger.info(f"create mapping with {media_actor_file}") logger.info("create mapping with %s", media_actor_file)
if not args.dry_run: if not args.dry_run:
logger.info("add MediaFile Actor mapping %s", media_actor_file) logger.info("add MediaFile Actor mapping %s", media_actor_file)
else: else:
@@ -184,7 +195,7 @@ if __name__ == '__main__':
media_actor.version = 0 media_actor.version = 0
media_actor.name = get_actor_name(actor_url, logger) media_actor.name = get_actor_name(actor_url, logger)
media_actor.url = actor_url media_actor.url = actor_url
logger.info(f"update MediaActor with {repr(media_actor)}") logger.info("update MediaActor with %s", repr(media_actor))
if not args.dry_run: if not args.dry_run:
logger.info("Update MediaActor %s", media_actor) logger.info("Update MediaActor %s", media_actor)
media_actor_file = MediaActorFile() media_actor_file = MediaActorFile()
@@ -194,13 +205,13 @@ if __name__ == '__main__':
media_actor_file.version = 0 media_actor_file.version = 0
media_actor_file.media_file_id = media_file.id media_actor_file.media_file_id = media_file.id
media_actor_file.media_actor_id = media_actor.id media_actor_file.media_actor_id = media_actor.id
logger.info(f"create mapping with {media_actor_file}") logger.info("create mapping with %s", media_actor_file)
if not args.dry_run: if not args.dry_run:
logger.info("Add MediaFile Actor mapping") logger.info("Add MediaFile Actor mapping")
else: else:
for media_file in media_files: for media_file in media_files:
logger.info(f"MediaFile with {media_file["id"]} is found") logger.info("MediaFile with %s is found", media_file["id"])
links_index += 1 links_index += 1
if args.limit and args.limit < links_index: if args.limit and args.limit < links_index:
break break
logger.info('kontor.add_link finished') logger.info("kontor.add_link finished")
+36 -12
View File
@@ -48,13 +48,25 @@ MAPPING: Dict[str, str] = {
"mail_account": "api/admin/mailaccounts", "mail_account": "api/admin/mailaccounts",
} }
class OptionType(Enum): class OptionType(Enum):
"""
OptionType defines the type of param for REST API call.
The type PARAM indicates a query parameter.
The type ID indicates the option is an identifier as part of the path.
"""
PARAM = auto() PARAM = auto()
ID = auto() ID = auto()
URL = auto() URL = auto()
class Option: class Option:
"""
Option is an utility class to simplify options for the REST API call.
The type defines how to handle the value.
"""
def __init__(self, option_type: OptionType, value: str) -> None: def __init__(self, option_type: OptionType, value: str) -> None:
self.type: Optional[OptionType] = option_type self.type: Optional[OptionType] = option_type
self.value: Optional[str] = value self.value: Optional[str] = value
@@ -71,8 +83,6 @@ class EndPointNotAvailableException(Exception):
Raised when calling an not existing endpoint. Raised when calling an not existing endpoint.
""" """
pass
@dataclass @dataclass
class Login: class Login:
@@ -120,6 +130,9 @@ class Server:
self.token_type = str(token_type) self.token_type = str(token_type)
def request(self, log: Logger, table: str, param: Optional[Option] = None): def request(self, log: Logger, table: str, param: Optional[Option] = None):
"""
Requests data from Kontor-API instance by given table and optional parameters.
"""
if not param: if not param:
url: str = f"{self.url}/{MAPPING[table]}" url: str = f"{self.url}/{MAPPING[table]}"
else: else:
@@ -133,13 +146,19 @@ class Server:
return data return data
def update(self, log: Logger, table: str, item_id: UUID, file_info: dict): def update(self, log: Logger, table: str, item_id: UUID, file_info: dict):
"""
Updates data to the Kontor-API instance.
"""
url: str = f"{self.url}/{MAPPING[table]}/{item_id}" url: str = f"{self.url}/{MAPPING[table]}/{item_id}"
headers: Dict[str, str] = {"Authorization": f"Bearer {self.token}"} headers: Dict[str, str] = {"Authorization": f"Bearer {self.token}"}
update = requests.put( update = requests.put(
url, headers=headers, json=file_info, timeout=self.timeout url, headers=headers, json=file_info, timeout=self.timeout
) )
log.info(f"Status: {update.status_code}") log.info(f"Status: {update.status_code}")
return update if update.status_code == 404:
raise EndPointNotAvailableException
data = update.json()
return data
@dataclass @dataclass
@@ -152,7 +171,9 @@ class ApiConfig:
server: List[Server] server: List[Server]
def get_server(self, server_name: str) -> Optional[Server]: def get_server(self, server_name: str) -> Optional[Server]:
""" """ """
Return server instance by given name or None.
"""
found_server = None found_server = None
for server in self.server: for server in self.server:
if server.name == server_name: if server.name == server_name:
@@ -189,19 +210,22 @@ def get_logger(level, config: str):
def get_api_config(log: Logger, config: str) -> ApiConfig: def get_api_config(log: Logger, config: str) -> ApiConfig:
"""
Load configuration from file.
"""
dirs = PlatformDirs(config) dirs = PlatformDirs(config)
api_config = Path(dirs.user_config_dir, "api.yaml") api_config = Path(dirs.user_config_dir, "api.yaml")
with open(api_config, "rt") as f: with open(api_config, "rt", encoding="utf-8") as f:
api_data = yaml.safe_load(f.read()) api_data = yaml.safe_load(f.read())
servers = [Server(**server) for server in api_data["server"]] servers = [Server(**server) for server in api_data["server"]]
login = Login(**(api_data["login"])) login = Login(**(api_data["login"]))
apiConfig = ApiConfig(server=servers, login=login) api_config_data = ApiConfig(server=servers, login=login)
log.debug(apiConfig) log.debug(api_config_data)
if not api_data: if not api_data:
log.fatal("API configuration is missing") log.fatal("API configuration is missing")
return apiConfig return api_config_data
for server in apiConfig.server: for server in api_config_data.server:
server.login(apiConfig.login, log) server.login(api_config_data.login, log)
with open(api_config, "w") as f: with open(api_config, "w", encoding="utf-8") as f:
yaml.dump(api_data, f) yaml.dump(api_data, f)
return apiConfig return api_config_data
+52 -34
View File
@@ -10,6 +10,7 @@ from datetime import datetime
from enum import Enum, auto from enum import Enum, auto
from pathlib import Path from pathlib import Path
from logging import Logger from logging import Logger
from typing import Any, Dict, Optional
from uuid import UUID from uuid import UUID
from api import Option, OptionType, Server, get_api_config, get_logger from api import Option, OptionType, Server, get_api_config, get_logger
@@ -25,6 +26,10 @@ args = parser.parse_args()
class FileStatus(Enum): class FileStatus(Enum):
"""
Status of video file.
"""
DOWNLOADED = auto() DOWNLOADED = auto()
RENAMED = auto() RENAMED = auto()
UNKNOWN = auto() UNKNOWN = auto()
@@ -35,7 +40,10 @@ def download_file(
file_info: dict, file_info: dict,
download_dir: str = "/data/media", download_dir: str = "/data/media",
dl_tool: str = "yt-dlp", dl_tool: str = "yt-dlp",
) -> dict: ) -> Dict[str, Any]:
"""
Download file from url.
"""
print(f"download file for {url} to {download_dir}") print(f"download file for {url} to {download_dir}")
result = subprocess.run( result = subprocess.run(
[dl_tool, url], cwd=download_dir, capture_output=True, text=True [dl_tool, url], cwd=download_dir, capture_output=True, text=True
@@ -45,7 +53,7 @@ def download_file(
output = re.sub(" +", " ", output) output = re.sub(" +", " ", output)
lines_list = output.splitlines() lines_list = output.splitlines()
file_name = __parse_output__(lines_list) file_name = __parse_output__(lines_list)
log.info(f"found file: {file_name}") logger.info("found file: %s", file_name)
if file_name is None or not file_name.strip(): if file_name is None or not file_name.strip():
file_info["review"] = True file_info["review"] = True
file_info["should_download"] = True file_info["should_download"] = True
@@ -60,14 +68,14 @@ def download_file(
return file_info return file_info
def __parse_output__(lines_list: list[str]) -> str | None: def __parse_output__(lines_list: list[str]) -> Optional[str]:
file_name = None file_name = None
for line in lines_list: for line in lines_list:
log.debug(f"parse line: {line}") logger.debug("parse line: %s", line)
if "has already been downloaded" in line: if "has already been downloaded" in line:
end_len = len(" has already been downloaded") end_len = len(" has already been downloaded")
file_name = line[11:-end_len] file_name = line[11:-end_len]
log.info(f"file_name: {file_name}") logger.info("file_name: %s", file_name)
break break
if "Destination" in line: if "Destination" in line:
line_len = len(line) line_len = len(line)
@@ -80,82 +88,92 @@ def __parse_output__(lines_list: list[str]) -> str | None:
return file_name return file_name
def is_file_downloaded(media_file: dict, dir: Path) -> FileStatus: def is_file_downloaded(media_file: dict, path: Path) -> FileStatus:
"""
Check, if file is already downloaded.
"""
file_name_as_title = f"{media_file['file_name']}" file_name_as_title = f"{media_file['file_name']}"
if not file_name_as_title: if not file_name_as_title:
log.info("title has not been set - start download") logger.info("title has not been set - start download")
return FileStatus.UNKNOWN return FileStatus.UNKNOWN
file_title = Path(dir, f"{file_name_as_title}.mp4") file_title = Path(path, f"{file_name_as_title}.mp4")
if file_title.exists(): if file_title.exists():
log.info(f"{file_name_as_title} has been downloaded") logger.info("%s has been downloaded", file_name_as_title)
media_file["should_download"] = False media_file["should_download"] = False
return FileStatus.DOWNLOADED return FileStatus.DOWNLOADED
file_name_as_id = f"{media_file['id']}" file_name_as_id = f"{media_file['id']}"
file_with_id_as_name = Path(dir, f"{file_name_as_id}.mp4") file_with_id_as_name = Path(path, f"{file_name_as_id}.mp4")
if file_with_id_as_name.exists(): if file_with_id_as_name.exists():
log.info(f"{file_with_id_as_name} has been downloaded and renamed") logger.info("%s has been downloaded and renamed", file_with_id_as_name)
media_file["cloud_link"] = str(file_with_id_as_name) media_file["cloud_link"] = str(file_with_id_as_name)
media_file["should_download"] = False media_file["should_download"] = False
return FileStatus.RENAMED return FileStatus.RENAMED
log.info("could not find file - start download") logger.info("could not find file - start download")
return FileStatus.UNKNOWN return FileStatus.UNKNOWN
def update_status(item_id: UUID, file_info: dict, server: Server, log: Logger): def update_status(item_id: UUID, file_info: dict, api_server: Server, log: Logger):
update = server.update(log, "media_file", item_id, file_info) """
log.info(f"update status: {update.status_code}") Update MediaFile
log.info(f"update result: {update.json()}") """
update = api_server.update(log, "media_file", item_id, file_info)
log.info("update result: %s", update)
def rename_file(file_info: dict): def rename_file(file_info: dict):
"""
Rename file.
"""
item_id = file_info["id"] item_id = file_info["id"]
file_name = file_info["file_name"] file_name = file_info["file_name"]
if file_name is None or not file_name.strip(): if file_name is None or not file_name.strip():
log.info("file_name is not set, rename is not executed") logger.info("file_name is not set, rename is not executed")
file_info["review"] = True file_info["review"] = True
file_info["should_download"] = True file_info["should_download"] = True
return return
file = Path(args.dir, file_name) file = Path(args.dir, file_name)
new_file_path = file.with_name(f"{item_id}{file.suffix}") new_file_path = file.with_name(f"{item_id}{file.suffix}")
log.info(f"rename {file} to {new_file_path}") logger.info("rename %s to %s", file, new_file_path)
file.rename(Path(new_file_path)) file.rename(Path(new_file_path))
file_info["cloud_link"] = str(new_file_path) file_info["cloud_link"] = str(new_file_path)
if __name__ == "__main__": if __name__ == "__main__":
log = get_logger(args.verbose, args.config) logger = get_logger(args.verbose, args.config)
log.info("kontor.download started") logger.info("kontor.download started")
apiConfig = get_api_config(log, args.config) APICONFIG = get_api_config(logger, args.config)
server: Server = apiConfig.server[0] server: Server = APICONFIG.server[0]
data = server.request(log=log, table="media_file", param=Option(OptionType.PARAM, "download=true")) data = server.request(
log=logger, table="media_file", param=Option(OptionType.PARAM, "download=true")
)
entries_count = len(data) entries_count = len(data)
log.info(f"data: {entries_count}") logger.info("data: %s", entries_count)
mediafile_index = 1 mediafile_index = 1
log.debug(f"data: {data}") logger.debug("data: %s", data)
missing_actors = {} missing_actors = {}
if args.dry_run: if args.dry_run:
sys.exit(0) sys.exit(0)
if args.limit: if args.limit:
log.warning(f"check the first {args.limit} links") logger.warning("check the first %s links", args.limit)
for item in data: for item in data:
link = item["url"] link = item["url"]
file_id = item["id"] file_id = item["id"]
log.info(f"{file_id} - {link}") logger.info("%s - %s", file_id, link)
download_status: FileStatus = is_file_downloaded(item, args.dir) download_status: FileStatus = is_file_downloaded(item, args.dir)
match download_status: match download_status:
case FileStatus.DOWNLOADED: case FileStatus.DOWNLOADED:
rename_file(item) rename_file(item)
update_status(file_id, item, server=server, log=log) update_status(file_id, item, api_server=server, log=logger)
case FileStatus.RENAMED: case FileStatus.RENAMED:
log.info("update status") logger.info("update status")
update_status(file_id, item, server=server, log=log) update_status(file_id, item, api_server=server, log=logger)
case FileStatus.UNKNOWN: case FileStatus.UNKNOWN:
download_file(link, item, args.dir) download_file(link, item, args.dir)
rename_file(item) rename_file(item)
log.info(f"{item}") logger.info(item)
update_status(file_id, item, server=server, log=log) update_status(file_id, item, api_server=server, log=logger)
log.warning(f"processed {mediafile_index}/{entries_count}") logger.warning("processed %s/%s", mediafile_index, entries_count)
if args.limit and args.limit <= mediafile_index: if args.limit and args.limit <= mediafile_index:
break break
mediafile_index += 1 mediafile_index += 1
log.info("kontor.download finished") logger.info("kontor.download finished")
+67 -14
View File
@@ -1,12 +1,15 @@
"""
Synchronize Kontor data between configured servers.
"""
import json import json
from argparse import ArgumentDefaultsHelpFormatter, ArgumentParser from argparse import ArgumentDefaultsHelpFormatter, ArgumentParser
from typing import List from logging import Logger
from typing import Dict, List
from api import ( from api import (
MAPPING, MAPPING,
EndPointNotAvailableException, EndPointNotAvailableException,
Option,
OptionType,
Server, Server,
get_api_config, get_api_config,
get_logger, get_logger,
@@ -22,17 +25,43 @@ parser.add_argument("--cleanup", "-d", action="store_true")
args = parser.parse_args() args = parser.parse_args()
def create_item_id_mapping(data_list: List[dict]) -> Dict[str, dict]:
    """
    Index a list of item dictionaries by their "id" value.

    Every element of data_list must contain an "id" key. If the same id
    occurs more than once, the last item with that id is kept.
    """
    return {entry["id"]: entry for entry in data_list}
def is_different(log: Logger, first_item: dict, second_item: dict) -> bool:
    """
    Report whether two item dictionaries differ in a relevant value.

    Only keys present in both dictionaries are compared; keys that exist
    in just one of them are ignored. Every differing value is logged at
    info level. A difference in "last_modified_date" is logged as well,
    but it does not count as a difference for the result.

    Returns True if at least one shared key other than
    "last_modified_date" holds different values, otherwise False.
    """
    differs = False
    for key, value in first_item.items():
        if key not in second_item:
            continue
        other_value = second_item[key]
        if value != other_value:
            log.info("%s: %s != %s", key, value, other_value)
            # The modification timestamp is expected to vary between
            # servers, so it never marks the items as different.
            if key != "last_modified_date":
                differs = True
    return differs
if __name__ == "__main__": if __name__ == "__main__":
logger = get_logger(args.verbose, "kontor") logger = get_logger(args.verbose, "kontor")
logger.info("kontor.sync started") logger.info("kontor.sync started")
apiConfig = get_api_config(logger, args.config) APICONFIG = get_api_config(logger, args.config)
server_list: List[Server] = [] server_list: List[Server] = []
if args.server: if args.server:
server = apiConfig.get_server(args.server) server = APICONFIG.get_server(args.server)
if server: if server:
server_list.append(server) server_list.append(server)
else: else:
server_list.extend(apiConfig.server) server_list.extend(APICONFIG.server)
export_data = {} export_data = {}
for server in server_list: for server in server_list:
export_data[server.name] = {} export_data[server.name] = {}
@@ -48,20 +77,44 @@ if __name__ == "__main__":
try: try:
json_dump = json.dumps(export_data[server.name], indent=4) json_dump = json.dumps(export_data[server.name], indent=4)
file_name = f"{server.name}-data.json" file_name = f"{server.name}-data.json"
with open(file_name, "w") as dump_file: with open(file_name, "w", encoding="utf-8") as dump_file:
dump_file.write(json_dump) dump_file.write(json_dump)
except TypeError as error: except TypeError as error:
logger.info(f"{error}") logger.info(error)
for server in server_list: for server in server_list:
logger.info(f"{server.name}: {len(export_data[server.name])} tables exported") logger.info(
"%s: %s tables exported", server.name, len(export_data[server.name])
)
if len(server_list) > 1: if len(server_list) > 1:
for table, path in MAPPING.items(): for table, path in MAPPING.items():
mapping = create_item_id_mapping(export_data[server_list[1].name][table])
for item in export_data[server_list[0].name][table]: for item in export_data[server_list[0].name][table]:
item_data = server_list[1].request( logger.debug("checking %s:%s", table, item["id"])
logger, table=table, param=Option(OptionType.ID, item["id"]) check_item_id = item["id"]
if check_item_id in mapping:
check_item = mapping[check_item_id]
if is_different(logger, item, check_item):
logger.info(
"checking values for %s != %s", item["id"], check_item["id"]
) )
if item != item_data: logger.debug("diff: %s\n%s", item, check_item)
logger.debug("diff: %s\n%s", item, item_data) result = server_list[1].update(
logger, table, check_item_id, item
)
logger.info("update result: %s", result)
else: else:
logger.debug("no changes for: %s(%s)", table, item["id"]) logger.debug(
"no changes for: %s(%s - %s)",
table,
item["id"],
check_item["id"],
)
else:
logger.info(
"item %s in %s missing", check_item_id, server_list[1].name
)
logger.info("synchronization of %s finished", table)
logger.info("all tables synchronized")
else:
logger.info("not enough server configured for sync")
logger.info("kontor.sync finished") logger.info("kontor.sync finished")