From 729d019de9858116de259c3b2526104f27281da9 Mon Sep 17 00:00:00 2001 From: Thomas Peetz Date: Tue, 9 Dec 2025 17:05:20 +0100 Subject: [PATCH] format download.py --- kontor-scripts/download.py | 87 +++++++++++++++++++++----------------- 1 file changed, 49 insertions(+), 38 deletions(-) diff --git a/kontor-scripts/download.py b/kontor-scripts/download.py index abb8a51..7a54309 100644 --- a/kontor-scripts/download.py +++ b/kontor-scripts/download.py @@ -1,52 +1,62 @@ """ download files with URLs from DB """ + import re import subprocess -from argparse import ArgumentParser, ArgumentDefaultsHelpFormatter +from argparse import ArgumentDefaultsHelpFormatter, ArgumentParser from datetime import datetime from enum import Enum, auto from pathlib import Path from uuid import UUID import requests + from config import get_logger - parser = ArgumentParser(formatter_class=ArgumentDefaultsHelpFormatter) -parser.add_argument('--verbose', '-v', action='count', default=0) -parser.add_argument('--config', '-c', default='kontor-docker') -parser.add_argument('--dir', '-d', default='/data/media') -parser.add_argument('--limit', '-l', type=int, help='maximum number of links to check') -parser.add_argument('--tool', '-t', default='yt-dlp') -parser.add_argument('--dry-run', '-m', action='store_true') +parser.add_argument("--verbose", "-v", action="count", default=0) +parser.add_argument("--config", "-c", default="kontor-docker") +parser.add_argument("--dir", "-d", default="/data/media") +parser.add_argument("--limit", "-l", type=int, help="maximum number of links to check") +parser.add_argument("--tool", "-t", default="yt-dlp") +parser.add_argument("--dry-run", "-m", action="store_true") args = parser.parse_args() + class FileStatus(Enum): DOWNLOADED = auto() RENAMED = auto() UNKNOWN = auto() -def download_file(url: str, file_info: dict, download_dir: str = "/data/media", dl_tool: str = "yt-dlp") -> dict: + +def download_file( + url: str, + file_info: dict, + download_dir: str = "/data/media", + dl_tool: str = "yt-dlp", +) -> dict: print(f"download file for {url} to {download_dir}") - result = subprocess.run([dl_tool, url], cwd=download_dir, capture_output=True, text=True) + result = subprocess.run( + [dl_tool, url], cwd=download_dir, capture_output=True, text=True + ) if result.returncode == 0: output = result.stdout - output = re.sub(' +', ' ', output) + output = re.sub(" +", " ", output) lines_list = output.splitlines() file_name = __parse_output__(lines_list) log.info(f"found file: {file_name}") if file_name is None or not file_name.strip(): - file_info['review'] = True - file_info['should_download'] = True - file_info['file_name'] = None + file_info["review"] = True + file_info["should_download"] = True + file_info["file_name"] = None else: download_file_name = Path(download_dir, file_name) - file_info['should_download'] = False - file_info['review'] = False - file_info['file_name'] = download_file_name.name - file_info['cloud_link'] = str(download_file_name.absolute()) - file_info['last_modified_date'] = datetime.now().strftime("%Y-%m-%d %H:%M:%S") + file_info["should_download"] = False + file_info["review"] = False + file_info["file_name"] = download_file_name.name + file_info["cloud_link"] = str(download_file_name.absolute()) + file_info["last_modified_date"] = datetime.now().strftime("%Y-%m-%d %H:%M:%S") return file_info @@ -59,9 +69,9 @@ def __parse_output__(lines_list: list[str]) -> str | None: file_name = line[11:-end_len] log.info(f"file_name: {file_name}") break - if 'Destination' in line: + if "Destination" in line: line_len = len(line) - start_len = len('[download] Destination: ') + start_len = len("[download] Destination: ") file_len = line_len - start_len file_name = line[-file_len:] break @@ -78,43 +88,45 @@ def is_file_downloaded(media_file: dict, dir: Path) -> FileStatus: file_title = Path(dir, f"{file_name_as_title}.mp4") if file_title.exists(): log.info(f"{file_name_as_title} has been downloaded") - media_file['should_download'] = False + media_file["should_download"] = False return FileStatus.DOWNLOADED file_name_as_id = f"{media_file['id']}" file_with_id_as_name = Path(dir, f"{file_name_as_id}.mp4") if file_with_id_as_name.exists(): log.info(f"{file_with_id_as_name} has been downloaded and renamed") - media_file['cloud_link'] = str(file_with_id_as_name) - media_file['should_download'] = False + media_file["cloud_link"] = str(file_with_id_as_name) + media_file["should_download"] = False return FileStatus.RENAMED log.info("could not find file - start download") return FileStatus.UNKNOWN def update_status(item_id: UUID, file_info: dict): - update = requests.put(f"http://127.0.0.1:8800/api/media/files/{item_id}", json=file_info) + update = requests.put( + f"http://127.0.0.1:8800/api/media/files/{item_id}", json=file_info + ) log.info(f"update status: {update.status_code}") log.info(f"update result: {update.json()}") def rename_file(file_info: dict): - item_id = file_info['id'] - file_name = file_info['file_name'] + item_id = file_info["id"] + file_name = file_info["file_name"] if file_name is None or not file_name.strip(): log.info("file_name is not set, rename is not executed") - file_info['review'] = True - file_info['should_download'] = True + file_info["review"] = True + file_info["should_download"] = True return file = Path(args.dir, file_name) new_file_path = file.with_name(f"{item_id}{file.suffix}") log.info(f"rename {file} to {new_file_path}") file.rename(Path(new_file_path)) - file_info['cloud_link'] = str(new_file_path) + file_info["cloud_link"] = str(new_file_path) -if __name__ == '__main__': +if __name__ == "__main__": log = get_logger(args.verbose, args.config) - log.info('kontor.download started') + log.info("kontor.download started") response = requests.get("http://127.0.0.1:8800/api/media/files?download=true") log.info(f"Status: {response.status_code}") data = response.json() @@ -126,8 +138,8 @@ if __name__ == '__main__': if args.limit: log.warning(f"check the first {args.limit} links") for item in data: - link = item['url'] - file_id = item['id'] + link = item["url"] + file_id = item["id"] log.info(f"{file_id} - {link}") download_status: FileStatus = is_file_downloaded(item, args.dir) match download_status: @@ -138,13 +150,12 @@ if __name__ == '__main__': log.info("update status") update_status(file_id, item) case FileStatus.UNKNOWN: - download_file(link, item) + download_file(link, item, args.dir) rename_file(item) - log.info(f'{item}') + log.info(f"{item}") update_status(file_id, item) log.warning(f"processed {mediafile_index}/{entries_count}") if args.limit and args.limit <= mediafile_index: break mediafile_index += 1 - log.info('kontor.download finished') - + log.info("kontor.download finished")