""" Checks the database kontor """ from dataclasses import dataclass from enum import Enum, auto from logging import Logger from typing import Dict, List, Optional from argparse import ArgumentParser, ArgumentDefaultsHelpFormatter from urllib.parse import urlparse from api import Option, OptionType, Server, get_api_config, get_logger parser = ArgumentParser(formatter_class=ArgumentDefaultsHelpFormatter) parser.add_argument("--verbose", "-v", action="count", default=0) parser.add_argument("--config", "-c", default="kontor-api") parser.add_argument("--dir", "-d", default="/data/media") parser.add_argument("--dry-run", "-m", action="store_true") parser.add_argument("--server", "-s") args = parser.parse_args() class StatusType(Enum): UNKNOWN = auto() FILE_NAME = auto() FILE_ID = auto() DUPLICATE = auto() CLOUD_LINK = auto() CLOUD_LINK_ID = auto() class FileStatus: id: str | None = None status_type: StatusType = StatusType.UNKNOWN def get_response(self, response: dict): self.status_type = StatusType.FILE_NAME self.id = response['id'] def create_item_id_mapping(log: Logger, data_list: List[dict]) -> Dict[str, dict]: """ create dictionary with id as key and dictionary as value. """ item_id_mapping: Dict[str, dict] = {} for data_item in data_list: log.debug(data_item) item_id_mapping[data_item["id"]] = data_item return item_id_mapping def check_duplicate_links(log: Logger, server: Server): data = server.request(log=logger, table="media_file") mapping = create_item_id_mapping(log=log, data_list=data) visited_link_path: Dict[str, str] = {} duplicate_link_paths: Dict[str, List[str]] = {} for item in data: link = item["url"] if len(link) == 0: continue file_id = item["id"] parsed_url = urlparse(link) link_path = parsed_url.path if link_path in visited_link_path: log.info("duplicate url path found: %s", link_path) if link_path in duplicate_link_paths: duplicate_link_paths[link_path].append(file_id) else: duplicate_link_paths[link_path] = [] duplicate_link_paths[link_path].append(visited_link_path[link_path]) duplicate_link_paths[link_path].append(file_id) else: visited_link_path[link_path] = file_id log.info("found %s duplicate links", len(duplicate_link_paths.keys())) deletion_list: List[str] = [] for key, value in duplicate_link_paths.items(): if len(value) == 2: log.info("%s:\n%s - %s\n%s - %s", key, value[0], mapping[value[0]]["url"], value[1], mapping[value[1]]["url"]) if mapping[value[0]]["url"].startswith("https://xhamster"): deletion_list.append(value[0]) else: deletion_list.append(value[1]) else: log.info("found %s links", len(value)) for key in deletion_list: log.info("%s - %s", key, mapping[key]["url"]) if __name__ == '__main__': logger = get_logger(args.verbose, args.config) logger.info("kontor.check_kontor started") APICONFIG = get_api_config(logger, args.config) server: Server = APICONFIG.server[0] logger.info("kontor.check_kontor.check_duplicate_links") check_duplicate_links(logger, server) #logger.info("kontor.check_kontor.update_cloud_link_with_found_files") #update_cloud_link_with_found_files(data_dir, mariadb_conn, args.dry_run) #logger.info("kontor.check_kontor.get_ids_from_column_cloud_link") #get_ids_from_column_cloud_link(link_list, mariadb_cursor) #logger.info('found {} ids in column cloud_link'.format(len(link_list))) #logger.info("kontor.check_kontor.checking_ids_from_cloud_link") #checking_ids_from_cloud_link(link_list, mariadb_cursor) logger.info("kontor.check_kontor finished")