This commit is contained in:
+62
-144
@@ -1,22 +1,22 @@
|
||||
"""
|
||||
Checks the database kontor
|
||||
"""
|
||||
from dataclasses import dataclass
|
||||
from enum import Enum, auto
|
||||
import json
|
||||
import mariadb
|
||||
import requests
|
||||
from pathlib import Path
|
||||
from logging import Logger
|
||||
from typing import Dict, List, Optional
|
||||
from argparse import ArgumentParser, ArgumentDefaultsHelpFormatter
|
||||
from urllib.parse import urlparse
|
||||
|
||||
from api import Option, OptionType, Server, get_api_config, get_logger
|
||||
|
||||
from config import get_logger, get_database_cursors
|
||||
|
||||
parser = ArgumentParser(formatter_class=ArgumentDefaultsHelpFormatter)
|
||||
parser.add_argument('--verbose', '-v', action='count', default=0)
|
||||
parser.add_argument('--config', '-c', default='kontor')
|
||||
parser.add_argument('--file', '-f')
|
||||
parser.add_argument('--dir', '-d')
|
||||
parser.add_argument('--dry-run', '-m', action='store_true')
|
||||
parser.add_argument('--reset-cloud-link', '-r', action='store_true')
|
||||
parser.add_argument("--verbose", "-v", action="count", default=0)
|
||||
parser.add_argument("--config", "-c", default="kontor-api")
|
||||
parser.add_argument("--dir", "-d", default="/data/media")
|
||||
parser.add_argument("--dry-run", "-m", action="store_true")
|
||||
parser.add_argument("--server", "-s")
|
||||
args = parser.parse_args()
|
||||
|
||||
class StatusType(Enum):
|
||||
@@ -36,142 +36,61 @@ class FileStatus:
|
||||
self.id = response['id']
|
||||
|
||||
|
||||
def get_status_of_file(found_file: Path, cursor, log) -> FileStatus:
    """Work out how a file on disk relates to the ``media_file`` records.

    Three database lookups run in order (by file name, by id equal to the
    file stem, by ``cloud_link`` containing the file stem), followed by a
    request to the local media API. Every lookup that matches overwrites the
    result of the earlier ones, so the last match wins.

    Args:
        found_file: File to classify; its ``name`` and ``stem`` are the
            lookup keys.
        cursor: Database cursor used for the SELECT statements.
        log: Logger receiving the diagnostics.

    Returns:
        A ``FileStatus`` whose ``status_type`` and ``id`` were set by the
        last lookup that matched. It is returned as constructed when nothing
        matched.
    """
    status = FileStatus()

    # The looked-up values are bound as parameters instead of being formatted
    # into the SQL text, so a quote inside a file name can neither break nor
    # alter the statement.
    try:
        cursor.execute(
            "SELECT id, cloud_link FROM media_file WHERE file_name=?",
            (found_file.name,),
        )
        rows = cursor.fetchall()
        if len(rows) == 1:
            status.status_type = StatusType.FILE_NAME
            status.id = rows[0][0]
    except mariadb.Error as error:
        log.debug(f'select failed with {error}')

    try:
        cursor.execute(
            "SELECT id FROM media_file WHERE id=?",
            (found_file.stem,),
        )
        rows = cursor.fetchall()
        if len(rows) == 1:
            status.status_type = StatusType.FILE_ID
            status.id = rows[0][0]
        elif len(rows) > 1:
            # Several records share the id: report them, keep the id unset.
            status.status_type = StatusType.DUPLICATE
            for row in rows:
                log.info(f"found {row[0]} with {found_file}")
    except mariadb.Error as error:
        log.debug(f'select failed with {error}')

    try:
        cursor.execute(
            "SELECT id FROM media_file WHERE cloud_link LIKE ?",
            (f"%{found_file.stem}%",),
        )
        rows = cursor.fetchall()
        if len(rows) == 1:
            status.id = rows[0][0]
            # The link matched; tell apart whether the file is already named
            # after the record id.
            if rows[0][0] == found_file.stem:
                status.status_type = StatusType.CLOUD_LINK_ID
            else:
                status.status_type = StatusType.CLOUD_LINK
    except mariadb.Error as error:
        log.debug(f'select failed with {error}')

    # NOTE(review): the request carries no timeout, so an unresponsive API
    # blocks the whole run - confirm whether that is acceptable.
    response = requests.get(f"http://127.0.0.1:8800/media/files/{found_file.stem}")
    log.debug(f"Status: {response.status_code}")
    if response.status_code == 200:
        status.status_type = StatusType.FILE_ID
        status.id = response.json()['id']
    return status
|
||||
def create_item_id_mapping(log: Logger, data_list: List[dict]) -> Dict[str, dict]:
    """Index the given records by their ``"id"`` entry.

    Each record is written to the debug log before it is indexed. When two
    records share an id, the later one replaces the earlier one.

    Args:
        log: Logger receiving one debug line per record.
        data_list: Records to index; each must provide an ``"id"`` key.

    Returns:
        Dictionary mapping every id to its (last) record.
    """

    def _traced(records):
        # Log lazily so each record is reported right before it is indexed.
        for record in records:
            log.debug(record)
            yield record

    return {record["id"]: record for record in _traced(data_list)}
|
||||
|
||||
def rename_files_to_id(media_dir, dry_run, conn, log):
|
||||
media_path = Path(media_dir)
|
||||
cursor = conn.cursor()
|
||||
for file in media_path.iterdir():
|
||||
log.debug('found file: {}'.format(file.name))
|
||||
status: FileStatus = get_status_of_file(file, cursor, log)
|
||||
file_id = status.id
|
||||
if not file_id:
|
||||
log.info(f"ID of file {file.name} is unknown")
|
||||
|
||||
def check_duplicate_links(log: Logger, server: Server):
|
||||
data = server.request(log=logger, table="media_file")
|
||||
mapping = create_item_id_mapping(log=log, data_list=data)
|
||||
visited_link_path: Dict[str, str] = {}
|
||||
duplicate_link_paths: Dict[str, List[str]] = {}
|
||||
for item in data:
|
||||
link = item["url"]
|
||||
if len(link) == 0:
|
||||
continue
|
||||
new_file_path = file.with_name(f"{file_id}{file.suffix}")
|
||||
match status.status_type:
|
||||
case StatusType.FILE_NAME:
|
||||
log.info(f'status of {file.name} is file_name')
|
||||
rename_file(file, new_file_path, dry_run, log)
|
||||
update_cloud_link(file_id, new_file_path, conn, dry_run, log)
|
||||
case StatusType.FILE_ID:
|
||||
log.info(f'status of {file.name} is file_id')
|
||||
update_cloud_link(file_id, new_file_path, conn, dry_run, log)
|
||||
case StatusType.CLOUD_LINK:
|
||||
log.info(f'status of {file.name} is cloud_link')
|
||||
rename_file(file, new_file_path, dry_run, log)
|
||||
update_cloud_link(file_id, new_file_path, conn, dry_run, log)
|
||||
case StatusType.CLOUD_LINK_ID:
|
||||
log.debug(f'status of {file.name} is cloud_link_id')
|
||||
update_cloud_link(file_id, new_file_path, conn, dry_run, log)
|
||||
case StatusType.DUPLICATE:
|
||||
log.info(f'status of {file.name} is duplicate')
|
||||
case StatusType.UNKNOWN:
|
||||
log.info(f'status of {file.name} is unknown')
|
||||
|
||||
def rename_file(current_file, new_file_path, dry_run, log):
    """Rename ``current_file`` to ``new_file_path``, or only announce it.

    Args:
        current_file: Path object of the file to rename.
        new_file_path: Target path of the rename.
        dry_run: When true nothing is renamed; the intended rename is logged
            at info level instead.
        log: Logger used for the dry-run message.
    """
    if not dry_run:
        destination = Path(new_file_path)
        current_file.rename(destination)
        return
    log.info(f'rename file {current_file.name} to {new_file_path.name}')
|
||||
|
||||
def update_cloud_link(file_id, file_path, conn, dry_run, log):
    """Store the absolute form of ``file_path`` as ``cloud_link`` of a record.

    Args:
        file_id: Value matched against the ``id`` column of ``media_file``.
        file_path: Path object; its absolute form is written to the record.
        conn: Database connection providing ``cursor()`` and ``commit()``.
        dry_run: When true the database is left untouched and the intended
            update is only written to the debug log.
        log: Logger receiving the debug messages.
    """
    target = file_path.absolute()
    log.debug(f'update entry {file_id} with {target}')
    if dry_run:
        log.debug(f'UPDATE media_file: cloud_link={target}')
        return

    cursor = conn.cursor()
    try:
        # Bind the values instead of formatting them into the statement: a
        # quote inside the path would otherwise corrupt the SQL text.
        cursor.execute(
            'UPDATE media_file SET cloud_link=? WHERE id=?',
            (str(target), file_id),
        )
        conn.commit()
    finally:
        cursor.close()
|
||||
|
||||
def reset_cloud_link(conn, dry_run, log):
    """Blank the ``cloud_link`` column of every ``media_file`` record.

    Args:
        conn: Database connection providing ``cursor()`` and ``commit()``.
        dry_run: When true no statement is executed; the intended update is
            logged at info level instead.
        log: Logger used for the dry-run message.
    """
    db_cursor = conn.cursor()
    if not dry_run:
        db_cursor.execute('UPDATE media_file SET cloud_link="" WHERE id is NOT NULL')
        conn.commit()
        return
    log.info('UPDATE media_file SET cloud_link=""')
|
||||
|
||||
def check_file_with_db(data_file: Path, m_conn, log):
    """Look up every record of a JSON dump in the database and log the result.

    The JSON document is read as a mapping of table name to a list of
    records, each record providing an ``"id"`` key. For every record the row
    count found for that id is logged; when exactly one row exists, the
    record values are logged next to the row values, paired by position.

    Args:
        data_file: Path of the JSON file to read.
        m_conn: Database connection providing ``cursor()``.
        log: Logger receiving the report at info level.
    """
    log.info(f"read json file: {data_file}")
    with open(data_file, 'r', encoding='utf-8') as json_file:
        json_load = json.load(json_file)

    cursor = m_conn.cursor()
    for table, items in json_load.items():
        log.info(f"{table}: {len(items)}")
        # NOTE(review): identifiers cannot be bound as parameters, so the
        # table name from the file is still formatted into the statement -
        # only feed this function dumps from a trusted source.
        select_statement = f"SELECT * FROM {table} WHERE id=?"
        for item in items:
            item_id = item['id']
            # The id is bound as a parameter so quotes in it cannot change
            # the query.
            cursor.execute(select_statement, (item_id,))
            rows = cursor.fetchall()
            count = len(rows)
            log.info(f"{count} entries found for {item_id}")
            if count == 0:
                log.info(f"entry for {item_id} not found")
            elif count == 1:
                log.info(f"check entry {item_id}")
                # Pairing stops at the shorter side, so a record with more
                # keys than the row has columns no longer raises IndexError.
                for value, stored in zip(item.values(), rows[0]):
                    log.info(f"compare {value} with {stored}")
|
||||
|
||||
file_id = item["id"]
|
||||
parsed_url = urlparse(link)
|
||||
link_path = parsed_url.path
|
||||
if link_path in visited_link_path:
|
||||
log.info("duplicate url path found: %s", link_path)
|
||||
if link_path in duplicate_link_paths:
|
||||
duplicate_link_paths[link_path].append(file_id)
|
||||
else:
|
||||
duplicate_link_paths[link_path] = []
|
||||
duplicate_link_paths[link_path].append(visited_link_path[link_path])
|
||||
duplicate_link_paths[link_path].append(file_id)
|
||||
else:
|
||||
visited_link_path[link_path] = file_id
|
||||
log.info("found %s duplicate links", len(duplicate_link_paths.keys()))
|
||||
deletion_list: List[str] = []
|
||||
for key, value in duplicate_link_paths.items():
|
||||
if len(value) == 2:
|
||||
log.info("%s:\n%s - %s\n%s - %s", key, value[0], mapping[value[0]]["url"], value[1], mapping[value[1]]["url"])
|
||||
if mapping[value[0]]["url"].startswith("https://xhamster"):
|
||||
deletion_list.append(value[0])
|
||||
else:
|
||||
deletion_list.append(value[1])
|
||||
else:
|
||||
log.info("found %s links", len(value))
|
||||
for key in deletion_list:
|
||||
log.info("%s - %s", key, mapping[key]["url"])
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
log = get_logger(args.verbose, args.config)
|
||||
log.info("kontor.check_kontor started")
|
||||
_, m_conn = get_database_cursors(log, args.config)
|
||||
if args.dir:
|
||||
log.info("kontor.check_kontor.rename_files_to_id")
|
||||
rename_files_to_id(args.dir, args.dry_run, m_conn, log)
|
||||
if args.file:
|
||||
data_file = Path(args.file)
|
||||
if data_file.exists():
|
||||
log.info("kontor.check_kontor.check_file_with_db")
|
||||
check_file_with_db(data_file, m_conn, log)
|
||||
logger = get_logger(args.verbose, args.config)
|
||||
logger.info("kontor.check_kontor started")
|
||||
APICONFIG = get_api_config(logger, args.config)
|
||||
server: Server = APICONFIG.server[0]
|
||||
logger.info("kontor.check_kontor.check_duplicate_links")
|
||||
check_duplicate_links(logger, server)
|
||||
#logger.info("kontor.check_kontor.update_cloud_link_with_found_files")
|
||||
#update_cloud_link_with_found_files(data_dir, mariadb_conn, args.dry_run)
|
||||
#logger.info("kontor.check_kontor.get_ids_from_column_cloud_link")
|
||||
@@ -179,5 +98,4 @@ if __name__ == '__main__':
|
||||
#logger.info('found {} ids in column cloud_link'.format(len(link_list)))
|
||||
#logger.info("kontor.check_kontor.checking_ids_from_cloud_link")
|
||||
#checking_ids_from_cloud_link(link_list, mariadb_cursor)
|
||||
log.info("kontor.check_kontor finished")
|
||||
|
||||
logger.info("kontor.check_kontor finished")
|
||||
|
||||
@@ -31,7 +31,7 @@ def create_item_id_mapping(log: Logger, data_list: List[dict]) -> Dict[str, dict
|
||||
"""
|
||||
item_id_mapping: Dict[str, dict] = {}
|
||||
for data_item in data_list:
|
||||
log.debug(data_item)
|
||||
log.debug(data_item)
|
||||
item_id_mapping[data_item["id"]] = data_item
|
||||
return item_id_mapping
|
||||
|
||||
|
||||
Reference in New Issue
Block a user