This commit is contained in:
+59
-141
@@ -1,22 +1,22 @@
|
|||||||
"""
|
"""
|
||||||
Checks the database kontor
|
Checks the database kontor
|
||||||
"""
|
"""
|
||||||
|
from dataclasses import dataclass
|
||||||
from enum import Enum, auto
|
from enum import Enum, auto
|
||||||
import json
|
from logging import Logger
|
||||||
import mariadb
|
from typing import Dict, List, Optional
|
||||||
import requests
|
|
||||||
from pathlib import Path
|
|
||||||
from argparse import ArgumentParser, ArgumentDefaultsHelpFormatter
|
from argparse import ArgumentParser, ArgumentDefaultsHelpFormatter
|
||||||
|
from urllib.parse import urlparse
|
||||||
|
|
||||||
|
from api import Option, OptionType, Server, get_api_config, get_logger
|
||||||
|
|
||||||
from config import get_logger, get_database_cursors
|
|
||||||
|
|
||||||
parser = ArgumentParser(formatter_class=ArgumentDefaultsHelpFormatter)
|
parser = ArgumentParser(formatter_class=ArgumentDefaultsHelpFormatter)
|
||||||
parser.add_argument('--verbose', '-v', action='count', default=0)
|
parser.add_argument("--verbose", "-v", action="count", default=0)
|
||||||
parser.add_argument('--config', '-c', default='kontor')
|
parser.add_argument("--config", "-c", default="kontor-api")
|
||||||
parser.add_argument('--file', '-f')
|
parser.add_argument("--dir", "-d", default="/data/media")
|
||||||
parser.add_argument('--dir', '-d')
|
parser.add_argument("--dry-run", "-m", action="store_true")
|
||||||
parser.add_argument('--dry-run', '-m', action='store_true')
|
parser.add_argument("--server", "-s")
|
||||||
parser.add_argument('--reset-cloud-link', '-r', action='store_true')
|
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
|
|
||||||
class StatusType(Enum):
|
class StatusType(Enum):
|
||||||
@@ -36,142 +36,61 @@ class FileStatus:
|
|||||||
self.id = response['id']
|
self.id = response['id']
|
||||||
|
|
||||||
|
|
||||||
def get_status_of_file(found_file: Path, cursor, log) -> FileStatus:
|
def create_item_id_mapping(log: Logger, data_list: List[dict]) -> Dict[str, dict]:
|
||||||
status = FileStatus()
|
"""
|
||||||
try:
|
create dictionary with id as key and dictionary as value.
|
||||||
cursor.execute(f'SELECT id, cloud_link FROM media_file WHERE file_name="{found_file.name}"')
|
"""
|
||||||
rows = cursor.fetchall()
|
item_id_mapping: Dict[str, dict] = {}
|
||||||
if len(rows) == 1:
|
for data_item in data_list:
|
||||||
status.status_type = StatusType.FILE_NAME
|
log.debug(data_item)
|
||||||
status.id = rows[0][0]
|
item_id_mapping[data_item["id"]] = data_item
|
||||||
except mariadb.Error as error:
|
return item_id_mapping
|
||||||
log.debug(f'select failed with {error}')
|
|
||||||
try:
|
|
||||||
cursor.execute(f'SELECT id FROM media_file WHERE id="{found_file.stem}"')
|
|
||||||
rows = cursor.fetchall()
|
|
||||||
if len(rows) == 1:
|
|
||||||
status.status_type = StatusType.FILE_ID
|
|
||||||
status.id = rows[0][0]
|
|
||||||
if len(rows) > 1:
|
|
||||||
status.status_type = StatusType.DUPLICATE
|
|
||||||
for row in rows:
|
|
||||||
log.info(f"found {row[0]} with {found_file}")
|
|
||||||
except mariadb.Error as error:
|
|
||||||
log.debug(f'select failed with {error}')
|
|
||||||
try:
|
|
||||||
cursor.execute(f'SELECT id FROM media_file WHERE cloud_link LIKE "%{found_file.stem}%"')
|
|
||||||
rows = cursor.fetchall()
|
|
||||||
if len(rows) == 1:
|
|
||||||
status.id = rows[0][0]
|
|
||||||
if rows[0][0] == found_file.stem:
|
|
||||||
status.status_type = StatusType.CLOUD_LINK_ID
|
|
||||||
else:
|
|
||||||
status.status_type = StatusType.CLOUD_LINK
|
|
||||||
except mariadb.Error as error:
|
|
||||||
log.debug(f'select failed with {error}')
|
|
||||||
response = requests.get(f"http://127.0.0.1:8800/media/files/{found_file.stem}")
|
|
||||||
log.debug(f"Status: {response.status_code}")
|
|
||||||
if response.status_code == 200:
|
|
||||||
status.status_type = StatusType.FILE_ID
|
|
||||||
status.id = response.json()['id']
|
|
||||||
return status
|
|
||||||
|
|
||||||
def rename_files_to_id(media_dir, dry_run, conn, log):
|
|
||||||
media_path = Path(media_dir)
|
def check_duplicate_links(log: Logger, server: Server):
|
||||||
cursor = conn.cursor()
|
data = server.request(log=logger, table="media_file")
|
||||||
for file in media_path.iterdir():
|
mapping = create_item_id_mapping(log=log, data_list=data)
|
||||||
log.debug('found file: {}'.format(file.name))
|
visited_link_path: Dict[str, str] = {}
|
||||||
status: FileStatus = get_status_of_file(file, cursor, log)
|
duplicate_link_paths: Dict[str, List[str]] = {}
|
||||||
file_id = status.id
|
for item in data:
|
||||||
if not file_id:
|
link = item["url"]
|
||||||
log.info(f"ID of file {file.name} is unknown")
|
if len(link) == 0:
|
||||||
continue
|
continue
|
||||||
new_file_path = file.with_name(f"{file_id}{file.suffix}")
|
file_id = item["id"]
|
||||||
match status.status_type:
|
parsed_url = urlparse(link)
|
||||||
case StatusType.FILE_NAME:
|
link_path = parsed_url.path
|
||||||
log.info(f'status of {file.name} is file_name')
|
if link_path in visited_link_path:
|
||||||
rename_file(file, new_file_path, dry_run, log)
|
log.info("duplicate url path found: %s", link_path)
|
||||||
update_cloud_link(file_id, new_file_path, conn, dry_run, log)
|
if link_path in duplicate_link_paths:
|
||||||
case StatusType.FILE_ID:
|
duplicate_link_paths[link_path].append(file_id)
|
||||||
log.info(f'status of {file.name} is file_id')
|
|
||||||
update_cloud_link(file_id, new_file_path, conn, dry_run, log)
|
|
||||||
case StatusType.CLOUD_LINK:
|
|
||||||
log.info(f'status of {file.name} is cloud_link')
|
|
||||||
rename_file(file, new_file_path, dry_run, log)
|
|
||||||
update_cloud_link(file_id, new_file_path, conn, dry_run, log)
|
|
||||||
case StatusType.CLOUD_LINK_ID:
|
|
||||||
log.debug(f'status of {file.name} is cloud_link_id')
|
|
||||||
update_cloud_link(file_id, new_file_path, conn, dry_run, log)
|
|
||||||
case StatusType.DUPLICATE:
|
|
||||||
log.info(f'status of {file.name} is duplicate')
|
|
||||||
case StatusType.UNKNOWN:
|
|
||||||
log.info(f'status of {file.name} is unknown')
|
|
||||||
|
|
||||||
def rename_file(current_file, new_file_path, dry_run, log):
|
|
||||||
if dry_run:
|
|
||||||
log.info('rename file {} to {}'.format(current_file.name, new_file_path.name))
|
|
||||||
else:
|
else:
|
||||||
current_file.rename(Path(new_file_path))
|
duplicate_link_paths[link_path] = []
|
||||||
|
duplicate_link_paths[link_path].append(visited_link_path[link_path])
|
||||||
def update_cloud_link(file_id, file_path, conn, dry_run, log):
|
duplicate_link_paths[link_path].append(file_id)
|
||||||
cursor = conn.cursor()
|
|
||||||
log.debug(f'update entry {file_id} with {file_path.absolute()}')
|
|
||||||
if dry_run:
|
|
||||||
log.debug(f'UPDATE media_file: cloud_link={file_path.absolute()}')
|
|
||||||
else:
|
else:
|
||||||
cursor.execute('UPDATE media_file SET cloud_link="{}" WHERE id="{}"'.format(file_path.absolute(), file_id))
|
visited_link_path[link_path] = file_id
|
||||||
conn.commit()
|
log.info("found %s duplicate links", len(duplicate_link_paths.keys()))
|
||||||
|
deletion_list: List[str] = []
|
||||||
def reset_cloud_link(conn, dry_run, log):
|
for key, value in duplicate_link_paths.items():
|
||||||
cursor = conn.cursor()
|
if len(value) == 2:
|
||||||
if dry_run:
|
log.info("%s:\n%s - %s\n%s - %s", key, value[0], mapping[value[0]]["url"], value[1], mapping[value[1]]["url"])
|
||||||
log.info('UPDATE media_file SET cloud_link=""')
|
if mapping[value[0]]["url"].startswith("https://xhamster"):
|
||||||
|
deletion_list.append(value[0])
|
||||||
else:
|
else:
|
||||||
cursor.execute('UPDATE media_file SET cloud_link="" WHERE id is NOT NULL')
|
deletion_list.append(value[1])
|
||||||
conn.commit()
|
else:
|
||||||
|
log.info("found %s links", len(value))
|
||||||
def check_file_with_db(data_file: Path, m_conn, log):
|
for key in deletion_list:
|
||||||
log.info(f"read json file: {data_file}")
|
log.info("%s - %s", key, mapping[key]["url"])
|
||||||
cursor = m_conn.cursor()
|
|
||||||
with open(data_file, 'r') as json_file:
|
|
||||||
json_load = json.load(json_file)
|
|
||||||
for table in json_load:
|
|
||||||
log.info(f"{table}: {len(json_load[table])}")
|
|
||||||
items = json_load[table]
|
|
||||||
for item in items:
|
|
||||||
item_id = item['id']
|
|
||||||
select_statement = f"SELECT * FROM {table} WHERE id='{item_id}'"
|
|
||||||
cursor.execute(select_statement)
|
|
||||||
rows = cursor.fetchall()
|
|
||||||
count = len(rows)
|
|
||||||
log.info(f"{count} entries found for {item_id}")
|
|
||||||
if count == 0:
|
|
||||||
log.info(f"entry for {item_id} not found")
|
|
||||||
if count == 1:
|
|
||||||
log.info(f"check entry {item_id}")
|
|
||||||
#log.info(f"entry {rows[0]}")
|
|
||||||
columns = []
|
|
||||||
values = []
|
|
||||||
for (key, value) in item.items():
|
|
||||||
columns.append(key)
|
|
||||||
values.append(value)
|
|
||||||
for index, _ in enumerate(columns):
|
|
||||||
log.info(f"compare {values[index]} with {rows[0][index]}")
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
log = get_logger(args.verbose, args.config)
|
logger = get_logger(args.verbose, args.config)
|
||||||
log.info("kontor.check_kontor started")
|
logger.info("kontor.check_kontor started")
|
||||||
_, m_conn = get_database_cursors(log, args.config)
|
APICONFIG = get_api_config(logger, args.config)
|
||||||
if args.dir:
|
server: Server = APICONFIG.server[0]
|
||||||
log.info("kontor.check_kontor.rename_files_to_id")
|
logger.info("kontor.check_kontor.check_duplicate_links")
|
||||||
rename_files_to_id(args.dir, args.dry_run, m_conn, log)
|
check_duplicate_links(logger, server)
|
||||||
if args.file:
|
|
||||||
data_file = Path(args.file)
|
|
||||||
if data_file.exists():
|
|
||||||
log.info("kontor.check_kontor.check_file_with_db")
|
|
||||||
check_file_with_db(data_file, m_conn, log)
|
|
||||||
#logger.info("kontor.check_kontor.update_cloud_link_with_found_files")
|
#logger.info("kontor.check_kontor.update_cloud_link_with_found_files")
|
||||||
#update_cloud_link_with_found_files(data_dir, mariadb_conn, args.dry_run)
|
#update_cloud_link_with_found_files(data_dir, mariadb_conn, args.dry_run)
|
||||||
#logger.info("kontor.check_kontor.get_ids_from_column_cloud_link")
|
#logger.info("kontor.check_kontor.get_ids_from_column_cloud_link")
|
||||||
@@ -179,5 +98,4 @@ if __name__ == '__main__':
|
|||||||
#logger.info('found {} ids in column cloud_link'.format(len(link_list)))
|
#logger.info('found {} ids in column cloud_link'.format(len(link_list)))
|
||||||
#logger.info("kontor.check_kontor.checking_ids_from_cloud_link")
|
#logger.info("kontor.check_kontor.checking_ids_from_cloud_link")
|
||||||
#checking_ids_from_cloud_link(link_list, mariadb_cursor)
|
#checking_ids_from_cloud_link(link_list, mariadb_cursor)
|
||||||
log.info("kontor.check_kontor finished")
|
logger.info("kontor.check_kontor finished")
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user