refactor scripts to work with the API
This commit is contained in:
+48
-61
@@ -1,12 +1,13 @@
|
||||
"""
|
||||
Checks the database kontor
|
||||
"""
|
||||
from dataclasses import dataclass
|
||||
from enum import Enum, auto
|
||||
|
||||
import mariadb
|
||||
from pathlib import Path
|
||||
from argparse import ArgumentParser, ArgumentDefaultsHelpFormatter
|
||||
|
||||
import requests
|
||||
from config import get_logger, get_database_cursors
|
||||
|
||||
parser = ArgumentParser(formatter_class=ArgumentDefaultsHelpFormatter)
|
||||
@@ -25,71 +26,63 @@ class StatusType(Enum):
|
||||
CLOUD_LINK = auto()
|
||||
CLOUD_LINK_ID = auto()
|
||||
|
||||
@dataclass
class FileStatus:
    """Result of matching a file on disk against the media records.

    Both fields default to the "nothing matched yet" state, so an empty
    ``FileStatus()`` can be created first and filled in afterwards.
    """

    # Id of the matching media record; empty while nothing has matched.
    id: str = ''
    # How the file was matched; UNKNOWN until a lookup succeeds.
    status_type: StatusType = StatusType.UNKNOWN
|
||||
|
||||
def get_status_of_file(found_file, cursor):
|
||||
status = StatusType.UNKNOWN
|
||||
file_id = ''
|
||||
try:
|
||||
cursor.execute(f'SELECT id, cloud_link FROM media_file WHERE file_name="{found_file.name}"')
|
||||
rows = cursor.fetchall()
|
||||
if len(rows) == 1:
|
||||
status = StatusType.FILE_NAME
|
||||
file_id = rows[0][0]
|
||||
except mariadb.Error as error:
|
||||
logger.debug(f'select failed with {error}')
|
||||
try:
|
||||
cursor.execute(f'SELECT id FROM media_file WHERE id="{found_file.stem}"')
|
||||
rows = cursor.fetchall()
|
||||
if len(rows) == 1:
|
||||
status = StatusType.FILE_ID
|
||||
file_id = rows[0][0]
|
||||
if len(rows) > 1:
|
||||
status = StatusType.DUPLICATE
|
||||
for row in rows:
|
||||
logger.info(f"found {row[0]} with {found_file}")
|
||||
except mariadb.Error as error:
|
||||
logger.debug(f'select failed with {error}')
|
||||
try:
|
||||
cursor.execute(f'SELECT id FROM media_file WHERE cloud_link LIKE "%{found_file.stem}%"')
|
||||
rows = cursor.fetchall()
|
||||
if len(rows) == 1:
|
||||
file_id = rows[0][0]
|
||||
if rows[0][0] == found_file.stem:
|
||||
status = StatusType.CLOUD_LINK_ID
|
||||
else:
|
||||
status = StatusType.CLOUD_LINK
|
||||
except mariadb.Error as error:
|
||||
logger.debug(f'select failed with {error}')
|
||||
return status, file_id
|
||||
|
||||
def rename_files_to_id(media_dir, conn, dry_run):
|
||||
def _single_match_id(data):
    """Return the record id when ``data`` describes exactly one match, else None.

    NOTE(review): the response schema of the media API is not visible here.
    A one-element list of records and a one-entry mapping holding ``id`` are
    both accepted; confirm against the API.
    """
    if isinstance(data, list) and len(data) == 1 and isinstance(data[0], dict):
        return data[0].get('id')
    if isinstance(data, dict) and len(data) == 1 and 'id' in data:
        return data['id']
    return None


def get_status_of_file(found_file: Path, log) -> FileStatus:
    """Query the media API for ``found_file`` and report how it is known.

    Three requests are sent in order (search, lookup by file stem, cloud-link
    listing). Each request that yields exactly one match overwrites the result
    of the previous one, so the last successful lookup wins.

    :param found_file: file found in the media directory
    :param log: logger used to report the HTTP status of every request
    :return: a ``FileStatus`` holding the matched id and the kind of match;
        ``StatusType.UNKNOWN`` with an empty id when nothing matched
    """
    status = FileStatus(id='', status_type=StatusType.UNKNOWN)

    # NOTE(review): the search request carries no criteria (no file name is
    # sent) - confirm what the endpoint expects.
    response = requests.post("http://127.0.0.1:8800/media/search")
    log.info(f"Status: {response.status_code}")
    match_id = _single_match_id(response.json())
    if match_id is not None:
        status.status_type = StatusType.FILE_NAME
        status.id = match_id

    response = requests.get(f"http://127.0.0.1:8800/media/files/{found_file.stem}")
    log.info(f"Status: {response.status_code}")
    match_id = _single_match_id(response.json())
    if match_id is not None:
        status.status_type = StatusType.FILE_ID
        status.id = match_id

    # NOTE(review): this query does not depend on found_file - confirm intent.
    response = requests.get("http://127.0.0.1:8800/media/files?cloud_link=true")
    log.info(f"Status: {response.status_code}")
    match_id = _single_match_id(response.json())
    if match_id is not None:
        status.status_type = StatusType.CLOUD_LINK_ID
        status.id = match_id

    return status
|
||||
|
||||
def rename_files_to_id(media_dir, dry_run, log):
|
||||
media_path = Path(media_dir)
|
||||
cursor = conn.cursor()
|
||||
for file in media_path.iterdir():
|
||||
logger.debug('found file: {}'.format(file.name))
|
||||
(status, file_id) = get_status_of_file(file, cursor)
|
||||
new_file_path = file.with_name(f"{file_id}{file.suffix}")
|
||||
match status:
|
||||
log.debug('found file: {}'.format(file.name))
|
||||
status = get_status_of_file(file, log)
|
||||
new_file_path = file.with_name(f"{status.id}{file.suffix}")
|
||||
file_id = status.id
|
||||
match status.status_type:
|
||||
case StatusType.FILE_NAME:
|
||||
logger.info(f'status of {file.name} is file_name')
|
||||
log.info(f'status of {file.name} is file_name')
|
||||
rename_file(file, new_file_path, dry_run)
|
||||
update_cloud_link(file_id, new_file_path, conn, dry_run)
|
||||
update_cloud_link(file_id, new_file_path, dry_run)
|
||||
case StatusType.FILE_ID:
|
||||
logger.info(f'status of {file.name} is file_id')
|
||||
update_cloud_link(file_id, new_file_path, conn, dry_run)
|
||||
log.info(f'status of {file.name} is file_id')
|
||||
update_cloud_link(file_id, new_file_path, dry_run)
|
||||
case StatusType.CLOUD_LINK:
|
||||
logger.info(f'status of {file.name} is cloud_link')
|
||||
log.info(f'status of {file.name} is cloud_link')
|
||||
rename_file(file, new_file_path, dry_run)
|
||||
update_cloud_link(file_id, new_file_path, conn, dry_run)
|
||||
update_cloud_link(file_id, new_file_path, dry_run)
|
||||
case StatusType.CLOUD_LINK_ID:
|
||||
logger.debug(f'status of {file.name} is cloud_link_id')
|
||||
update_cloud_link(file_id, new_file_path, conn, dry_run)
|
||||
log.debug(f'status of {file.name} is cloud_link_id')
|
||||
update_cloud_link(file_id, new_file_path, dry_run)
|
||||
case StatusType.DUPLICATE:
|
||||
logger.info(f'status of {file.name} is duplicate')
|
||||
log.info(f'status of {file.name} is duplicate')
|
||||
case StatusType.UNKNOWN:
|
||||
logger.info(f'status of {file.name} is unknown')
|
||||
log.info(f'status of {file.name} is unknown')
|
||||
case _:
|
||||
logger.info(f'status of {file.name} is not defined')
|
||||
log.info(f'status of {file.name} is not defined')
|
||||
|
||||
def rename_file(current_file, new_file_path, dry_run):
|
||||
if dry_run:
|
||||
@@ -118,14 +111,8 @@ def reset_cloud_link(conn, dry_run):
|
||||
if __name__ == '__main__':
|
||||
logger = get_logger(args.verbose, args.config)
|
||||
logger.info("kontor.check_kontor started")
|
||||
_, mariadb_conn = get_database_cursors(logger, args.config)
|
||||
mariadb_cursor = mariadb_conn.cursor()
|
||||
if args.reset_cloud_link:
|
||||
reset_cloud_link(mariadb_conn, args.dry_run)
|
||||
link_list = []
|
||||
data_dir = args.dir
|
||||
logger.info("kontor.check_kontor.rename_files_to_id")
|
||||
rename_files_to_id(data_dir, mariadb_conn, args.dry_run)
|
||||
rename_files_to_id(args.dir, args.dry_run, logger)
|
||||
#logger.info("kontor.check_kontor.update_cloud_link_with_found_files")
|
||||
#update_cloud_link_with_found_files(data_dir, mariadb_conn, args.dry_run)
|
||||
#logger.info("kontor.check_kontor.get_ids_from_column_cloud_link")
|
||||
@@ -133,5 +120,5 @@ if __name__ == '__main__':
|
||||
#logger.info('found {} ids in column cloud_link'.format(len(link_list)))
|
||||
#logger.info("kontor.check_kontor.checking_ids_from_cloud_link")
|
||||
#checking_ids_from_cloud_link(link_list, mariadb_cursor)
|
||||
mariadb_conn.close()
|
||||
logger.info("kontor.check_kontor finished")
|
||||
|
||||
|
||||
+101
-37
@@ -1,13 +1,15 @@
|
||||
"""
|
||||
download files with URLs from DB
|
||||
"""
|
||||
import re
|
||||
import subprocess
|
||||
from argparse import ArgumentParser, ArgumentDefaultsHelpFormatter
|
||||
from platformdirs import PlatformDirs
|
||||
from datetime import datetime
|
||||
from enum import Enum, auto
|
||||
from pathlib import Path
|
||||
import yaml
|
||||
from sqlalchemy import create_engine, select
|
||||
from sqlalchemy.orm import sessionmaker
|
||||
from schema import Base, KontorDB, MediaFile
|
||||
from uuid import UUID
|
||||
|
||||
import requests
|
||||
from config import get_logger
|
||||
|
||||
|
||||
@@ -17,42 +19,104 @@ parser.add_argument('--config', '-c', default='kontor-docker')
|
||||
parser.add_argument('--dir', '-d', default='/data/media')
|
||||
parser.add_argument('--tool', '-t', default='yt-dlp')
|
||||
parser.add_argument('--dry-run', '-m', action='store_true')
|
||||
parser.add_argument('--rename', '-r', action='store_true')
|
||||
args = parser.parse_args()
|
||||
|
||||
# Outcome of looking for a media item's file in the download directory,
# as returned by is_file_downloaded().
class FileStatus(Enum):
|
||||
# A file named after the item's file_name was found.
DOWNLOADED = auto()
|
||||
# A file named after the item's id was found.
RENAMED = auto()
|
||||
# Neither file was found.
UNKNOWN = auto()
|
||||
|
||||
def download_file(url: str, file_info: dict, download_dir: str = "/data/media", dl_tool: str = "yt-dlp") -> dict:
    """Download ``url`` with ``dl_tool`` and record the outcome in ``file_info``.

    The tool is run inside ``download_dir`` and its standard output is parsed
    for the name of the downloaded file.

    :param url: address handed to the download tool
    :param file_info: record of the media file; updated in place
    :param download_dir: working directory of the tool and target directory
    :param dl_tool: executable used for the download
    :return: the same ``file_info`` object. It is left untouched when the tool
        exits with a non-zero code.
    """
    print(f"download file for {url} to {download_dir}")
    result = subprocess.run([dl_tool, url], cwd=download_dir, capture_output=True, text=True)
    if result.returncode != 0:
        # stderr is captured, so without this message a failed download
        # would go completely unnoticed.
        print(f"download of {url} failed with exit code {result.returncode}: {result.stderr.strip()}")
        return file_info

    # Collapse runs of spaces so the fixed markers in the output can be matched.
    output = re.sub(' +', ' ', result.stdout)
    file_name = __parse_output__(output.splitlines())
    if file_name is None:
        # No file name could be read from the output: flag for review and retry.
        file_info['review'] = True
        file_info['should_download'] = True
        file_info['file_name'] = None
    else:
        download_file_name = Path(download_dir, file_name)
        file_info['should_download'] = False
        file_info['file_name'] = download_file_name.name
        file_info['cloud_link'] = str(download_file_name.absolute())
        # NOTE(review): assumes the timestamp is only refreshed when a file
        # name was found - confirm against the original indentation.
        file_info['last_modified_date'] = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    return file_info
|
||||
|
||||
|
||||
def __parse_output__(lines_list: list[str]) -> str | None:
|
||||
file_name = None
|
||||
for line in lines_list:
|
||||
if 'has already been downloaded' in line:
|
||||
end_len = len(' has already been downloaded')
|
||||
file_name = line[11:-end_len]
|
||||
if 'Destination' in line:
|
||||
line_len = len(line)
|
||||
start_len = len('[download] Destination: ')
|
||||
file_len = line_len - start_len
|
||||
file_name = line[-file_len:]
|
||||
return file_name
|
||||
|
||||
|
||||
def is_file_downloaded(item: dict, dir: Path) -> FileStatus:
    """Check whether the file of a media item already exists in ``dir``.

    The file is looked up under the item's ``file_name`` (as stored, and with
    ``.mp4`` appended) and then under ``<id>.mp4``.

    :param item: record of the media file; updated in place on a match
        (``should_download`` is cleared; ``file_name`` or ``cloud_link`` is
        set to the file that was found)
    :param dir: directory that holds the downloaded files
    :return: ``FileStatus.DOWNLOADED`` when the file exists under its file
        name, ``FileStatus.RENAMED`` when it exists under its id, otherwise
        ``FileStatus.UNKNOWN``
    """
    media_dir = Path(dir)

    # A missing or empty file name must not be looked up as the text "None".
    file_name_as_title = item.get('file_name')
    if file_name_as_title:
        stored_name = media_dir / file_name_as_title
        name_with_suffix = media_dir / f"{file_name_as_title}.mp4"
        if stored_name.is_file() or name_with_suffix.is_file():
            log.info(f"{file_name_as_title} has been downloaded")
            if not stored_name.is_file():
                # Keep file_name in step with the file found on disk so that
                # a later rename can locate it.
                item['file_name'] = name_with_suffix.name
            item['should_download'] = 0
            return FileStatus.DOWNLOADED

    file_with_id_as_name = media_dir / f"{item['id']}.mp4"
    if file_with_id_as_name.is_file():
        log.info(f"{file_with_id_as_name} has been downloaded and renamed")
        # Stored as text: the item is sent as JSON later on.
        item['cloud_link'] = str(file_with_id_as_name)
        item['should_download'] = 0
        return FileStatus.RENAMED

    log.info("could not find file - start download")
    return FileStatus.UNKNOWN
|
||||
|
||||
|
||||
def update_status(item_id: UUID, file_info: dict, timeout: float = 30.0):
    """Send the updated record of a media file to the media API.

    :param item_id: id of the media file to update
    :param file_info: record that is sent as the JSON body
    :param timeout: seconds to wait for the server before giving up
    """
    update = requests.put(
        f"http://127.0.0.1:8800/media/files/{item_id}",
        json=file_info,
        timeout=timeout,
    )
    log.info(f"update status: {update.status_code}")
    try:
        result = update.json()
    except ValueError:
        # Error responses are not guaranteed to carry a JSON body.
        log.warning(f"update result is not JSON: {update.text}")
        return
    log.info(f"update result: {result}")
|
||||
|
||||
|
||||
def rename_file(file_info: dict):
    """Rename the file of a media item to ``<id><suffix>``.

    The file is looked up in ``args.dir`` under the item's ``file_name``.
    After the rename, ``cloud_link`` in ``file_info`` holds the new path.
    Nothing happens when the item has no file name.

    :param file_info: record of the media file; updated in place
    """
    item_id = file_info['id']
    file_name = file_info.get('file_name')
    if not file_name:
        # A failed download leaves file_name unset; there is nothing to rename.
        log.warning(f"no file name for {item_id} - nothing to rename")
        return

    file = Path(args.dir, file_name)
    new_file_path = file.with_name(f"{item_id}{file.suffix}")
    log.info(f"rename {file} to {new_file_path}")
    file.rename(new_file_path)
    file_info['cloud_link'] = str(new_file_path)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
log = get_logger(args.verbose, args.config)
|
||||
log.info('kontor.download started')
|
||||
dirs = PlatformDirs(args.config)
|
||||
database_config = Path(dirs.user_config_dir, 'database-config.yaml')
|
||||
with open(database_config, 'rt') as f:
|
||||
db_config = yaml.safe_load(f.read())
|
||||
print(db_config)
|
||||
connect_string = ('mariadb+mariadbconnector://{}:{}@{}:{}/{}'.format(
|
||||
db_config['mariadb']['user'],
|
||||
db_config['mariadb']['password'],
|
||||
db_config['mariadb']['host'],
|
||||
db_config['mariadb']['port'],
|
||||
db_config['mariadb']['database']
|
||||
))
|
||||
engine = create_engine(connect_string)
|
||||
Base.metadata.create_all(bind=engine, checkfirst=True)
|
||||
__session__ = sessionmaker(bind=engine)
|
||||
_filter = {'should_download': 1}
|
||||
with __session__() as session:
|
||||
files = session.query(MediaFile).filter_by(**_filter).all()
|
||||
log.info("found %d entries", len(files))
|
||||
files2 = session.query(MediaFile).filter(MediaFile.should_download == 1).all()
|
||||
log.info("found %d entries", len(files2))
|
||||
for mediafile in files2:
|
||||
mediafile.download_file(download_dir=args.dir, dl_tool="yt-dlp")
|
||||
log.info("Datei {} erfolgreich heruntergeladen".format(mediafile.file_name))
|
||||
if args.rename:
|
||||
current_file = Path(mediafile.file_name)
|
||||
new_file_path = current_file.with_name(f"{mediafile.id}{current_file.suffix}")
|
||||
current_file.rename(Path(new_file_path))
|
||||
mediafile.cloud_link = new_file_path
|
||||
session.add(mediafile)
|
||||
session.commit()
|
||||
response = requests.get("http://127.0.0.1:8800/media/files?download=true")
|
||||
log.info(f"Status: {response.status_code}")
|
||||
data = response.json()
|
||||
log.info(f"data: {len(data)}")
|
||||
for item in data:
|
||||
link = item['url']
|
||||
file_id = item['id']
|
||||
log.info(f"{file_id} - {link}")
|
||||
download_status: FileStatus = is_file_downloaded(item, args.dir)
|
||||
match download_status:
|
||||
case FileStatus.DOWNLOADED:
|
||||
rename_file(item)
|
||||
update_status(file_id, item)
|
||||
case FileStatus.RENAMED:
|
||||
log.info("update status")
|
||||
update_status(file_id, item)
|
||||
case FileStatus.UNKNOWN:
|
||||
download_file(link, item)
|
||||
rename_file(item)
|
||||
log.info(f'{item}')
|
||||
update_status(file_id, item)
|
||||
log.info('kontor.download finished')
|
||||
|
||||
|
||||
+23
-30
@@ -2,14 +2,14 @@
|
||||
download files with URLs from DB
|
||||
"""
|
||||
import logging.config
|
||||
|
||||
import requests
|
||||
import yaml
|
||||
from argparse import ArgumentParser, ArgumentDefaultsHelpFormatter
|
||||
from pathlib import Path
|
||||
from platformdirs import PlatformDirs
|
||||
from sqlalchemy import create_engine, select
|
||||
from sqlalchemy.orm import sessionmaker
|
||||
|
||||
from schema import MediaFile, Base
|
||||
from bs4 import BeautifulSoup
|
||||
from platformdirs import PlatformDirs
|
||||
|
||||
parser = ArgumentParser(formatter_class=ArgumentDefaultsHelpFormatter)
|
||||
parser.add_argument('--verbose', '-v', action='count', default=0)
|
||||
@@ -37,30 +37,23 @@ def get_logger(level: int, config: str):
|
||||
if __name__ == '__main__':
|
||||
log = get_logger(args.verbose, args.config)
|
||||
log.info('kontor.update_titles started')
|
||||
dirs = PlatformDirs(args.config)
|
||||
database_config = Path(dirs.user_config_dir, 'database-config.yaml')
|
||||
with open(database_config, 'rt') as f:
|
||||
db_config = yaml.safe_load(f.read())
|
||||
print(db_config)
|
||||
connect_string = ('mariadb+mariadbconnector://{}:{}@{}:{}/{}'.format(
|
||||
db_config['mariadb']['user'],
|
||||
db_config['mariadb']['password'],
|
||||
db_config['mariadb']['host'],
|
||||
db_config['mariadb']['port'],
|
||||
db_config['mariadb']['database']
|
||||
))
|
||||
engine = create_engine(connect_string)
|
||||
Base.metadata.create_all(bind=engine, checkfirst=True)
|
||||
__session__ = sessionmaker(engine)
|
||||
_filter = {'review': 1}
|
||||
with __session__() as session:
|
||||
files = session.query(MediaFile).filter_by(**_filter).all()
|
||||
log.info("found %d entries", len(files))
|
||||
files2 = session.query(MediaFile).filter(MediaFile.review ==1).all
|
||||
log.info("found %d entries", len(files2))
|
||||
for mediafile in files:
|
||||
mediafile.update_title()
|
||||
session.add(mediafile)
|
||||
session.commit()
|
||||
log.info("found %d entries", len(files))
|
||||
response = requests.get("http://127.0.0.1:8800/media/files?review=true")
|
||||
log.info(f"Status: {response.status_code}")
|
||||
data = response.json()
|
||||
log.info(f"data: {len(data)}")
|
||||
for item in data:
|
||||
link = item['url']
|
||||
log.info(f"{item['id']} - {link}")
|
||||
try:
|
||||
r = requests.get(link)
|
||||
soup = BeautifulSoup(r.content, "html.parser")
|
||||
title = soup.title.string
|
||||
item['title'] = title
|
||||
item['review'] = 0
|
||||
except:
|
||||
item['title'] = None
|
||||
item['review'] = 1
|
||||
update = requests.put(f"http://127.0.0.1:8800/media/files/{item['id']}", json=item)
|
||||
log.info(f"update status: {update.status_code}")
|
||||
log.info(f"update result: {update.json()}")
|
||||
log.info('kontor.update_titles finished')
|
||||
|
||||
Reference in New Issue
Block a user