add scripts from repository python-scripts

This commit is contained in:
Thomas Peetz
2025-04-01 08:12:53 +02:00
parent 38e77b25b1
commit 1b18dae311
8 changed files with 599 additions and 0 deletions
+135
View File
@@ -0,0 +1,135 @@
"""
Checks the database kontor
"""
from enum import Enum, auto
import mariadb
from pathlib import Path
from argparse import ArgumentParser, ArgumentDefaultsHelpFormatter
from setup import get_database_cursors, get_logger
# Command-line interface. Arguments are parsed at module level, so `args` is
# available to the main block below.
parser = ArgumentParser(formatter_class=ArgumentDefaultsHelpFormatter)
# -v may be repeated; the resulting count is handed to get_logger().
parser.add_argument('--verbose', '-v', action='count', default=0)
# Directory whose files are matched against the media_file table.
parser.add_argument('--dir', '-d', default='/media/tpeetz/Media')
# When set, renames and UPDATE statements are only logged, not executed.
parser.add_argument('--dry-run', '-m', action='store_true')
# When set, the cloud_link column is cleared before the directory is processed.
parser.add_argument('--reset-cloud-link', '-r', action='store_true')
args = parser.parse_args()
class StatusType(Enum):
    """Relation between a file found on disk and the rows of media_file."""

    # No lookup produced a usable match.
    UNKNOWN = 1
    # Exactly one row has a file_name equal to the file's name.
    FILE_NAME = 2
    # Exactly one row has an id equal to the file's stem.
    FILE_ID = 3
    # The id lookup returned more than one row.
    DUPLICATE = 4
    # Exactly one row's cloud_link contains the stem; its id differs from the stem.
    CLOUD_LINK = 5
    # Exactly one row's cloud_link contains the stem; its id equals the stem.
    CLOUD_LINK_ID = 6
def get_status_of_file(found_file, cursor):
    """
    Classify a file on disk by looking it up in the media_file table.

    Three lookups run in order and a later match overrides an earlier one:
    by file_name, by id (the file's stem) and by cloud_link containing the
    stem. A failing query is logged at debug level and skipped.

    :param found_file: path object of the file; its ``name`` and ``stem`` are used
    :param cursor: database cursor used for the lookups
    :return: tuple ``(status, file_id)``; ``file_id`` stays ``''`` when no
        lookup returned exactly one row
    """
    status = StatusType.UNKNOWN
    file_id = ''
    # Values are bound as parameters instead of being spliced into the SQL
    # text, so quotes or other special characters in a file name can neither
    # break nor alter the statement.
    try:
        cursor.execute('SELECT id, cloud_link FROM media_file WHERE file_name=?', (found_file.name,))
        rows = cursor.fetchall()
        if len(rows) == 1:
            status = StatusType.FILE_NAME
            file_id = rows[0][0]
    except mariadb.Error as error:
        logger.debug(f'select failed with {error}')
    try:
        cursor.execute('SELECT id FROM media_file WHERE id=?', (found_file.stem,))
        rows = cursor.fetchall()
        if len(rows) == 1:
            status = StatusType.FILE_ID
            file_id = rows[0][0]
        if len(rows) > 1:
            status = StatusType.DUPLICATE
            for row in rows:
                logger.info(f"found {row[0]} with {found_file}")
    except mariadb.Error as error:
        logger.debug(f'select failed with {error}')
    try:
        # The wildcards belong to the bound value, not to the SQL text.
        cursor.execute('SELECT id FROM media_file WHERE cloud_link LIKE ?', (f'%{found_file.stem}%',))
        rows = cursor.fetchall()
        if len(rows) == 1:
            file_id = rows[0][0]
            if rows[0][0] == found_file.stem:
                status = StatusType.CLOUD_LINK_ID
            else:
                status = StatusType.CLOUD_LINK
    except mariadb.Error as error:
        logger.debug(f'select failed with {error}')
    return status, file_id
def rename_files_to_id(media_dir, conn, dry_run):
    """
    Rename the files of a directory to their database id and record the path.

    Every entry of ``media_dir`` is classified with get_status_of_file().
    Files matched by file_name or by cloud_link are renamed to
    ``<id><suffix>``; for those and for files already named after their id
    the cloud_link column is updated. Duplicates and unknown files are only
    logged.

    :param media_dir: directory to scan (not recursive)
    :param conn: database connection used for lookups and updates
    :param dry_run: when true, renames and updates are only logged
    """
    needs_rename = (StatusType.FILE_NAME, StatusType.CLOUD_LINK)
    needs_link_only = (StatusType.FILE_ID, StatusType.CLOUD_LINK_ID)
    cursor = conn.cursor()
    # Snapshot the listing before touching anything: files are renamed inside
    # the loop, and what a live directory iterator yields after such a change
    # is unspecified.
    for file in sorted(Path(media_dir).iterdir()):
        logger.debug('found file: {}'.format(file.name))
        (status, file_id) = get_status_of_file(file, cursor)
        # The status names in lower case are exactly the labels used in the log.
        report = logger.debug if status is StatusType.CLOUD_LINK_ID else logger.info
        report(f'status of {file.name} is {status.name.lower()}')
        if status not in needs_rename and status not in needs_link_only:
            # No target name is built for unmatched files: with an empty id
            # and no suffix, with_name('') would raise ValueError and abort
            # the whole run.
            continue
        new_file_path = file.with_name(f"{file_id}{file.suffix}")
        if status in needs_rename:
            rename_file(file, new_file_path, dry_run)
        update_cloud_link(file_id, new_file_path, conn, dry_run)
def rename_file(current_file, new_file_path, dry_run):
    """
    Rename ``current_file`` to ``new_file_path``, or only log it in a dry run.

    :param current_file: path object of the existing file
    :param new_file_path: target path
    :param dry_run: when true, nothing on disk is changed
    """
    if not dry_run:
        current_file.rename(Path(new_file_path))
        return
    logger.info('rename file {} to {}'.format(current_file.name, new_file_path.name))
def update_cloud_link(file_id, file_path, conn, dry_run):
    """
    Store the absolute path of a file in the cloud_link column of its row.

    :param file_id: id of the media_file row to update
    :param file_path: path object; its absolute form is written
    :param conn: database connection; committed after the update
    :param dry_run: when true, the update is only logged
    """
    absolute_path = file_path.absolute()
    logger.debug(f'update entry {file_id} with {absolute_path}')
    if dry_run:
        logger.info(f'UPDATE media_file: cloud_link={absolute_path}')
        return
    cursor = conn.cursor()
    # Bound parameters keep paths containing quotes or backslashes from
    # breaking or altering the statement. The path is passed as text.
    cursor.execute('UPDATE media_file SET cloud_link=? WHERE id=?', (str(absolute_path), file_id))
    conn.commit()
def reset_cloud_link(conn, dry_run):
    """
    Clear the cloud_link column of every media_file row that has an id.

    :param conn: database connection; committed after the update
    :param dry_run: when true, the statement is only logged
    """
    db_cursor = conn.cursor()
    if not dry_run:
        db_cursor.execute('UPDATE media_file SET cloud_link="" WHERE id is NOT NULL')
        conn.commit()
        return
    logger.info('UPDATE media_file SET cloud_link=""')
if __name__ == '__main__':
    # Script entry point. `logger` stays a module-level name because the
    # functions above read it as a global.
    logger = get_logger(args.verbose)
    logger.info("kontor.check_kontor started")
    sqlite_conn, mariadb_conn = get_database_cursors(logger)
    try:
        if args.reset_cloud_link:
            reset_cloud_link(mariadb_conn, args.dry_run)
        data_dir = args.dir
        logger.info("kontor.check_kontor.rename_files_to_id")
        rename_files_to_id(data_dir, mariadb_conn, args.dry_run)
    finally:
        # Release both connections even when processing fails. The SQLite
        # connection is not used by this script but is opened by the helper.
        mariadb_conn.close()
        sqlite_conn.close()
    logger.info("kontor.check_kontor finished")
+52
View File
@@ -0,0 +1,52 @@
"""
copy data from SQLite to MariaDB
"""
import sqlite3
import mariadb
from argparse import ArgumentParser, ArgumentDefaultsHelpFormatter
from setup import get_database_cursors, create_tables, get_logger, get_scripts, get_meta_data
# Command-line interface. Arguments are parsed at module level.
parser = ArgumentParser(formatter_class=ArgumentDefaultsHelpFormatter)
# NOTE(review): --recreate-db is accepted here but not read anywhere in the
# visible part of this script.
parser.add_argument('--recreate-db', action='store_true')
# -v may be repeated; the resulting count is handed to get_logger().
parser.add_argument('--verbose', '-v', action='count', default=0)
args = parser.parse_args()
def copy_data(mariadb_conn, sqlite_conn, table_scripts):
    """
    Replace the content of every MariaDB table with the rows read from SQLite.

    Each table is truncated in MariaDB, refilled row by row and committed.
    Afterwards the row counts of both databases are logged. A row that cannot
    be inserted is logged and skipped; a failing select or count is logged
    and the next table is processed.

    :param mariadb_conn: connection to the target MariaDB database
    :param sqlite_conn: connection to the source SQLite database
    :param table_scripts: mapping of table id to the statement dict produced
        by get_scripts()
    """
    mariadb_cursor = mariadb_conn.cursor()
    sqlite_cursor = sqlite_conn.cursor()
    # Tables are truncated and refilled one after another, so foreign key
    # checks are switched off while copying and restored afterwards.
    mariadb_cursor.execute("SET FOREIGN_KEY_CHECKS = 0")
    try:
        # Use the parameter rather than the module-level `scripts`, so the
        # function also works when called with another mapping.
        for table in table_scripts.values():
            select_statement = table['select_sqlite']
            insert_statement = table['insert_mariadb']
            mariadb_cursor.execute(table['truncate_mariadb'])
            try:
                sqlite_cursor.execute(select_statement)
                rows = sqlite_cursor.fetchall()
                for row in rows:
                    try:
                        mariadb_cursor.execute(insert_statement, row)
                    except mariadb.Error as error:
                        # The insert targets MariaDB, so this is the error
                        # type that can occur here.
                        logger.info('insert failed with %s\n%s\n%s', error, insert_statement, row)
                mariadb_conn.commit()
                mariadb_cursor.execute(table['count'])
                (number_of_rows,) = mariadb_cursor.fetchone()
                row = sqlite_cursor.execute(table['count']).fetchone()
                logger.info('%s contains %d : %d entries', table['name'], number_of_rows, row[0])
            except (sqlite3.Error, mariadb.Error) as error:
                logger.info('select failed with %s', error)
    finally:
        mariadb_cursor.execute("SET FOREIGN_KEY_CHECKS = 1")
if __name__ == '__main__':
    # Script entry point: copies every table described in the meta data from
    # SQLite into MariaDB. `logger` and `scripts` stay module-level names.
    logger = get_logger(args.verbose)
    # This script copies *to* MariaDB; the messages used to say copy_to_sqlite.
    # NOTE(review): adjust the name if the script file is called differently.
    logger.info('kontor.copy_to_mariadb started')
    s_conn, m_conn = get_database_cursors(logger)
    try:
        meta_data_tables = get_meta_data(m_conn)
        scripts = get_scripts(meta_data_tables, logger)
        copy_data(m_conn, s_conn, scripts)
    finally:
        # Release both connections even when copying fails.
        s_conn.close()
        m_conn.close()
    logger.info('kontor.copy_to_mariadb finished')
+51
View File
@@ -0,0 +1,51 @@
"""
copy data from MariaDB to SQLite
"""
import sqlite3
import mariadb
from argparse import ArgumentParser, ArgumentDefaultsHelpFormatter
from setup import get_database_cursors, create_tables, get_logger, get_meta_data, get_scripts
# Command-line interface. Arguments are parsed at module level.
parser = ArgumentParser(formatter_class=ArgumentDefaultsHelpFormatter)
# Passed to create_tables(): drop each SQLite table before creating it.
parser.add_argument('--recreate-db', action='store_true')
# -v may be repeated; the resulting count is handed to get_logger().
parser.add_argument('--verbose', '-v', action='count', default=0)
args = parser.parse_args()
def copy_data(mariadb_conn, sqlite_conn, table_scripts):
    """
    Copy the rows of every described table from MariaDB into SQLite.

    Rows are inserted one by one and committed per table. Afterwards the row
    counts of both databases are logged. A row that cannot be inserted is
    logged and skipped; a failing select or count is logged and the next
    table is processed.

    :param mariadb_conn: connection to the source MariaDB database
    :param sqlite_conn: connection to the target SQLite database
    :param table_scripts: mapping of table id to the statement dict produced
        by get_scripts()
    """
    mariadb_cursor = mariadb_conn.cursor()
    sqlite_cursor = sqlite_conn.cursor()
    # Read the statements from the parameter, not from the module-level
    # `scripts`, so the loop and the lookups use the same mapping.
    for table in table_scripts.values():
        select_statement = table['select_mariadb']
        insert_statement = table['insert_sqlite']
        try:
            mariadb_cursor.execute(select_statement)
            rows = mariadb_cursor.fetchall()
            for row in rows:
                try:
                    sqlite_cursor.execute(insert_statement, row)
                except sqlite3.Error as error:
                    logger.info('insert failed with %s\n%s\n%s', error, insert_statement, row)
            sqlite_conn.commit()
            mariadb_cursor.execute(table['count'])
            (number_of_rows,) = mariadb_cursor.fetchone()
            row = sqlite_cursor.execute(table['count']).fetchone()
            logger.info('%s contains %d : %d entries', table['name'], number_of_rows, row[0])
        except (mariadb.Error, sqlite3.Error) as error:
            # The commit and the SQLite count can fail as well; one broken
            # table must not stop the remaining ones.
            logger.info('select failed with %s', error)
if __name__ == '__main__':
    # Script entry point: (re)creates the SQLite tables and fills them from
    # MariaDB. `logger` and `scripts` stay module-level names.
    logger = get_logger(args.verbose)
    logger.info('kontor.copy_to_sqlite started')
    s_conn, m_conn = get_database_cursors(logger)
    try:
        meta_data_tables = get_meta_data(m_conn)
        scripts = get_scripts(meta_data_tables, logger)
        create_tables(s_conn, logger, args.recreate_db, scripts)
        copy_data(m_conn, s_conn, scripts)
    finally:
        # Release both connections even when copying fails.
        s_conn.close()
        m_conn.close()
    logger.info('kontor.copy_to_sqlite finished')
+67
View File
@@ -0,0 +1,67 @@
"""
Prints the database kontor structure
"""
import mariadb
from argparse import ArgumentParser, ArgumentDefaultsHelpFormatter
from setup import get_database_cursors, get_logger
# Command-line interface. Arguments are parsed at module level.
parser = ArgumentParser(formatter_class=ArgumentDefaultsHelpFormatter)
# -v may be repeated; the resulting count is handed to get_logger().
parser.add_argument('--verbose', '-v', action='count', default=0)
args = parser.parse_args()
def show_tables(cur, log):
    """
    Return the names of all tables reported by ``SHOW TABLES``.

    :param cur: database cursor used to run the statement
    :param log: logger that receives an info message before the query runs
    :return: list of table names in the order the cursor returned them
    """
    log.info('get list of tables')
    cur.execute("SHOW TABLES")
    # Every result row is a one-element tuple holding the table name.
    return [name for (name,) in cur.fetchall()]
def get_field_info(cur):
    """
    Describe the result columns of the statement last executed on a cursor.

    :param cur: cursor whose ``description`` is inspected
    :return: list of strings of the form ``"<name>: <type> <flags>"``, one
        per column; type and flags are whatever ``mariadb.fieldinfo``
        reports for the description entry
    """
    info = mariadb.fieldinfo()
    return [
        f"{entry[0]}: {info.type(entry)} {info.flag(entry)}"
        for entry in cur.description
    ]
def get_table_field_info(cur, tablename):
    """
    Describe the columns of a table.

    A single-row select is executed only to populate the cursor description,
    which is then formatted by get_field_info().

    :param cur: database cursor used for the query
    :param tablename: table name; inserted into the statement unquoted
    :return: list of column description strings
    """
    # NOTE(review): the name is spliced into the SQL as is; callers in this
    # file pass names obtained from SHOW TABLES.
    query = f"SELECT * FROM {tablename} LIMIT 1"
    cur.execute(query)
    return get_field_info(cur)
if __name__ == '__main__':
    # Script entry point: prints the columns of every table in the database.
    logger = get_logger(args.verbose)
    logger.info("kontor.db_structure started")
    sqlite_conn, mariadb_conn = get_database_cursors(logger)
    try:
        tables = show_tables(mariadb_conn.cursor(), logger)
        for table in tables:
            field_info_text = get_table_field_info(mariadb_conn.cursor(), table)
            print(f"Columns in table {table}:")
            print("\n".join(field_info_text))
            print("\n")
    finally:
        # Release both connections even when a query fails. The SQLite
        # connection is not used here but is opened by the helper.
        mariadb_conn.close()
        sqlite_conn.close()
    logger.info("kontor.db_structure finished")
+73
View File
@@ -0,0 +1,73 @@
"""
download files with URLs from DB
"""
import re
import subprocess
from argparse import ArgumentParser, ArgumentDefaultsHelpFormatter
import mariadb
from setup import get_database_cursors, create_tables, get_logger
# Command-line interface. Arguments are parsed at module level, so `args` is
# available to download_url() and the main block.
parser = ArgumentParser(formatter_class=ArgumentDefaultsHelpFormatter)
# -v may be repeated; the resulting count is handed to get_logger().
parser.add_argument('--verbose', '-v', action='count', default=0)
# Working directory for the download command.
parser.add_argument('--dir', '-d', default='/data/media')
# When set, the URLs are only logged and nothing is downloaded.
parser.add_argument('--dry-run', '-m', action='store_true')
# NOTE(review): --rename is accepted here but not read anywhere in the
# visible part of this script.
parser.add_argument('--rename', '-r', action='store_true')
args = parser.parse_args()
def parse_output(lines_list, log):
    """
    Extract the name of the downloaded file from the downloader's output.

    Two kinds of lines are recognised:
    ``[download] <file> has already been downloaded`` and
    ``<prefix> Destination: <file>``. When several lines match, the last one
    wins.

    :param lines_list: output lines of the download command
    :param log: logger that receives an info message for every file found
    :return: the file name, or ``""`` when no line matched
    """
    download_prefix = '[download] '
    already_suffix = ' has already been downloaded'
    destination_marker = 'Destination: '
    file_name = ""
    for line in lines_list:
        head, found, _ = line.rpartition(already_suffix)
        if found:
            # Cut by the actual prefix and suffix instead of fixed offsets.
            file_name = head.removeprefix(download_prefix)
            log.info('found file: "%s"', file_name)
            # A file whose own name contains "Destination" must not be
            # parsed a second time as a destination line.
            continue
        _, found, name = line.partition(destination_marker)
        if found and name:
            # Everything after the marker is the file name, whatever tag
            # ("[download]", "[ExtractAudio]", ...) the line starts with.
            file_name = name
            log.info('new file: "%s"', file_name)
    return file_name
def download_url(video_url, log):
    """
    Download a URL with yt-dlp and return the resulting file name.

    The command runs in the directory given by ``--dir``.

    :param video_url: URL handed to yt-dlp
    :param log: logger passed on to parse_output()
    :return: file name found in the command output, or ``None`` when the
        command exits with a non-zero return code
    """
    command = ["/home/tpeetz/bin/yt-dlp", video_url]
    completed = subprocess.run(command, cwd=args.dir, capture_output=True, text=True)
    if completed.returncode != 0:
        return None
    # Runs of spaces are collapsed to one before the output is split into lines.
    collapsed = re.sub(' +', ' ', completed.stdout)
    return parse_output(collapsed.splitlines(), log)
def download_and_update(link, entry_id, conn):
    """
    Download one URL and record the outcome in its media_file row.

    On success the file name is stored and both should_download and review
    are cleared. Otherwise the row is flagged for review.

    :param link: URL to download
    :param entry_id: id of the media_file row belonging to the URL
    :param conn: database connection; committed after the update
    """
    m_cursor = conn.cursor()
    filename = download_url(link, logger)
    # A failed command yields None and unparseable output yields "". Neither
    # gives a usable file name, so both are treated as "needs review" instead
    # of storing an empty file_name and marking the entry as done.
    if not filename:
        update_statement = 'UPDATE media_file set review = true WHERE id = ?'
        logger.debug(f'entry {entry_id} could not downloaded, set to Review')
        m_cursor.execute(update_statement, (entry_id,))
    else:
        update_statement = 'UPDATE media_file set file_name = ?, should_download = false, review = false WHERE id = ?'
        logger.debug(f'entry {entry_id} successfully downloaded, set review and should_download to false')
        m_cursor.execute(update_statement, (filename, entry_id))
    conn.commit()
if __name__ == '__main__':
    # Script entry point: downloads every media_file entry flagged with
    # should_download. `logger` stays a module-level name because
    # download_and_update() reads it as a global.
    logger = get_logger(args.verbose)
    logger.info('kontor.download started')
    s_conn, m_conn = get_database_cursors(logger)
    try:
        cursor = m_conn.cursor()
        cursor.execute('SELECT id, url FROM media_file where should_download is true')
        for (link_id, url) in cursor.fetchall():
            if url is None:
                logger.info('There is no url for id {}'.format(link_id))
            elif args.dry_run:
                logger.info(f'download {url} for {link_id}')
            else:
                download_and_update(url, link_id, m_conn)
    finally:
        # The connections were never closed before; release them on every path.
        m_conn.close()
        s_conn.close()
    logger.info('kontor.download finished')
+57
View File
@@ -0,0 +1,57 @@
"""
read file with URLs and store in DB
"""
import uuid
from argparse import ArgumentParser, ArgumentDefaultsHelpFormatter
import datetime
import mariadb
from setup import get_database_cursors, get_logger, get_scripts, get_meta_data
# Command-line interface. Arguments are parsed at module level.
parser = ArgumentParser(formatter_class=ArgumentDefaultsHelpFormatter)
# Text file with one link per line; without it only the insert statement is logged.
parser.add_argument('-f', '--links', help='file with links')
# -v may be repeated; the resulting count is handed to get_logger().
parser.add_argument('--verbose', '-v', action='count', default=0)
args = parser.parse_args()
def read_links_file(links_file):
    """
    Read a text file and return its lines.

    :param links_file: path of the file to read
    :return: list of lines, each still carrying its line ending
    """
    with open(links_file, 'r') as source:
        return source.readlines()
def add_link_to_db(statement, connection, video_url, log):
    """
    Insert a new entry for a URL and commit it.

    The values are bound positionally: a fresh UUID, the current time twice,
    ``0``, the URL, ``True`` twice and four ``None`` values.
    NOTE(review): this order has to match the column order of ``statement``;
    confirm against the meta data that generates it.

    :param statement: insert statement with eleven placeholders
    :param connection: database connection used for the insert
    :param video_url: URL to store
    :param log: logger for the success (info) and failure (debug) message
    :return: id of the new entry, or ``None`` when the insert failed
    """
    new_id = str(uuid.uuid4())
    now = datetime.datetime.now()
    values = (new_id, now, now, 0, video_url, True, True, None, None, None, None)
    try:
        db_cursor = connection.cursor()
        db_cursor.execute(statement, values)
        connection.commit()
        log.info(f'link {video_url} added to db')
    except mariadb.Error as insert_error:
        log.debug("insert failed with %s", insert_error)
        return None
    return new_id
if __name__ == '__main__':
    # Script entry point: stores every link of the given file as a new
    # media_file entry.
    logger = get_logger(args.verbose)
    logger.info('kontor.read_list started')
    s_conn, m_conn = get_database_cursors(logger)
    try:
        meta_data_tables = get_meta_data(m_conn)
        scripts = get_scripts(meta_data_tables, logger)
        # Map table names to their ids to find the statements for media_file.
        tables = {}
        for table_id in scripts:
            tables[scripts[table_id]['name']] = table_id
        media_file_id = tables['media_file']
        insert_statement = scripts[media_file_id]['insert_mariadb']
        if args.links:
            logger.info("read links from file")
            links = read_links_file(args.links)
            for link in links:
                video_url = link.strip()
                if not video_url:
                    # Blank lines used to be stored as entries with an empty URL.
                    continue
                logger.info("add link to db")
                add_link_to_db(insert_statement, m_conn, video_url, logger)
        else:
            logger.info('script used: {}'.format(insert_statement))
    finally:
        # The connections were never closed before; release them on every path.
        m_conn.close()
        s_conn.close()
    logger.info('kontor.read_list finished')
+116
View File
@@ -0,0 +1,116 @@
"""
Setup database connections
"""
import sqlite3
import mariadb
import logging.config
from platformdirs import PlatformDirs
from pathlib import Path
import yaml
def get_database_cursors(log):
    """
    Open the SQLite and the MariaDB connection described in the user config.

    The settings are read from ``database-config.yaml`` in the user config
    directory of the application "kontor". Despite the name, connections are
    returned, not cursors.

    :param log: logger that receives the name of the SQLite database file
    :return: tuple ``(sqlite_conn, mariadb_conn)``
    """
    config_path = Path(PlatformDirs("kontor").user_config_dir, 'database-config.yaml')
    with open(config_path, 'rt') as config_file:
        settings = yaml.safe_load(config_file.read())
    sqlite_file = settings["sqlite"]["file"]
    log.info('using SQLite3 database {}'.format(sqlite_file))
    type_detection = sqlite3.PARSE_DECLTYPES | sqlite3.PARSE_COLNAMES
    sqlite_conn = sqlite3.connect(sqlite_file, detect_types=type_detection)
    # Keys are read in this fixed order from the 'mariadb' section.
    connect_args = {
        key: settings['mariadb'][key]
        for key in ('host', 'port', 'user', 'password', 'database')
    }
    mariadb_conn = mariadb.connect(**connect_args)
    return sqlite_conn, mariadb_conn
def create_tables(sqlite_conn, logger, recreate_db, scripts):
    """
    Create every described table in the SQLite database.

    :param sqlite_conn: SQLite connection on which the statements run
    :param logger: logger for progress (info) and statements (debug)
    :param recreate_db: when true, each table is dropped before it is created
    :param scripts: mapping of table id to a dict holding at least the
        ``'create'`` and ``'drop'`` statements
    """
    logger.info('create_tables')
    for table in scripts.values():
        create_sql, drop_sql = table['create'], table['drop']
        logger.debug(create_sql)
        db_cursor = sqlite_conn.cursor()
        if recreate_db:
            logger.debug(drop_sql)
            db_cursor.execute(drop_sql)
        db_cursor.execute(create_sql)
def get_logger(level):
    """
    Configure logging from the user config and return the 'development' logger.

    The configuration is read from ``logging-config.yaml`` in the user config
    directory of the application "kontor".

    :param level: verbosity count; ``0`` selects INFO, ``1`` selects DEBUG,
        any other value selects CRITICAL, ``None`` keeps the configured level
    :return: the configured logger
    """
    config_path = Path(PlatformDirs("kontor").user_config_dir, 'logging-config.yaml')
    with open(config_path, 'rt') as config_file:
        settings = yaml.safe_load(config_file.read())
    logging.config.dictConfig(settings)
    log = logging.getLogger('development')
    if level is None:
        return log
    # NOTE(review): a count of two or more lowers the output to CRITICAL;
    # confirm that this is intended.
    if level == 0:
        log.setLevel(logging.INFO)
    elif level == 1:
        log.setLevel(logging.DEBUG)
    else:
        log.setLevel(logging.CRITICAL)
    return log
def get_meta_data(mariadb_conn):
mariadb_cursor = mariadb_conn.cursor()
select_statement = "SELECT id, table_name FROM meta_data_table"
mariadb_cursor.execute(select_statement)
rows = mariadb_cursor.fetchall()
meta_data = {}
for (identifier, table_name) in rows:
table_data = {"name": table_name}
mariadb_cursor.execute("SELECT column_name, column_sync_name, column_type, column_modifier, column_order FROM meta_data_column WHERE table_id=?", (identifier, ))
column_rows = mariadb_cursor.fetchall()
column_list = []
for (column_name, column_sync_name, column_type, column_modifier, column_order) in column_rows:
column_data = {"column_name": column_name, "column_sync_name": column_sync_name, "column_type": column_type,
"column_modifier": column_modifier, "column_order": column_order}
column_list.append(column_data)
# logger.info(column_list)
table_data["columns"] = column_list
meta_data[identifier] = table_data
return meta_data
def get_scripts(meta_data, logger):
    """
    Build the SQL statements for every described table.

    MariaDB statements use ``column_name``; SQLite statements and the CREATE
    statement use ``column_sync_name``.

    :param meta_data: mapping of table id to ``{"name": ..., "columns": [...]}``
        as returned by get_meta_data()
    :param logger: accepted for interface compatibility; not used
    :return: dict mapping table id to a dict with the keys ``create``,
        ``drop``, ``select_mariadb``, ``select_sqlite``, ``insert_sqlite``,
        ``insert_mariadb``, ``truncate_mariadb``, ``count`` and ``name``
    """
    scripts_map = {}
    for table_id, table_meta in meta_data.items():
        definitions = []
        mariadb_names = []
        sqlite_names = []
        for entry in table_meta["columns"]:
            definition = "{} {}".format(entry["column_sync_name"], entry["column_type"])
            if entry["column_modifier"]:
                definition += " " + entry["column_modifier"]
            definitions.append(definition)
            mariadb_names.append(entry['column_name'])
            sqlite_names.append(entry['column_sync_name'])
        table_name = table_meta["name"]
        mariadb_list = ', '.join(mariadb_names)
        sqlite_list = ', '.join(sqlite_names)
        # One '?' placeholder per column.
        mariadb_marks = ', '.join(['?'] * len(mariadb_names))
        sqlite_marks = ', '.join(['?'] * len(sqlite_names))
        scripts_map[table_id] = {
            "create": "CREATE TABLE IF NOT EXISTS {} ({});".format(table_name, ", ".join(definitions)),
            "drop": 'DROP TABLE IF EXISTS {}'.format(table_name),
            "select_mariadb": 'SELECT {} FROM {}'.format(mariadb_list, table_name),
            "select_sqlite": 'SELECT {} FROM {}'.format(sqlite_list, table_name),
            "insert_sqlite": 'INSERT INTO {}({}) VALUES({})'.format(table_name, sqlite_list, sqlite_marks),
            "insert_mariadb": 'INSERT INTO {}({}) VALUES({})'.format(table_name, mariadb_list, mariadb_marks),
            "truncate_mariadb": 'TRUNCATE {}'.format(table_name),
            "count": "SELECT COUNT(*) FROM {}".format(table_name),
            "name": table_name,
        }
    return scripts_map
+48
View File
@@ -0,0 +1,48 @@
"""
fetch page titles for the URLs of DB entries marked for review
"""
import re
import subprocess
import datetime
import logging
import mariadb
import requests
from bs4 import BeautifulSoup
from argparse import ArgumentParser, ArgumentDefaultsHelpFormatter
import mariadb
from setup import get_database_cursors, create_tables, get_logger
# Command-line interface. Arguments are parsed at module level.
parser = ArgumentParser(formatter_class=ArgumentDefaultsHelpFormatter)
# -v may be repeated; the resulting count is handed to get_logger().
parser.add_argument('--verbose', '-v', action='count', default=0)
args = parser.parse_args()
if __name__ == '__main__':
    # Script entry point: for every media_file entry flagged for review, the
    # page behind its URL is fetched and the page title is stored.
    logger = get_logger(args.verbose)
    # NOTE(review): the start/finish messages still name kontor.download;
    # they look copied from the download script.
    logger.info('kontor.download started')
    s_conn, m_conn = get_database_cursors(logger)
    try:
        cursor = m_conn.cursor()
        cursor.execute('SELECT id, url FROM media_file where review is true')
        for (link_id, url) in cursor.fetchall():
            if url is None:
                logger.info('There is no url for id {}'.format(link_id))
                continue
            logger.info('get title for url {}'.format(url))
            # Reset per row: a title left over from the previous row must
            # never be written to this one.
            title = None
            try:
                # Without a timeout one unresponsive host blocks the run.
                r = requests.get(url, timeout=30)
                soup = BeautifulSoup(r.content, "html.parser")
                if soup.title is not None:
                    title = soup.title.string
            except requests.RequestException as error:
                logger.debug('request for %s failed with %s', url, error)
            if not title:
                # No usable title: keep the entry flagged and move on instead
                # of falling through to the update below.
                logger.info("Sorry, could not retrieve title")
                update_statement = 'UPDATE media_file set review = true WHERE id = ?'
                cursor.execute(update_statement, (link_id, ))
                continue
            logger.info('ID {} has title {}'.format(link_id, title))
            update = 'UPDATE media_file SET title = ?, review= False where id= ?'
            try:
                cursor.execute(update, (title, link_id))
                logger.info('entry {} updated'.format(link_id))
            except mariadb.Error as error:
                logger.info(error)
        m_conn.commit()
    finally:
        # The connections were never closed before; release them on every path.
        m_conn.close()
        s_conn.close()
    logger.info('kontor.download finished')