reorganize python projects

This commit is contained in:
Thomas Peetz
2025-04-15 01:29:08 +02:00
parent a169f6a6c1
commit 98e3d91edd
35 changed files with 136 additions and 458 deletions
+54 -37
View File
@@ -1,49 +1,66 @@
"""
Update the titles of media files flagged for review in the database.
"""
import re
import subprocess
import datetime
import logging
import mariadb
import requests
from bs4 import BeautifulSoup
import logging.config
import yaml
from argparse import ArgumentParser, ArgumentDefaultsHelpFormatter
import mariadb
from setup import get_database_cursors, get_logger
from pathlib import Path
from platformdirs import PlatformDirs
from sqlalchemy import create_engine, select
from sqlalchemy.orm import sessionmaker
from schema import MediaFile, Base
# Command-line interface.
# ArgumentDefaultsHelpFormatter only appends "(default: ...)" to options
# that carry a help string, so every option gets one.
parser = ArgumentParser(formatter_class=ArgumentDefaultsHelpFormatter)
parser.add_argument(
    '--verbose', '-v', action='count', default=0,
    help='logging verbosity: omitted selects INFO, -v selects DEBUG')
parser.add_argument(
    '--config', '-c', default='kontor-docker',
    help='application name used to locate the user config directory')
args = parser.parse_args()
def get_logger(level: int, config: str) -> logging.Logger:
    """Configure logging from the user's YAML file and return the logger.

    The configuration is read from ``logging-config.yaml`` inside the user
    config directory that ``PlatformDirs(config)`` resolves, and applied
    with ``logging.config.dictConfig``.

    Args:
        level: Verbosity count. 0 selects INFO and 1 selects DEBUG; any
            other value selects CRITICAL. ``None`` leaves the level set by
            the configuration file untouched.
        config: Application name handed to ``PlatformDirs``.

    Returns:
        The logger named ``'development'``.

    Raises:
        OSError: If the configuration file cannot be opened.
    """
    logging_config = Path(PlatformDirs(config).user_config_dir,
                          'logging-config.yaml')
    # Explicit encoding: the platform default is not UTF-8 everywhere.
    with open(logging_config, 'rt', encoding='utf-8') as f:
        # safe_load accepts the open stream directly; no need to read()
        # the whole file into a string first.
        logging.config.dictConfig(yaml.safe_load(f))

    logger = logging.getLogger('development')
    if level is not None:
        # NOTE(review): a count of 2 or more (-vv) falls through to
        # CRITICAL, i.e. asking for more verbosity silences the logger.
        # Kept as-is; confirm whether DEBUG was intended here.
        levels = {0: logging.INFO, 1: logging.DEBUG}
        logger.setLevel(levels.get(level, logging.CRITICAL))
    return logger
if __name__ == '__main__':
    # Cursor-based pass: for every media_file row flagged for review, fetch
    # the page behind its URL and store the page's <title> as the row title.
    # get_logger takes both the verbosity and the config name.
    logger = get_logger(args.verbose, args.config)
    logger.info('kontor.download started')
    s_conn, m_conn = get_database_cursors(logger)
    cursor = m_conn.cursor()

    keep_flagged = 'UPDATE media_file set review = true WHERE id = ?'
    store_title = 'UPDATE media_file SET title = ?, review= False where id= ?'

    cursor.execute('SELECT id, url FROM media_file where review is true')
    for (link_id, url) in cursor.fetchall():
        if url is None:
            logger.info('There is no url for id %s', link_id)
            continue

        logger.info('get title for url %s', url)
        try:
            # Without a timeout a single stalled server blocks the run.
            r = requests.get(url, timeout=30)
            soup = BeautifulSoup(r.content, "html.parser")
            # soup.title is None for pages without a <title> element; the
            # resulting AttributeError is handled below like any other
            # retrieval failure.
            title = soup.title.string
        except Exception:
            # Best effort: leave the row flagged and move on. Skipping the
            # rest of the iteration matters, because `title` is either
            # unbound or still holds the previous row's value here.
            logger.info("Sorry, could not retrieve title")
            cursor.execute(keep_flagged, (link_id, ))
            continue

        logger.info('ID %s has title %s', link_id, title)
        try:
            cursor.execute(store_title, (title, link_id))
            logger.info('entry %s updated', link_id)
        except mariadb.Error as error:
            logger.info(error)

    m_conn.commit()
    logger.info('kontor.download finished')
# ORM-based pass: load every MediaFile flagged for review, let each one
# refresh its title, and commit the result in a single transaction.
log = get_logger(args.verbose, args.config)
log.info('kontor.update_titles started')

# Database settings are read from database-config.yaml in the user config
# directory that PlatformDirs resolves for the chosen config name.
dirs = PlatformDirs(args.config)
database_config = Path(dirs.user_config_dir, 'database-config.yaml')
# Explicit encoding: the platform default is not UTF-8 everywhere.
with open(database_config, 'rt', encoding='utf-8') as f:
    db_config = yaml.safe_load(f)
# db_config contains the database password, so it is deliberately not
# printed or logged.

# NOTE(review): the values are interpolated verbatim, so a user name or
# password containing characters such as '@', ':' or '/' must already be
# URL-encoded in the YAML file - confirm against the deployed configs.
connect_string = ('mariadb+mariadbconnector://{}:{}@{}:{}/{}'.format(
    db_config['mariadb']['user'],
    db_config['mariadb']['password'],
    db_config['mariadb']['host'],
    db_config['mariadb']['port'],
    db_config['mariadb']['database']
))
engine = create_engine(connect_string)
# checkfirst=True skips tables that already exist.
Base.metadata.create_all(bind=engine, checkfirst=True)
__session__ = sessionmaker(engine)

with __session__() as session:
    files = session.query(MediaFile).filter_by(review=1).all()
    log.info("found %d entries", len(files))
    for mediafile in files:
        # update_title() is defined on MediaFile in the schema module;
        # presumably it refreshes the title attribute - verify there.
        mediafile.update_title()
        session.add(mediafile)
    session.commit()
    log.info("found %d entries", len(files))
log.info('kontor.update_titles finished')