Files
kontor/kontor-scripts/update_title.py
T
2025-11-07 08:00:52 +01:00

69 lines
2.3 KiB
Python

"""
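Update titles of media files flagged for review: fetch each entry's URL,
read the page's <title> and write the entry back through the media-files API.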
"""
import logging.config
import requests
import yaml
from argparse import ArgumentParser, ArgumentDefaultsHelpFormatter
from pathlib import Path
from bs4 import BeautifulSoup
from platformdirs import PlatformDirs
# Command-line interface.
#   --verbose/-v  may be repeated; the count is handed to get_logger() as level
#   --config/-c   name handed to get_logger() to locate the logging config
parser = ArgumentParser(
    formatter_class=ArgumentDefaultsHelpFormatter,
)
parser.add_argument(
    '--verbose', '-v',
    default=0,
    action='count',
)
parser.add_argument(
    '--config', '-c',
    default='kontor-docker',
)
# Parsed at import time; the script entry point reads the resulting `args`.
args = parser.parse_args()
def get_logger(level: int, config: str):
    """Configure logging from YAML and return the 'development' logger.

    The logging configuration is read from ``logging-config.yaml`` inside
    the user config directory that ``PlatformDirs(config)`` reports, and is
    applied with ``logging.config.dictConfig``.

    Args:
        level: verbosity count. 0 selects INFO, 1 selects DEBUG and any
            other value selects CRITICAL. ``None`` leaves the level from
            the configuration file untouched.
        config: name passed to ``PlatformDirs`` to locate the config dir.

    Returns:
        The logger named 'development'.
    """
    config_path = Path(PlatformDirs(config).user_config_dir, 'logging-config.yaml')
    with open(config_path, 'rt') as handle:
        raw_yaml = handle.read()
    logging.config.dictConfig(yaml.safe_load(raw_yaml))

    logger = logging.getLogger('development')
    if level is None:
        return logger

    # Translate the verbosity count into a logging level.
    if level == 0:
        chosen = logging.INFO
    elif level == 1:
        chosen = logging.DEBUG
    else:
        chosen = logging.CRITICAL
    logger.setLevel(chosen)
    return logger
# Endpoint of the media-files API this script reads from and writes back to.
_FILES_ENDPOINT = "http://127.0.0.1:8800/api/media/files"
# Seconds to wait on any single HTTP request. requests has no default
# timeout, so without this one stalled server would hang the whole run.
_HTTP_TIMEOUT = 30


def _fetch_title(url, timeout=_HTTP_TIMEOUT):
    """Return the stripped text of the <title> element of the page at *url*.

    Raises:
        requests.RequestException: the page could not be fetched or the
            server answered with an HTTP error status.
        ValueError: the page has no <title> element or the title is empty.
    """
    page = requests.get(url, timeout=timeout)
    # An error page must not donate its title (e.g. "404 Not Found").
    page.raise_for_status()
    title_tag = BeautifulSoup(page.content, "html.parser").find('title')
    # get_text() also works when the title contains nested markup, where
    # `.string` would be None.
    title = title_tag.get_text().strip() if title_tag else ''
    if not title:
        raise ValueError(f"no <title> found at {url}")
    return title


if __name__ == '__main__':
    log = get_logger(args.verbose, args.config)
    log.info('kontor.update_titles started')

    # Fetch every media file entry that is still flagged for review.
    response = requests.get(f"{_FILES_ENDPOINT}?review=true", timeout=_HTTP_TIMEOUT)
    log.info(f"Status: {response.status_code}")
    # Stop with a clear HTTP error instead of mis-parsing an error body.
    response.raise_for_status()
    data = response.json()
    log.info(f"data: {len(data)}")

    for item in data:
        link = item['url']
        log.info(f"{item['id']} - {link}")
        if not link:
            # Nothing to look up; leave the entry untouched.
            continue
        if link == "None":
            # The literal string "None" was stored instead of a null URL.
            item['url'] = None
        else:
            try:
                item['title'] = _fetch_title(link)
                item['review'] = False
            except Exception as error:
                # Per-item boundary: any failure keeps the entry under
                # review rather than aborting the remaining items.
                log.warning(f"something went wrong: {error}")
                item['title'] = None
                item['review'] = True

        update = requests.put(
            f"{_FILES_ENDPOINT}/{item['id']}", json=item, timeout=_HTTP_TIMEOUT
        )
        log.info(f"update status: {update.status_code}")
        try:
            result = update.json()
        except ValueError:
            # Non-JSON (e.g. HTML error) response: log the raw body instead
            # of crashing in the middle of the batch.
            result = update.text
        log.info(f"update result: {result}")

    log.info('kontor.update_titles finished')