Files
kontor/scripts/update_title.py
T
2025-04-13 16:16:10 +02:00

50 lines
1.8 KiB
Python

"""
download files with URLs from DB
"""
import re
import subprocess
import datetime
import logging
import mariadb
import requests
from bs4 import BeautifulSoup
from argparse import ArgumentParser, ArgumentDefaultsHelpFormatter
import mariadb
from setup import get_database_cursors, get_logger
def _build_parser():
    """Create the command-line parser for this script.

    Options:
        --verbose / -v: repeatable counter, starts at 0.
        --config / -c: configuration name, defaults to 'kontor-docker'.
    """
    cli = ArgumentParser(formatter_class=ArgumentDefaultsHelpFormatter)
    cli.add_argument('--verbose', '-v', action='count', default=0)
    cli.add_argument('--config', '-c', default='kontor-docker')
    return cli


# Parsed at module level so that `parser` and `args` stay available as
# module-level names for the code further down.
parser = _build_parser()
args = parser.parse_args()
# Seconds to wait for a remote server before giving up on a URL; without a
# timeout a single unresponsive host would block the whole run.
REQUEST_TIMEOUT = 30


def _fetch_title(url):
    """Return the content of the HTML ``<title>`` element found at *url*.

    Returns None when the parsed document has no ``<title>`` element or
    when that element does not yield a string.

    Raises:
        requests.RequestException: if the page cannot be fetched.
    """
    response = requests.get(url, timeout=REQUEST_TIMEOUT)
    soup = BeautifulSoup(response.content, "html.parser")
    title_tag = soup.title
    if title_tag is None:
        return None
    return title_tag.string


if __name__ == '__main__':
    # For every media_file row flagged for review: fetch the page behind its
    # URL, store the page title and clear the review flag. Rows whose title
    # cannot be determined are left untouched (and therefore stay flagged).
    logger = get_logger(args.verbose)
    logger.info('kontor.download started')
    # Only the second connection is used by this script.
    s_conn, m_conn = get_database_cursors(logger)
    cursor = m_conn.cursor()
    cursor.execute('SELECT id, url FROM media_file where review is true')
    update = 'UPDATE media_file SET title = ?, review= False where id= ?'
    # fetchall() materialises the result set first, so the same cursor can
    # be reused for the UPDATE statements inside the loop.
    for (link_id, url) in cursor.fetchall():
        if url is None:
            logger.info('There is no url for id {}'.format(link_id))
            continue

        logger.info('get title for url {}'.format(url))
        try:
            title = _fetch_title(url)
        except Exception as error:
            # Per-row boundary of a batch job: one bad URL must not abort
            # the remaining rows. Unlike a bare ``except:``, this still lets
            # KeyboardInterrupt and SystemExit propagate.
            logger.debug(error)
            title = None

        if title is None:
            # Skip the UPDATE entirely; writing here would store a missing
            # (or, previously, a stale) title and clear the review flag.
            # The row was selected with ``review is true``, so it simply
            # remains flagged for the next run.
            logger.info("Sorry, could not retrieve title")
            continue

        logger.info('ID {} has title {}'.format(link_id, title))
        try:
            cursor.execute(update, (title, link_id))
            logger.info('entry {} updated'.format(link_id))
        except mariadb.Error as error:
            logger.error(error)
        # Commit per row so that progress survives a later failure.
        m_conn.commit()
    logger.info('kontor.download finished')