""" download files with URLs from DB """ import re import subprocess import datetime import logging import mariadb import requests from bs4 import BeautifulSoup from argparse import ArgumentParser, ArgumentDefaultsHelpFormatter import mariadb from setup import get_database_cursors, create_tables, get_logger parser = ArgumentParser(formatter_class=ArgumentDefaultsHelpFormatter) parser.add_argument('--verbose', '-v', action='count', default=0) args = parser.parse_args() if __name__ == '__main__': logger = get_logger(args.verbose) logger.info('kontor.download started') s_conn, m_conn = get_database_cursors(logger) cursor = m_conn.cursor() cursor.execute('SELECT id, url FROM media_file where review is true') for (link_id, url) in cursor.fetchall(): if url is None: logger.info('There is no url for id {}'.format(link_id)) else: logger.info('get title for url {}'.format(url)) try: r = requests.get(url) soup = BeautifulSoup(r.content, "html.parser") title = soup.title.string except: logger.info("Sorry, could not retrieve title") update_statement = 'UPDATE media_file set review = true WHERE id = ?' cursor.execute(update_statement, (link_id, )) logger.info('ID {} has title {}'.format(link_id, title)) update = 'UPDATE media_file SET title = ?, review= False where id= ?' try: cursor.execute(update, (title, link_id)) logger.info('entry {} updated'.format(link_id)) except mariadb.Error as error: logger.info(error) m_conn.commit() logger.info('kontor.download finished')