# File listing metadata: 50 lines, 1.8 KiB, Python
"""
Download the pages behind the URLs stored in the DB (media_file rows
flagged for review) and record each page's HTML title.
"""
|
|
import re
|
|
import subprocess
|
|
import datetime
|
|
import logging
|
|
import mariadb
|
|
import requests
|
|
from bs4 import BeautifulSoup
|
|
from argparse import ArgumentParser, ArgumentDefaultsHelpFormatter
|
|
import mariadb
|
|
from setup import get_database_cursors, get_logger
|
|
|
|
# Command-line interface of the script. Defaults are shown in --help via
# ArgumentDefaultsHelpFormatter.
parser = ArgumentParser(formatter_class=ArgumentDefaultsHelpFormatter)

# Repeatable flag: each occurrence increments the count (0 when absent).
parser.add_argument(
    '--verbose', '-v',
    default=0,
    action='count',
)

# Name of the configuration to use.
parser.add_argument(
    '--config', '-c',
    default='kontor-docker',
)

# Parsed at module level so that `args` is available to the code below.
args = parser.parse_args()
|
|
|
|
|
|
if __name__ == '__main__':
    # Script entry point: for every media_file row flagged for review, fetch
    # the page behind its URL, read the HTML <title> and store it in the row.
    logger = get_logger(args.verbose)
    logger.info('kontor.download started')

    # Only the second connection returned by the helper is used in this script.
    s_conn, m_conn = get_database_cursors(logger)
    cursor = m_conn.cursor()
    cursor.execute('SELECT id, url FROM media_file where review is true')

    # fetchall() loads the whole result set up front, so the same cursor can
    # be reused for the UPDATE statements issued inside the loop.
    for link_id, url in cursor.fetchall():
        if url is None:
            logger.info('There is no url for id {}'.format(link_id))
            continue

        logger.info('get title for url {}'.format(url))
        try:
            # A timeout keeps one unresponsive host from hanging the whole run.
            r = requests.get(url, timeout=30)
            soup = BeautifulSoup(r.content, "html.parser")
            # Raises AttributeError when the page has no <title> element.
            title = soup.title.string
        except Exception:
            # `except Exception` (not a bare `except:`) so that Ctrl-C and
            # SystemExit still terminate the script.
            logger.info("Sorry, could not retrieve title")
            update_statement = 'UPDATE media_file set review = true WHERE id = ?'
            cursor.execute(update_statement, (link_id, ))
            # Skip the title update: `title` is either unbound or left over
            # from the previous row, and the entry must stay under review.
            continue

        logger.info('ID {} has title {}'.format(link_id, title))
        update = 'UPDATE media_file SET title = ?, review= False where id= ?'
        try:
            cursor.execute(update, (title, link_id))
            logger.info('entry {} updated'.format(link_id))
        except mariadb.Error as error:
            # Best effort: log the database error and go on with the next row.
            logger.info(error)

    # All updates are committed together once the loop has finished.
    m_conn.commit()
    logger.info('kontor.download finished')
|
|
|