66 lines
2.2 KiB
Python
66 lines
2.2 KiB
Python
"""
|
|
Update titles of media files under review: download the page behind each
file's URL and store its <title> through the media API.
|
|
"""
|
|
import logging.config
|
|
import requests
|
|
import yaml
|
|
from argparse import ArgumentParser, ArgumentDefaultsHelpFormatter
|
|
from pathlib import Path
|
|
|
|
from bs4 import BeautifulSoup
|
|
from platformdirs import PlatformDirs
|
|
|
|
# Command-line interface.  Parsing happens at module level so that ``args``
# is available as a module-level name.
# NOTE: ArgumentDefaultsHelpFormatter only appends "(default: ...)" to options
# that carry a help string, so every option gets one.
parser = ArgumentParser(formatter_class=ArgumentDefaultsHelpFormatter)
parser.add_argument(
    '--verbose', '-v',
    action='count',
    default=0,
    help='log verbosity: omit for INFO, -v for DEBUG',
)
parser.add_argument(
    '--config', '-c',
    default='kontor-docker',
    help='application name used to locate the user config directory',
)
args = parser.parse_args()
|
|
|
|
def get_logger(level: int, config: str):
|
|
dirs = PlatformDirs(config)
|
|
logging_config = Path(dirs.user_config_dir, 'logging-config.yaml')
|
|
with open(logging_config, 'rt') as f:
|
|
configDict = yaml.safe_load(f.read())
|
|
logging.config.dictConfig(configDict)
|
|
logger = logging.getLogger('development')
|
|
if level is not None:
|
|
match level:
|
|
case 0:
|
|
logger.setLevel(logging.INFO)
|
|
case 1:
|
|
logger.setLevel(logging.DEBUG)
|
|
case _:
|
|
logger.setLevel(logging.CRITICAL)
|
|
return logger
|
|
|
|
|
|
def _fetch_title(url, timeout):
    """Return the stripped ``<title>`` text of the page at *url*, or ``None``.

    ``None`` is returned when the page has no ``<title>`` element or the
    title is empty.  HTTP error responses raise ``requests.HTTPError`` so
    that the title of an error page is never mistaken for the real one.
    """
    page = requests.get(url, timeout=timeout)
    page.raise_for_status()
    title_tag = BeautifulSoup(page.content, "html.parser").find('title')
    if title_tag is None:
        return None
    # get_text() also copes with nested markup, where ``.string`` is None.
    return title_tag.get_text().strip() or None


def main():
    """Fill in the titles of all media files that are flagged for review.

    Fetches the list of files under review from the local media API,
    downloads the page behind each file's URL, extracts its ``<title>`` and
    writes the item back with a PUT request.  Items whose title cannot be
    determined get ``title = None`` and stay flagged for review.
    """
    api_url = "http://127.0.0.1:8800/api/media/files"
    timeout = 30  # seconds; without a timeout requests may block forever

    log = get_logger(args.verbose, args.config)
    log.info('kontor.update_titles started')

    response = requests.get(f"{api_url}?review=true", timeout=timeout)
    log.info(f"Status: {response.status_code}")
    response.raise_for_status()
    data = response.json()
    log.info(f"data: {len(data)}")

    for item in data:
        link = item['url']
        log.info(f"{item['id']} - {str(link)}")
        if not link:
            continue

        try:
            title = _fetch_title(link, timeout)
        except Exception as error:
            # Per-item boundary: one unreachable or malformed page must not
            # abort the whole run.
            log.warning(f"something went wrong: {error}")
            title = None
        else:
            if title is None:
                log.warning(f"no title found: {link}")

        item['title'] = title
        # Keep the item under review whenever no title could be determined.
        item['review'] = title is None

        update = requests.put(f"{api_url}/{item['id']}", json=item, timeout=timeout)
        log.info(f"update status: {update.status_code}")
        try:
            result = update.json()
        except ValueError:
            # The body is not JSON (e.g. an HTML error page); log it raw
            # instead of crashing inside a log statement.
            result = update.text
        log.info(f"update result: {result}")

    log.info('kontor.update_titles finished')


if __name__ == '__main__':
    main()
|
|
|