""" download files with URLs from DB """ import logging.config import requests import yaml from argparse import ArgumentParser, ArgumentDefaultsHelpFormatter from pathlib import Path from bs4 import BeautifulSoup from platformdirs import PlatformDirs parser = ArgumentParser(formatter_class=ArgumentDefaultsHelpFormatter) parser.add_argument('--verbose', '-v', action='count', default=0) parser.add_argument('--config', '-c', default='kontor-docker') args = parser.parse_args() def get_logger(level: int, config: str): dirs = PlatformDirs(config) logging_config = Path(dirs.user_config_dir, 'logging-config.yaml') with open(logging_config, 'rt') as f: configDict = yaml.safe_load(f.read()) logging.config.dictConfig(configDict) logger = logging.getLogger('development') if level is not None: match level: case 0: logger.setLevel(logging.INFO) case 1: logger.setLevel(logging.DEBUG) case _: logger.setLevel(logging.CRITICAL) return logger if __name__ == '__main__': log = get_logger(args.verbose, args.config) log.info('kontor.update_titles started') response = requests.get("http://127.0.0.1:8800/api/media/files?review=true") log.info(f"Status: {response.status_code}") data = response.json() log.info(f"data: {len(data)}") for item in data: link = item['url'] log.info(f"{item['id']} - {str(link)}") try: r = requests.get(link) soup = BeautifulSoup(r.content, "html.parser") title = soup.title.string anchors = soup.find_all('a') for anchor in anchors: if anchor.has_attr('href'): link_url = anchor['href'] if link_url and link_url.__contains__('pornstars/'): log.info(link_url) item['title'] = title item['review'] = False except Exception as error: log.info(f"something went wrong: {error} {anchor}") item['title'] = None item['review'] = True #update = requests.put(f"http://127.0.0.1:8800/api/media/files/{item['id']}", json=item) #log.info(f"update status: {update.status_code}") #log.info(f"update result: {update.json()}") log.info('kontor.update_titles finished')