change update_title to add authorization to REST calls
This commit is contained in:
@@ -2,19 +2,35 @@
|
||||
download files with URLs from DB
|
||||
"""
|
||||
import logging.config
|
||||
import os
|
||||
import re
|
||||
from typing import Any, Dict, List
|
||||
import requests
|
||||
from sqlalchemy import create_engine
|
||||
from sqlalchemy.orm import sessionmaker, Session
|
||||
from db.models.base import Base
|
||||
import yaml
|
||||
from argparse import ArgumentParser, ArgumentDefaultsHelpFormatter
|
||||
from pathlib import Path
|
||||
|
||||
from bs4 import BeautifulSoup
|
||||
from platformdirs import PlatformDirs
|
||||
from config import get_api_config
|
||||
from db.models.media import MediaFile
|
||||
|
||||
# Command-line interface. Parsing happens at import time so that `args`
# is available to the script body below.
parser = ArgumentParser(formatter_class=ArgumentDefaultsHelpFormatter)
# -v may be repeated; the resulting count is passed to get_logger().
parser.add_argument('--verbose', '-v', action='count', default=0)
# Configuration name passed to get_logger() and get_api_config().
parser.add_argument('--config', '-c', default='kontor-docker')
# With --dry-run the script skips the update_media_file() call.
parser.add_argument('--dry-run', '-m', help='execute script without storing', action="store_true")
args = parser.parse_args()
|
||||
|
||||
# Database connection settings. Each value can be overridden through the
# environment variable of the same name; the literals are the fallbacks.
DB_SERVER: str = os.environ.get("DB_SERVER", "127.0.0.1")
DB_PORT: int = int(os.environ.get("DB_PORT", 5432))
DB_DBNAME: str = os.environ.get("DB_DBNAME", "kontor")
DB_USER: str = os.environ.get("DB_USER", "kontor")
DB_PASSWORD: str = os.environ.get("DB_PASSWORD", "kontor")

# Connection URL assembled from the settings above; used by get_session().
DATABASE_URL: str = (
    f"postgresql://{DB_USER}:{DB_PASSWORD}"
    f"@{DB_SERVER}:{DB_PORT}/{DB_DBNAME}"
)
|
||||
|
||||
def get_logger(level: int, config: str):
|
||||
dirs = PlatformDirs(config)
|
||||
logging_config = Path(dirs.user_config_dir, 'logging-config.yaml')
|
||||
@@ -32,37 +48,96 @@ def get_logger(level: int, config: str):
|
||||
logger.setLevel(logging.CRITICAL)
|
||||
return logger
|
||||
|
||||
def get_session() -> Session:
    """Open a new SQLAlchemy session on the database at ``DATABASE_URL``.

    Each call builds a fresh engine and runs ``create_all`` on the metadata
    of ``Base`` with ``checkfirst=True`` before the session is instantiated.

    Returns:
        A new session bound to the freshly created engine.
    """
    db_engine = create_engine(DATABASE_URL)
    Base.metadata.create_all(bind=db_engine, checkfirst=True)
    return sessionmaker(bind=db_engine)()
|
||||
|
||||
def get_media_files(all_files: bool, log: logging.Logger, api_data: Dict[str, Any],
                    timeout: float = 30.0) -> Any:
    """Fetch media file records from the REST API.

    Args:
        all_files: If true, request every record; otherwise only the records
            selected by the ``review=true`` query parameter.
        log: Logger used for status output.
        api_data: API settings; the keys ``host``, ``port`` and ``token``
            are read.
        timeout: Seconds to wait for the server. Without a timeout
            ``requests`` waits indefinitely and a stalled server would hang
            the script.

    Returns:
        The decoded JSON body of the response.
    """
    host = api_data["host"]
    port = api_data["port"]
    token = api_data['token']
    headers: Dict[str, str] = {"Authorization": f"Bearer {token}"}

    files_url = f"http://{host}:{port}/api/media/files"
    if not all_files:
        files_url += "?review=true"

    response = requests.get(files_url, headers=headers, timeout=timeout)
    log.debug(f"Status: {response.status_code}")
    if not response.ok:
        # Make a failed call (e.g. rejected token) visible at the default
        # log level; the body is still returned as before.
        log.warning(f"GET {files_url} failed with status {response.status_code}")
    return response.json()
|
||||
|
||||
def update_media_file(media_file: MediaFile, log: logging.Logger, api_data: Dict[str, Any],
                      timeout: float = 30.0) -> Any:
    """Send the current state of a media file to the REST API via PUT.

    Args:
        media_file: Record to store; its ``id``, ``title``, ``file_name``,
            ``cloud_link``, ``url``, ``review`` and ``should_download``
            attributes form the request body.
        log: Logger used for status output.
        api_data: API settings; the keys ``host``, ``port`` and ``token``
            are read.
        timeout: Seconds to wait for the server. Without a timeout
            ``requests`` waits indefinitely and a stalled server would hang
            the script.

    Returns:
        The decoded JSON body of the response.
    """
    host = api_data["host"]
    port = api_data["port"]
    token = api_data['token']
    url: str = f"http://{host}:{port}/api/media/files/{media_file.id}"
    headers: Dict[str, str] = {"Authorization": f"Bearer {token}"}

    item: Dict[str, Any] = {
        'id': media_file.id,
        'title': media_file.title,
        'file_name': media_file.file_name,
        'cloud_link': media_file.cloud_link,
        'url': media_file.url,
        'review': media_file.review,
        'should_download': media_file.should_download,
    }

    update = requests.put(url, headers=headers, json=item, timeout=timeout)
    log.debug(f"update status: {update.status_code}")
    # Decode the body once and reuse it for logging and the return value.
    result = update.json()
    log.debug(f"update result: {result}")
    return result
|
||||
|
||||
# Fetch the page at media_file.url, update media_file in place (title, review,
# and url when the page reports a missing video) and return the list of
# matching link URLs found on the page, without duplicates.
# NOTE(review): indentation is not visible in this view, so the exact nesting
# of the statements below cannot be confirmed from here.
def get_meta_info(media_file: MediaFile, log) -> List[str]:
|
||||
actor_links: List[str] = []
|
||||
try:
|
||||
r = requests.get(media_file.url)
|
||||
soup = BeautifulSoup(r.content, "html.parser")
|
||||
# An element with class "error404-title" whose text is "Video nicht gefunden"
# (German: "video not found") is treated as a removed video: the url is
# cleared, review is reset and the still-empty list is returned.
error404 = soup.css.select_one('.error404-title')
|
||||
if error404 and error404.get_text() == "Video nicht gefunden":
|
||||
log.warning(f"{error404.get_text()}")
|
||||
media_file.url = None
|
||||
media_file.review = False
|
||||
return actor_links
|
||||
# Take the title from the page's <title> element when there is one.
title_tag = soup.find('title')
|
||||
if title_tag:
|
||||
media_file.title = title_tag.get_text()
|
||||
# NOTE(review): whether the next assignment belongs to `if title_tag:` cannot
# be told from this view - confirm against the real file.
media_file.review = False
|
||||
# Collect every <a> whose href starts with https:// and contains "pornstars/".
anchors = soup.find_all('a', attrs={'href': re.compile("^https://.*pornstars/.*")})
|
||||
for anchor in anchors:
|
||||
link_url = str(anchor.get("href")) # type: ignore
|
||||
# Links ending in "all/countries" are skipped (presumably an overview page
# rather than a single actor - TODO confirm).
if link_url.endswith('all/countries'):
|
||||
continue
|
||||
# Keep each link only once.
if link_url in actor_links:
|
||||
continue
|
||||
actor_links.append(link_url)
|
||||
# Any exception raised above is logged; the title is cleared and the record
# is flagged for review again.
except Exception as error:
|
||||
log.info(f"something went wrong: {error}")
|
||||
media_file.title = None
|
||||
media_file.review = True
|
||||
log.info(f"update MediaFile with MetaInfos to {repr(media_file)}")
|
||||
log.info(f"links({len(actor_links)}): {actor_links}")
|
||||
return actor_links
|
||||
|
||||
|
||||
# Script entry point: loads the media files flagged for review, refreshes their
# meta information and sends each record back to the REST API.
# NOTE(review): this span appears to interleave lines removed and lines added
# by the commit (diff markers and indentation are missing in this view), e.g.
# the hard-coded, header-less calls to 127.0.0.1:8800 sit next to the
# get_media_files()/update_media_file() calls that send an Authorization
# header. Confirm against the real file before changing anything here.
if __name__ == '__main__':
|
||||
log = get_logger(args.verbose, args.config)
|
||||
log.info('kontor.update_titles started')
|
||||
response = requests.get("http://127.0.0.1:8800/api/media/files?review=true")
|
||||
log.info(f"Status: {response.status_code}")
|
||||
data = response.json()
|
||||
# API settings (host, port, token are read by the helpers) come from the
# configuration selected with --config.
api_data = get_api_config(log, args.config)
|
||||
data = get_media_files(False, log, api_data=api_data)
|
||||
log.info(f"data: {len(data)}")
|
||||
for item in data:
|
||||
link = item['url']
|
||||
log.info(f"{item['id']} - {str(link)}")
|
||||
# Query the records flagged for review directly from the database.
session = get_session()
|
||||
with session as db:
|
||||
media_files = db.query(MediaFile).filter(MediaFile.review == True).all()
|
||||
for media_file in media_files:
|
||||
link = media_file.url
|
||||
log.info(f"{media_file.id} - {str(link)}")
|
||||
# Records without a url are skipped.
if not link:
|
||||
continue
|
||||
# A url stored as the literal string "None" is replaced by a real None.
if link == "None":
|
||||
item['url'] = None
|
||||
media_file.url = None
|
||||
else:
|
||||
try:
|
||||
r = requests.get(link)
|
||||
soup = BeautifulSoup(r.content, "html.parser")
|
||||
title_tag = soup.find('title')
|
||||
if title_tag:
|
||||
title= title_tag.get_text()
|
||||
title = soup.title.string
|
||||
item['title'] = title
|
||||
item['review'] = False
|
||||
except Exception as error:
|
||||
log.info(f"something went wrong: {error}")
|
||||
item['title'] = None
|
||||
item['review'] = True
|
||||
update = requests.put(f"http://127.0.0.1:8800/api/media/files/{item['id']}", json=item)
|
||||
log.info(f"update status: {update.status_code}")
|
||||
log.info(f"update result: {update.json()}")
|
||||
get_meta_info(media_file, log)
|
||||
# With --dry-run nothing is sent back to the API.
if not args.dry_run:
|
||||
update_media_file(media_file, log, api_data=api_data)
|
||||
log.info('kontor.update_titles finished')
|
||||
|
||||
|
||||
Reference in New Issue
Block a user