Vorbereitung Release 0.2.0 #83
+71
-14
@@ -2,7 +2,10 @@
|
||||
read file with links and store it in DB
|
||||
"""
|
||||
import logging.config
|
||||
import re
|
||||
from typing import List
|
||||
from bs4 import BeautifulSoup
|
||||
import requests
|
||||
import yaml
|
||||
from argparse import ArgumentParser, ArgumentDefaultsHelpFormatter
|
||||
from pathlib import Path
|
||||
@@ -11,11 +14,7 @@ from pathlib import Path
|
||||
from sqlalchemy import create_engine
|
||||
from sqlalchemy.orm import sessionmaker, Session
|
||||
from db.models.base import Base
|
||||
from db.models import registry
|
||||
from psycopg2.errors import NotNullViolation
|
||||
from config import get_logger
|
||||
import os
|
||||
import json
|
||||
|
||||
from db.models.media import MediaFile
|
||||
|
||||
@@ -24,6 +23,7 @@ parser.add_argument('--file', '-f', help='file with links', default='~/.sync/med
|
||||
# Remaining command-line options; `parser` is created earlier in the file.
# --video:   flag, help text says the URL is stored as a VideoFile.
parser.add_argument('--video', help='store Url as VideoFile', action="store_true")
# --config:  configuration name, defaults to 'kontor-docker'.
parser.add_argument('--config', '-c', default='kontor-docker')
# --verbose: repeatable counter (-v, -vv, ...), starts at 0.
parser.add_argument('--verbose', '-v', action='count', default=0)
# --dry-run: flag, exposed as args.dry_run.
parser.add_argument('--dry-run', '-m', help='execute script without storing', action="store_true")
# Parsing happens at import time, so `args` is available module-wide.
args = parser.parse_args()
|
||||
|
||||
DB_USER: str = os.getenv("DB_USER", "kontor")
|
||||
@@ -33,6 +33,31 @@ DB_PORT: int = int(os.getenv("DB_PORT", 5432))
|
||||
DB_DBNAME: str = os.getenv("DB_DBNAME", "kontor")
|
||||
DATABASE_URL: str = f"postgresql://{DB_USER}:{DB_PASSWORD}@{DB_SERVER}:{DB_PORT}/{DB_DBNAME}"
|
||||
|
||||
def get_logger(level, config: str):
    """Apply the user's YAML logging configuration and return the 'development' logger.

    The configuration is read from ``logging-config.yaml`` in the
    ``user_config_dir`` reported by ``PlatformDirs(config)``, parsed with
    ``yaml.safe_load`` and installed through ``logging.config.dictConfig``.

    Args:
        level: Verbosity selector. ``None`` leaves the configured level
            untouched; ``0`` selects CRITICAL, ``2`` selects DEBUG and any
            other value (including ``1``) selects INFO.
        config: Name passed to ``PlatformDirs`` to locate the config directory.

    Returns:
        The logger registered under the name ``'development'``.
    """
    config_dir = PlatformDirs(config).user_config_dir
    config_path = Path(config_dir, 'logging-config.yaml')
    with open(config_path, 'rt') as stream:
        settings = yaml.safe_load(stream.read())
    logging.config.dictConfig(settings)

    log = logging.getLogger('development')
    if level is None:
        return log

    # NOTE(review): counts above 2 fall back to INFO rather than DEBUG —
    # confirm that this is intended.
    if level == 0:
        log.setLevel(logging.CRITICAL)
    elif level == 2:
        log.setLevel(logging.DEBUG)
    else:
        log.setLevel(logging.INFO)
    return log
|
||||
|
||||
def get_session() -> Session:
    """Return a new session on an engine built from ``DATABASE_URL``.

    Before the session is created, ``Base.metadata.create_all`` is run
    against the engine with ``checkfirst=True``.
    """
    db_engine = create_engine(DATABASE_URL)
    Base.metadata.create_all(bind=db_engine, checkfirst=True)
    session_factory = sessionmaker(bind=db_engine)
    return session_factory()
|
||||
|
||||
def load_data(filename: str, log) -> List[str]:
|
||||
links: List[str] = []
|
||||
log.debug("load_data")
|
||||
@@ -47,26 +72,58 @@ def load_data(filename: str, log) -> List[str]:
|
||||
links.append(line.rstrip())
|
||||
return links
|
||||
|
||||
def get_meta_info(media_file: MediaFile, log, timeout: float = 30.0):
    """Download the page behind ``media_file.url`` and update the object in place.

    Outcomes:
      * The page carries a ``.error404-title`` element whose text is
        "Video nicht gefunden": ``url`` is cleared, ``review`` is set to
        ``False`` and the function returns immediately.
      * The page has a ``<title>``: it is stored in ``title`` and ``review``
        is set to ``False``.
      * Any exception (including a request timeout): ``title`` is cleared and
        ``review`` is set to ``True`` so the entry can be checked by hand.

    Matching anchor hrefs are collected and logged only; they are not stored.

    Args:
        media_file: Object whose ``url`` is fetched and whose ``title``,
            ``url`` and ``review`` attributes are updated.
        log: Logger used for progress and error messages.
        timeout: Seconds to wait for the HTTP request. Without a timeout a
            single unresponsive server would block the whole run.
    """
    try:
        response = requests.get(media_file.url, timeout=timeout)
        soup = BeautifulSoup(response.content, "html.parser")

        error404 = soup.css.select_one('.error404-title')
        if error404 and error404.get_text() == "Video nicht gefunden":
            log.warning(f"{error404.get_text()}")
            media_file.url = None
            media_file.review = False
            return

        title_tag = soup.find('title')
        if title_tag:
            media_file.title = title_tag.get_text()
            media_file.review = False

        anchors = soup.find_all('a', attrs={'href': re.compile("^https://.*pornstars/.*")})
        hrefs = (str(anchor.get("href")) for anchor in anchors)  # type: ignore
        # dict.fromkeys drops duplicates while keeping first-seen order.
        actor_links = list(dict.fromkeys(
            href for href in hrefs if not href.endswith('all/countries')
        ))
        log.info(f"links({len(actor_links)}): {actor_links}")
    except Exception as error:
        # Best effort: a failed lookup must not abort the caller's loop.
        log.info(f"something went wrong: {error}")
        media_file.title = None
        media_file.review = True
    log.info(f"update MediaFile with MetaInfos to {repr(media_file)}")
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Script entry point: for every link in the input file, create a
    # MediaFile row when no row with that URL exists yet.
    # NOTE(review): the logging config name is hard-coded to "kontor" although
    # a --config option is parsed — confirm which one is intended.
    logger = get_logger(args.verbose, "kontor")
    logger.info('kontor.add_links started')

    # One session for the whole run; get_session() also creates the engine
    # and the tables, so no separate engine setup is needed here.
    with get_session() as db:
        for link in load_data(args.file, logger):
            logger.debug(f"process {link}")
            media_files = db.query(MediaFile).filter(MediaFile.url == link).all()

            if media_files:
                # Already known: only report the existing rows.
                for media_file in media_files:
                    logger.debug(f"MediaFile with {media_file.id} is found")
                continue

            logger.info(f"MediaFile for link {link} not found")
            media_file = MediaFile()
            media_file.url = link
            media_file.review = True
            media_file.should_download = True

            # Fill in title/review before storing so the row is written once.
            # NOTE(review): get_meta_info may set url to None for a missing
            # video — confirm the url column accepts that.
            get_meta_info(media_file, logger)

            # --dry-run performs the lookup but must not write anything.
            if not args.dry_run:
                db.add(media_file)
                db.commit()
                db.refresh(media_file)

    logger.info('kontor.add_link finished')
|
||||
|
||||
@@ -16,7 +16,7 @@ class MediaFile(Base, BaseMixin, BaseVideoMixin):
|
||||
media_actor_files = relationship("MediaActorFile")
|
||||
|
||||
def __repr__(self):
    """Return a multi-line debug view with id, title, URL and both flags.

    Each field is placed on its own tab-indented line so that log output
    stays readable.
    """
    return (
        f'MediaFile(\n\tID: {self.id}'
        f'\n\tTitle: {self.title}'
        f'\n\tURL: {self.url}'
        f'\n\tReview: {self.review}'
        f'\n\tDownload: {self.should_download})'
    )
|
||||
|
||||
def __str__(self):
    """Return the short display form: the title followed by the id in parentheses."""
    return '{}({})'.format(self.title, self.id)
|
||||
|
||||
Reference in New Issue
Block a user