Vorbereitung Release 0.2.0 #83

Merged
tpeetz merged 178 commits from develop/0.2.0 into main 2026-01-29 22:50:42 +00:00
13 changed files with 221 additions and 50 deletions
Showing only changes of commit a6a03e3f04 - Show all commits
+3 -2
View File
@@ -1,9 +1,10 @@
from fastapi import APIRouter
from src.apis.version1 import comic, media, tysc, admin
from src.apis.version1 import comic, mediaactor, mediafile, tysc, admin
api_router = APIRouter(prefix="/api")
api_router.include_router(comic.router, prefix="/comics", tags=["comics"])
api_router.include_router(media.router, prefix="/media", tags=["media"])
api_router.include_router(mediafile.router, prefix="/media", tags=["media"])
api_router.include_router(mediaactor.router, prefix="/media", tags=["media"])
api_router.include_router(tysc.router, prefix="/tysc", tags=["tysc"])
api_router.include_router(admin.router, prefix="/login", tags=["login"])
@@ -0,0 +1,21 @@
from typing import List, AnyStr
from fastapi import APIRouter, status, HTTPException, Depends
from sqlalchemy import select, Sequence
from src.core.log_conf import logger
from src.apis.utils import SessionDep
from src.db.repository.media import create_new_mediafile
from src.schema.media.actor import MediaActorResponse
from src.db.models.media import MediaActor
router = APIRouter()
@router.get("/actors", response_model=List[MediaActorResponse])
def get_all_files(db: SessionDep, review: bool = False, download: bool = False) -> List[MediaActorResponse]:
    """Return every stored media actor as a list of response objects.

    Args:
        db: Database session injected by FastAPI.
        review: Accepted as a query parameter but not used by this handler.
        download: Accepted as a query parameter but not used by this handler.

    Returns:
        One ``MediaActorResponse`` per ``MediaActor`` row.
    """
    # NOTE(review): authentication is currently disabled for this route; an
    # earlier signature carried
    # ``current_user: Profile = Depends(get_current_user_from_token)``.
    # NOTE(review): str() turns a missing (None) name or url into the literal
    # text "None" -- confirm that this is what API clients should receive.
    return [
        MediaActorResponse(id=row.id, name=str(row.name), url=str(row.url))
        for row in db.scalars(select(MediaActor)).all()
    ]
@@ -4,7 +4,9 @@ from fastapi import APIRouter, status, HTTPException, Depends
from sqlalchemy import select, Sequence
from src.core.log_conf import logger
from src.apis.utils import SessionDep
from src.db.repository.media import create_new_mediafile
from src.db.repository.media import create_new_mediaactorfile, create_new_mediafile
from src.schema.media.actor import MediaActorResponse
from src.schema.media.actorfile import MediaActorFileResponse
from src.schema.media.file import MediaFileResponse, Link, get_file_details, set_file
from src.db.models.media import MediaFile
@@ -47,6 +49,43 @@ def get_file(file_id: AnyStr, db: SessionDep) -> MediaFileResponse:
response = get_file_details(mediafile)
return response
@router.get("/files/{file_id}/actors", response_model=List[MediaActorResponse])
def get_file_actors(file_id: AnyStr, db: SessionDep) -> List[MediaActorResponse]:
    """List the actors that are linked to one media file.

    Args:
        file_id: Primary key of the ``MediaFile`` to inspect.
        db: Database session injected by FastAPI.

    Returns:
        One ``MediaActorResponse`` per actor/file link of the media file.

    Raises:
        HTTPException: 404 when no ``MediaFile`` with ``file_id`` exists.
    """
    mediafile = db.get(MediaFile, file_id)
    if not mediafile:
        raise HTTPException(status_code=404, detail="MediaFile could not be found")

    links = mediafile.media_actor_files
    logger.info(f"already known actors: {links}")

    # NOTE(review): name and url are passed through unconverted here; a None
    # value would presumably fail validation of the ``str`` fields -- confirm.
    linked_actors = (link.media_actor for link in links)
    return [
        MediaActorResponse(id=actor.id, name=actor.name, url=actor.url)
        for actor in linked_actors
    ]
@router.put("/files/{file_id}/actors", response_model=List[MediaActorFileResponse])
def update_file_actors(file_id: AnyStr, db: SessionDep, actors: List[MediaActorResponse]) -> List[MediaActorFileResponse]:
    """Link the given actors to a media file and return all of its links.

    Actors that are already linked to the file are left untouched; existing
    links are never removed by this handler.

    Args:
        file_id: Primary key of the ``MediaFile`` to update.
        db: Database session injected by FastAPI.
        actors: Actors that should be associated with the file.

    Returns:
        Every actor/file link of the media file after the update.

    Raises:
        HTTPException: 404 when no ``MediaFile`` with ``file_id`` exists.
    """
    mediafile = db.get(MediaFile, file_id)
    if not mediafile:
        raise HTTPException(status_code=404, detail="MediaFile could not be found")

    actor_files = mediafile.media_actor_files
    logger.info(f"already known actors: {actor_files}")

    # Keep the linked actor ids in a set that is updated after every insert.
    # Checking only the list captured before the loop would let an actor id
    # that appears twice in the request create two identical links.
    linked_actor_ids = {actor_file.media_actor_id for actor_file in actor_files}
    for actor in actors:
        if actor.id in linked_actor_ids:
            logger.info("alreay associated - do nothing")
            continue
        create_new_mediaactorfile(db, actor.id, mediafile.id)
        linked_actor_ids.add(actor.id)

    # Reload the file so the relationship includes the links created above.
    db.refresh(mediafile)
    return [
        MediaActorFileResponse(
            id=actor_file.id,
            actor_id=actor_file.media_actor_id,
            file_id=actor_file.media_file_id,
        )
        for actor_file in mediafile.media_actor_files
    ]
@router.put("/files/{file_id}", response_model=MediaFileResponse)
def update_file(file_id: AnyStr, db: SessionDep, info: MediaFileResponse) -> MediaFileResponse:
mediaFile = db.get(MediaFile, file_id)
@@ -55,7 +94,11 @@ def update_file(file_id: AnyStr, db: SessionDep, info: MediaFileResponse) -> Med
set_file(info, mediaFile)
db.add(mediaFile)
db.commit()
return info
mediafile = db.get(MediaFile, file_id)
if not mediafile:
raise HTTPException(status_code=404, detail="MediaFile could not be updated")
response = get_file_details(mediafile)
return response
@router.post("/files", status_code=status.HTTP_201_CREATED)
+3 -3
View File
@@ -21,10 +21,10 @@ class BaseMixin:
class BaseVideoMixin:
cloud_link = Column(String)
file_name = Column(String)
cloud_link = Column(String, nullable=True)
file_name = Column(String, nullable=True)
path = Column(String)
review = Column(Boolean)
title = Column(String)
url = Column(String, unique=True)
url = Column(String, nullable=True)
should_download = Column(Boolean)
+6 -1
View File
@@ -71,7 +71,7 @@ class MediaFile(Base, BaseMixin, BaseVideoMixin):
class MediaActor(Base, BaseMixin):
__tablename__ = 'media_actor'
name = Column(String)
url = Column(String, unique=True)
url = Column(String, unique=True, nullable=True)
media_actor_files = relationship("MediaActorFile")
@@ -82,6 +82,11 @@ class MediaActorFile(Base, BaseMixin):
media_file_id = Column(String, ForeignKey("media_file.id"), nullable=True)
media_file = relationship("MediaFile", back_populates="media_actor_files")
def __repr__(self):
    """Return a debug representation: class name plus link, actor and file id."""
    parts = (self.id, self.media_actor_id, self.media_file_id)
    return 'MediaActorFile({} {} {})'.format(*parts)
def __str__(self) -> str:
    """Return the link id, actor id and file id separated by single spaces."""
    parts = (self.id, self.media_actor_id, self.media_file_id)
    return '{} {} {}'.format(*parts)
class MediaArticle(Base, BaseMixin):
__tablename__ = 'media_article'
+14 -1
View File
@@ -3,7 +3,7 @@ from typing import AnyStr
import uuid
from datetime import datetime
from src.core.log_conf import logger
from src.db.models.media import MediaFile, MediaVideo
from src.db.models.media import MediaActorFile, MediaFile, MediaVideo
from src.webapps.media.forms import AddLinkForm
@@ -38,3 +38,16 @@ def create_new_mediafile(link: AnyStr, db: Session) -> MediaFile:
logger.info(f"created {media_file}")
return media_file
def create_new_mediaactorfile(db: Session, actor_id: AnyStr, file_id: AnyStr) -> MediaActorFile:
    """Create and persist the link between a media actor and a media file.

    Args:
        db: Open database session; the new row is added and committed on it.
        actor_id: Id stored as ``media_actor_id`` of the link.
        file_id: Id stored as ``media_file_id`` of the link.

    Returns:
        The committed ``MediaActorFile``, refreshed from the database.
    """
    logger.info(f"create MediaActorFile with actor {actor_id} and file {file_id}")

    # Take the timestamp once so a freshly created row has identical
    # created/last-modified values instead of two clock readings that differ
    # by a few microseconds.
    now = datetime.now()

    media_actor_file: MediaActorFile = MediaActorFile()
    media_actor_file.id = str(uuid.uuid4())
    media_actor_file.created_date = now
    media_actor_file.last_modified_date = now
    media_actor_file.version = 0
    media_actor_file.media_actor_id = actor_id
    media_actor_file.media_file_id = file_id

    db.add(media_actor_file)
    db.commit()
    db.refresh(media_actor_file)
    return media_actor_file
+10
View File
@@ -0,0 +1,10 @@
from datetime import datetime
from src.db.models.media import MediaActor
from pydantic import BaseModel
class MediaActorResponse(BaseModel):
    # Pydantic schema describing one media actor as exchanged over the API.
    # All three fields are required strings.
    # NOTE(review): url is a mandatory str here -- confirm that actors can
    # never be stored without a url, otherwise validation of such rows fails.
    id: str  # identifier of the actor
    name: str  # name of the actor
    url: str  # url associated with the actor
+10
View File
@@ -0,0 +1,10 @@
from datetime import datetime
from src.db.models.media import MediaFile
from pydantic import BaseModel
class MediaActorFileResponse(BaseModel):
    # Pydantic schema describing one link between a media actor and a media
    # file. All three fields are required strings.
    id: str  # identifier of the link itself
    file_id: str  # identifier of the linked media file
    actor_id: str  # identifier of the linked media actor
+2 -2
View File
@@ -9,14 +9,14 @@ class MediaFileResponse(BaseModel):
title: str | None = None
file_name: str | None = None
cloud_link: str | None = None
url: str
url: str | None = None
review: bool = False
should_download: bool = False
class Link(BaseModel):
url: str
def get_file_details(mediafile: MediaFile) -> MediaFileResponse | None:
def get_file_details(mediafile: MediaFile) -> MediaFileResponse:
response = MediaFileResponse(id=mediafile.id,
title=mediafile.title,
file_name=mediafile.file_name,
+3 -3
View File
@@ -21,10 +21,10 @@ class BaseMixin:
class BaseVideoMixin:
cloud_link = Column(String)
file_name = Column(String)
cloud_link = Column(String, nullable=True)
file_name = Column(String, nullable=True)
path = Column(String)
review = Column(Boolean)
title = Column(String)
url = Column(String, unique=True)
url = Column(String, nullable=True)
should_download = Column(Boolean)
+99 -35
View File
@@ -3,25 +3,46 @@ download files with URLs from DB
"""
import logging.config
import requests
import yaml
import re
from argparse import ArgumentParser, ArgumentDefaultsHelpFormatter
from pathlib import Path
from bs4 import BeautifulSoup
from platformdirs import PlatformDirs
parser = ArgumentParser(formatter_class=ArgumentDefaultsHelpFormatter)
parser.add_argument('--verbose', '-v', action='count', default=0)
parser.add_argument('--config', '-c', default='kontor-docker')
parser.add_argument('--all', '-a', action='store_true')
args = parser.parse_args()
def get_logger(level: int, config: str):
dirs = PlatformDirs(config)
logging_config = Path(dirs.user_config_dir, 'logging-config.yaml')
with open(logging_config, 'rt') as f:
configDict = yaml.safe_load(f.read())
logging.config.dictConfig(configDict)
logger = logging.getLogger('development')
def get_logger(level: int) -> logging.Logger:
logging.config.dictConfig({
'version': 1,
'disable_existing_loggers': False,
'formatters': {
'simple': {
'format': '[%(asctime)s] {%(filename)s:%(lineno)d} %(levelname)s - %(message)s',
'datefmt': '%Y-%m-%d %H:%M:%S',
},
},
'handlers': {
'console': {
'class': logging.StreamHandler,
'level': logging.DEBUG,
'formatter': 'simple',
'stream': 'ext://sys.stdout'
},
},
'loggers': {
'urllib3.connectionpool': {
'level': 'WARNING',
'propagate': False,
},
'root': {
'level': 'DEBUG',
'handlers': ['console'],
},
},
})
logger = logging.getLogger(__file__)
if level is not None:
match level:
case 0:
@@ -32,35 +53,78 @@ def get_logger(level: int, config: str):
logger.setLevel(logging.CRITICAL)
return logger
def update_file(log: logging.Logger, media_file):
    """Send the media file to the local media API via PUT and log the outcome.

    Args:
        log: Logger that receives the status code and the decoded JSON reply.
        media_file: Mapping describing the file; its ``'id'`` entry selects
            the endpoint and the whole mapping is sent as the JSON body.
    """
    endpoint = f"http://127.0.0.1:8800/api/media/files/{media_file['id']}"
    reply = requests.put(endpoint, json=media_file)
    log.info(f"update status: {reply.status_code}")
    log.info(f"update result: {reply.json()}")
def get_actor_links(log: logging.Logger, media_file_url: str) -> list:
    """Download a media page and collect the actor links found on it.

    Args:
        log: Logger used for progress and error messages.
        media_file_url: Address of the page to download and scan.

    Returns:
        The ``href`` values of all matching anchors, excluding those ending
        in ``all/countries``. An empty list is returned when the page shows
        the "video not found" marker or when any exception occurs.
    """
    try:
        page = requests.get(media_file_url)
        soup = BeautifulSoup(page.content, "html.parser")

        not_found = soup.css.select_one('.error404-title')
        if not_found and not_found.get_text() == "Video nicht gefunden":
            log.info(f"{not_found.get_text()}")
            # NOTE(review): `item` is not a parameter of this function; it is
            # looked up as a global and is presumably the loop variable of the
            # calling script. If it is missing, the resulting NameError is
            # swallowed by the except clause below -- consider passing the
            # media file in explicitly.
            item['url'] = None
            item['review'] = False
            update_file(log, item)
            return []

        hrefs = (
            anchor.get('href')
            for anchor in soup.find_all('a', attrs={'href': re.compile("^https://.*pornstars/.*")})
        )
        # Overview links ending in 'all/countries' are left out of the result.
        actor_links = [href for href in hrefs if not href.endswith('all/countries')]
        log.info(f"links({len(actor_links)}): {actor_links}")
        return actor_links
    except Exception as error:
        # Best effort: any failure is logged and reported as "no links".
        log.info(f"something went wrong: {error}")
        return []
if __name__ == '__main__':
log = get_logger(args.verbose, args.config)
log.info('kontor.update_titles started')
response = requests.get("http://127.0.0.1:8800/api/media/files?review=true")
log = get_logger(args.verbose)
log.info('kontor.find_links started')
log.info('get all actors')
response = requests.get("http://127.0.0.1:8800/api/media/actors")
data = response.json()
actors = {}
for item in data:
actor = {}
actor['id'] = item['id']
actor['name'] = item['name']
actor['url'] = item['url']
actors[item['url']] = actor
log.debug(f'all actors: {actors}')
files_url = ""
if args.all:
files_url= "http://127.0.0.1:8800/api/media/files"
else:
files_url = "http://127.0.0.1:8800/api/media/files?review=true"
response = requests.get(files_url)
log.info(f"Status: {response.status_code}")
data = response.json()
log.info(f"data: {len(data)}")
for item in data:
link = item['url']
if not link:
continue
if str(link) == "None":
continue
log.info(f"{item['id']} - {str(link)}")
try:
r = requests.get(link)
soup = BeautifulSoup(r.content, "html.parser")
title = soup.title.string
anchors = soup.find_all('a')
for anchor in anchors:
if anchor.has_attr('href'):
link_url = anchor['href']
if link_url and link_url.__contains__('pornstars/'):
log.info(link_url)
item['title'] = title
item['review'] = False
except Exception as error:
log.info(f"something went wrong: {error} {anchor}")
item['title'] = None
item['review'] = True
#update = requests.put(f"http://127.0.0.1:8800/api/media/files/{item['id']}", json=item)
#log.info(f"update status: {update.status_code}")
#log.info(f"update result: {update.json()}")
log.info('kontor.update_titles finished')
actor_links = get_actor_links(log, link)
actor_list = []
for actor_link in actor_links:
if actor_link in actors:
log.info(f"found actor with id: {actors[actor_link]['id']}")
actor_list.append(actors[actor_link])
actor_response = requests.put(f"http://127.0.0.1:8800/api/media/files/{item['id']}/actors", json=actor_list)
actor_data = actor_response.json()
log.info(f"found {len(actor_data)} actors")
log.info(f"found actors: {actor_data}")
item['review'] = False
update = requests.put(f"http://127.0.0.1:8800/api/media/files/{item['id']}", json=item)
log.info(f"update status: {update.status_code}")
log.info(f"update result: {update.json()}")
log.info('kontor.find_links finished')
+5
View File
@@ -43,9 +43,14 @@ if __name__ == '__main__':
for item in data:
link = item['url']
log.info(f"{item['id']} - {str(link)}")
if not link:
continue
try:
r = requests.get(link)
soup = BeautifulSoup(r.content, "html.parser")
title_tag = soup.find('title')
if title_tag:
title= title_tag.get_text()
title = soup.title.string
item['title'] = title
item['review'] = False
@@ -16,7 +16,6 @@ import java.util.List;
@Setter
@EqualsAndHashCode(callSuper = false)
@Entity
@Table(uniqueConstraints = { @UniqueConstraint(columnNames = { "url" }) })
public class MediaFile extends AbstractEntity {
@Nullable