refactor find_links.py by adding methods for specific tasks

This commit is contained in:
Thomas Peetz
2025-09-08 12:42:01 +02:00
parent acbf9c51a3
commit 8b1b84b195
9 changed files with 111 additions and 43 deletions
+2 -1
View File
@@ -1,10 +1,11 @@
from fastapi import APIRouter from fastapi import APIRouter
from src.apis.version1 import comic, mediaactor, mediafile, tysc, admin from src.apis.version1 import comic, mediaactor, mediafile, mediaactorfile, tysc, admin
api_router = APIRouter(prefix="/api") api_router = APIRouter(prefix="/api")
api_router.include_router(comic.router, prefix="/comics", tags=["comics"]) api_router.include_router(comic.router, prefix="/comics", tags=["comics"])
api_router.include_router(mediafile.router, prefix="/media", tags=["media"]) api_router.include_router(mediafile.router, prefix="/media", tags=["media"])
api_router.include_router(mediaactor.router, prefix="/media", tags=["media"]) api_router.include_router(mediaactor.router, prefix="/media", tags=["media"])
api_router.include_router(mediaactorfile.router, prefix="/media", tags=["media"])
api_router.include_router(tysc.router, prefix="/tysc", tags=["tysc"]) api_router.include_router(tysc.router, prefix="/tysc", tags=["tysc"])
api_router.include_router(admin.router, prefix="/login", tags=["login"]) api_router.include_router(admin.router, prefix="/login", tags=["login"])
+5 -7
View File
@@ -1,5 +1,3 @@
from typing import List, AnyStr
from fastapi import APIRouter, status, HTTPException from fastapi import APIRouter, status, HTTPException
from sqlalchemy import select from sqlalchemy import select
from src.core.log_conf import logger from src.core.log_conf import logger
@@ -10,10 +8,10 @@ from src.db.models.media import MediaActor
router = APIRouter() router = APIRouter()
@router.get("/actors", response_model=List[MediaActorResponse]) @router.get("/actors", response_model=list[MediaActorResponse])
#def get_all_files(db: SessionDep, review: bool = False, download: bool = False, current_user: Profile = Depends(get_current_user_from_token)) -> List[MediaFileResponse]: # def get_all_files(db: SessionDep, review: bool = False, download: bool = False, current_user: Profile = Depends(get_current_user_from_token)) -> List[MediaFileResponse]:
def get_all_actors(db: SessionDep, review: bool = False, download: bool = False) -> List[MediaActorResponse]: def get_all_actors(db: SessionDep, review: bool = False, download: bool = False) -> list[MediaActorResponse]:
results: List[MediaActorResponse] = [] results: list[MediaActorResponse] = []
actors = db.scalars(select(MediaActor)).all() actors = db.scalars(select(MediaActor)).all()
for mediaactor in actors: for mediaactor in actors:
response = MediaActorResponse(id=mediaactor.id, name=str(mediaactor.name), url=str(mediaactor.url)) response = MediaActorResponse(id=mediaactor.id, name=str(mediaactor.name), url=str(mediaactor.url))
@@ -21,7 +19,7 @@ def get_all_actors(db: SessionDep, review: bool = False, download: bool = False)
return results return results
@router.get("/actors/{actor_id}", response_model=MediaActorResponse) @router.get("/actors/{actor_id}", response_model=MediaActorResponse)
def get_actor(actor_id: AnyStr, db: SessionDep) -> MediaActorResponse: def get_actor(actor_id: str, db: SessionDep) -> MediaActorResponse:
media_actor = db.get(MediaActor, actor_id) media_actor = db.get(MediaActor, actor_id)
if not media_actor: if not media_actor:
raise HTTPException(status_code=404, detail="MediaActor could not be found") raise HTTPException(status_code=404, detail="MediaActor could not be found")
@@ -0,0 +1,33 @@
from fastapi import APIRouter, status, HTTPException
from sqlalchemy import select
from src.apis.utils import SessionDep
from src.db.models.media import MediaActorFile
from src.db.repository.media import delete_mediaactorfile
from src.schema.media.actorfile import MediaActorFileResponse, get_actorfile_details
router = APIRouter()
@router.get("/actorfiles", response_model=list[MediaActorFileResponse])
def get_all_actorfiles(db: SessionDep) -> list[MediaActorFileResponse]:
    """Return every persisted actor/file relation as a response model.

    Args:
        db: Database session injected by FastAPI.

    Returns:
        One ``MediaActorFileResponse`` per ``MediaActorFile`` row, with the
        actor and file ids converted to strings.
    """
    relations = db.scalars(select(MediaActorFile)).all()
    return [
        MediaActorFileResponse(
            id=relation.id,
            actor_id=str(relation.media_actor_id),
            file_id=str(relation.media_file_id),
        )
        for relation in relations
    ]
@router.get("/actorfiles/{actorfile_id}", response_model=MediaActorFileResponse)
def get_actorfile(actorfile_id: str, db: SessionDep) -> MediaActorFileResponse:
    """Return the actor/file relation with the given id.

    Args:
        actorfile_id: Primary key of the ``MediaActorFile`` to look up.
        db: Database session injected by FastAPI.

    Returns:
        The relation converted by ``get_actorfile_details``.

    Raises:
        HTTPException: 404 when no ``MediaActorFile`` has this id.
    """
    media_actorfile = db.get(MediaActorFile, actorfile_id)
    if media_actorfile is None:
        # The entity looked up here is a MediaActorFile, not a MediaActor;
        # name it correctly so API clients are not misled.
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail="MediaActorFile could not be found",
        )
    return get_actorfile_details(media_actorfile)
@router.delete("/actorfiles/{actorfile_id}", status_code=status.HTTP_204_NO_CONTENT)
def delete_actorfile(actorfile_id: str, db: SessionDep):
    """Delete the actor/file relation with the given id.

    Responds with 204 (no body) on success.

    Args:
        actorfile_id: Primary key of the ``MediaActorFile`` to remove.
        db: Database session injected by FastAPI.

    Raises:
        HTTPException: 404 when no ``MediaActorFile`` has this id.
    """
    media_actorfile = db.get(MediaActorFile, actorfile_id)
    if media_actorfile is None:
        # The missing entity is the relation row, not the actor itself.
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail="MediaActorFile could not be found",
        )
    delete_mediaactorfile(db, media_actorfile.id)
+15 -17
View File
@@ -1,5 +1,3 @@
from typing import List, AnyStr
from fastapi import APIRouter, status, HTTPException, Depends from fastapi import APIRouter, status, HTTPException, Depends
from sqlalchemy import select, Sequence from sqlalchemy import select, Sequence
from src.core.log_conf import logger from src.core.log_conf import logger
@@ -25,45 +23,45 @@ def update_titles(db: SessionDep) -> list[MediaFileResponse]:
return results return results
@router.get("/files", response_model=List[MediaFileResponse]) @router.get("/files", response_model=list[MediaFileResponse])
#def get_all_files(db: SessionDep, review: bool = False, download: bool = False, current_user: Profile = Depends(get_current_user_from_token)) -> List[MediaFileResponse]: # def get_all_files(db: SessionDep, review: bool = False, download: bool = False, current_user: Profile = Depends(get_current_user_from_token)) -> List[MediaFileResponse]:
def get_all_files(db: SessionDep, review: bool = False, download: bool = False) -> List[MediaFileResponse]: def get_all_files(db: SessionDep, review: bool = False, download: bool = False) -> list[MediaFileResponse]:
results: list[MediaFileResponse] = [] results: list[MediaFileResponse] = []
files: Sequence[MediaFile] files: Sequence[MediaFile]
if review: if review:
files = db.query(MediaFile).filter(MediaFile.review == True).all() files = db.query(MediaFile).filter(MediaFile.review == True).all() # type: ignore
elif download: elif download:
files = db.query(MediaFile).filter(MediaFile.should_download == True).all() files = db.query(MediaFile).filter(MediaFile.should_download == True).all() # type: ignore
else: else:
files = db.scalars(select(MediaFile)).all() files = db.scalars(select(MediaFile)).all() # type: ignore
for mediafile in files: for mediafile in files: # type: ignore
response = get_file_details(mediafile) response = get_file_details(mediafile)
results.append(response) results.append(response)
return results return results
@router.get("/files/{file_id}", response_model=MediaFileResponse) @router.get("/files/{file_id}", response_model=MediaFileResponse)
def get_file(file_id: AnyStr, db: SessionDep) -> MediaFileResponse: def get_file(file_id: str, db: SessionDep) -> MediaFileResponse:
mediafile = db.get(MediaFile, file_id) mediafile = db.get(MediaFile, file_id)
if not mediafile: if not mediafile:
raise HTTPException(status_code=404, detail="MediaFile could not be found") raise HTTPException(status_code=404, detail="MediaFile could not be found")
response = get_file_details(mediafile) response = get_file_details(mediafile)
return response return response
@router.get("/files/{file_id}/actors", response_model=List[MediaActorResponse]) @router.get("/files/{file_id}/actors", response_model=list[MediaActorResponse])
def get_file_actors(file_id: AnyStr, db: SessionDep) -> List[MediaActorResponse]: def get_file_actors(file_id: str, db: SessionDep) -> list[MediaActorResponse]:
mediafile = db.get(MediaFile, file_id) mediafile = db.get(MediaFile, file_id)
if not mediafile: if not mediafile:
raise HTTPException(status_code=404, detail="MediaFile could not be found") raise HTTPException(status_code=404, detail="MediaFile could not be found")
actor_files = mediafile.media_actor_files actor_files = mediafile.media_actor_files
logger.info(f"already known actors: {actor_files}") logger.info(f"already known actors: {actor_files}")
results: List[MediaActorResponse] = [] results: list[MediaActorResponse] = []
for actor_file in actor_files: for actor_file in actor_files:
response = MediaActorResponse(id=actor_file.media_actor.id, name=actor_file.media_actor.name, url=actor_file.media_actor.url) response = MediaActorResponse(id=actor_file.media_actor.id, name=actor_file.media_actor.name, url=actor_file.media_actor.url)
results.append(response) results.append(response)
return results return results
@router.put("/files/{file_id}/actors", response_model=List[MediaActorFileResponse]) @router.put("/files/{file_id}/actors", response_model=list[MediaActorFileResponse])
def update_file_actors(file_id: AnyStr, db: SessionDep, actors: List[MediaActorResponse]) -> List[MediaActorFileResponse]: def update_file_actors(file_id: str, db: SessionDep, actors: list[MediaActorResponse]) -> list[MediaActorFileResponse]:
mediafile = db.get(MediaFile, file_id) mediafile = db.get(MediaFile, file_id)
if not mediafile: if not mediafile:
raise HTTPException(status_code=404, detail="MediaFile could not be found") raise HTTPException(status_code=404, detail="MediaFile could not be found")
@@ -80,14 +78,14 @@ def update_file_actors(file_id: AnyStr, db: SessionDep, actors: List[MediaActorR
create_new_mediaactorfile(db, actor.id, mediafile.id) create_new_mediaactorfile(db, actor.id, mediafile.id)
db.refresh(mediafile) db.refresh(mediafile)
actor_files = mediafile.media_actor_files actor_files = mediafile.media_actor_files
results: List[MediaActorFileResponse] = [] results: list[MediaActorFileResponse] = []
for actor_file in actor_files: for actor_file in actor_files:
response = MediaActorFileResponse(id=actor_file.id, actor_id=actor_file.media_actor_id, file_id=actor_file.media_file_id) response = MediaActorFileResponse(id=actor_file.id, actor_id=actor_file.media_actor_id, file_id=actor_file.media_file_id)
results.append(response) results.append(response)
return results return results
@router.put("/files/{file_id}", response_model=MediaFileResponse) @router.put("/files/{file_id}", response_model=MediaFileResponse)
def update_file(file_id: AnyStr, db: SessionDep, info: MediaFileResponse) -> MediaFileResponse: def update_file(file_id: str, db: SessionDep, info: MediaFileResponse) -> MediaFileResponse:
mediaFile = db.get(MediaFile, file_id) mediaFile = db.get(MediaFile, file_id)
if not mediaFile: if not mediaFile:
raise HTTPException(status_code=404, detail="MediaFile could not be found") raise HTTPException(status_code=404, detail="MediaFile could not be found")
+3 -3
View File
@@ -25,9 +25,9 @@ class MediaFile(Base, BaseMixin, BaseVideoMixin):
def update_title(self) -> None: def update_title(self) -> None:
logging.info(f"update title for {self.url}") logging.info(f"update title for {self.url}")
try: try:
r = requests.get(self.url) r = requests.get(str(self.url))
soup = BeautifulSoup(r.content, "html.parser") soup = BeautifulSoup(r.content, "html.parser")
title = soup.title.string title = soup.title.get_text() # type: ignore
self.title = title self.title = title
self.review = False self.review = False
except: except:
@@ -37,7 +37,7 @@ class MediaFile(Base, BaseMixin, BaseVideoMixin):
def download_file(self, download_dir: str, dl_tool: str): def download_file(self, download_dir: str, dl_tool: str):
logging.info(f"download file for {self.url} to {download_dir}") logging.info(f"download file for {self.url} to {download_dir}")
result = subprocess.run([dl_tool, self.url], cwd=download_dir, capture_output=True, text=True) result = subprocess.run([dl_tool, self.url], cwd=download_dir, capture_output=True, text=True) # type: ignore
if result.returncode == 0: if result.returncode == 0:
output = result.stdout output = result.stdout
output = re.sub(' +', ' ', output) output = re.sub(' +', ' ', output)
+16 -11
View File
@@ -1,5 +1,4 @@
from sqlalchemy.orm import Session from sqlalchemy.orm import Session
from typing import AnyStr
import uuid import uuid
from datetime import datetime from datetime import datetime
from src.core.log_conf import logger from src.core.log_conf import logger
@@ -12,22 +11,22 @@ def create_new_video(video: AddLinkForm, db: Session) -> MediaVideo:
print(video.url) print(video.url)
media_video = MediaVideo() media_video = MediaVideo()
media_video.id = str(uuid.uuid4()) media_video.id = str(uuid.uuid4())
media_video.url = video.url media_video.url = video.url # type: ignore
media_video.created_date = datetime.now() media_video.created_date = datetime.now()
media_video.last_modified_date = datetime.now() media_video.last_modified_date = datetime.now()
media_video.review = True media_video.review = True # type: ignore
media_video.should_download = True media_video.should_download = True # type: ignore
db.add(media_video) db.add(media_video)
db.commit() db.commit()
db.refresh(media_video) db.refresh(media_video)
print(media_video) print(media_video)
return media_video return media_video
def create_new_mediafile(link: AnyStr, db: Session) -> MediaFile: def create_new_mediafile(link: str, db: Session) -> MediaFile:
logger.info("create MediaFile with url {link}") logger.info("create MediaFile with url {link}")
media_file: MediaFile = MediaFile() media_file: MediaFile = MediaFile()
media_file.id = str(uuid.uuid4()) media_file.id = str(uuid.uuid4())
media_file.url = link media_file.url = link # type: ignore
media_file.created_date = datetime.now() media_file.created_date = datetime.now()
media_file.last_modified_date = datetime.now() media_file.last_modified_date = datetime.now()
media_file.version = 0 media_file.version = 0
@@ -43,8 +42,8 @@ def create_new_mediaactor(new_actor: Actor, db: Session) -> MediaActor:
logger.info(f"create MediaActor with url {new_actor.url}") logger.info(f"create MediaActor with url {new_actor.url}")
media_actor: MediaActor = MediaActor() media_actor: MediaActor = MediaActor()
media_actor.id = str(uuid.uuid4()) media_actor.id = str(uuid.uuid4())
media_actor.name = str(new_actor.name) media_actor.name = str(new_actor.name) # type: ignore
media_actor.url = str(new_actor.url) media_actor.url = str(new_actor.url) # type: ignore
media_actor.created_date = datetime.now() media_actor.created_date = datetime.now()
media_actor.last_modified_date = datetime.now() media_actor.last_modified_date = datetime.now()
media_actor.version = 0 media_actor.version = 0
@@ -54,16 +53,22 @@ def create_new_mediaactor(new_actor: Actor, db: Session) -> MediaActor:
logger.info(f"created {media_actor}") logger.info(f"created {media_actor}")
return media_actor return media_actor
def create_new_mediaactorfile(db: Session, actor_id: AnyStr, file_id: AnyStr) -> MediaActorFile: def create_new_mediaactorfile(db: Session, actor_id: str, file_id: str) -> MediaActorFile:
logger.info(f"create MediaActorFile with actor {actor_id} and file {file_id}") logger.info(f"create MediaActorFile with actor {actor_id} and file {file_id}")
media_actor_file: MediaActorFile = MediaActorFile() media_actor_file: MediaActorFile = MediaActorFile()
media_actor_file.id = str(uuid.uuid4()) media_actor_file.id = str(uuid.uuid4())
media_actor_file.created_date = datetime.now() media_actor_file.created_date = datetime.now()
media_actor_file.last_modified_date = datetime.now() media_actor_file.last_modified_date = datetime.now()
media_actor_file.version = 0 media_actor_file.version = 0
media_actor_file.media_actor_id = actor_id media_actor_file.media_actor_id = actor_id # type: ignore
media_actor_file.media_file_id = file_id media_actor_file.media_file_id = file_id # type: ignore
db.add(media_actor_file) db.add(media_actor_file)
db.commit() db.commit()
db.refresh(media_actor_file) db.refresh(media_actor_file)
return media_actor_file return media_actor_file
def delete_mediaactorfile(db: Session, actorfile_id: str):
    """Delete the ``MediaActorFile`` row with the given id and commit.

    Args:
        db: Open SQLAlchemy session used for the lookup, delete and commit.
        actorfile_id: Primary key of the relation to remove.

    Returns:
        None. A missing row is logged and treated as "nothing to do".
    """
    logger.info(f"delete MediaActorFile with id {actorfile_id}")
    media_actorfile = db.get(MediaActorFile, actorfile_id)
    if media_actorfile is None:
        # Session.delete() rejects None, so an unknown id would otherwise
        # surface as an ORM error instead of a harmless no-op.
        logger.warning(f"MediaActorFile with id {actorfile_id} not found, nothing to delete")
        return
    db.delete(media_actorfile)
    db.commit()
+7 -1
View File
@@ -1,6 +1,6 @@
from datetime import datetime from datetime import datetime
from src.db.models.media import MediaFile from src.db.models.media import MediaActorFile, MediaFile
from pydantic import BaseModel from pydantic import BaseModel
@@ -8,3 +8,9 @@ class MediaActorFileResponse(BaseModel):
id: str id: str
file_id: str file_id: str
actor_id: str actor_id: str
def get_actorfile_details(media_actorfile: MediaActorFile) -> MediaActorFileResponse:
    """Build the API response model for a single actor/file relation.

    Args:
        media_actorfile: The ORM relation row to convert.

    Returns:
        A ``MediaActorFileResponse`` carrying the row id plus the file and
        actor ids converted to strings.
    """
    return MediaActorFileResponse(
        id=media_actorfile.id,
        file_id=str(media_actorfile.media_file_id),
        actor_id=str(media_actorfile.media_actor_id),
    )
+2 -2
View File
@@ -30,8 +30,8 @@ def get_file_details(mediafile: MediaFile) -> MediaFileResponse:
def set_file(model: MediaFileResponse, mediafile: MediaFile) -> None: def set_file(model: MediaFileResponse, mediafile: MediaFile) -> None:
mediafile.file_name = model.file_name mediafile.file_name = model.file_name
mediafile.cloud_link = model.cloud_link mediafile.cloud_link = model.cloud_link # type: ignore
mediafile.url = model.url mediafile.url = model.url # type: ignore
mediafile.title = model.title mediafile.title = model.title
mediafile.last_modified_date = datetime.now() mediafile.last_modified_date = datetime.now()
mediafile.review = model.review mediafile.review = model.review
+28 -1
View File
@@ -129,12 +129,18 @@ def update_media_file_actors(mediafile: dict,
log.debug(f"check if actor({actor_id}) with {actor_url} in list") log.debug(f"check if actor({actor_id}) with {actor_url} in list")
if actor_url not in actor_links: if actor_url not in actor_links:
log.info(f"actor not found in links, delete relation {file_actor['id']}") log.info(f"actor not found in links, delete relation {file_actor['id']}")
delete_media_file_actor(file_actor['id'], log)
mediafile['review'] = True mediafile['review'] = True
else: else:
mediafile['review'] = False mediafile['review'] = False
log.debug(f"found {persisted_actor_links_count} actors") log.debug(f"found {persisted_actor_links_count} actors")
log.debug(f"found actors: {files_actor_list}") log.debug(f"found actors: {files_actor_list}")
def delete_media_file_actor(media_actor_file_id: str,
                            log: logging.Logger,
                            base_url: str = "http://127.0.0.1:8800/api/media",
                            timeout: float = 10.0):
    """Ask the media API to delete one actor/file relation.

    Args:
        media_actor_file_id: Id of the relation to delete.
        log: Logger that receives the outcome of the request.
        base_url: Root of the media API; defaults to the local instance
            this script has always talked to.
        timeout: Seconds to wait for the API before ``requests`` raises.

    Returns:
        None. The outcome is only reported through ``log``.
    """
    # Without a timeout requests waits indefinitely on an unresponsive server.
    delete_response = requests.delete(
        f"{base_url}/actorfiles/{media_actor_file_id}",
        timeout=timeout,
    )
    if delete_response.status_code == 204:
        log.info(f"actor file relation with id {media_actor_file_id} successfully deleted")
    else:
        # Surface failures (e.g. 404 for an unknown id) instead of dropping them.
        log.warning(
            f"deleting actor file relation with id {media_actor_file_id} failed "
            f"with status {delete_response.status_code}"
        )
def get_actor_ids(link_list: list[str], def get_actor_ids(link_list: list[str],
map_url_actor: dict[str, str], map_url_actor: dict[str, str],
map_ids_actor: dict[str, str], map_ids_actor: dict[str, str],
@@ -161,7 +167,28 @@ def get_persisted_actor(actor_url: str,
log: logging.Logger) -> dict[str, str] | None: log: logging.Logger) -> dict[str, str] | None:
alternate_url_actor: dict[str, dict[str, str]] = { alternate_url_actor: dict[str, dict[str, str]] = {
'https://ge.xhamster2.com/pornstars/jean-yves-lecastel': 'https://ge.xhamster2.com/pornstars/jean-yves-lecastel':
{'id': 'e354b866-717c-4a66-ad38-bc7c23d97e36', 'name': 'Jean-Yves Le Castel', 'url': 'https://ge.xhamster.com/pornstars/jean-yves-le-castel'}} # type: ignore {'id': 'e354b866-717c-4a66-ad38-bc7c23d97e36', 'name': 'Jean-Yves Le Castel', 'url': 'https://ge.xhamster.com/pornstars/jean-yves-le-castel'},
'https://ge.xhamster.com/pornstars/jean-yves-lecastel':
{'id': 'e354b866-717c-4a66-ad38-bc7c23d97e36', 'name': 'Jean-Yves Le Castel', 'url': 'https://ge.xhamster.com/pornstars/jean-yves-le-castel'},
'https://ge.xhamster.com/pornstars/gracie-green':
{'id': 'cbec2e0d-869c-40f1-923f-21958d938d9f', 'name': 'Gracie May Green', 'url':'https://ge.xhamster.com/pornstars/gracie-may-green'},
'https://ge.xhamster.com/pornstars/thomas-hyka':
{'id': '1d814b45-ea98-4acc-88a2-227d3ed36959', 'name': 'Thomas Crown', 'url':'https://ge.xhamster.com/pornstars/thomas-crown'},
'https://ge.xhamster.com/pornstars/chloe-couture':
{'id': 'e22003a5-60a9-4d86-a1df-ae09ecbe5200', 'name': 'Chloe Cherry', 'url':'https://ge.xhamster.com/pornstars/chloe-cherry'},
'https://ge.xhamster.com/pornstars/dava-fox':
{'id': 'd913b778-4507-421b-88e0-9da73bb80a63', 'name': 'Dava Foxx', 'url':'https://ge.xhamster.com/pornstars/dava-foxx'},
'https://ge.xhamster.com/pornstars/john-dough':
{'id': 'a2ecd50f-09b2-4d31-9fcf-1a1438700f51', 'name': 'Jon Dough', 'url':'https://ge.xhamster.com/pornstars/jon-dough'},
'https://ge.xhamster.com/pornstars/erica-mori':
{'id': '5379dab9-63da-44ed-baf1-929d74ac60b1', 'name': 'Polly Yangs', 'url':'https://ge.xhamster.com/pornstars/polly-yangs'},
'https://ge.xhamster.com/pornstars/elnara-cat':
{'id': '543952d7-59a9-4492-a70f-e384b5f8eb57', 'name': 'Renata Fox', 'url':'https://ge.xhamster.com/pornstars/renata-fox'},
'https://ge.xhamster.com/pornstars/melissa-grand':
{'id': '5d025bea-4af6-4197-b38d-3b3afa9d30b9', 'name': 'Melissa Benz', 'url':'https://ge.xhamster.com/pornstars/melissa-benz'},
'https://ge.xhamster.com/pornstars/sindy-dollar':
{'id': 'fa97769c-9e53-4613-b3c3-4cc1a2672d4b', 'name': 'Cindy Dollar', 'url':'https://ge.xhamster.com/pornstars/cindy-dollar'},
} # type: ignore
if actor_url in map_url_actor: if actor_url in map_url_actor:
actor_id: str = map_url_actor[actor_url]['id'] # type: ignore actor_id: str = map_url_actor[actor_url]['id'] # type: ignore
log.debug(f"found actor with id: {actor_id}") log.debug(f"found actor with id: {actor_id}")