refactor find_links.py by adding methods for specific tasks

This commit is contained in:
Thomas Peetz
2025-09-08 12:42:01 +02:00
parent acbf9c51a3
commit 8b1b84b195
9 changed files with 111 additions and 43 deletions
+2 -1
View File
@@ -1,10 +1,11 @@
from fastapi import APIRouter
from src.apis.version1 import comic, mediaactor, mediafile, tysc, admin
from src.apis.version1 import comic, mediaactor, mediafile, mediaactorfile, tysc, admin
api_router = APIRouter(prefix="/api")
api_router.include_router(comic.router, prefix="/comics", tags=["comics"])
api_router.include_router(mediafile.router, prefix="/media", tags=["media"])
api_router.include_router(mediaactor.router, prefix="/media", tags=["media"])
api_router.include_router(mediaactorfile.router, prefix="/media", tags=["media"])
api_router.include_router(tysc.router, prefix="/tysc", tags=["tysc"])
api_router.include_router(admin.router, prefix="/login", tags=["login"])
+4 -6
View File
@@ -1,5 +1,3 @@
from typing import List, AnyStr
from fastapi import APIRouter, status, HTTPException
from sqlalchemy import select
from src.core.log_conf import logger
@@ -10,10 +8,10 @@ from src.db.models.media import MediaActor
router = APIRouter()
@router.get("/actors", response_model=List[MediaActorResponse])
@router.get("/actors", response_model=list[MediaActorResponse])
# def get_all_files(db: SessionDep, review: bool = False, download: bool = False, current_user: Profile = Depends(get_current_user_from_token)) -> List[MediaFileResponse]:
def get_all_actors(db: SessionDep, review: bool = False, download: bool = False) -> List[MediaActorResponse]:
results: List[MediaActorResponse] = []
def get_all_actors(db: SessionDep, review: bool = False, download: bool = False) -> list[MediaActorResponse]:
results: list[MediaActorResponse] = []
actors = db.scalars(select(MediaActor)).all()
for mediaactor in actors:
response = MediaActorResponse(id=mediaactor.id, name=str(mediaactor.name), url=str(mediaactor.url))
@@ -21,7 +19,7 @@ def get_all_actors(db: SessionDep, review: bool = False, download: bool = False)
return results
@router.get("/actors/{actor_id}", response_model=MediaActorResponse)
def get_actor(actor_id: AnyStr, db: SessionDep) -> MediaActorResponse:
def get_actor(actor_id: str, db: SessionDep) -> MediaActorResponse:
media_actor = db.get(MediaActor, actor_id)
if not media_actor:
raise HTTPException(status_code=404, detail="MediaActor could not be found")
@@ -0,0 +1,33 @@
from fastapi import APIRouter, status, HTTPException
from sqlalchemy import select
from src.apis.utils import SessionDep
from src.db.models.media import MediaActorFile
from src.db.repository.media import delete_mediaactorfile
from src.schema.media.actorfile import MediaActorFileResponse, get_actorfile_details
router = APIRouter()
@router.get("/actorfiles", response_model=list[MediaActorFileResponse])
def get_all_actorfiles(db: SessionDep) -> list[MediaActorFileResponse]:
results: list[MediaActorFileResponse] = []
actorfiles = db.scalars(select(MediaActorFile)).all()
for mediaactorfile in actorfiles:
response = MediaActorFileResponse(id=mediaactorfile.id, actor_id=str(mediaactorfile.media_actor_id), file_id=str(mediaactorfile.media_file_id))
results.append(response)
return results
@router.get("/actorfiles/{actorfile_id}", response_model=MediaActorFileResponse)
def get_actorfile(actorfile_id: str, db: SessionDep) -> MediaActorFileResponse:
media_actorfile = db.get(MediaActorFile, actorfile_id)
if not media_actorfile:
raise HTTPException(status_code=404, detail="MediaActor could not be found")
response = get_actorfile_details(media_actorfile)
return response
@router.delete("/actorfiles/{actorfile_id}", status_code=status.HTTP_204_NO_CONTENT)
def delete_actorfile(actorfile_id: str, db: SessionDep):
media_actorfile = db.get(MediaActorFile, actorfile_id)
if not media_actorfile:
raise HTTPException(status_code=404, detail="MediaActor could not be found")
delete_mediaactorfile(db, media_actorfile.id)
+14 -16
View File
@@ -1,5 +1,3 @@
from typing import List, AnyStr
from fastapi import APIRouter, status, HTTPException, Depends
from sqlalchemy import select, Sequence
from src.core.log_conf import logger
@@ -25,45 +23,45 @@ def update_titles(db: SessionDep) -> list[MediaFileResponse]:
return results
@router.get("/files", response_model=List[MediaFileResponse])
@router.get("/files", response_model=list[MediaFileResponse])
# def get_all_files(db: SessionDep, review: bool = False, download: bool = False, current_user: Profile = Depends(get_current_user_from_token)) -> List[MediaFileResponse]:
def get_all_files(db: SessionDep, review: bool = False, download: bool = False) -> List[MediaFileResponse]:
def get_all_files(db: SessionDep, review: bool = False, download: bool = False) -> list[MediaFileResponse]:
results: list[MediaFileResponse] = []
files: Sequence[MediaFile]
if review:
files = db.query(MediaFile).filter(MediaFile.review == True).all()
files = db.query(MediaFile).filter(MediaFile.review == True).all() # type: ignore
elif download:
files = db.query(MediaFile).filter(MediaFile.should_download == True).all()
files = db.query(MediaFile).filter(MediaFile.should_download == True).all() # type: ignore
else:
files = db.scalars(select(MediaFile)).all()
for mediafile in files:
files = db.scalars(select(MediaFile)).all() # type: ignore
for mediafile in files: # type: ignore
response = get_file_details(mediafile)
results.append(response)
return results
@router.get("/files/{file_id}", response_model=MediaFileResponse)
def get_file(file_id: AnyStr, db: SessionDep) -> MediaFileResponse:
def get_file(file_id: str, db: SessionDep) -> MediaFileResponse:
mediafile = db.get(MediaFile, file_id)
if not mediafile:
raise HTTPException(status_code=404, detail="MediaFile could not be found")
response = get_file_details(mediafile)
return response
@router.get("/files/{file_id}/actors", response_model=List[MediaActorResponse])
def get_file_actors(file_id: AnyStr, db: SessionDep) -> List[MediaActorResponse]:
@router.get("/files/{file_id}/actors", response_model=list[MediaActorResponse])
def get_file_actors(file_id: str, db: SessionDep) -> list[MediaActorResponse]:
mediafile = db.get(MediaFile, file_id)
if not mediafile:
raise HTTPException(status_code=404, detail="MediaFile could not be found")
actor_files = mediafile.media_actor_files
logger.info(f"already known actors: {actor_files}")
results: List[MediaActorResponse] = []
results: list[MediaActorResponse] = []
for actor_file in actor_files:
response = MediaActorResponse(id=actor_file.media_actor.id, name=actor_file.media_actor.name, url=actor_file.media_actor.url)
results.append(response)
return results
@router.put("/files/{file_id}/actors", response_model=List[MediaActorFileResponse])
def update_file_actors(file_id: AnyStr, db: SessionDep, actors: List[MediaActorResponse]) -> List[MediaActorFileResponse]:
@router.put("/files/{file_id}/actors", response_model=list[MediaActorFileResponse])
def update_file_actors(file_id: str, db: SessionDep, actors: list[MediaActorResponse]) -> list[MediaActorFileResponse]:
mediafile = db.get(MediaFile, file_id)
if not mediafile:
raise HTTPException(status_code=404, detail="MediaFile could not be found")
@@ -80,14 +78,14 @@ def update_file_actors(file_id: AnyStr, db: SessionDep, actors: List[MediaActorR
create_new_mediaactorfile(db, actor.id, mediafile.id)
db.refresh(mediafile)
actor_files = mediafile.media_actor_files
results: List[MediaActorFileResponse] = []
results: list[MediaActorFileResponse] = []
for actor_file in actor_files:
response = MediaActorFileResponse(id=actor_file.id, actor_id=actor_file.media_actor_id, file_id=actor_file.media_file_id)
results.append(response)
return results
@router.put("/files/{file_id}", response_model=MediaFileResponse)
def update_file(file_id: AnyStr, db: SessionDep, info: MediaFileResponse) -> MediaFileResponse:
def update_file(file_id: str, db: SessionDep, info: MediaFileResponse) -> MediaFileResponse:
mediaFile = db.get(MediaFile, file_id)
if not mediaFile:
raise HTTPException(status_code=404, detail="MediaFile could not be found")
+3 -3
View File
@@ -25,9 +25,9 @@ class MediaFile(Base, BaseMixin, BaseVideoMixin):
def update_title(self) -> None:
logging.info(f"update title for {self.url}")
try:
r = requests.get(self.url)
r = requests.get(str(self.url))
soup = BeautifulSoup(r.content, "html.parser")
title = soup.title.string
title = soup.title.get_text() # type: ignore
self.title = title
self.review = False
except:
@@ -37,7 +37,7 @@ class MediaFile(Base, BaseMixin, BaseVideoMixin):
def download_file(self, download_dir: str, dl_tool: str):
logging.info(f"download file for {self.url} to {download_dir}")
result = subprocess.run([dl_tool, self.url], cwd=download_dir, capture_output=True, text=True)
result = subprocess.run([dl_tool, self.url], cwd=download_dir, capture_output=True, text=True) # type: ignore
if result.returncode == 0:
output = result.stdout
output = re.sub(' +', ' ', output)
+16 -11
View File
@@ -1,5 +1,4 @@
from sqlalchemy.orm import Session
from typing import AnyStr
import uuid
from datetime import datetime
from src.core.log_conf import logger
@@ -12,22 +11,22 @@ def create_new_video(video: AddLinkForm, db: Session) -> MediaVideo:
print(video.url)
media_video = MediaVideo()
media_video.id = str(uuid.uuid4())
media_video.url = video.url
media_video.url = video.url # type: ignore
media_video.created_date = datetime.now()
media_video.last_modified_date = datetime.now()
media_video.review = True
media_video.should_download = True
media_video.review = True # type: ignore
media_video.should_download = True # type: ignore
db.add(media_video)
db.commit()
db.refresh(media_video)
print(media_video)
return media_video
def create_new_mediafile(link: AnyStr, db: Session) -> MediaFile:
def create_new_mediafile(link: str, db: Session) -> MediaFile:
logger.info("create MediaFile with url {link}")
media_file: MediaFile = MediaFile()
media_file.id = str(uuid.uuid4())
media_file.url = link
media_file.url = link # type: ignore
media_file.created_date = datetime.now()
media_file.last_modified_date = datetime.now()
media_file.version = 0
@@ -43,8 +42,8 @@ def create_new_mediaactor(new_actor: Actor, db: Session) -> MediaActor:
logger.info(f"create MediaActor with url {new_actor.url}")
media_actor: MediaActor = MediaActor()
media_actor.id = str(uuid.uuid4())
media_actor.name = str(new_actor.name)
media_actor.url = str(new_actor.url)
media_actor.name = str(new_actor.name) # type: ignore
media_actor.url = str(new_actor.url) # type: ignore
media_actor.created_date = datetime.now()
media_actor.last_modified_date = datetime.now()
media_actor.version = 0
@@ -54,16 +53,22 @@ def create_new_mediaactor(new_actor: Actor, db: Session) -> MediaActor:
logger.info(f"created {media_actor}")
return media_actor
def create_new_mediaactorfile(db: Session, actor_id: AnyStr, file_id: AnyStr) -> MediaActorFile:
def create_new_mediaactorfile(db: Session, actor_id: str, file_id: str) -> MediaActorFile:
logger.info(f"create MediaActorFile with actor {actor_id} and file {file_id}")
media_actor_file: MediaActorFile = MediaActorFile()
media_actor_file.id = str(uuid.uuid4())
media_actor_file.created_date = datetime.now()
media_actor_file.last_modified_date = datetime.now()
media_actor_file.version = 0
media_actor_file.media_actor_id = actor_id
media_actor_file.media_file_id = file_id
media_actor_file.media_actor_id = actor_id # type: ignore
media_actor_file.media_file_id = file_id # type: ignore
db.add(media_actor_file)
db.commit()
db.refresh(media_actor_file)
return media_actor_file
def delete_mediaactorfile(db: Session, actorfile_id: str):
logger.info(f"delete MediaActorFile with id {actorfile_id}")
media_actorfile = db.get(MediaActorFile, actorfile_id)
db.delete(media_actorfile)
db.commit()
+7 -1
View File
@@ -1,6 +1,6 @@
from datetime import datetime
from src.db.models.media import MediaFile
from src.db.models.media import MediaActorFile, MediaFile
from pydantic import BaseModel
@@ -8,3 +8,9 @@ class MediaActorFileResponse(BaseModel):
id: str
file_id: str
actor_id: str
def get_actorfile_details(media_actorfile: MediaActorFile) -> MediaActorFileResponse:
response: MediaActorFileResponse = MediaActorFileResponse(id=media_actorfile.id,
file_id=str(media_actorfile.media_file_id),
actor_id=str(media_actorfile.media_actor_id))
return response
+2 -2
View File
@@ -30,8 +30,8 @@ def get_file_details(mediafile: MediaFile) -> MediaFileResponse:
def set_file(model: MediaFileResponse, mediafile: MediaFile) -> None:
mediafile.file_name = model.file_name
mediafile.cloud_link = model.cloud_link
mediafile.url = model.url
mediafile.cloud_link = model.cloud_link # type: ignore
mediafile.url = model.url # type: ignore
mediafile.title = model.title
mediafile.last_modified_date = datetime.now()
mediafile.review = model.review
+28 -1
View File
@@ -129,12 +129,18 @@ def update_media_file_actors(mediafile: dict,
log.debug(f"check if actor({actor_id}) with {actor_url} in list")
if actor_url not in actor_links:
log.info(f"actor not found in links, delete relation {file_actor['id']}")
delete_media_file_actor(file_actor['id'], log)
mediafile['review'] = True
else:
mediafile['review'] = False
log.debug(f"found {persisted_actor_links_count} actors")
log.debug(f"found actors: {files_actor_list}")
def delete_media_file_actor(media_actor_file_id: str, log: logging.Logger):
delete_response = requests.delete(f"http://127.0.0.1:8800/api/media/actorfiles/{media_actor_file_id}")
if delete_response.status_code == 204:
log.info(f"actor file relation with id {media_actor_file_id} successfully deleted")
def get_actor_ids(link_list: list[str],
map_url_actor: dict[str, str],
map_ids_actor: dict[str, str],
@@ -161,7 +167,28 @@ def get_persisted_actor(actor_url: str,
log: logging.Logger) -> dict[str, str] | None:
alternate_url_actor: dict[str, dict[str, str]] = {
'https://ge.xhamster2.com/pornstars/jean-yves-lecastel':
{'id': 'e354b866-717c-4a66-ad38-bc7c23d97e36', 'name': 'Jean-Yves Le Castel', 'url': 'https://ge.xhamster.com/pornstars/jean-yves-le-castel'}} # type: ignore
{'id': 'e354b866-717c-4a66-ad38-bc7c23d97e36', 'name': 'Jean-Yves Le Castel', 'url': 'https://ge.xhamster.com/pornstars/jean-yves-le-castel'},
'https://ge.xhamster.com/pornstars/jean-yves-lecastel':
{'id': 'e354b866-717c-4a66-ad38-bc7c23d97e36', 'name': 'Jean-Yves Le Castel', 'url': 'https://ge.xhamster.com/pornstars/jean-yves-le-castel'},
'https://ge.xhamster.com/pornstars/gracie-green':
{'id': 'cbec2e0d-869c-40f1-923f-21958d938d9f', 'name': 'Gracie May Green', 'url':'https://ge.xhamster.com/pornstars/gracie-may-green'},
'https://ge.xhamster.com/pornstars/thomas-hyka':
{'id': '1d814b45-ea98-4acc-88a2-227d3ed36959', 'name': 'Thomas Crown', 'url':'https://ge.xhamster.com/pornstars/thomas-crown'},
'https://ge.xhamster.com/pornstars/chloe-couture':
{'id': 'e22003a5-60a9-4d86-a1df-ae09ecbe5200', 'name': 'Chloe Cherry', 'url':'https://ge.xhamster.com/pornstars/chloe-cherry'},
'https://ge.xhamster.com/pornstars/dava-fox':
{'id': 'd913b778-4507-421b-88e0-9da73bb80a63', 'name': 'Dava Foxx', 'url':'https://ge.xhamster.com/pornstars/dava-foxx'},
'https://ge.xhamster.com/pornstars/john-dough':
{'id': 'a2ecd50f-09b2-4d31-9fcf-1a1438700f51', 'name': 'Jon Dough', 'url':'https://ge.xhamster.com/pornstars/jon-dough'},
'https://ge.xhamster.com/pornstars/erica-mori':
{'id': '5379dab9-63da-44ed-baf1-929d74ac60b1', 'name': 'Polly Yangs', 'url':'https://ge.xhamster.com/pornstars/polly-yangs'},
'https://ge.xhamster.com/pornstars/elnara-cat':
{'id': '543952d7-59a9-4492-a70f-e384b5f8eb57', 'name': 'Renata Fox', 'url':'https://ge.xhamster.com/pornstars/renata-fox'},
'https://ge.xhamster.com/pornstars/melissa-grand':
{'id': '5d025bea-4af6-4197-b38d-3b3afa9d30b9', 'name': 'Melissa Benz', 'url':'https://ge.xhamster.com/pornstars/melissa-benz'},
'https://ge.xhamster.com/pornstars/sindy-dollar':
{'id': 'fa97769c-9e53-4613-b3c3-4cc1a2672d4b', 'name': 'Cindy Dollar', 'url':'https://ge.xhamster.com/pornstars/cindy-dollar'},
} # type: ignore
if actor_url in map_url_actor:
actor_id: str = map_url_actor[actor_url]['id'] # type: ignore
log.debug(f"found actor with id: {actor_id}")