"""ORM models for media files, actors, articles and videos.

Each model can be round-tripped through a plain dict with ``import_dict`` /
``export_dict``.  ``MediaFile`` can additionally look up its own page title
and drive an external download tool.
"""
import re
import subprocess
from datetime import datetime
from pathlib import Path
from typing import Any, Dict, Iterable, List, Optional

import requests
from bs4 import BeautifulSoup
from sqlalchemy import Boolean, Column, String, ForeignKey
from sqlalchemy.orm import relationship

from db.models.base import Base, BaseMixin, BaseVideoMixin

# Keys shared by every model's import/export dict, in export order.
_BASE_FIELDS = ('id', 'created_date', 'last_modified_date', 'version')
# Keys whose values are passed through ``str()`` on export.
_DATE_FIELDS = frozenset({'created_date', 'last_modified_date'})

# Model-specific keys, in export order.
_VIDEO_FIELDS = (
    'cloud_link',
    'file_name',
    'path',
    'review',
    'title',
    'url',
    'should_download',
)
_ACTOR_FIELDS = ('name', 'url')
_ACTOR_FILE_FIELDS = ('media_actor_id', 'media_file_id')
_ARTICLE_FIELDS = ('review', 'title', 'url')

# Markers in the download tool's stdout that carry the target file name.
_DOWNLOAD_PREFIX = '[download] '
_ALREADY_DOWNLOADED_MARKER = ' has already been downloaded'
_DESTINATION_MARKER = 'Destination: '


def _import_fields(obj: Any, import_data: Dict[str, Any], fields: Iterable[str]) -> None:
    """Copy the base keys plus *fields* from *import_data* onto *obj*.

    Raises:
        KeyError: if a required key is missing from *import_data*.
    """
    # NOTE(review): export_dict stringifies the two date fields while this
    # assigns whatever it is given — confirm callers convert dates back.
    for field in _BASE_FIELDS + tuple(fields):
        setattr(obj, field, import_data[field])


def _export_fields(obj: Any, fields: Iterable[str]) -> Dict[str, Any]:
    """Return a dict of the base keys plus *fields* read from *obj*."""
    item: Dict[str, Any] = {}
    for field in _BASE_FIELDS + tuple(fields):
        value = getattr(obj, field)
        item[field] = str(value) if field in _DATE_FIELDS else value
    return item


class MediaFile(Base, BaseMixin, BaseVideoMixin):
    """A media file row that can fetch its title and download itself.

    The title/url/review/path/... attributes used below are not declared in
    this class; presumably they come from ``BaseVideoMixin`` (not visible
    here — verify against ``db.models.base``).
    """

    __tablename__ = 'media_file'

    # NOTE(review): MediaActorFile.media_file names this attribute in its
    # back_populates, but this side does not name one in return — confirm
    # whether two-way synchronisation is intended.
    media_actor_files = relationship("MediaActorFile")

    def __repr__(self):
        return (
            f'MediaFile(\n\tID: {self.id}\n\tTitle: {self.title}'
            f'\n\tURL: {self.url}\n\tReview: {self.review}'
            f'\n\tDownload: {self.should_download}\n\tPath: {self.path}'
            f'\n\tCloudlink: {self.cloud_link})'
        )

    def __str__(self):
        return f'{self.title}({self.id})'

    def import_dict(self, import_data: Dict[str, Any]):
        """Populate this row from a dict produced by ``export_dict``.

        Raises:
            KeyError: if any expected key is missing.
        """
        _import_fields(self, import_data, _VIDEO_FIELDS)

    def export_dict(self) -> Dict[str, Any]:
        """Return this row as a plain dict (date fields are stringified)."""
        return _export_fields(self, _VIDEO_FIELDS)

    def update_title(self, timeout: float = 30.0) -> None:
        """Fetch ``self.url`` and store the page's ``<title>`` text.

        On success with a title tag present, ``title`` is set and ``review``
        is cleared.  If the fetch or parse fails, ``title`` is reset to
        ``None`` and the row is flagged for review.  A page without a title
        tag leaves both fields untouched.  ``last_modified_date`` is always
        refreshed.

        Args:
            timeout: seconds to wait on the HTTP request before giving up;
                without one the request could block indefinitely.
        """
        print(f"update title for {self.url}")
        try:
            response = requests.get(self.url, timeout=timeout)
            soup = BeautifulSoup(response.content, "html.parser")
            title_tag = soup.find('title')
            if title_tag:
                self.title = title_tag.get_text()
                self.review = False
        except Exception:
            # Deliberate best-effort: any fetch/parse failure marks the row
            # for manual review.  ``Exception`` (not a bare ``except``) lets
            # KeyboardInterrupt / SystemExit propagate.
            self.title = None
            self.review = True
        self.last_modified_date = datetime.now()

    def download_file(self, download_dir: str, dl_tool: str):
        """Run *dl_tool* on ``self.url`` inside *download_dir* and record the result.

        When the tool exits with 0 and a file name can be parsed from its
        output, ``file_name`` and ``cloud_link`` are set and
        ``should_download`` is cleared.  When no file name is found the row is
        flagged for review and stays queued for download.  A non-zero exit
        code is reported and leaves the download fields unchanged.
        ``last_modified_date`` is always refreshed.

        Args:
            download_dir: working directory the tool is run in.
            dl_tool: executable to invoke; it receives the URL as its only
                argument.
        """
        print(f"download file for {self.url} to {download_dir}")
        result = subprocess.run(
            [dl_tool, self.url],
            cwd=download_dir,
            capture_output=True,
            text=True,
        )
        if result.returncode == 0:
            # Runs of spaces are collapsed before parsing (kept from the
            # original implementation).
            output = re.sub(' +', ' ', result.stdout)
            file_name = self.__parse_output__(output.splitlines())
            if file_name is None:
                self.review = True
                self.should_download = True
                self.file_name = None
            else:
                # The tool ran with cwd=download_dir, so a relative name must
                # be resolved against that directory, not this process's cwd.
                # An absolute file_name overrides download_dir in the join.
                downloaded = Path(download_dir) / file_name
                self.should_download = False
                self.file_name = downloaded.name
                self.cloud_link = str(downloaded.absolute())
        else:
            print(f"download failed for {self.url} (exit code {result.returncode})")
        self.last_modified_date = datetime.now()

    def __parse_output__(self, lines_list: List[str]) -> Optional[str]:
        """Extract the downloaded file name from the tool's output lines.

        Recognises ``... Destination: <name>`` and
        ``[download] <name> has already been downloaded`` lines; the last
        matching line wins.  The result is also stored on ``self.file_name``.

        Returns:
            The parsed file name, or ``None`` when no line matched.
        """
        parsed: Optional[str] = None
        for line in lines_list:
            if _ALREADY_DOWNLOADED_MARKER in line:
                # Split on the marker instead of slicing fixed offsets so
                # trailing text after the marker does not corrupt the name.
                name = line.rpartition(_ALREADY_DOWNLOADED_MARKER)[0]
                if name.startswith(_DOWNLOAD_PREFIX):
                    name = name[len(_DOWNLOAD_PREFIX):]
            elif _DESTINATION_MARKER in line:
                # Works for any "[tag] Destination: " prefix length.
                name = line.partition(_DESTINATION_MARKER)[2]
            else:
                continue
            if name:
                parsed = name
        self.file_name = parsed
        return parsed


class MediaActor(Base, BaseMixin):
    """An actor row identified by a unique URL."""

    __tablename__ = 'media_actor'

    name = Column(String)
    url = Column(String, unique=True)

    # NOTE(review): MediaActorFile.media_actor names this attribute in its
    # back_populates, but this side does not name one in return — confirm.
    media_actor_files = relationship("MediaActorFile")

    def __repr__(self):
        return f'MediaActor(\n\tID: {self.id}\n\tName: {self.name}\n\tURL: {self.url})'

    def __str__(self):
        return f'{self.name}({self.id})'

    def import_dict(self, import_data: Dict[str, Any]):
        """Populate this row from a dict produced by ``export_dict``.

        Raises:
            KeyError: if any expected key is missing.
        """
        _import_fields(self, import_data, _ACTOR_FIELDS)

    def export_dict(self) -> Dict[str, Any]:
        """Return this row as a plain dict (date fields are stringified)."""
        return _export_fields(self, _ACTOR_FIELDS)


class MediaActorFile(Base, BaseMixin):
    """Association row linking a ``MediaActor`` to a ``MediaFile``."""

    __tablename__ = 'media_actor_file'

    media_actor_id = Column(String, ForeignKey("media_actor.id"), nullable=False)
    media_actor = relationship("MediaActor", back_populates="media_actor_files")
    media_file_id = Column(String, ForeignKey("media_file.id"), nullable=True)
    media_file = relationship("MediaFile", back_populates="media_actor_files")

    def __repr__(self):
        return (
            f'MediaActorFile(\n\tID: {self.id}'
            f'\n\tMediaActor: {self.media_actor_id}'
            f'\n\tMediaFile: {self.media_file_id})'
        )

    def __str__(self):
        return f'{self.id}: MediaActor: {self.media_actor_id} - MediaFile: {self.media_file_id}'

    def import_dict(self, import_data: Dict[str, Any]):
        """Populate this row from a dict produced by ``export_dict``.

        Raises:
            KeyError: if any expected key is missing.
        """
        _import_fields(self, import_data, _ACTOR_FILE_FIELDS)

    def export_dict(self) -> Dict[str, Any]:
        """Return this row as a plain dict (date fields are stringified)."""
        return _export_fields(self, _ACTOR_FILE_FIELDS)


class MediaArticle(Base, BaseMixin):
    """An article row identified by a unique URL."""

    __tablename__ = 'media_article'

    review = Column(Boolean)
    title = Column(String)
    url = Column(String, unique=True)

    def import_dict(self, import_data: Dict[str, Any]):
        """Populate this row from a dict produced by ``export_dict``.

        Raises:
            KeyError: if any expected key is missing.
        """
        _import_fields(self, import_data, _ARTICLE_FIELDS)

    def export_dict(self) -> Dict[str, Any]:
        """Return this row as a plain dict (date fields are stringified)."""
        return _export_fields(self, _ARTICLE_FIELDS)


class MediaVideo(Base, BaseMixin):
    """A video row identified by a unique URL."""

    __tablename__ = 'media_video'

    cloud_link = Column(String)
    file_name = Column(String)
    path = Column(String)
    review = Column(Boolean)
    title = Column(String)
    url = Column(String, unique=True)
    should_download = Column(Boolean)

    def import_dict(self, import_data: Dict[str, Any]):
        """Populate this row from a dict produced by ``export_dict``.

        Raises:
            KeyError: if any expected key is missing.
        """
        _import_fields(self, import_data, _VIDEO_FIELDS)

    def export_dict(self) -> Dict[str, Any]:
        """Return this row as a plain dict (date fields are stringified)."""
        return _export_fields(self, _VIDEO_FIELDS)