import re
import subprocess
from datetime import datetime
from pathlib import Path
from typing import Any, AnyStr, Dict

import requests
from bs4 import BeautifulSoup
from sqlalchemy import Boolean, Column, False_, String, ForeignKey
from sqlalchemy.orm import relationship

from db.models.base import Base, BaseMixin, BaseVideoMixin

# Seconds to wait for the remote server before giving up on a title lookup.
_REQUEST_TIMEOUT_SECONDS = 30

# Keys shared by every model's import/export dictionary, in export order.
_BASE_FIELDS = ('id', 'created_date', 'last_modified_date', 'version')
# Fields that export_dict() converts with str() instead of passing through.
_STRINGIFIED_FIELDS = frozenset(('created_date', 'last_modified_date'))

_VIDEO_FIELDS = _BASE_FIELDS + (
    'cloud_link',
    'file_name',
    'path',
    'review',
    'title',
    'url',
    'should_download',
)
_ACTOR_FIELDS = _BASE_FIELDS + ('name', 'url')
_ACTOR_FILE_FIELDS = _BASE_FIELDS + ('media_actor_id', 'media_file_id')
_ARTICLE_FIELDS = _BASE_FIELDS + ('review', 'title', 'url')

# Download-tool output lines that carry the file name, e.g.
#   "[download] Destination: some file.mp4"
#   "[download] some file.mp4 has already been downloaded"
# Any "[tag]" is accepted in front of "Destination: ".
_DESTINATION_RE = re.compile(r'^\[[^\]]+\] Destination: (?P<name>.+)$')
_ALREADY_DOWNLOADED_RE = re.compile(
    r'^\[download\] (?P<name>.+) has already been downloaded'
)


def _import_fields(target, import_data: Dict[str, Any], fields) -> None:
    """Copy ``import_data[name]`` onto ``target.name`` for each field name.

    Raises:
        KeyError: if ``import_data`` lacks one of ``fields``; attributes
            assigned before the missing key stay assigned.
    """
    for name in fields:
        setattr(target, name, import_data[name])


def _export_fields(source, fields) -> Dict[str, Any]:
    """Build a dict of ``fields`` read from ``source``, in the given order.

    The two date fields are converted with ``str()``; every other value is
    passed through unchanged.
    """
    item: Dict[str, Any] = {}
    for name in fields:
        value = getattr(source, name)
        item[name] = str(value) if name in _STRINGIFIED_FIELDS else value
    return item


class MediaFile(Base, BaseMixin, BaseVideoMixin):
    """Row of the ``media_file`` table, with title lookup and download helpers."""

    __tablename__ = 'media_file'

    # NOTE(review): MediaActorFile.media_file names this attribute through
    # back_populates, but this side does not name the reverse attribute --
    # confirm whether one-way synchronisation is intended.
    media_actor_files = relationship("MediaActorFile")

    def __repr__(self):
        return f'MediaFile({self.id} {self.title} {self.title})'

    def __str__(self):
        return f'{self.title}({self.id})'

    def import_dict(self, import_data: Dict[str, Any]) -> None:
        """Populate this row from a dictionary shaped like ``export_dict()``'s result."""
        _import_fields(self, import_data, _VIDEO_FIELDS)

    def export_dict(self) -> Dict[str, Any]:
        """Return the row as a plain dictionary; date fields are stringified."""
        return _export_fields(self, _VIDEO_FIELDS)

    def update_title(self) -> None:
        """Fetch ``self.url`` and store the page's ``<title>`` text.

        On any failure (network error, timeout, page without a usable
        title) ``title`` is set to ``None`` and ``review`` to ``True``;
        otherwise ``review`` is cleared. ``last_modified_date`` is updated
        in every case.
        """
        print(f"update title for {self.url}")
        try:
            response = requests.get(self.url, timeout=_REQUEST_TIMEOUT_SECONDS)
            soup = BeautifulSoup(response.content, "html.parser")
            title_tag = soup.title
            title = title_tag.string if title_tag is not None else None
        except Exception as err:
            # Best-effort lookup: report the problem and flag the row for
            # review rather than aborting the caller. Exception (not a bare
            # except) lets KeyboardInterrupt/SystemExit propagate.
            print(f"could not fetch title for {self.url}: {err}")
            title = None
        self.title = title
        # A missing title always needs review, however it came to be missing.
        self.review = title is None
        self.last_modified_date = datetime.now()

    def download_file(self, download_dir: str, dl_tool: str) -> None:
        """Run ``dl_tool`` on ``self.url`` inside ``download_dir``.

        When the tool exits with status 0 its stdout is scanned for the
        file name. If one is found, ``file_name`` and ``cloud_link`` are
        set and ``should_download`` is cleared; if not, the row is flagged
        for review and left marked for download. A non-zero exit status is
        reported and leaves those fields untouched.
        ``last_modified_date`` is updated in every case.

        Args:
            download_dir: working directory the tool is run in.
            dl_tool: executable to invoke; it receives the URL as its only
                argument.
        """
        print(f"download file for {self.url} to {download_dir}")
        result = subprocess.run(
            [dl_tool, self.url],
            cwd=download_dir,
            capture_output=True,
            text=True,
        )
        if result.returncode == 0:
            # NOTE(review): collapsing runs of spaces also alters file names
            # that contain consecutive spaces -- confirm this is wanted.
            output = re.sub(' +', ' ', result.stdout)
            file_name = self.__parse_output__(output.splitlines())
            if file_name is None:
                self.review = True
                self.should_download = True
                self.file_name = None
            else:
                # The tool ran with cwd=download_dir, so a relative name it
                # reports is relative to that directory, not to this
                # process's working directory. Joining leaves an absolute
                # name unchanged.
                downloaded = Path(download_dir) / file_name
                self.should_download = False
                self.file_name = downloaded.name
                self.cloud_link = str(downloaded.absolute())
        else:
            print(
                f"download of {self.url} failed with exit code "
                f"{result.returncode}: {result.stderr}"
            )
        self.last_modified_date = datetime.now()

    def __parse_output__(self, lines_list):
        """Extract the downloaded file's name from the tool's output lines.

        Looks for "... has already been downloaded" and "Destination: ..."
        lines; when several lines match, the last one wins. The result is
        also stored in ``self.file_name``.

        Returns:
            The file name as printed by the tool, or ``None`` when no line
            matched.
        """
        self.file_name = None
        for line in lines_list:
            for pattern in (_ALREADY_DOWNLOADED_RE, _DESTINATION_RE):
                match = pattern.match(line)
                if match is not None:
                    self.file_name = match.group('name')
        return self.file_name


class MediaActor(Base, BaseMixin):
    """Row of the ``media_actor`` table."""

    __tablename__ = 'media_actor'

    name = Column(String)
    url = Column(String, unique=True)
    # NOTE(review): MediaActorFile.media_actor names this attribute through
    # back_populates, but this side does not name the reverse attribute --
    # confirm whether one-way synchronisation is intended.
    media_actor_files = relationship("MediaActorFile")

    def import_dict(self, import_data: Dict[str, Any]) -> None:
        """Populate this row from a dictionary shaped like ``export_dict()``'s result."""
        _import_fields(self, import_data, _ACTOR_FIELDS)

    def export_dict(self) -> Dict[str, Any]:
        """Return the row as a plain dictionary; date fields are stringified."""
        return _export_fields(self, _ACTOR_FIELDS)


class MediaActorFile(Base, BaseMixin):
    """Row of ``media_actor_file``, linking a media actor to a media file."""

    __tablename__ = 'media_actor_file'

    media_actor_id = Column(String, ForeignKey("media_actor.id"), nullable=False)
    media_actor = relationship("MediaActor", back_populates="media_actor_files")
    media_file_id = Column(String, ForeignKey("media_file.id"), nullable=True)
    media_file = relationship("MediaFile", back_populates="media_actor_files")

    def import_dict(self, import_data: Dict[str, Any]) -> None:
        """Populate this row from a dictionary shaped like ``export_dict()``'s result."""
        _import_fields(self, import_data, _ACTOR_FILE_FIELDS)

    def export_dict(self) -> Dict[str, Any]:
        """Return the row as a plain dictionary; date fields are stringified."""
        return _export_fields(self, _ACTOR_FILE_FIELDS)


class MediaArticle(Base, BaseMixin):
    """Row of the ``media_article`` table."""

    __tablename__ = 'media_article'

    review = Column(Boolean)
    title = Column(String)
    url = Column(String, unique=True)

    def import_dict(self, import_data: Dict[str, Any]) -> None:
        """Populate this row from a dictionary shaped like ``export_dict()``'s result."""
        _import_fields(self, import_data, _ARTICLE_FIELDS)

    def export_dict(self) -> Dict[str, Any]:
        """Return the row as a plain dictionary; date fields are stringified."""
        return _export_fields(self, _ARTICLE_FIELDS)


class MediaVideo(Base, BaseMixin):
    """Row of the ``media_video`` table."""

    __tablename__ = 'media_video'

    cloud_link = Column(String)
    file_name = Column(String)
    path = Column(String)
    review = Column(Boolean)
    title = Column(String)
    url = Column(String, unique=True)
    should_download = Column(Boolean)

    def import_dict(self, import_data: Dict[str, Any]) -> None:
        """Populate this row from a dictionary shaped like ``export_dict()``'s result."""
        _import_fields(self, import_data, _VIDEO_FIELDS)

    def export_dict(self) -> Dict[str, Any]:
        """Return the row as a plain dictionary; date fields are stringified."""
        return _export_fields(self, _VIDEO_FIELDS)