"""SQLAlchemy models for media files, actors, articles and videos."""
import re
import subprocess
from datetime import datetime
from pathlib import Path

import requests
from bs4 import BeautifulSoup
from sqlalchemy import Boolean, Column, DateTime, Integer, String, ForeignKey
from sqlalchemy.orm import relationship

from .base import Base, BaseMixin, BaseVideoMixin


class MediaFile(Base, BaseMixin, BaseVideoMixin):
    """Row of the ``media_file`` table, with helpers to fetch its title and download it.

    The methods read and write ``url``, ``title``, ``review``, ``file_name``,
    ``cloud_link``, ``should_download`` and ``last_modified_date``; these
    attributes are not declared here and presumably come from the mixins in
    ``.base`` (TODO confirm).
    """

    __tablename__ = 'media_file'

    # Both sides of the association name each other so in-Python changes on
    # either side are mirrored on the other.
    media_actor_files = relationship("MediaActorFile", back_populates="media_file")

    def __repr__(self):
        return f'MediaFile({self.id} {self.title} {self.url})'

    def __str__(self):
        return f'{self.title}({self.id})'

    def update_title(self, timeout: float = 30.0) -> None:
        """Fetch ``self.url`` and store the page's ``<title>`` text in ``self.title``.

        On any failure (network error, timeout, unparsable page, missing or
        non-textual ``<title>``) the title is set to ``None`` and the row is
        flagged with ``review = True``; otherwise ``review`` is cleared.
        ``last_modified_date`` is always refreshed.

        Args:
            timeout: Seconds to wait for the HTTP request before giving up.
        """
        print(f"update title for {self.url}")
        try:
            r = requests.get(self.url, timeout=timeout)
            soup = BeautifulSoup(r.content, "html.parser")
            title_tag = soup.title
            # ``.string`` is None when <title> holds nested markup.
            title = title_tag.string if title_tag is not None else None
        except Exception:
            # Best-effort: any fetch/parse problem just marks the row for review,
            # but KeyboardInterrupt/SystemExit are no longer swallowed.
            title = None
        self.title = title
        self.review = title is None
        self.last_modified_date = datetime.now()

    def download_file(self, download_dir: str, dl_tool: str):
        """Run ``dl_tool`` on ``self.url`` inside ``download_dir`` and record the result.

        When the tool exits with status 0 its stdout is scanned for the
        downloaded file name. If one is found, ``file_name`` and ``cloud_link``
        are set and ``should_download`` is cleared; if not, the row is flagged
        for review and left marked for download. A non-zero exit status is
        reported and leaves those fields untouched. ``last_modified_date`` is
        always refreshed.

        Args:
            download_dir: Directory used as the tool's working directory.
            dl_tool: Executable name or path; it is invoked as ``dl_tool <url>``.
        """
        print(f"download file for {self.url} to {download_dir}")
        result = subprocess.run(
            [dl_tool, self.url],
            cwd=download_dir,
            capture_output=True,
            text=True,
        )
        if result.returncode == 0:
            # NOTE: this also collapses repeated spaces inside file names.
            output = re.sub(' +', ' ', result.stdout)
            file_name = self.__parse_output__(output.splitlines())
            if file_name is None:
                self.review = True
                self.should_download = True
                self.file_name = None
            else:
                # The tool ran with cwd=download_dir, so a relative name it
                # printed is relative to that directory, not to this process.
                download_file = Path(download_dir) / file_name
                self.should_download = False
                self.file_name = download_file.name
                self.cloud_link = str(download_file.absolute())
        else:
            print(
                f"download failed for {self.url} "
                f"(exit code {result.returncode}): {result.stderr.strip()}"
            )
        self.last_modified_date = datetime.now()

    def __parse_output__(self, lines_list):
        """Extract the downloaded file name from the tool's output lines.

        Recognises ``[download] <file> has already been downloaded`` and
        ``... Destination: <file>`` lines. When several lines match, the last
        one wins. The result is also stored in ``self.file_name``.

        Args:
            lines_list: Output of the download tool, one string per line.

        Returns:
            The file name as printed by the tool, or ``None`` if no line matched.
        """
        download_tag = '[download] '
        already_marker = ' has already been downloaded'
        destination_marker = 'Destination: '

        file_name = None
        for line in lines_list:
            head, already_found, _ = line.partition(already_marker)
            if already_found:
                if head.startswith(download_tag):
                    head = head[len(download_tag):]
                if head:
                    file_name = head
                # A file name containing "Destination" must not be re-parsed
                # by the branch below.
                continue
            _, destination_found, tail = line.partition(destination_marker)
            if destination_found and tail:
                file_name = tail
        self.file_name = file_name
        return file_name


class MediaActor(Base, BaseMixin):
    """Row of the ``media_actor`` table: a named actor linked to media files."""

    __tablename__ = 'media_actor'

    name = Column(String(255))
    media_actor_files = relationship("MediaActorFile", back_populates="media_actor")


class MediaActorFile(Base, BaseMixin):
    """Association row linking a ``MediaActor`` to a ``MediaFile``."""

    __tablename__ = 'media_actor_file'

    media_actor_id = Column(String(255), ForeignKey("media_actor.id"), nullable=False)
    media_actor = relationship("MediaActor", back_populates="media_actor_files")
    # The file side is optional (nullable), unlike the actor side.
    media_file_id = Column(String(255), ForeignKey("media_file.id"), nullable=True)
    media_file = relationship("MediaFile", back_populates="media_actor_files")


class MediaArticle(Base, BaseMixin):
    """Row of the ``media_article`` table: a title, a unique URL and a review flag."""

    __tablename__ = 'media_article'

    review = Column(Boolean)
    title = Column(String(255))
    url = Column(String(255), unique=True)


class MediaVideo(Base, BaseMixin):
    """Row of the ``media_video`` table: URL, local file details and status flags."""

    __tablename__ = 'media_video'

    cloud_link = Column(String(255))
    file_name = Column(String(255))
    path = Column(String(255))
    review = Column(Boolean)
    title = Column(String(255))
    url = Column(String(255), unique=True)
    should_download = Column(Boolean)