import re
import subprocess
from datetime import datetime
from pathlib import Path

import requests
from bs4 import BeautifulSoup
from sqlalchemy import Column, DateTime, Integer, String, ForeignKey
from sqlalchemy.dialects.mysql import BIT
from sqlalchemy.orm import relationship

from .base import Base, BaseMixin, BaseVideoMixin

# Seconds to wait for the page when fetching a title; without a timeout a
# stalled server would block update_title() forever.
_TITLE_REQUEST_TIMEOUT = 30

# Fragments of the download tool's stdout that surround the output file name.
_DOWNLOAD_PREFIX = '[download] '
_DESTINATION_MARKER = 'Destination: '
_ALREADY_DOWNLOADED_MARKER = ' has already been downloaded'


class MediaFile(Base, BaseMixin, BaseVideoMixin):
    """ORM model mapped to the ``media_file`` table.

    The methods below read and write ``id``, ``url``, ``title``, ``review``,
    ``file_name``, ``cloud_link``, ``should_download`` and
    ``last_modified_date``. None of them is declared in this class, so they
    are presumably provided by ``BaseMixin`` / ``BaseVideoMixin`` --
    TODO confirm against ``.base``.
    """

    __tablename__ = 'media_file'

    # Paired with MediaActorFile.media_file so both sides stay in sync.
    media_actor_files = relationship(
        "MediaActorFile", back_populates="media_file"
    )

    def __repr__(self):
        # NOTE(review): ``title`` is printed twice; this looks like a
        # copy-paste slip, but the intended third field is not evident, so
        # the output is left unchanged.
        return f'MediaFile({self.id} {self.title} {self.title})'

    def __str__(self):
        return f'{self.title}({self.id})'

    def update_title(self) -> None:
        """Fetch ``self.url`` and store the page's ``<title>`` text.

        On success ``title`` is set from the parsed HTML and ``review`` is
        set to 0. If the request or the parsing fails for any reason
        (network error, timeout, page without a ``<title>`` element, ...)
        ``title`` is cleared and ``review`` is set to 1. In both cases
        ``last_modified_date`` is refreshed.
        """
        print(f"update title for {self.url}")
        try:
            response = requests.get(self.url, timeout=_TITLE_REQUEST_TIMEOUT)
            soup = BeautifulSoup(response.content, "html.parser")
            # soup.title is None when the page has no <title>; the resulting
            # AttributeError is handled below like any other failure.
            self.title = soup.title.string
            self.review = 0
        except Exception:
            # Best-effort: any failure flags the record for review. Unlike a
            # bare ``except:``, this lets KeyboardInterrupt/SystemExit
            # propagate.
            self.title = None
            self.review = 1
        self.last_modified_date = datetime.now()

    def download_file(self, download_dir: str, dl_tool: str):
        """Run ``dl_tool`` on ``self.url`` inside ``download_dir``.

        Args:
            download_dir: Working directory for the download tool.
            dl_tool: Executable to invoke; it is called as
                ``dl_tool <url>``.

        When the tool exits with status 0 its stdout is parsed for the
        output file name:

        * name found -- ``should_download`` is set to 0, ``file_name`` to
          the bare file name and ``cloud_link`` to the file's absolute path;
        * name not found -- ``review`` and ``should_download`` are set to 1
          and ``file_name`` to ``None``.

        A non-zero exit status is reported on stdout and leaves those fields
        untouched. ``last_modified_date`` is refreshed in every case.
        """
        print(f"download file for {self.url} to {download_dir}")
        result = subprocess.run(
            [dl_tool, self.url],
            cwd=download_dir,
            capture_output=True,
            text=True,
        )
        if result.returncode == 0:
            # Collapse runs of spaces so the fixed markers match reliably.
            output = re.sub(' +', ' ', result.stdout)
            file_name = self.__parse_output__(output.splitlines())
            if file_name is None:
                self.review = 1
                self.should_download = 1
                self.file_name = None
            else:
                # The tool ran with cwd=download_dir, so a relative name it
                # printed is relative to that directory, not to this
                # process's working directory. An absolute name is kept
                # as-is by the path join.
                download_path = Path(download_dir, file_name)
                self.should_download = 0
                self.file_name = download_path.name
                self.cloud_link = str(download_path.absolute())
        else:
            print(
                f"download failed for {self.url} "
                f"(exit code {result.returncode}): {result.stderr.strip()}"
            )
        self.last_modified_date = datetime.now()

    # NOTE(review): dunder-style names are reserved for Python itself; the
    # name is kept only so existing callers continue to work.
    def __parse_output__(self, lines_list):
        """Extract the output file name from the download tool's stdout.

        Two line shapes are recognised::

            [download] Destination: <name>
            [download] <name> has already been downloaded

        When several lines match, the last one wins.

        Args:
            lines_list: Lines of the tool's stdout.

        Returns:
            The file name, or ``None`` if no line matched. The same value is
            also stored in ``self.file_name``.
        """
        file_name = None
        for line in lines_list:
            if _ALREADY_DOWNLOADED_MARKER in line:
                # Everything before the marker is the name; anything after
                # it (e.g. " and merged") is ignored.
                candidate = line.rpartition(_ALREADY_DOWNLOADED_MARKER)[0]
                if candidate.startswith(_DOWNLOAD_PREFIX):
                    candidate = candidate[len(_DOWNLOAD_PREFIX):]
            elif _DESTINATION_MARKER in line:
                # Works for any "[stage] Destination: <name>" line.
                candidate = line.partition(_DESTINATION_MARKER)[2]
            else:
                continue
            if candidate:
                file_name = candidate
        self.file_name = file_name
        return file_name


class MediaActor(Base, BaseMixin):
    """ORM model mapped to the ``media_actor`` table."""

    __tablename__ = 'media_actor'

    name = Column(String(255))
    # Paired with MediaActorFile.media_actor so both sides stay in sync.
    media_actor_files = relationship(
        "MediaActorFile", back_populates="media_actor"
    )


class MediaActorFile(Base, BaseMixin):
    """Association row linking one ``MediaActor`` to one ``MediaFile``."""

    __tablename__ = 'media_actor_file'

    media_actor_id = Column(
        String(255), ForeignKey("media_actor.id"), nullable=False
    )
    media_actor = relationship(
        "MediaActor", back_populates="media_actor_files"
    )
    media_file_id = Column(
        String(255), ForeignKey("media_file.id"), nullable=False
    )
    media_file = relationship(
        "MediaFile", back_populates="media_actor_files"
    )


class MediaArticle(Base, BaseMixin):
    """ORM model mapped to the ``media_article`` table."""

    __tablename__ = 'media_article'

    review = Column(BIT(1))  # single-bit flag
    title = Column(String(255))
    url = Column(String(255), unique=True)


class MediaVideo(Base, BaseMixin):
    """ORM model mapped to the ``media_video`` table."""

    __tablename__ = 'media_video'

    cloud_link = Column(String(255))
    file_name = Column(String(255))
    path = Column(String(255))
    review = Column(BIT(1))  # single-bit flag
    title = Column(String(255))
    url = Column(String(255), unique=True)
    should_download = Column(BIT(1))  # single-bit flag