102 lines
3.5 KiB
Python
102 lines
3.5 KiB
Python
import logging
|
|
import re
|
|
import subprocess
|
|
from datetime import datetime
|
|
from pathlib import Path
|
|
|
|
import requests
|
|
from bs4 import BeautifulSoup
|
|
from sqlalchemy import Column, String, ForeignKey
|
|
from sqlalchemy.dialects.mysql import BIT
|
|
from sqlalchemy.orm import relationship
|
|
|
|
from src.db.models.base import Base, BaseMixin, BaseVideoMixin
|
|
|
|
|
|
class MediaFile(Base, BaseMixin, BaseVideoMixin):
    """ORM model mapped to the ``media_file`` table.

    The methods below read and write ``id``, ``url``, ``title``, ``review``,
    ``should_download``, ``file_name``, ``cloud_link`` and
    ``last_modified_date``. None of them is declared in this class, so they
    are presumably provided by ``BaseMixin`` / ``BaseVideoMixin`` -- TODO
    confirm against ``src.db.models.base``.
    """

    __tablename__ = 'media_file'

    # MediaActorFile.media_file already names this attribute in its
    # back_populates; declaring the pairing here as well keeps both sides
    # synchronized for in-Python changes.
    media_actor_files = relationship("MediaActorFile", back_populates="media_file")

    def __repr__(self):
        """Return a debugging representation with id, title and url."""
        return f'MediaFile({self.id} {self.title} {self.url})'

    def __str__(self):
        """Return ``"<title>(<id>)"``."""
        return f'{self.title}({self.id})'

    def update_title(self) -> None:
        """Fetch ``self.url`` and store the page's ``<title>`` in ``self.title``.

        On success ``review`` is set to 0. If fetching or parsing raises,
        ``title`` is set to ``None`` and ``review`` to 1. In both cases
        ``last_modified_date`` is set to the current local time.
        """
        logging.info(f"update title for {self.url}")
        try:
            response = requests.get(self.url)
            soup = BeautifulSoup(response.content, "html.parser")
            # soup.title is None for a page without a <title> tag; the
            # resulting AttributeError is handled by the except branch.
            self.title = soup.title.string
            self.review = 0
        except Exception:
            # Exception (not a bare except) so KeyboardInterrupt/SystemExit
            # still propagate; the traceback is logged instead of discarded.
            logging.exception(f"could not update title for {self.url}")
            self.title = None
            self.review = 1
        self.last_modified_date = datetime.now()

    def download_file(self, download_dir: str, dl_tool: str):
        """Run ``dl_tool`` on ``self.url`` inside ``download_dir`` and record the outcome.

        Args:
            download_dir: Working directory the tool is started in.
            dl_tool: Executable to run; it receives ``self.url`` as its only
                argument.

        When the tool exits with code 0 its stdout is parsed for the file
        name. If a name is found, ``should_download`` is set to 0 and
        ``file_name`` / ``cloud_link`` are set to the file's base name and
        absolute path. If no name is found, ``review`` and
        ``should_download`` are set to 1 and ``file_name`` to ``None``.
        A non-zero exit code leaves those fields untouched and is logged.
        ``last_modified_date`` is always set to the current local time.
        """
        logging.info(f"download file for {self.url} to {download_dir}")
        result = subprocess.run([dl_tool, self.url], cwd=download_dir, capture_output=True, text=True)
        if result.returncode == 0:
            # Runs of spaces are collapsed before the lines are parsed.
            output = re.sub(' +', ' ', result.stdout)
            file_name = self.__parse_output__(output.splitlines())
            if file_name is None:
                self.review = 1
                self.should_download = 1
                self.file_name = None
            else:
                # The tool ran with cwd=download_dir, so a relative name it
                # printed is relative to that directory, not to this
                # process's working directory. An absolute name is kept
                # as-is by the "/" operator.
                download_file = Path(download_dir or '.') / file_name
                self.should_download = 0
                self.file_name = download_file.name
                self.cloud_link = str(download_file.absolute())
        else:
            logging.warning(
                f"{dl_tool} exited with code {result.returncode} for {self.url}: "
                f"{(result.stderr or '').strip()}"
            )
        self.last_modified_date = datetime.now()

    def __parse_output__(self, lines_list):
        """Extract the downloaded file's name from the tool's stdout lines.

        Two line shapes are recognized, both starting with ``[download] ``:

        * ``[download] Destination: <name>``
        * ``[download] <name> has already been downloaded`` (any trailing
          text after that phrase is ignored)

        NOTE(review): these look like youtube-dl / yt-dlp messages -- confirm
        which tool is actually passed as ``dl_tool``.

        Args:
            lines_list: Iterable of stdout lines.

        Returns:
            The name from the last matching line, or ``None`` when no line
            matches. The same value is also stored in ``self.file_name``.
        """
        download_prefix = '[download] '
        destination_prefix = 'Destination: '
        already_marker = ' has already been downloaded'

        self.file_name = None
        for line in lines_list:
            if not line.startswith(download_prefix):
                continue
            remainder = line[len(download_prefix):]
            if remainder.startswith(destination_prefix):
                name = remainder[len(destination_prefix):]
            elif already_marker in remainder:
                # rpartition: cut at the last occurrence so a file name that
                # itself contains the phrase is not truncated early.
                name = remainder.rpartition(already_marker)[0]
            else:
                continue
            if name:
                self.file_name = name
        return self.file_name
|
|
|
|
|
|
class MediaActor(Base, BaseMixin):
    """ORM model mapped to the ``media_actor`` table."""

    __tablename__ = 'media_actor'

    name = Column(String(255))

    # MediaActorFile.media_actor already names this attribute in its
    # back_populates; declaring the pairing here as well keeps both sides
    # synchronized for in-Python changes.
    media_actor_files = relationship("MediaActorFile", back_populates="media_actor")
|
|
|
|
|
|
class MediaActorFile(Base, BaseMixin):
    """ORM model mapped to ``media_actor_file``.

    Each row references one ``media_actor`` row (required) and one
    ``media_file`` row (optional).
    """

    __tablename__ = 'media_actor_file'

    # Actor side: the foreign key must be set.
    media_actor_id = Column(
        String(255),
        ForeignKey("media_actor.id"),
        nullable=False,
    )
    media_actor = relationship(
        "MediaActor",
        back_populates="media_actor_files",
    )

    # File side: the foreign key may be NULL.
    media_file_id = Column(
        String(255),
        ForeignKey("media_file.id"),
        nullable=True,
    )
    media_file = relationship(
        "MediaFile",
        back_populates="media_actor_files",
    )
|
|
|
|
|
|
class MediaArticle(Base, BaseMixin):
    """ORM model mapped to the ``media_article`` table."""

    __tablename__ = 'media_article'

    # Single-bit flag column (MySQL BIT(1)).
    review = Column(BIT(1))
    title = Column(String(255))
    # No two rows may share a url.
    url = Column(
        String(255),
        unique=True,
    )
|
|
|
|
|
|
class MediaVideo(Base, BaseMixin):
    """ORM model mapped to the ``media_video`` table."""

    __tablename__ = 'media_video'

    # Text columns, each limited to 255 characters.
    cloud_link = Column(String(255))
    file_name = Column(String(255))
    path = Column(String(255))

    # Single-bit flag column (MySQL BIT(1)).
    review = Column(BIT(1))

    title = Column(String(255))
    # No two rows may share a url.
    url = Column(
        String(255),
        unique=True,
    )

    # Single-bit flag column (MySQL BIT(1)).
    should_download = Column(BIT(1))
|