212 lines
7.9 KiB
Python
212 lines
7.9 KiB
Python
import re
|
|
import subprocess
|
|
from datetime import datetime
|
|
from pathlib import Path
|
|
from typing import Any, AnyStr, Dict
|
|
import requests
|
|
from bs4 import BeautifulSoup
|
|
from sqlalchemy import Boolean, Column, False_, String, ForeignKey
|
|
from sqlalchemy.orm import relationship
|
|
|
|
from db.models.base import Base, BaseMixin, BaseVideoMixin
|
|
|
|
|
|
class MediaFile(Base, BaseMixin, BaseVideoMixin):
    """ORM model for a downloadable media file stored in ``media_file``.

    The video-related attributes used below (``cloud_link``, ``file_name``,
    ``path``, ``review``, ``title``, ``url``, ``should_download``) are not
    declared in this class; presumably they come from ``BaseVideoMixin`` --
    confirm against ``db.models.base``.
    """

    __tablename__ = 'media_file'
    # Reciprocal of MediaActorFile.media_file, which already names this
    # attribute in its own back_populates.
    media_actor_files = relationship("MediaActorFile", back_populates="media_file")

    def __repr__(self):
        """Return a debugging representation with id, title and url."""
        return f'MediaFile({self.id} {self.title} {self.url})'

    def __str__(self):
        """Return ``title(id)``."""
        return f'{self.title}({self.id})'

    def import_dict(self, import_data: Dict[str, Any]):
        """Copy every persisted field from ``import_data`` onto this instance.

        Fields are assigned in a fixed order; a missing key raises
        ``KeyError`` and leaves the fields assigned before it in place.
        """
        for field in (
            'id',
            'created_date',
            'last_modified_date',
            'version',
            'cloud_link',
            'file_name',
            'path',
            'review',
            'title',
            'url',
            'should_download',
        ):
            setattr(self, field, import_data[field])

    def export_dict(self) -> Dict[str, Any]:
        """Return the persisted fields as a plain dict.

        The two date fields are converted with ``str()``; every other value
        is returned as stored.
        """
        return {
            'id': self.id,
            'created_date': str(self.created_date),
            'last_modified_date': str(self.last_modified_date),
            'version': self.version,
            'cloud_link': self.cloud_link,
            'file_name': self.file_name,
            'path': self.path,
            'review': self.review,
            'title': self.title,
            'url': self.url,
            'should_download': self.should_download,
        }

    def update_title(self) -> None:
        """Fetch ``self.url`` and store the page's ``<title>`` text as the title.

        If the page cannot be fetched or parsed, or it has no usable title,
        ``title`` is set to ``None`` and ``review`` to ``True``; otherwise
        ``review`` is set to ``False``. ``last_modified_date`` is refreshed
        in every case.
        """
        print(f"update title for {self.url}")
        title = None
        try:
            # Bound the request so an unresponsive host cannot block forever.
            response = requests.get(self.url, timeout=30)
            soup = BeautifulSoup(response.content, "html.parser")
            if soup.title is not None and soup.title.string is not None:
                # Store a plain str rather than the parser's string object.
                title = str(soup.title.string)
        except Exception:
            # Best effort: any fetch/parse problem just flags the record for
            # review. KeyboardInterrupt/SystemExit are deliberately not caught.
            title = None
        self.title = title
        self.review = title is None
        self.last_modified_date = datetime.now()

    def download_file(self, download_dir: str, dl_tool: str):
        """Run ``dl_tool`` on ``self.url`` inside ``download_dir`` and record the result.

        Args:
            download_dir: Working directory the tool is executed in.
            dl_tool: Executable to invoke; it receives ``self.url`` as its
                only argument.

        When the tool exits with 0 its stdout is parsed for the downloaded
        file name. If a name is found, ``should_download`` is cleared and
        ``file_name`` / ``cloud_link`` are set (``cloud_link`` holds the
        absolute local path). If no name is found the record is flagged for
        review and kept marked for download. A non-zero exit code is
        reported on stdout and leaves those fields untouched.
        ``last_modified_date`` is refreshed in every case.
        """
        print(f"download file for {self.url} to {download_dir}")
        result = subprocess.run(
            [dl_tool, self.url],
            cwd=download_dir,
            capture_output=True,
            text=True,
        )
        if result.returncode == 0:
            # Collapse runs of spaces before the output is split into lines.
            output = re.sub(' +', ' ', result.stdout)
            file_name = self.__parse_output__(output.splitlines())
            if file_name is None:
                self.review = True
                self.should_download = True
                self.file_name = None
            else:
                # The tool ran with cwd=download_dir, so a relative name in
                # its output is relative to that directory, not to this
                # process's working directory. An absolute name is kept as is.
                downloaded = Path(download_dir or '.') / file_name
                self.should_download = False
                self.file_name = downloaded.name
                self.cloud_link = str(downloaded.absolute())
        else:
            print(
                f"download failed for {self.url} "
                f"(exit code {result.returncode}): {result.stderr.strip()}"
            )
        self.last_modified_date = datetime.now()

    def __parse_output__(self, lines_list):
        """Extract the downloaded file name from the tool's output lines.

        Two line shapes are recognised:

        * ``[download] Destination: <name>``
        * ``[download] <name> has already been downloaded`` (trailing text
          after the marker is ignored)

        The last matching line wins. ``self.file_name`` is reset to ``None``
        first and then set to the extracted name, which is also returned;
        ``None`` is returned when no line matches.
        """
        download_prefix = '[download] '
        destination_prefix = '[download] Destination: '
        already_marker = ' has already been downloaded'

        self.file_name = None
        for line in lines_list:
            if line.startswith(destination_prefix):
                name = line[len(destination_prefix):]
            elif line.startswith(download_prefix) and already_marker in line:
                # Cut at the last marker so a file name that itself contains
                # the marker text is kept intact.
                name = line[len(download_prefix):line.rfind(already_marker)]
            else:
                continue
            # An empty remainder is not a usable file name.
            if name:
                self.file_name = name
        return self.file_name
|
|
|
|
|
|
class MediaActor(Base, BaseMixin):
    """ORM model for an actor stored in ``media_actor``."""

    __tablename__ = 'media_actor'
    name = Column(String)
    url = Column(String, unique=True)
    # Reciprocal of MediaActorFile.media_actor, which already names this
    # attribute in its own back_populates.
    media_actor_files = relationship("MediaActorFile", back_populates="media_actor")

    def import_dict(self, import_data: Dict[str, Any]):
        """Copy every persisted field from ``import_data`` onto this instance.

        Fields are assigned in a fixed order; a missing key raises
        ``KeyError`` and leaves the fields assigned before it in place.
        """
        for field in (
            'id',
            'created_date',
            'last_modified_date',
            'version',
            'name',
            'url',
        ):
            setattr(self, field, import_data[field])

    def export_dict(self) -> Dict[str, Any]:
        """Return the persisted fields as a plain dict.

        The two date fields are converted with ``str()``; every other value
        is returned as stored.
        """
        return {
            'id': self.id,
            'created_date': str(self.created_date),
            'last_modified_date': str(self.last_modified_date),
            'version': self.version,
            'name': self.name,
            'url': self.url,
        }
|
|
|
|
|
|
class MediaActorFile(Base, BaseMixin):
    """Association row in ``media_actor_file`` linking an actor to a media file."""

    __tablename__ = 'media_actor_file'

    # Actor side of the link (required).
    media_actor_id = Column(
        String,
        ForeignKey("media_actor.id"),
        nullable=False,
    )
    media_actor = relationship(
        "MediaActor",
        back_populates="media_actor_files",
    )

    # File side of the link (optional).
    media_file_id = Column(
        String,
        ForeignKey("media_file.id"),
        nullable=True,
    )
    media_file = relationship(
        "MediaFile",
        back_populates="media_actor_files",
    )

    def import_dict(self, import_data: Dict[AnyStr, Any]):
        """Populate this row from ``import_data``.

        Keys are read in a fixed order, so a missing key raises ``KeyError``
        after the earlier fields have already been assigned.
        """
        ordered_keys = (
            'id',
            'created_date',
            'last_modified_date',
            'version',
            'media_actor_id',
            'media_file_id',
        )
        for key in ordered_keys:
            setattr(self, key, import_data[key])

    def export_dict(self) -> Dict[AnyStr, Any]:
        """Serialise this row to a dict; the two dates are passed through ``str()``."""
        return {
            'id': self.id,
            'created_date': str(self.created_date),
            'last_modified_date': str(self.last_modified_date),
            'version': self.version,
            'media_actor_id': self.media_actor_id,
            'media_file_id': self.media_file_id,
        }
|
|
|
|
class MediaArticle(Base, BaseMixin):
    """ORM model for an article stored in ``media_article``."""

    __tablename__ = 'media_article'

    review = Column(Boolean)
    title = Column(String)
    url = Column(String, unique=True)

    def import_dict(self, import_data: Dict[AnyStr, Any]):
        """Populate this article from ``import_data``.

        Keys are read in a fixed order, so a missing key raises ``KeyError``
        after the earlier fields have already been assigned.
        """
        ordered_keys = (
            'id',
            'created_date',
            'last_modified_date',
            'version',
            'review',
            'title',
            'url',
        )
        for key in ordered_keys:
            setattr(self, key, import_data[key])

    def export_dict(self) -> Dict[AnyStr, Any]:
        """Serialise this article to a dict; the two dates are passed through ``str()``."""
        return {
            'id': self.id,
            'created_date': str(self.created_date),
            'last_modified_date': str(self.last_modified_date),
            'version': self.version,
            'review': self.review,
            'title': self.title,
            'url': self.url,
        }
|
|
|
|
|
|
class MediaVideo(Base, BaseMixin):
    """ORM model for a video stored in ``media_video``."""

    __tablename__ = 'media_video'

    cloud_link = Column(String)
    file_name = Column(String)
    path = Column(String)
    review = Column(Boolean)
    title = Column(String)
    url = Column(String, unique=True)
    should_download = Column(Boolean)

    def import_dict(self, import_data: Dict[AnyStr, Any]):
        """Populate this video from ``import_data``.

        Keys are read in a fixed order, so a missing key raises ``KeyError``
        after the earlier fields have already been assigned.
        """
        ordered_keys = (
            'id',
            'created_date',
            'last_modified_date',
            'version',
            'cloud_link',
            'file_name',
            'path',
            'review',
            'title',
            'url',
            'should_download',
        )
        for key in ordered_keys:
            setattr(self, key, import_data[key])

    def export_dict(self) -> Dict[AnyStr, Any]:
        """Serialise this video to a dict; the two dates are passed through ``str()``."""
        return {
            'id': self.id,
            'created_date': str(self.created_date),
            'last_modified_date': str(self.last_modified_date),
            'version': self.version,
            'cloud_link': self.cloud_link,
            'file_name': self.file_name,
            'path': self.path,
            'review': self.review,
            'title': self.title,
            'url': self.url,
            'should_download': self.should_download,
        }
|