add script for import data
This commit is contained in:
@@ -0,0 +1,100 @@
|
||||
import re
|
||||
import subprocess
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
|
||||
import requests
|
||||
from bs4 import BeautifulSoup
|
||||
from sqlalchemy import Column, DateTime, Integer, String, ForeignKey
|
||||
from sqlalchemy.dialects.mysql import BIT
|
||||
from sqlalchemy.orm import relationship
|
||||
|
||||
from .base import Base, BaseMixin, BaseVideoMixin
|
||||
|
||||
|
||||
class MediaFile(Base, BaseMixin, BaseVideoMixin):
|
||||
__tablename__ = 'media_file'
|
||||
media_actor_files = relationship("MediaActorFile")
|
||||
|
||||
def __repr__(self):
|
||||
return f'MediaFile({self.id} {self.title} {self.title})'
|
||||
|
||||
def __str__(self):
|
||||
return f'{self.title}({self.id})'
|
||||
|
||||
def update_title(self) -> None:
|
||||
print(f"update title for {self.url}")
|
||||
try:
|
||||
r = requests.get(self.url)
|
||||
soup = BeautifulSoup(r.content, "html.parser")
|
||||
title = soup.title.string
|
||||
self.title = title
|
||||
self.review = 0
|
||||
except:
|
||||
self.title = None
|
||||
self.review = 1
|
||||
self.last_modified_date = datetime.now()
|
||||
|
||||
def download_file(self, download_dir: str, dl_tool: str):
|
||||
print(f"download file for {self.url} to {download_dir}")
|
||||
result = subprocess.run([dl_tool, self.url], cwd=download_dir, capture_output=True, text=True)
|
||||
if result.returncode == 0:
|
||||
output = result.stdout
|
||||
output = re.sub(' +', ' ', output)
|
||||
lines_list = output.splitlines()
|
||||
file_name = self.__parse_output__(lines_list)
|
||||
if file_name is None:
|
||||
self.review = 1
|
||||
self.should_download = 1
|
||||
self.file_name = None
|
||||
else:
|
||||
download_file = Path(file_name)
|
||||
self.should_download = 0
|
||||
self.file_name = download_file.name
|
||||
self.cloud_link = str(download_file.absolute())
|
||||
self.last_modified_date = datetime.now()
|
||||
|
||||
def __parse_output__(self, lines_list):
|
||||
self.file_name = None
|
||||
for line in lines_list:
|
||||
if 'has already been downloaded' in line:
|
||||
end_len = len(' has already been downloaded')
|
||||
self.file_name = line[11:-end_len]
|
||||
if 'Destination' in line:
|
||||
line_len = len(line)
|
||||
start_len = len('[download] Destination: ')
|
||||
file_len = line_len - start_len
|
||||
self.file_name = line[-file_len:]
|
||||
return self.file_name
|
||||
|
||||
|
||||
class MediaActor(Base, BaseMixin):
|
||||
__tablename__ = 'media_actor'
|
||||
name = Column(String(255))
|
||||
media_actor_files = relationship("MediaActorFile")
|
||||
|
||||
|
||||
class MediaActorFile(Base, BaseMixin):
|
||||
__tablename__ = 'media_actor_file'
|
||||
media_actor_id = Column(String(255), ForeignKey("media_actor.id"), nullable=False)
|
||||
media_actor = relationship("MediaActor", back_populates="media_actor_files")
|
||||
media_file_id = Column(String(255), ForeignKey("media_file.id"), nullable=True)
|
||||
media_file = relationship("MediaFile", back_populates="media_actor_files")
|
||||
|
||||
|
||||
class MediaArticle(Base, BaseMixin):
|
||||
__tablename__ = 'media_article'
|
||||
review = Column(BIT(1))
|
||||
title = Column(String(255))
|
||||
url = Column(String(255), unique=True)
|
||||
|
||||
|
||||
class MediaVideo(Base, BaseMixin):
|
||||
__tablename__ = 'media_video'
|
||||
cloud_link = Column(String(255))
|
||||
file_name = Column(String(255))
|
||||
path = Column(String(255))
|
||||
review = Column(BIT(1))
|
||||
title = Column(String(255))
|
||||
url = Column(String(255), unique=True)
|
||||
should_download = Column(BIT(1))
|
||||
Reference in New Issue
Block a user