add script for import data

This commit is contained in:
Thomas Peetz
2025-04-08 11:49:52 +02:00
parent 3d97c3360a
commit 10834df92b
14 changed files with 1024 additions and 4 deletions
+100
View File
@@ -0,0 +1,100 @@
import re
import subprocess
from datetime import datetime
from pathlib import Path
import requests
from bs4 import BeautifulSoup
from sqlalchemy import Column, DateTime, Integer, String, ForeignKey
from sqlalchemy.dialects.mysql import BIT
from sqlalchemy.orm import relationship
from .base import Base, BaseMixin, BaseVideoMixin
class MediaFile(Base, BaseMixin, BaseVideoMixin):
__tablename__ = 'media_file'
media_actor_files = relationship("MediaActorFile")
def __repr__(self):
return f'MediaFile({self.id} {self.title} {self.title})'
def __str__(self):
return f'{self.title}({self.id})'
def update_title(self) -> None:
print(f"update title for {self.url}")
try:
r = requests.get(self.url)
soup = BeautifulSoup(r.content, "html.parser")
title = soup.title.string
self.title = title
self.review = 0
except:
self.title = None
self.review = 1
self.last_modified_date = datetime.now()
def download_file(self, download_dir: str, dl_tool: str):
print(f"download file for {self.url} to {download_dir}")
result = subprocess.run([dl_tool, self.url], cwd=download_dir, capture_output=True, text=True)
if result.returncode == 0:
output = result.stdout
output = re.sub(' +', ' ', output)
lines_list = output.splitlines()
file_name = self.__parse_output__(lines_list)
if file_name is None:
self.review = 1
self.should_download = 1
self.file_name = None
else:
download_file = Path(file_name)
self.should_download = 0
self.file_name = download_file.name
self.cloud_link = str(download_file.absolute())
self.last_modified_date = datetime.now()
def __parse_output__(self, lines_list):
self.file_name = None
for line in lines_list:
if 'has already been downloaded' in line:
end_len = len(' has already been downloaded')
self.file_name = line[11:-end_len]
if 'Destination' in line:
line_len = len(line)
start_len = len('[download] Destination: ')
file_len = line_len - start_len
self.file_name = line[-file_len:]
return self.file_name
class MediaActor(Base, BaseMixin):
__tablename__ = 'media_actor'
name = Column(String(255))
media_actor_files = relationship("MediaActorFile")
class MediaActorFile(Base, BaseMixin):
__tablename__ = 'media_actor_file'
media_actor_id = Column(String(255), ForeignKey("media_actor.id"), nullable=False)
media_actor = relationship("MediaActor", back_populates="media_actor_files")
media_file_id = Column(String(255), ForeignKey("media_file.id"), nullable=True)
media_file = relationship("MediaFile", back_populates="media_actor_files")
class MediaArticle(Base, BaseMixin):
__tablename__ = 'media_article'
review = Column(BIT(1))
title = Column(String(255))
url = Column(String(255), unique=True)
class MediaVideo(Base, BaseMixin):
__tablename__ = 'media_video'
cloud_link = Column(String(255))
file_name = Column(String(255))
path = Column(String(255))
review = Column(BIT(1))
title = Column(String(255))
url = Column(String(255), unique=True)
should_download = Column(BIT(1))