import sources from develop/0.1.0

This commit is contained in:
Thomas Peetz
2025-04-29 12:52:55 +02:00
parent 304005822c
commit 4c96de27db
976 changed files with 58265 additions and 0 deletions
+101
View File
@@ -0,0 +1,101 @@
import logging
import re
import subprocess
from datetime import datetime
from pathlib import Path
import requests
from bs4 import BeautifulSoup
from sqlalchemy import Column, String, ForeignKey
from sqlalchemy.dialects.mysql import BIT
from sqlalchemy.orm import relationship
from src.db.models.base import Base, BaseMixin, BaseVideoMixin
class MediaFile(Base, BaseMixin, BaseVideoMixin):
    """ORM model for the ``media_file`` table with title-scraping and download helpers.

    No columns are declared in this class body. The attributes used below
    (``id``, ``url``, ``title``, ``review``, ``file_name``, ``cloud_link``,
    ``should_download``, ``last_modified_date``) are presumably supplied by
    ``BaseMixin`` / ``BaseVideoMixin`` -- TODO confirm against those mixins.
    """

    __tablename__ = 'media_file'

    # Counterpart of MediaActorFile.media_file, which already names this
    # attribute in its back_populates; declaring it here as well keeps both
    # sides of the relationship in sync in either direction.
    media_actor_files = relationship("MediaActorFile", back_populates="media_file")

    def __repr__(self):
        """Return a debugging representation with id, title and url."""
        return f'MediaFile({self.id} {self.title} {self.url})'

    def __str__(self):
        """Return ``<title>(<id>)``."""
        return f'{self.title}({self.id})'

    def update_title(self) -> None:
        """Fetch ``self.url`` and store the page's ``<title>`` text in ``self.title``.

        On success ``review`` is set to 0. When the request fails, the server
        answers with an HTTP error status, or the page has no usable title
        text, ``title`` is set to ``None`` and ``review`` to 1.
        ``last_modified_date`` is refreshed in every case.
        """
        logging.info(f"update title for {self.url}")
        title = None
        try:
            # A timeout keeps an unresponsive host from blocking the caller forever.
            response = requests.get(self.url, timeout=30)
            # Never record the title of an error page (404, 500, ...) as the media title.
            response.raise_for_status()
            soup = BeautifulSoup(response.content, "html.parser")
            if soup.title is not None and soup.title.string is not None:
                # Convert to a plain str so the stored value does not keep the
                # whole parse tree alive through the NavigableString.
                title = str(soup.title.string)
        except Exception:
            # Best effort: any failure only flags the record for review, but
            # the reason is logged instead of being swallowed silently.
            logging.exception(f"could not update title for {self.url}")
        self.title = title
        self.review = 0 if title is not None else 1
        self.last_modified_date = datetime.now()

    def download_file(self, download_dir: str, dl_tool: str):
        """Run ``dl_tool`` on ``self.url`` inside ``download_dir`` and record the outcome.

        Args:
            download_dir: Working directory the download tool is started in.
            dl_tool: Executable to run; it is invoked as ``[dl_tool, self.url]``.

        When the tool exits with status 0 and a file name can be parsed from
        its output, ``file_name`` and ``cloud_link`` are set and
        ``should_download`` is cleared. When no file name can be parsed, the
        record is flagged with ``review = 1`` and ``should_download = 1``.
        A non-zero exit status is logged and leaves those fields untouched.
        ``last_modified_date`` is refreshed in every case.
        """
        logging.info(f"download file for {self.url} to {download_dir}")
        result = subprocess.run([dl_tool, self.url], cwd=download_dir, capture_output=True, text=True)
        if result.returncode == 0:
            # Runs of spaces are collapsed before parsing.
            # NOTE(review): this also alters file names that contain
            # consecutive spaces -- confirm this is acceptable.
            output = re.sub(' +', ' ', result.stdout)
            file_name = self.__parse_output__(output.splitlines())
            if file_name is None:
                self.review = 1
                self.should_download = 1
                self.file_name = None
            else:
                # The tool ran with cwd=download_dir, so a relative name in its
                # output is relative to that directory, not to this process's
                # working directory. An absolute name is kept unchanged.
                download_file = Path(download_dir) / file_name
                self.should_download = 0
                self.file_name = download_file.name
                self.cloud_link = str(download_file.absolute())
        else:
            logging.warning(
                f"download of {self.url} failed with exit code {result.returncode}: {result.stderr}"
            )
        self.last_modified_date = datetime.now()

    def __parse_output__(self, lines_list):
        """Extract the downloaded file's name from the download tool's output lines.

        Two line shapes are recognised:

        * ``[download] Destination: <name>``
        * ``[download] <name> has already been downloaded``

        When several lines match, the last one wins. The result is also stored
        in ``self.file_name``.

        Args:
            lines_list: Output of the download tool, one entry per line.

        Returns:
            The parsed file name, or ``None`` when no line matched.
        """
        download_prefix = '[download] '
        destination_prefix = '[download] Destination: '
        already_marker = ' has already been downloaded'

        file_name = None
        for line in lines_list:
            if line.startswith(destination_prefix):
                candidate = line[len(destination_prefix):]
            elif line.startswith(download_prefix) and already_marker in line:
                # Cut at the last marker so a trailing suffix after it is dropped too.
                candidate = line[len(download_prefix):line.rindex(already_marker)]
            else:
                continue
            # An empty remainder is not a file name; keep any earlier match.
            if candidate:
                file_name = candidate
        self.file_name = file_name
        return file_name
class MediaActor(Base, BaseMixin):
    """ORM model for the ``media_actor`` table."""

    __tablename__ = 'media_actor'

    # Name of the actor, at most 255 characters.
    name = Column(String(255))

    # Counterpart of MediaActorFile.media_actor, which already names this
    # attribute in its back_populates; declaring it here as well keeps both
    # sides of the relationship in sync in either direction.
    media_actor_files = relationship("MediaActorFile", back_populates="media_actor")
class MediaActorFile(Base, BaseMixin):
    """ORM model for the ``media_actor_file`` table.

    Each row references one ``media_actor`` row (required) and one
    ``media_file`` row (optional).
    """

    __tablename__ = 'media_actor_file'

    # Required reference to media_actor.id.
    media_actor_id = Column(
        String(255),
        ForeignKey("media_actor.id"),
        nullable=False,
    )
    media_actor = relationship(
        "MediaActor",
        back_populates="media_actor_files",
    )

    # Optional reference to media_file.id.
    media_file_id = Column(
        String(255),
        ForeignKey("media_file.id"),
        nullable=True,
    )
    media_file = relationship(
        "MediaFile",
        back_populates="media_actor_files",
    )
class MediaArticle(Base, BaseMixin):
    """ORM model for the ``media_article`` table."""

    __tablename__ = 'media_article'

    # Single-bit flag; presumably marks rows needing manual review -- TODO confirm.
    review = Column(
        BIT(1),
    )
    # Title text, at most 255 characters.
    title = Column(
        String(255),
    )
    # Article URL; a unique constraint prevents duplicate rows for one URL.
    url = Column(
        String(255),
        unique=True,
    )
class MediaVideo(Base, BaseMixin):
    """ORM model for the ``media_video`` table."""

    __tablename__ = 'media_video'

    # String columns, each limited to 255 characters.
    cloud_link = Column(
        String(255),
    )
    file_name = Column(
        String(255),
    )
    path = Column(
        String(255),
    )
    # Single-bit flag; presumably marks rows needing manual review -- TODO confirm.
    review = Column(
        BIT(1),
    )
    title = Column(
        String(255),
    )
    # Video URL; a unique constraint prevents duplicate rows for one URL.
    url = Column(
        String(255),
        unique=True,
    )
    # Single-bit flag; presumably marks videos still to be downloaded -- TODO confirm.
    should_download = Column(
        BIT(1),
    )