Add source

2025-12-04 00:12:56 +03:00
parent b75875df5e
commit 0cb7045e7a
75 changed files with 9055 additions and 0 deletions

4
bot/utils/__init__.py Normal file
View File

@@ -0,0 +1,4 @@
"""
Bot utilities
"""

147
bot/utils/file_cleanup.py Normal file
View File

@@ -0,0 +1,147 @@
"""
Utilities for cleaning up old files
"""
from pathlib import Path
from datetime import datetime, timedelta
import logging
import asyncio
logger = logging.getLogger(__name__)
# Constants for time intervals
SECONDS_PER_HOUR = 3600
# Maximum file age before deletion (24 hours)
MAX_FILE_AGE_HOURS = 24
MAX_FILE_AGE = timedelta(hours=MAX_FILE_AGE_HOURS)
# Default cleanup interval (6 hours)
DEFAULT_CLEANUP_INTERVAL_HOURS = 6
# Queue of files to delete (files that couldn't be deleted on first attempt)
_files_to_cleanup: set[str] = set()
_files_to_cleanup_lock = asyncio.Lock()
def add_file_to_cleanup_queue(file_path: str):
"""
Add file to cleanup queue
Args:
file_path: Path to file
"""
    _files_to_cleanup.add(file_path)
logger.debug(f"File added to cleanup queue: {file_path}")
async def cleanup_queued_files():
"""
Delete files from queue
"""
    async with _files_to_cleanup_lock:
if not _files_to_cleanup:
return
files_to_remove = list(_files_to_cleanup)
_files_to_cleanup.clear()
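        # Local import: likely deferred to avoid a circular import with the sender module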
from bot.modules.media_loader.sender import delete_file
for file_path in files_to_remove:
try:
                await delete_file(file_path, max_retries=1)  # single attempt; earlier retries already failed
except Exception as e:
logger.warning(f"Failed to delete file from queue: {file_path}: {e}")
async def cleanup_old_files(downloads_dir: str = "downloads"):
"""
Clean up old files from downloads/ directory
Args:
downloads_dir: Path to downloads directory
"""
try:
downloads_path = Path(downloads_dir)
if not downloads_path.exists():
return
now = datetime.now()
deleted_count = 0
total_size = 0
for file_path in downloads_path.iterdir():
if file_path.is_file():
try:
                    # Stat once to get modification time and size together
                    stat_result = file_path.stat()
                    mtime = datetime.fromtimestamp(stat_result.st_mtime)
                    age = now - mtime
                    # Delete files older than MAX_FILE_AGE
                    if age > MAX_FILE_AGE:
                        file_size = stat_result.st_size
file_path.unlink()
deleted_count += 1
total_size += file_size
logger.debug(f"Deleted old file: {file_path.name} (age: {age})")
except Exception as e:
logger.warning(f"Failed to delete file {file_path}: {e}")
if deleted_count > 0:
logger.info(f"Cleaned up {deleted_count} old files, freed {total_size / (1024*1024):.2f} MB")
except Exception as e:
logger.error(f"Error cleaning up old files: {e}", exc_info=True)
async def cleanup_files_periodically(
downloads_dir: str = "downloads",
interval_hours: int = DEFAULT_CLEANUP_INTERVAL_HOURS
) -> None:
"""
Periodically clean up old files
Args:
downloads_dir: Path to downloads directory
interval_hours: Interval between cleanups in hours
"""
while True:
try:
await asyncio.sleep(interval_hours * SECONDS_PER_HOUR)
await cleanup_old_files(downloads_dir)
except asyncio.CancelledError:
logger.info("File cleanup task stopped")
break
except Exception as e:
logger.error(f"Error in file cleanup task: {e}", exc_info=True)
def get_downloads_dir_size(downloads_dir: str = "downloads") -> int:
"""
Get total size of downloads/ directory
Args:
downloads_dir: Path to downloads directory
Returns:
Size in bytes
"""
try:
downloads_path = Path(downloads_dir)
if not downloads_path.exists():
return 0
total_size = 0
for file_path in downloads_path.rglob('*'):
if file_path.is_file():
try:
total_size += file_path.stat().st_size
except Exception:
pass
return total_size
except Exception as e:
logger.error(f"Error calculating directory size: {e}")
return 0
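
A minimal wiring sketch for these cleanup helpers; main() and the queued path below are illustrative, not part of this commit. Note that cleanup_files_periodically sleeps before its first pass, so the first sweep happens one full interval after startup.

import asyncio

from bot.utils.file_cleanup import (
    add_file_to_cleanup_queue,
    cleanup_files_periodically,
    cleanup_queued_files,
)

async def main():
    # Run the periodic sweep in the background; cancel it on shutdown
    cleanup_task = asyncio.create_task(cleanup_files_periodically("downloads"))
    try:
        # ... bot runs here ...
        add_file_to_cleanup_queue("downloads/stale.mp4")  # illustrative path
        await cleanup_queued_files()  # retry deletions that failed earlier
    finally:
        cleanup_task.cancel()  # triggers the CancelledError branch above
        await asyncio.gather(cleanup_task, return_exceptions=True)

asyncio.run(main())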

121
bot/utils/file_processor.py Normal file
View File

@@ -0,0 +1,121 @@
"""
File processing (archives, thumbnails)
"""
import asyncio
import logging
from pathlib import Path
from typing import Optional
logger = logging.getLogger(__name__)
async def generate_thumbnail(video_path: str, output_path: str, size: tuple = (320, 240)) -> bool:
"""
Generate thumbnail for video using ffmpeg
Args:
video_path: Path to video file
output_path: Path to save thumbnail
size: Thumbnail size (width, height)
Returns:
True if successful, False otherwise
"""
try:
        # Check if ffmpeg is available
try:
result = await asyncio.create_subprocess_exec(
'ffmpeg', '-version',
stdout=asyncio.subprocess.PIPE,
stderr=asyncio.subprocess.PIPE
)
await result.wait()
            if result.returncode != 0:
                logger.warning("ffmpeg check failed, skipping thumbnail generation")
                return False
        except FileNotFoundError:
            logger.warning("ffmpeg is not installed, skipping thumbnail generation")
            return False
# Generate thumbnail from middle of video
output_file = Path(output_path)
output_file.parent.mkdir(parents=True, exist_ok=True)
# Get video duration
duration_cmd = [
'ffprobe', '-v', 'error', '-show_entries', 'format=duration',
'-of', 'default=noprint_wrappers=1:nokey=1', str(video_path)
]
proc = await asyncio.create_subprocess_exec(
*duration_cmd,
stdout=asyncio.subprocess.PIPE,
stderr=asyncio.subprocess.PIPE
)
stdout, _ = await proc.communicate()
if proc.returncode != 0:
logger.warning("Failed to get video duration")
# Use 1 second as default
seek_time = 1
else:
try:
duration = float(stdout.decode().strip())
seek_time = duration / 2 # Middle of video
except (ValueError, IndexError):
seek_time = 1
# Generate thumbnail
        # Seek before -i (input seeking) so ffmpeg jumps straight to the frame
        cmd = [
            'ffmpeg', '-ss', str(seek_time),
            '-i', str(video_path),
            '-vframes', '1',
            '-vf', f'scale={size[0]}:{size[1]}',
            '-y',  # Overwrite if exists
            str(output_path)
        ]
proc = await asyncio.create_subprocess_exec(
*cmd,
stdout=asyncio.subprocess.PIPE,
stderr=asyncio.subprocess.PIPE
)
await proc.wait()
if proc.returncode == 0 and Path(output_path).exists():
logger.info(f"Thumbnail created: {output_path}")
return True
else:
logger.warning(f"Failed to create thumbnail for {video_path}")
return False
except Exception as e:
logger.error(f"Error generating thumbnail: {e}", exc_info=True)
return False
async def extract_archive(archive_path: str, output_dir: str, password: Optional[str] = None) -> bool:
"""
Extract archive
Args:
archive_path: Path to archive
output_dir: Directory for extraction
password: Archive password (if required)
Returns:
True if successful, False otherwise
"""
try:
# TODO: Implement archive extraction
# Support zip, rar, 7z
logger.warning("Archive extraction not implemented yet")
return False
except Exception as e:
logger.error(f"Error extracting archive: {e}")
return False
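
extract_archive is still a stub, so here is a hedged, zip-only sketch of what the TODO could look like using only the standard library; rar and 7z would need third-party packages (e.g. rarfile, py7zr), and the helper names here are hypothetical.

import asyncio
import zipfile
from pathlib import Path
from typing import Optional

def _extract_zip_sync(archive_path: str, output_dir: str, password: Optional[str]) -> None:
    # Blocking extraction; run it off the event loop via asyncio.to_thread
    Path(output_dir).mkdir(parents=True, exist_ok=True)
    with zipfile.ZipFile(archive_path) as zf:
        zf.extractall(output_dir, pwd=password.encode() if password else None)

async def extract_zip(archive_path: str, output_dir: str, password: Optional[str] = None) -> bool:
    try:
        await asyncio.to_thread(_extract_zip_sync, archive_path, output_dir, password)
        return True
    except (zipfile.BadZipFile, RuntimeError, OSError):
        # RuntimeError covers missing or wrong passwords for encrypted entries
        return False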

121
bot/utils/file_splitter.py Normal file
View File

@@ -0,0 +1,121 @@
"""
Utilities for splitting large files into parts
"""
from pathlib import Path
from typing import List, Optional
import logging
import aiofiles
logger = logging.getLogger(__name__)
# Maximum part size (1.9 GB for safety)
MAX_PART_SIZE = int(1.9 * 1024 * 1024 * 1024) # 1.9 GB
async def split_file(file_path: str, part_size: int = MAX_PART_SIZE) -> List[str]:
"""
Split file into parts
Args:
file_path: Path to source file
part_size: Size of each part in bytes
Returns:
List of paths to file parts
"""
file = Path(file_path)
if not file.exists():
raise FileNotFoundError(f"File not found: {file_path}")
file_size = file.stat().st_size
if file_size <= part_size:
# File doesn't need to be split
return [str(file)]
parts = []
part_number = 1
try:
async with aiofiles.open(file_path, 'rb') as source_file:
while True:
part_path = file.parent / f"{file.stem}.part{part_number:03d}{file.suffix}"
parts.append(str(part_path))
async with aiofiles.open(part_path, 'wb') as part_file:
bytes_written = 0
while bytes_written < part_size:
chunk_size = min(8192, part_size - bytes_written)
chunk = await source_file.read(chunk_size)
if not chunk:
# End of file reached
break
await part_file.write(chunk)
bytes_written += len(chunk)
if bytes_written == 0:
# No data to write, remove empty part
part_path.unlink()
parts.pop()
break
if bytes_written < part_size:
# End of file reached
break
part_number += 1
logger.info(f"File {file_path} split into {len(parts)} parts")
return parts
    except Exception as e:
        # Clean up partially created parts on error
        for part_path in parts:
            try:
                Path(part_path).unlink()
            except Exception:
                pass
        raise RuntimeError(f"Error splitting file {file_path}: {e}") from e
async def delete_file_parts(parts: List[str]) -> None:
"""
Delete all file parts
Args:
parts: List of paths to file parts
"""
for part_path in parts:
try:
file = Path(part_path)
if file.exists():
file.unlink()
logger.debug(f"Deleted file part: {part_path}")
except Exception as e:
logger.warning(f"Failed to delete file part {part_path}: {e}")
def get_part_info(parts: List[str]) -> dict:
"""
Get information about file parts
Args:
parts: List of paths to file parts
Returns:
Dictionary with information about parts
"""
total_size = 0
for part_path in parts:
file = Path(part_path)
if file.exists():
total_size += file.stat().st_size
return {
"total_parts": len(parts),
"total_size": total_size,
"parts": parts
}
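
A sketch of the intended split, send, clean-up flow; send_part is hypothetical and stands in for the actual upload call.

import asyncio
from bot.utils.file_splitter import split_file, delete_file_parts, get_part_info

async def send_large_file(path: str) -> None:
    parts = await split_file(path)  # returns [path] unchanged if no split is needed
    info = get_part_info(parts)
    print(f"{info['total_parts']} part(s), {info['total_size']} bytes total")
    try:
        for part in parts:
            ...  # send_part(part) would upload each piece here
    finally:
        if len(parts) > 1:  # never delete the original, un-split file
            await delete_file_parts(parts)

asyncio.run(send_large_file("downloads/big.mkv"))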

146
bot/utils/helpers.py Normal file
View File

@@ -0,0 +1,146 @@
"""
Utility functions
"""
import re
import uuid
from typing import Optional, Tuple
def is_valid_url(url: str) -> bool:
"""
Validate URL with protection against dangerous schemes
Args:
url: URL to validate
Returns:
True if URL is valid and safe
"""
if not url or not isinstance(url, str):
return False
# Check URL length (maximum 2048 characters)
if len(url) > 2048:
return False
# Block dangerous schemes
dangerous_schemes = ['file://', 'javascript:', 'data:', 'vbscript:', 'about:']
url_lower = url.lower().strip()
for scheme in dangerous_schemes:
if url_lower.startswith(scheme):
return False
# Check URL format
url_pattern = re.compile(
r'^https?://' # http:// or https://
r'(?:(?:[A-Z0-9](?:[A-Z0-9-]{0,61}[A-Z0-9])?\.)+[A-Z]{2,6}\.?|' # domain...
r'localhost|' # localhost...
r'\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})' # ...or ip
r'(?::\d+)?' # optional port
r'(?:/?|[/?]\S+)$', re.IGNORECASE)
return url_pattern.match(url) is not None
def parse_user_id(text: str) -> Optional[int]:
"""
Parse user_id from text (numeric only)
Args:
text: Text that may contain user_id
Returns:
User ID as integer or None if not a valid number
"""
try:
# Remove @ if present
text = text.replace("@", "").strip()
return int(text)
except (ValueError, AttributeError):
return None
async def resolve_user_identifier(identifier: str) -> Tuple[Optional[int], Optional[str]]:
"""
Resolve user identifier (user_id or username) to user_id
Args:
identifier: User ID (number) or username (with or without @)
Returns:
Tuple of (user_id: Optional[int], error_message: Optional[str])
If user_id is None, error_message contains the reason
"""
# First, try to parse as user_id
user_id = parse_user_id(identifier)
if user_id:
return (user_id, None)
# If not a number, try to resolve username via Telegram API
username = identifier.lstrip('@').strip()
    if not username:
        return (None, "Identifier cannot be empty")
try:
from bot.modules.task_scheduler.executor import get_app_client
app_client = get_app_client()
if not app_client:
return (None, "Telegram клиент не инициализирован. Попробуйте использовать User ID.")
# Try to get user by username via get_chat
# Note: This only works if bot has already interacted with the user
chat = await app_client.get_chat(username)
if chat and hasattr(chat, 'id'):
return (chat.id, None)
        else:
            return (None, f"User @{username} not found via the Telegram API")
except Exception as e:
# Log the error but return user-friendly message
import logging
logger = logging.getLogger(__name__)
logger.debug(f"Failed to resolve username {username}: {e}")
return (None, f"Не удалось найти пользователя @{username}. Убедитесь, что бот взаимодействовал с этим пользователем, или используйте User ID.")
def format_file_size(size_bytes: int) -> str:
"""Format file size"""
for unit in ['B', 'KB', 'MB', 'GB', 'TB']:
if size_bytes < 1024.0:
return f"{size_bytes:.2f} {unit}"
size_bytes /= 1024.0
return f"{size_bytes:.2f} PB"
def format_duration(seconds) -> str:
"""
Format duration
Args:
seconds: Duration in seconds (int or float)
Returns:
Formatted string in "HH:MM:SS" or "MM:SS" format
"""
# Convert to int as we don't need fractional seconds for display
seconds = int(seconds) if seconds else 0
hours = seconds // 3600
minutes = (seconds % 3600) // 60
secs = seconds % 60
if hours > 0:
return f"{hours:02d}:{minutes:02d}:{secs:02d}"
return f"{minutes:02d}:{secs:02d}"
def generate_unique_task_id() -> int:
"""
Generate unique task ID
Uses UUID to guarantee uniqueness
Returns:
Unique 63-bit integer ID
"""
    # Mask the UUID down to its low 63 bits so the ID fits in a signed 64-bit integer
return uuid.uuid4().int & ((1 << 63) - 1)
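
Expected behaviour of the formatting and parsing helpers, as checked examples (the values follow directly from the code above):

from bot.utils.helpers import format_file_size, format_duration, parse_user_id

assert format_file_size(1536) == "1.50 KB"
assert format_duration(3725) == "01:02:05"  # HH:MM:SS once there is at least one hour
assert format_duration(125) == "02:05"      # MM:SS otherwise
assert parse_user_id("@123456") == 123456   # leading @ is stripped
assert parse_user_id("not_a_number") is None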

42
bot/utils/logger.py Normal file
View File

@@ -0,0 +1,42 @@
"""
Logging configuration
"""
import logging
from pathlib import Path
from bot.config import settings
def setup_logger():
"""Setup logging system"""
# Create log directory
log_dir = Path(settings.LOG_FILE).parent
log_dir.mkdir(parents=True, exist_ok=True)
# Setup log format
log_format = "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
date_format = "%Y-%m-%d %H:%M:%S"
# Log level
log_level = getattr(logging, settings.LOG_LEVEL.upper(), logging.INFO)
# Setup file handler
file_handler = logging.FileHandler(settings.LOG_FILE, encoding="utf-8")
file_handler.setLevel(log_level)
file_handler.setFormatter(logging.Formatter(log_format, date_format))
# Setup console handler
console_handler = logging.StreamHandler()
console_handler.setLevel(log_level)
console_handler.setFormatter(logging.Formatter(log_format, date_format))
# Setup root logger
root_logger = logging.getLogger()
root_logger.setLevel(log_level)
root_logger.addHandler(file_handler)
root_logger.addHandler(console_handler)
# Setup loggers for external libraries
logging.getLogger("pyrogram").setLevel(logging.WARNING)
logging.getLogger("httpx").setLevel(logging.WARNING)

82
bot/utils/telegram_user.py Normal file
View File

@@ -0,0 +1,82 @@
"""
Utilities for working with Telegram user information
"""
from typing import Optional, Dict
from pyrogram import Client
import logging
logger = logging.getLogger(__name__)
def get_app_client() -> Optional[Client]:
"""
Get Pyrogram client from executor.py.
Uses a single source of client to avoid conflicts.
Returns:
Pyrogram client instance, or None if import fails
"""
try:
from bot.modules.task_scheduler.executor import get_app_client as get_executor_client
return get_executor_client()
except ImportError:
logger.warning("Failed to import get_app_client from executor")
return None
async def get_user_info(user_id: int) -> Optional[Dict[str, Optional[str]]]:
"""
Get user information from Telegram API.
Args:
user_id: Telegram user ID
Returns:
Dictionary with user information:
{
"username": str or None,
"first_name": str or None,
"last_name": str or None
}
or None in case of error
"""
app_client = get_app_client()
if not app_client:
logger.warning(f"Pyrogram client not set, cannot get user information for {user_id}")
return None
try:
# Get user information through Pyrogram
user = await app_client.get_users(user_id)
if user:
return {
"username": user.username,
"first_name": user.first_name,
"last_name": user.last_name
}
else:
logger.warning(f"User {user_id} not found in Telegram")
return None
except Exception as e:
logger.error(f"Error getting user information for {user_id}: {e}", exc_info=True)
return None
async def get_username_by_id(user_id: int) -> Optional[str]:
"""
Get username by user ID.
Args:
user_id: Telegram user ID
Returns:
Username or None if not found
"""
user_info = await get_user_info(user_id)
if user_info:
return user_info.get("username")
return None
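
A minimal usage sketch; the user ID is a placeholder and the Pyrogram client must already be initialized via the executor.

import asyncio
from bot.utils.telegram_user import get_username_by_id

async def demo() -> None:
    username = await get_username_by_id(123456789)  # placeholder ID
    print(username or "no username set, or lookup failed")

asyncio.run(demo())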

View File

@@ -0,0 +1,161 @@
"""
Utilities for updating user information from Telegram API
"""
import asyncio
import logging
from typing import Optional, Dict
from sqlalchemy.ext.asyncio import AsyncSession
from sqlalchemy import select
from shared.database.session import get_async_session_local
from shared.database.models import User
from bot.utils.telegram_user import get_user_info
logger = logging.getLogger(__name__)
# Constants for time intervals
SECONDS_PER_HOUR = 3600
SECONDS_PER_MINUTE = 60
TELEGRAM_API_DELAY = 0.5 # Delay between Telegram API requests in seconds
USER_INFO_UPDATE_INTERVAL_HOURS = 24 # Interval for updating user information
ERROR_RETRY_DELAY_SECONDS = 60 # Delay before retry on error
def _update_user_fields(user: User, user_info: Dict[str, Optional[str]]) -> bool:
"""
Update user fields from Telegram API information.
Updates username, first_name, and last_name if they are missing
and available in user_info.
Args:
user: User object from database
user_info: Dictionary with user information from Telegram API
Returns:
True if any fields were updated, False otherwise
"""
updated = False
if not user.username and user_info.get("username"):
user.username = user_info.get("username")
updated = True
if not user.first_name and user_info.get("first_name"):
user.first_name = user_info.get("first_name")
updated = True
if not user.last_name and user_info.get("last_name"):
user.last_name = user_info.get("last_name")
updated = True
return updated
async def update_user_info_from_telegram(
user_id: int,
db_session: Optional[AsyncSession] = None
) -> bool:
"""
Update user information from Telegram API.
Fetches user information from Telegram API and updates the database
with missing fields (username, first_name, last_name).
Args:
user_id: Telegram user ID
db_session: Database session (if None, creates a new one)
Returns:
True if information was updated, False otherwise
"""
try:
# Get user information from Telegram API
user_info = await get_user_info(user_id)
if not user_info:
return False
# Update information in database
if db_session:
# Use provided session
user = await db_session.get(User, user_id)
if user:
if _update_user_fields(user, user_info):
await db_session.commit()
logger.info(f"User {user_id} information updated from Telegram API")
return True
else:
# Create new session
async with get_async_session_local()() as session:
user = await session.get(User, user_id)
if user:
if _update_user_fields(user, user_info):
await session.commit()
logger.info(f"User {user_id} information updated from Telegram API")
return True
return False
except Exception as e:
logger.error(f"Error updating user {user_id} information: {e}", exc_info=True)
return False
async def update_users_without_info_periodically(
interval_hours: int = USER_INFO_UPDATE_INTERVAL_HOURS
) -> None:
"""
Periodically update information for users without username or first_name.
Runs in an infinite loop, updating user information at specified intervals.
Can be cancelled with asyncio.CancelledError.
Args:
interval_hours: Interval between updates in hours (default: 24 hours)
"""
logger.info("Background task for updating user information started")
while True:
try:
await asyncio.sleep(interval_hours * SECONDS_PER_HOUR)
logger.info("Starting update of user information for users without username or first_name")
async with get_async_session_local()() as session:
# Get users without username or first_name
result = await session.execute(
select(User).where(
                    User.username.is_(None) | User.first_name.is_(None)
)
)
users = result.scalars().all()
updated_count = 0
error_count = 0
for user in users:
try:
# Update user information
if await update_user_info_from_telegram(user.user_id, db_session=session):
updated_count += 1
# Delay between requests to avoid overloading Telegram API
await asyncio.sleep(TELEGRAM_API_DELAY)
except Exception as e:
error_count += 1
logger.warning(f"Error updating user {user.user_id}: {e}")
if updated_count > 0 or error_count > 0:
logger.info(
f"User information update completed: "
f"updated {updated_count}, errors {error_count}, total checked {len(users)}"
)
else:
logger.debug("No users found for update")
except asyncio.CancelledError:
logger.info("User information update task stopped")
break
except Exception as e:
logger.error(f"Error in user information update task: {e}", exc_info=True)
# Continue working even on error
await asyncio.sleep(ERROR_RETRY_DELAY_SECONDS)
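
Hedged startup wiring for this background updater; the module path bot.utils.user_info_updater is assumed here, since the filename is not shown in this view.

import asyncio
from bot.utils.user_info_updater import update_users_without_info_periodically  # assumed path

async def main() -> None:
    task = asyncio.create_task(update_users_without_info_periodically())
    try:
        ...  # bot runs here
    finally:
        task.cancel()  # hits the CancelledError branch for a clean stop
        await asyncio.gather(task, return_exceptions=True)

asyncio.run(main())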