Add source

2025-12-04 00:12:56 +03:00
parent b75875df5e
commit 0cb7045e7a
75 changed files with 9055 additions and 0 deletions

4
bot/utils/__init__.py Normal file
View File

@@ -0,0 +1,4 @@
"""
Bot utilities
"""

147
bot/utils/file_cleanup.py Normal file
View File

@@ -0,0 +1,147 @@
"""
Utilities for cleaning up old files
"""
from pathlib import Path
from datetime import datetime, timedelta
import logging
import asyncio
logger = logging.getLogger(__name__)
# Constants for time intervals
SECONDS_PER_HOUR = 3600
# Maximum file age before deletion (24 hours)
MAX_FILE_AGE_HOURS = 24
MAX_FILE_AGE = timedelta(hours=MAX_FILE_AGE_HOURS)
# Default cleanup interval (6 hours)
DEFAULT_CLEANUP_INTERVAL_HOURS = 6
# Queue of files to delete (files that couldn't be deleted on first attempt)
_files_to_cleanup: set[str] = set()
_files_to_cleanup_lock = asyncio.Lock()
def add_file_to_cleanup_queue(file_path: str):
"""
Add file to cleanup queue
Args:
file_path: Path to file
"""
    _files_to_cleanup.add(file_path)
logger.debug(f"File added to cleanup queue: {file_path}")
async def cleanup_queued_files():
"""
Delete files from queue
"""
    async with _files_to_cleanup_lock:
if not _files_to_cleanup:
return
files_to_remove = list(_files_to_cleanup)
_files_to_cleanup.clear()
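        # Local import: likely deferred to avoid a circular import with the sender module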
from bot.modules.media_loader.sender import delete_file
for file_path in files_to_remove:
try:
                await delete_file(file_path, max_retries=1)  # single attempt; earlier retries already failed
except Exception as e:
logger.warning(f"Failed to delete file from queue: {file_path}: {e}")
async def cleanup_old_files(downloads_dir: str = "downloads"):
"""
Clean up old files from downloads/ directory
Args:
downloads_dir: Path to downloads directory
"""
try:
downloads_path = Path(downloads_dir)
if not downloads_path.exists():
return
now = datetime.now()
deleted_count = 0
total_size = 0
for file_path in downloads_path.iterdir():
if file_path.is_file():
try:
                    # Stat once to get modification time and size together
                    stat_result = file_path.stat()
                    mtime = datetime.fromtimestamp(stat_result.st_mtime)
                    age = now - mtime
                    # Delete files older than MAX_FILE_AGE
                    if age > MAX_FILE_AGE:
                        file_size = stat_result.st_size
file_path.unlink()
deleted_count += 1
total_size += file_size
logger.debug(f"Deleted old file: {file_path.name} (age: {age})")
except Exception as e:
logger.warning(f"Failed to delete file {file_path}: {e}")
if deleted_count > 0:
logger.info(f"Cleaned up {deleted_count} old files, freed {total_size / (1024*1024):.2f} MB")
except Exception as e:
logger.error(f"Error cleaning up old files: {e}", exc_info=True)
async def cleanup_files_periodically(
downloads_dir: str = "downloads",
interval_hours: int = DEFAULT_CLEANUP_INTERVAL_HOURS
) -> None:
"""
Periodically clean up old files
Args:
downloads_dir: Path to downloads directory
interval_hours: Interval between cleanups in hours
"""
while True:
try:
await asyncio.sleep(interval_hours * SECONDS_PER_HOUR)
await cleanup_old_files(downloads_dir)
except asyncio.CancelledError:
logger.info("File cleanup task stopped")
break
except Exception as e:
logger.error(f"Error in file cleanup task: {e}", exc_info=True)
def get_downloads_dir_size(downloads_dir: str = "downloads") -> int:
"""
Get total size of downloads/ directory
Args:
downloads_dir: Path to downloads directory
Returns:
Size in bytes
"""
try:
downloads_path = Path(downloads_dir)
if not downloads_path.exists():
return 0
total_size = 0
for file_path in downloads_path.rglob('*'):
if file_path.is_file():
try:
total_size += file_path.stat().st_size
except Exception:
pass
return total_size
except Exception as e:
logger.error(f"Error calculating directory size: {e}")
return 0
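
A minimal wiring sketch for these cleanup helpers; main() and the queued path below are illustrative, not part of this commit. Note that cleanup_files_periodically sleeps before its first pass, so the first sweep happens one full interval after startup.

import asyncio

from bot.utils.file_cleanup import (
    add_file_to_cleanup_queue,
    cleanup_files_periodically,
    cleanup_queued_files,
)

async def main():
    # Run the periodic sweep in the background; cancel it on shutdown
    cleanup_task = asyncio.create_task(cleanup_files_periodically("downloads"))
    try:
        # ... bot runs here ...
        add_file_to_cleanup_queue("downloads/stale.mp4")  # illustrative path
        await cleanup_queued_files()  # retry deletions that failed earlier
    finally:
        cleanup_task.cancel()  # triggers the CancelledError branch above
        await asyncio.gather(cleanup_task, return_exceptions=True)

asyncio.run(main())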

121
bot/utils/file_processor.py Normal file
View File

@@ -0,0 +1,121 @@
"""
File processing (archives, thumbnails)
"""
import asyncio
import logging
from pathlib import Path
from typing import Optional
logger = logging.getLogger(__name__)
async def generate_thumbnail(video_path: str, output_path: str, size: tuple = (320, 240)) -> bool:
"""
Generate thumbnail for video using ffmpeg
Args:
video_path: Path to video file
output_path: Path to save thumbnail
size: Thumbnail size (width, height)
Returns:
True if successful, False otherwise
"""
try:
        # Check if ffmpeg is available
try:
result = await asyncio.create_subprocess_exec(
'ffmpeg', '-version',
stdout=asyncio.subprocess.PIPE,
stderr=asyncio.subprocess.PIPE
)
await result.wait()
            if result.returncode != 0:
                logger.warning("ffmpeg check failed, skipping thumbnail generation")
                return False
        except FileNotFoundError:
            logger.warning("ffmpeg is not installed, skipping thumbnail generation")
            return False
# Generate thumbnail from middle of video
output_file = Path(output_path)
output_file.parent.mkdir(parents=True, exist_ok=True)
# Get video duration
duration_cmd = [
'ffprobe', '-v', 'error', '-show_entries', 'format=duration',
'-of', 'default=noprint_wrappers=1:nokey=1', str(video_path)
]
proc = await asyncio.create_subprocess_exec(
*duration_cmd,
stdout=asyncio.subprocess.PIPE,
stderr=asyncio.subprocess.PIPE
)
stdout, _ = await proc.communicate()
if proc.returncode != 0:
logger.warning("Failed to get video duration")
# Use 1 second as default
seek_time = 1
else:
try:
duration = float(stdout.decode().strip())
seek_time = duration / 2 # Middle of video
except (ValueError, IndexError):
seek_time = 1
# Generate thumbnail
        # Seek before -i (input seeking) so ffmpeg jumps straight to the frame
        cmd = [
            'ffmpeg', '-ss', str(seek_time),
            '-i', str(video_path),
            '-vframes', '1',
            '-vf', f'scale={size[0]}:{size[1]}',
            '-y',  # Overwrite if exists
            str(output_path)
        ]
proc = await asyncio.create_subprocess_exec(
*cmd,
stdout=asyncio.subprocess.PIPE,
stderr=asyncio.subprocess.PIPE
)
await proc.wait()
if proc.returncode == 0 and Path(output_path).exists():
logger.info(f"Thumbnail created: {output_path}")
return True
else:
logger.warning(f"Failed to create thumbnail for {video_path}")
return False
except Exception as e:
logger.error(f"Error generating thumbnail: {e}", exc_info=True)
return False
async def extract_archive(archive_path: str, output_dir: str, password: Optional[str] = None) -> bool:
"""
Extract archive
Args:
archive_path: Path to archive
output_dir: Directory for extraction
password: Archive password (if required)
Returns:
True if successful, False otherwise
"""
try:
# TODO: Implement archive extraction
# Support zip, rar, 7z
logger.warning("Archive extraction not implemented yet")
return False
except Exception as e:
logger.error(f"Error extracting archive: {e}")
return False
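
extract_archive is still a stub, so here is a hedged, zip-only sketch of what the TODO could look like using only the standard library; rar and 7z would need third-party packages (e.g. rarfile, py7zr), and the helper names here are hypothetical.

import asyncio
import zipfile
from pathlib import Path
from typing import Optional

def _extract_zip_sync(archive_path: str, output_dir: str, password: Optional[str]) -> None:
    # Blocking extraction; run it off the event loop via asyncio.to_thread
    Path(output_dir).mkdir(parents=True, exist_ok=True)
    with zipfile.ZipFile(archive_path) as zf:
        zf.extractall(output_dir, pwd=password.encode() if password else None)

async def extract_zip(archive_path: str, output_dir: str, password: Optional[str] = None) -> bool:
    try:
        await asyncio.to_thread(_extract_zip_sync, archive_path, output_dir, password)
        return True
    except (zipfile.BadZipFile, RuntimeError, OSError):
        # RuntimeError covers missing or wrong passwords for encrypted entries
        return False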

121
bot/utils/file_splitter.py Normal file
View File

@@ -0,0 +1,121 @@
"""
Utilities for splitting large files into parts
"""
from pathlib import Path
from typing import List, Optional
import logging
import aiofiles
logger = logging.getLogger(__name__)
# Maximum part size (1.9 GB for safety)
MAX_PART_SIZE = int(1.9 * 1024 * 1024 * 1024) # 1.9 GB
async def split_file(file_path: str, part_size: int = MAX_PART_SIZE) -> List[str]:
"""
Split file into parts
Args:
file_path: Path to source file
part_size: Size of each part in bytes
Returns:
List of paths to file parts
"""
file = Path(file_path)
if not file.exists():
raise FileNotFoundError(f"File not found: {file_path}")
file_size = file.stat().st_size
if file_size <= part_size:
# File doesn't need to be split
return [str(file)]
parts = []
part_number = 1
try:
async with aiofiles.open(file_path, 'rb') as source_file:
while True:
part_path = file.parent / f"{file.stem}.part{part_number:03d}{file.suffix}"
parts.append(str(part_path))
async with aiofiles.open(part_path, 'wb') as part_file:
bytes_written = 0
while bytes_written < part_size:
chunk_size = min(8192, part_size - bytes_written)
chunk = await source_file.read(chunk_size)
if not chunk:
# End of file reached
break
await part_file.write(chunk)
bytes_written += len(chunk)
if bytes_written == 0:
# No data to write, remove empty part
part_path.unlink()
parts.pop()
break
if bytes_written < part_size:
# End of file reached
break
part_number += 1
logger.info(f"File {file_path} split into {len(parts)} parts")
return parts
    except Exception as e:
        # Clean up partially created parts on error
        for part_path in parts:
            try:
                Path(part_path).unlink()
            except Exception:
                pass
        raise RuntimeError(f"Error splitting file {file_path}: {e}") from e
async def delete_file_parts(parts: List[str]) -> None:
"""
Delete all file parts
Args:
parts: List of paths to file parts
"""
for part_path in parts:
try:
file = Path(part_path)
if file.exists():
file.unlink()
logger.debug(f"Deleted file part: {part_path}")
except Exception as e:
logger.warning(f"Failed to delete file part {part_path}: {e}")
def get_part_info(parts: List[str]) -> dict:
"""
Get information about file parts
Args:
parts: List of paths to file parts
Returns:
Dictionary with information about parts
"""
total_size = 0
for part_path in parts:
file = Path(part_path)
if file.exists():
total_size += file.stat().st_size
return {
"total_parts": len(parts),
"total_size": total_size,
"parts": parts
}
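
A sketch of the intended split, send, clean-up flow; send_part is hypothetical and stands in for the actual upload call.

import asyncio
from bot.utils.file_splitter import split_file, delete_file_parts, get_part_info

async def send_large_file(path: str) -> None:
    parts = await split_file(path)  # returns [path] unchanged if no split is needed
    info = get_part_info(parts)
    print(f"{info['total_parts']} part(s), {info['total_size']} bytes total")
    try:
        for part in parts:
            ...  # send_part(part) would upload each piece here
    finally:
        if len(parts) > 1:  # never delete the original, un-split file
            await delete_file_parts(parts)

asyncio.run(send_large_file("downloads/big.mkv"))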

146
bot/utils/helpers.py Normal file
View File

@@ -0,0 +1,146 @@
"""
Utility functions
"""
import re
import uuid
from typing import Optional, Tuple
def is_valid_url(url: str) -> bool:
"""
Validate URL with protection against dangerous schemes
Args:
url: URL to validate
Returns:
True if URL is valid and safe
"""
if not url or not isinstance(url, str):
return False
# Check URL length (maximum 2048 characters)
if len(url) > 2048:
return False
# Block dangerous schemes
dangerous_schemes = ['file://', 'javascript:', 'data:', 'vbscript:', 'about:']
url_lower = url.lower().strip()
for scheme in dangerous_schemes:
if url_lower.startswith(scheme):
return False
# Check URL format
url_pattern = re.compile(
r'^https?://' # http:// or https://
r'(?:(?:[A-Z0-9](?:[A-Z0-9-]{0,61}[A-Z0-9])?\.)+[A-Z]{2,6}\.?|' # domain...
r'localhost|' # localhost...
r'\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})' # ...or ip
r'(?::\d+)?' # optional port
r'(?:/?|[/?]\S+)$', re.IGNORECASE)
return url_pattern.match(url) is not None
def parse_user_id(text: str) -> Optional[int]:
"""
Parse user_id from text (numeric only)
Args:
text: Text that may contain user_id
Returns:
User ID as integer or None if not a valid number
"""
try:
# Remove @ if present
text = text.replace("@", "").strip()
return int(text)
except (ValueError, AttributeError):
return None
async def resolve_user_identifier(identifier: str) -> Tuple[Optional[int], Optional[str]]:
"""
Resolve user identifier (user_id or username) to user_id
Args:
identifier: User ID (number) or username (with or without @)
Returns:
Tuple of (user_id: Optional[int], error_message: Optional[str])
If user_id is None, error_message contains the reason
"""
# First, try to parse as user_id
user_id = parse_user_id(identifier)
if user_id:
return (user_id, None)
# If not a number, try to resolve username via Telegram API
username = identifier.lstrip('@').strip()
    if not username:
        return (None, "Identifier cannot be empty")
try:
from bot.modules.task_scheduler.executor import get_app_client
app_client = get_app_client()
if not app_client:
return (None, "Telegram клиент не инициализирован. Попробуйте использовать User ID.")
# Try to get user by username via get_chat
# Note: This only works if bot has already interacted with the user
chat = await app_client.get_chat(username)
if chat and hasattr(chat, 'id'):
return (chat.id, None)
        else:
            return (None, f"User @{username} not found via the Telegram API")
except Exception as e:
# Log the error but return user-friendly message
import logging
logger = logging.getLogger(__name__)
logger.debug(f"Failed to resolve username {username}: {e}")
return (None, f"Не удалось найти пользователя @{username}. Убедитесь, что бот взаимодействовал с этим пользователем, или используйте User ID.")
def format_file_size(size_bytes: int) -> str:
"""Format file size"""
for unit in ['B', 'KB', 'MB', 'GB', 'TB']:
if size_bytes < 1024.0:
return f"{size_bytes:.2f} {unit}"
size_bytes /= 1024.0
return f"{size_bytes:.2f} PB"
def format_duration(seconds) -> str:
"""
Format duration
Args:
seconds: Duration in seconds (int or float)
Returns:
Formatted string in "HH:MM:SS" or "MM:SS" format
"""
# Convert to int as we don't need fractional seconds for display
seconds = int(seconds) if seconds else 0
hours = seconds // 3600
minutes = (seconds % 3600) // 60
secs = seconds % 60
if hours > 0:
return f"{hours:02d}:{minutes:02d}:{secs:02d}"
return f"{minutes:02d}:{secs:02d}"
def generate_unique_task_id() -> int:
"""
Generate unique task ID
Uses UUID to guarantee uniqueness
Returns:
Unique 63-bit integer ID
"""
    # Mask the UUID down to its low 63 bits so the ID fits in a signed 64-bit integer
return uuid.uuid4().int & ((1 << 63) - 1)
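
Expected behaviour of the formatting and parsing helpers, as checked examples (the values follow directly from the code above):

from bot.utils.helpers import format_file_size, format_duration, parse_user_id

assert format_file_size(1536) == "1.50 KB"
assert format_duration(3725) == "01:02:05"  # HH:MM:SS once there is at least one hour
assert format_duration(125) == "02:05"      # MM:SS otherwise
assert parse_user_id("@123456") == 123456   # leading @ is stripped
assert parse_user_id("not_a_number") is None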

42
bot/utils/logger.py Normal file
View File

@@ -0,0 +1,42 @@
"""
Logging configuration
"""
import logging
from pathlib import Path
from bot.config import settings
def setup_logger():
"""Setup logging system"""
# Create log directory
log_dir = Path(settings.LOG_FILE).parent
log_dir.mkdir(parents=True, exist_ok=True)
# Setup log format
log_format = "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
date_format = "%Y-%m-%d %H:%M:%S"
# Log level
log_level = getattr(logging, settings.LOG_LEVEL.upper(), logging.INFO)
# Setup file handler
file_handler = logging.FileHandler(settings.LOG_FILE, encoding="utf-8")
file_handler.setLevel(log_level)
file_handler.setFormatter(logging.Formatter(log_format, date_format))
# Setup console handler
console_handler = logging.StreamHandler()
console_handler.setLevel(log_level)
console_handler.setFormatter(logging.Formatter(log_format, date_format))
# Setup root logger
root_logger = logging.getLogger()
root_logger.setLevel(log_level)
root_logger.addHandler(file_handler)
root_logger.addHandler(console_handler)
# Setup loggers for external libraries
logging.getLogger("pyrogram").setLevel(logging.WARNING)
logging.getLogger("httpx").setLevel(logging.WARNING)

82
bot/utils/telegram_user.py Normal file
View File

@@ -0,0 +1,82 @@
"""
Utilities for working with Telegram user information
"""
from typing import Optional, Dict
from pyrogram import Client
import logging
logger = logging.getLogger(__name__)
def get_app_client() -> Optional[Client]:
"""
Get Pyrogram client from executor.py.
Uses a single source of client to avoid conflicts.
Returns:
Pyrogram client instance, or None if import fails
"""
try:
from bot.modules.task_scheduler.executor import get_app_client as get_executor_client
return get_executor_client()
except ImportError:
logger.warning("Failed to import get_app_client from executor")
return None
async def get_user_info(user_id: int) -> Optional[Dict[str, Optional[str]]]:
"""
Get user information from Telegram API.
Args:
user_id: Telegram user ID
Returns:
Dictionary with user information:
{
"username": str or None,
"first_name": str or None,
"last_name": str or None
}
or None in case of error
"""
app_client = get_app_client()
if not app_client:
logger.warning(f"Pyrogram client not set, cannot get user information for {user_id}")
return None
try:
# Get user information through Pyrogram
user = await app_client.get_users(user_id)
if user:
return {
"username": user.username,
"first_name": user.first_name,
"last_name": user.last_name
}
else:
logger.warning(f"User {user_id} not found in Telegram")
return None
except Exception as e:
logger.error(f"Error getting user information for {user_id}: {e}", exc_info=True)
return None
async def get_username_by_id(user_id: int) -> Optional[str]:
"""
Get username by user ID.
Args:
user_id: Telegram user ID
Returns:
Username or None if not found
"""
user_info = await get_user_info(user_id)
if user_info:
return user_info.get("username")
return None
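
A minimal usage sketch; the user ID is a placeholder and the Pyrogram client must already be initialized via the executor.

import asyncio
from bot.utils.telegram_user import get_username_by_id

async def demo() -> None:
    username = await get_username_by_id(123456789)  # placeholder ID
    print(username or "no username set, or lookup failed")

asyncio.run(demo())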

View File

@@ -0,0 +1,161 @@
"""
Utilities for updating user information from Telegram API
"""
import asyncio
import logging
from typing import Optional, Dict
from sqlalchemy.ext.asyncio import AsyncSession
from sqlalchemy import select
from shared.database.session import get_async_session_local
from shared.database.models import User
from bot.utils.telegram_user import get_user_info
logger = logging.getLogger(__name__)
# Constants for time intervals
SECONDS_PER_HOUR = 3600
SECONDS_PER_MINUTE = 60
TELEGRAM_API_DELAY = 0.5 # Delay between Telegram API requests in seconds
USER_INFO_UPDATE_INTERVAL_HOURS = 24 # Interval for updating user information
ERROR_RETRY_DELAY_SECONDS = 60 # Delay before retry on error
def _update_user_fields(user: User, user_info: Dict[str, Optional[str]]) -> bool:
"""
Update user fields from Telegram API information.
Updates username, first_name, and last_name if they are missing
and available in user_info.
Args:
user: User object from database
user_info: Dictionary with user information from Telegram API
Returns:
True if any fields were updated, False otherwise
"""
updated = False
if not user.username and user_info.get("username"):
user.username = user_info.get("username")
updated = True
if not user.first_name and user_info.get("first_name"):
user.first_name = user_info.get("first_name")
updated = True
if not user.last_name and user_info.get("last_name"):
user.last_name = user_info.get("last_name")
updated = True
return updated
async def update_user_info_from_telegram(
user_id: int,
db_session: Optional[AsyncSession] = None
) -> bool:
"""
Update user information from Telegram API.
Fetches user information from Telegram API and updates the database
with missing fields (username, first_name, last_name).
Args:
user_id: Telegram user ID
db_session: Database session (if None, creates a new one)
Returns:
True if information was updated, False otherwise
"""
try:
# Get user information from Telegram API
user_info = await get_user_info(user_id)
if not user_info:
return False
# Update information in database
if db_session:
# Use provided session
user = await db_session.get(User, user_id)
if user:
if _update_user_fields(user, user_info):
await db_session.commit()
logger.info(f"User {user_id} information updated from Telegram API")
return True
else:
# Create new session
async with get_async_session_local()() as session:
user = await session.get(User, user_id)
if user:
if _update_user_fields(user, user_info):
await session.commit()
logger.info(f"User {user_id} information updated from Telegram API")
return True
return False
except Exception as e:
logger.error(f"Error updating user {user_id} information: {e}", exc_info=True)
return False
async def update_users_without_info_periodically(
interval_hours: int = USER_INFO_UPDATE_INTERVAL_HOURS
) -> None:
"""
Periodically update information for users without username or first_name.
Runs in an infinite loop, updating user information at specified intervals.
Can be cancelled with asyncio.CancelledError.
Args:
interval_hours: Interval between updates in hours (default: 24 hours)
"""
logger.info("Background task for updating user information started")
while True:
try:
await asyncio.sleep(interval_hours * SECONDS_PER_HOUR)
logger.info("Starting update of user information for users without username or first_name")
async with get_async_session_local()() as session:
# Get users without username or first_name
result = await session.execute(
select(User).where(
                    User.username.is_(None) | User.first_name.is_(None)
)
)
users = result.scalars().all()
updated_count = 0
error_count = 0
for user in users:
try:
# Update user information
if await update_user_info_from_telegram(user.user_id, db_session=session):
updated_count += 1
# Delay between requests to avoid overloading Telegram API
await asyncio.sleep(TELEGRAM_API_DELAY)
except Exception as e:
error_count += 1
logger.warning(f"Error updating user {user.user_id}: {e}")
if updated_count > 0 or error_count > 0:
logger.info(
f"User information update completed: "
f"updated {updated_count}, errors {error_count}, total checked {len(users)}"
)
else:
logger.debug("No users found for update")
except asyncio.CancelledError:
logger.info("User information update task stopped")
break
except Exception as e:
logger.error(f"Error in user information update task: {e}", exc_info=True)
# Continue working even on error
await asyncio.sleep(ERROR_RETRY_DELAY_SECONDS)
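
Hedged startup wiring for this background updater; the module path bot.utils.user_info_updater is assumed here, since the filename is not shown in this view.

import asyncio
from bot.utils.user_info_updater import update_users_without_info_periodically  # assumed path

async def main() -> None:
    task = asyncio.create_task(update_users_without_info_periodically())
    try:
        ...  # bot runs here
    finally:
        task.cancel()  # hits the CancelledError branch for a clean stop
        await asyncio.gather(task, return_exceptions=True)

asyncio.run(main())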