Fix yt-dlp title error, and inline title

2025-12-05 22:16:33 +03:00
parent 5c280a4d3a
commit ec0fd18317
4 changed files with 188 additions and 7 deletions
--- a/bot/modules/media_loader/ytdlp.py
+++ b/bot/modules/media_loader/ytdlp.py
@@ -475,8 +475,25 @@ async def download_media(
        # Search for downloaded file
        title = info.get('title', 'video')
        # Handle cases where title extraction failed (e.g., PornHub)
        if not title or title == 'NA' or title.strip() == '':
            # Try to generate title from URL or use default
            try:
                from urllib.parse import urlparse
                parsed = urlparse(url)
                if parsed.netloc:
                    # Use domain name as part of title
                    domain = parsed.netloc.replace('www.', '').split('.')[0]
                    title = f"video_from_{domain}"
                else:
                    title = 'video'
            except:
                title = 'video'
        # Clean title from invalid characters
        title = "".join(c for c in title if c.isalnum() or c in (' ', '-', '_')).strip()
        if not title:  # If cleaning removed everything, use default
            title = 'video'
        ext = info.get('ext', 'mp4')
        logger.info(f"Searching for downloaded file. Title: {title}, ext: {ext}, task_id: {task_id}")
@@ -670,6 +687,7 @@ async def get_videos_list(url: str, cookies_file: Optional[str] = None) -> Optio
            'quiet': True,
            'no_warnings': True,
            'extract_flat': 'in_playlist',  # Extract flat for playlist entries, full for single videos
            'ignoreerrors': True,  # Continue on extraction errors (e.g., missing title)
        }
        # Add cookies if specified
@@ -698,8 +716,20 @@ async def get_videos_list(url: str, cookies_file: Optional[str] = None) -> Optio
        def extract_info_sync():
            """
            Blocking helper that asks yt-dlp for metadata of `url` without downloading.
            If the first attempt raises, a warning is logged, 'ignoreerrors' is set
            on the shared `ydl_opts` dict and the extraction is attempted once more.
            Returns:
                Whatever `YoutubeDL.extract_info(url, download=False)` returns
            Raises:
                The exception from the second attempt, after logging it as an error
            """
            def _run_extraction():
                # A fresh YoutubeDL instance per attempt; `ydl_opts` is read at call time
                with yt_dlp.YoutubeDL(ydl_opts) as ydl:
                    return ydl.extract_info(url, download=False)
            try:
                return _run_extraction()
            except Exception as e:
                # Not fatal yet - a second, more tolerant attempt follows
                logger.warning(f"Error extracting info (some metadata may be missing): {e}")
                # NOTE: this mutates the options dict owned by the enclosing function
                ydl_opts['ignoreerrors'] = True
                try:
                    return _run_extraction()
                except Exception as e2:
                    logger.error(f"Failed to extract info even with ignoreerrors: {e2}")
                    # Bare raise propagates the second failure to the caller
                    raise
        # Extract info without downloading
        info = await loop.run_in_executor(None, extract_info_sync)
@@ -721,10 +751,21 @@ async def get_videos_list(url: str, cookies_file: Optional[str] = None) -> Optio
                    if not entry_url:
                        continue
                    # Handle missing title gracefully
                    title = entry.get('title')
                    if not title or title == 'NA':
                        # Try to generate a title from URL or use default
                        try:
                            from urllib.parse import urlparse
                            parsed = urlparse(entry_url)
                            title = f"Video from {parsed.netloc}" if parsed.netloc else "Video"
                        except:
                            title = "Video"
                    videos.append({
                        'id': entry.get('id'),
                        'url': entry_url,
-                        'title': entry.get('title', 'Unknown'),
+                        'title': title,
                        'duration': entry.get('duration'),
                        'thumbnail': entry.get('thumbnail'),
                    })
--- a/bot/modules/message_handler/callbacks.py
+++ b/bot/modules/message_handler/callbacks.py
@@ -214,7 +214,15 @@ async def callback_handler(client: Client, callback_query: CallbackQuery):
    elif data.startswith("video_select:"):
        # Handle video selection from webpage
-        video_url = data.replace("video_select:", "", 1)
+        selection_id = data.replace("video_select:", "", 1)
        # Retrieve video URL from cache using short identifier
        from bot.modules.message_handler.video_selection_cache import get_video_selection
        video_url = get_video_selection(selection_id, user_id=user_id)
        if not video_url:
            await callback_query.answer("❌ Ссылка истекла или не найдена. Пожалуйста, попробуйте снова.", show_alert=True)
            return
        # Create task for selected video
        try:
--- a/bot/modules/message_handler/commands.py
+++ b/bot/modules/message_handler/commands.py
@@ -588,6 +588,8 @@ async def url_handler(client: Client, message: Message):
                        text += "Выберите видео для загрузки:\n\n"
                        # Create inline keyboard with video selection buttons
                        from bot.modules.message_handler.video_selection_cache import store_video_selection
                        buttons = []
                        for idx, video in enumerate(videos[:10], 1):  # Limit to 10 videos
                            title = video.get('title', f'Видео {idx}')[:50]  # Limit title length
@@ -597,8 +599,10 @@ async def url_handler(client: Client, message: Message):
                                duration_str = format_duration(duration)
                                title += f" ({duration_str})"
-                            # Use callback data format: video_select:<video_url>
+                            # Store video URL in cache and use short identifier in callback_data
-                            callback_data = f"video_select:{video['url']}"
+                            # This avoids Telegram's 64-byte limit on callback_data
                            selection_id = store_video_selection(video['url'], user_id)
                            callback_data = f"video_select:{selection_id}"
                            buttons.append([InlineKeyboardButton(f"{idx}. {title}", callback_data=callback_data)])
                        keyboard = InlineKeyboardMarkup(buttons)
@@ -779,4 +783,3 @@ def register_commands(app: Client):
    app.add_handler(MessageHandler(url_handler, filters=is_url_message))
    logger.info("Commands registered")
--- a/bot/modules/message_handler/video_selection_cache.py
+++ b/bot/modules/message_handler/video_selection_cache.py
@@ -0,0 +1,129 @@
 """
 Video selection cache for callback handlers
 Stores mappings between short identifiers and video URLs to work around
 Telegram's 64-byte limit on callback_data.
 """
 import secrets
 import threading
 import time
 from typing import Optional, Dict
 import logging
 logger = logging.getLogger(__name__)
 # In-memory mapping: selection_id -> cached record
 # Record layout: {'url': str, 'user_id': int, 'created_at': float}
 _video_selections: Dict[str, Dict] = {}
 # Guards every read and write of _video_selections
 _video_selections_lock = threading.Lock()
 # Size cap that triggers cleanup/eviction when a new selection is stored
 _MAX_SELECTIONS = 10_000
 # Lifetime of a cached selection in seconds (one hour)
 _SELECTION_TTL = 60 * 60
 def generate_selection_id() -> str:
    """
    Produce a fresh random token to use as a video selection key.
    The token is built from 8 random bytes rendered as URL-safe text,
    which yields an 11-character string.
    Returns:
        Newly generated identifier
    """
    random_byte_count = 8  # 8 bytes -> 11 URL-safe characters
    token = secrets.token_urlsafe(random_byte_count)
    return token
 def store_video_selection(url: str, user_id: int) -> str:
    """
    Store a video URL in the in-memory cache and return a short identifier
    When the cache has reached _MAX_SELECTIONS entries, expired entries are
    purged first; if it is still full, the oldest entries (by 'created_at')
    are evicted before the new one is inserted.
    Args:
        url: Video URL to store
        user_id: User ID who requested the selection
    Returns:
        Short identifier to use in callback_data
    """
    selection_id = generate_selection_id()
    with _video_selections_lock:
        # Cheap pass first: drop entries whose TTL has elapsed
        if len(_video_selections) >= _MAX_SELECTIONS:
            _cleanup_expired_selections()
        # Still at the cap: evict roughly the oldest 10% of entries
        if len(_video_selections) >= _MAX_SELECTIONS:
            oldest_first = sorted(
                _video_selections,
                key=lambda sel_id: _video_selections[sel_id].get('created_at', 0)
            )
            # Evict at least one entry: len // 10 alone is 0 for fewer than
            # 10 entries, which would let a small cache grow past its cap
            evict_count = max(1, len(oldest_first) // 10)
            for stale_id in oldest_first[:evict_count]:
                del _video_selections[stale_id]
        _video_selections[selection_id] = {
            'url': url,
            'user_id': user_id,
            'created_at': time.time()
        }
    # Logged after the lock is released
    logger.debug(f"Stored video selection: {selection_id} for user {user_id}")
    return selection_id
 def get_video_selection(selection_id: str, user_id: Optional[int] = None) -> Optional[str]:
    """
    Look up the video URL cached under a selection identifier
    An entry older than _SELECTION_TTL seconds is deleted on access and
    treated as missing. When `user_id` is given, it must match the user the
    entry was stored for; a mismatch is logged as a warning and the entry is
    left in place.
    Args:
        selection_id: Short identifier from callback_data
        user_id: Optional user ID to verify ownership
    Returns:
        Video URL, or None if the entry is missing, expired or owned by another user
    """
    with _video_selections_lock:
        selection = _video_selections.get(selection_id)
        if not selection:
            logger.debug(f"Selection not found: {selection_id}")
            return None
        # Expiry check: age is measured from the stored creation timestamp
        age_seconds = time.time() - selection['created_at']
        if age_seconds > _SELECTION_TTL:
            del _video_selections[selection_id]
            logger.debug(f"Selection expired: {selection_id}")
            return None
        # Ownership is only enforced when the caller supplies a user ID
        owned_by_someone_else = user_id is not None and selection['user_id'] != user_id
        if owned_by_someone_else:
            logger.warning(
                f"User {user_id} attempted to access selection {selection_id} "
                f"owned by user {selection['user_id']}"
            )
            return None
        return selection['url']
 def _cleanup_expired_selections():
    """
    Drop every cached selection older than _SELECTION_TTL seconds
    This function does not take _video_selections_lock itself; the caller
    visible in this module (store_video_selection) invokes it while already
    holding the lock.
    """
    current_time = time.time()
    # Collect IDs first so the dict is not modified while being iterated
    expired_ids = []
    for sel_id, sel_data in _video_selections.items():
        if current_time - sel_data['created_at'] > _SELECTION_TTL:
            expired_ids.append(sel_id)
    for sel_id in expired_ids:
        del _video_selections[sel_id]
    if expired_ids:
        logger.debug(f"Cleaned up {len(expired_ids)} expired selections")
 def clear_user_selections(user_id: int):
    """
    Remove every cached selection stored for the given user
    Args:
        user_id: User ID whose selections should be dropped
    """
    with _video_selections_lock:
        # Gather matching IDs before deleting so iteration stays safe
        to_remove = []
        for sel_id, sel_data in _video_selections.items():
            if sel_data['user_id'] == user_id:
                to_remove.append(sel_id)
        for sel_id in to_remove:
            del _video_selections[sel_id]
        if to_remove:
            logger.debug(f"Cleared {len(to_remove)} selections for user {user_id}")