Fix yt-dlp title extraction error and inline title handling

This commit is contained in:
2025-12-05 22:16:33 +03:00
parent 5c280a4d3a
commit ec0fd18317
4 changed files with 188 additions and 7 deletions

View File

@@ -475,8 +475,25 @@ async def download_media(
# Search for downloaded file
title = info.get('title', 'video')
# Handle cases where title extraction failed (e.g., PornHub)
if not title or title == 'NA' or title.strip() == '':
# Try to generate title from URL or use default
try:
from urllib.parse import urlparse
parsed = urlparse(url)
if parsed.netloc:
# Use domain name as part of title
domain = parsed.netloc.replace('www.', '').split('.')[0]
title = f"video_from_{domain}"
else:
title = 'video'
except:
title = 'video'
# Clean title from invalid characters
title = "".join(c for c in title if c.isalnum() or c in (' ', '-', '_')).strip()
if not title: # If cleaning removed everything, use default
title = 'video'
ext = info.get('ext', 'mp4')
logger.info(f"Searching for downloaded file. Title: {title}, ext: {ext}, task_id: {task_id}")
@@ -670,6 +687,7 @@ async def get_videos_list(url: str, cookies_file: Optional[str] = None) -> Optio
'quiet': True,
'no_warnings': True,
'extract_flat': 'in_playlist', # Extract flat for playlist entries, full for single videos
'ignoreerrors': True, # Continue on extraction errors (e.g., missing title)
}
# Add cookies if specified
@@ -698,8 +716,20 @@ async def get_videos_list(url: str, cookies_file: Optional[str] = None) -> Optio
def extract_info_sync():
"""Synchronous function for extracting information"""
with yt_dlp.YoutubeDL(ydl_opts) as ydl:
return ydl.extract_info(url, download=False)
try:
with yt_dlp.YoutubeDL(ydl_opts) as ydl:
return ydl.extract_info(url, download=False)
except Exception as e:
# Log but don't fail completely - some metadata might still be available
logger.warning(f"Error extracting info (some metadata may be missing): {e}")
# Try to extract with ignoreerrors to get partial info
ydl_opts['ignoreerrors'] = True
try:
with yt_dlp.YoutubeDL(ydl_opts) as ydl:
return ydl.extract_info(url, download=False)
except Exception as e2:
logger.error(f"Failed to extract info even with ignoreerrors: {e2}")
raise
# Extract info without downloading
info = await loop.run_in_executor(None, extract_info_sync)
@@ -721,10 +751,21 @@ async def get_videos_list(url: str, cookies_file: Optional[str] = None) -> Optio
if not entry_url:
continue
# Handle missing title gracefully
title = entry.get('title')
if not title or title == 'NA':
# Try to generate a title from URL or use default
try:
from urllib.parse import urlparse
parsed = urlparse(entry_url)
title = f"Video from {parsed.netloc}" if parsed.netloc else "Video"
except:
title = "Video"
videos.append({
'id': entry.get('id'),
'url': entry_url,
'title': entry.get('title', 'Unknown'),
'title': title,
'duration': entry.get('duration'),
'thumbnail': entry.get('thumbnail'),
})

View File

@@ -214,7 +214,15 @@ async def callback_handler(client: Client, callback_query: CallbackQuery):
elif data.startswith("video_select:"):
# Handle video selection from webpage
video_url = data.replace("video_select:", "", 1)
selection_id = data.replace("video_select:", "", 1)
# Retrieve video URL from cache using short identifier
from bot.modules.message_handler.video_selection_cache import get_video_selection
video_url = get_video_selection(selection_id, user_id=user_id)
if not video_url:
await callback_query.answer("❌ Ссылка истекла или не найдена. Пожалуйста, попробуйте снова.", show_alert=True)
return
# Create task for selected video
try:

View File

@@ -588,6 +588,8 @@ async def url_handler(client: Client, message: Message):
text += "Выберите видео для загрузки:\n\n"
# Create inline keyboard with video selection buttons
from bot.modules.message_handler.video_selection_cache import store_video_selection
buttons = []
for idx, video in enumerate(videos[:10], 1): # Limit to 10 videos
title = video.get('title', f'Видео {idx}')[:50] # Limit title length
@@ -597,8 +599,10 @@ async def url_handler(client: Client, message: Message):
duration_str = format_duration(duration)
title += f" ({duration_str})"
# Use callback data format: video_select:<video_url>
callback_data = f"video_select:{video['url']}"
# Store video URL in cache and use short identifier in callback_data
# This avoids Telegram's 64-byte limit on callback_data
selection_id = store_video_selection(video['url'], user_id)
callback_data = f"video_select:{selection_id}"
buttons.append([InlineKeyboardButton(f"{idx}. {title}", callback_data=callback_data)])
keyboard = InlineKeyboardMarkup(buttons)
@@ -779,4 +783,3 @@ def register_commands(app: Client):
app.add_handler(MessageHandler(url_handler, filters=is_url_message))
logger.info("Commands registered")

View File

@@ -0,0 +1,129 @@
"""
Video selection cache for callback handlers
Stores mappings between short identifiers and video URLs to work around
Telegram's 64-byte limit on callback_data.
"""
import secrets
import threading
import time
from typing import Optional, Dict
import logging
logger = logging.getLogger(__name__)
# Dictionary to store video URL mappings
# Format: {selection_id: {'url': str, 'user_id': int, 'created_at': float}}
_video_selections: Dict[str, Dict] = {}
_video_selections_lock = threading.Lock()
_MAX_SELECTIONS = 10000 # Maximum number of cached selections
_SELECTION_TTL = 3600 # Time to live: 1 hour in seconds
def generate_selection_id() -> str:
    """Create a short random token to use as a callback_data identifier.

    Eight random bytes, url-safe encoded, come out to roughly 11
    characters — comfortably inside Telegram's 64-byte callback_data limit.
    """
    token = secrets.token_urlsafe(8)
    return token
def store_video_selection(url: str, user_id: int) -> str:
    """Cache *url* for *user_id* and return a short callback identifier.

    The returned token is embedded in callback_data instead of the raw
    URL, working around Telegram's 64-byte callback_data limit.

    Args:
        url: Video URL to remember.
        user_id: User ID who requested the selection.

    Returns:
        Short identifier to use in callback_data.
    """
    selection_id = generate_selection_id()
    with _video_selections_lock:
        # Keep the cache bounded: first drop anything expired, and if
        # that is still not enough, evict the oldest 10% of entries.
        if len(_video_selections) >= _MAX_SELECTIONS:
            _cleanup_expired_selections()
        if len(_video_selections) >= _MAX_SELECTIONS:
            by_age = sorted(
                _video_selections,
                key=lambda sid: _video_selections[sid].get('created_at', 0),
            )
            for stale_id in by_age[: len(by_age) // 10]:
                del _video_selections[stale_id]
        _video_selections[selection_id] = {
            'url': url,
            'user_id': user_id,
            'created_at': time.time(),
        }
        logger.debug(f"Stored video selection: {selection_id} for user {user_id}")
    return selection_id
def get_video_selection(selection_id: str, user_id: Optional[int] = None) -> Optional[str]:
    """Look up a previously stored video URL.

    Args:
        selection_id: Token produced by store_video_selection().
        user_id: When given, the stored entry must belong to this user.

    Returns:
        The cached URL, or None when the token is unknown, has expired,
        or belongs to a different user.
    """
    with _video_selections_lock:
        entry = _video_selections.get(selection_id)
        if entry is None:
            logger.debug(f"Selection not found: {selection_id}")
            return None
        # Lazy expiry: stale tokens are purged on first access.
        if time.time() - entry['created_at'] > _SELECTION_TTL:
            del _video_selections[selection_id]
            logger.debug(f"Selection expired: {selection_id}")
            return None
        # Ownership check guards against one user replaying another
        # user's callback token.
        if user_id is not None and entry['user_id'] != user_id:
            logger.warning(
                f"User {user_id} attempted to access selection {selection_id} "
                f"owned by user {entry['user_id']}"
            )
            return None
        return entry['url']
def _cleanup_expired_selections():
    """Drop cache entries older than _SELECTION_TTL.

    NOTE(review): this mutates _video_selections without taking the lock —
    the visible caller (store_video_selection) already holds
    _video_selections_lock, so it must only be invoked with the lock held.
    """
    now = time.time()
    stale = [
        sid
        for sid, data in _video_selections.items()
        if now - data['created_at'] > _SELECTION_TTL
    ]
    for sid in stale:
        del _video_selections[sid]
    if stale:
        logger.debug(f"Cleaned up {len(stale)} expired selections")
def clear_user_selections(user_id: int):
    """Remove every cached selection belonging to *user_id*."""
    with _video_selections_lock:
        owned = [
            sid
            for sid, data in _video_selections.items()
            if data['user_id'] == user_id
        ]
        for sid in owned:
            del _video_selections[sid]
        if owned:
            logger.debug(f"Cleared {len(owned)} selections for user {user_id}")