"""
Utilities for splitting large files into parts
"""

import asyncio
import logging
from pathlib import Path
from typing import Any, Dict, List, Optional

import aiofiles

logger = logging.getLogger(__name__)

# Maximum part size: 1.9 GiB, kept below 2 GiB as a safety margin
MAX_PART_SIZE = int(1.9 * 1024 * 1024 * 1024)  # 1.9 GiB

# Granularity of a single read/write while copying. Every aiofiles call is an
# awaited round trip, so a larger buffer keeps the number of calls per part low.
_COPY_CHUNK_SIZE = 1024 * 1024  # 1 MiB


async def _write_part(source_file: Any, part_path: Path, part_size: int) -> int:
    """Copy up to ``part_size`` bytes from ``source_file`` into ``part_path``.

    Args:
        source_file: Open aiofiles binary reader, positioned at the next
            unread byte of the source
        part_path: Destination path of the part (created or truncated)
        part_size: Maximum number of bytes to copy

    Returns:
        Number of bytes actually copied (0 if the source was already exhausted)
    """
    bytes_written = 0
    async with aiofiles.open(part_path, 'wb') as part_file:
        while bytes_written < part_size:
            # Never read past the part boundary, so the next part starts
            # exactly where this one ends.
            chunk = await source_file.read(
                min(_COPY_CHUNK_SIZE, part_size - bytes_written)
            )
            if not chunk:
                # End of file reached
                break
            await part_file.write(chunk)
            bytes_written += len(chunk)
    return bytes_written


def _remove_parts_quietly(parts: List[str]) -> None:
    """Best-effort removal of partially written parts; never raises on OS errors."""
    for part_path in parts:
        try:
            Path(part_path).unlink()
        except FileNotFoundError:
            # Nothing to clean up for this part
            pass
        except OSError as e:
            logger.warning(
                "Failed to remove partial file part %s: %s", part_path, e
            )


async def split_file(file_path: str, part_size: int = MAX_PART_SIZE) -> List[str]:
    """
    Split file into parts

    Parts are written next to the source file and named
    ``<stem>.partNNN<suffix>`` (NNN is a 1-based, zero-padded counter).
    The source file itself is left untouched.

    Args:
        file_path: Path to source file
        part_size: Size of each part in bytes (must be positive)

    Returns:
        List of paths to file parts. If the file is not larger than
        ``part_size``, no parts are created and the list contains only the
        path of the source file.

    Raises:
        ValueError: If ``part_size`` is not positive
        FileNotFoundError: If ``file_path`` does not exist
        Exception: If reading or writing fails; parts created so far are
            removed and the original error is chained as ``__cause__``
    """
    if part_size <= 0:
        # A non-positive size can never make progress; previously this
        # silently produced an empty list of parts.
        raise ValueError(
            f"part_size must be a positive number of bytes, got {part_size}"
        )

    file = Path(file_path)
    if not file.exists():
        raise FileNotFoundError(f"File not found: {file_path}")

    file_size = file.stat().st_size
    if file_size <= part_size:
        # File doesn't need to be split
        return [str(file)]

    parts: List[str] = []
    try:
        async with aiofiles.open(file_path, 'rb') as source_file:
            part_number = 1
            while True:
                part_path = (
                    file.parent
                    / f"{file.stem}.part{part_number:03d}{file.suffix}"
                )
                # Register the path before writing so a failure mid-write
                # still gets cleaned up.
                parts.append(str(part_path))

                bytes_written = await _write_part(
                    source_file, part_path, part_size
                )

                if bytes_written == 0:
                    # Source length was an exact multiple of part_size:
                    # the last part is empty, so drop it.
                    part_path.unlink()
                    parts.pop()
                    break

                if bytes_written < part_size:
                    # Short part means end of file reached
                    break

                part_number += 1
    except asyncio.CancelledError:
        # Cancellation is not an error to wrap, but half-written parts
        # must not be left behind.
        _remove_parts_quietly(parts)
        raise
    except Exception as e:
        # Clean up partially created parts on error
        _remove_parts_quietly(parts)
        raise Exception(f"Error splitting file: {e}") from e

    logger.info("File %s split into %d parts", file_path, len(parts))
    return parts


async def delete_file_parts(parts: List[str]) -> None:
    """
    Delete all file parts

    Deletion is best-effort: missing files are skipped silently and any
    other failure is logged as a warning without interrupting the loop.

    Args:
        parts: List of paths to file parts
    """
    for part_path in parts:
        try:
            # Attempt the removal directly instead of checking exists()
            # first, which would race with concurrent deletion.
            Path(part_path).unlink()
        except FileNotFoundError:
            continue
        except Exception as e:
            logger.warning("Failed to delete file part %s: %s", part_path, e)
        else:
            logger.debug("Deleted file part: %s", part_path)


def get_part_info(parts: List[str]) -> Dict[str, Any]:
    """
    Get information about file parts

    Args:
        parts: List of paths to file parts

    Returns:
        Dictionary with information about parts:
        ``total_parts`` (number of paths given), ``total_size`` (combined
        size in bytes of the parts that exist on disk) and ``parts``
        (the input list itself)
    """
    total_size = 0
    for part_path in parts:
        try:
            total_size += Path(part_path).stat().st_size
        except (FileNotFoundError, NotADirectoryError):
            # Missing parts contribute nothing to the total
            continue

    return {
        "total_parts": len(parts),
        "total_size": total_size,
        "parts": parts,
    }