Add source
This commit is contained in:
121
bot/utils/file_splitter.py
Normal file
121
bot/utils/file_splitter.py
Normal file
@@ -0,0 +1,121 @@
|
||||
"""
|
||||
Utilities for splitting large files into parts
|
||||
"""
|
||||
from pathlib import Path
|
||||
from typing import List, Optional
|
||||
import logging
|
||||
import aiofiles
|
||||
|
||||
logger = logging.getLogger(__name__)


# Default ceiling, in bytes, for a single part produced by split_file().
# 1.9 * 1024**3 bytes, deliberately kept under 2 GiB as a safety margin
# (presumably below an upload-size limit of the consumer -- TODO confirm).
MAX_PART_SIZE = int(1.9 * 1024 ** 3)  # 1.9 GiB
|
||||
|
||||
|
||||
# Read/write granularity of the copy loop. aiofiles hands every read and
# write to a worker thread, so a larger chunk means far fewer round-trips
# when copying multi-gigabyte files.
_COPY_CHUNK_SIZE = 1024 * 1024


def _discard_parts(part_paths: List[str]) -> None:
    """Best-effort removal of part files left behind by a failed split."""
    for part_path in part_paths:
        try:
            Path(part_path).unlink()
        except OSError:
            # The part may never have been created, or is already gone;
            # cleanup must not mask the error that triggered it.
            pass


async def split_file(file_path: str, part_size: int = MAX_PART_SIZE) -> List[str]:
    """
    Split a file into consecutive parts of at most ``part_size`` bytes.

    Parts are written next to the source file and named
    ``<stem>.partNNN<suffix>`` (NNN starts at 001). The source file itself
    is left untouched. If the file already fits into a single part, nothing
    is written and the original path is returned as the only element.

    Args:
        file_path: Path to source file
        part_size: Maximum size of each part in bytes (must be positive)

    Returns:
        List of paths to file parts, in order

    Raises:
        FileNotFoundError: ``file_path`` does not point to a regular file
        ValueError: ``part_size`` is not positive
        Exception: Any error while reading or writing; parts created so far
            are removed and the original error is attached as ``__cause__``
    """
    source = Path(file_path)
    # is_file() rather than exists(): a directory must not be reported back
    # as a single "part".
    if not source.is_file():
        raise FileNotFoundError(f"File not found: {file_path}")

    # A non-positive size can never hold any data; previously this silently
    # produced an empty result for a non-empty file.
    if part_size <= 0:
        raise ValueError(f"part_size must be positive, got {part_size}")

    if source.stat().st_size <= part_size:
        # File doesn't need to be split
        return [str(source)]

    parts: List[str] = []
    part_number = 1

    try:
        async with aiofiles.open(file_path, 'rb') as source_file:
            while True:
                part_path = source.parent / f"{source.stem}.part{part_number:03d}{source.suffix}"
                # Register the path before opening it so that the cleanup
                # below also covers a part that fails midway.
                parts.append(str(part_path))

                bytes_written = 0
                async with aiofiles.open(part_path, 'wb') as part_file:
                    while bytes_written < part_size:
                        chunk = await source_file.read(
                            min(_COPY_CHUNK_SIZE, part_size - bytes_written)
                        )
                        if not chunk:
                            # End of source file reached
                            break
                        await part_file.write(chunk)
                        bytes_written += len(chunk)

                if bytes_written == 0:
                    # Source size was an exact multiple of part_size: the
                    # part just opened is empty, so drop it (after the file
                    # handle has been closed).
                    part_path.unlink()
                    parts.pop()
                    break

                if bytes_written < part_size:
                    # Short part means the source is exhausted
                    break

                part_number += 1
    except Exception as e:
        # Clean up partially created parts on error
        _discard_parts(parts)
        raise Exception(f"Error splitting file: {e}") from e
    except BaseException:
        # Task cancellation (asyncio.CancelledError) and interpreter
        # shutdown signals are not Exception subclasses; still remove the
        # partial output, then let them propagate unchanged.
        _discard_parts(parts)
        raise

    logger.info("File %s split into %d parts", file_path, len(parts))
    return parts
|
||||
|
||||
|
||||
async def delete_file_parts(parts: List[str]) -> None:
    """
    Remove every listed part file from disk.

    Paths that do not exist are skipped. A failure on one path is logged
    as a warning and does not stop the remaining paths from being
    processed; nothing is raised to the caller.

    Args:
        parts: List of paths to file parts
    """
    for part_path in parts:
        try:
            target = Path(part_path)
            if not target.exists():
                # Nothing to remove for this entry
                continue
            target.unlink()
            logger.debug(f"Deleted file part: {part_path}")
        except Exception as e:
            logger.warning(f"Failed to delete file part {part_path}: {e}")
|
||||
|
||||
|
||||
def get_part_info(parts: List[str]) -> dict:
    """
    Get information about file parts

    Args:
        parts: List of paths to file parts

    Returns:
        Dictionary with three keys:
            ``total_parts``: number of entries in ``parts`` (including
                entries whose file is missing),
            ``total_size``: combined size in bytes of the parts that could
                be read from disk,
            ``parts``: the list that was passed in
    """
    total_size = 0
    for part_path in parts:
        try:
            # A single stat() instead of exists() + stat(): a part removed
            # between the two calls would otherwise raise here.
            total_size += Path(part_path).stat().st_size
        except (OSError, ValueError):
            # Missing or otherwise unreadable part contributes nothing,
            # which is how an exists() check would have treated it.
            continue

    return {
        "total_parts": len(parts),
        "total_size": total_size,
        "parts": parts,
    }
|
||||
|
||||
Reference in New Issue
Block a user