122 lines
3.4 KiB
Python
122 lines
3.4 KiB
Python
"""
Utilities for splitting large files into parts.
"""
|
|
from pathlib import Path
|
|
from typing import List, Optional
|
|
import logging
|
|
import aiofiles
|
|
|
|
logger = logging.getLogger(__name__)

# Upper bound for a single part, in bytes: 1.9 GiB (1024-based multipliers),
# chosen as a safety margin.
MAX_PART_SIZE = int(1.9 * 1024 ** 3)
|
|
|
|
|
|
async def split_file(file_path: str, part_size: int = MAX_PART_SIZE) -> List[str]:
    """
    Split a file into sequentially numbered parts of at most ``part_size`` bytes.

    Parts are written next to the source file and are named
    ``<stem>.partNNN<suffix>`` with NNN starting at 001. The source file is
    only read, never modified. If the file already fits into a single part,
    nothing is written and the original path is returned as the only element.

    Args:
        file_path: Path to source file
        part_size: Maximum size of each part in bytes; must be positive

    Returns:
        List of paths to file parts, in order

    Raises:
        FileNotFoundError: If ``file_path`` does not exist
        ValueError: If ``part_size`` is not positive
        Exception: If reading or writing fails; the parts created so far are
            removed and the original error is chained as ``__cause__``
    """
    file = Path(file_path)
    if not file.exists():
        raise FileNotFoundError(f"File not found: {file_path}")

    # A non-positive size can never hold data: the loop below would create
    # one empty part, delete it and report success with zero parts.
    if part_size <= 0:
        raise ValueError(f"part_size must be positive, got {part_size}")

    file_size = file.stat().st_size
    if file_size <= part_size:
        # File doesn't need to be split
        return [str(file)]

    # Every aiofiles call is handed off to a worker thread, so a larger read
    # size means far fewer round trips for multi-gigabyte inputs.
    read_size = 1024 * 1024

    parts: List[str] = []
    part_number = 1

    try:
        async with aiofiles.open(file_path, 'rb') as source_file:
            while True:
                part_path = file.parent / f"{file.stem}.part{part_number:03d}{file.suffix}"
                # Register the part before opening it so the error cleanup
                # below also covers a half-written file.
                parts.append(str(part_path))

                bytes_written = 0
                async with aiofiles.open(part_path, 'wb') as part_file:
                    while bytes_written < part_size:
                        chunk = await source_file.read(
                            min(read_size, part_size - bytes_written)
                        )
                        if not chunk:
                            # End of file reached
                            break

                        await part_file.write(chunk)
                        bytes_written += len(chunk)

                if bytes_written == 0:
                    # The source ended exactly on a part boundary, so this
                    # part received no data: drop it again.
                    part_path.unlink()
                    parts.pop()
                    break

                if bytes_written < part_size:
                    # Short part means the end of the source was reached
                    break

                part_number += 1

    except Exception as e:
        # Clean up partially created parts on error (best effort)
        for created_path in parts:
            try:
                Path(created_path).unlink()
            except FileNotFoundError:
                # The part was registered but never actually created
                pass
            except OSError as cleanup_error:
                logger.warning(
                    "Failed to remove partial file part %s: %s",
                    created_path,
                    cleanup_error,
                )
        # Chain the cause so the original traceback is not lost
        raise Exception(f"Error splitting file: {e}") from e

    logger.info("File %s split into %d parts", file_path, len(parts))
    return parts
|
|
|
|
|
|
async def delete_file_parts(parts: List[str]) -> None:
    """
    Delete all file parts (best effort).

    Paths that no longer exist are skipped silently. Any other failure is
    logged as a warning and does not stop the remaining parts from being
    processed.

    Args:
        parts: List of paths to file parts
    """
    for part_path in parts:
        try:
            # Delete directly instead of checking exists() first: a part that
            # vanishes between the check and the delete would otherwise be
            # reported as a failure.
            Path(part_path).unlink()
        except (FileNotFoundError, NotADirectoryError):
            # Nothing to delete at this path
            continue
        except Exception as e:
            logger.warning(f"Failed to delete file part {part_path}: {e}")
        else:
            logger.debug(f"Deleted file part: {part_path}")
|
|
|
|
|
|
def get_part_info(parts: List[str]) -> dict:
    """
    Get information about file parts.

    Args:
        parts: List of paths to file parts

    Returns:
        Dictionary with three keys: ``total_parts`` (number of paths given,
        whether or not they exist), ``total_size`` (combined size in bytes of
        the parts that exist) and ``parts`` (the list that was passed in).
    """
    total_size = 0
    for part_path in parts:
        try:
            # One stat() call instead of exists() + stat(): cheaper, and a
            # part removed in between can no longer raise.
            total_size += Path(part_path).stat().st_size
        except (FileNotFoundError, NotADirectoryError):
            # Missing parts are still counted but contribute no bytes
            continue

    return {
        "total_parts": len(parts),
        "total_size": total_size,
        "parts": parts,
    }
|
|
|