136 lines
4.8 KiB
Python
136 lines
4.8 KiB
Python
#!/usr/bin/env python3
|
|
# -*- coding: utf-8 -*-
|
|
"""
Local file handler.

A simplified file handler that does not depend on external services.
"""
|
|
|
|
import io
import logging
import os
import shutil
import tempfile
from contextlib import contextmanager
from pathlib import Path
from typing import Dict, List, Optional, Any, Union, BinaryIO
|
|
|
|
# Module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)
|
|
|
|
class LocalFileHandler:
    """Local file handler that stages data in temporary files on disk.

    Every temporary file created through this class is recorded in
    ``self.temp_files`` so that it can be removed later, either one at a
    time with :meth:`cleanup_temp_file` or all at once with
    :meth:`cleanup_all_temp_files`.
    """

    # Largest size, in bytes, that is_small_file() still treats as small (5 MB).
    SMALL_FILE_THRESHOLD = 5 * 1024 * 1024

    # Image file extensions declared as supported (lower-case, leading dot).
    SUPPORTED_IMAGE_FORMATS = {'.jpg', '.jpeg', '.png', '.gif', '.bmp', '.webp'}

    def __init__(self, temp_dir: Optional[str] = None):
        """Initialise the handler.

        Args:
            temp_dir: Directory in which temporary files are created. Falls
                back to ``tempfile.gettempdir()`` when omitted or empty. The
                directory is created if it does not exist yet.
        """
        self.temp_dir = temp_dir or tempfile.gettempdir()
        self.temp_files = set()  # paths of temporary files created so far

        # Make sure the temporary directory exists.
        os.makedirs(self.temp_dir, exist_ok=True)

    @contextmanager
    def temp_file_context(self, content: Optional[bytes] = None,
                          suffix: Optional[str] = None,
                          delete_on_exit: bool = True):
        """Context manager that creates a temporary file and yields its path.

        Args:
            content: Optional bytes written to the file before it is yielded.
            suffix: Optional file name suffix passed to ``tempfile.mkstemp``.
            delete_on_exit: When true, the file is removed on leaving the
                ``with`` block; otherwise it is kept and stays tracked.

        Yields:
            The path of the (already closed) temporary file.
        """
        temp_fd, temp_path = tempfile.mkstemp(suffix=suffix, dir=self.temp_dir)
        self.temp_files.add(temp_path)

        try:
            # os.fdopen takes ownership of the descriptor, so leaving this
            # ``with`` closes it exactly once, whether or not writing succeeds.
            with os.fdopen(temp_fd, 'wb') as temp_file:
                if content is not None:
                    temp_file.write(content)

            yield temp_path
        finally:
            if delete_on_exit:
                try:
                    os.unlink(temp_path)
                except FileNotFoundError:
                    # The caller already removed the file; just stop tracking it.
                    self.temp_files.discard(temp_path)
                except OSError as e:
                    # Best effort: keep the path tracked so a later
                    # cleanup_all_temp_files() can retry the removal.
                    logger.warning(f"⚠️ 临时文件清理失败: {temp_path}, {e}")
                else:
                    self.temp_files.discard(temp_path)
                    logger.debug(f"🗑️ 临时文件已清理: {temp_path}")

    def cleanup_all_temp_files(self):
        """Remove every tracked temporary file and reset the tracking set.

        Removal failures are logged as warnings and do not abort the loop.
        """
        # Iterate over a snapshot: cleanup_temp_file() mutates the set.
        for temp_path in tuple(self.temp_files):
            self.cleanup_temp_file(temp_path)
        self.temp_files.clear()

    def get_file_size(self, file_obj) -> int:
        """Return the size of ``file_obj`` in bytes.

        A truthy ``content_length`` attribute is used when present. Otherwise
        the size is measured by seeking to the end of the stream; the original
        stream position is restored afterwards.

        Args:
            file_obj: Object exposing ``content_length`` or a seekable stream
                (``tell`` / ``seek``).

        Returns:
            The size in bytes.
        """
        content_length = getattr(file_obj, 'content_length', None)
        if content_length:
            return content_length

        # Measure by seeking to the end of the stream.
        current_pos = file_obj.tell()
        try:
            file_obj.seek(0, os.SEEK_END)
            return file_obj.tell()
        finally:
            # Restore the caller's position even if measuring failed.
            file_obj.seek(current_pos)

    def is_small_file(self, file_obj) -> bool:
        """Return True when the file size does not exceed SMALL_FILE_THRESHOLD."""
        return self.get_file_size(file_obj) <= self.SMALL_FILE_THRESHOLD

    def get_temp_file_for_model(self, file_obj, filename: str) -> Optional[str]:
        """Copy ``file_obj`` into a tracked temporary file and return its path.

        The file is not deleted automatically; release it with
        :meth:`cleanup_temp_file` or :meth:`cleanup_all_temp_files`.

        Args:
            file_obj: Seekable binary stream; it is rewound to offset 0 and
                copied in full.
            filename: Original file name; only its lower-cased extension is
                used, as the suffix of the temporary file.

        Returns:
            The path of the temporary file, or ``None`` if it could not be
            created. On failure no partially written file is left behind.
        """
        temp_path = None
        try:
            ext = os.path.splitext(filename)[1].lower()

            # Create the temporary file (not auto-deleted: the model uses it).
            temp_fd, temp_path = tempfile.mkstemp(suffix=ext, dir=self.temp_dir)
            self.temp_files.add(temp_path)

            # os.fdopen owns the descriptor from here on and closes it exactly
            # once; closing it again by hand could hit an unrelated descriptor.
            with os.fdopen(temp_fd, 'wb') as temp_file:
                file_obj.seek(0)
                # Stream in chunks instead of loading the whole file in memory.
                shutil.copyfileobj(file_obj, temp_file)
        except Exception as e:
            # Broad on purpose: the contract is "return None on any failure".
            if temp_path is not None:
                # The caller never learns this path, so remove the leftover.
                self.cleanup_temp_file(temp_path)
            logger.error(f"❌ 为模型创建临时文件失败: {filename}, {e}")
            return None

        logger.debug(f"📁 为模型创建临时文件: {temp_path}")
        return temp_path

    def cleanup_temp_file(self, temp_path: str):
        """Remove one temporary file and stop tracking it.

        Args:
            temp_path: Path of the file to remove. Empty or ``None`` is a
                no-op. A path that no longer exists is simply untracked.
                Other removal errors are logged as warnings, and the path
                stays tracked.
        """
        if not temp_path:
            return

        try:
            os.unlink(temp_path)
        except FileNotFoundError:
            # Already gone: nothing to delete, but stop tracking the path.
            self.temp_files.discard(temp_path)
        except OSError as e:
            logger.warning(f"⚠️ 清理临时文件失败: {temp_path}, {e}")
        else:
            self.temp_files.discard(temp_path)
            logger.debug(f"🗑️ 清理临时文件: {temp_path}")
|
|
|
|
# Shared module-level instance, created lazily by get_file_handler().
file_handler = None


def get_file_handler(temp_dir: str = None) -> LocalFileHandler:
    """Return the shared LocalFileHandler, creating it on first use.

    Args:
        temp_dir: Temporary directory handed to LocalFileHandler when the
            shared instance is created. It has no effect once the instance
            exists.

    Returns:
        The module-wide LocalFileHandler instance.
    """
    global file_handler

    # Fast path: the shared instance was already built by an earlier call.
    if file_handler is not None:
        return file_handler

    file_handler = LocalFileHandler(temp_dir=temp_dir)
    return file_handler
|