343 lines
11 KiB
Python
343 lines
11 KiB
Python
#!/usr/bin/env python3
|
||
# -*- coding: utf-8 -*-
|
||
"""
Baidu Object Storage (BOS) manager.

Stores and manages the raw data of multimodal files.
"""
|
||
|
||
import os
|
||
import logging
|
||
import json
|
||
import copy
|
||
from datetime import datetime
|
||
from typing import Dict, List, Optional, Any, Union
|
||
from pathlib import Path
|
||
|
||
from baidubce.auth import bce_credentials
|
||
from baidubce import bce_base_client, bce_client_configuration
|
||
from baidubce.services.bos.bos_client import BosClient
|
||
from baidubce.exception import BceError
|
||
|
||
logger = logging.getLogger(__name__)
|
||
|
||
class BaiduBOSManager:
    """Manager for Baidu Object Storage (BOS).

    Wraps a ``BosClient`` bound to a single bucket and offers helpers for
    uploading, downloading, inspecting, deleting, listing and restoring
    (archived) objects.
    """

    # SECURITY(review): these literals were hard-coded in the original
    # constructor. They are kept ONLY as a last-resort fallback so existing
    # callers of ``BaiduBOSManager()`` keep working. Credentials committed to
    # source control should be treated as leaked: rotate them and remove
    # this fallback once every deployment supplies BOS_AK / BOS_SK.
    _LEGACY_AK = "ALTAKmzKDy1OqhqmepD2OeXqbN"
    _LEGACY_SK = "b79c5fbc26344868916ec6e9e2ff65f0"
    _DEFAULT_ENDPOINT = "https://bj.bcebos.com"
    _DEFAULT_BUCKET = "dmtyz-demo"

    # Extension -> MIME type table used by _detect_content_type.
    _CONTENT_TYPES = {
        '.jpg': 'image/jpeg',
        '.jpeg': 'image/jpeg',
        '.png': 'image/png',
        '.gif': 'image/gif',
        '.bmp': 'image/bmp',
        '.webp': 'image/webp',
        '.txt': 'text/plain',
        '.json': 'application/json',
        '.pdf': 'application/pdf',
    }
    # Extensions routed to the "images/" and "texts/" key prefixes.
    _IMAGE_EXTS = frozenset({'.jpg', '.jpeg', '.png', '.gif', '.bmp', '.webp'})
    _TEXT_EXTS = frozenset({'.txt', '.json', '.csv', '.md'})

    def __init__(self, ak: str = None, sk: str = None, endpoint: str = None, bucket: str = None):
        """Create the manager and eagerly connect to BOS.

        Each setting is resolved in this order: explicit argument, then the
        environment variable (``BOS_AK``, ``BOS_SK``, ``BOS_ENDPOINT``,
        ``BOS_BUCKET``), then the built-in default.

        Args:
            ak: Access key.
            sk: Secret key.
            endpoint: BOS service endpoint.
            bucket: Bucket name.

        Raises:
            Exception: Whatever the SDK raises if the client cannot be
                created or the connection test fails.
        """
        env = os.environ
        ak = ak or env.get("BOS_AK")
        sk = sk or env.get("BOS_SK")
        if not ak or not sk:
            # Make the use of the embedded credentials visible in the logs.
            logger.warning(
                "⚠️ 未提供BOS凭证,正在使用内置默认凭证;"
                "请通过参数或环境变量 BOS_AK/BOS_SK 配置"
            )

        self.ak = ak or self._LEGACY_AK
        self.sk = sk or self._LEGACY_SK
        self.endpoint = endpoint or env.get("BOS_ENDPOINT") or self._DEFAULT_ENDPOINT
        self.bucket = bucket or env.get("BOS_BUCKET") or self._DEFAULT_BUCKET

        self.client = None
        self._init_client()

    def _init_client(self):
        """Build the ``BosClient`` and verify that it can reach the service.

        Raises:
            Exception: Re-raised after logging if construction or the
                connection test fails.
        """
        try:
            config = bce_client_configuration.BceClientConfiguration(
                credentials=bce_credentials.BceCredentials(self.ak, self.sk),
                endpoint=self.endpoint
            )
            self.client = BosClient(config)

            # Fail fast if the credentials or endpoint are unusable.
            self._test_connection()
            logger.info(f"✅ BOS客户端初始化成功: {self.bucket}")

        except Exception as e:
            logger.error(f"❌ BOS客户端初始化失败: {e}")
            raise

    def _test_connection(self):
        """Check connectivity by listing buckets; re-raise on failure."""
        try:
            self.client.list_buckets()
            logger.info("✅ BOS连接测试成功")
        except Exception as e:
            logger.error(f"❌ BOS连接测试失败: {e}")
            raise

    def _object_url(self, bos_key: str) -> str:
        """Return the ``endpoint/bucket/key`` URL string for an object key."""
        return f"{self.endpoint}/{self.bucket}/{bos_key}"

    def upload_file(self, local_path: str, bos_key: str = None,
                    content_type: str = None) -> Dict[str, Any]:
        """Upload a local file to BOS.

        Args:
            local_path: Path of the local file.
            bos_key: Object key; generated from the file name when ``None``.
            content_type: MIME type; detected from the extension when ``None``.

        Returns:
            A dict with ``bos_key``, ``bucket``, ``file_size``,
            ``content_type``, ``upload_time``, ``etag`` and ``url``.

        Raises:
            FileNotFoundError: If ``local_path`` does not exist.
            Exception: Any SDK error, re-raised after logging.
        """
        try:
            if not os.path.exists(local_path):
                raise FileNotFoundError(f"文件不存在: {local_path}")

            if bos_key is None:
                bos_key = self._generate_bos_key(local_path)

            if content_type is None:
                content_type = self._detect_content_type(local_path)

            # Stat once; the size is only used for the result record.
            file_size = os.stat(local_path).st_size

            response = self.client.put_object_from_file(
                self.bucket,
                bos_key,
                local_path,
                content_type=content_type
            )

            result = {
                "bos_key": bos_key,
                "bucket": self.bucket,
                "file_size": file_size,
                "content_type": content_type,
                "upload_time": datetime.utcnow().isoformat(),
                "etag": response.metadata.etag if hasattr(response, 'metadata') else None,
                "url": self._object_url(bos_key)
            }

            logger.info(f"✅ 文件上传成功: {bos_key}")
            return result

        except Exception as e:
            logger.error(f"❌ 文件上传失败: {e}")
            raise

    def download_file(self, bos_key: str, local_path: str) -> bool:
        """Download an object from BOS into a local file.

        Args:
            bos_key: Object key.
            local_path: Destination path; missing parent directories are
                created.

        Returns:
            ``True`` on success, ``False`` if anything failed (the error is
            logged, not raised).
        """
        try:
            # A bare file name has an empty dirname, and os.makedirs("")
            # raises, so only create the parent when there is one.
            parent_dir = os.path.dirname(local_path)
            if parent_dir:
                os.makedirs(parent_dir, exist_ok=True)

            response = self.client.get_object(self.bucket, bos_key)
            stream = response.data
            try:
                with open(local_path, 'wb') as f:
                    for chunk in stream:
                        f.write(chunk)
            finally:
                # Release the underlying stream even if writing failed.
                close = getattr(stream, 'close', None)
                if callable(close):
                    close()

            logger.info(f"✅ 文件下载成功: {bos_key} -> {local_path}")
            return True

        except Exception as e:
            logger.error(f"❌ 文件下载失败: {e}")
            return False

    def get_object_metadata(self, bos_key: str) -> Optional[Dict]:
        """Fetch metadata for an object.

        Args:
            bos_key: Object key.

        Returns:
            A dict with ``bos_key``, ``bucket``, ``content_length``,
            ``content_type``, ``etag``, ``last_modified`` and ``url``, or
            ``None`` if the lookup failed (the error is logged).
        """
        try:
            response = self.client.get_object_meta_data(self.bucket, bos_key)
            meta = response.metadata

            return {
                "bos_key": bos_key,
                "bucket": self.bucket,
                "content_length": meta.content_length,
                "content_type": meta.content_type,
                "etag": meta.etag,
                "last_modified": meta.last_modified,
                "url": self._object_url(bos_key)
            }

        except Exception as e:
            logger.error(f"❌ 获取对象元数据失败: {e}")
            return None

    def delete_object(self, bos_key: str) -> bool:
        """Delete an object.

        Args:
            bos_key: Object key.

        Returns:
            ``True`` on success, ``False`` on failure (the error is logged).
        """
        try:
            self.client.delete_object(self.bucket, bos_key)
            logger.info(f"✅ 对象删除成功: {bos_key}")
            return True

        except Exception as e:
            logger.error(f"❌ 对象删除失败: {e}")
            return False

    def list_objects(self, prefix: str = "", max_keys: int = 1000) -> List[Dict]:
        """List objects in the bucket.

        Args:
            prefix: Only keys starting with this prefix are requested.
            max_keys: Maximum number of entries requested from the service.

        Returns:
            A list of dicts with ``key``, ``size``, ``last_modified``,
            ``etag`` and ``url``; an empty list on failure (the error is
            logged).
        """
        try:
            response = self.client.list_objects(
                bucket_name=self.bucket,
                prefix=prefix,
                max_keys=max_keys
            )

            # A response without "contents" is treated as an empty listing.
            contents = getattr(response, 'contents', None) or []
            return [
                {
                    "key": obj.key,
                    "size": obj.size,
                    "last_modified": obj.last_modified,
                    "etag": obj.etag,
                    "url": self._object_url(obj.key)
                }
                for obj in contents
            ]

        except Exception as e:
            logger.error(f"❌ 列出对象失败: {e}")
            return []

    def restore_archive_object(self, bos_key: str, days: int = 1, tier: str = "Standard") -> bool:
        """Request restoration of an archived object.

        Args:
            bos_key: Object key.
            days: Number of days the restored copy is requested for.
            tier: Restore tier (Expedited/Standard/Bulk).

        Returns:
            ``True`` if the request was sent without raising, ``False``
            otherwise (the error is logged).
        """
        try:
            # The restore call goes through a dedicated client built from
            # the same credentials and endpoint.
            restore_client = ArchiveRestoreClient(
                bce_client_configuration.BceClientConfiguration(
                    credentials=bce_credentials.BceCredentials(self.ak, self.sk),
                    endpoint=self.endpoint
                )
            )

            restore_client.restore_object(self.bucket, bos_key, days, tier)
            logger.info(f"✅ 归档恢复请求已发送: {bos_key}")
            return True

        except Exception as e:
            logger.error(f"❌ 归档恢复失败: {e}")
            return False

    def _generate_bos_key(self, local_path: str) -> str:
        """Build an object key ``<category>/<UTC timestamp>_<file name>``.

        The category is ``images``, ``texts`` or ``files`` depending on the
        file extension.
        """
        filename = os.path.basename(local_path)
        timestamp = datetime.utcnow().strftime("%Y%m%d_%H%M%S")

        if self._is_image_file(local_path):
            category = "images"
        elif self._is_text_file(local_path):
            category = "texts"
        else:
            category = "files"
        return f"{category}/{timestamp}_{filename}"

    def _detect_content_type(self, file_path: str) -> str:
        """Map the file extension to a MIME type.

        Unknown extensions yield ``application/octet-stream``.
        """
        ext = os.path.splitext(file_path)[1].lower()
        return self._CONTENT_TYPES.get(ext, 'application/octet-stream')

    def _is_image_file(self, file_path: str) -> bool:
        """Return whether the extension is one of the known image types."""
        return os.path.splitext(file_path)[1].lower() in self._IMAGE_EXTS

    def _is_text_file(self, file_path: str) -> bool:
        """Return whether the extension is one of the known text types."""
        return os.path.splitext(file_path)[1].lower() in self._TEXT_EXTS
|
||
|
||
|
||
class ArchiveRestoreClient(bce_base_client.BceBaseClient):
    """BCE client used to send archive-restore requests for objects."""

    def __init__(self, config):
        """Store a private copy of the SDK default config overlaid with ``config``.

        Args:
            config: Client configuration whose non-None values override the
                SDK defaults.
        """
        merged = copy.deepcopy(bce_client_configuration.DEFAULT_CONFIG)
        merged.merge_non_none_values(config)
        self.config = merged

    def restore_object(self, bucket: str, key: str, days: int = 1, tier: str = "Standard"):
        """Send a POST ``/<bucket>/<key>?restore`` request.

        Args:
            bucket: Bucket name.
            key: Object key.
            days: Value sent in the ``x-bce-restore-days`` header.
            tier: Value sent in the ``x-bce-restore-tier`` header.

        Returns:
            Whatever ``_send_request`` returns.
        """
        request_path = '/{}/{}'.format(bucket, key).encode('utf-8')
        request_headers = {
            b'x-bce-restore-days': str(days).encode('utf-8'),
            b'x-bce-restore-tier': tier.encode('utf-8'),
            b'Accept': b'application/json'
        }
        query = {"restore": ""}
        # The request body is an empty JSON object.
        body = json.dumps({}, ensure_ascii=False).encode('utf-8')

        # NOTE(review): _send_request is not defined in this class; it is
        # presumably expected from the base class -- confirm it exists there
        # with this positional signature.
        return self._send_request(b'POST', request_path, request_headers, query, body)
|
||
|
||
|
||
# Module-wide manager instance, created on first request.
bos_manager = None


def get_bos_manager() -> BaiduBOSManager:
    """Return the shared BOS manager, constructing it on the first call."""
    global bos_manager
    if bos_manager is not None:
        return bos_manager
    bos_manager = BaiduBOSManager()
    return bos_manager
|