""" DashScope API service """ import base64 import json import logging from pathlib import Path from typing import Optional, Dict, Any from dashscope import MultiModalConversation import dashscope from app.config import Config logger = logging.getLogger('videoSummary') class DashScopeService: """DashScope API service wrapper""" def __init__(self, config: Config): """ Initialize DashScope service Args: config: Application configuration """ self.config = config self.api_key = config.dashscope_api_key self.model = config.dashscope_model self.fps = config.dashscope_fps self._api_key_set = False def _ensure_api_key(self): """ Ensure API key is set before making API calls Raises: ValueError: If API key is not configured """ if not self.api_key: raise ValueError( "DashScope API key is required for video analysis. " "Please set 'dashscope.api_key' in config.yaml file. " "You can obtain your API key from: https://dashscope.console.aliyun.com/" ) if not self._api_key_set: dashscope.api_key = self.api_key self._api_key_set = True def _log_request(self, method_name: str, request_data: Dict[str, Any]): """ Log complete request data for third-party API calls Args: method_name: Name of the calling method request_data: Request data dictionary containing model, messages, api_key, etc. """ try: # Create a copy for logging to avoid modifying original log_data = request_data.copy() # Handle Base64 encoded video data - only log summary for large data if 'messages' in log_data: messages_copy = [] for msg in log_data['messages']: msg_copy = msg.copy() if isinstance(msg, dict) else msg if isinstance(msg_copy, dict) and 'content' in msg_copy: content_copy = [] for content_item in msg_copy['content']: if isinstance(content_item, dict) and 'video' in content_item: video_value = content_item['video'] # If it's a Base64 data URI, only log the prefix and size if isinstance(video_value, str) and video_value.startswith('data:video'): # Extract size info if available size_info = '' if 'encoded_size' in log_data: size_info = f" (encoded size: {log_data['encoded_size']} bytes)" content_copy.append({ 'video': f"data:video/mp4;base64,[BASE64_DATA{size_info}]", 'fps': content_item.get('fps', 'N/A') }) else: content_copy.append(content_item) else: content_copy.append(content_item) msg_copy['content'] = content_copy messages_copy.append(msg_copy) log_data['messages'] = messages_copy # Log complete request at DEBUG level logger.debug(f"DashScope API Request [{method_name}] - Complete Request Data:") logger.debug(json.dumps(log_data, indent=2, ensure_ascii=False)) # Log summary at INFO level video_count = log_data.get('video_count', 'N/A') if video_count == 'N/A' and 'video_paths' in log_data: video_paths = log_data.get('video_paths', []) video_count = len(video_paths) if isinstance(video_paths, list) else 'N/A' summary = { 'method': method_name, 'model': log_data.get('model', 'N/A'), 'api_key': log_data.get('api_key', 'N/A'), 'messages_count': len(log_data.get('messages', [])), 'fps': log_data.get('fps', 'N/A'), 'video_count': video_count } logger.info(f"DashScope API Request [{method_name}] - Summary: {json.dumps(summary, ensure_ascii=False)}") except Exception as e: logger.warning(f"Failed to log request data: {str(e)}") def _log_response(self, method_name: str, response: Any): """ Log complete response data from third-party API calls Args: method_name: Name of the calling method response: Response object from DashScope API """ try: # Log complete response at DEBUG level logger.debug(f"DashScope API Response [{method_name}] - Complete Response Data:") response_data = { 'status_code': getattr(response, 'status_code', 'N/A'), 'message': getattr(response, 'message', 'N/A'), 'request_id': getattr(response, 'request_id', 'N/A'), } # Try to get response output if available if hasattr(response, 'output'): try: output_dict = { 'choices': [] } if hasattr(response.output, 'choices') and response.output.choices: for choice in response.output.choices: choice_dict = {} if hasattr(choice, 'message'): if hasattr(choice.message, 'content'): content_list = [] for content_item in choice.message.content: if isinstance(content_item, dict): # For text content, include full text if 'text' in content_item: content_list.append({'text': content_item['text']}) else: # For other content types, include type only content_list.append({k: v for k, v in content_item.items() if k != 'video' or not isinstance(v, str) or len(v) < 100}) else: content_list.append(str(content_item)) choice_dict['message'] = {'content': content_list} output_dict['choices'].append(choice_dict) response_data['output'] = output_dict except Exception as e: response_data['output'] = f"Error extracting output: {str(e)}" logger.debug(json.dumps(response_data, indent=2, ensure_ascii=False)) # Log summary at INFO level summary = { 'method': method_name, 'status_code': response_data['status_code'], 'message': response_data['message'], 'request_id': response_data['request_id'] } logger.info(f"DashScope API Response [{method_name}] - Summary: {json.dumps(summary, ensure_ascii=False)}") # Log error details at ERROR level if failed if response_data['status_code'] != 200: logger.error(f"DashScope API Error [{method_name}] - Status: {response_data['status_code']}, Message: {response_data['message']}") except Exception as e: logger.warning(f"Failed to log response data: {str(e)}") def analyze_video(self, video_path: Path, prompt: str, fps: Optional[int] = None) -> Dict[str, Any]: """ Analyze video content using DashScope API Args: video_path: Path to video file prompt: Analysis prompt fps: Frames per second (overrides config if provided) Returns: API response dictionary Raises: Exception: If API call fails """ # Ensure API key is set self._ensure_api_key() if not video_path.exists(): raise FileNotFoundError(f"Video file not found: {video_path}") # According to DashScope documentation for Linux/macOS Python SDK: # local_path = "xxx/test.mp4" # absolute path string # video_path = f"file://{local_path}" # Example from official docs: # local_path = "xxx/test.mp4" # video_path = f"file://{local_path}" absolute_path = video_path.absolute() local_path = str(absolute_path) video_path_for_api = f"file://{local_path}" # Log for debugging logger.info(f"Using video path (file:// format as per official docs): {video_path_for_api}") # Use provided fps or default from config fps_value = fps if fps is not None else self.fps # Prepare messages according to DashScope official documentation format: # {'role':'user', 'content': [{'video': video_path, "fps":2}, {'text': '...'}]} messages = [ { 'role': 'user', 'content': [ {'video': video_path_for_api, 'fps': fps_value}, {'text': prompt} ] } ] # Call API try: # Prepare request data for logging request_data = { 'model': self.model, 'api_key': self.api_key, 'messages': messages, 'fps': fps_value, 'video_path': video_path_for_api } # Log complete request self._log_request('analyze_video', request_data) response = MultiModalConversation.call( model=self.model, messages=messages ) # Log complete response self._log_response('analyze_video', response) if response.status_code == 200: # Extract text content from response content = response.output.choices[0].message.content[0]["text"] return { 'success': True, 'content': content, 'fps': fps_value } else: # If file:// format fails, try Base64 encoding as fallback # Note: Base64 encoding increases file size by ~33%, and API limit is 10MB for encoded video logger.warning(f"File:// format failed, trying Base64 encoding. Error: {response.message}") try: file_size = video_path.stat().st_size # Check if file is too large for Base64 encoding (10MB limit for encoded video) if file_size > 7 * 1024 * 1024: # ~7MB raw = ~10MB encoded logger.error(f"Video file too large for Base64 encoding: {file_size} bytes (limit: ~7MB raw)") return { 'success': False, 'error': f"Video file too large for Base64 encoding. File size: {file_size / 1024 / 1024:.2f}MB, limit: ~7MB raw (~10MB encoded). Please use a smaller video file.", 'status_code': response.status_code } # Read file and encode to Base64 with open(video_path, 'rb') as f: video_data = f.read() video_base64 = base64.b64encode(video_data).decode('utf-8') logger.info(f"Using Base64 encoding (file size: {file_size} bytes, encoded size: {len(video_base64)} bytes)") # According to DashScope docs, Base64 format should be: base64 encoded string # Try data URI format first messages_base64 = [ { 'role': 'user', 'content': [ {'video': f"data:video/mp4;base64,{video_base64}", 'fps': fps_value}, {'text': prompt} ] } ] # Prepare request data for logging (Base64 encoding) request_data_base64 = { 'model': self.model, 'api_key': self.api_key, 'messages': messages_base64, 'fps': fps_value, 'encoding': 'base64', 'file_size': file_size, 'encoded_size': len(video_base64) } # Log complete request (Base64 encoding) self._log_request('analyze_video_base64', request_data_base64) response = MultiModalConversation.call( model=self.model, messages=messages_base64 ) # Log complete response (Base64 encoding) self._log_response('analyze_video_base64', response) if response.status_code == 200: content = response.output.choices[0].message.content[0]["text"] return { 'success': True, 'content': content, 'fps': fps_value } else: logger.error(f"DashScope API error (all formats including Base64 failed) - Status: {response.status_code}, Message: {response.message}") return { 'success': False, 'error': f"API error: {response.message}", 'status_code': response.status_code } except Exception as e: logger.error(f"Base64 encoding failed: {str(e)}") return { 'success': False, 'error': f"All upload methods failed. Last error: {response.message}. Base64 encoding error: {str(e)}", 'status_code': response.status_code } except Exception as e: logger.error(f"DashScope API exception: {str(e)}, Path used: {video_path_for_api}") return { 'success': False, 'error': str(e) } def summarize_video(self, video_path: Path, fps: Optional[int] = None) -> Dict[str, Any]: """ Generate video summary Args: video_path: Path to video file fps: Frames per second (overrides config if provided) Returns: Summary result dictionary """ prompt = "请对这段视频进行总结,包括主要内容、关键场景和重要信息。" return self.analyze_video(video_path, prompt, fps) def compare_videos(self, video_paths: list[Path], fps: Optional[int] = None) -> Dict[str, Any]: """ Compare multiple videos Args: video_paths: List of video file paths fps: Frames per second (overrides config if provided) Returns: Comparison result dictionary """ # Ensure API key is set self._ensure_api_key() if len(video_paths) < 2: return { 'success': False, 'error': 'At least two videos are required for comparison' } # Validate all video files exist for path in video_paths: if not path.exists(): return { 'success': False, 'error': f"Video file not found: {path}" } # Convert paths - try direct absolute path strings first video_paths_for_api = [] for path in video_paths: absolute_path = path.absolute() local_path = str(absolute_path) video_paths_for_api.append(local_path) fps_value = fps if fps is not None else self.fps # Prepare messages with multiple videos using file:// URI format content = [] for video_uri in video_paths_for_api: content.append({'video': video_uri, 'fps': fps_value}) prompt = "请对比这些视频的内容,找出它们的相似之处和不同之处,并详细说明。" content.append({'text': prompt}) messages = [ { 'role': 'user', 'content': content } ] # Call API try: # Prepare request data for logging request_data = { 'model': self.model, 'api_key': self.api_key, 'messages': messages, 'fps': fps_value, 'video_paths': video_paths_for_api, 'video_count': len(video_paths_for_api) } # Log complete request self._log_request('compare_videos', request_data) response = MultiModalConversation.call( model=self.model, messages=messages ) # Log complete response self._log_response('compare_videos', response) if response.status_code == 200: content_text = response.output.choices[0].message.content[0]["text"] return { 'success': True, 'content': content_text, 'fps': fps_value } else: # If direct paths fail, try file:// format logger.warning(f"Direct paths failed, trying file:// format. Error: {response.message}") video_urls = [] for path in video_paths: absolute_path = path.absolute() local_path = str(absolute_path) video_urls.append(f"file://{local_path}") content_retry = [] for video_url in video_urls: content_retry.append({'video': video_url, 'fps': fps_value}) content_retry.append({'text': prompt}) messages_retry = [ { 'role': 'user', 'content': content_retry } ] # Prepare request data for logging (retry with file:// format) request_data_retry = { 'model': self.model, 'api_key': self.api_key, 'messages': messages_retry, 'fps': fps_value, 'video_paths': video_urls, 'video_count': len(video_urls), 'retry': True } # Log complete request (retry) self._log_request('compare_videos_retry', request_data_retry) response = MultiModalConversation.call( model=self.model, messages=messages_retry ) # Log complete response (retry) self._log_response('compare_videos_retry', response) if response.status_code == 200: content_text = response.output.choices[0].message.content[0]["text"] return { 'success': True, 'content': content_text, 'fps': fps_value } else: logger.error(f"DashScope API error (both attempts failed) - Status: {response.status_code}, Message: {response.message}") return { 'success': False, 'error': f"API error: {response.message}", 'status_code': response.status_code } except Exception as e: logger.error(f"DashScope API exception (compare): {str(e)}") return { 'success': False, 'error': str(e) }