# -*- coding: utf-8 -*- """ Loop task execution engine with pause/resume support. """ import asyncio import sys from datetime import datetime from typing import Optional # Fix Windows GBK encoding issue sys.stdout.reconfigure(encoding='utf-8', errors='replace') sys.stderr.reconfigure(encoding='utf-8', errors='replace') from models.db import get_db, _id, _now from service.loop_recall_md import DEFAULT_LLM_NOTE, append_recall_md_section # Module-level control dictionary for pause/resume/stop # key=loop_task_id, value={"pause_event": asyncio.Event, "stop": bool} _loop_controls: dict[str, dict] = {} async def _check_pause(loop_task_id: str) -> bool: """Check if task should pause. Returns True if stopped.""" ctrl = _loop_controls.get(loop_task_id) if not ctrl: return False if ctrl["stop"]: return True # Wait for pause_event (will block if event is cleared) await ctrl["pause_event"].wait() return ctrl["stop"] def _init_control(loop_task_id: str) -> None: """Initialize control structure for a loop task.""" event = asyncio.Event() event.set() # Initially not paused _loop_controls[loop_task_id] = { "pause_event": event, "stop": False, } def _clear_control(loop_task_id: str) -> None: """Clean up control structure.""" _loop_controls.pop(loop_task_id, None) async def pause_loop(loop_task_id: str) -> bool: """Pause a running loop task.""" ctrl = _loop_controls.get(loop_task_id) if not ctrl: return False # 立即写数据库,让前端看到"已暂停"状态 async with get_db() as db: await db.execute( "UPDATE loop_task SET status='paused', paused_at=? WHERE id=?", (_now(), loop_task_id), ) await db.commit() # Clear event,后台会在阶段边界停下来 ctrl["pause_event"].clear() return True async def resume_loop(loop_task_id: str) -> bool: """Resume a paused loop task.""" ctrl = _loop_controls.get(loop_task_id) if not ctrl: return False ctrl["pause_event"].set() return True async def stop_loop(loop_task_id: str) -> bool: """Stop a loop task permanently.""" ctrl = _loop_controls.get(loop_task_id) if not ctrl: return False ctrl["stop"] = True ctrl["pause_event"].set() # Unblock if paused async with get_db() as db: await db.execute( "UPDATE loop_task SET status='stopped', finished_at=? WHERE id=?", (_now(), loop_task_id), ) await db.commit() return True async def run_loop_task( loop_task_id: str, org_id: str, file_ids: list[str], judge_config_id: str, questions_per_section: int, quality_threshold: float, include_multimodal: bool, env_url: str, d_user_id: str, agent_id: str, top_k: int, recall_top_k: int, concurrency: int, cross_chunk: bool, max_rounds: int, max_questions: int, global_dedup: bool = False, # 是否使用全局去重(跨任务) ): """ Main loop execution engine. Each round: 1. Fetch existing questions from all previous rounds 2. Generate new questions (avoiding existing angles) 3. Deduplicate with LLM 4. Create single-jump test 5. Wait for test completion 6. Update stats and check termination conditions """ _init_control(loop_task_id) try: await _do_run_loop( loop_task_id, org_id, file_ids, judge_config_id, questions_per_section, quality_threshold, include_multimodal, env_url, d_user_id, agent_id, top_k, recall_top_k, concurrency, cross_chunk, max_rounds, max_questions, global_dedup ) except Exception as e: # Mark as failed async with get_db() as db: await db.execute( "UPDATE loop_task SET status='failed', error_message=? WHERE id=?", (str(e), loop_task_id), ) await db.commit() finally: _clear_control(loop_task_id) async def _do_run_loop( loop_task_id: str, org_id: str, file_ids: list[str], judge_config_id: str, questions_per_section: int, quality_threshold: float, include_multimodal: bool, env_url: str, d_user_id: str, agent_id: str, top_k: int, recall_top_k: int, concurrency: int, cross_chunk: bool, max_rounds: int, max_questions: int, global_dedup: bool = False, ): """Internal loop implementation.""" # Get loop task name与批次期望切片数(与 chunk_batches_plan.chunk_count 对齐,用于拉取完整性校验) async with get_db() as db: task_rows = await db.execute_fetchall( "SELECT name, expected_chunk_count FROM loop_task WHERE id=?", (loop_task_id,) ) _tr = dict(task_rows[0]) if task_rows else {} loop_task_name = _tr.get("name") or loop_task_id[:8] _ecc = _tr.get("expected_chunk_count") try: expected_chunk_count = int(_ecc) if _ecc is not None and int(_ecc) > 0 else None except (TypeError, ValueError): expected_chunk_count = None # Get judge config for LLM client async with get_db() as db: cfg_rows = await db.execute_fetchall( "SELECT * FROM judge_config WHERE id=?", (judge_config_id,) ) if not cfg_rows: raise ValueError("judge_config not found") judge_cfg = dict(cfg_rows[0]) # Initialize Embedding client for dedup (向量相似度查重,不再使用 LLM) from openai import AsyncOpenAI embed_base = (judge_cfg.get("embed_base_url") or judge_cfg["base_url"]).rstrip("/") embed_key = judge_cfg.get("embed_api_key") or judge_cfg["api_key"] embed_client = AsyncOpenAI( base_url=embed_base, api_key=embed_key, ) embed_model = judge_cfg.get("embed_model") or "text-embedding-3-small" # Update status to running async with get_db() as db: await db.execute( "UPDATE loop_task SET status='running' WHERE id=?", (loop_task_id,), ) await db.commit() consecutive_empty_rounds = 0 def stop_check(): ctrl = _loop_controls.get(loop_task_id) if ctrl is None or ctrl.get("stop", False): return True return False async def async_pause_check(): """Check if paused and wait for resume. Returns True if should stop.""" ctrl = _loop_controls.get(loop_task_id) if not ctrl: return False if ctrl.get("stop", False): return True # Check pause and wait if needed if not ctrl["pause_event"].is_set(): await ctrl["pause_event"].wait() if ctrl.get("stop", False): return True return False async def check_pause_between_stages() -> bool: """在阶段边界等待暂停信号,返回 True 表示应该停止。""" ctrl = _loop_controls.get(loop_task_id) if not ctrl: return False if ctrl["stop"]: return True # 如果 pause_event 已被 clear,说明用户点了暂停 # pause_loop 已经写了数据库,这里只需要等待 resume if not ctrl["pause_event"].is_set(): await ctrl["pause_event"].wait() # 阻塞直到 resume if ctrl["stop"]: return True # resume 后把状态改回 running async with get_db() as db: await db.execute( "UPDATE loop_task SET status='running', paused_at=NULL WHERE id=?", (loop_task_id,), ) await db.commit() return False # 确定从哪一轮、哪个阶段开始 # 查最后一轮的状态,决定是继续该轮还是开新轮 async with get_db() as db: rows = await db.execute_fetchall( """SELECT id, round_number, status, qa_gen_task_id, single_jump_task_id FROM loop_round WHERE loop_task_id=? ORDER BY round_number DESC LIMIT 1""", (loop_task_id,), ) # resume_round: 需要继续执行的轮次信息,None 表示从新轮开始 resume_round = None if rows: last = dict(rows[0]) if last["status"] != "done": resume_round = last # 需要从这一轮的某个阶段继续 round_number = last["round_number"] - 1 # 循环会 +1 回到这一轮 else: round_number = last["round_number"] # 从下一轮开始 else: round_number = 0 while True: # 阶段边界:检查暂停/停止 if await check_pause_between_stages(): return round_number += 1 # Check max_rounds if max_rounds > 0 and round_number > max_rounds: break # Check max_questions if max_questions > 0: async with get_db() as db: row = await db.execute_fetchall( "SELECT total_approved FROM loop_task WHERE id=?", (loop_task_id,) ) current_total = row[0]["total_approved"] if row else 0 if current_total >= max_questions: break # 判断是继续上次中断的轮次,还是创建新轮次 if resume_round and resume_round["round_number"] == round_number: # 继续上次中断的轮次,复用已有的 round_id 和 qa_gen_task_id round_id = resume_round["id"] resume_stage = resume_round["status"] # qa_generating / deduplicating / testing qa_task_id = resume_round["qa_gen_task_id"] resume_round = None # 只用一次 else: # 创建新轮次 resume_stage = None round_id = _id() qa_task_id = None async with get_db() as db: await db.execute( """INSERT INTO loop_round (id, loop_task_id, round_number, status, started_at) VALUES (?,?,?,?,?)""", (round_id, loop_task_id, round_number, "qa_generating", _now()), ) await db.execute( "UPDATE loop_task SET current_round=? WHERE id=?", (round_number, loop_task_id), ) await db.commit() # 1. Get existing questions from all previous rounds section_existing_questions = await _get_existing_questions(loop_task_id, global_dedup=global_dedup) all_existing_questions = [] for questions in section_existing_questions.values(): all_existing_questions.extend(questions) # For QA generation, only pass question text (not ids) section_existing_text = { sp: [q["question"] for q in qs] for sp, qs in section_existing_questions.items() } # 2. QA 生成阶段 # 如果是从 deduplicating 或 testing 阶段 resume,跳过 QA 生成 if resume_stage in ("deduplicating", "testing"): # qa_task_id 已经有了,直接跳过生成 pass else: # 需要运行 QA 生成(新轮次,或从 qa_generating 阶段 resume) if qa_task_id is None: qa_task_id = _id() async with get_db() as db: await db.execute( """INSERT INTO qa_gen_task (id,name,status,judge_config_id,questions_per_section,quality_threshold, progress,total,created_at) VALUES (?,?,?,?,?,?,?,?,?)""", (qa_task_id, f"{loop_task_name}-问题生成-第{round_number}轮", "pending", judge_config_id, questions_per_section, quality_threshold, 0, 0, _now()), ) await db.execute( "UPDATE loop_round SET qa_gen_task_id=?, status='qa_generating' WHERE id=?", (qa_task_id, round_id), ) await db.commit() else: # resume_stage == 'qa_generating':qa_task 已存在但未完成,重新跑 async with get_db() as db: await db.execute( "UPDATE loop_round SET status='qa_generating' WHERE id=?", (round_id,), ) await db.commit() from api.qa_gen_dagent import _run_dagent_task try: await _run_dagent_task( task_id=qa_task_id, org_id=org_id, file_id_list=file_ids, judge_config_id=judge_config_id, questions_per_section=questions_per_section, quality_threshold=quality_threshold, include_multimodal=include_multimodal, section_existing_questions=section_existing_text, stop_check=stop_check, pause_check=async_pause_check, env_url=env_url, expected_chunk_count=expected_chunk_count, ) except Exception as e: async with get_db() as db: await db.execute( "UPDATE loop_round SET status='failed', finished_at=? WHERE id=?", (_now(), round_id), ) await db.commit() raise # 阶段边界:QA 生成完成后检查暂停 if await check_pause_between_stages(): return # 3. 去重阶段 if resume_stage != "testing": async with get_db() as db: await db.execute( "UPDATE loop_round SET status='deduplicating' WHERE id=?", (round_id,), ) await db.commit() # 按切片分组获取新问题 new_questions_by_chunk = await _get_new_questions_by_chunk(qa_task_id) # 按切片分组获取已有问题(用于查重),排除本轮 qa_task_id 避免自查自 existing_by_chunk = await _get_existing_questions_by_chunk( loop_task_id, exclude_qa_task_id=qa_task_id, global_dedup=global_dedup, ) if new_questions_by_chunk: from service.dedup import deduplicate_questions_by_chunk async def on_dedup_progress(done: int, total: int): async with get_db() as db: await db.execute( "UPDATE loop_round SET dedup_progress=? WHERE id=?", (f"{done}/{total}", round_id), ) await db.commit() # 按切片并行查重(正则归一化 + 向量余弦相似度) dup_results = await deduplicate_questions_by_chunk( new_questions_by_chunk, existing_by_chunk, embed_client, embed_model, similarity_threshold=0.85, max_parallel_chunks=5, stop_check=stop_check, pause_check=async_pause_check, on_progress=on_dedup_progress, ) if stop_check(): return async with get_db() as db: for qid, (dup_of, sim) in dup_results.items(): if dup_of: await db.execute( """UPDATE qa_gen_question SET dup_of=?, dup_similarity=?, status='rejected' WHERE id=?""", (dup_of, sim, qid), ) await db.commit() # 阶段边界:去重完成后检查暂停 if await check_pause_between_stages(): return # 统计本轮数据 async with get_db() as db: counts = await db.execute_fetchall( """SELECT COUNT(*) as generated, SUM(CASE WHEN status='approved' THEN 1 ELSE 0 END) as approved, SUM(CASE WHEN dup_of IS NOT NULL THEN 1 ELSE 0 END) as duplicates FROM qa_gen_question WHERE task_id=?""", (qa_task_id,), ) gen_count = counts[0]["generated"] if counts else 0 app_count = counts[0]["approved"] if counts else 0 dup_count = counts[0]["duplicates"] if counts else 0 # SUM 在没有匹配行时返回 NULL,统一成 0 避免后续 None 比较 gen_count = gen_count or 0 app_count = app_count or 0 dup_count = dup_count or 0 async with get_db() as db: await db.execute( """UPDATE loop_round SET generated=?, approved=?, duplicates=?, status='testing' WHERE id=?""", (gen_count, app_count, dup_count, round_id), ) await db.commit() # 收敛检测 if app_count == 0: consecutive_empty_rounds += 1 if consecutive_empty_rounds >= 2: break else: consecutive_empty_rounds = 0 # 4. 召回测试阶段 if app_count > 0: await _run_single_jump_for_round( loop_task_id, loop_task_name, round_number, round_id, qa_task_id, env_url, org_id, d_user_id, agent_id, top_k, recall_top_k, concurrency, cross_chunk ) # 阶段边界:召回测试完成后检查暂停 if await check_pause_between_stages(): return # 5. 更新累计统计 await _update_loop_stats(loop_task_id) async with get_db() as db: await db.execute( "UPDATE loop_round SET status='done', finished_at=? WHERE id=?", (_now(), round_id), ) await db.commit() # Loop finished normally async with get_db() as db: await db.execute( "UPDATE loop_task SET status='done', finished_at=? WHERE id=?", (_now(), loop_task_id), ) await db.commit() async def _get_existing_questions(loop_task_id: str, global_dedup: bool = False) -> dict[str, list[str]]: """Get all approved questions, grouped by section_path. Args: loop_task_id: Current loop task ID global_dedup: If True, get all approved questions from database (cross-task dedup) If False, only get questions from this loop task (default) """ async with get_db() as db: if global_dedup: # 全局去重:获取所有已批准的问题(跨任务) rows = await db.execute_fetchall( """SELECT q.id, q.section_path, q.question FROM qa_gen_question q WHERE q.status = 'approved' ORDER BY q.created_at""", ) else: # 任务内去重:只获取当前循环任务的问题 rows = await db.execute_fetchall( """SELECT q.id, q.section_path, q.question FROM qa_gen_question q JOIN loop_round lr ON q.task_id = lr.qa_gen_task_id WHERE lr.loop_task_id = ? AND q.status = 'approved' ORDER BY q.created_at""", (loop_task_id,), ) result: dict[str, list] = {} for row in rows: sp = row["section_path"] if sp not in result: result[sp] = [] result[sp].append({"id": row["id"], "question": row["question"]}) return result async def _get_new_questions(qa_task_id: str) -> list[dict]: """Get all questions from a QA task.""" async with get_db() as db: rows = await db.execute_fetchall( "SELECT id, question FROM qa_gen_question WHERE task_id=?", (qa_task_id,), ) return [{"id": r["id"], "question": r["question"]} for r in rows] async def _get_new_questions_by_chunk(qa_task_id: str) -> dict[str, list[dict]]: """按切片分组获取新问题。 Returns: {chunk_id: [{id, question, ...}]} """ async with get_db() as db: rows = await db.execute_fetchall( """SELECT id, question, chunk_id, section_path FROM qa_gen_question WHERE task_id=?""", (qa_task_id,), ) result: dict[str, list] = {} for row in rows: chunk_id = row["chunk_id"] or row["section_path"] or "default" if chunk_id not in result: result[chunk_id] = [] result[chunk_id].append({ "id": row["id"], "question": row["question"], "chunk_id": row["chunk_id"], "section_path": row["section_path"], }) return result async def _get_existing_questions_by_chunk( loop_task_id: str, exclude_qa_task_id: str | None = None, global_dedup: bool = False, ) -> dict[str, list[tuple[str, str]]]: """按切片分组获取已有问题(用于查重)。 Args: loop_task_id: 当前循环任务ID exclude_qa_task_id: 排除的 qa_gen_task_id(即本轮刚生成的一批,避免自己查自己) global_dedup: 是否全局去重(跨任务) Returns: {chunk_id: [(id, question)]} """ async with get_db() as db: if global_dedup: # 全局去重:获取所有已批准的问题,但排除本轮 qa_task if exclude_qa_task_id: rows = await db.execute_fetchall( """SELECT id, chunk_id, section_path, question FROM qa_gen_question WHERE status = 'approved' AND task_id != ? ORDER BY created_at""", (exclude_qa_task_id,), ) else: rows = await db.execute_fetchall( """SELECT id, chunk_id, section_path, question FROM qa_gen_question WHERE status = 'approved' ORDER BY created_at""", ) else: # 任务内去重:只获取当前循环任务的问题,但排除本轮 qa_task if exclude_qa_task_id: rows = await db.execute_fetchall( """SELECT q.id, q.chunk_id, q.section_path, q.question FROM qa_gen_question q JOIN loop_round lr ON q.task_id = lr.qa_gen_task_id WHERE lr.loop_task_id = ? AND q.status = 'approved' AND q.task_id != ? ORDER BY q.created_at""", (loop_task_id, exclude_qa_task_id), ) else: rows = await db.execute_fetchall( """SELECT q.id, q.chunk_id, q.section_path, q.question FROM qa_gen_question q JOIN loop_round lr ON q.task_id = lr.qa_gen_task_id WHERE lr.loop_task_id = ? AND q.status = 'approved' ORDER BY q.created_at""", (loop_task_id,), ) result: dict[str, list] = {} for row in rows: chunk_id = row["chunk_id"] or row["section_path"] or "default" if chunk_id not in result: result[chunk_id] = [] result[chunk_id].append((row["id"], row["question"])) return result async def _run_single_jump_for_round( loop_task_id: str, loop_task_name: str, round_number: int, round_id: str, qa_task_id: str, env_url: str, org_id: str, d_user_id: str, agent_id: str, top_k: int, recall_top_k: int, concurrency: int, cross_chunk: bool, ): """Run single-jump test for a round's approved questions.""" def stop_check(): ctrl = _loop_controls.get(loop_task_id) return ctrl is None or ctrl.get("stop", False) # Check stop before starting if stop_check(): return # Create single-jump task sj_task_id = _id() async with get_db() as db: await db.execute( """INSERT INTO single_jump_task (id,name,env_url,org_id,d_user_id,agent_id,top_k,recall_top_k,concurrency,cross_chunk, status,progress,total,created_at,hit_top_k) VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)""", (sj_task_id, f"{loop_task_name}-单跳测试-第{round_number}轮", env_url, org_id, d_user_id, agent_id, top_k, recall_top_k, concurrency, int(cross_chunk), "pending", 0, 0, _now(), top_k), ) await db.execute( "UPDATE loop_round SET single_jump_task_id=? WHERE id=?", (sj_task_id, round_id), ) await db.commit() # Build MD content from approved questions # Query approved questions from this QA task async with get_db() as db: rows = await db.execute_fetchall( """SELECT section_path, file_name, file_id, question, reference_answer, chunk_id, chunk_headers FROM qa_gen_question WHERE task_id=? AND status='approved' ORDER BY chunk_headers, created_at""", (qa_task_id,), ) if not rows: # No approved questions, skip test return # Check stop before running test if stop_check(): return # Group by chunk_headers (use section_path as fallback) from collections import defaultdict sections_dict: dict[str, list] = defaultdict(list) question_chunk_map: dict[str, str] = {} # question -> chunk_id # section_key -> {file_id, file_name} from qa_gen_question section_file_info: dict[str, dict] = {} for row in rows: # Use chunk_headers as the grouping key if available, otherwise use section_path section_key = row["chunk_headers"] if row["chunk_headers"] else row["section_path"] if not section_key: section_key = row["file_name"] or "default" sections_dict[section_key].append({ "question": row["question"], "reference_answer": row["reference_answer"], "file_name": row["file_name"], "chunk_headers": row["chunk_headers"], "chunk_id": row["chunk_id"], }) # Build question to chunk_id mapping if row["chunk_id"] and row["question"]: question_chunk_map[row["question"]] = row["chunk_id"] # Remember file info for this section_key (first non-empty file_id wins) if row["file_id"] and section_key not in section_file_info: section_file_info[section_key] = { "file_id": row["file_id"], "file_name": row["file_name"] or "", } # Generate MD(与 HTTP 导出、离线脚本共用 loop_recall_md) prebuilt_file_map: dict[str, dict] = {} md_lines: list[str] = [] section_index = 0 for section_key, items in sections_dict.items(): section_index += 1 file_name = (items[0].get("file_name") or "").strip() slice_title = (items[0].get("chunk_headers") or "").strip() or section_key parsed_section_path = append_recall_md_section( md_lines, section_index, file_name=file_name, slice_title=slice_title, qa_items=items, meta_lines=[DEFAULT_LLM_NOTE], ) finfo = section_file_info.get(section_key) if finfo: prebuilt_file_map[parsed_section_path] = { "file_id": finfo["file_id"], "file_name": finfo["file_name"], "match_type": "exact", } md_content = "\n".join(md_lines) # Check stop before running test if stop_check(): return # Run single-jump test from api.single_jump import _run_task # Import necessary modules import sys from pathlib import Path sys.path.insert(0, str(Path(__file__).parent.parent / "sdk")) await _run_task( task_id=sj_task_id, qa_text=md_content, env_url=env_url, org_id=org_id, d_user_id=d_user_id, agent_id=agent_id, hit_top_k=top_k, recall_top_k=recall_top_k, concurrency=concurrency, cross_chunk=cross_chunk, prebuilt_file_map=prebuilt_file_map if prebuilt_file_map else None, prebuilt_chunk_map=question_chunk_map if question_chunk_map else None, ) # After test completes, aggregate stats from single_jump_result async with get_db() as db: # Wait a bit for the test to complete (polling) max_wait = 1800 # Max 30 minutes wait for large tasks waited = 0 while waited < max_wait: # Check stop during polling if stop_check(): return row = await db.execute_fetchall( "SELECT status FROM single_jump_task WHERE id=?", (sj_task_id,) ) if row and row[0]["status"] in ("done", "failed"): break await asyncio.sleep(2) waited += 2 # Aggregate stats stats_rows = await db.execute_fetchall( """SELECT COUNT(*) as tested, SUM(CASE WHEN error IS NULL AND COALESCE(json_array_length(retrieved), 0) > 0 THEN 1 ELSE 0 END) as recalled, SUM(CASE WHEN is_file_hit = 1 THEN 1 ELSE 0 END) as file_hit, SUM(CASE WHEN is_chunk_hit = 1 THEN 1 ELSE 0 END) as chunk_hit FROM single_jump_result WHERE task_id=?""", (sj_task_id,) ) if stats_rows: stats = dict(stats_rows[0]) await db.execute( """UPDATE loop_round SET tested=?, recalled=?, file_hit=?, chunk_hit=? WHERE id=?""", (stats.get("tested") or 0, stats.get("recalled") or 0, stats.get("file_hit") or 0, stats.get("chunk_hit") or 0, round_id), ) await db.commit() async def _update_loop_stats(loop_task_id: str): """Update cumulative stats from all rounds.""" async with get_db() as db: # Aggregate from loop_round rows = await db.execute_fetchall( """SELECT SUM(generated) as total_generated, SUM(approved) as total_approved, SUM(duplicates) as total_duplicates, SUM(tested) as total_tested, SUM(recalled) as total_recalled, SUM(file_hit) as total_file_hit, SUM(chunk_hit) as total_chunk_hit FROM loop_round WHERE loop_task_id=?""", (loop_task_id,), ) stats = dict(rows[0]) if rows else {} # Count file_miss and recall_failed from single_jump_result miss_rows = await db.execute_fetchall( """SELECT SUM(CASE WHEN r.is_file_hit=0 AND COALESCE(json_array_length(r.retrieved), 0)>0 THEN 1 ELSE 0 END) as file_miss, SUM(CASE WHEN COALESCE(json_array_length(r.retrieved), 0)=0 AND r.error IS NULL THEN 1 ELSE 0 END) as recall_failed FROM single_jump_result r JOIN loop_round lr ON r.task_id = lr.single_jump_task_id WHERE lr.loop_task_id=?""", (loop_task_id,), ) miss_stats = dict(miss_rows[0]) if miss_rows else {} await db.execute( """UPDATE loop_task SET total_generated=?, total_approved=?, total_duplicates=?, total_tested=?, total_recalled=?, total_file_hit=?, total_file_miss=?, total_recall_failed=?, total_chunk_hit=? WHERE id=?""", ( stats.get("total_generated") or 0, stats.get("total_approved") or 0, stats.get("total_duplicates") or 0, stats.get("total_tested") or 0, stats.get("total_recalled") or 0, stats.get("total_file_hit") or 0, miss_stats.get("file_miss") or 0, miss_stats.get("recall_failed") or 0, stats.get("total_chunk_hit") or 0, loop_task_id, ), ) await db.commit() async def recover_orphaned_loops(): """On startup, set any 'running' loop tasks to 'paused'.""" async with get_db() as db: rows = await db.execute_fetchall( "SELECT id FROM loop_task WHERE status='running'" ) for row in rows: await db.execute( "UPDATE loop_task SET status='paused', paused_at=? WHERE id=?", (_now(), row["id"]), ) await db.commit()