dagent_eval/sdk/rag_eval/single_jump/cli.py

"""
单跳召回测试 CLI 入口。

用法：
  python -m rag_eval.single_jump.cli \
    --env-url https://cloud-dev.d-robotics.cc \
    --org-id dc778d0ae0aade4c33e19342ddd4fe72e68021623de5ff0e7c6b63dc04c7a1a7 \
    --qa-file "D:/evb知识库/EVB知识库完整问答集.md" \
    --top-k 5 \
    --output report.json
"""
import asyncio
import argparse
import sys
from pathlib import Path


async def run(args):
    sys.path.insert(0, str(Path(__file__).parent.parent.parent))

    from rag_eval.single_jump.parser import parse_qa_file
    from rag_eval.single_jump.mapper import FileMapper
    from rag_eval.single_jump.tester import RecallTester
    from rag_eval.single_jump.quality import check_recall_quality
    from rag_eval.single_jump.report import build_report

    # ── Step 1: 解析 MD 文件 ──────────────────────────────────────
    print(f"解析问答集文件: {args.qa_file}")
    sections = parse_qa_file(args.qa_file)
    total_qa = sum(len(s.qa_pairs) for s in sections)
    print(f"  共 {len(sections)} 个章节，{total_qa} 条问答对")

    # 限制测试数量（调试用）
    if args.max_questions and args.max_questions > 0:
        count = 0
        trimmed = []
        for s in sections:
            if count >= args.max_questions:
                break
            keep = s.qa_pairs[:max(0, args.max_questions - count)]
            if keep:
                s.qa_pairs = keep
                trimmed.append(s)
                count += len(keep)
        sections = trimmed
        total_qa = sum(len(s.qa_pairs) for s in sections)
        print(f"  限制为 {total_qa} 条（--max-questions {args.max_questions}）")

    # ── Step 2: 文件名映射 ────────────────────────────────────────
    print(f"\n拉取知识库文件列表...")
    mapper = FileMapper(
        env_url=args.env_url,
        org_id=args.org_id,
        d_user_id=args.user_id,
    )
    file_count = await mapper.load_files()
    print(f"  共 {file_count} 个文件")

    file_map: dict[str, dict | None] = {}
    unmatched = []
    for s in sections:
        if s.section_path not in file_map:
            result = mapper.map_section_to_file(s.section_path)
            file_map[s.section_path] = result
            if not result:
                unmatched.append(s.section_path)

    matched = len(file_map) - len(unmatched)
    print(f"  映射成功: {matched}/{len(file_map)} 个章节")
    if unmatched:
        print(f"  未匹配章节 ({len(unmatched)}): {unmatched[:5]}{'...' if len(unmatched) > 5 else ''}")

    # ── Step 3: 执行召回测试 ──────────────────────────────────────
    print(f"\n开始召回测试 (top_k={args.top_k}, concurrency={args.concurrency}, cross_chunk={args.cross_chunk})...")
    tester = RecallTester(
        env_url=args.env_url,
        org_id=args.org_id,
        d_user_id=args.user_id,
    )

    finished = 0
    def progress(done, total):
        nonlocal finished
        finished = done
        print(f"\r  进度: {done}/{total}", end="", flush=True)

    results = await tester.run(
        sections=sections,
        file_map=file_map,
        top_k=args.top_k,
        concurrency=args.concurrency,
        cross_chunk=args.cross_chunk,
        progress_cb=progress,
    )
    print(f"\r  完成: {len(results)} 条")

    # ── Step 4: 质量检测 ──────────────────────────────────────────
    quality_info = check_recall_quality(results)

    # ── Step 5: 生成报告 ──────────────────────────────────────────
    report = build_report(
        results=results,
        env_url=args.env_url,
        org_id=args.org_id,
        qa_file=args.qa_file,
        top_k=args.top_k,
        cross_chunk=args.cross_chunk,
        quality_info=quality_info,
    )

    print("\n" + report.summary_text())

    report.save(args.output)
    print(f"\n报告已保存: {args.output}")


def main():
    parser = argparse.ArgumentParser(
        prog="single-jump-eval",
        description="单跳知识库召回自动化测试",
    )
    parser.add_argument("--env-url", required=True, help="dagent 环境地址，如 https://cloud-dev.d-robotics.cc")
    parser.add_argument("--org-id", required=True, help="组织 ID")
    parser.add_argument("--user-id", default="test", help="d-user-id 请求头（默认 test）")
    parser.add_argument("--qa-file", required=True, help="问答集 MD 文件路径")
    parser.add_argument("--top-k", type=int, default=5, help="召回数量（默认 5）")
    parser.add_argument("--concurrency", type=int, default=5, help="并发数（默认 5）")
    parser.add_argument("--cross-chunk", action="store_true", help="跨切片模式（不限定 file_id）")
    parser.add_argument("--max-questions", type=int, default=0, help="限制测试问题数（0=不限制，调试用）")
    parser.add_argument("--output", default="single_jump_report.json", help="输出报告路径")

    args = parser.parse_args()
    asyncio.run(run(args))


if __name__ == "__main__":
    main()