98 lines
3.5 KiB
Python

"""
CLI entry point: rag-eval run --config config.yaml
"""
import asyncio
import argparse
import json
import yaml
import sys
from pathlib import Path
def main():
parser = argparse.ArgumentParser(prog="rag-eval", description="RAG Evaluation Framework")
sub = parser.add_subparsers(dest="command")
# rag-eval run
run_p = sub.add_parser("run", help="Run evaluation")
run_p.add_argument("--config", required=True, help="Path to YAML config file")
run_p.add_argument("--dataset", required=True, help="Path to dataset JSON file")
run_p.add_argument("--output", default="eval_report.json", help="Output report path")
# rag-eval generate
gen_p = sub.add_parser("generate", help="Generate dataset from knowledge base")
gen_p.add_argument("--config", required=True, help="Path to YAML config file")
gen_p.add_argument("--output", default="dataset.json", help="Output dataset path")
args = parser.parse_args()
if not args.command:
parser.print_help()
sys.exit(1)
asyncio.run(_dispatch(args))
async def _dispatch(args):
config = _load_config(args.config)
from rag_eval.adapters.dagent import DagentAdapter
from rag_eval.judge.openai_compatible import OpenAICompatibleJudge
adapter = DagentAdapter(
base_url=config["platform"]["base_url"],
org_id=config["platform"]["org_id"],
token=config["platform"].get("token", ""),
)
judge = OpenAICompatibleJudge(
base_url=config["judge"]["base_url"],
api_key=config["judge"]["api_key"],
model=config["judge"]["model"],
embed_base_url=config["judge"].get("embed_base_url", ""),
embed_api_key=config["judge"].get("embed_api_key", ""),
embed_model=config["judge"].get("embed_model", "text-embedding-3-small"),
)
if args.command == "run":
from rag_eval.runner import EvalRunner, RunConfig
from rag_eval.dataset.schema import EvalDataset
run_cfg = RunConfig(
agent_id=config["eval"]["agent_id"],
knowledge_hub_id=config["eval"]["knowledge_hub_id"],
top_k=config["eval"].get("top_k", 10),
eval_retrieval=config["eval"].get("eval_retrieval", True),
eval_generation=config["eval"].get("eval_generation", True),
file_id_list=config["eval"].get("file_id_list"),
concurrency=config["eval"].get("concurrency", 3),
)
runner = EvalRunner(adapter=adapter, judge=judge)
def _progress(done, total):
print(f"\r Progress: {done}/{total}", end="", flush=True)
print(f"Running evaluation on {args.dataset} ...")
report = await runner.run(args.dataset, run_cfg, progress_cb=_progress)
print()
print(report.summary())
report.save(args.output)
elif args.command == "generate":
from rag_eval.dataset.generator import DatasetGenerator
gen = DatasetGenerator(judge=judge, adapter=adapter)
dataset = await gen.generate(
knowledge_hub_id=config["eval"]["knowledge_hub_id"],
file_id_list=config["eval"]["file_id_list"],
questions_per_chunk=config["eval"].get("questions_per_chunk", 2),
max_chunks=config["eval"].get("max_chunks", 50),
)
with open(args.output, "w", encoding="utf-8") as f:
json.dump(dataset.to_dict(), f, ensure_ascii=False, indent=2)
print(f"Generated {len(dataset.samples)} samples → {args.output}")
def _load_config(path: str) -> dict:
with open(path, encoding="utf-8") as f:
return yaml.safe_load(f)