80 lines
2.2 KiB
Python
80 lines
2.2 KiB
Python
"""
|
||
测试样例质量检测器。
|
||
"""
|
||
from .parser import QAPair, Section
|
||
from .tester import RecallResult
|
||
|
||
|
||
def check_qa_quality(qa: QAPair) -> dict:
    """Check the quality of a single question/answer pair.

    Four independent heuristics are applied, each contributing at most one
    message to the result:

    * the question is shorter than 5 characters;
    * the question does not end with a question mark (ASCII ``?`` or
      fullwidth ``?``);
    * the answer is shorter than 10 characters;
    * the question and the answer share no whitespace-delimited token.

    Args:
        qa: Object exposing ``question`` and ``answer`` string attributes.

    Returns:
        ``{"is_valid": bool, "issues": [str]}`` where ``is_valid`` is True
        only when ``issues`` is empty.
    """
    # Both the ASCII and the fullwidth (CJK) question mark count as a
    # terminator; the messages below are Chinese, so fullwidth is expected.
    question_marks = ("?", "\uff1f")
    issues = []

    # Question completeness.
    if len(qa.question) < 5:
        issues.append("问题过短")
    if not qa.question.endswith(question_marks):
        issues.append("问题未以问号结尾")

    # Answer completeness.
    if len(qa.answer) < 10:
        issues.append("答案过短")

    # Question/answer consistency: the answer should reuse at least one
    # token of the question. Question marks are removed first so that
    # "recall?" still matches "recall".
    # NOTE(review): tokens are split on whitespace only, so unsegmented
    # Chinese text forms a single token and will rarely overlap -- confirm
    # whether a proper tokenizer is intended here.
    strip_marks = str.maketrans("", "", "".join(question_marks))
    q_words = set(qa.question.translate(strip_marks).split())
    a_words = set(qa.answer.split())
    if q_words.isdisjoint(a_words):
        issues.append("答案与问题无关键词重叠")

    return {
        "is_valid": not issues,
        "issues": issues,
    }
|
||
|
||
|
||
def check_recall_quality(
    results: list[RecallResult],
    *,
    min_similarity: float = 0.5,
) -> dict:
    """Use recall results to flag test samples that look unreliable.

    Results whose ``error`` or ``is_empty`` attribute is truthy are skipped.
    Every remaining result is checked against two independent criteria, so
    the same result may appear in both output lists.

    Args:
        results: Recall results to inspect. Each item must expose ``error``,
            ``is_empty``, ``best_cosine_sim``, ``file_id`` and
            ``retrieved_file_ids``.
        min_similarity: Results whose best cosine similarity is strictly
            below this value are reported as low quality. Defaults to 0.5.

    Returns:
        ``{"low_quality": [RecallResult], "suspicious": [RecallResult]}``,
        each list in input order.
    """
    low_quality = []
    suspicious = []

    for r in results:
        if r.error or r.is_empty:
            continue

        # Compare against None explicitly: a similarity of exactly 0.0 is
        # the worst possible score and must not be skipped as "falsy".
        sim = r.best_cosine_sim
        if sim is not None and sim < min_similarity:
            low_quality.append(r)

        # The expected file is missing from the retrieved files
        # (cross-file recall).
        if r.file_id and r.file_id not in r.retrieved_file_ids:
            suspicious.append(r)

    return {
        "low_quality": low_quality,
        "suspicious": suspicious,
    }
|
||
|
||
|
||
def detect_duplicates(sections: list[Section], threshold: float = 0.9) -> list[tuple[str, str]]:
    """Detect near-duplicate questions using plain string similarity.

    Every unordered pair of questions across all sections is compared with
    ``difflib.SequenceMatcher``; a pair is reported when its ratio is
    strictly greater than ``threshold``.

    Args:
        sections: Sections to scan. Each must expose ``section_path`` and
            ``qa_pairs``; each QA pair must expose ``qid`` and ``question``.
        threshold: Similarity ratio a pair must exceed to be reported.

    Returns:
        A list of ``("<section_path>/<qid>", "<section_path>/<qid>")``
        tuples, ordered by the position of the first and then the second
        question in the flattened input.
    """
    from difflib import SequenceMatcher
    from itertools import combinations

    all_qa = [(s.section_path, qa) for s in sections for qa in s.qa_pairs]
    duplicates = []

    # combinations() yields pairs in the same (i, j > i) order as a nested
    # loop over the tail, without copying the tail on every outer step.
    for (path1, qa1), (path2, qa2) in combinations(all_qa, 2):
        matcher = SequenceMatcher(None, qa1.question, qa2.question)
        # real_quick_ratio() and quick_ratio() are cheap upper bounds on
        # ratio(); if either is not above the threshold, ratio() cannot be
        # either, so the expensive computation is skipped.
        if (
            matcher.real_quick_ratio() > threshold
            and matcher.quick_ratio() > threshold
            and matcher.ratio() > threshold
        ):
            duplicates.append((f"{path1}/{qa1.qid}", f"{path2}/{qa2.qid}"))

    return duplicates
|