"""Example: evaluating an LLM app's correctness with LangSmith + openevals.

Logs the question, the app's answer, and a reference answer to LangSmith,
then runs an LLM-as-judge evaluator that scores correctness.
"""

import pytest
from langsmith import testing as t
from openevals.llm import create_llm_as_judge
from openevals.prompts import CORRECTNESS_PROMPT

# LLM-as-judge evaluator: compares an output against a reference answer and
# records the verdict under the "correctness" feedback key.
correctness_evaluator = create_llm_as_judge(
    prompt=CORRECTNESS_PROMPT,
    feedback_key="correctness",
    model="openai:o3-mini",
)


# A mock stand-in for your application.
def my_llm_app(inputs: str) -> str:
    """Return a canned (deliberately wrong) answer for the given question.

    NOTE: annotated ``str`` to match the caller below, which passes the
    question as a plain string (the original hint said ``dict``).
    """
    return "Doodads have increased in price by 10% in the past year."


@pytest.mark.langsmith
def test_correctness():
    """Log inputs/outputs/reference to LangSmith and judge correctness.

    The mock app's answer intentionally contradicts the reference, so the
    evaluator is expected to flag it as incorrect.
    """
    inputs = "How much has the price of doodads changed in the past year?"
    reference_outputs = "The price of doodads has decreased by 50% in the past year."
    outputs = my_llm_app(inputs)

    # Attach the example's fields to the LangSmith test run.
    t.log_inputs({"question": inputs})
    t.log_outputs({"answer": outputs})
    t.log_reference_outputs({"answer": reference_outputs})

    # Run the judge; results are recorded as feedback on the run.
    correctness_evaluator(
        inputs=inputs,
        outputs=outputs,
        reference_outputs=reference_outputs,
    )