Coverage for skema/rest/tests/test_integrated_text_reading_proxy.py: 100%
57 statements
coverage.py v7.5.0, created at 2024-04-30 17:15 +0000
from pathlib import Path

from fastapi import status
from fastapi.testclient import TestClient
from pytest import approx

from skema.rest.integrated_text_reading_proxy import app
from skema.rest.schema import MiraGroundingOutputItem, TextReadingAnnotationsOutput

client = TestClient(app)


def test_text_integrated_extractions():
    """Tests the integrated text extractions endpoint"""
    # Example inputs to annotate
    params = {
        "annotate_skema": True,
        "annotate_mit": False
    }

    payload = {
        "texts": [
            "x = 0",
            "y = 1",
            "I: Infected population"
        ],
        "amrs": []
    }

    response = client.post("/integrated-text-extractions", params=params, json=payload)
    assert response.status_code == 200

    results = TextReadingAnnotationsOutput(**response.json())
    assert len(results.outputs) == 3, "One of the inputs doesn't have outputs"
    assert results.generalized_errors is None, "Generalized TR errors reported"
    for ix, output in enumerate(results.outputs):
        assert output.data is not None, f"Document {ix + 1} didn't generate an AttributeCollection"
        assert len(output.data.attributes) > 0, f"Document {ix + 1} generated an empty attribute collection"
        assert output.errors is None, f"Document {ix + 1} reported errors"
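

# EN: Hedged alternative (sketch only, not current behavior): the three inputs
# above could be split into independent cases with pytest.mark.parametrize so a
# failure points at the offending text. Assumes the endpoint contract stays
# exactly as exercised above.
# import pytest
#
# @pytest.mark.parametrize("text", ["x = 0", "y = 1", "I: Infected population"])
# def test_text_integrated_extractions_single(text):
#     params = {"annotate_skema": True, "annotate_mit": False}
#     payload = {"texts": [text], "amrs": []}
#     response = client.post("/integrated-text-extractions", params=params, json=payload)
#     assert response.status_code == 200
#     results = TextReadingAnnotationsOutput(**response.json())
#     assert len(results.outputs) == 1, f"Input {text!r} didn't produce an output"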


# EN: Comment this out until we can mock the cosmos endpoint to decouple our unit test from the status of their service
def test_integrated_pdf_extraction():
    """Tests the pdf endpoint"""
    params = {
        "annotate_skema": True,
        "annotate_mit": False
    }

    path = Path(__file__).parents[0] / "data" / "integrated_text_reading" / "CHIME_SVIIvR_model.pdf"
    with path.open("rb") as pdf:
        files = [
            ("pdfs", ("CHIME_SVIIvR_model.pdf", pdf, "application/pdf"))
        ]

        response = client.post("/integrated-pdf-extractions", params=params, files=files)

    assert response.status_code == 200

    results = TextReadingAnnotationsOutput(**response.json())
    assert len(results.outputs) == 1, "The input doesn't have outputs"
    assert results.generalized_errors is None, "Generalized TR errors reported"
    for ix, output in enumerate(results.outputs):
        assert output.data is not None, f"Document {ix + 1} didn't generate an AttributeCollection"
        assert len(output.data.attributes) > 0, f"Document {ix + 1} generated an empty attribute collection"
        assert output.errors is None, f"Document {ix + 1} reported errors"


# Test the cosmos endpoint
# EN: Commented this out as we don't control it (UWisc)
# def test_cosmos():
#     """Test that we are able to fetch COSMOS data correctly"""
#     path = Path(__file__).parents[0] / "data" / "integrated_text_reading" / "CHIME_SVIIvR_model.pdf"
#     with path.open("rb") as pdf:
#         ret = cosmos_client(path.name, pdf)
#         assert ret is not None and len(ret) > 0
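

# EN: A possible way to bring the COSMOS test back without depending on the
# UWisc service is to stub the client with pytest's monkeypatch. Sketch only:
# it assumes cosmos_client is importable from the proxy module and returns a
# non-empty list; the fake payload shape below is hypothetical.
# def test_cosmos_mocked(monkeypatch):
#     from skema.rest import integrated_text_reading_proxy as proxy
#     fake_segments = [{"content": "stub text", "page_num": 1}]  # hypothetical shape
#     monkeypatch.setattr(proxy, "cosmos_client", lambda name, data: fake_segments, raising=False)
#     ret = proxy.cosmos_client("CHIME_SVIIvR_model.pdf", b"%PDF- stub")
#     assert ret is not None and len(ret) > 0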


def test_mira_grounding():
    """Test that we are getting grounding for entities"""
    queries = {"queries": ["infected", "susceptible"]}
    params = {"k": 5}
    ret = client.post("/ground_to_mira", params=params, json=queries)

    assert ret.status_code == status.HTTP_200_OK

    data = [[MiraGroundingOutputItem(**r) for r in q] for q in ret.json()]
    assert len(data) == 2, "Service didn't return results for all queries"
    assert all(
        len(groundings) == params["k"] for groundings in data
    ), "Service didn't return the requested number of candidates for each query"
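

# EN: Hedged variant (sketch): the assertion above implies the service honors
# the k parameter, so other values should work too. Assumes /ground_to_mira
# doesn't cap the candidate count below the requested k.
# def test_mira_grounding_k1():
#     ret = client.post("/ground_to_mira", params={"k": 1}, json={"queries": ["infected"]})
#     assert ret.status_code == status.HTTP_200_OK
#     assert all(len(candidates) == 1 for candidates in ret.json())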


def test_extraction_evaluation():
    """Test the extraction evaluation endpoint such that it:
    - runs end to end
    - doesn't drastically change in performance due to a bug in the evaluation function
    """

    extractions_path = Path(__file__).parents[0] / "data" / "integrated_text_reading" / "eval" / "extractions.json"
    annotations_path = Path(__file__).parents[0] / "data" / "integrated_text_reading" / "eval" / "annotations.json"
    json_path = Path(__file__).parents[0] / "data" / "integrated_text_reading" / "eval" / "contents.json"

    # json_file instead of json, to avoid shadowing the stdlib module name
    with extractions_path.open("rb") as extractions, annotations_path.open(
            "rb") as annotations, json_path.open("rb") as json_file:
        files = {
            "extractions_file": ("paper_variable_extractions.json", extractions),
            "gt_annotations": ("paper_gt_annotations.json", annotations),
            "json_text": ("paper_cosmos_output.json", json_file),
        }

        response = client.post("/eval", files=files)

    assert response.status_code == status.HTTP_200_OK

    results = response.json()

    assert results['num_manual_annotations'] == 220, "There should be 220 gt manual annotations"
    assert results['precision'] == approx(0.5230769230768426), "Precision drastically different from the expected value"
    assert results['recall'] == approx(0.154545454545454542), "Recall drastically different from the expected value"
    assert results['f1'] == approx(0.23859649119285095), "F1 drastically different from the expected value"
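
    # EN: If these scores drift marginally across dependency upgrades, the
    # tolerance could be loosened explicitly, e.g. approx(0.5230769230768426, rel=1e-3).
    # This is a suggestion, not current behavior; the pytest.approx default is
    # kept so that regressions surface loudly.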


def test_healthcheck():
    """Test case for /healthcheck endpoint."""
    response = client.get("/healthcheck")
    assert response.status_code in {
        status.HTTP_200_OK,
        status.HTTP_502_BAD_GATEWAY,
        status.HTTP_500_INTERNAL_SERVER_ERROR
    }