Coverage for skema/rest/tests/test_integrated_text_reading_proxy.py: 100%
57 statements
coverage.py v7.5.0, created at 2024-04-30 17:15 +0000
from pathlib import Path

from fastapi import status
from fastapi.testclient import TestClient
from pytest import approx

from skema.rest.integrated_text_reading_proxy import app
from skema.rest.schema import MiraGroundingOutputItem, TextReadingAnnotationsOutput

client = TestClient(app)


def test_text_integrated_extractions():
    """Tests the integrated text extractions endpoint"""
    # Example inputs to annotate
    params = {
        "annotate_skema": True,
        "annotate_mit": False
    }

    payload = {
        "texts": [
            "x = 0",
            "y = 1",
            "I: Infected population"
        ],
        "amrs": []
    }

    response = client.post("/integrated-text-extractions", params=params, json=payload)
    assert response.status_code == 200

    results = TextReadingAnnotationsOutput(**response.json())
    assert len(results.outputs) == 3, "One of the inputs doesn't have outputs"
    assert results.generalized_errors is None, "Generalized TR errors reported"
    for ix, output in enumerate(results.outputs):
        assert output.data is not None, f"Document {ix + 1} didn't generate an AttributeCollection"
        assert len(output.data.attributes) > 0, f"Document {ix + 1} generated an empty attribute collection"
        assert output.errors is None, f"Document {ix + 1} reported errors"
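

# EN: Hedged alternative (sketch only, not current behavior): the three inputs
# above could be split into independent cases with pytest.mark.parametrize so a
# failure points at the offending text. Assumes the endpoint contract stays
# exactly as exercised above.
# import pytest
#
# @pytest.mark.parametrize("text", ["x = 0", "y = 1", "I: Infected population"])
# def test_text_integrated_extractions_single(text):
#     params = {"annotate_skema": True, "annotate_mit": False}
#     payload = {"texts": [text], "amrs": []}
#     response = client.post("/integrated-text-extractions", params=params, json=payload)
#     assert response.status_code == 200
#     results = TextReadingAnnotationsOutput(**response.json())
#     assert len(results.outputs) == 1, f"Input {text!r} didn't produce an output"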


# EN: Comment this out until we can mock the cosmos endpoint to decouple our unit test from the status of their service
def test_integrated_pdf_extraction():
    """Tests the pdf endpoint"""
    params = {
        "annotate_skema": True,
        "annotate_mit": False
    }

    path = Path(__file__).parents[0] / "data" / "integrated_text_reading" / "CHIME_SVIIvR_model.pdf"
    with path.open("rb") as pdf:
        files = [
            ("pdfs", ("CHIME_SVIIvR_model.pdf", pdf, "application/pdf"))
        ]

        response = client.post("/integrated-pdf-extractions", params=params, files=files)

    assert response.status_code == 200

    results = TextReadingAnnotationsOutput(**response.json())
    assert len(results.outputs) == 1, "The input doesn't have outputs"
    assert results.generalized_errors is None, "Generalized TR errors reported"
    for ix, output in enumerate(results.outputs):
        assert output.data is not None, f"Document {ix + 1} didn't generate an AttributeCollection"
        assert len(output.data.attributes) > 0, f"Document {ix + 1} generated an empty attribute collection"
        assert output.errors is None, f"Document {ix + 1} reported errors"


# Test the cosmos endpoint
# EN: Commented this out as we don't control it (UWisc)
# def test_cosmos():
#     """Test that we are able to fetch COSMOS data correctly"""
#     path = Path(__file__).parents[0] / "data" / "integrated_text_reading" / "CHIME_SVIIvR_model.pdf"
#     with path.open("rb") as pdf:
#         ret = cosmos_client(path.name, pdf)
#         assert ret is not None and len(ret) > 0
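

# EN: A possible way to bring the COSMOS test back without depending on the
# UWisc service is to stub the client with pytest's monkeypatch. Sketch only:
# it assumes cosmos_client is importable from the proxy module and returns a
# non-empty list; the fake payload shape below is hypothetical.
# def test_cosmos_mocked(monkeypatch):
#     from skema.rest import integrated_text_reading_proxy as proxy
#     fake_segments = [{"content": "stub text", "page_num": 1}]  # hypothetical shape
#     monkeypatch.setattr(proxy, "cosmos_client", lambda name, data: fake_segments, raising=False)
#     ret = proxy.cosmos_client("CHIME_SVIIvR_model.pdf", b"%PDF- stub")
#     assert ret is not None and len(ret) > 0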


def test_mira_grounding():
    """Test that we are getting grounding for entities"""
    queries = {"queries": ["infected", "susceptible"]}
    params = {"k": 5}
    ret = client.post("/ground_to_mira", params=params, json=queries)

    assert ret.status_code == status.HTTP_200_OK

    data = [[MiraGroundingOutputItem(**r) for r in q] for q in ret.json()]
    assert len(data) == 2, "Service didn't return results for all queries"
    assert all(
        len(groundings) == params["k"] for groundings in data
    ), "Service didn't return the requested number of candidates for each query"
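

# EN: Hedged variant (sketch): the assertion above implies the service honors
# the k parameter, so other values should work too. Assumes /ground_to_mira
# doesn't cap the candidate count below the requested k.
# def test_mira_grounding_k1():
#     ret = client.post("/ground_to_mira", params={"k": 1}, json={"queries": ["infected"]})
#     assert ret.status_code == status.HTTP_200_OK
#     assert all(len(candidates) == 1 for candidates in ret.json())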


def test_extraction_evaluation():
    """Test the extraction evaluation endpoint such that it:
    - runs end to end
    - doesn't drastically change in performance due to a bug in the evaluation function
    """

    extractions_path = Path(__file__).parents[0] / "data" / "integrated_text_reading" / "eval" / "extractions.json"
    annotations_path = Path(__file__).parents[0] / "data" / "integrated_text_reading" / "eval" / "annotations.json"
    json_path = Path(__file__).parents[0] / "data" / "integrated_text_reading" / "eval" / "contents.json"

    # json_file instead of json, to avoid shadowing the stdlib module name
    with extractions_path.open("rb") as extractions, annotations_path.open(
            "rb") as annotations, json_path.open("rb") as json_file:
        files = {
            "extractions_file": ("paper_variable_extractions.json", extractions),
            "gt_annotations": ("paper_gt_annotations.json", annotations),
            "json_text": ("paper_cosmos_output.json", json_file),
        }

        response = client.post("/eval", files=files)

    assert response.status_code == status.HTTP_200_OK

    results = response.json()

    assert results['num_manual_annotations'] == 220, "There should be 220 gt manual annotations"
    assert results['precision'] == approx(0.5230769230768426), "Precision drastically different from the expected value"
    assert results['recall'] == approx(0.154545454545454542), "Recall drastically different from the expected value"
    assert results['f1'] == approx(0.23859649119285095), "F1 drastically different from the expected value"
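
    # EN: If these scores drift marginally across dependency upgrades, the
    # tolerance could be loosened explicitly, e.g. approx(0.5230769230768426, rel=1e-3).
    # This is a suggestion, not current behavior; the pytest.approx default is
    # kept so that regressions surface loudly.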


def test_healthcheck():
    """Test case for /healthcheck endpoint."""
    response = client.get("/healthcheck")
    assert response.status_code in {
        status.HTTP_200_OK,
        status.HTTP_502_BAD_GATEWAY,
        status.HTTP_500_INTERNAL_SERVER_ERROR
    }