Coverage for skema/rest/tests/test_integrated_text_reading_proxy.py: 100%

57 statements  

coverage.py v7.5.0, created at 2024-04-30 17:15 +0000

from pathlib import Path

from fastapi import status
from fastapi.testclient import TestClient
from pytest import approx

from skema.rest.integrated_text_reading_proxy import app
from skema.rest.schema import MiraGroundingOutputItem, TextReadingAnnotationsOutput

client = TestClient(app)

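# NB: TestClient runs requests against the FastAPI app in-process, so these
# tests exercise the proxy's routing and validation without starting a server.
# Any downstream services the proxy forwards to (e.g., COSMOS) are still
# contacted for real, which is why the COSMOS-bound tests below carry caveats.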

def test_text_integrated_extractions():
    """Tests the integrated text extractions endpoint"""
    # Read an example document to annotate
    params = {
        "annotate_skema": True,
        "annotate_mit": False
    }

    payload = {
        "texts": [
            "x = 0",
            "y = 1",
            "I: Infected population"
        ],
        "amrs": []
    }

    response = client.post("/integrated-text-extractions", params=params, json=payload)
    assert response.status_code == 200

    results = TextReadingAnnotationsOutput(**response.json())
    assert len(results.outputs) == 3, "One of the inputs doesn't have outputs"
    assert results.generalized_errors is None, "Generalized TR errors were reported"
    for ix, output in enumerate(results.outputs):
        assert output.data is not None, f"Document {ix + 1} didn't generate an AttributeCollection"
        assert len(output.data.attributes) > 0, f"Document {ix + 1} generated an empty attribute collection"
        assert output.errors is None, f"Document {ix + 1} reported errors"

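# For reference, the response deserialized by TextReadingAnnotationsOutput
# above has roughly this shape (inferred from the assertions; the exact
# schema lives in skema.rest.schema):
#   {
#     "outputs": [ {"data": {"attributes": [...]}, "errors": null}, ... ],
#     "generalized_errors": null
#   }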

# EN: Comment this out until we can mock the COSMOS endpoint to decouple our
# unit test from the status of their service
def test_integrated_pdf_extraction():
    """Tests the integrated PDF extraction endpoint"""
    params = {
        "annotate_skema": True,
        "annotate_mit": False
    }

    path = Path(__file__).parents[0] / "data" / "integrated_text_reading" / "CHIME_SVIIvR_model.pdf"
    with path.open("rb") as pdf:
        files = [
            ("pdfs", ("CHIME_SVIIvR_model.pdf", pdf, "application/pdf"))
        ]

        response = client.post("/integrated-pdf-extractions", params=params, files=files)

        assert response.status_code == 200

        results = TextReadingAnnotationsOutput(**response.json())
        assert len(results.outputs) == 1, "The input doesn't have outputs"
        assert results.generalized_errors is None, "Generalized TR errors were reported"
        for ix, output in enumerate(results.outputs):
            assert output.data is not None, f"Document {ix + 1} didn't generate an AttributeCollection"
            assert len(output.data.attributes) > 0, f"Document {ix + 1} generated an empty attribute collection"
            assert output.errors is None, f"Document {ix + 1} reported errors"


# Test the COSMOS endpoint
# EN: Commented this out as we don't control it (UWisc)
# def test_cosmos():
#     """Test that we are able to fetch COSMOS data correctly"""
#     path = Path(__file__).parents[0] / "data" / "integrated_text_reading" / "CHIME_SVIIvR_model.pdf"
#     with path.open("rb") as pdf:
#         ret = cosmos_client(path.name, pdf)
#         assert ret is not None and len(ret) > 0
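# A possible way to bring the COSMOS-dependent tests back without relying on
# the live UWisc service (untested sketch; assumes `cosmos_client` is a
# module-level helper in skema.rest.integrated_text_reading_proxy, as the
# commented-out test above suggests):
#
#     from unittest import mock
#
#     def test_integrated_pdf_extraction_mocked():
#         canned = [{"content": "mock COSMOS record"}]  # hypothetical payload
#         with mock.patch(
#             "skema.rest.integrated_text_reading_proxy.cosmos_client",
#             return_value=canned,
#         ):
#             ...  # exercise /integrated-pdf-extractions as above
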

def test_mira_grounding():
    """Test that we are getting groundings for entities"""
    queries = {"queries": ["infected", "susceptible"]}
    params = {"k": 5}
    ret = client.post("/ground_to_mira", params=params, json=queries)

    assert ret.status_code == status.HTTP_200_OK

    data = [[MiraGroundingOutputItem(**r) for r in q] for q in ret.json()]
    assert len(data) == 2, "Service didn't return results for all queries"
    assert all(len(groundings) == params["k"] for groundings in data), \
        "Service didn't return the requested number of candidates for each query"

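# Example exchange (shape inferred from the assertions above; the fields of
# each candidate are defined by MiraGroundingOutputItem in skema.rest.schema):
#   POST /ground_to_mira?k=5   {"queries": ["infected", "susceptible"]}
#   -> [[cand_1, ..., cand_5], [cand_1, ..., cand_5]]  # k candidates per query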

def test_extraction_evaluation():
    """Test the extraction evaluation endpoint such that it:
    - Runs end to end
    - Doesn't drastically change in performance due to a bug in the evaluation function
    """

    extractions_path = Path(__file__).parents[0] / "data" / "integrated_text_reading" / "eval" / "extractions.json"
    annotations_path = Path(__file__).parents[0] / "data" / "integrated_text_reading" / "eval" / "annotations.json"
    json_path = Path(__file__).parents[0] / "data" / "integrated_text_reading" / "eval" / "contents.json"

    with extractions_path.open("rb") as extractions, \
            annotations_path.open("rb") as annotations, \
            json_path.open("rb") as json_file:
        files = {
            "extractions_file": ("paper_variable_extractions.json", extractions),
            "gt_annotations": ("paper_gt_annotations.json", annotations),
            "json_text": ("paper_cosmos_output.json", json_file),
        }

        response = client.post("/eval", files=files)

        assert response.status_code == status.HTTP_200_OK

        results = response.json()

        assert results['num_manual_annotations'] == 220, "There should be 220 manual gt annotations"
        assert results['precision'] == approx(0.5230769230768426), "Precision drastically different from the expected value"
        assert results['recall'] == approx(0.154545454545454542), "Recall drastically different from the expected value"
        assert results['f1'] == approx(0.23859649119285095), "F1 drastically different from the expected value"

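# Sanity check on the expected values above: F1 is the harmonic mean of
# precision and recall, 2 * P * R / (P + R), and indeed
# 2 * 0.52307692 * 0.15454545 / (0.52307692 + 0.15454545) ≈ 0.23859649.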

def test_healthcheck():
    """Test case for the /healthcheck endpoint."""
    response = client.get("/healthcheck")
    assert response.status_code in {
        status.HTTP_200_OK,
        status.HTTP_502_BAD_GATEWAY,
        status.HTTP_500_INTERNAL_SERVER_ERROR
    }
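# 502/500 are tolerated here, presumably because the healthcheck reflects the
# status of downstream services the proxy does not control (see the COSMOS
# note above); the test only verifies that the endpoint itself responds.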