Coverage for skema/model_assembly/interfaces.py: 0%

78 statements  

« prev     ^ index     » next       coverage.py v7.5.0, created at 2024-04-30 17:15 +0000

1import os 

2import json 

3from typing import List, Dict, NoReturn 

4 

5import requests 

6 

7 

class TextReadingInterface:
    """Client for a text-reading web service reachable at a base address.

    Every method validates its path arguments locally, POSTs a JSON request
    to an endpoint below ``self.webservice`` and returns parsed JSON. The
    requests carry file *paths*, not file contents.
    """

    # (document suffix, endpoint name, request key holding the document path).
    # Checked in order: the COSMOS suffix has to come before the generic
    # ".json" suffix because it also ends in ".json".
    _MENTION_ROUTES = (
        (".pdf", "pdf_to_mentions", "pdf"),
        ("--COSMOS-data.json", "cosmos_json_to_mentions", "pathToCosmosJson"),
        (".json", "json_doc_to_mentions", "json"),
    )

    def __init__(self, addr):
        """Remember the base address of the web service.

        Args:
            addr: Base URL that endpoint names are appended to.
        """
        self.webservice = addr

    @staticmethod
    def _read_json(path: str):
        """Parse the JSON file at ``path`` and close the handle afterwards."""
        with open(path, "r") as handle:
            return json.load(handle)

    def extract_mentions(self, doc_path: str, out_path: str) -> dict:
        """Request mention extraction for a document and load the result file.

        The endpoint is picked from the suffix of ``doc_path``; after the
        request, ``out_path`` is read back and parsed as JSON.

        Args:
            doc_path: Path to an existing ``.pdf``, ``--COSMOS-data.json`` or
                ``.json`` document.
            out_path: Path of the JSON output file; must end in ``.json``.

        Returns:
            The parsed JSON content of ``out_path``.

        Raises:
            RuntimeError: If ``doc_path`` is not an existing file.
            ValueError: If ``out_path`` does not end in ``.json`` or
                ``doc_path`` has an unsupported suffix.
        """
        if not os.path.isfile(doc_path):
            raise RuntimeError(f"Document not found: {doc_path}")

        # The message names /pdf_to_mentions for every endpoint; it is kept
        # verbatim so existing callers/log matchers are unaffected.
        if not out_path.endswith(".json"):
            raise ValueError("/pdf_to_mentions requires an JSON output file")

        for suffix, endpoint, doc_key in self._MENTION_ROUTES:
            if doc_path.endswith(suffix):
                break
        else:
            raise ValueError(
                f"Unknown input document extension in file {doc_path} (pdf or json expected)"
            )

        res = requests.post(
            f"{self.webservice}/{endpoint}",
            headers={"Content-type": "application/json"},
            json={doc_key: doc_path, "outfile": out_path},
        )
        print(f"HTTP {res} for /{endpoint} on {doc_path}")

        # NOTE(review): the response status is not checked; presumably the
        # service writes out_path itself — confirm against the service.
        return self._read_json(out_path)

    def get_link_hypotheses(
        self,
        mentions_path: str,
        eqns_path: str,
        grfn_path: str,
        comments_path: str,
        wikidata_path: str,
    ) -> dict:
        """Build an ``/align`` payload file and request link hypotheses.

        The payload is written to ``align_payload.json`` in the current
        working directory and only its path is sent to the service.

        Args:
            mentions_path: Existing ``.json`` mentions file; its path is sent
                under both ``"mentions"`` and ``"documents"``.
            eqns_path: Existing ``.txt`` file; each stripped line becomes one
                equation entry.
            grfn_path: Existing ``.json`` file with a ``"variables"`` list
                whose items have an ``"identifier"`` string.
            comments_path: Existing ``.json`` file embedded under
                ``source_code.comments``.
            wikidata_path: Passed through unchanged under ``"wikidata"``; it
                is not validated here.

        Returns:
            The JSON body of the service response.

        Raises:
            RuntimeError: If any of the four checked input files is missing.
            ValueError: If an input file has an unexpected extension.
        """
        if not os.path.isfile(mentions_path):
            raise RuntimeError(f"Mentions not found: {mentions_path}")

        if not os.path.isfile(eqns_path):
            raise RuntimeError(f"Equations not found: {eqns_path}")

        if not os.path.isfile(grfn_path):
            raise RuntimeError(f"GrFN not found: {grfn_path}")

        if not os.path.isfile(comments_path):
            raise RuntimeError(f"Comments not found: {comments_path}")

        if not mentions_path.endswith(".json"):
            raise ValueError("/align expects mentions to be a JSON file")

        if not eqns_path.endswith(".txt"):
            raise ValueError("/align expects equations to be a text file")

        if not grfn_path.endswith(".json"):
            raise ValueError("/align expects GrFN to be a JSON file")

        if not comments_path.endswith(".json"):
            raise ValueError("/align expects comments to be a JSON file")

        grfn_data = self._read_json(grfn_path)

        # Replace the last "::"-separated field of each identifier with "0"
        # and de-duplicate through a set (so the resulting order is arbitrary).
        unique_var_names = list(
            {
                "::".join(var_def["identifier"].split("::")[:-1]) + "::0"
                for var_def in grfn_data["variables"]
            }
        )
        variables = [{"name": var_name} for var_name in unique_var_names]

        with open(eqns_path, "r") as infile:
            equations = [eqn_line.strip() for eqn_line in infile]

        payload = {
            "mentions": mentions_path,
            "documents": mentions_path,
            "equations": equations,
            "source_code": {
                "variables": variables,
                "comments": self._read_json(comments_path),
            },
            "toggles": {
                "groundToSVO": False,
                "groundToWiki": False,
                "saveWikiGroundings": False,
                "appendToGrFN": False,
            },
            "arguments": {"maxSVOgroundingsPerVar": 5},
            "wikidata": wikidata_path,
        }
        payload_path = f"{os.getcwd()}/align_payload.json"
        # Close (and thereby flush) the payload file before its path is handed
        # to the service, so the file is complete when the request is sent.
        with open(payload_path, "w") as payload_file:
            json.dump(payload, payload_file)

        res = requests.post(
            f"{self.webservice}/align",
            headers={"Content-type": "application/json"},
            json={"pathToJson": payload_path},
        )
        print(f"HTTP {res} for /align on:\n\t{mentions_path}\n\t{grfn_path}\n")
        return res.json()

    def ground_to_SVO(self, mentions_path: str) -> dict:
        """Request SVO grounding for a mentions file.

        Args:
            mentions_path: Path to an existing ``.json`` mentions file.

        Returns:
            The JSON body of the service response.

        Raises:
            RuntimeError: If ``mentions_path`` is not an existing file.
            ValueError: If ``mentions_path`` does not end in ``.json``.
        """
        if not os.path.isfile(mentions_path):
            raise RuntimeError(f"Mentions file not found: {mentions_path}")

        if not mentions_path.endswith(".json"):
            raise ValueError(
                "/groundMentionsToSVO expects mentions to be a JSON file"
            )

        res = requests.post(
            f"{self.webservice}/groundMentionsToSVO",
            headers={"Content-type": "application/json"},
            json={"mentions": mentions_path},
        )

        print(f"HTTP {res} for /groundMentionsToSVO on {mentions_path}")
        return res.json()

143 

144 

class EquationReadingInterface:
    """Placeholder class; it defines no attributes or methods yet."""

    # TODO: define this for interface to EqDec and Cosmos equation-detection

148 

149 

class CosmosInterface:
    """Helpers around parquet files.

    Only ``find_parquet_files`` has an implementation; the remaining methods
    are empty placeholders that return ``None``.
    """

    def __init__(self):
        """Create the interface; there is no state to initialise."""

    def convert_parquet_collection(self, parquet_filenames: List[str]) -> Dict:
        """Placeholder: not implemented, currently returns ``None``."""

    def find_parquet_files(self, outdir_path: str) -> List:
        """List the ``.parquet`` entries directly inside a directory.

        Args:
            outdir_path: Directory to scan (sub-directories are not entered).

        Returns:
            ``outdir_path`` joined with every entry name ending in
            ``.parquet``, in the order ``os.listdir`` yields them. Entries
            are matched by name only.
        """
        matches = []
        for entry in os.listdir(outdir_path):
            if entry.endswith(".parquet"):
                matches.append(os.path.join(outdir_path, entry))
        return matches

    def parquet2dict(self, parquet_file) -> Dict:
        """Placeholder: not implemented, currently returns ``None``."""

    def parquet2Json(self, parquet_file) -> str:
        """Placeholder: not implemented, currently returns ``None``."""

    # NOTE(review): ``NoReturn`` normally marks a function that never returns;
    # ``None`` may have been intended here — confirm before changing.
    def parquet2JsonFile(self, parquet_file, json_filename) -> NoReturn:
        """Placeholder: not implemented, currently returns ``None``."""