Coverage for skema/model_assembly/interfaces.py: 0%
78 statements
« prev ^ index » next coverage.py v7.5.0, created at 2024-04-30 17:15 +0000
« prev ^ index » next coverage.py v7.5.0, created at 2024-04-30 17:15 +0000
1import os
2import json
3from typing import List, Dict, NoReturn
5import requests
class TextReadingInterface:
    """Thin HTTP client for a text-reading webservice.

    Every method sends file *paths* (not file contents) to the service in a
    JSON request body. The service is therefore presumably expected to see
    the same filesystem as the caller -- TODO confirm against deployment.
    """

    # (document suffix, endpoint name, request key carrying the document path).
    # Order matters: "--COSMOS-data.json" must be tested before the generic
    # ".json" suffix, otherwise COSMOS files would be routed as plain JSON.
    _MENTION_ROUTES = (
        (".pdf", "pdf_to_mentions", "pdf"),
        ("--COSMOS-data.json", "cosmos_json_to_mentions", "pathToCosmosJson"),
        (".json", "json_doc_to_mentions", "json"),
    )

    def __init__(self, addr):
        """Remember the base address of the webservice.

        Args:
            addr: Base URL of the service; endpoint names are appended to it
                after a single "/".
        """
        self.webservice = addr

    def _post_json(self, endpoint: str, body: dict):
        """POST ``body`` as JSON to ``<webservice>/<endpoint>``; return the response."""
        return requests.post(
            f"{self.webservice}/{endpoint}",
            headers={"Content-type": "application/json"},
            json=body,
        )

    @staticmethod
    def _load_json(path: str):
        """Parse the JSON file at ``path``, closing the handle afterwards."""
        with open(path, "r") as infile:
            return json.load(infile)

    def extract_mentions(self, doc_path: str, out_path: str) -> dict:
        """Request mention extraction for a document and load the result file.

        The endpoint is chosen from the suffix of ``doc_path`` (``.pdf``,
        ``--COSMOS-data.json`` or ``.json``). After the request, ``out_path``
        is read back from disk and returned as parsed JSON.

        Args:
            doc_path: Path to an existing input document.
            out_path: Path of the JSON output file; must end in ``.json``.

        Returns:
            The parsed contents of ``out_path``.

        Raises:
            RuntimeError: If ``doc_path`` is not an existing file.
            ValueError: If ``out_path`` does not end in ``.json`` or if
                ``doc_path`` has an unsupported suffix.
        """
        if not os.path.isfile(doc_path):
            raise RuntimeError(f"Document not found: {doc_path}")

        if not out_path.endswith(".json"):
            raise ValueError("/pdf_to_mentions requires an JSON output file")

        # Resolve the route before touching the network so that an
        # unsupported document is rejected without any request being sent.
        for suffix, endpoint, path_key in self._MENTION_ROUTES:
            if doc_path.endswith(suffix):
                break
        else:
            raise ValueError(
                f"Unknown input document extension in file {doc_path} (pdf or json expected)"
            )

        res = self._post_json(endpoint, {path_key: doc_path, "outfile": out_path})
        print(f"HTTP {res} for /{endpoint} on {doc_path}")

        return self._load_json(out_path)

    def get_link_hypotheses(
        self,
        mentions_path: str,
        eqns_path: str,
        grfn_path: str,
        comments_path: str,
        wikidata_path: str,
    ) -> dict:
        """Build an alignment payload, write it to disk and POST it to ``/align``.

        The payload is written to ``align_payload.json`` in the current
        working directory and only its path is sent to the service.

        Args:
            mentions_path: Existing ``.json`` file of mentions.
            eqns_path: Existing ``.txt`` file with one equation per line.
            grfn_path: Existing ``.json`` GrFN file with a ``"variables"``
                list whose items have an ``"identifier"`` string.
            comments_path: Existing ``.json`` file of source-code comments.
            wikidata_path: Passed through to the payload unchanged; it is
                not validated here.

        Returns:
            The JSON body of the service response.

        Raises:
            RuntimeError: If any of the four validated files does not exist.
            ValueError: If any of the four validated files has the wrong
                suffix.
        """
        # (path, label for the "not found" message, required suffix,
        #  wording for the "wrong type" message)
        inputs = (
            (mentions_path, "Mentions", ".json", "mentions to be a JSON file"),
            (eqns_path, "Equations", ".txt", "equations to be a text file"),
            (grfn_path, "GrFN", ".json", "GrFN to be a JSON file"),
            (comments_path, "Comments", ".json", "comments to be a JSON file"),
        )

        # All existence checks run before any suffix check, so a missing
        # file is always reported ahead of a wrongly typed one.
        for path, label, _suffix, _expectation in inputs:
            if not os.path.isfile(path):
                raise RuntimeError(f"{label} not found: {path}")

        for path, _label, suffix, expectation in inputs:
            if not path.endswith(suffix):
                raise ValueError(f"/align expects {expectation}")

        grfn_data = self._load_json(grfn_path)

        # Drop the last "::"-separated component of each identifier and
        # replace it with "0"; the set removes the duplicates this creates.
        unique_var_names = list(
            {
                "::".join(var_def["identifier"].split("::")[:-1]) + "::0"
                for var_def in grfn_data["variables"]
            }
        )
        variables = [{"name": var_name} for var_name in unique_var_names]

        with open(eqns_path, "r") as infile:
            equations = [eqn_line.strip() for eqn_line in infile]

        payload = {
            "mentions": mentions_path,
            "documents": mentions_path,
            "equations": equations,
            "source_code": {
                "variables": variables,
                "comments": self._load_json(comments_path),
            },
            "toggles": {
                "groundToSVO": False,
                "groundToWiki": False,
                "saveWikiGroundings": False,
                "appendToGrFN": False,
            },
            "arguments": {"maxSVOgroundingsPerVar": 5},
            "wikidata": wikidata_path,
        }
        payload_path = f"{os.getcwd()}/align_payload.json"
        # The file must be flushed and closed before the request is sent,
        # because the service is given only the path to it.
        with open(payload_path, "w") as outfile:
            json.dump(payload, outfile)

        res = self._post_json("align", {"pathToJson": payload_path})
        print(f"HTTP {res} for /align on:\n\t{mentions_path}\n\t{grfn_path}\n")
        return res.json()

    def ground_to_SVO(self, mentions_path: str) -> dict:
        """POST a mentions file path to ``/groundMentionsToSVO``.

        Args:
            mentions_path: Existing ``.json`` file of mentions.

        Returns:
            The JSON body of the service response.

        Raises:
            RuntimeError: If ``mentions_path`` is not an existing file.
            ValueError: If ``mentions_path`` does not end in ``.json``.
        """
        if not os.path.isfile(mentions_path):
            raise RuntimeError(f"Mentions file not found: {mentions_path}")

        if not mentions_path.endswith(".json"):
            raise ValueError(
                "/groundMentionsToSVO expects mentions to be a JSON file"
            )

        res = self._post_json("groundMentionsToSVO", {"mentions": mentions_path})
        print(f"HTTP {res} for /groundMentionsToSVO on {mentions_path}")
        return res.json()
class EquationReadingInterface:
    """Placeholder for an equation-reading interface; it has no behavior yet."""

    # TODO: define this for interface to EqDec and Cosmos equation-detection
class CosmosInterface:
    """Helpers for working with parquet output files.

    Only ``find_parquet_files`` is implemented; the conversion methods are
    stubs that currently return ``None``.
    """

    def __init__(self):
        """Create the interface; no state is stored."""

    def convert_parquet_collection(self, parquet_filenames: List[str]) -> Dict:
        """Not implemented yet; currently returns ``None``."""
        return None

    def find_parquet_files(self, outdir_path: str) -> List:
        """List the ``.parquet`` entries directly inside ``outdir_path``.

        Args:
            outdir_path: Directory to scan (not recursive).

        Returns:
            Entry names ending in ``.parquet``, each joined onto
            ``outdir_path``, in the order ``os.listdir`` yields them.
        """
        matches = []
        for entry in os.listdir(outdir_path):
            if not entry.endswith(".parquet"):
                continue
            matches.append(os.path.join(outdir_path, entry))
        return matches

    def parquet2dict(self, parquet_file) -> Dict:
        """Not implemented yet; currently returns ``None``."""
        return None

    def parquet2Json(self, parquet_file) -> str:
        """Not implemented yet; currently returns ``None``."""
        return None

    def parquet2JsonFile(self, parquet_file, json_filename) -> NoReturn:
        """Not implemented yet; currently returns ``None``."""
        return None