Coverage for skema/rest/integrated_text_reading_proxy.py: 66%

297 statements  

coverage.py v7.5.0, created at 2024-04-30 17:15 +0000

# Client code for SKEMA TR
import io
import itertools as it
import json
import tempfile
import time
from pathlib import Path
from typing import List, Union, BinaryIO, Callable
from typing import Optional, Dict, Any
from zipfile import ZipFile

import pandas as pd
import requests
import httpx
from askem_extractions.data_model import AttributeCollection
from askem_extractions.importers import import_arizona
from fastapi import APIRouter, Depends, FastAPI, UploadFile, Response, status
from langchain.tools.e2b_data_analysis.tool import UploadedFile

from skema.rest.proxies import SKEMA_TR_ADDRESS, MIT_TR_ADDRESS, OPENAI_KEY, COSMOS_ADDRESS
from skema.rest.schema import (
    TextReadingInputDocuments,
    TextReadingAnnotationsOutput,
    TextReadingDocumentResults,
    TextReadingError, MiraGroundingInputs, MiraGroundingOutputItem, TextReadingEvaluationResults,
)
from skema.rest import utils, metal_proxy

router = APIRouter()


# Utility code for the endpoints

def annotate_with_skema(
        endpoint: str,
        input_: Union[str, List[str], List[Dict], List[List[Dict]]]) -> List[Dict[str, Any]]:
    """ Blueprint for calling the SKEMA-TR API """

    if isinstance(input_, (str, dict)):
        # If the text to annotate is a single string representing the contents of a
        # document, make it a list with a single element
        payload = [input_]
    else:
        # If the text to annotate is already a list of documents, it is the payload itself
        payload = input_
    response = requests.post(endpoint, json=payload, timeout=600)
    if response.status_code == 200:
        return response.json()
    else:
        raise RuntimeError(
            f"Calling {endpoint} failed with HTTP code {response.status_code}"
        )


def annotate_text_with_skema(text: Union[str, List[str]]) -> List[Dict[str, Any]]:
    return annotate_with_skema(f"{SKEMA_TR_ADDRESS}/textFileToMentions", text)


def annotate_pdfs_with_skema(
        pdfs: Union[List[List[Dict]], List[Dict]]) -> List[Dict[str, Any]]:
    return annotate_with_skema(f"{SKEMA_TR_ADDRESS}/cosmosJsonToMentions", pdfs)


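# Illustrative usage of the SKEMA client helpers above (a sketch, not executed by the
# service itself; the example text and variable names are made up):
#
#   mentions = annotate_text_with_skema("The susceptible population S(t) decreases over time.")
#   pdf_mentions = annotate_pdfs_with_skema([cosmos_json_blocks])  # cosmos_json_blocks: output of cosmos_client
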

# Client code for MIT TR
def annotate_text_with_mit(
        texts: Union[str, List[str]]
) -> List[Union[Dict[str, Any], str]]:
    endpoint = f"{MIT_TR_ADDRESS}/annotation/upload_file_extract"
    if isinstance(texts, str):
        # If the text to annotate is a single string representing the contents of a
        # document, make it a list with a single element
        texts = [texts]

    # TODO parallelize this
    return_values = list()
    for ix, text in enumerate(texts):
        params = {"gpt_key": OPENAI_KEY}
        files = {"file": io.StringIO(text)}
        response = requests.post(endpoint, params=params, files=files)
        try:
            if response.status_code == 200:
                return_values.append(response.json())
            else:
                return_values.append(
                    f"Calling {endpoint} on the {ix}th input failed with HTTP code {response.status_code}"
                )
        except Exception as ex:
            return_values.append(
                f"Calling {endpoint} on the {ix}th input failed with exception {ex}"
            )
    return return_values


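# Illustrative sketch of the MIT client's per-document results (not executed here):
# each element is either the parsed JSON extraction or an error string, e.g.
#
#   results = annotate_text_with_mit(["first document text", "second document text"])
#   # results[0] -> {... MIT extractions ...}
#   # results[1] -> "Calling ... on the 1th input failed with HTTP code 500"
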

def normalize_extractions(
        arizona_extractions: Optional[Dict[str, Any]], mit_extractions: Optional[Dict]
) -> AttributeCollection:
    collections = list()
    with tempfile.TemporaryDirectory() as tmpdirname:
        tmp_dir = Path(tmpdirname)
        skema_path = tmp_dir / "skema.json"

        canonical_mit, canonical_arizona = None, None

        if arizona_extractions:
            try:
                with skema_path.open("w") as f:
                    json.dump(arizona_extractions, f)
                canonical_arizona = import_arizona(Path(skema_path))
                collections.append(canonical_arizona)
            except Exception as ex:
                print(ex)
        if mit_extractions:
            try:
                # MIT extractions already come normalized
                canonical_mit = AttributeCollection.from_json(mit_extractions)
                collections.append(canonical_mit)
            except Exception as ex:
                print(ex)

        if arizona_extractions and mit_extractions:
            # Merge both, with some de-duplication
            params = {"gpt_key": OPENAI_KEY}

            skema_path = tmp_dir / "canonical_skema.json"
            mit_path = tmp_dir / "canonical_mit.json"

            canonical_arizona.save_json(skema_path)
            canonical_mit.save_json(mit_path)

            data = {
                "mit_file": mit_path.open(),
                "arizona_file": skema_path.open(),
            }
            response = requests.post(
                f"{MIT_TR_ADDRESS}/integration/get_mapping", params=params, files=data
            )

            # MIT merges the collection for us
            if response.status_code == 200:
                merged_collection = AttributeCollection.from_json(response.json())
                # Return the merged collection here
                return merged_collection

    # Merge the collections into an attribute collection
    attributes = list(it.chain.from_iterable(c.attributes for c in collections))

    return AttributeCollection(attributes=attributes)


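# Illustrative sketch of the normalization step (assumes `skema_mentions` and
# `mit_attributes` hold the raw outputs of the two pipelines for one document;
# both names are hypothetical):
#
#   collection = normalize_extractions(
#       arizona_extractions=skema_mentions, mit_extractions=mit_attributes
#   )
#   # collection.attributes holds the merged, canonical extractions
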

def parquet_to_json(path):
    parquet_df = pd.read_parquet(path)
    parquet_json = parquet_df.to_json()
    parquet_data = json.loads(parquet_json)

    if len(parquet_data) > 0:
        parquet_data_keys = list(parquet_data.keys())
        num_data_rows = max(
            [int(k) for k in parquet_data[parquet_data_keys[0]]]
        )

        row_order_parquet_data = [dict() for i in range(num_data_rows + 1)]
        for field_key, row_data in parquet_data.items():
            for row_idx, datum in row_data.items():
                row_idx_num = int(row_idx)
                row_order_parquet_data[row_idx_num][field_key] = datum

        # if filename == "documents.parquet":
        # Sorts the content sections by page number and then by
        # bounding box location. Use x-pos first to account for
        # multi-column documents and then sort by y-pos.
        row_order_parquet_data.sort(
            key=lambda d: (
                d["page_num"],
                d["bounding_box"][0]
                // 500,  # allows for indentation while still catching items across the center line
                # (d["bounding_box"][0]) // 100
                # + round((d["bounding_box"][0] % 100 // 10) / 10),
                d["bounding_box"][1],
            )
        )

        edits = list()
        for e1, extraction1 in enumerate(row_order_parquet_data):
            (ext1_x1, ext1_y1, ext1_x2, ext1_y2) = extraction1[
                "bounding_box"
            ]
            # Don't bother processing for left-justified or centered
            # content ... only right column content needs to be checked
            if ext1_x1 < 500:
                continue

            ext1_page_num = extraction1["page_num"]
            found_col_break = False
            insertion_index = -1
            t1 = e1
            while t1 > 0:
                extraction2 = row_order_parquet_data[t1 - 1]
                ext2_page_num = extraction2["page_num"]
                # If the previous sorted entry is on an earlier page
                # then we can stop our search
                if ext1_page_num > ext2_page_num:
                    break

                (ext2_x1, ext2_y1, ext2_x2, ext2_y2) = extraction2[
                    "bounding_box"
                ]

                if ext1_y2 <= ext2_y1:
                    ext2_xspan = ext2_x2 - ext2_x1
                    # Useful heuristic cutoff for now
                    if ext2_xspan >= 800:
                        found_col_break = True
                        insertion_index = t1 - 1
                t1 -= 1
            if found_col_break:
                edits.append(
                    {
                        "del_idx": e1,
                        "ins_idx": insertion_index,
                        "val": extraction1,
                    }
                )
        for edit_dict in edits:
            del row_order_parquet_data[edit_dict["del_idx"]]
            row_order_parquet_data.insert(
                edit_dict["ins_idx"], edit_dict["val"]
            )
        row_order_parquet_data.sort(key=lambda d: (d["pdf_name"]))

        name2results = dict()
        for row_data in row_order_parquet_data:
            if row_data["pdf_name"] in name2results:
                name2results[row_data["pdf_name"]].append(row_data)
            else:
                name2results[row_data["pdf_name"]] = [row_data]

        return next(iter(name2results.items()))[1]


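# Illustrative sketch of the sort key used above (made-up coordinates): a block at
# x=520 sorts after a block at x=60 on the same page because 520 // 500 == 1 while
# 60 // 500 == 0, i.e. right-column content follows left-column content, and ties
# are broken by the y coordinate:
#
#   blocks = [
#       {"page_num": 1, "bounding_box": [520, 100, 900, 140]},
#       {"page_num": 1, "bounding_box": [60, 300, 480, 340]},
#   ]
#   blocks.sort(key=lambda d: (d["page_num"], d["bounding_box"][0] // 500, d["bounding_box"][1]))
#   # -> the x=60 block now comes first
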

def cosmos_client(name: str, data: BinaryIO):
    """
    Posts a pdf to COSMOS and returns the JSON representation of the parquet file

    """

    # Create POST request to COSMOS server
    # Prep the pdf data for upload
    files = [
        ("pdf", (name, data, 'application/pdf')),
    ]
    response = requests.post(f"{COSMOS_ADDRESS}/process/", files=files)

    if response.status_code == status.HTTP_202_ACCEPTED:

        callback_endpoints = response.json()

        for retry_num in range(200):
            time.sleep(3)  # Retry in three seconds
            poll = requests.get(f"{callback_endpoints['status_endpoint']}")
            if poll.status_code == status.HTTP_200_OK:
                poll_results = poll.json()
                # If the job is completed, fetch the results
                if poll_results['job_completed']:
                    cosmos_response = requests.get(f"{callback_endpoints['result_endpoint']}")
                    if cosmos_response.status_code == status.HTTP_200_OK:
                        data = cosmos_response.content
                        with ZipFile(io.BytesIO(data)) as z:
                            for file in z.namelist():
                                if file.endswith(".parquet") and \
                                        not file.endswith("_figures.parquet") and \
                                        not file.endswith("_pdfs.parquet") and \
                                        not file.endswith("_tables.parquet") and \
                                        not file.endswith("_sections.parquet") and \
                                        not file.endswith("_equations.parquet"):
                                    # convert parquet to json
                                    with z.open(file) as zf:
                                        json_data = parquet_to_json(zf)
                                        return json_data
                        # Shouldn't reach this point
                        raise RuntimeError("COSMOS data doesn't include document file for annotation")
                    else:
                        raise RuntimeError(
                            f"COSMOS Result Error - STATUS CODE: {cosmos_response.status_code} - {COSMOS_ADDRESS}")
                # If the job is not completed yet, just wait until the next iteration
            else:
                pass

        # If we reached this point, we timed out
        raise TimeoutError(f"Timed out waiting for COSMOS on retry num {retry_num + 1}")

    else:
        raise RuntimeError(f"COSMOS Error - STATUS CODE: {response.status_code} - {COSMOS_ADDRESS}")


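# Illustrative usage of the COSMOS client above (a sketch; "paper.pdf" is a placeholder):
#
#   with open("paper.pdf", "rb") as pdf:
#       blocks = cosmos_client("paper.pdf", pdf)
#   # `blocks` is a list of dicts with fields such as "content", "page_num",
#   # "bounding_box" and "pdf_name", as consumed by the endpoints below
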

def merge_pipelines_results(
        skema_extractions,
        mit_extractions,
        general_skema_error,
        general_mit_error,
        annotate_skema,
        annotate_mit):
    """ Merges and de-duplicates text extractions from pipelines """

    # Build the generalized errors list
    generalized_errors = list()
    if general_skema_error:
        generalized_errors.append(
            TextReadingError(
                pipeline="SKEMA",
                message=general_skema_error
            )
        )
    if general_mit_error:
        generalized_errors.append(
            TextReadingError(
                pipeline="MIT",
                message=general_mit_error
            )
        )

    # Build the results and input-specific errors
    results = list()
    errors = list()
    assert len(skema_extractions) == len(
        mit_extractions
    ), "Both pipeline results lists should have the same length"
    for skema, mit in zip(skema_extractions, mit_extractions):
        if annotate_skema and isinstance(skema, str):
            errors.append(TextReadingError(pipeline="SKEMA", message=skema))
            skema = None

        if annotate_mit and isinstance(mit, str):
            errors.append(TextReadingError(pipeline="MIT", message=mit))
            mit = None

        normalized = normalize_extractions(
            arizona_extractions=skema, mit_extractions=mit
        )
        results.append(
            TextReadingDocumentResults(
                data=normalized if normalized.attributes else None,
                errors=errors if errors else None,
            )
        )

    return TextReadingAnnotationsOutput(
        outputs=results,
        generalized_errors=generalized_errors if generalized_errors else None
    )


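# Illustrative sketch of the merge behavior (made-up values): given one document where
# SKEMA succeeded and MIT returned an error string,
#
#   skema_extractions = [skema_result_dict]
#   mit_extractions = ["Calling ... failed with HTTP code 500"]
#
# the string becomes a per-document TextReadingError(pipeline="MIT", ...) and only the
# SKEMA result is passed to normalize_extractions for that document.
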

def integrated_extractions(
        response: Response,
        skema_annotator: Callable,
        skema_inputs: List[Union[str, List[Dict]]],
        mit_inputs: List[str],
        annotate_skema: bool = True,
        annotate_mit: bool = True,
) -> TextReadingAnnotationsOutput:
    """
    Run both text extractors and merge the results.
    This is the annotation logic shared between different input formats
    """

    # Initialize the extractions to a list of empty lists, one per input document
    skema_extractions = [[] for t in skema_inputs]
    mit_extractions = [[] for t in mit_inputs]
    skema_error = None
    mit_error = None

    if annotate_skema:
        try:
            skema_extractions = skema_annotator(skema_inputs)
        except Exception as ex:
            skema_error = f"Problem annotating with SKEMA: {ex}"

    if annotate_mit:
        try:
            mit_extractions = annotate_text_with_mit(mit_inputs)
        except Exception as ex:
            mit_error = f"Problem annotating with MIT: {ex}"

    return_val = merge_pipelines_results(
        skema_extractions,
        mit_extractions,
        skema_error,
        mit_error,
        annotate_skema,
        annotate_mit
    )

    # If there is any error, set the response's status code to 207
    if skema_error or mit_error or any(o.errors is not None for o in return_val.outputs):
        response.status_code = status.HTTP_207_MULTI_STATUS

    return return_val


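# Illustrative sketch of how the endpoints below use the shared logic (not an
# executable snippet on its own; `response` is the FastAPI Response object):
#
#   output = integrated_extractions(response, annotate_text_with_skema, texts, texts)
#   # output.outputs holds one TextReadingDocumentResults per input document;
#   # response.status_code is set to 207 if any pipeline reported an error
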

# End utility code for the endpoints


@router.post(
    "/integrated-text-extractions",
    summary="Posts one or more plain text documents and annotates with SKEMA and/or MIT text reading pipelines",
    status_code=200
)
async def integrated_text_extractions(
        response: Response,
        inputs: TextReadingInputDocuments,
        annotate_skema: bool = True,
        annotate_mit: bool = True,
) -> TextReadingAnnotationsOutput:
    """
    ### Python example
    ```
    params = {
        "annotate_skema": True,
        "annotate_mit": True
    }
    payload = {"texts": [file_text], "amrs": [amr_text]}

    response = requests.post(f"{URL}/text-reading/integrated-text-extractions", params=params, json=payload)
    if response.status_code == 200:
        data = response.json()
    ```
    """
    # Get the input plain texts
    texts = inputs.texts

    amrs = inputs.amrs

    # Run the text extractors
    extractions = integrated_extractions(
        response,
        annotate_text_with_skema,
        texts,
        texts,
        annotate_skema,
        annotate_mit
    )

    # Do the alignment
    aligned_amrs = list()
    if len(amrs) > 0:
        # Build an UploadFile instance from the extractions
        json_extractions = extractions.model_dump_json()
        extractions_ufile = UploadFile(file=io.BytesIO(json_extractions.encode('utf-8')))
        for amr in amrs:
            # amr = json.loads(amr)
            amr_ufile = UploadFile(file=io.BytesIO(amr.encode('utf-8')))
            try:
                aligned_amr = metal_proxy.link_amr(
                    amr_file=amr_ufile,
                    text_extractions_file=extractions_ufile)
                aligned_amrs.append(aligned_amr)
            except Exception as e:
                error = TextReadingError(pipeline="AMR Linker", message=f"Error annotating amr: {e}")
                if extractions.generalized_errors is None:
                    extractions.generalized_errors = [error]
                else:
                    extractions.generalized_errors.append(error)

    extractions.aligned_amrs = aligned_amrs

    return extractions



@router.post(
    "/integrated-pdf-extractions",
    summary="Posts one or more pdf documents and annotates with SKEMA and/or MIT text reading pipelines",
    status_code=200
)
async def integrated_pdf_extractions(
        response: Response,
        pdfs: List[UploadFile],
        amrs: List[UploadFile] = [],
        annotate_skema: bool = True,
        annotate_mit: bool = True
) -> TextReadingAnnotationsOutput:
    """
    ### Python example
    ```
    params = {
        "annotate_skema": True,
        "annotate_mit": True
    }

    files = [("pdfs", ("ijerp.pdf", open("ijerp.pdf", "rb"))), ("amrs", ("amr.json", open("amr.json", "rb")))]

    response = requests.post(f"{URL}/text-reading/integrated-pdf-extractions", params=params, files=files)
    if response.status_code == 200:
        data = response.json()
    ```
    """
    # TODO: Make this handle multiple pdf files in parallel
    # Call COSMOS on the pdfs
    cosmos_data = list()
    for pdf in pdfs:
        if pdf.filename.endswith("json"):
            json_data = json.load(pdf.file)
        else:
            json_data = cosmos_client(pdf.filename, pdf.file)
        cosmos_data.append(json_data)

    # Get the plain text version from cosmos, passed through to the MIT pipeline
    plain_texts = ['\n'.join(block['content'] for block in c) for c in cosmos_data]

    # Run the text extractors
    extractions = integrated_extractions(
        response,
        annotate_pdfs_with_skema,
        cosmos_data,
        plain_texts,
        annotate_skema,
        annotate_mit
    )

    # Do the alignment
    aligned_amrs = list()
    if len(amrs) > 0:
        # Build an UploadFile instance from the extractions
        json_extractions = extractions.model_dump_json()
        extractions_ufile = UploadFile(file=io.BytesIO(json_extractions.encode('utf-8')))
        for amr in amrs:
            try:
                aligned_amr = metal_proxy.link_amr(
                    amr_file=amr,
                    text_extractions_file=extractions_ufile)
                aligned_amrs.append(aligned_amr)
            except Exception as e:
                error = TextReadingError(pipeline="AMR Linker", message=f"Error annotating {amr.filename}: {e}")
                if extractions.generalized_errors is None:
                    extractions.generalized_errors = [error]
                else:
                    extractions.generalized_errors.append(error)

    extractions.aligned_amrs = aligned_amrs

    return extractions



# These are some direct proxies to the SKEMA and MIT APIs
@router.post(
    "/cosmos_to_json",
    status_code=200,
)
async def cosmos_to_json(pdf: UploadFile) -> List[Dict]:
    """ Calls COSMOS on a pdf and converts the data into json

    ### Python example
    ```
    response = requests.post(f"{endpoint}/text-reading/cosmos_to_json",
                             files=[
                                 ("pdf", ("ijerp.pdf", open("ijerph-18-09027.pdf", 'rb')))
                             ]
                             )
    ```
    """
    return cosmos_client(pdf.filename, pdf.file)



@router.post(
    "/ground_to_mira",
    status_code=200,
    response_model=List[List[MiraGroundingOutputItem]]
)
async def ground_to_mira(k: int, queries: MiraGroundingInputs, response: Response) -> List[List[MiraGroundingOutputItem]]:
    """ Proxy to the MIRA grounding functionality on the SKEMA TR service

    ### Python example
    ```
    queries = {"queries": ["infected", "susceptible"]}
    params = {"k": 5}
    response = requests.post(f"{endpoint}/text-reading/ground_to_mira", params=params, json=queries)

    if response.status_code == 200:
        results = response.json()
    ```
    """
    params = {
        "k": k
    }
    headers = {
        "Content-Type": "text/plain"
    }
    payload = "\n".join(queries.queries)
    inner_response = requests.post(f"{SKEMA_TR_ADDRESS}/groundStringsToMira", headers=headers, params=params,
                                   data=payload)

    response.status_code = inner_response.status_code

    if inner_response.status_code == 200:
        return [[MiraGroundingOutputItem(**o) for o in q] for q in inner_response.json()]
    else:
        return inner_response.content



@router.post("/cards/get_model_card")
async def get_model_card(text_file: UploadFile, code_file: UploadFile, response: Response):
    """ Calls the model card endpoint from MIT's pipeline

    ### Python example
    ```
    files = {
        "text_file": ("text_file.txt", open("text_file.txt", 'rb')),
        "code_file": ("code_file.py", open("code_file.py", 'rb')),
    }

    response = requests.post(f"{endpoint}/text-reading/cards/get_model_card", files=files)
    ```
    """

    params = {
        "gpt_key": OPENAI_KEY,
    }
    files = {
        "text_file": (text_file.filename, text_file.file, "text/plain"),
        "code_file": (code_file.filename, code_file.file, "text/plain")
    }

    inner_response = requests.post(f"{MIT_TR_ADDRESS}/cards/get_model_card", params=params, files=files)

    response.status_code = inner_response.status_code
    return inner_response.json()



@router.post("/cards/get_data_card")
async def get_data_card(smart: bool, csv_file: UploadFile, doc_file: UploadFile, response: Response):
    """
    Calls the data card endpoint from MIT's pipeline.
    The smart option provides better results but slower response times due to extra GPT calls.

    ### Python example
    ```
    params = {
        "smart": False
    }

    files = {
        "csv_file": ("csv_file.csv", open("csv_file.csv", 'rb')),
        "doc_file": ("doc_file.txt", open("doc_file.txt", 'rb')),
    }

    response = requests.post(f"{endpoint}/text-reading/cards/get_data_card", params=params, files=files)
    ```
    """

    params = {
        "gpt_key": OPENAI_KEY,
        "smart": smart
    }
    files = {
        "csv_file": (csv_file.filename, csv_file.file, "text/csv"),
        "doc_file": (doc_file.filename, doc_file.file, "text/plain")
    }

    inner_response = requests.post(f"{MIT_TR_ADDRESS}/cards/get_data_card", params=params, files=files)

    response.status_code = inner_response.status_code
    return inner_response.json()



####


@router.get(
    "/healthcheck",
    summary="Check health of integrated text reading service",
    response_model=int,
    status_code=200,
    responses={
        200: {
            "model": int,
            "description": "All component services are healthy (200 status)",
        },
        500: {
            "model": int,
            "description": "Internal error occurred",
            "example_value": 500
        },
        502: {
            "model": int,
            "description": "At least one of the text reading services is not available"
        }
    },
)
def healthcheck() -> int:
    # SKEMA health check
    skema_endpoint = f"{SKEMA_TR_ADDRESS}/api/skema"
    try:
        skema_response = requests.get(skema_endpoint, timeout=10)
    except Exception:
        return status.HTTP_500_INTERNAL_SERVER_ERROR

    # TODO replace this with a proper healthcheck endpoint
    mit_endpoint = f"{MIT_TR_ADDRESS}/annotation/find_text_vars/"
    mit_params = {"gpt_key": OPENAI_KEY}
    files = {"file": io.StringIO("x = 0")}
    try:
        mit_response = requests.post(mit_endpoint, params=mit_params, files=files, timeout=10)
    except Exception:
        return status.HTTP_502_BAD_GATEWAY
    ######################################################

    status_code = (
        status.HTTP_200_OK
        if all(resp.status_code == 200 for resp in [skema_response, mit_response])
        else status.HTTP_500_INTERNAL_SERVER_ERROR
    )
    return status_code



@router.post("/eval", response_model=TextReadingEvaluationResults, status_code=200)
def quantitative_eval(extractions_file: UploadFile,
                      gt_annotations: UploadFile, json_text: UploadFile) -> TextReadingEvaluationResults:
    """
    # Gets performance metrics of a set of text extractions against a ground truth annotations file.

    ## Example:
    ```python
    files = {
        "extractions_file": ("paper_variable_extractions.json", open("paper_variable_extractions.json", 'rb')),
        "gt_annotations": ("paper_gt_annotations.json", open("paper_gt_annotations.json", 'rb')),
        "json_text": ("paper_cosmos_output.json", open("paper_cosmos_output.json", 'rb')),
    }

    response = requests.post(f"{endpoint}/text-reading/eval", files=files)
    ```
    """

    gt_data = json.load(gt_annotations.file)
    json_contents = json.load(json_text.file)

    # Support both bare serialized AttributeCollections and collections wrapped in this REST API's output envelope
    extractions_json = json.load(extractions_file.file)
    try:
        extractions = AttributeCollection.from_json(extractions_json)
    except KeyError:
        extractions_file.file.seek(0)
        service_output = json.load(extractions_file.file)
        collections = list()
        for collection in service_output['outputs']:
            collection = AttributeCollection.from_json(collection['data'])
            collections.append(collection)

        extractions = AttributeCollection(
            attributes=list(it.chain.from_iterable(c.attributes for c in collections)))

    return utils.compute_text_reading_evaluation(gt_data, extractions, json_contents)


app = FastAPI()
app.include_router(router)