Coverage for skema/rest/workflows.py: 38%

1# -*- coding: utf-8 -*-

2"""

3End-to-end skema workflows

4"""

5import copy

6import time

7from zipfile import ZipFile

8from io import BytesIO

9from typing import List

10from pathlib import Path

11import httpx

12import requests

14from fastapi import APIRouter, Depends, File, UploadFile, FastAPI, Request

15from starlette.responses import JSONResponse

17from skema.img2mml import eqn2mml

18from skema.img2mml.eqn2mml import image2mathml_db, b64_image_to_mml

19from skema.img2mml.api import get_mathml_from_bytes

20from skema.isa.lib import generate_code_graphs, align_eqn_code, convert_to_dict

21from skema.rest import config, schema, utils, llm_proxy

22from skema.rest.equation_extraction import process_pdf_and_images

23from skema.rest.proxies import SKEMA_RS_ADDESS

24from skema.skema_py import server as code2fn

# Router that collects every workflow endpoint declared in this module.
router = APIRouter()

# equations [mathml, latex] -> amrs [Petrinet, Regnet, GAMR, MET, Decapode]
@router.post(
    "/consolidated/equations-to-amr",
    summary="equations [mathml, latex] → AMRs [Petrinet, Regnet, GAMR, MET, Decapode]",
)
async def equation_to_amrs(
    data: schema.EquationsToAMRs,
    client: httpx.AsyncClient = Depends(utils.get_client),
):
    """
    Converts equations (in LaTeX or MathML) to an AMR (Petrinet, Regnet, GAMR, MET, or Decapode).

    ## If Petrinet or Regnet is selected and the conversion fails, we fall back to converting to a Generalized AMR.
    ---
    ### Python example
    ```
    import requests

    equations = [
        "\\frac{\\delta x}{\\delta t} = {\\alpha x} - {\\beta x y}",
        "\\frac{\\delta y}{\\delta t} = {\\alpha x y} - {\\gamma y}"
    ]
    url = "0.0.0.0"
    r = requests.post(f"{url}/consolidated/equations-to-amr", json={"equations": equations, "model": "regnet"})
    r.json()
    ```
    ---
    parameters:
    - name: equations
      - schema:
        - type: array
        - items:
          - type: string
      - required: true
      - description: This is a list of equations, in either pMathML or LaTeX
    - name: model
      - type: string
      - required: true
      - description: This specifies the type of model that the output AMR will be in
      - examples:
        - Petrinet:
          - summary: For making a petrinet
          - value: "petrinet"
        - Regnet:
          - summary: For making a regnet
          - value: "regnet"
        - Decapode:
          - summary: For making a decapode
          - value: "decapode"
        - Generalized AMR:
          - summary: For making a generalized AMR
          - value: "gamr"
        - Math Expression Tree:
          - summary: For making a Math Expression Tree
          - value: "met"

    Returns the JSON body of the upstream response on success. On failure a
    JSON error document is returned: status 422 when the model type is
    unsupported or when both the requested AMR and the Generalized AMR
    fallback fail, status 400 when a MET / GAMR / Decapode conversion fails.
    """
    eqns = utils.parse_equations(data.equations)

    if data.model in ("petrinet", "regnet"):
        payload = {"mathml": eqns, "model": data.model}
        res = await client.put(f"{SKEMA_RS_ADDESS}/mathml/amr", json=payload)
        if res.status_code != 200:
            # The specific AMR could not be built: retry as a Generalized AMR,
            # which takes the bare equation list rather than the wrapped payload.
            res = await client.put(f"{SKEMA_RS_ADDESS}/mathml/g-amr", json=eqns)
            if res.status_code != 200:
                return JSONResponse(
                    status_code=422,
                    content={
                        "error": f"Attempted creation of {data.model} AMR, which failed. Then tried creation of Generalized AMR, which also failed with the following error {res.text}. Please check equations, seen as pMathML below.",
                        "payload": eqns,
                    },
                )
        return res.json()

    # Model types that map straight onto a single upstream route.
    direct_routes = {
        "met": "/mathml/met",
        "gamr": "/mathml/g-amr",
        "decapode": "/mathml/decapodes",
    }
    route = direct_routes.get(data.model)
    if route is None:
        return JSONResponse(
            status_code=422,
            content={
                "error": f"{data.model} is not a supported model type",
                "payload": eqns,
            },
        )

    res = await client.put(f"{SKEMA_RS_ADDESS}{route}", json=eqns)
    if res.status_code != 200:
        # Report the route that was actually called (the gamr and decapode
        # branches used to blame /mathml/met).
        return JSONResponse(
            status_code=400,
            content={
                "error": f"PUT {route} failed to process payload with error {res.text}",
                "payload": eqns,
            },
        )
    return res.json()

138

# Code Snippets -> amrs [Petrinet, Regnet, GAMR, MET]
@router.post(
    "/consolidated/code-snippets-to-amrs",
    summary="code snippets → AMRs [Petrinet, Regnet, GAMR, MET]",
)
async def code_snippets_to_amrs(
    system: code2fn.System,
    client: httpx.AsyncClient = Depends(utils.get_client),
):
    """
    Code snippets to AMR workflow. This endpoint takes a code snippet, assumed to contain dynamics, and extracts the
    Math Expression Tree of the dynamics, which is then converted into an AMR of the specified type.

    ### Python example
    ```
    import requests

    # Single file
    single_snippet_payload = {"files": ["code.py"], "blobs": ["def sir(s: float, i: float, r: float, beta: float, gamma: float, n: float) -> Tuple[float, float, float]:\n    \"\"\"The SIR model, one time step.\"\"\"\n    s_n = (-beta * s * i) + s\n    i_n = (beta * s * i - gamma * i) + i\n    r_n = gamma * i + r\n    scale = n / (s_n + i_n + r_n)\n    return s_n * scale, i_n * scale, r_n * scale"], "model": "petrinet"}

    response = requests.post("http://0.0.0.0:8000/workflows/consolidated/code-snippets-to-amrs", json=single_snippet_payload)
    gromet_json = response.json()
    ```
    """
    # The function network is built before the model type is validated, so an
    # unsupported model still reports the generated gromet as its payload.
    gromet = await code2fn.fn_given_filepaths(system)
    gromet, _ = utils.fn_preprocessor(gromet)

    # Requested model type -> last path segment of the upstream /models route.
    model_routes = {
        "petrinet": "PN",
        "regnet": "RN",
        "met": "MET",
        "gamr": "G-AMR",
    }
    if system.model not in model_routes:
        # NOTE(review): 401 is kept as-is for compatibility; equation_to_amrs
        # answers the same condition with 422.
        return JSONResponse(
            status_code=401,
            content={
                "error": f"{system.model} is not a supported model type",
                "payload": gromet,
            },
        )

    target = model_routes[system.model]
    res = await client.put(f"{SKEMA_RS_ADDESS}/models/{target}", json=gromet)
    if res.status_code == 200:
        return res.json()
    return JSONResponse(
        status_code=400,
        content={
            "error": f"MORAE PUT /models/{target} failed to process payload ({res.text})",
            "payload": gromet,
        },
    )

210

211

# equation images -> mml -> amr
@router.post(
    "/images/base64/equations-to-amr",
    summary="Equations (base64 images) → MML → AMR",
)
async def equations_img_to_amr(
    data: schema.EquationImagesToAMR,
    client: httpx.AsyncClient = Depends(utils.get_client),
):
    """
    Converts images of equations to AMR.

    ### Python example
    ```
    import base64
    import requests

    images_bytes = [open("eq1.png", "rb").read(), open("eq2.png", "rb").read()]

    images_b64 = [base64.b64encode(img_bytes).decode("utf-8") for img_bytes in images_bytes]

    url = "0.0.0.0"
    r = requests.post(f"{url}/workflows/images/base64/equations-to-amr", json={"images": images_b64, "model": "regnet"})
    r.json()
    ```
    """
    # Each base64 image is turned into MathML and cleaned before submission.
    mml: List[str] = []
    for encoded_image in data.images:
        raw_mml = eqn2mml.b64_image_to_mml(encoded_image)
        mml.append(utils.clean_mml(raw_mml))

    payload = {"mathml": mml, "model": data.model}
    # FIXME: why is this a PUT?
    res = await client.put(f"{SKEMA_RS_ADDESS}/mathml/amr", json=payload)
    if res.status_code == 200:
        return res.json()
    return JSONResponse(
        status_code=400,
        content={
            "error": f"MORAE PUT /mathml/amr failed to process payload with error {res.text}",
            "payload": payload,
        },
    )

257

258

# equation images -> mml -> latex
@router.post("/images/equations-to-latex", summary="Equations (images) → MML → LaTeX")
async def equations_img_to_latex(
    data: UploadFile,
    client: httpx.AsyncClient = Depends(utils.get_client),
):
    """
    Converts images of equations to LaTeX.

    ### Python example

    Endpoint for generating LaTeX from an input image.

    ```
    import requests
    import json

    files = {
        "data": open("bayes-rule-white-bg.png", "rb"),
    }
    r = requests.post("http://0.0.0.0:8000/workflows/images/equations-to-latex", files=files)
    print(json.loads(r.text))
    ```

    Returns the upstream response text on success. On failure the upstream
    status code and text are returned as an ``"Error: ..."`` string.
    """
    # Read image data
    image_bytes = await data.read()

    # Convert the raw image bytes to MathML
    mml_res = get_mathml_from_bytes(image_bytes, image2mathml_db)
    proxy_url = f"{SKEMA_RS_ADDESS}/mathml/latex"
    print(f"MMML:\t{mml_res}")
    print(f"Proxying request to {proxy_url}")
    # The MathML is sent as the raw request body. httpx reserves `data=` for
    # form fields and emits a DeprecationWarning when it receives raw text,
    # so `content=` is used instead (assumes mml_res is str/bytes).
    response = await client.post(proxy_url, content=mml_res)
    if response.status_code == 200:
        # The request was successful
        return response.text

    # The request failed
    print(f"Error: {response.status_code}")
    print(response.text)
    return f"Error: {response.status_code} {response.text}"

298

299

# equation images -> base64 -> mml -> latex
@router.post("/images/base64/equations-to-latex", summary="Equations (images) → MML → LaTeX")
async def equations_to_latex(
    request: Request,
    client: httpx.AsyncClient = Depends(utils.get_client),
):
    """
    Converts images of equations to LaTeX.

    ### Python example

    Endpoint for generating LaTeX from an input image.

    ```
    from pathlib import Path
    import base64
    import requests

    url = "http://127.0.0.1:8000/workflows/images/base64/equations-to-latex"
    with Path("test.png").open("rb") as infile:
        img_bytes = infile.read()
    img_b64 = base64.b64encode(img_bytes).decode("utf-8")
    r = requests.post(url, data=img_b64)
    print(r.text)
    ```

    Returns the upstream response text on success. On failure the upstream
    status code and text are returned as an ``"Error: ..."`` string.
    """
    # The request body is the base64-encoded image itself
    img_b64 = await request.body()
    mml_res = b64_image_to_mml(img_b64)

    proxy_url = f"{SKEMA_RS_ADDESS}/mathml/latex"
    print(f"MML:\t{mml_res}")
    print(f"Proxying request to {proxy_url}")
    # The MathML is sent as the raw request body. httpx reserves `data=` for
    # form fields and emits a DeprecationWarning when it receives raw text,
    # so `content=` is used instead.
    response = await client.post(proxy_url, content=mml_res)
    if response.status_code == 200:
        # The request was successful
        return response.text

    # The request failed
    print(f"Error: {response.status_code}")
    print(response.text)
    return f"Error: {response.status_code} {response.text}"

341

# tex equations -> pmml -> amr
@router.post("/latex/equations-to-amr", summary="Equations (LaTeX) → pMML → AMR")
async def lx_equations_to_amr(
    data: schema.EquationLatexToAMR,
    client: httpx.AsyncClient = Depends(utils.get_client),
):
    """
    Converts equations (in LaTeX) to AMR.

    ### Python example
    ```
    import requests

    equations = [
        "\\frac{\\delta x}{\\delta t} = {\\alpha x} - {\\beta x y}",
        "\\frac{\\delta y}{\\delta t} = {\\alpha x y} - {\\gamma y}"
    ]
    url = "0.0.0.0"
    r = requests.post(f"{url}/workflows/latex/equations-to-amr", json={"equations": equations, "model": "regnet"})
    r.json()
    ```
    """
    # Translate every LaTeX equation to MathML and clean it before submission.
    mml: List[str] = []
    for tex in data.equations:
        converted = eqn2mml.get_mathml_from_latex(tex)
        mml.append(utils.clean_mml(converted))

    payload = {"mathml": mml, "model": data.model}
    res = await client.put(f"{SKEMA_RS_ADDESS}/mathml/amr", json=payload)
    if res.status_code == 200:
        return res.json()
    return JSONResponse(
        status_code=400,
        content={
            "error": f"MORAE PUT /mathml/amr failed to process payload with error {res.text}",
            "payload": payload,
        },
    )

375

376

# pmml -> amr
@router.post("/pmml/equations-to-amr", summary="Equations pMML → AMR")
async def equations_to_amr(
    data: schema.MmlToAMR,
    client: httpx.AsyncClient = Depends(utils.get_client),
):
    """
    Converts equations (in pMathML) to an AMR of the requested model type.

    ### Python example
    ```
    import requests

    payload = {
        "equations":
        [
            "<math><mfrac><mrow><mi>d</mi><mi>E</mi></mrow><mrow><mi>d</mi><mi>t</mi></mrow></mfrac><mo>=</mo><mi>β</mi><mi>I</mi><mi>S</mi><mo>−</mo><mi>δ</mi><mi>E</mi></math>",
            "<math><mfrac><mrow><mi>d</mi><mi>R</mi></mrow><mrow><mi>d</mi><mi>t</mi></mrow></mfrac><mo>=</mo><mi>(1−α)</mi><mi>γ</mi><mi>I</mi></math>",
            "<math><mfrac><mrow><mi>d</mi><mi>I</mi></mrow><mrow><mi>d</mi><mi>t</mi></mrow></mfrac><mo>=</mo><mi>δ</mi><mi>E</mi><mo>−</mo><mi>(1−α)</mi><mi>γ</mi><mi>I</mi><mo>−</mo><mi>α</mi><mi>ρ</mi><mi>I</mi></math>",
            "<math><mfrac><mrow><mi>d</mi><mi>D</mi></mrow><mrow><mi>d</mi><mi>t</mi></mrow></mfrac><mo>=</mo><mi>α</mi><mi>ρ</mi><mi>I</mi></math>",
            "<math><mfrac><mrow><mi>d</mi><mi>S</mi></mrow><mrow><mi>d</mi><mi>t</mi></mrow></mfrac><mo>=</mo><mo>−</mo><mi>β</mi><mi>I</mi><mi>S</mi></math>"
        ],
        "model": "petrinet"
    }

    url = "http://127.0.0.1:8000"

    r = requests.post(f"{url}/workflows/pmml/equations-to-amr", json=payload)
    print(r.json())
    ```
    """
    # The equations are forwarded untouched; only the envelope is built here.
    payload = {"mathml": data.equations, "model": data.model}
    res = await client.put(f"{SKEMA_RS_ADDESS}/mathml/amr", json=payload)
    if res.status_code == 200:
        return res.json()
    return JSONResponse(
        status_code=400,
        content={
            "error": f"MORAE PUT /mathml/amr failed to process payload with error {res.text}",
            "payload": payload,
        },
    )

416

417

# equations(pmml or latex) -> MathExpressionTree
@router.post("/equations-to-met", summary="Equations (LaTeX/pMML) → MathExpressionTree")
async def equations_to_met(
    data: schema.EquationToMET,
    client: httpx.AsyncClient = Depends(utils.get_client),
):
    """
    Converts equations (in LaTeX or pMathML) to MathExpressionTree (JSON).

    ### Python example
    ```
    import requests

    equations = [
        "E=mc^2",
        "c=\\frac{a}{b}"
    ]

    url = "http://127.0.0.1:8000"

    r = requests.post(f"{url}/workflows/equations-to-met", json={"equations": equations})
    print(r.json())
    ```
    """
    eqns = utils.parse_equations(data.equations)

    res = await client.put(f"{SKEMA_RS_ADDESS}/mathml/met", json=eqns)
    if res.status_code == 200:
        return res.json()
    return JSONResponse(
        status_code=400,
        content={
            "error": f"PUT /mathml/met failed to process payload with error {res.text}",
            "payload": eqns,
        },
    )

451

452

# equations(pmml or latex) -> Generalized AMR
@router.post("/equations-to-gamr", summary="Equations (LaTeX/pMML) → Generalized AMR")
async def equations_to_gamr(
    data: schema.EquationToMET,
    client: httpx.AsyncClient = Depends(utils.get_client),
):
    """
    Converts equations (in LaTeX or pMathML) to Generalized AMR (JSON).

    ### Python example
    ```
    import requests

    equations = [
        "E=mc^2",
        "c=\\frac{a}{b}"
    ]

    url = "http://127.0.0.1:8000"

    r = requests.post(f"{url}/workflows/equations-to-gamr", json={"equations": equations})
    print(r.json())
    ```

    Returns the JSON body of the upstream response on success, or a 400 JSON
    error document carrying the upstream error text and the parsed equations.
    """
    eqns = utils.parse_equations(data.equations)

    res = await client.put(f"{SKEMA_RS_ADDESS}/mathml/g-amr", json=eqns)
    if res.status_code != 200:
        # Name the route that was actually called (the message used to blame
        # /mathml/met).
        return JSONResponse(
            status_code=400,
            content={
                "error": f"PUT /mathml/g-amr failed to process payload with error {res.text}",
                "payload": eqns,
            },
        )
    return res.json()

486

487

# code snippets -> fn -> petrinet amr
@router.post("/code/snippets-to-pn-amr", summary="Code snippets → PetriNet AMR")
async def code_snippets_to_pn_amr(
    system: code2fn.System,
    client: httpx.AsyncClient = Depends(utils.get_client),
):
    """
    Code snippets to Petrinet AMR workflow. This endpoint takes a code snippet, assumed to contain dynamics, and extracts the
    Math Expression Tree of the dynamics, which is then converted into a Petrinet AMR.

    ### Python example
    ```
    import requests

    # Single file
    single_snippet_payload = {"files": ["code.py"], "blobs": ["def sir(s: float, i: float, r: float, beta: float, gamma: float, n: float) -> Tuple[float, float, float]:\n    \"\"\"The SIR model, one time step.\"\"\"\n    s_n = (-beta * s * i) + s\n    i_n = (beta * s * i - gamma * i) + i\n    r_n = gamma * i + r\n    scale = n / (s_n + i_n + r_n)\n    return s_n * scale, i_n * scale, r_n * scale"],}

    response = requests.post("http://0.0.0.0:8000/workflows/code/snippets-to-pn-amr", json=single_snippet_payload)
    gromet_json = response.json()
    ```
    """
    function_network = await code2fn.fn_given_filepaths(system)
    gromet, _ = utils.fn_preprocessor(function_network)

    res = await client.put(f"{SKEMA_RS_ADDESS}/models/PN", json=gromet)
    if res.status_code == 200:
        return res.json()
    return JSONResponse(
        status_code=400,
        content={
            "error": f"MORAE PUT /models/PN failed to process payload ({res.text})",
            "payload": gromet,
        },
    )

521

522

523""" TODO: The regnet endpoints are currently outdated

524# code snippets -> fn -> regnet amr

525@router.post("/code/snippets-to-rn-amr", summary="Code snippets → RegNet AMR")

526async def code_snippets_to_rn_amr(system: code2fn.System):

527 gromet = await code2fn.fn_given_filepaths(system)

528 res = requests.put(f"{SKEMA_RS_ADDESS}/models/RN", json=gromet)

529 if res.status_code != 200:

530 print(res.status_code)

531 return JSONResponse(

532 status_code=400,

533 content={

534 "error": f"MORAE POST /models/RN failed to process payload",

535 "payload": gromet,

536 },

537 )

538 return res.json()

539"""

540

541

# zip archive -> fn -> petrinet amr
@router.post(
    "/code/codebase-to-pn-amr",
    summary="Code repo (zip archive) → PetriNet AMR",
)
async def repo_to_pn_amr(
    zip_file: UploadFile = File(),
    client: httpx.AsyncClient = Depends(utils.get_client),
):
    """
    Codebase to AMR workflow. This endpoint uses a simple algorithm to identify the dynamics and then we slice
    that portion of the code to extract dynamics from it.

    ### Python example
    ```
    import requests

    files = {
        'zip_file': open('model_source.zip', 'rb')
    }
    response = requests.post("http://localhost:8000/workflows/code/codebase-to-pn-amr", files=files)
    amr = response.json()
    ```

    Returns the JSON body of the upstream response on success, or a 400 JSON
    error document carrying the upstream error text and the generated gromet.
    """
    gromet = await code2fn.fn_given_filepaths_zip(zip_file)
    gromet, _ = utils.fn_preprocessor(gromet)
    res = await client.put(f"{SKEMA_RS_ADDESS}/models/PN", json=gromet)
    if res.status_code != 200:
        # Include the upstream error text, as code_snippets_to_pn_amr does;
        # the message previously dropped it.
        return JSONResponse(
            status_code=400,
            content={
                "error": f"MORAE PUT /models/PN failed to process payload ({res.text})",
                "payload": gromet,
            },
        )
    return res.json()

574

575

def _parse_linespan_block(block):
    """Turn one ``<start>-<end>`` linespan block into a ``(begin, end)`` slice pair.

    The first character of each half is dropped before the integer is parsed
    (presumably an ``L`` prefix such as ``"L10-L20"`` -- confirm against
    llm_proxy). ``begin`` is shifted from 1-based to 0-based and clamped at 0;
    ``end`` is used unchanged as the exclusive slice bound.
    """
    parts = block.split("-")
    begin = max(int(parts[0][1:]) - 1, 0)
    end = int(parts[1][1:])
    return begin, end


# zip archive -> linespan -> snippet -> petrinet amr
@router.post(
    "/code/llm-assisted-codebase-to-pn-amr",
    summary="Code repo (zip archive) → PetriNet AMR",
)
async def llm_assisted_codebase_to_pn_amr(
    zip_file: UploadFile = File(),
    client: httpx.AsyncClient = Depends(utils.get_client),
):
    """
    Codebase to AMR workflow. This endpoint uses an LLM to identify the dynamics and then we slice
    that portion of the code to extract dynamics from it.

    ### Python example
    ```
    import requests

    files = {
        'zip_file': open('model_source.zip', 'rb')
    }
    response = requests.post("http://localhost:8000/workflows/code/llm-assisted-codebase-to-pn-amr", files=files)
    amr = response.json()
    ```

    Returns the AMR with the most states among the files that converted
    successfully (the first one on ties). When no file yields an AMR, the
    list of per-file failure messages is returned instead.
    """
    # NOTE: Opening the zip file mutates the object and prevents it from being reopened.
    # Since llm_proxy also needs to open the zip file, we should send a copy instead.
    print(f"Time call linespan: {time.time()}")
    linespans = await llm_proxy.get_lines_of_model(copy.deepcopy(zip_file))
    print(f"Time response linespan: {time.time()}")

    # One (dynamics_span, import_span) pair per linespan. The import span is
    # stored next to its own linespan (None when absent) so that skipping a
    # failed linespan can never shift the imports used for later files.
    spans = []
    for linespan in linespans:
        # The last block holds the dynamics; with exactly two blocks the
        # first one holds the imports that must be prepended.
        dynamics_span = _parse_linespan_block(linespan.block[-1])
        import_span = None
        if len(linespan.block) == 2:
            import_span = _parse_linespan_block(linespan.block[0])
        spans.append((dynamics_span, import_span))

    # Collect the Python sources, in archive order.
    files = []
    blobs = []
    with ZipFile(BytesIO(zip_file.file.read()), "r") as archive:
        for member in archive.namelist():
            member_path = Path(member)
            if member_path.suffix != ".py":
                continue
            # Skip file if located in a hidden directory or MACOSX artifact.
            # NOTE(review): macOS archives normally use "__MACOSX" (two
            # underscores); the literal is left unchanged so the file order
            # stays aligned with whatever filter llm_proxy applies.
            if any(
                parent.name == "_MACOSX" or parent.name.startswith(".")
                for parent in member_path.parents
            ):
                continue
            files.append(member)
            blobs.append(archive.read(member).decode("utf-8"))

    if len(spans) != len(blobs):
        # Files and linespans are paired positionally; surplus entries on
        # either side are ignored rather than raising IndexError.
        print(f"linespan/file count mismatch: {len(spans)} linespans for {len(blobs)} files")

    amrs = []
    failures = []
    for file_name, blob, linespan, (dynamics_span, import_span) in zip(
        files, blobs, linespans, spans
    ):
        begin, end = dynamics_span
        if begin == end:
            print("failed linespan")
            continue

        # The source code is a string, so to slice using the line spans, we
        # must first split it into lines and then join the slice back.
        source_lines = blob.splitlines(keepends=True)
        snippet = "".join(source_lines[begin:end])
        if import_span is not None:
            import_begin, import_end = import_span
            snippet = "".join(source_lines[import_begin:import_end]) + "\n" + snippet

        try:
            print(f"Time call code-snippets: {time.time()}")
            gromet = await code2fn.fn_given_filepaths(
                code2fn.System(files=[file_name], blobs=[snippet])
            )
            gromet, _ = utils.fn_preprocessor(gromet)
            response = await client.put(f"{SKEMA_RS_ADDESS}/models/PN", json=gromet)
            code_snippet_response = response.json()
            print(f"Time response code-snippets: {time.time()}")
            if "model" in code_snippet_response:
                header = code_snippet_response["header"]
                header["name"] = "LLM-assisted code to amr model"
                header["description"] = f"This model came from code file: {file_name}"
                header["linespan"] = f"{linespan}"
                amrs.append(code_snippet_response)
            else:
                print("snippets failure")
                failures.append(f"{file_name} failed to parse an AMR from the dynamics")
        except Exception as e:
            # Best effort: one file failing must not abort the other files.
            print("Hit except to snippets failure")
            print(f"Exception:\t{e}")
            failures.append(f"{file_name} failed to parse an AMR from the dynamics")

    print(f"{amrs}")
    if not amrs:
        return failures

    # we will return the amr with most states, in assumption it is the most "correct"
    # by default it returns the first entry
    best = amrs[0]
    for candidate in amrs[1:]:
        try:
            if len(candidate["model"]["states"]) > len(best["model"]["states"]):
                best = candidate
        except (KeyError, TypeError):
            # An AMR without a usable state list cannot be compared.
            continue
    return best

697

# code snippets -> fn -> MET
@router.post("/code/snippets-to-met", summary="Code snippets → MET")
async def code_snippets_to_MET(
    system: code2fn.System,
    client: httpx.AsyncClient = Depends(utils.get_client),
):
    """
    Code snippets to Math Expression Tree workflow. This endpoint takes a code snippet, assumed to contain dynamics, and extracts the
    Math Expression Tree of the dynamics.

    ### Python example
    ```
    import requests

    # Single file
    single_snippet_payload = {"files": ["code.py"], "blobs": ["def sir(s: float, i: float, r: float, beta: float, gamma: float, n: float) -> Tuple[float, float, float]:\n    \"\"\"The SIR model, one time step.\"\"\"\n    s_n = (-beta * s * i) + s\n    i_n = (beta * s * i - gamma * i) + i\n    r_n = gamma * i + r\n    scale = n / (s_n + i_n + r_n)\n    return s_n * scale, i_n * scale, r_n * scale"],}

    response = requests.post("http://0.0.0.0:8000/workflows/code/snippets-to-met", json=single_snippet_payload)
    gromet_json = response.json()
    ```

    Returns the JSON body of the upstream response on success, or a 400 JSON
    error document carrying the upstream error text and the generated gromet.
    """
    gromet = await code2fn.fn_given_filepaths(system)
    gromet, _ = utils.fn_preprocessor(gromet)
    res = await client.put(f"{SKEMA_RS_ADDESS}/models/MET", json=gromet)
    if res.status_code != 200:
        # Name the route that was actually called (the message used to blame
        # /models/PN).
        return JSONResponse(
            status_code=400,
            content={
                "error": f"MORAE PUT /models/MET failed to process payload ({res.text})",
                "payload": gromet,
            },
        )
    return res.json()

731

# code snippets -> fn -> generalized amr
@router.post("/code/snippets-to-gamr", summary="Code snippets → Generalized-AMR")
async def code_snippets_to_G_AMR(
    system: code2fn.System,
    client: httpx.AsyncClient = Depends(utils.get_client),
):
    """
    Code snippets to Generalized AMR workflow. This endpoint takes a code snippet, assumed to contain dynamics, and extracts the
    Math Expression Tree of the dynamics and then converts that to our Generalized AMR representation.

    ### Python example
    ```
    import requests

    # Single file
    single_snippet_payload = {"files": ["code.py"], "blobs": ["def sir(s: float, i: float, r: float, beta: float, gamma: float, n: float) -> Tuple[float, float, float]:\n    \"\"\"The SIR model, one time step.\"\"\"\n    s_n = (-beta * s * i) + s\n    i_n = (beta * s * i - gamma * i) + i\n    r_n = gamma * i + r\n    scale = n / (s_n + i_n + r_n)\n    return s_n * scale, i_n * scale, r_n * scale"],}

    response = requests.post("http://0.0.0.0:8000/workflows/code/snippets-to-gamr", json=single_snippet_payload)
    gromet_json = response.json()
    ```

    Returns the JSON body of the upstream response on success, or a 400 JSON
    error document carrying the upstream error text and the generated gromet.
    """
    gromet = await code2fn.fn_given_filepaths(system)
    gromet, _ = utils.fn_preprocessor(gromet)
    res = await client.put(f"{SKEMA_RS_ADDESS}/models/G-AMR", json=gromet)
    if res.status_code != 200:
        # Name the route that was actually called (the message used to blame
        # /models/PN).
        return JSONResponse(
            status_code=400,
            content={
                "error": f"MORAE PUT /models/G-AMR failed to process payload ({res.text})",
                "payload": gromet,
            },
        )
    return res.json()

765

766

# code snippets -> fn -> Vec<MET> -> alignment result
# mathml -> MET ->
@router.post("/isa/code-eqn-align", summary="ISA aided inference")
async def code_snippets_to_isa_align(
    mml_system: code2fn.MML_System,
    client: httpx.AsyncClient = Depends(utils.get_client),
):
    """
    Endpoint for ISA aided inference.

    Args:
        mml_system (code2fn.MML_System): Input data containing MML and system details.
        client (httpx.AsyncClient): An asynchronous HTTP client dependency.

    Returns:
        JSONResponse: On success (status 200), the alignment between the
        equation and the code. The content is expected to be a dictionary
        with the following entries (as documented for the ISA alignment
        result; confirm against `convert_to_dict`):
        # matching_ratio: the matching ratio between the equations 1 and the equation 2
        # num_diff_edges: the number of different edges between the equations 1 and the equation 2
        # node_labels1: the name list of the variables and terms in the equation 1
        # node_labels2: the name list of the variables and terms in the equation 2
        # aligned_indices1: the aligned indices in the name list of the equation 1 (-1 means missing)
        # aligned_indices2: the aligned indices in the name list of the equation 2 (-1 means missing)
        # union_graph: the visualization of the alignment result
        # perfectly_matched_indices1: strictly matched node indices in Graph 1

        If either upstream call fails, a JSONResponse with status 400 is
        returned instead, carrying an "error" message and the "payload" that
        was sent. No exception is raised for upstream failures.

    Note:
        This endpoint takes MML information and system details, processes the data, and communicates with external services
        to perform ISA aided inference.
    """
    # Extracting system details using code2fn module
    gromet = await code2fn.fn_given_filepaths(mml_system.system)
    gromet, _ = utils.fn_preprocessor(gromet)

    # Code -> Math Expression Trees
    res = await client.put(f"{SKEMA_RS_ADDESS}/models/MET", json=gromet)
    if res.status_code != 200:
        # Name the route that was actually called (the message used to blame
        # /models/PN).
        return JSONResponse(
            status_code=400,
            content={
                "error": f"MORAE PUT /models/MET failed to process payload ({res.text})",
                "payload": gromet,
            },
        )

    # Math Expression Trees -> code expression graphs
    met_payload = res.json()
    code_graph_res = await client.put(
        f"{SKEMA_RS_ADDESS}/mathml/code-exp-graphs", json=met_payload
    )
    if code_graph_res.status_code != 200:
        # Report the upstream error text; the message used to echo the
        # request payload, which is already returned under "payload".
        return JSONResponse(
            status_code=400,
            content={
                "error": f"code-exp-graphs PUT mathml/code-exp-graphs failed to process payload ({code_graph_res.text})",
                "payload": met_payload,
            },
        )

    # Aligning equation and code
    alignment_res = align_eqn_code(utils.clean_mml(mml_system.mml), code_graph_res.text)

    # Converting numpy arrays to dictionaries so the result can be serialized
    converted_alignment_res = convert_to_dict(alignment_res)

    # Returning the final aligned result
    return JSONResponse(
        status_code=200,
        content=converted_alignment_res,
    )

844

845

# PDF -> [COSMOS] -> Equation images -> [MIT Service] -> Equation JSON
@router.post(
    "/equations_extraction",
    summary="PDF -> [COSMOS] -> Equation images -> [MIT Service] -> Equation JSON",
)
async def equations_extraction(
    equation_extraction_system: code2fn.Equation_Extraction_System,
):
    """
    Extracts images of equations from PDF and Generates the JSON info.

    ### Python example

    Endpoint for extracting images of equations from PDF.

    ```
    import requests

    # Define the URL for the API endpoint
    url: str = "http://127.0.0.1:8000/workflows/equations_extraction"

    # Specify the local path to the PDF file
    pdf_local_path: str = "your PDF path"

    # Specify the folder path where images will be saved
    save_folder: str = "your save folder path"

    # Specify your OpenAI API key
    gpt_key: str = "your openai key here"

    # Send a POST request to the API endpoint
    response = requests.post(url, json={"pdf_local_path": pdf_local_path, "save_folder": save_folder, "gpt_key": gpt_key})
    ```
    """
    job = equation_extraction_system
    try:
        process_pdf_and_images(job.pdf_local_path, job.save_folder, job.gpt_key)
    except Exception as e:
        # Any failure is reported to the caller as a 400 with the message text.
        status, body = 400, {"error": str(e)}
    else:
        status, body = 200, {"message": "Images extracted and processed successfully."}
    return JSONResponse(status_code=status, content=body)

896

897

# Standalone application that serves only the workflow routes of this module.
app = FastAPI()
app.include_router(router)