Coverage for skema/program_analysis/module_locate.py: 100%
61 statements
« prev ^ index » next coverage.py v7.5.0, created at 2024-04-30 17:15 +0000
« prev ^ index » next coverage.py v7.5.0, created at 2024-04-30 17:15 +0000
1import argparse
2import os
3import pkg_resources
4import requests
5import importlib
6import sys
7import re
8import subprocess
9import tempfile
10from pathlib import Path
12from skema.gromet.fn import TypedValue, ImportSourceType, GrometFNModuleDependencyReference
# Captures one import statement per match, anchored at the start of a line:
# either "from <module> import <name>[, <name>...]" or
# "import <module>[, <module>...]". Anything after the name list (for example
# an " as alias" suffix) is not part of the captured text.
IMPORT_PATTERN = re.compile(
    r'^\s*(from\s+[^\s]+\s+import\s+[^\s,]+(?:\s*,\s*[^\s,]+)*|import\s+[^\s,]+(?:\s*,\s*[^\s,]+)*)',
    flags=re.MULTILINE,
)
def identify_source_type(source: str):
    """
    Classify the origin of a located module.

    :param source: A filesystem path or URL describing where a module lives,
        or a falsy value (None / empty string) when nothing was found.
    :return: One of "Unknown", "Compiled", "Repository", "Url" or "Local".
    """
    if not source:
        return "Unknown"
    if source == "https://github.com/python/cpython":
        return "Compiled"

    # Only the host of a URL decides whether the source is a GitHub repository.
    # Testing the whole string would misclassify local paths such as
    # "/home/me/github/project/mod.py" and non-GitHub URLs whose path merely
    # contains "github" (e.g. a "pygithub-x.y.tar.gz" download link).
    _, separator, remainder = source.partition("://")
    if separator:
        host = remainder.split("/", 1)[0].lower()  # host names are case-insensitive
        if "github" in host:
            return "Repository"

    if source.startswith("http"):
        return "Url"
    return "Local"
def extract_imports(source: str):
    """
    Build a dependency reference for every module imported by a piece of source code.

    Import statements are found with IMPORT_PATTERN. For "from X import ..." the
    module is X; for "import A, B" every comma-separated module is reported.
    Each distinct module is located once with module_locate and classified with
    identify_source_type.

    :param source: Python source code as a string.
    :return: A list of GrometFNModuleDependencyReference objects, one per distinct
        module, in order of first appearance.
    """
    # A dict is used as an ordered set: duplicates collapse while the
    # first-seen order is kept, which makes the output deterministic.
    module_names = {}
    for statement in IMPORT_PATTERN.findall(source):
        # Every match starts with "from" or "import" followed by whitespace,
        # so this split always yields exactly two parts.
        keyword, remainder = statement.split(None, 1)
        if keyword == "from":
            module_names[remainder.split()[0]] = None
        else:
            # "import a, b" names several modules; report each one instead of
            # the raw first token (which would be "a," with a trailing comma).
            for name in remainder.split(","):
                module_names[name.strip()] = None

    output_references = []
    for module in module_names:
        source_value = module_locate(module)
        source_type = identify_source_type(source_value)
        output_references.append(
            GrometFNModuleDependencyReference(
                name=module,
                source_reference=TypedValue(
                    type=source_type,
                    value=source_value,
                ),
            )
        )

    return output_references
def module_locate(module_name: str) -> str:
    """
    Locates the source of a Python module given its (possibly dotted) module name.

    Resolution order:
    1. Modules compiled into the interpreter map to the CPython repository URL.
    2. Modules importable in the current environment map to their file path
       (the package directory when the module is a package).
    3. Otherwise PyPI is queried: a GitHub link from the project's "Source" or
       "Homepage" URL is preferred, then the .tar.gz URL of the current version.

    :param module_name: The name of the module or submodule as a string
        (e.g. "os" or "xml.etree"), not a full import statement.
    :return: The module's file path, GitHub URL, or tarball URL. None is returned
        when the module cannot be located or the PyPI lookup fails.
    """
    # Check if module is compiled into Python
    if module_name in sys.builtin_module_names:
        return "https://github.com/python/cpython"

    # Empty and relative names (".", ".pkg") cannot be imported without a package
    # context (importlib raises ValueError/TypeError for them) and are not valid
    # PyPI project names either, so there is nothing to locate.
    if not module_name or module_name.startswith("."):
        return None

    # Attempt to find the module in the local environment.
    # NOTE: import_module executes the module's top-level code.
    try:
        module_obj = importlib.import_module(module_name)
    except ImportError:
        module_obj = None  # Not installed locally; fall through to PyPI

    # Namespace packages have no __file__, so they also fall through to PyPI.
    module_file = getattr(module_obj, '__file__', None)
    if module_file:
        module_path = Path(module_file)
        # A package is represented by its directory rather than its __init__.py
        if module_path.name == "__init__.py":
            return str(module_path.parent)
        return str(module_path)

    # Fetch module info from PyPI (the dotted name is queried as given)
    try:
        pypi_url = f"https://pypi.org/pypi/{module_name}/json"
        # A timeout keeps an unresponsive server from blocking the caller forever.
        response = requests.get(pypi_url, timeout=10)
        # Surface 404s and other HTTP errors as such instead of as a KeyError below.
        response.raise_for_status()
        data = response.json()

        # PyPI may report "project_urls": null, and individual entries may be empty.
        project_urls = data.get('info', {}).get('project_urls') or {}
        github_url = project_urls.get('Source') or project_urls.get('Homepage') or ''
        if 'github.com' in github_url:
            return github_url

        # Get the tarball URL for the current version
        version = data['info']['version']
        releases = data['releases'].get(version, [])
        for release in releases:
            if release['filename'].endswith('.tar.gz'):
                return release['url']
    except Exception as e:
        # Best-effort lookup: network, HTTP, JSON and schema errors are all
        # reported and treated as "not found".
        print(f"Error fetching module information from PyPI: {e}")

    return None
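"""
# NOTE: these ad-hoc checks are stale. module_locate() expects a bare module
# name (e.g. "os"), not a full import statement; whole statements should be
# passed to extract_imports() instead.
# Basic tests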
101print(module_locate("import os"))
102print(module_locate("import requests"))
103print(module_locate("import xml.etree"))
104print(module_locate("import minimal"))
106# PyDice tests
107print(module_locate("import numpy as np"))
108print(module_locate("import time"))
109print(module_locate("from numba import njit,guvectorize,float64"))
110print(module_locate("import scipy.optimize as opt"))
111print(module_locate("from matplotlib import pyplot as plt"))
112"""