Coverage for skema/program_analysis/url_ingester.py: 43%
49 statements
« prev ^ index » next coverage.py v7.5.0, created at 2024-04-30 17:15 +0000
« prev ^ index » next coverage.py v7.5.0, created at 2024-04-30 17:15 +0000
1import subprocess
2import tempfile
3import os
4import argparse
5import requests
6import tarfile
7from pathlib import Path
9from skema.gromet.fn import GrometFNModuleCollection
10from skema.program_analysis.easy_multi_file_ingester import easy_process_file_system
def process_git_repo(
    repo_url: str,
    checkout_ref=None,
    write_to_file: bool = False,
    original_source: bool = False,
    dependency_depth: int = 0,
) -> GrometFNModuleCollection:
    """
    Clone a Git repository into a temporary directory and ingest it.

    Args:
        repo_url: Location handed to ``git clone``. Its final path component,
            minus the last suffix (e.g. ``.git``), becomes the system name
            and the name of the clone directory.
        checkout_ref: Optional tag, commit, or branch to check out after
            cloning. Falsy values skip the checkout step.
        write_to_file: Forwarded unchanged to ``easy_process_file_system``.
        original_source: Forwarded unchanged to ``easy_process_file_system``.
        dependency_depth: Forwarded unchanged to ``easy_process_file_system``.

    Returns:
        Whatever ``easy_process_file_system`` returns for the cloned tree.

    Raises:
        SystemExit: With exit status 1 when ``git clone`` or ``git checkout``
            returns a non-zero status.
    """
    system_name = Path(repo_url).stem

    with tempfile.TemporaryDirectory() as temp:
        cloned_path = Path(temp) / system_name

        try:
            # "--" ends option parsing, so a URL that starts with "-" cannot
            # be interpreted by git as a command-line option.
            subprocess.check_call(
                ['git', 'clone', '--', repo_url, cloned_path.name], cwd=temp
            )
        except subprocess.CalledProcessError as e:
            print(f"Failed to clone repository at {repo_url}. Exiting.")
            # Exit with a failure status; the bare exit() helper reported
            # success (status 0) to the calling shell.
            raise SystemExit(1) from e

        if checkout_ref:
            try:
                subprocess.check_call(
                    ['git', 'checkout', checkout_ref], cwd=cloned_path
                )
            except subprocess.CalledProcessError as e:
                # Reported separately so a bad ref is not mistaken for a
                # failed clone.
                print(
                    f"Failed to checkout {checkout_ref} in repository at "
                    f"{repo_url}. Exiting."
                )
                raise SystemExit(1) from e

        # Ingest while the temporary directory still exists.
        gromet_collection = easy_process_file_system(
            system_name,
            str(cloned_path),
            write_to_file,
            original_source,
            dependency_depth,
        )

    return gromet_collection
def process_archive(
    archive_url: str,
    write_to_file: bool = False,
    original_source: bool = False,
    dependency_depth=0,
) -> GrometFNModuleCollection:
    """
    Download a tar archive of a repository and ingest its contents.

    Args:
        archive_url: URL of the tar archive. The last ``/``-separated
            component, with ``.tar.gz`` / ``.tar`` removed, is used as the
            system name and as the expected name of the extracted directory.
        write_to_file: Forwarded unchanged to ``easy_process_file_system``.
        original_source: Forwarded unchanged to ``easy_process_file_system``.
        dependency_depth: Forwarded unchanged to ``easy_process_file_system``.

    Returns:
        Whatever ``easy_process_file_system`` returns for the extracted tree.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.Timeout: If the server stops responding.
        tarfile.TarError: If the download is not a readable tar archive, or
            the extraction filter rejects a member.
    """
    system_name = archive_url.split('/')[-1].replace('.tar.gz', '').replace('.tar', '')

    # The timeout bounds connect/read stalls (not total download time), and
    # raise_for_status() stops an HTTP error page from being treated as a tarball.
    response = requests.get(archive_url, timeout=60)
    response.raise_for_status()

    with tempfile.TemporaryDirectory() as temp:
        temp_archive_path = Path(temp) / f"{system_name}.tar.gz"
        temp_archive_path.write_bytes(response.content)

        with tarfile.open(temp_archive_path, "r:*") as tar:
            if hasattr(tarfile, "data_filter"):
                # The archive is untrusted: the "data" filter rejects members
                # that would be written outside the destination directory.
                tar.extractall(path=temp, filter="data")
            else:
                # NOTE(review): this interpreter predates extraction filters,
                # so members are extracted without path validation.
                tar.extractall(path=temp)

        extracted_dir_path = Path(temp) / system_name
        if not extracted_dir_path.is_dir():
            # The archive's root folder is not always named after the URL's
            # file name. If exactly one directory was extracted, use it
            # rather than ingesting a path that does not exist.
            top_level_dirs = [p for p in Path(temp).iterdir() if p.is_dir()]
            if len(top_level_dirs) == 1:
                extracted_dir_path = top_level_dirs[0]

        # Ingest while the temporary directory still exists.
        gromet_collection = easy_process_file_system(
            system_name,
            str(extracted_dir_path),
            write_to_file,
            original_source,
            dependency_depth,
        )

    return gromet_collection
def main(argv=None):
    """
    Command-line entry point: ingest a Git repository or a tar archive.

    Args:
        argv: Optional list of argument strings to parse. When ``None``
            (the default), argparse reads ``sys.argv[1:]``.

    Returns:
        None. The ingested collection is deliberately not returned, so
        wrappers that pass the result to ``sys.exit`` still exit with 0.

    Raises:
        SystemExit: Raised by argparse (status 2) for missing or invalid
            arguments, including a ``--mode`` other than ``git`` or ``tar``.
    """
    parser = argparse.ArgumentParser(description="Process a Git repository or a tar archive into a GrometFNModuleCollection.")
    parser.add_argument("--mode", choices=['git', 'tar'], required=True, help="The mode of operation: 'git' for Git repositories, 'tar' for tar archives.")
    parser.add_argument("url", help="The URL of the Git repository or tar archive to process.")
    parser.add_argument("--ref", help="The tag, commit, or branch to checkout after cloning (Git mode only).", default=None)
    parser.add_argument("--write_to_file", action="store_true", help="Whether to output Gromet to file.")
    parser.add_argument("--source", action="store_true", help="Toggle whether or not to include the full source code in the Gromet metadata.")
    parser.add_argument("--dependency_depth", type=int, default=0, help="Specify the dependency depth for analysis")

    args = parser.parse_args(argv)

    # argparse enforces choices=['git', 'tar'] on a required option, so no
    # fallback branch for other modes is needed.
    if args.mode == 'git':
        process_git_repo(args.url, args.ref, args.write_to_file, args.source, args.dependency_depth)
    elif args.mode == 'tar':
        process_archive(args.url, args.write_to_file, args.source, args.dependency_depth)
# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()