Coverage for skema/program_analysis/tree_sitter_parsers/build_parsers.py: 28%
46 statements
« prev ^ index » next coverage.py v7.5.0, created at 2024-04-30 17:15 +0000
« prev ^ index » next coverage.py v7.5.0, created at 2024-04-30 17:15 +0000
1import argparse
2import os
3import yaml
4import subprocess
5import site
6from pathlib import Path
7from typing import List
9from tree_sitter import Language, Parser
# Shared library that holds every compiled tree-sitter grammar. It lives in
# the `build` directory next to this module.
INSTALLED_LANGUAGES_FILEPATH = (
    Path(__file__).resolve().parent / "build" / "installed_languages.so"
)
# YAML file listing the grammars that can be built. It is anchored to the
# same resolved module directory as the library path above, so both paths
# always refer to the same location.
LANGUAGES_YAML_FILEPATH = Path(__file__).resolve().parent / "languages.yaml"
def build_parsers(languages: List[str]) -> None:
    """Clone the selected tree-sitter grammars and compile them into one library.

    Every entry of ``languages.yaml`` whose key appears in ``languages`` is
    cloned into the ``build`` directory (unless a clone is already present),
    updated, and, when the entry specifies ``commit_sha``, pinned to that
    commit. The selected grammars are then compiled into
    ``INSTALLED_LANGUAGES_FILEPATH``, replacing any previously built library.

    Args:
        languages: Keys of ``languages.yaml`` naming the grammars to build.

    Raises:
        subprocess.CalledProcessError: If cloning a grammar repository or
            checking out its pinned commit fails.
    """
    # The 'build' directory containing the cloned tree-sitter parsers and the
    # shared object library may or may not already exist, so create it first.
    language_build_dir = INSTALLED_LANGUAGES_FILEPATH.parent
    language_build_dir.mkdir(parents=True, exist_ok=True)

    language_yaml_obj = yaml.safe_load(LANGUAGES_YAML_FILEPATH.read_text())
    selected = {
        language: language_dict
        for language, language_dict in language_yaml_obj.items()
        if language in languages
    }

    for language_dict in selected.values():
        language_clone_directory = (
            language_build_dir / language_dict["tree_sitter_name"]
        )

        # Clone the repository only if it doesn't exist yet. The target
        # directory is passed explicitly so the clone always lands where the
        # following git commands and the build step expect it.
        if not language_clone_directory.exists():
            subprocess.run(
                [
                    "git",
                    "clone",
                    language_dict["clone_url"],
                    str(language_clone_directory),
                ],
                cwd=language_build_dir,
                check=True,
            )

        # Update the repository to pull any new commits. This is deliberately
        # best-effort: `git pull` exits non-zero when a previous run left the
        # clone on a detached HEAD (pinned commit), and that must not abort
        # the build.
        subprocess.run(["git", "pull"], cwd=language_clone_directory, check=False)

        # Checkout the correct commit if commit_sha is specified. A failure
        # here would silently build the wrong revision, so it is an error.
        commit_sha = language_dict.get("commit_sha")
        if commit_sha:
            subprocess.run(
                ["git", "checkout", str(commit_sha)],
                cwd=language_clone_directory,
                check=True,
            )

    # We can pass the cwd for subprocess as an argument to run().
    # However, Language.build_library requires the cwd to be set to the build
    # directory, so switch to it temporarily and always switch back, even if
    # the build raises.
    previous_cwd = os.getcwd()
    os.chdir(language_build_dir)
    try:
        # build_library does not rebuild over an existing library file (it
        # returns False instead), so remove any previous build first.
        INSTALLED_LANGUAGES_FILEPATH.unlink(missing_ok=True)
        Language.build_library(
            # Store the library in the `build` directory
            INSTALLED_LANGUAGES_FILEPATH.name,
            # Include one or more languages
            [
                language_dict["tree_sitter_name"]
                for language_dict in selected.values()
            ],
        )
    finally:
        os.chdir(previous_cwd)
def copy_to_site_packages():
    """Install the built parser library into the skema package in site-packages.

    The destination is
    ``<first site-packages dir>/skema/program_analysis/tree_sitter_parsers/build/installed_languages.so``;
    missing parent directories are created before the file is written.
    """
    site_packages_root = Path(site.getsitepackages()[0])
    destination = site_packages_root.joinpath(
        "skema",
        "program_analysis",
        "tree_sitter_parsers",
        "build",
        "installed_languages.so",
    )
    destination.parent.mkdir(parents=True, exist_ok=True)

    # Byte-for-byte copy of the library produced by the build step.
    library_bytes = INSTALLED_LANGUAGES_FILEPATH.read_bytes()
    destination.write_bytes(library_bytes)
if __name__ == "__main__":
    # Command-line entry point: one boolean flag per language declared in
    # languages.yaml, plus --all (build everything) and --ci (also copy the
    # built library into site-packages).
    parser = argparse.ArgumentParser()

    # Parse the YAML once; its keys define both the available flags and the
    # set of buildable languages. read_text() closes the file for us.
    language_yaml_obj = yaml.safe_load(LANGUAGES_YAML_FILEPATH.read_text())
    parser.add_argument("--all", action="store_true", help="Build all tree-sitter parsers")
    for language in language_yaml_obj:
        flag = f"--{language}"
        help_text = f"Include {language} language"
        # An explicit dest keeps the attribute name identical to the YAML key,
        # even for names argparse would otherwise rewrite (e.g. containing "-").
        parser.add_argument(flag, dest=language, action="store_true", help=help_text)
    parser.add_argument("--ci", action="store_true", help="Copy to site packages if running on ci")
    args = parser.parse_args()

    if args.all:
        selected_languages = list(language_yaml_obj)
    else:
        # Only consider flags that correspond to declared languages, so
        # control flags such as --ci never end up in the language list.
        chosen_flags = vars(args)
        selected_languages = [
            language for language in language_yaml_obj if chosen_flags.get(language)
        ]

    # build_parsers deletes the existing library before compiling, so an
    # empty selection could only destroy a previous build. Fail fast instead.
    if not selected_languages:
        parser.error("no languages selected; pass --all or at least one language flag")

    build_parsers(selected_languages)

    if args.ci:
        copy_to_site_packages()