Coverage for skema/program_analysis/tree_sitter_parsers/build_parsers.py: 28%

46 statements  

« prev     ^ index     » next       coverage.py v7.5.0, created at 2024-04-30 17:15 +0000

1import argparse 

2import os 

3import yaml 

4import subprocess 

5import site 

6from pathlib import Path 

7from typing import List 

8 

9from tree_sitter import Language, Parser 

10 

# Absolute (symlink-resolved) location of the compiled shared library,
# inside the "build" directory next to this module.
INSTALLED_LANGUAGES_FILEPATH = Path(__file__).resolve().parent.joinpath(
    "build", "installed_languages.so"
)
# YAML file listing the available languages; it sits next to this module.
LANGUAGES_YAML_FILEPATH = Path(__file__).parent.joinpath("languages.yaml")

15 

16 

def build_parsers(languages: List[str]) -> None:
    """Fetch the selected tree-sitter grammars and compile them into one library.

    For every entry of the languages YAML file whose key is in ``languages``,
    the grammar repository is cloned into the build directory (unless a clone
    is already present), updated with ``git pull`` and, when the entry has a
    ``commit_sha``, checked out at that commit. The selected grammars are then
    compiled into ``INSTALLED_LANGUAGES_FILEPATH``, replacing any previous
    library file.

    The exit status of the git commands is not checked, matching the
    best-effort behavior this function has always had.

    Args:
        languages: Keys of the languages YAML file naming the grammars to build.
    """
    # The 'build' directory containing the cloned tree-sitter parsers and the
    # shared object library may or may not already exist, so create it first.
    language_build_dir = INSTALLED_LANGUAGES_FILEPATH.parent
    language_build_dir.mkdir(parents=True, exist_ok=True)

    language_yaml_obj = yaml.safe_load(LANGUAGES_YAML_FILEPATH.read_text())
    for language, language_dict in language_yaml_obj.items():
        if language not in languages:
            continue

        # Assumes git names the clone directory after "tree_sitter_name"
        # (i.e. it matches the last component of "clone_url").
        language_clone_directory = language_build_dir / language_dict["tree_sitter_name"]

        # Clone the repository only if it doesn't exist yet; cloning over an
        # existing checkout would just make git fail.
        if not (language_clone_directory / ".git").exists():
            subprocess.run(
                ["git", "clone", language_dict["clone_url"]], cwd=language_build_dir
            )
        # Update the repository to pull any new commits
        subprocess.run(["git", "pull"], cwd=language_clone_directory)
        # Checkout the correct commit if commit_sha is specified
        if language_dict.get("commit_sha"):
            subprocess.run(
                ["git", "checkout", str(language_dict["commit_sha"])],
                cwd=language_clone_directory,
            )

    # subprocess.run() accepts the working directory as an argument, but
    # Language.build_library needs the process cwd to be the build directory
    # because the grammar paths below are relative to it.
    previous_cwd = os.getcwd()
    os.chdir(language_build_dir)
    try:
        # If the library file already exists, build_library will not rebuild
        # it and returns False, so delete any previous build first.
        INSTALLED_LANGUAGES_FILEPATH.unlink(missing_ok=True)
        Language.build_library(
            # Store the library in the `build` directory
            str(INSTALLED_LANGUAGES_FILEPATH.name),
            # Include one or more languages
            [
                language_dict["tree_sitter_name"]
                for language, language_dict in language_yaml_obj.items()
                if language in languages
            ],
        )
    finally:
        # Always restore the caller's working directory, even when the build fails.
        os.chdir(previous_cwd)

60 

61 

def copy_to_site_packages():
    """Duplicate the built parser library into the installed skema package.

    The destination is the ``skema/program_analysis/tree_sitter_parsers/build``
    directory under the first entry returned by ``site.getsitepackages()``;
    missing parent directories are created.
    """
    site_packages_root = Path(site.getsitepackages()[0])
    destination = site_packages_root.joinpath(
        "skema",
        "program_analysis",
        "tree_sitter_parsers",
        "build",
        "installed_languages.so",
    )
    destination.parent.mkdir(parents=True, exist_ok=True)
    library_bytes = INSTALLED_LANGUAGES_FILEPATH.read_bytes()
    destination.write_bytes(library_bytes)

67 

if __name__ == "__main__":
    # Command-line entry point: one --<language> flag per YAML entry, plus
    # --all to build everything and --ci to also copy the result into
    # site-packages.
    parser = argparse.ArgumentParser()

    # Load the language table once; read_text() closes the file immediately.
    language_yaml_obj = yaml.safe_load(LANGUAGES_YAML_FILEPATH.read_text())
    parser.add_argument("--all", action="store_true", help="Build all tree-sitter parsers")
    for language in language_yaml_obj:
        # An explicit dest keeps the parsed attribute name identical to the
        # YAML key, even for keys argparse would otherwise rewrite (e.g. "-").
        parser.add_argument(
            f"--{language}",
            action="store_true",
            dest=language,
            help=f"Include {language} language",
        )
    parser.add_argument("--ci", action="store_true", help="Copy to site packages if running on ci")
    args = parser.parse_args()

    if args.all:
        selected_languages = list(language_yaml_obj)
    else:
        # Only consider language flags, not unrelated options such as --ci.
        parsed_flags = vars(args)
        selected_languages = [
            language for language in language_yaml_obj if parsed_flags[language]
        ]

    build_parsers(selected_languages)

    if args.ci:
        copy_to_site_packages()