Coverage for skema/program_analysis/fn_unifier.py: 78%

142 statements  

« prev     ^ index     » next       coverage.py v7.5.0, created at 2024-04-30 17:15 +0000

1# Function Network (FN) Unifier 

2# Given a GroMEt FN JSON and a Comments JSON file, we 'unify' them by 

3# 1. Extracting the GroMEt JSON and turning it back into an object 

4# 2. Extracting the comments JSON file 

5# 3. Appending all comments from the comments JSON into the respective MetadataCollections for each FN 

6 

7from skema.program_analysis.JSON2GroMEt.json2gromet import json_to_gromet 

8from skema.program_analysis.comment_extractor.model import ( 

9 CodeComments, 

10 SingleFileCommentResponse, 

11 MultiFileCommentResponse, 

12) 

13from skema.gromet.metadata.source_code_comment import SourceCodeComment 

14from skema.gromet.metadata.source_code_reference import SourceCodeReference 

15from skema.gromet.metadata.comment_type import CommentType 

16from skema.gromet.fn.gromet_fn_module_collection import GrometFNModuleCollection 

17from skema.utils.fold import dictionary_to_gromet_json, del_nulls 

18 

19from typing import Dict, Text, Any 

20import argparse 

21import json 

22import re 

23 

24 

def normalize_module_path(path: str):
    """Convert a dotted GroMEt module path into a slash-separated path.

    Module paths in the GroMEt FN are dotted, while the comments dictionary
    is keyed by slash-separated paths.
    """
    return "/".join(path.split("."))

29 

30 

def normalize_extraction_names(extraction: dict):
    """Return a copy of ``extraction`` whose keys are normalized file names.

    Each key has any leading "./" removed and a trailing ".py" extension
    removed. Only the prefix and the suffix are touched: occurrences of
    ".py" or "./" elsewhere in the key (e.g. "../shared/x.py" or
    "tools.pyutil/run.py") are left intact so that paths are not mangled.

    Values are carried over unchanged.
    """

    def _normalize_key(key):
        # Strip every leading "./" (e.g. "././a.py" -> "a.py")
        while key.startswith("./"):
            key = key[2:]
        # Strip the ".py" filename extension only when it is the suffix
        if key.endswith(".py"):
            key = key[:-3]
        return key

    return {_normalize_key(k): v for k, v in extraction.items()}

37 

38 

def strip_id(func_name):
    """Remove a trailing "_id###" identifier from a function name.

    The GroMEt generation appends "_id" followed by a number to function
    names. Only the pure function name is needed here, so that suffix is
    removed. Names that do not end with the pattern are returned unchanged.
    """
    # Anchored at the end of the string so that an "_id###" sequence in the
    # middle of a name never causes an unrelated trailing segment to be cut.
    return re.sub(r"_id\d+\Z", "", func_name)

59 

60 

def find_source_code_reference(metadatum):
    """Return the first SourceCodeReference found in ``metadatum``.

    ``metadatum`` is iterated in order; None is returned when it holds no
    SourceCodeReference instance.
    """
    return next(
        (entry for entry in metadatum if isinstance(entry, SourceCodeReference)),
        None,
    )

69 

70 

def find_comment(comments, line_num):
    """Look up the comment text recorded for a given line number.

    ``comments`` is the per-file comments dictionary; its "comments" list is
    scanned in order and the "contents" of the first entry whose
    "line_number" equals ``line_num`` is returned. None is returned when no
    entry matches.
    """
    matching_contents = (
        record["contents"]
        for record in comments["comments"]
        if record["line_number"] == line_num
    )
    return next(matching_contents, None)

79 

80 

def insert_metadata(gromet_metadata, new_metadata):
    """Append ``new_metadata`` as a new single-element entry.

    The metadata is wrapped in its own list before being appended to
    ``gromet_metadata``. Returns the length of ``gromet_metadata`` after the
    append, i.e. the 1-based position of the new entry.

    NOTE: not used now but will be in the future
    """
    wrapped_entry = [new_metadata]
    gromet_metadata.append(wrapped_entry)
    updated_length = len(gromet_metadata)
    return updated_length

86 

87 

def align_gromet_elements(gromet_metadata, gromet_comments, gromet_elements):
    """Attach line comments to the metadata of a table of GroMEt elements.

    GroMEt elements are generic enough that the same routine can iterate
    through any table of them and append comment metadata as necessary.

    For every element that carries a metadata index, the corresponding
    metadata entry is searched for a SourceCodeReference. If one exists and
    the comments for the file contain a comment on the reference's
    ``line_begin``, a SourceCodeComment spanning the same lines and columns
    as the reference is appended to that metadata entry (in place).

    Parameters:
        gromet_metadata: the metadata collection indexed by the elements.
        gromet_comments: per-file comments dictionary with a "comments" list.
        gromet_elements: iterable of elements exposing ``.metadata``, or None
            (in which case nothing is done).
    """
    # TODO: associate code_file_reference_uid
    if gromet_elements is None:
        return

    for elem in gromet_elements:
        if elem.metadata is None:
            continue

        # Element metadata indices are 1-based; the collection is 0-based
        metadatum = gromet_metadata[elem.metadata - 1]
        source_ref = find_source_code_reference(metadatum)
        if source_ref is None:
            continue

        # Comments are matched against the first line of the reference
        comment = find_comment(gromet_comments, source_ref.line_begin)
        if comment is None:
            continue

        metadatum.append(
            SourceCodeComment(
                comment=comment,
                comment_type=CommentType.OTHER,
                context_function_name=None,
                code_file_reference_uid=None,
                line_begin=source_ref.line_begin,
                line_end=source_ref.line_end,
                col_begin=source_ref.col_begin,
                col_end=source_ref.col_end,
            )
        )

117 

118 

def align_fn(gromet_metadata, gromet_comments, gromet_fn):
    """Align the comments and docstring of one file with a single GroMEt FN.

    Line comments are attached to every table of the FN (b, bf, opi, opo,
    pif, pof, pic, poc) through ``align_gromet_elements``. Afterwards, if the
    first box of the FN is named and a docstring is recorded under that name
    (with the generated "_id###" suffix stripped), a DOCSTRING
    SourceCodeComment is appended to the metadata entry of that box.

    Parameters:
        gromet_metadata: the metadata collection indexed by the FN's elements;
            entries are modified in place.
        gromet_comments: per-file comments dictionary with "comments" and
            "docstrings" entries.
        gromet_fn: the function network whose tables are aligned.
    """
    # All these GroMEt tables store their metadata in the same way, so the
    # generic aligner handles the box table (b) as well as the others
    for table in (
        gromet_fn.b,
        gromet_fn.bf,
        gromet_fn.opi,
        gromet_fn.opo,
        gromet_fn.pif,
        gromet_fn.pof,
        gromet_fn.pic,
        gromet_fn.poc,
    ):
        align_gromet_elements(gromet_metadata, gromet_comments, table)

    # Check if the current FN has a name, and if it's associated
    # with a docstring, align the docstring with it if that's the case.
    # An absent or empty box table has no first box to inspect.
    if not gromet_fn.b:
        return

    main_box = gromet_fn.b[0]
    if main_box.name is None:
        return

    normalized_func_name = strip_id(main_box.name)
    docstrings = gromet_comments["docstrings"]
    if normalized_func_name not in docstrings:
        return

    metadata_idx = main_box.metadata
    if metadata_idx is None:
        return

    # Element metadata indices are 1-based; the collection is 0-based
    metadatum = gromet_metadata[metadata_idx - 1]

    # Take the location from the first box's own source reference rather
    # than from whichever box happened to be visited last during alignment.
    # NOTE(review): when the box has no SourceCodeReference the location
    # fields are left as None -- confirm SourceCodeComment accepts that.
    source_ref = find_source_code_reference(metadatum)

    metadatum.append(
        SourceCodeComment(
            comment="".join(docstrings[normalized_func_name]),
            comment_type=CommentType.DOCSTRING,
            context_function_name=normalized_func_name,
            code_file_reference_uid=None,
            line_begin=getattr(source_ref, "line_begin", None),
            line_end=getattr(source_ref, "line_end", None),
            col_begin=getattr(source_ref, "col_begin", None),
            col_end=getattr(source_ref, "col_end", None),
        )
    )

178 

179 

def find_fn(gromet_modules, fn_name):
    """Find the module FN named after the last component of ``fn_name``.

    ``fn_name`` is a slash-separated path; only the part after the final "/"
    is compared against each module's ``name``. The first matching module in
    ``gromet_modules`` is returned, or None when none matches.
    """
    target_name = fn_name.rsplit("/", 1)[-1]
    return next(
        (candidate for candidate in gromet_modules if target_name == candidate.name),
        None,
    )

189 

190 

def _single_file_comments_to_dict(single_file_comment_obj):
    """Convert one SingleFileCommentResponse into the legacy per-file dictionary."""
    legacy_comments = []
    for single_comment in single_file_comment_obj.single:
        single_dict = dict(single_comment)
        # The aligner looks comments up under "contents", while the
        # tree-sitter style entry stores the text under "content"
        single_dict["contents"] = single_dict["content"]
        legacy_comments.append(single_dict)

    return {
        "comments": legacy_comments,
        "docstrings": {
            docstring_comment.function_name: docstring_comment.content
            for docstring_comment in single_file_comment_obj.docstring
        },
    }


def _tree_to_rust(comments):
    """Convert tree-sitter style comments to the comment style supported by the fn_unifier."""
    # A plain dictionary is taken to already be in the supported style.
    # NOTE(review): assumes dictionaries loaded from a comments JSON file use
    # the legacy layout ("comments"/"docstrings", or file -> such dict) -- confirm.
    if isinstance(comments, dict):
        return comments

    if isinstance(comments, SingleFileCommentResponse):
        return _single_file_comments_to_dict(comments)

    return {
        file: _single_file_comments_to_dict(single_file_comment_obj)
        for file, single_file_comment_obj in comments.files.items()
    }


def _align_module(module_FN, file_comments):
    """Align one file's comments with a module's top-level FN and its fn_array."""
    FN_metadata = module_FN.metadata_collection
    align_fn(FN_metadata, file_comments, module_FN.fn)

    # fn_array may be absent (None) as well as empty
    for FN in module_FN.fn_array or []:
        align_fn(FN_metadata, file_comments, FN)


def align_full_system(gromet_obj: "GrometFNModuleCollection", extraction: "CodeComments"):
    """Align extracted comments with every module of a GroMEt collection.

    The comments extraction holds comments for all files in the system and
    can appear in two ways:
      - extractions for a single file: one top level dictionary containing
        the comments and docstrings for that file
      - extractions for a multi file system: a top level dictionary that maps
        each file in the system to a dictionary containing the comments and
        docstrings for that file
    The kind is detected from the structure of the dictionary.

    Parameters:
        gromet_obj: the module collection; its metadata collections are
            modified in place.
        extraction: a CodeComments object (tree-sitter comment extractor
            format), or a dictionary, which is used as-is.

    Raises:
        NotImplementedError: for a single file extraction when the GroMEt
            does not hold exactly one module.
    """
    # TODO: Update fn_unifier logic to support tree-sitter comment extractor format
    extraction = _tree_to_rust(extraction)

    if "comments" in extraction and "docstrings" in extraction:
        # Single file system
        # NOTE: We assume for the moment that if we're aligning a single file
        # that the corresponding GroMEt has exactly one module
        if len(gromet_obj.modules) != 1:
            raise NotImplementedError(
                "Single file alignment from a multi module GroMEt system not supported yet"
            )

        module_FN = gromet_obj.modules[0]
        if module_FN is not None:
            _align_module(module_FN, extraction)
        return

    # Multi-file system
    extraction = normalize_extraction_names(extraction)
    for module in gromet_obj.module_index:
        # Go through each file in the GroMEt FN
        normalized_path = normalize_module_path(module)
        if normalized_path not in extraction:
            continue

        # Find the current FN in the collection
        module_FN = find_fn(gromet_obj.modules, normalized_path)
        if module_FN is not None:
            _align_module(module_FN, extraction[normalized_path])

261 

262 

def process_alignment(
    gromet_json: Dict[Text, Any], comments_json: Dict[Text, Any]
) -> GrometFNModuleCollection:
    """Unify a GroMEt JSON with its extracted comments.

    The GroMEt JSON is turned back into an object with ``json_to_gromet``,
    the comments are aligned onto it with ``align_full_system``, and the
    resulting GroMEt object is returned.
    """
    unified_collection = json_to_gromet(gromet_json)
    align_full_system(unified_collection, comments_json)
    return unified_collection

273 

274 

if __name__ == "__main__":
    # Command line entry point: unify a GroMEt JSON file with a comments
    # JSON file and write the result back over the GroMEt file
    parser = argparse.ArgumentParser()
    parser.add_argument("--gromet", type=str, help="Path to a GroMEt JSON file")
    parser.add_argument("--comments", type=str, help="Path to a Comments JSON file")
    args = parser.parse_args()

    # Get the GroMEt JSON and turn it back into an object
    gromet_object = json_to_gromet(args.gromet)

    # Get the comments data from the JSON file; the context manager closes
    # the file even if parsing fails
    with open(args.comments, "r") as comments_file:
        comments_json = json.load(comments_file)

    # NOTE(review): align_full_system is annotated as taking a CodeComments
    # object, while a plain dictionary is passed here -- confirm the
    # expected input format.
    align_full_system(gromet_object, comments_json)

    # Serialize before opening the output: opening with "w" truncates the
    # GroMEt file, so a failure during serialization must not happen while
    # the file is already emptied
    gromet_collection_dict = gromet_object.to_dict()
    serialized_gromet = dictionary_to_gromet_json(del_nulls(gromet_collection_dict))

    # Write out the gromet with the comments
    with open(args.gromet, "w") as f:
        f.write(serialized_gromet)