Coverage for skema/program_analysis/fn

# Function Network (FN) Unifier
# Given a GroMEt FN JSON and a Comments JSON file, we 'unify' them by
# 1. Extracting the GroMEt JSON and turning it back into an object
# 2. Extracting the comments JSON file
# 3. Appending all comments from the comments JSON into the respective MetadataCollections for each FN

7from skema.program_analysis.JSON2GroMEt.json2gromet import json_to_gromet

8from skema.program_analysis.comment_extractor.model import (

9 CodeComments,

10 SingleFileCommentResponse,

11 MultiFileCommentResponse,

12)

13from skema.gromet.metadata.source_code_comment import SourceCodeComment

14from skema.gromet.metadata.source_code_reference import SourceCodeReference

15from skema.gromet.metadata.comment_type import CommentType

16from skema.gromet.fn.gromet_fn_module_collection import GrometFNModuleCollection

17from skema.utils.fold import dictionary_to_gromet_json, del_nulls

19from typing import Dict, Text, Any

20import argparse

21import json

22import re

def normalize_module_path(path: str):
    """Return ``path`` with every dot replaced by a forward slash.

    Module paths in the GroMEt FN are dotted, while the comments
    dictionary is keyed by slash-separated paths.
    """
    return "/".join(path.split("."))

def normalize_extraction_names(extraction: dict):
    """Return a copy of ``extraction`` whose keys are normalized file names.

    A key is normalized by removing any leading "./" prefix and a trailing
    ".py" extension. Only the ends of the key are touched, so a ".py" or
    "./" sequence that happens to occur in the middle of a path (for
    example in "../shared/util.py") is left intact.

    Values are carried over unchanged.
    """

    def _normalize(name):
        # Strip the relative-path prefix (possibly repeated) from the front only.
        while name.startswith("./"):
            name = name[2:]
        # Strip the extension from the end only.
        if name.endswith(".py"):
            name = name[: -len(".py")]
        return name

    return {_normalize(key): value for key, value in extraction.items()}

def strip_id(func_name):
    """Remove a trailing "_id###" suffix (### being digits) from ``func_name``.

    The suffix is an identifier appended by the GroMEt generation; callers
    need the plain function name without it. Names that do not end with
    such a suffix are returned unchanged, including names that merely
    contain "_id###" somewhere in the middle.
    """
    # Anchor at the very end of the string so only the appended identifier
    # is removed, never a trailing part of the real function name.
    return re.sub(r"_id\d+\Z", "", func_name)

def find_source_code_reference(metadatum):
    """Return the first SourceCodeReference found in ``metadatum``.

    ``metadatum`` is iterated in order; ``None`` is returned when none of
    its entries is a SourceCodeReference instance.
    """
    return next(
        (entry for entry in metadatum if isinstance(entry, SourceCodeReference)),
        None,
    )

def find_comment(comments, line_num):
    """Look up the comment recorded for ``line_num`` in one file's comments.

    ``comments["comments"]`` is scanned in order and the "contents" value
    of the first entry whose "line_number" equals ``line_num`` is
    returned. ``None`` is returned when no entry matches.
    """
    matching_contents = (
        entry["contents"]
        for entry in comments["comments"]
        if entry["line_number"] == line_num
    )
    return next(matching_contents, None)

def insert_metadata(gromet_metadata, new_metadata):
    """Append ``new_metadata`` as a new single-element entry.

    The value is wrapped in its own list before being appended to
    ``gromet_metadata``. The length of ``gromet_metadata`` after the
    append is returned.

    NOTE: not used now but will be in the future
    """
    new_entry = [new_metadata]
    gromet_metadata.append(new_entry)
    return len(gromet_metadata)

def align_gromet_elements(gromet_metadata, gromet_comments, gromet_elements):
    """Attach matching source comments to the metadata of GroMEt elements.

    Gromet elements are generic enough that the same routine can walk any
    element table. For every element that has metadata, the metadata entry
    is searched for a SourceCodeReference; if a comment exists on that
    reference's ``line_begin``, a SourceCodeComment carrying the comment
    and the reference's line/column span is appended to the same entry.

    Args:
        gromet_metadata: indexable collection of metadata entries; an
            element's ``metadata`` value minus one is used as the index.
        gromet_comments: comments for one file, as read by find_comment().
        gromet_elements: iterable of elements, or ``None`` (nothing to do).

    Returns:
        None. Entries of ``gromet_metadata`` are modified in place.
    """
    # TODO: associate code_file_reference_uid
    if gromet_elements is None:
        return

    for elem in gromet_elements:
        if elem.metadata is None:
            continue

        # Element metadata values are offset by one from list positions.
        metadatum = gromet_metadata[elem.metadata - 1]
        source_ref = find_source_code_reference(metadatum)
        if source_ref is None:
            continue

        # Comments are matched against the first line of the reference.
        comment = find_comment(gromet_comments, source_ref.line_begin)
        if comment is None:
            continue

        metadatum.append(
            SourceCodeComment(
                comment=comment,
                comment_type=CommentType.OTHER,
                context_function_name=None,
                code_file_reference_uid=None,
                line_begin=source_ref.line_begin,
                line_end=source_ref.line_end,
                col_begin=source_ref.col_begin,
                col_end=source_ref.col_end,
            )
        )

117

118

def align_fn(gromet_metadata, gromet_comments, gromet_fn):
    """Align line comments and the docstring of one FN with its metadata.

    First, every element table of ``gromet_fn`` (b, bf, opi, opo, pif, pof,
    pic, poc) is passed through align_gromet_elements(), which appends a
    SourceCodeComment wherever a comment sits on an element's first line.

    Then, if the first box of the ``b`` table has a name and metadata, and
    ``gromet_comments["docstrings"]`` has an entry for that name (with the
    generated "_id###" suffix stripped), the docstring is appended to that
    box's metadata entry as a DOCSTRING comment.

    Args:
        gromet_metadata: indexable collection of metadata entries; a box's
            ``metadata`` value minus one is used as the index.
        gromet_comments: comments for one file; the "comments" and
            "docstrings" keys are read.
        gromet_fn: the FN whose element tables are aligned.

    Returns:
        None. Entries of ``gromet_metadata`` are modified in place.
    """
    # All these GroMEt elements have metadata stored in the same way, so the
    # generic aligner handles the b table as well as the others.
    element_tables = (
        gromet_fn.b,
        gromet_fn.bf,
        gromet_fn.opi,
        gromet_fn.opo,
        gromet_fn.pif,
        gromet_fn.pof,
        gromet_fn.pic,
        gromet_fn.poc,
    )
    for elements in element_tables:
        align_gromet_elements(gromet_metadata, gromet_comments, elements)

    # Check if the current FN has a name, and if it's associated with a
    # docstring. A missing or empty b table means there is nothing to name.
    if not gromet_fn.b:
        return

    top_box = gromet_fn.b[0]
    if top_box.name is None or top_box.metadata is None:
        return

    normalized_func_name = strip_id(top_box.name)
    docstrings = gromet_comments["docstrings"]
    if normalized_func_name not in docstrings:
        return

    metadatum = gromet_metadata[top_box.metadata - 1]
    # Look up the reference of the named box itself rather than reusing
    # whatever reference an earlier loop happened to see last.
    source_ref = find_source_code_reference(metadatum)

    metadatum.append(
        SourceCodeComment(
            comment="".join(docstrings[normalized_func_name]),
            comment_type=CommentType.DOCSTRING,
            context_function_name=normalized_func_name,
            code_file_reference_uid=None,
            # Without a source reference the location is left unset.
            line_begin=source_ref.line_begin if source_ref is not None else None,
            line_end=source_ref.line_end if source_ref is not None else None,
            col_begin=source_ref.col_begin if source_ref is not None else None,
            col_end=source_ref.col_end if source_ref is not None else None,
        )
    )

178

179

def find_fn(gromet_modules, fn_name):
    """Return the first module in ``gromet_modules`` matching ``fn_name``.

    Only the last "/"-separated component of ``fn_name`` is compared
    against each module's ``name``. ``None`` is returned when no module
    matches.
    """
    wanted_name = fn_name.rsplit("/", 1)[-1]
    return next(
        (candidate for candidate in gromet_modules if wanted_name == candidate.name),
        None,
    )

189

190

def _single_file_comments_to_dict(single_file_comment_obj):
    """Convert one file's tree-sitter style comments to the legacy dict layout."""
    comments = []
    for single_comment in single_file_comment_obj.single:
        single_dict = dict(single_comment)
        # The aligner looks comments up under "contents"; mirror the
        # "content" value under that key.
        single_dict["contents"] = single_dict["content"]
        comments.append(single_dict)

    docstrings = {
        docstring_comment.function_name: docstring_comment.content
        for docstring_comment in single_file_comment_obj.docstring
    }
    return {"comments": comments, "docstrings": docstrings}


def _comments_to_legacy_dict(comments):
    """Convert single- or multi-file comment responses to the legacy dict layout."""
    if isinstance(comments, SingleFileCommentResponse):
        return _single_file_comments_to_dict(comments)

    # Anything else is treated as a multi-file response: file name -> comments.
    return {
        file: _single_file_comments_to_dict(single_file_comment_obj)
        for file, single_file_comment_obj in comments.files.items()
    }


def _align_module(module_FN, file_comments):
    """Align one module's top-level FN and every FN in its fn_array."""
    FN_metadata = module_FN.metadata_collection
    align_fn(FN_metadata, file_comments, module_FN.fn)

    # A missing fn_array is treated the same as an empty one.
    for FN in module_FN.fn_array or []:
        align_fn(FN_metadata, file_comments, FN)


def align_full_system(gromet_obj: GrometFNModuleCollection, extraction: CodeComments):
    """Align extracted comments with every module of a GroMEt collection.

    The extraction is first converted to the dictionary layout used by the
    aligner. That layout comes in two shapes:

    - single file: one top level dictionary holding the "comments" and
      "docstrings" of that file;
    - multi file: a top level dictionary mapping each file in the system
      to such a per-file dictionary.

    The shape is detected from the keys of the converted dictionary.

    Args:
        gromet_obj: the collection whose modules are aligned in place.
        extraction: a SingleFileCommentResponse, or an object exposing a
            ``files`` mapping of file name to per-file comment objects.

    Raises:
        NotImplementedError: for a single-file extraction when the
            collection does not contain exactly one module.
    """
    # TODO: Update fn_unifier logic to support tree-sitter comment extractor format
    extraction = _comments_to_legacy_dict(extraction)

    if "comments" in extraction and "docstrings" in extraction:
        # Single file system
        # NOTE: We assume for the moment that if we're aligning a single file
        # the corresponding GroMEt has exactly one module
        if len(gromet_obj.modules) != 1:
            raise NotImplementedError(
                "Single file alignment from a multi module GroMEt system not supported yet"
            )

        module_FN = gromet_obj.modules[0]
        if module_FN is not None:
            _align_module(module_FN, extraction)
        return

    # Multi-file system
    extraction = normalize_extraction_names(extraction)
    for module in gromet_obj.module_index:
        # Go through each file in the GroMEt FN
        normalized_path = normalize_module_path(module)
        if normalized_path not in extraction:
            continue

        # Find the current FN in the collection
        module_FN = find_fn(gromet_obj.modules, normalized_path)
        if module_FN is not None:
            _align_module(module_FN, extraction[normalized_path])

261

262

def process_alignment(
    gromet_json: Dict[Text, Any], comments_json: Dict[Text, Any]
) -> GrometFNModuleCollection:
    """Unify a GroMEt JSON with a comments JSON.

    ``gromet_json`` is turned into a GroMEt object via json_to_gromet(),
    and align_full_system() is then run on that object with
    ``comments_json`` passed through unchanged. The aligned GroMEt object
    is returned.
    """
    unified_gromet = json_to_gromet(gromet_json)
    align_full_system(unified_gromet, comments_json)
    return unified_gromet

273

274

if __name__ == "__main__":
    # Command line entry point: align a GroMEt JSON file with a comments
    # JSON file and write the result back over the GroMEt file.
    parser = argparse.ArgumentParser()
    # Both paths are used unconditionally below, so they are mandatory.
    parser.add_argument(
        "--gromet", type=str, required=True, help="Path to a GroMEt JSON file"
    )
    parser.add_argument(
        "--comments", type=str, required=True, help="Path to a Comments JSON file"
    )
    args = parser.parse_args()

    # Get the GroMEt JSON and turn it back into an object
    gromet_object = json_to_gromet(args.gromet)

    # Get the comments data from the JSON file; the context manager closes
    # the file even if parsing fails.
    with open(args.comments, "r") as comments_file:
        comments_json = json.load(comments_file)

    # NOTE(review): json.load yields plain dicts/lists, while
    # align_full_system reads attributes such as `.files` from its second
    # argument - confirm this entry point is given a compatible input.
    align_full_system(gromet_object, comments_json)

    # Serialize before opening the output for writing, so that a failure
    # during serialization does not leave the GroMEt file truncated.
    gromet_collection_dict = gromet_object.to_dict()
    gromet_output = dictionary_to_gromet_json(del_nulls(gromet_collection_dict))

    # Write out the gromet with the comments
    with open(args.gromet, "w") as f:
        f.write(gromet_output)

Coverage for skema/program_analysis/fn_unifier.py: 78%

142 statements