Coverage for skema/program_analysis/fn_unifier.py: 78%
142 statements
« prev ^ index » next coverage.py v7.5.0, created at 2024-04-30 17:15 +0000
« prev ^ index » next coverage.py v7.5.0, created at 2024-04-30 17:15 +0000
1# Function Network (FN) Unifier
2# Given a GroMEt FN JSON and a Comments JSON file, we 'unify' them by
3# 1. Extracting the GroMEt JSON and turning it back into an object
4# 2. Extracting the comments JSON file
5# 3. Appending all comments from the comments JSON into the respective MetadataCollections for each FN
7from skema.program_analysis.JSON2GroMEt.json2gromet import json_to_gromet
8from skema.program_analysis.comment_extractor.model import (
9 CodeComments,
10 SingleFileCommentResponse,
11 MultiFileCommentResponse,
12)
13from skema.gromet.metadata.source_code_comment import SourceCodeComment
14from skema.gromet.metadata.source_code_reference import SourceCodeReference
15from skema.gromet.metadata.comment_type import CommentType
16from skema.gromet.fn.gromet_fn_module_collection import GrometFNModuleCollection
17from skema.utils.fold import dictionary_to_gromet_json, del_nulls
19from typing import Dict, Text, Any
20import argparse
21import json
22import re
def normalize_module_path(path: str):
    """Turn a dotted module path into a slash-separated one.

    Module paths in the GroMEt FN are dotted, while the comments
    dictionary is keyed by slash-separated paths.
    """
    segments = path.split(".")
    return "/".join(segments)
def normalize_extraction_names(extraction: dict):
    """Return a new dictionary whose keys are cleaned-up file names.

    Every occurrence of ".py" and then every occurrence of "./" is removed
    from each key; the values are carried over untouched.
    """
    cleaned = {}
    for name, payload in extraction.items():
        bare_name = name.replace(".py", "")
        bare_name = bare_name.replace("./", "")
        cleaned[bare_name] = payload
    return cleaned
def strip_id(func_name):
    """Remove the trailing "_id###" identifier from a function name.

    The GroMEt generation appends "_id" followed by a number to function
    names. Only the pure function name is needed for docstring lookup, so
    that suffix is dropped.

    Args:
        func_name: The function name, possibly ending in "_id<digits>".

    Returns:
        The name without the trailing "_id<digits>" suffix, or the name
        unchanged when it does not end with that pattern.
    """
    # Anchor at the very end of the string so that an "_id<digits>" sequence
    # in the middle of a name never causes an unrelated trailing segment to
    # be cut off.
    return re.sub(r"_id\d+\Z", "", func_name)
def find_source_code_reference(metadatum):
    """Return the first SourceCodeReference found in ``metadatum``.

    ``metadatum`` is iterated in order; None is returned when none of its
    elements is a SourceCodeReference.
    """
    references = (
        item for item in metadatum if isinstance(item, SourceCodeReference)
    )
    return next(references, None)
def find_comment(comments, line_num):
    """Look up the comment recorded for a given line number.

    ``comments["comments"]`` is scanned in order and the "contents" of the
    first entry whose "line_number" equals ``line_num`` is returned.
    None is returned when no entry matches.
    """
    matching_contents = (
        entry["contents"]
        for entry in comments["comments"]
        if entry["line_number"] == line_num
    )
    return next(matching_contents, None)
def insert_metadata(gromet_metadata, new_metadata):
    """Append ``new_metadata`` as a new single-element entry.

    The new metadata is wrapped in its own list and added to the end of
    ``gromet_metadata`` (mutated in place). The length of the collection
    after the append is returned.

    NOTE: not used now but will be in the future
    """
    wrapped_entry = [new_metadata]
    gromet_metadata.append(wrapped_entry)
    return len(gromet_metadata)
def align_gromet_elements(gromet_metadata, gromet_comments, gromet_elements):
    """Attach matching source comments to the metadata of GroMEt elements.

    GroMEt elements are generic enough that the same routine can walk any
    table of them. For each element that has a metadata index, the
    SourceCodeReference in the corresponding metadata entry is located, and
    if a comment exists for the reference's ``line_begin`` a
    SourceCodeComment carrying the same position is appended to that entry.

    Args:
        gromet_metadata: The metadata collection, indexed with the 1-based
            ``metadata`` value stored on each element. Mutated in place.
        gromet_comments: Comments dictionary for the file, as consumed by
            find_comment().
        gromet_elements: Iterable of GroMEt elements, or None (no-op).
    """
    # TODO: associate code_file_reference_uid
    if gromet_elements is None:
        return

    for elem in gromet_elements:
        if elem.metadata is None:
            continue

        # Element metadata indices are 1-based.
        metadatum = gromet_metadata[elem.metadata - 1]
        source_ref = find_source_code_reference(metadatum)
        if source_ref is None:
            continue

        # Comments are matched against the line where the reference begins.
        comment = find_comment(gromet_comments, source_ref.line_begin)
        if comment is None:
            continue

        metadatum.append(
            SourceCodeComment(
                comment=comment,
                comment_type=CommentType.OTHER,
                context_function_name=None,
                code_file_reference_uid=None,
                line_begin=source_ref.line_begin,
                line_end=source_ref.line_end,
                col_begin=source_ref.col_begin,
                col_end=source_ref.col_end,
            )
        )
def align_fn(gromet_metadata, gromet_comments, gromet_fn):
    """Align comments and the docstring of one GroMEt FN with its metadata.

    First, every table of the FN (b, bf, opi, opo, pif, pof, pic, poc) is
    passed through align_gromet_elements() so that line comments are
    attached to the matching metadata entries. Then, if the first box of
    the ``b`` table has a name whose id-stripped form has an entry in
    ``gromet_comments["docstrings"]``, that docstring is appended to the
    box's metadata entry as a DOCSTRING comment.

    Args:
        gromet_metadata: The metadata collection, indexed with the 1-based
            ``metadata`` values stored on the FN's elements. Mutated in place.
        gromet_comments: Comments dictionary for the file, with "comments"
            and "docstrings" entries.
        gromet_fn: The GroMEt FN whose tables are aligned.
    """
    # All these GroMEt tables store metadata in the same way, so the
    # generic aligner handles every one of them, including the b table.
    for table_name in ("b", "bf", "opi", "opo", "pif", "pof", "pic", "poc"):
        align_gromet_elements(
            gromet_metadata, gromet_comments, getattr(gromet_fn, table_name)
        )

    # Check if the current FN has a name, and if it's associated with a
    # docstring, align the docstring with it. An empty or missing b table
    # has no box to name the FN.
    if not gromet_fn.b:
        return

    fn_box = gromet_fn.b[0]
    if fn_box.name is None:
        return

    normalized_func_name = strip_id(fn_box.name)
    docstrings = gromet_comments["docstrings"]
    if normalized_func_name not in docstrings:
        return

    metadata_idx = fn_box.metadata
    if metadata_idx is None:
        return

    # Resolve the source reference of this specific box rather than relying
    # on whatever reference was seen last while walking the tables.
    metadatum = gromet_metadata[metadata_idx - 1]
    source_ref = find_source_code_reference(metadatum)

    # NOTE(review): when the box has no SourceCodeReference the docstring is
    # still attached, with no position information. This assumes
    # SourceCodeComment accepts None for the position fields, as it is
    # already given None for other fields here -- confirm.
    metadatum.append(
        SourceCodeComment(
            comment="".join(docstrings[normalized_func_name]),
            comment_type=CommentType.DOCSTRING,
            context_function_name=normalized_func_name,
            code_file_reference_uid=None,
            line_begin=getattr(source_ref, "line_begin", None),
            line_end=getattr(source_ref, "line_end", None),
            col_begin=getattr(source_ref, "col_begin", None),
            col_end=getattr(source_ref, "col_end", None),
        )
    )
def find_fn(gromet_modules, fn_name):
    """Locate a module FN by name in the list of GroMEt modules.

    Only the last "/"-separated component of ``fn_name`` is compared
    against each module's ``name``. The first match is returned, or None
    when no module has that name.
    """
    target_name = fn_name.rsplit("/", 1)[-1]
    candidates = (
        module for module in gromet_modules if target_name == module.name
    )
    return next(candidates, None)
def _comments_to_legacy_dict(comments) -> Dict:
    """Convert tree-sitter style comments to the dict layout used by the fn_unifier."""

    def single_file_to_dict(single_file_comment_obj):
        single_comments = []
        for single_comment in single_file_comment_obj.single:
            single_dict = dict(single_comment)
            # find_comment() reads the text from "contents", so mirror the
            # "content" field under that key.
            single_dict["contents"] = single_dict["content"]
            single_comments.append(single_dict)

        docstrings = {
            docstring_comment.function_name: docstring_comment.content
            for docstring_comment in single_file_comment_obj.docstring
        }
        return {"comments": single_comments, "docstrings": docstrings}

    # NOTE(review): a plain dict is presumed to already be in the legacy
    # layout (e.g. loaded straight from a comments JSON file) and is passed
    # through untouched -- confirm against the callers' actual JSON format.
    if isinstance(comments, dict):
        return comments

    if isinstance(comments, SingleFileCommentResponse):
        return single_file_to_dict(comments)

    return {
        file: single_file_to_dict(single_file_comment_obj)
        for file, single_file_comment_obj in comments.files.items()
    }


def _align_module(module_FN, file_comments):
    """Align the comments of one file with a module's FN and its fn_array."""
    FN_metadata = module_FN.metadata_collection
    align_fn(FN_metadata, file_comments, module_FN.fn)

    # Tolerate a missing (None) fn_array as well as an empty one.
    for FN in module_FN.fn_array or []:
        align_fn(FN_metadata, file_comments, FN)


def align_full_system(gromet_obj: GrometFNModuleCollection, extraction: CodeComments):
    """Align extracted comments with every module of a GroMEt collection.

    The extracted comments can appear in two ways:
    - extractions for a single file: one top level dictionary containing
      the "comments" and "docstrings" for that file
    - extractions for multiple files: a top level dictionary that maps each
      file in the system to a dictionary containing the comments and
      docstrings for that file
    Which kind we have is determined from the structure of the dictionary.

    Args:
        gromet_obj: The GroMEt module collection; the metadata collections
            of its modules are mutated in place.
        extraction: The extracted comments, either as comment-extractor
            model objects or as a dictionary in one of the layouts above.

    Raises:
        NotImplementedError: For a single file extraction when the GroMEt
            does not have exactly one module.
    """
    # TODO: Update fn_unifier logic to support tree-sitter comment extractor format
    extraction = _comments_to_legacy_dict(extraction)

    if "comments" in extraction and "docstrings" in extraction:
        # Single file system
        # NOTE: We assume for the moment that if we're aligning a single
        # file, the corresponding GroMEt has exactly one module
        if len(gromet_obj.modules) != 1:
            raise NotImplementedError(
                "Single file alignment from a multi module GroMEt system not supported yet"
            )

        module_FN = gromet_obj.modules[0]
        if module_FN is not None:
            _align_module(module_FN, extraction)
        return

    # Multi-file system
    extraction = normalize_extraction_names(extraction)
    for module in gromet_obj.module_index:
        # Go through each file in the GroMEt FN
        normalized_path = normalize_module_path(module)
        if normalized_path not in extraction:
            continue

        # Find the current FN in the collection
        module_FN = find_fn(gromet_obj.modules, normalized_path)
        if module_FN is not None:
            _align_module(module_FN, extraction[normalized_path])
def process_alignment(
    gromet_json: Dict[Text, Any], comments_json: Dict[Text, Any]
) -> GrometFNModuleCollection:
    """Unify a GroMEt JSON with its comments and return the GroMEt object.

    ``gromet_json`` is turned into a GroMEt object with json_to_gromet(),
    the alignment is run on it with ``comments_json``, and the resulting
    object (mutated by the alignment) is returned.
    """
    unified_gromet = json_to_gromet(gromet_json)
    align_full_system(unified_gromet, comments_json)
    return unified_gromet
if __name__ == "__main__":
    # Command line entry point: unify a GroMEt JSON file with a comments
    # JSON file and write the result back over the GroMEt file.
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--gromet", type=str, required=True, help="Path to a GroMEt JSON file"
    )
    parser.add_argument(
        "--comments", type=str, required=True, help="Path to a Comments JSON file"
    )
    args = parser.parse_args()

    # Get the GroMEt JSON and turn it back into an object
    gromet_object = json_to_gromet(args.gromet)

    # Get the comments data from the JSON file; the context manager closes
    # the file even if parsing fails.
    with open(args.comments, "r") as comments_file:
        comments_json = json.load(comments_file)

    # NOTE(review): align_full_system is annotated as taking CodeComments,
    # while a raw dictionary is passed here -- confirm the expected format.
    align_full_system(gromet_object, comments_json)

    # Serialize before opening the output: opening with "w" truncates the
    # GroMEt file, so a serialization failure must not happen after that.
    gromet_collection_dict = gromet_object.to_dict()
    serialized_gromet = dictionary_to_gromet_json(del_nulls(gromet_collection_dict))

    # Write out the gromet with the comments
    with open(args.gromet, "w") as f:
        f.write(serialized_gromet)