Source code for delphi.translators.for2py.mod_index_generator

"""
This module contains code to generate a module index file for a set of Fortran
files that the program analysis pipeline runs over. The file describes each
module used in a program run. The information about each module is represented
as a JSON dictionary and has the following fields:

    name:              <module_name>
    file:              <file_containing_the_module>
    module:            <list_of_used_modules>
    symbol_export:     <list_of_symbols_exported_by_module>
    subprogram_list:   <procedure_mapping_for_module>

The procedure mapping for each subprogram `p` defined in module `M` is a
mapping from each possible tuple of argument types for p to the function to
invoke for that argument type tuple.

Author: Pratik Bhandari
"""

import sys
import xml.etree.ElementTree as ET
from typing import List, Dict
import re
import json


[docs]class ModuleGenerator(object): def __init__(self): # This string holds the current context of the program being parsed self.current_context = None # This string holds the name of the original Fortran code which is # being processed self.fileName = None # This string holds the name of the main PROGRAM module self.main = None # This string holds the path on which the XML file of the original # Fortran code is located self.path = None # Initialize all the dictionaries which we will be writing to our file # This is a list of all modules inside a single Fortran file self.modules = [] # This dictionary holds the set of symbols exported by each module. self.exports = {} # This dictionary holds the modules used by each module/program. # Additionally, variables from each module can be selectively USEd ( # imported). This is stored in the object below. If all variables of # a module are USEd, an `*` symbol is used to denote this. self.uses = {} # This dictionary holds the set of symbols imported by each module. # This is given by: IMPORTS(m) = U { EXPORTS(p) | p ∈ USES(m) } # Since a module can use the ONLY keyword to import only some of the # symbols exported by another module, in such cases, the set of # imported symbols is be limited to those explicitly mentioned. self.imports = {} # This dictionary holds all the private variables defined in each # module self.private = {} # This dictionary holds all public variables for each module/context self.public = {} # This dictionary holds all subprograms (subroutines and functions) for # each module self.subprograms = {} # This dictionary stores the set of symbols declared in each module. self.symbols = {} # This dictionary stores a variable-type mapping. self.variable_types = {} self.symbol_type = {}
[docs] def populate_symbols(self): """ This function populates the dictionary `self.symbols` which stores the set of symbols declared in each module. This is the union of all public variables, private variables and subprograms for each module. """ for item in self.modules: self.symbols[item] = self.public.get(item, []) + \ self.private.get(item, []) + \ self.subprograms.get(item, [])
[docs] def populate_exports(self): """ This function populates the `self.exports` dictionary which holds the set of symbols exported by each module. The set of exported symbols is given by: (imports U symbols) - private """ for item in self.modules: interim = self.imports.get(item, []) + self.symbols.get(item, []) self.exports[item] = [x for x in interim if x not in self.private.get(item, [])]
[docs] def populate_imports(self, module_logs): """ This function populates the `self.imports` dictionary which holds all the private variables defined in each module.""" for module in self.uses: for use_item in self.uses[module]: for key in use_item: if len(use_item[key]) == 1 and use_item[key][0] == '*': if key in self.exports: symbols = self.exports[key] else: assert ( key.lower() in module_logs["mod_info"] ), f"module name (key) {key} does not exist in the log file." symbols = module_logs["mod_info"][key]["exports"] if module in module_logs["mod_info"]: module_logs["mod_info"][module]["imports"] = symbols if key in symbols: self.imports.setdefault(module, []).append( {key: symbols[key]} ) else: self.imports.setdefault(module, []).append( {key: symbols} ) else: self.imports.setdefault(module, []).append( {key: use_item[key]} )
[docs] def populate_symbol_types(self): for var in self.variable_types: for module in self.symbols: if var in self.symbols[module]: self.symbol_type[var] = [module, self.variable_types[var]]
[docs] def parse_tree(self, root, module_logs) -> bool: """ This function parses the XML tree of a Fortran file and tracks and maps relevant object relationships """ # Find name of PROGRAM module for item in root.iter(): if item.tag == "program": self.main = item.attrib["name"].lower() variable_type = None for item in root.iter(): # Get the name of the XML file being parsed if item.tag == "file": file_name = item.attrib["path"] file = file_name.split('/')[-1] file_regex = r'^(.*)_processed(\..*)$' path_regex = r'^.*(delphi/[^delphi].*)/\w+' match = re.match(file_regex, file) if match: self.fileName = match.group(1) + match.group(2) match = re.match(path_regex, file_name) if match: self.path = match.group(1) elif item.tag.lower() in ["module", "program"]: self.current_context = item.attrib["name"].lower() self.modules.append(item.attrib["name"].lower()) elif ( item.tag.lower() == "type" and "name" in item.attrib ): variable_type = item.attrib["name"].lower() elif item.tag.lower() == "variable": if item.attrib.get("name"): if not self.current_context: self.current_context = self.main self.public.setdefault(self.current_context, []).append( item.attrib["name"].lower()) self.variable_types[item.attrib["name"].lower()] = variable_type elif item.tag.lower() in ["subroutine", "function"]: if not self.current_context: self.current_context = self.main self.subprograms.setdefault(self.current_context, []).append( item.attrib["name"].lower()) self.current_context = item.attrib["name"].lower() elif item.tag.lower() == "declaration": # This function parses the <declaration> tag of the XML and # checks if private variables are defined in the respective # module/program. Private variables tend to be inside the # declaration tag private_status = False for child in item.iter(): if child.tag.lower() == "access-spec" and child.attrib.get( "keyword").lower() == "private": private_status = True if child.tag.lower() == "variable" and private_status: if not self.current_context: self.current_context = self.main if child.attrib.get("name"): self.private.setdefault(self.current_context, []).append( child.attrib["name"].lower()) elif item.tag.lower() == "use": # If a module, program or subroutine uses (imports) another # module, a USE statement is used and we want to map the # relationship between different program scopes that occur # with the use of the USE statement only_symbols = [] for child in item: if child.tag.lower() == "only": for innerChild in child: if innerChild.tag.lower() == "name": only_symbols.append(innerChild.attrib[ "id"].lower()) if not self.current_context: self.current_context = self.main self.uses.setdefault(self.current_context, []).append({ item.attrib["name"].lower(): only_symbols} if only_symbols else {item.attrib["name"].lower(): ["*"]}) self.populate_symbols() self.populate_symbol_types() self.populate_exports() self.populate_imports(module_logs) return True
[docs] def analyze(self, tree: ET.ElementTree, mod_log_path: str) -> List: """ Parse the XML file from the root and keep track of all important data structures and object relationships between files. """ with open(mod_log_path) as json_f: module_logs = json.load(json_f) status = self.parse_tree(tree, module_logs) output_dictionary = {} if status: output_dictionary['file_name'] = [self.fileName, self.path] output_dictionary['modules'] = self.modules output_dictionary['exports'] = self.exports output_dictionary['use_mapping'] = self.uses output_dictionary['imports'] = self.imports output_dictionary['private_objects'] = self.private output_dictionary['public_objects'] = self.public output_dictionary['subprograms'] = self.subprograms output_dictionary['symbols'] = self.symbols output_dictionary['symbol_types'] = self.symbol_type with open(mod_log_path, 'w+') as json_f: json_f.write(json.dumps(module_logs, indent=2)) return [output_dictionary]
[docs]def get_index(xml_file: str, module_log_file_path: str): """ Get the root of the XML ast, instantiate the moduleGenerator and start the analysis process. """ tree = ET.parse(xml_file).getroot() generator = ModuleGenerator() return generator.analyze(tree, module_log_file_path)