# Source code for delphi.translators.for2py.mod_index_generator

"""
This module contains code to generate a module index file for a set of Fortran
files that the program analysis pipeline runs over. The file describes each
module used in a program run. The information about each module is represented
as a JSON dictionary and has the following fields:

    name:              <module_name>
    file:              <file_containing_the_module>
    module:            <list_of_used_modules>
    symbol_export:     <list_of_symbols_exported_by_module>
    subprogram_list:   <procedure_mapping_for_module>

The procedure mapping for each subprogram `p` defined in module `M` is a
mapping from each possible tuple of argument types for p to the function to
invoke for that argument type tuple.

Author: Pratik Bhandari
"""

import sys
import xml.etree.ElementTree as ET
from typing import List, Dict
import re
import json


class ModuleGenerator(object):
    """Collect per-module symbol information from the XML AST of a Fortran
    file.

    An instance walks the XML tree once (see `parse_tree`) and fills a set of
    dictionaries keyed by lower-cased module/program/subprogram name. The
    `analyze` method bundles those dictionaries into a single index
    dictionary.
    """

    # Splits a "<stem>_processed<.ext>" file name so that the original
    # "<stem><.ext>" name can be rebuilt.
    _FILE_REGEX = re.compile(r'^(.*)_processed(\..*)$')
    # Captures the part of the path starting at "delphi/".
    # NOTE(review): `[^delphi]` is a character class (one character that is
    # none of d, e, l, p, h, i), not a negative lookahead for the word
    # "delphi". It is kept as-is because changing it would change which
    # paths match -- confirm the intent before touching it.
    _PATH_REGEX = re.compile(r'^.*(delphi/[^delphi].*)/\w+')

    def __init__(self):
        # This string holds the current context of the program being parsed
        self.current_context = None
        # This string holds the name of the original Fortran code which is
        # being processed
        self.fileName = None
        # This string holds the name of the main PROGRAM module
        self.main = None
        # This string holds the path on which the XML file of the original
        # Fortran code is located
        self.path = None
        # Initialize all the dictionaries which we will be writing to our file
        # This is a list of all modules inside a single Fortran file
        self.modules = []
        # This dictionary holds the set of symbols exported by each module.
        self.exports = {}
        # This dictionary holds the modules used by each module/program.
        # Additionally, variables from each module can be selectively USEd (
        # imported). This is stored in the object below. If all variables of
        # a module are USEd, an `*` symbol is used to denote this.
        self.uses = {}
        # This dictionary holds the set of symbols imported by each module.
        # This is given by: IMPORTS(m) = U { EXPORTS(p) | p ∈ USES(m) }
        # Since a module can use the ONLY keyword to import only some of the
        # symbols exported by another module, in such cases, the set of
        # imported symbols is limited to those explicitly mentioned.
        self.imports = {}
        # This dictionary holds all the private variables defined in each
        # module
        self.private = {}
        # This dictionary holds all public variables for each module/context
        self.public = {}
        # This dictionary holds all subprograms (subroutines and functions) for
        # each module
        self.subprograms = {}
        # This dictionary stores the set of symbols declared in each module.
        self.symbols = {}
        # This dictionary stores a variable-type mapping.
        self.variable_types = {}
        # This dictionary maps a variable name to `[module, type]` for every
        # variable that appears in some module's symbol list.
        self.symbol_type = {}

    def populate_symbols(self):
        """ This function populates the dictionary `self.symbols` which stores
        the set of symbols declared in each module. This is the concatenation
        of the public variables, private variables and subprograms recorded
        for each module (duplicates are not removed).
        """
        for module in self.modules:
            self.symbols[module] = (
                self.public.get(module, [])
                + self.private.get(module, [])
                + self.subprograms.get(module, [])
            )

    def populate_exports(self):
        """ This function populates the `self.exports` dictionary which holds
        the set of symbols exported by each module. The set of exported symbols
        is given by: (imports U symbols) - private

        Note that `parse_tree` calls this method before `populate_imports`,
        so on that path `self.imports` has not been filled yet when the
        exports are computed.
        """
        for module in self.modules:
            hidden = self.private.get(module, [])
            candidates = (
                self.imports.get(module, []) + self.symbols.get(module, [])
            )
            # `hidden` stays a list: entries coming from `self.imports` are
            # dictionaries, which are unhashable and cannot be tested against
            # a set.
            self.exports[module] = [
                symbol for symbol in candidates if symbol not in hidden
            ]

    def populate_imports(self, module_logs):
        """ This function populates the `self.imports` dictionary which holds
        the symbols imported by each module through its USE statements.

        For a `USE m, ONLY: ...` statement the listed names are recorded
        as-is. For a plain `USE m` the exports of `m` are taken from
        `self.exports` when `m` was seen in this file, and otherwise from
        `module_logs["mod_info"][m]["exports"]`.

        Args:
            module_logs: Dictionary with a "mod_info" entry keyed by module
                name. When the using module itself has an entry there, its
                "imports" field is overwritten with the looked-up symbols.

        Raises:
            AssertionError: if a wholly USEd module is found neither in this
                file nor in `module_logs["mod_info"]`.
        """
        for module, use_items in self.uses.items():
            imported = self.imports.setdefault(module, []) if use_items \
                else None
            for use_item in use_items:
                for key, used in use_item.items():
                    if not (len(used) == 1 and used[0] == '*'):
                        # `ONLY` list: import exactly the named symbols.
                        imported.append({key: used})
                        continue

                    if key in self.exports:
                        symbols = self.exports[key]
                    else:
                        mod_info = module_logs["mod_info"]
                        # Check and look up with the same (lower-cased) key.
                        log_key = key.lower()
                        if log_key not in mod_info:
                            # Raised explicitly (rather than via `assert`) so
                            # the check survives `python -O`; the exception
                            # type is unchanged for existing callers.
                            raise AssertionError(
                                f"module name (key) {key} does not exist in the log file."
                            )
                        symbols = mod_info[log_key]["exports"]
                        if module in mod_info:
                            mod_info[module]["imports"] = symbols

                    # Only a mapping can be indexed by module name. The
                    # in-file export list may legitimately contain a symbol
                    # spelled like the module, which must not be used as an
                    # index into the list.
                    if isinstance(symbols, dict) and key in symbols:
                        imported.append({key: symbols[key]})
                    else:
                        imported.append({key: symbols})

    def populate_symbol_types(self):
        """ This function populates `self.symbol_type`, mapping each variable
        found in some module's symbol list to `[module, type]`. When a
        variable name appears in several modules, the module iterated last
        wins.
        """
        for var, var_type in self.variable_types.items():
            for module, symbols in self.symbols.items():
                if var in symbols:
                    self.symbol_type[var] = [module, var_type]

    def _ensure_context(self):
        """ Fall back to the main PROGRAM name when no context has been set
        yet, and return the current context. """
        if not self.current_context:
            self.current_context = self.main
        return self.current_context

    def _record_file(self, file_path):
        """ Derive `self.fileName` and `self.path` from the `path` attribute
        of a <file> tag. Either attribute is left untouched when its pattern
        does not match. """
        base_name = file_path.split('/')[-1]
        match = self._FILE_REGEX.match(base_name)
        if match:
            self.fileName = match.group(1) + match.group(2)
        match = self._PATH_REGEX.match(file_path)
        if match:
            self.path = match.group(1)

    def _collect_private(self, declaration):
        """ Record the variables of a <declaration> tag that follow a
        `private` <access-spec> as private to the current context. """
        private_status = False
        for child in declaration.iter():
            tag = child.tag.lower()
            # An <access-spec> without a `keyword` attribute is simply not
            # a private specifier.
            keyword = child.attrib.get("keyword") or ""
            if tag == "access-spec" and keyword.lower() == "private":
                private_status = True
            if tag == "variable" and private_status:
                context = self._ensure_context()
                name = child.attrib.get("name")
                if name:
                    self.private.setdefault(context, []).append(name.lower())

    def _record_use(self, use):
        """ Record a <use> tag for the current context, either with its
        `ONLY` symbol list or with `["*"]` when everything is imported. """
        only_symbols = []
        for child in use:
            if child.tag.lower() != "only":
                continue
            for inner_child in child:
                if inner_child.tag.lower() == "name":
                    only_symbols.append(inner_child.attrib["id"].lower())
        context = self._ensure_context()
        used = only_symbols if only_symbols else ["*"]
        self.uses.setdefault(context, []).append(
            {use.attrib["name"].lower(): used}
        )

    def parse_tree(self, root, module_logs) -> bool:
        """ This function parses the XML tree of a Fortran file and tracks and
        maps relevant object relationships.

        Args:
            root: XML element (anything providing `iter()`) to walk.
            module_logs: Module log dictionary, forwarded to
                `populate_imports`.

        Returns:
            Always True.
        """

        # Find name of PROGRAM module
        for item in root.iter():
            if item.tag == "program":
                self.main = item.attrib["name"].lower()

        # Name of the most recently seen named <type> tag; it is attached to
        # every variable encountered afterwards.
        variable_type = None

        for item in root.iter():
            # Get the name of the XML file being parsed
            if item.tag == "file":
                self._record_file(item.attrib["path"])
                continue

            tag = item.tag.lower()
            if tag in ("module", "program"):
                name = item.attrib["name"].lower()
                self.current_context = name
                self.modules.append(name)

            elif tag == "type" and "name" in item.attrib:
                variable_type = item.attrib["name"].lower()

            elif tag == "variable":
                name = item.attrib.get("name")
                if name:
                    context = self._ensure_context()
                    self.public.setdefault(context, []).append(name.lower())
                    self.variable_types[name.lower()] = variable_type

            elif tag in ("subroutine", "function"):
                context = self._ensure_context()
                name = item.attrib["name"].lower()
                self.subprograms.setdefault(context, []).append(name)
                # The subprogram becomes the context for what follows.
                self.current_context = name

            elif tag == "declaration":
                # Private variables tend to be inside the declaration tag.
                self._collect_private(item)

            elif tag == "use":
                # If a module, program or subroutine uses (imports) another
                # module, a USE statement is used and we want to map the
                # relationship between different program scopes that occur
                # with the use of the USE statement
                self._record_use(item)

        self.populate_symbols()
        self.populate_symbol_types()
        self.populate_exports()
        self.populate_imports(module_logs)

        return True

    def analyze(self, tree: ET.ElementTree, mod_log_path: str) -> List:
        """ Parse the XML file from the root and keep track of all important
        data structures and object relationships between files.

        Args:
            tree: XML root to walk; it is handed to `parse_tree` unchanged.
            mod_log_path: Path of the JSON module log. It is read before
                parsing and rewritten afterwards, since `populate_imports`
                may update it.

        Returns:
            A one-element list holding the index dictionary.
        """
        with open(mod_log_path) as json_f:
            module_logs = json.load(json_f)

        status = self.parse_tree(tree, module_logs)
        output_dictionary = {}
        if status:
            output_dictionary = {
                'file_name': [self.fileName, self.path],
                'modules': self.modules,
                'exports': self.exports,
                'use_mapping': self.uses,
                'imports': self.imports,
                'private_objects': self.private,
                'public_objects': self.public,
                'subprograms': self.subprograms,
                'symbols': self.symbols,
                'symbol_types': self.symbol_type,
            }

        with open(mod_log_path, 'w+') as json_f:
            json_f.write(json.dumps(module_logs, indent=2))
        return [output_dictionary]


def get_index(xml_file: str, module_log_file_path: str) -> List:
    """ Get the root of the XML ast, instantiate the ModuleGenerator and start
    the analysis process.

    Args:
        xml_file: Path of the XML AST file to parse.
        module_log_file_path: Path of the JSON module log handed to
            `ModuleGenerator.analyze`, which reads and rewrites it.

    Returns:
        Whatever `ModuleGenerator.analyze` returns for the parsed root.
    """
    # `analyze` receives the root element, not the ElementTree wrapper.
    root = ET.parse(xml_file).getroot()
    generator = ModuleGenerator()
    return generator.analyze(root, module_log_file_path)