Source code for delphi.translators.for2py.genModFileLog

"""
This program will scan all Fortran files in the given path searching for files
that hold modules. Then, it will create a log file in JSON format.

Example:
        This script can be executed as below:
        $ python genModFileLog.py -d <root_directory> -f <log_file_name>

root_directory: Root directory from which the scan for Fortran module files
begins (given with the -d option).
log_file_name: The user does not have to provide a name, as it defaults
to "modFileLog.json", but they can specify one with the -f option followed by
the file name as a string.

Currently, this program assumes that module files reside in the same directory
as the program file that uses them.

Author: Terrence J. Lim
"""

import os
import re
import sys
import json
import argparse
from os.path import isfile
from delphi.translators.for2py import syntax, preprocessor


def parse_args():
    """Parse the command line options of the module log generator.

    Recognized options:
        -d / --directory: Root directory to begin the module scan from.
            Only the first value given is used.
        -f / --file: User specified module log file name. Only the first
            value given is used.

    Returns:
        tuple: (root_dir_path, module_log_file_name). The directory falls
        back to "." when -d is not given, and the log file name falls back
        to "modFileLog.json" when -f is absent or given without a value.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "-d",
        "--directory",
        nargs="+",
        help="Root directory to begin the module scan from.",
    )
    parser.add_argument(
        "-f",
        "--file",
        nargs="*",
        help="A user specified module log file name.",
    )
    args = parser.parse_args(sys.argv[1:])

    # Both options are optional on the command line: args.directory is None
    # when -d is missing, and args.file is None (option missing) or an empty
    # list (option given without a value). Test for truthiness instead of
    # indexing blindly, so none of these cases raises.
    root_dir_path = args.directory[0] if args.directory else "."
    module_log_file_name = args.file[0] if args.file else "modFileLog.json"
    return root_dir_path, module_log_file_name
def get_file_list_in_directory(root_dir_path):
    """Collect the absolute paths of all Fortran files below a directory.

    The directory tree is walked recursively. A file counts as a Fortran
    file when its name ends with ".f" or ".for"; files whose name contains
    "_preprocessed" are left out.

    Args:
        root_dir_path (str): Directory at which the walk starts.

    Returns:
        List: Absolute paths of the Fortran files that were found.
    """
    fortran_suffixes = (".f", ".for")
    fortran_files = []
    for current_dir, _sub_dirs, names in os.walk(root_dir_path):
        for name in names:
            if "_preprocessed" in name:
                continue
            if not name.endswith(fortran_suffixes):
                continue
            fortran_files.append(
                os.path.abspath(os.path.join(current_dir, name))
            )
    return fortran_files
def modules_from_file(
    file_path, file_to_mod_mapper, mod_to_file_mapper, mod_info_dict
):
    """Scan a Fortran file for modules unless the log is already up to date.

    A file is (re)scanned with 'populate_mappers' when it has no entry in
    the log yet, or when the last modified time stored in the log differs
    from the last modified time of the file on disk.

    Args:
        file_path (str): File path that is guaranteed to exist in the
            directory.
        file_to_mod_mapper (dict): Dictionary of lists that will hold
            file-to-module_name mappings. The last element of each list
            is the last modified time recorded for the file.
        mod_to_file_mapper (dict): Dictionary that holds a module to its
            residing file path.
        mod_info_dict (dict): Dictionary that holds the per-module
            information; it is passed on to 'populate_mappers'.

    Returns:
        None.
    """
    if file_path in file_to_mod_mapper:
        last_modified_time_in_log = file_to_mod_mapper[file_path][-1]
        # Any difference means the log is stale. The time on disk can
        # legitimately be older than the logged one (for example when a file
        # is restored with its original time stamp), so that case triggers a
        # re-scan as well instead of aborting the whole run.
        if get_file_last_modified_time(file_path) == last_modified_time_in_log:
            return

    populate_mappers(
        file_path, file_to_mod_mapper, mod_to_file_mapper, mod_info_dict
    )
def populate_mappers(
    file_path, file_to_mod_mapper, mod_to_file_mapper, mod_info_dict
):
    """Extract the modules declared in a file and record them in the mappers.

    Files that hold no module declaration are left out of all three
    dictionaries.

    Args:
        file_path (str): Path of the file that will be scanned.
        file_to_mod_mapper (dict): Dictionary of lists that will hold
            file-to-module_name mappings. The entry written for the file is
            the list of lowercased module names followed by the file's last
            modified time.
        mod_to_file_mapper (dict): Dictionary that maps each module name to
            a one-element list holding its residing file path.
        mod_info_dict (dict): Dictionary that receives one summary per
            module ("exports", "symbol_types", "imports",
            "interface_functions" and "derived_type_list").

    Returns:
        None.
    """
    # Read the file exactly once. The context manager guarantees that the
    # handle is closed even when preprocessing or parsing raises.
    with open(file_path, encoding="ISO-8859-1") as f:
        org_lines = f.readlines()
    lowered_content = "".join(org_lines).lower()

    # Checks if file contains "end module" or "endmodule",
    # which only appears in case of module declaration.
    # If not, there is no need to look into the file any further,
    # so ignore it.
    if not syntax.has_module(lowered_content):
        return

    module_summary = {}
    procedure_functions = {}
    derived_types = {}

    # Extract the module names by inspecting each line in the file.
    preprocessed_lines = preprocessor.preprocess_lines(
        org_lines, file_path, True
    )
    module_names_lowered = []
    for line in preprocessed_lines:
        match = syntax.line_starts_pgm(line.lower())
        if match[0] and match[1] == "module":
            module_names_lowered.append(match[2].lower())

    # Map current file to the modules that it holds. The last element of
    # the list is the last modified time of the file.
    file_to_mod_mapper[file_path] = module_names_lowered + [
        get_file_last_modified_time(file_path)
    ]

    # If current file has subroutines, then extract the information of the
    # subroutines that are declared within the scope of any module and store
    # it in the module summary dictionary.
    if syntax.has_subroutine(lowered_content):
        populate_module_summary(
            preprocessed_lines,
            module_summary,
            procedure_functions,
            derived_types,
        )

    # Using collected function information, populate interface function
    # information by each module.
    populate_procedure_functions(procedure_functions, module_summary)

    # Populate actual module information (summary)
    # that will be written to the JSON file.
    for mod in module_names_lowered:
        mod_to_file_mapper[mod] = [file_path]
        mod_info_dict[mod] = {
            "exports": {},
            "symbol_types": {},
            "imports": {},
            "interface_functions": {},
            "derived_type_list": [],
        }
        if mod in module_summary:
            mod_info_dict[mod]["interface_functions"] = procedure_functions[
                mod
            ]
        if mod in derived_types:
            mod_info_dict[mod]["derived_type_list"] = derived_types[mod]
def populate_procedure_functions(procedure_functions, module_summary):
    """Fill in the procedure functions with the collected argument details.

    Every function listed under an interface of a module is replaced by the
    entry stored for that function in the module summary of the same module.
    Functions, and whole modules, without a matching entry in the module
    summary are left as they are.

    Params:
        procedure_functions (dict): A dictionary to hold interface-to-procedure
        function mappings, grouped by module. It is updated in place.
        module_summary (dict): A dictionary for holding module-to-subroutine-to-
        arguments mappings.

    Returns:
        None.
    """
    for mod_name, interfaces in procedure_functions.items():
        if mod_name not in module_summary:
            continue
        known_functions = module_summary[mod_name]
        for functions in interfaces.values():
            # Only existing keys are reassigned, so the dictionary keeps its
            # size while it is being iterated.
            for func_name in functions:
                if func_name in known_functions:
                    functions[func_name] = known_functions[func_name]
def populate_module_summary(
    f, module_summary, procedure_functions, derived_types
):
    """Collect subroutine, interface, and derived type details per module.

    The lines are visited in order. For every line that lies within the
    scope of a module, the subroutine, interface, and derived type
    extractors are invoked to fill the three dictionaries.

    Params:
        f: The source to scan; it is iterated line by line.
        module_summary (dict): A dictionary for holding module-to-subroutine-to-
        arguments mappings.
        procedure_functions (dict): A dictionary to hold interface-to-procedure
        function mappings.
        derived_types (dict): Dictionary that will hold module-to-derived type
        mapping.

    Returns:
        None.
    """
    active_module = None
    active_subroutine = None
    active_interface = None

    for raw_line in f:
        line = raw_line.lower()
        # Removing any inline comments
        if "!" in line:
            line = line.partition("!")[0].strip()

        # Detects module and interface entering line.
        pgm = syntax.line_starts_pgm(line)
        # Detects subroutine entering line.
        subroutine = syntax.subroutine_definition(line)
        end_pgm = syntax.pgm_end(line)

        # A "module procedure" line does not open a new module.
        opens_module = (
            pgm[0]
            and pgm[1].strip() == "module"
            and pgm[2].strip() != "procedure"
        )
        if opens_module:
            active_module = pgm[2].strip()
            module_summary[active_module] = {}
            procedure_functions[active_module] = {}
        elif end_pgm[0] and end_pgm[1] == "module":
            active_module = None

        # Only lines within the scope of a module carry the subroutine,
        # interface, and derived type information that is collected here.
        if not active_module:
            continue

        active_subroutine = extract_subroutine_info(
            pgm,
            end_pgm,
            module_summary,
            active_module,
            subroutine,
            active_subroutine,
            line,
        )
        active_interface = extract_interface_info(
            pgm,
            end_pgm,
            procedure_functions,
            active_module,
            active_interface,
            line,
        )
        extract_derived_type_info(end_pgm, active_module, derived_types)
def extract_subroutine_info(
    pgm, end_pgm, module_summary, current_modu, subroutine, current_subr, line
):
    """Record a subroutine declared within a module and its argument types.

    Depending on the line, one of three things happens:
      * a subroutine definition registers the subroutine and its arguments
        (all types initialized to None) in module_summary,
      * the end of a subroutine clears the current subroutine,
      * a variable declaration inside a subroutine assigns the declared type
        to every declared variable that is an argument of that subroutine.

    Params:
        pgm (tuple): Current program information (not used here).
        end_pgm (tuple): End of current program indicator.
        module_summary (dict): A dictionary for holding module-to-subroutine-to-
        arguments mappings.
        current_modu (str): Module name that current subroutine is located
        under.
        subroutine (tuple): Holds information of the subroutine.
        current_subr (str): Current subroutine name.
        line (str): A line from Fortran source code.

    Returns:
        (current_subr) Currently handling subroutine name.
    """
    # If subroutine encountered,
    if subroutine[0]:
        # extract the name,
        current_subr = subroutine[1][0]
        # extract any existing arguments,
        subroutine_args = subroutine[1][1]
        # and populate the module summary dictionary.
        # Since we cannot find out about the argument types
        # just looking at the argument, initialize the types
        # to None as default.
        module_summary[current_modu][current_subr] = {
            arg: None for arg in subroutine_args if arg
        }
    elif end_pgm[0] and end_pgm[1] == "subroutine":
        # Indication of end of subroutine.
        current_subr = None
    elif current_subr:
        variable_dec = syntax.variable_declaration(line)
        if variable_dec[0] and not syntax.line_is_func_start(line):
            declared_type = variable_dec[1]
            variables = variable_dec[2]
            # Handle syntax like:
            #   precision, dimension(0:tmax) :: means, vars
            #   precision, parameter :: var = 1.234
            if "precision" in variables or "dimension" in variables:
                # Handle dimension (array)
                if "dimension" in variables:
                    declared_type = "Array"
                # Extract only variable names followed by '::'
                variables = variables.partition("::")[-1].strip()
                if "=" in variables:
                    # Remove assignment syntax and only extract variable names
                    variables = variables.partition("=")[0].strip()

            subr_args = module_summary[current_modu].get(current_subr)
            for var in variables.split(","):
                # Start from the declared type for every variable, so that an
                # implicit array found earlier in the same declaration does
                # not turn the variables that follow it into arrays as well.
                var_type = declared_type
                # Search for an implicit array variable declaration
                array_var = syntax.line_has_implicit_array(var)
                if array_var[0]:
                    var = array_var[1]
                    var_type = "Array"

                # Map each subroutine argument with its type
                name = var.strip()
                if subr_args is not None and name in subr_args:
                    subr_args[name] = var_type

    return current_subr
def extract_interface_info(
    pgm, end_pgm, procedure_functions, current_modu, current_intr, line
):
    """Record an INTERFACE and the procedure functions listed under it.

    An interface entering line registers the interface under the current
    module, an interface ending line clears the current interface, and a
    line containing "procedure" within an interface adds the listed function
    names (each mapped to None) to that interface.

    Params:
        pgm (tuple): Current program information.
        end_pgm (tuple): End of current program indicator.
        procedure_functions (dict): A dictionary to hold interface-to-procedure
        function mappings.
        current_modu (str): Module name that current interface is located
        under.
        current_intr (str): Current interface name.
        line (str): A line from Fortran source code.

    Returns:
        (current_intr) Currently handling interface name.
    """
    if pgm[0] and pgm[1] == "interface":
        current_intr = pgm[2]
        procedure_functions[current_modu][current_intr] = {}
    elif end_pgm[0] and end_pgm[1] == "interface":
        current_intr = None
    elif current_intr and "procedure" in line:
        # Partition the string, which should have one of syntaxes like:
        #   module procedure __function_name__
        #   module procedure , __function_name__
        #   module procedure __function_name__ , __function_name__ , ...
        # by keyword procedure. Then, only extract function names, which
        # always will be located at [-1] after partitioning. Finally, split
        # the string of function names by comma.
        interface_functions = procedure_functions[current_modu][current_intr]
        for func in line.partition("procedure")[-1].split(","):
            func = func.strip()
            # The leading comma of the second syntax, and a bare
            # "procedure" keyword, leave an empty piece behind; it is not a
            # function name and must not be stored.
            if func:
                interface_functions[func] = None

    return current_intr
def extract_derived_type_info(end_pgm, current_modu, derived_types):
    """Record a derived type declared under the current module.

    Nothing happens unless the line ends a "type" program unit. In that
    case the type name is appended to the list kept for the current module,
    creating the list first when the module has none yet.

    Params:
        end_pgm (tuple): End of current program indicator.
        current_modu (str): Current module name.
        derived_types (dict): Dictionary that will hold module-to-derived type
        mapping.

    Returns:
        None.
    """
    ends_derived_type = end_pgm[0] and end_pgm[1] == "type"
    if not ends_derived_type:
        return

    type_name = end_pgm[2]
    derived_types.setdefault(current_modu, []).append(type_name)
def get_file_last_modified_time(file_path):
    """Return the last modified time of a file.

    The value is read from the file status reported by os.stat.

    Params:
        file_path (str): File path that is assumed to exist in the directory.

    Returns:
        int: Last modified time represented as an integer.
    """
    # Position of the modification time within the os.stat result when the
    # result is indexed like a tuple.
    modified_time_index = 8
    return os.stat(file_path)[modified_time_index]
def update_mod_info_json(module_log_file_path, mode_mapper_dict):
    """Write the exports, symbol types, and imports of modules to the log.

    For every module listed under "modules" in the mapper, the matching
    entry under "mod_info" in the module log file is updated and the whole
    log is written back to the same file.

    Params:
        module_log_file_path (str): Path to module log file.
        mode_mapper_dict (dict): A dictionary that holds all information of
        a module(s). The keys "exports", "symbol_types", "subprograms",
        "modules", and "imports" are read.
    """
    exports = mode_mapper_dict["exports"]

    # Work out the type of every exported symbol, grouped by module.
    symbol_types = {}
    for mod_name, mod_symbols in exports.items():
        types_in_module = {}
        for sym in mod_symbols:
            if sym in mode_mapper_dict["symbol_types"]:
                # The second element of the stored entry is used as the type.
                types_in_module[sym] = mode_mapper_dict["symbol_types"][sym][1]
                continue
            subprograms = mode_mapper_dict["subprograms"]
            if mod_name in subprograms and sym in subprograms[mod_name]:
                types_in_module[sym] = "func"
        symbol_types[mod_name] = types_in_module

    with open(module_log_file_path) as json_f:
        module_logs = json.load(json_f)

    for module in mode_mapper_dict["modules"]:
        entry = module_logs["mod_info"][module]
        entry["exports"] = exports[module]
        entry["symbol_types"] = symbol_types[module]
        known_imports = mode_mapper_dict["imports"]
        # A module without recorded imports gets an empty list.
        entry["imports"] = (
            known_imports[module] if module in known_imports else []
        )
        module_logs["mod_info"][module] = entry

    with open(module_log_file_path, "w+") as json_f:
        json_f.write(json.dumps(module_logs, indent=2))
def mod_file_log_generator(
    root_dir_path=None,
    module_log_file_name=None,
):
    """Scan a directory for Fortran modules and write the module log file.

    This function is like a main function that invokes the other functions
    to perform all checks and population of mappers. Loading of and writing
    to the JSON file happens in this function.

    Args:
        root_dir_path: Directory to examine for Fortran files. Defaults to
            "." when None.
        module_log_file_name: Name of the module log file, which is placed
            directly under root_dir_path. Defaults to "modFileLog.json"
            when None.

    Returns:
        str: Path to the module log file that was written.
    """
    if root_dir_path is None:
        root_dir_path = "."
    # The parameter defaults to None, which cannot be concatenated to the
    # directory below, so fall back to the default log file name.
    if module_log_file_name is None:
        module_log_file_name = "modFileLog.json"

    module_log_file_path = root_dir_path + "/" + module_log_file_name

    # If module log file already exists, simply load data.
    if isfile(module_log_file_path):
        with open(module_log_file_path) as json_f:
            module_logs = json.load(json_f)
        # This will hold the file-to-module and file last modified date info.
        # One thing to notice is that the last index will be a place for
        # last modified time for file.
        # Structure (One-to-Many):
        #   {
        #       "__file_name__" : ["__module_name__",...,"last_modified_time"],
        #       ...,
        #   }
        file_to_mod_mapper = module_logs["file_to_mod"]
        # This will hold the module-to-file mapping, so any program that
        # accesses module log JSON file can directly access the file path
        # with the module name specified with "USE" without looping through
        # the file_to_mod mapper.
        # Structure (as written by populate_mappers):
        #   {
        #       "__module_name__" : ["__file_path__"],
        #       ...,
        #   }
        mod_to_file_mapper = module_logs["mod_to_file"]
        mod_info_dict = module_logs["mod_info"]
    else:
        file_to_mod_mapper = {}
        mod_to_file_mapper = {}
        mod_info_dict = {}

    for file_path in get_file_list_in_directory(root_dir_path):
        modules_from_file(
            file_path, file_to_mod_mapper, mod_to_file_mapper, mod_info_dict
        )

    module_log = {
        "file_to_mod": file_to_mod_mapper,
        "mod_to_file": mod_to_file_mapper,
        "mod_info": mod_info_dict,
    }

    with open(module_log_file_path, "w+") as f:
        f.write(json.dumps(module_log, indent=2))

    return module_log_file_path
if __name__ == "__main__":
    # Script entry point: read the root directory and the log file name from
    # the command line, then generate (or refresh) the module log file.
    command_line_options = parse_args()
    root_dir_path, module_log_file = command_line_options
    log_file_path = mod_file_log_generator(root_dir_path, module_log_file)