Source code for delphi.translators.for2py.syntax

""" Various utility routines for processing Fortran syntax.  """

import re
from typing import Tuple, Optional

################################################################################
#                                                                              #
#                                   COMMENTS                                   #
#                                                                              #
################################################################################

[docs]def line_is_comment(line: str) -> bool:
    """From FORTRAN Language Reference
    (https://docs.oracle.com/cd/E19957-01/805-4939/z40007332024/index.html)

    A line with a c, C, '*', d, D, or ! in column one is a comment line, except
    that if the -xld option is set, then the lines starting with D or d are
    compiled as debug lines. The d, D, and ! are nonstandard.

    If you put an exclamation mark (!) in any column of the statement field,
    except within character literals, then everything after the ! on that
    line is a comment.

    A totally blank line is a comment line (we ignore this here).

    Args:
        line

    Returns:
        True iff line is a comment, False otherwise.

    """

    return (line[0] in "cCdD*!")


################################################################################
#                                                                              #
#                              CONTINUATION LINES                              #
#                                                                              #
################################################################################

FIXED_FORM_EXT = ('.f', '.for', '.blk', '.inc', '.gin')

[docs]def line_is_continuation(line: str, f_ext: str) -> bool:
    """ From FORTRAN 77  Language Reference
        (https://docs.oracle.com/cd/E19957-01/805-4939/6j4m0vn6l/index.html)

    A statement takes one or more lines; the first line is called the initial line; 
    the subsequent lines are called the continuation lines.  You can format a 
    source line in either of two ways: Standard fixed format, or Tab format.

    In Standard Fixed Format, continuation lines are identified by a nonblank, 
    nonzero in column 6.

    Tab-Format source lines are defined as follows: A tab in any of columns 
    1 through 6, or an ampersand in column 1, establishes the line as a 
    tab-format source line.  If the tab is the first nonblank character, the 
    text following the tab is scanned as if it started in column 7.
    Continuation lines are identified by an ampersand (&) in column 1, or a 
    nonzero digit after the first tab.    

    Args:
        line
    Returns:
        True iff line is a continuation line, else False.  Currently this
        is used only for fixed-form input files, i.e., f_ext in ('.f', '.for')
    """

    if line_is_comment(line):
        return False

    if f_ext in FIXED_FORM_EXT:
        if line[0] == '\t':
            return (line[1] in "123456789")
        else:
            return (len(line) > 5 and not (line[5] == ' ' or line[5] == '0'))
 
    if line[0] == '&':
        return True

    return False


[docs]def line_is_continued(line: str) -> bool:
    """
    Args:
        line
    Returns:
        True iff line is continued on the next line.  This is a Fortran-90
        feature and indicated by a '&' at the end of the line.
    """

    if line_is_comment(line):
        return False

    llstr = line.rstrip()
    return len(llstr) > 0 and llstr[-1] == "&"


################################################################################
#                                                                              #
#                           FORTRAN LINE PROCESSING                            #
#                                                                              #
################################################################################

# Regular expressions that specify patterns for various Fortran constructs.

SUB_START = r"\s*subroutine\s+(\w+)\s*\("
RE_SUB_START = re.compile(SUB_START, re.I)

# The start of a function can look like any of the following:
#
#       function myfunc(...
#       integer function myfunc(...
#       double precision myfunc(...

FN_START = r"\s*(\w*\s*){0,2}function\s+(\w+)\s*\("
RE_FN_START = re.compile(FN_START, re.I)

PGM_UNIT = r"\s*\w*\s*(program|module|subroutine|(\w*\s*){0,2}function)\s+(\w+)"
RE_PGM_UNIT_START = re.compile(PGM_UNIT, re.I)

PGM_UNIT_SEP = r"\s+contains(\W+)"
RE_PGM_UNIT_SEP = re.compile(PGM_UNIT_SEP, re.I)

PGM_UNIT_END = r"\s*[a-z]*\s*end\s+(program|module|subroutine|function)\s+"
RE_PGM_UNIT_END = re.compile(PGM_UNIT_END, re.I)

SUBPGM_END = r"\s*end\s+"
RE_SUBPGM_END = re.compile(SUBPGM_END, re.I)

ASSG_STMT = r"\s*(\d+|&)?\s*.*=\s*"
RE_ASSG_STMT = re.compile(ASSG_STMT, re.I)

INCLUDE_STMT_1 = r"\s*(\d+)?\s*include\s+'(\w+(\.\w*)?)'"
RE_INCLUDE_STMT_1 = re.compile(INCLUDE_STMT_1, re.I)
INCLUDE_STMT_2 = r'\s*(\d+)?\s*include\s+"(\w+(\.\w*)?)"'
RE_INCLUDE_STMT_2 = re.compile(INCLUDE_STMT_2, re.I)

CALL_STMT = r"\s*(\d+|&)?\s*call\s*"
RE_CALL_STMT = re.compile(CALL_STMT, re.I)

IO_STMT = r"\s*(\d+|&)?\s*(open|close|read|write|print|format|rewind)\W*"
RE_IO_STMT = re.compile(IO_STMT, re.I)

DO_STMT = r"\s*(\d+|&)?\s*do\s*"
RE_DO_STMT = re.compile(DO_STMT, re.I)

ENDDO_STMT = r"\s*(\d+|&)?\s*end\s*do\s*"
RE_ENDDO_STMT = re.compile(ENDDO_STMT, re.I)

ENDIF_STMT = r"\s*(\d+|&)?\s*end\s*if\s*"
RE_ENDIF_STMT = re.compile(ENDIF_STMT, re.I)

GOTO_STMT = r"\s*(\d+|&)?\s*go\s*to\s*"
RE_GOTO_STMT = re.compile(GOTO_STMT, re.I)

IF_STMT = r"\s*(\d+|&)?\s*(if|elseif|else)\s*"
RE_IF_STMT = re.compile(IF_STMT, re.I)

PAUSE_STMT = r"\s*(\d+|&)?\s*pause\s*"
RE_PAUSE_STMT = re.compile(PAUSE_STMT, re.I)

RETURN_STMT = r"\s*(\d+|&)?\s*return\s*"
RE_RETURN_STMT = re.compile(RETURN_STMT, re.I)

CYCLE_STMT = r"\s*(\d+|&)?\s*cycle\s*"
RE_CYCLE_STMT = re.compile(CYCLE_STMT, re.I)

EXIT_STMT = r"\s*(\d+|&)?\s*exit\s*"
RE_EXIT_STMT = re.compile(EXIT_STMT, re.I)

SAVE_STMT = r"\s*(\d+|&)?\s*save\s*"
RE_SAVE_STMT = re.compile(SAVE_STMT, re.I)

SELECT_STMT = r"\s*(\d+|&)?\s*select\s*case\s*"
RE_SELECT_STMT = re.compile(SELECT_STMT, re.I)

ENDSELECT_STMT = r"\s*(\d+|&)?\s*end\s*select\s*"
RE_ENDSELECT_STMT = re.compile(ENDSELECT_STMT, re.I)

CASE_STMT = r"\s*(\d+|&)?\s*case\s*"
RE_CASE_STMT = re.compile(CASE_STMT, re.I)

STOP_STMT = r"\s*(\d+|&)?\s*stop\s*"
RE_STOP_STMT = re.compile(STOP_STMT, re.I)

TYPE_NAMES = r"^\s*(integer|real|double\s+precision|complex|character|logical" \
             r"|dimension|type|parameter)\W*"
RE_TYPE_NAMES = re.compile(TYPE_NAMES, re.I)

SUB_DEF = r"\s*subroutine\s+(?P<subroutine>(?P<name>.*?)\((?P<args>.*)\))"
RE_SUB_DEF = re.compile(SUB_DEF, re.I)

END_SUB = r"\s*end\s+subroutine\s+"
END_MODULE = r"\s*end\s+module\s+"

PGM_START = r"\s*(?P<pgm>(interface|module))\s+(?P<name>(\w*))"
RE_PGM_START = re.compile(PGM_START, re.I)

PGM_END = r"\s*[a-z]*\s*end\s+(?P<pgm>(module|subroutine|function|interface|type))\s*(?P<name>(\w*))"
RE_PGM_END = re.compile(PGM_END, re.I)

VARIABLE_DEC = r"\s*(?P<type>(double|float|int|integer|logical|real|str|string)),?\s(::)*(?P<variables>(.*))"
RE_VARIABLE_DEC = re.compile(VARIABLE_DEC, re.I)

DERIVED_TYPE_DEF = r"\s*type\s*(?P<type>(?P<name>(\w*)))"
RE_DERIVED_TYPE_DEF = re.compile(DERIVED_TYPE_DEF, re.I)

DERIVED_TYPE_VAR = r"\s*type\s*\((?P<type>.*)\)\s(?P<variables>(.*))"
RE_DERIVED_TYPE_VAR = re.compile(DERIVED_TYPE_VAR, re.I)

CHARACTER = r"\s*(character\*(\(\*\)|\d*))\s*(?P<variables>.*)"
RE_CHARACTER = re.compile(CHARACTER, re.I)

IMP_ARRAY = r"(?P<var>\w*)\s*\("
RE_IMP_ARRAY = re.compile(IMP_ARRAY, re.I)

# EXECUTABLE_CODE_START is a list of regular expressions matching
# lines that can start the executable code in a program or subroutine.
# It is used to for handling comments internal to subroutine bodies.
EXECUTABLE_CODE_START = [
    RE_ASSG_STMT,
    RE_CALL_STMT,
    RE_CASE_STMT,
    RE_CYCLE_STMT,
    RE_DO_STMT,
    RE_ENDDO_STMT,
    RE_ENDIF_STMT,
    RE_ENDSELECT_STMT,
    RE_EXIT_STMT,
    RE_GOTO_STMT,
    RE_IF_STMT,
    RE_IO_STMT,
    RE_PAUSE_STMT,
    RE_RETURN_STMT,
    RE_SELECT_STMT,
    RE_STOP_STMT
]

[docs]def line_starts_subpgm(line: str) -> Tuple[bool, Optional[str]]:
    """
    Indicates whether a line in the program is the first line of a subprogram
    definition.

    Args:
        line
    Returns:
       (True, f_name) if line begins a definition for subprogram f_name;
       (False, None) if line does not begin a subprogram definition.
    """

    match = RE_SUB_START.match(line)
    if match != None:
        f_name = match.group(1)
        return (True, f_name)

    match = RE_FN_START.match(line)
    if match is not None:
        f_name = match.group(2)
        return (True, f_name)

    return (False, None)


[docs]def line_is_pgm_unit_start(line):
    match = RE_PGM_UNIT_START.match(line)
    return match is not None


[docs]def line_is_pgm_unit_end(line):
    match = RE_PGM_UNIT_END.match(line)
    return match is not None


[docs]def line_is_pgm_unit_separator(line):
    match = RE_PGM_UNIT_SEP.match(line)
    return match is not None


[docs]def program_unit_name(line:str) -> str:
   """Given a line that starts a program unit, i.e., a program, module,
      subprogram, or function, this function returns the name associated
      with that program unit."""
   match = RE_PGM_UNIT_START.match(line)
   assert match is not None
   return match.group(2)


[docs]def line_ends_subpgm(line: str) -> bool:
    """
    Args:
        line
    Returns:
        True if line is the last line of a subprogram definition, else False.
    """
    match = RE_SUBPGM_END.match(line)
    return match is not None


[docs]def line_is_executable(line: str) -> bool:
    """line_is_executable() returns True iff the line can start an
       executable statement in a program."""

    if line_is_comment(line):
        return False

    if re.match(RE_TYPE_NAMES, line):
        return False

    for exp in EXECUTABLE_CODE_START:
        if re.match(exp, line) is not None:
            return True

    return False
            

[docs]def line_is_include(line: str) -> str:
    """ line_is_include() : if the argument is an INCLUDE statement, returns
        the argument to the INCLUDE statement (a file name); otherwise
        returns None.
    """
    match = RE_INCLUDE_STMT_1.match(line)
    if match is not None:
        return match.group(2)

    match = RE_INCLUDE_STMT_2.match(line)
    if match is not None:
        return match.group(2)

    return None

[docs]def line_has_implicit_array(line: str) -> Tuple[bool, Optional[str]]:
    """
    This function checks for the implicit array declaration (i.e.
    real myArray(10)) without DIMENSION keyword.

    Args:
        line
    Returns:
        (True, var) if the line holds implicit array declaration;
        (False, None) if the line does not hold implicit array declaration
    """

    match = RE_IMP_ARRAY.match(line)
    if match:
        var = match['var']
        return (True, var)
    return (False, None)

[docs]def has_subroutine(lines: str) -> bool:
    """
    This function searches end of subroutine syntax
    from the passed string of lines to find out whether
    any subroutines are presented in lines or not.

    Args:
        lines
    Returns:
        (True) if subroutine end syntax presented in lines.
        (False) if subroutine end syntax does not present in lines.
    """
    return re.search(END_SUB, lines)

[docs]def has_module(lines: str) -> bool:
    """This function searches end of module syntax
    from the passed string of lines to find out whether
    any modules are presented in lines or not.

    Args:
        lines
    Returns:
        (True) if module end syntax presented in lines.
        (False) if module end syntax does not present in lines.
    """
    return re.search(END_MODULE, lines)


[docs]def line_is_func_start(line: str) -> bool:
    """
    This function checks whether the line indicates a function entry or not.

    Args:
        line.
    Returns:
        (True) if the line is beginning of function definition.
        (False) if the line is not a beginning of function dedfinition.
    """
    match = RE_FN_START.match(line)
    return match is not None

[docs]def variable_declaration(line: str) -> Tuple[bool, Optional[list]]:
    """
    Indicates whether a line in the program is a variable declaration or not.

    Args:
        line
    Returns:
       (True, v_type, list) if line has one of variable declaration syntax;
       (False, None, None) if line does not have any variable declaration syntax .
    """

    match_var = RE_VARIABLE_DEC.match(line)
    if match_var:
        v_type = match_var['type']
        variable_list = match_var['variables']
        return (True, v_type, variable_list)
    
    match_dtype = RE_DERIVED_TYPE_VAR.match(line)
    if match_dtype:
        v_type = match_dtype['type']
        variable_list = match_dtype['variables']
        return (True, v_type, variable_list)

    match_char = RE_CHARACTER.match(line)
    if match_char:
        variable_list = match_char['variables']
        return (True, "character", variable_list)

    return (False, None, None)

[docs]def subroutine_definition(line: str) -> Tuple[bool, Optional[list]]:
    """
    Indicates whether a line in the program is the first line of a subroutine
    definition and extracts subroutine name and the arguments.

    Args:
        line
    Returns:
       (True, list) if line begins a definition for subroutine;
       (False, None) if line does not begin a subroutine definition.
    """
    match = RE_SUB_DEF.match(line)
    if match:
        f_name = match['name'].strip()
        f_args = match['args'].replace(' ','').split(',')
        return (True, [f_name, f_args])

    return (False, None)

[docs]def pgm_end(line: str) -> Tuple[bool, str, str]:
    """
    Indicates whether a line is the last line of a program.

    Args:
        line
    Returns:
       (True, pgm, name) if line is the last line of a definition
       either for interface or module;
       (False, None, None) if line is not the last line of either
       interface or module.
    """
    match = RE_PGM_END.match(line)
    if match:
        pgm = match['pgm']
        pgm_name = match['name']
        return (True, pgm, pgm_name)
    return (False, None, None)


[docs]def line_starts_pgm(line: str) -> Tuple[bool, Optional[str], Optional[str]]:
    """
    Indicates whether a line is the first line of either interface or module.

    Args:
        line
    Returns:
       (True, pgm, namee) if line begins a definition either for interface or module;
       (False, None, None) if line is not a beginning of either interface or module.
    """

    match = RE_PGM_START.match(line)
    if match:
        pgm = match['pgm'].strip()
        name = match['name'].strip()
        return (True, pgm, name)
    return (False, None, None)


################################################################################
#                                                                              #
#                       FORTRAN KEYWORDS AND INTRINSICS                        #
#                                                                              #
################################################################################

# F_KEYWDS : Fortran keywords
#      SOURCE: Keywords in Fortran Wiki 
#              (http://fortranwiki.org/fortran/show/Keywords)
#
#              tutorialspoint.com
#              (https://www.tutorialspoint.com/fortran/fortran_quick_guide.htm) 
#
# NOTE1: Because of the way this code does tokenization, it currently does
#       not handle keywords that consist of multiple space-separated words
#       (e.g., 'do concurrent', 'sync all').
#
# NOTE2: This list of keywords is incomplete.

F_KEYWDS = frozenset(['abstract', 'allocatable', 'allocate', 'assign',
    'assignment', 'associate', 'asynchronous', 'backspace', 'bind', 'block',
    'block data', 'call', 'case', 'character', 'class', 'close', 'codimension',
    'common', 'complex', 'contains', 'contiguous', 'continue', 'critical',
    'cycle', 'data', 'deallocate', 'default', 'deferred', 'dimension', 'do',
    'double precision', 'else', 'elseif', 'elsewhere', 'end', 'endif', 'endfile', 
    'endif', 'entry', 'enum', 'enumerator', 'equivalence', 'error', 'exit', 
    'extends', 'external', 'final', 'flush', 'forall', 'format', 'function', 
    'generic', 'goto', 'if', 'implicit', 'import', 'in', 'include', 'inout', 
    'inquire', 'integer', 'intent', 'interface', 'intrinsic', 'kind', 'len', 
    'lock', 'logical', 'module', 'namelist', 'non_overridable', 'nopass', 'nullify',
    'only', 'open', 'operator', 'optional', 'out', 'parameter', 'pass', 'pause',
    'pointer', 'print', 'private', 'procedure', 'program', 'protected', 'public',
    'pure', 'read', 'real', 'recursive', 'result', 'return', 'rewind', 'rewrite',
    'save', 'select', 'sequence', 'stop', 'submodule', 'subroutine', 'sync',
    'target', 'then', 'type', 'unlock', 'use', 'value', 'volatile', 'wait',
    'where', 'while', 'write'])

# F_INTRINSICS : intrinsic functions
#     SOURCE: GNU gfortran manual: 
#     https://gcc.gnu.org/onlinedocs/gfortran/Intrinsic-Procedures.html

F_INTRINSICS = frozenset(['abs', 'abort', 'access', 'achar', 'acos', 'acosd', 
    'acosh', 'adjustl', 'adjustr', 'aimag', 'aint', 'alarm', 'all', 'allocated', 
    'and', 'anint', 'any', 'asin', 'asind', 'asinh', 'associated', 'atan', 
    'atand', 'atan2', 'atan2d', 'atanh', 'atomic_add', 'atomic_and', 'atomic_cas', 
    'atomic_define', 'atomic_fetch_add', 'atomic_fetch_and', 'atomic_fetch_or', 
    'atomic_fetch_xor', 'atomic_or', 'atomic_ref', 'atomic_xor', 'backtrace', 
    'bessel_j0', 'bessel_j1', 'bessel_jn', 'bessel_y0', 'bessel_y1', 'bessel_yn', 
    'bge', 'bgt', 'bit_size', 'ble', 'blt', 'btest', 'c_associated', 
    'c_f_pointer', 'c_f_procpointer', 'c_funloc', 'c_loc', 'c_sizeof', 'ceiling', 
    'char', 'chdir', 'chmod', 'cmplx', 'co_broadcast', 'co_max', 'co_min', 
    'co_reduce', 'co_sum', 'command_argument_count', 'compiler_options', 
    'compiler_version', 'complex', 'conjg', 'cos', 'cosd', 'cosh', 'cotan', 
    'cotand', 'count', 'cpu_time', 'cshift', 'ctime', 'date_and_time', 'dble', 
    'dcmplx', 'digits', 'dim', 'dot_product', 'dprod', 'dreal', 'dshiftl', 
    'dshiftr', 'dtime', 'eoshift', 'epsilon', 'erf', 'erfc', 'erfc_scaled', 
    'etime', 'event_query', 'execute_command_line', 'exit', 'exp', 'exponent', 
    'extends_type_of', 'fdate', 'fget', 'fgetc', 'floor', 'flush', 'fnum', 'fput', 
    'fputc', 'fraction', 'free', 'fseek', 'fstat', 'ftell', 'gamma', 'gerror', 
    'getarg', 'get_command', 'get_command_argument', 'getcwd', 'getenv', 
    'get_environment_variable', 'getgid', 'getlog', 'getpid', 'getuid', 'gmtime', 
    'hostnm', 'huge', 'hypot', 'iachar', 'iall', 'iand', 'iany', 'iargc', 'ibclr', 
    'ibits', 'ibset', 'ichar', 'idate', 'ieor', 'ierrno', 'image_index', 'index', 
    'int', 'int2', 'int8', 'ior', 'iparity', 'irand', 'is_iostat_end', 
    'is_iostat_eor', 'isatty', 'ishft', 'ishftc', 'isnan', 'itime', 'kill', 
    'kind', 'lbound', 'lcobound', 'leadz', 'len', 'len_trim', 'lge', 'lgt', 'link', 
    'lle', 'llt', 'lnblnk', 'loc', 'log', 'log10', 'log_gamma', 'logical', 'long', 
    'lshift', 'lstat', 'ltime', 'malloc', 'maskl', 'maskr', 'matmul', 'max', 
    'maxexponent', 'maxloc', 'maxval', 'mclock', 'mclock8', 'merge', 'merge_bits', 
    'min', 'minexponent', 'minloc', 'minval', 'mod', 'modulo', 'move_alloc', 
    'mvbits', 'nearest', 'new_line', 'nint', 'norm2', 'not', 'null', 'num_images', 
    'or', 'pack', 'parity', 'perror', 'popcnt', 'poppar', 'precision', 'present', 
    'product', 'radix', 'ran', 'rand', 'random_number', 'random_seed', 'range', 
    'rank ', 'real', 'rename', 'repeat', 'reshape', 'rrspacing', 'rshift', 
    'same_type_as', 'scale', 'scan', 'secnds', 'second', 'selected_char_kind', 
    'selected_int_kind', 'selected_real_kind', 'set_exponent', 'shape', 'shifta', 
    'shiftl', 'shiftr', 'sign', 'signal', 'sin', 'sind', 'sinh', 'size', 'sizeof', 
    'sleep', 'spacing', 'spread', 'sqrt', 'srand', 'stat', 'storage_size', 'sum', 
    'symlnk', 'system', 'system_clock', 'tan', 'tand', 'tanh', 'this_image', 
    'time', 'time8', 'tiny', 'trailz', 'transfer', 'transpose', 'trim', 'ttynam', 
    'ubound', 'ucobound', 'umask', 'unlink', 'unpack', 'verify', 'xor'])


################################################################################
#                                                                              #
#                       NEGATED OPERATIONS MAPPING                             #
#                                                                              #
################################################################################

NEGATED_OP = {
                ".le." : ".gt.",
                ".ge." : ".lt.",
                ".lt." : ".ge.",
                ".gt." : ".le.",
                ".eq." : ".ne.",
                ".ne." : ".eq.",
                "<=" : ">",
                ">=" : "<",
                "==" : "!=",
                "<" : ">=",
                ">" : "<=",
                "!=" : "==",
              }