"""
This module implements functions to preprocess Fortran source files prior to
parsing to fix up some constructs (such as continuation lines) that are
problematic for the OpenFortranParser front end.
Author:
Saumya Debray
"""
import os
import sys
import re
from collections import OrderedDict
from typing import List, Dict, Tuple
from delphi.translators.for2py.syntax import (
line_is_comment,
line_is_continuation,
line_is_continued,
line_is_include,
)
def merge_continued_lines(lines, f_ext):
    """Fold Fortran continuation lines into the lines they belong to.

    Two kinds of line are handled:

    * a line that line_is_continuation() reports as a continuation of the
      previous line: its first non-blank character (the continuation
      marker) is dropped and the remainder is appended to the previous line;
    * a line that line_is_continued() reports as being continued on the
      next line: its last non-blank character (the continuation marker) is
      dropped and the next line is appended to it.

    The argument f_ext gives the file extension of the input file; it is
    forwarded to line_is_continuation(), which uses it to decide between
    fixed-form and free-form continuation syntax.

    The list is modified in place and is also returned.
    """
    changed = True
    # Keep sweeping over the list until a complete sweep merges nothing.
    # A single sweep is not enough: the index is advanced after every merge,
    # so the line that slides into the current slot is only looked at on the
    # following sweep.
    while changed:
        changed = False
        idx = 0
        while idx < len(lines):
            current = lines[idx]
            if line_is_continuation(current, f_ext):
                assert idx > 0, "Weird continuation line (line {}): {}".format(
                    idx + 1, current
                )
                # strip the leading continuation character
                tail = current.lstrip()[1:]
                lines[idx - 1] = (
                    lines[idx - 1].rstrip() + " " + tail.lstrip() + "\n"
                )
                del lines[idx]
                changed = True
            elif line_is_continued(current):
                assert idx < len(lines) - 1  # there must be a next line
                # strip the trailing continuation character
                head = current.rstrip()[:-1].rstrip()
                lines[idx] = head + " " + lines[idx + 1].lstrip()
                del lines[idx + 1]
                changed = True
            idx += 1
    return lines
def path_to_target(infile, target):
    """Return the path at which the file `target` should be looked up.

    Args:
        infile: path of the file that refers to `target`.
        target: path of the referenced file, as written in `infile`.

    Returns:
        `target` itself if it starts with "/" (an absolute path) or if
        `infile` contains no "/"; otherwise `target` prefixed with the
        directory part of `infile`.
    """
    # startswith() instead of target[0] so that an empty target does not
    # raise IndexError.
    if target.startswith("/"):
        return target

    # If infile has a directory part, interpret target relative to it.
    directory, slash, _ = infile.rpartition("/")
    if slash:
        return "{}/{}".format(directory, target)

    # Otherwise target is already relative to the right place.
    return target
def process_includes(lines, infile):
    """Expand INCLUDE statements, much like #include is expanded in C.

    Every line for which line_is_include() returns a file name is replaced
    by the preprocessed lines of that file.  The file is located with
    path_to_target(), i.e. relative to `infile` unless it is given as an
    absolute path.  The scan is repeated until no INCLUDE statement is
    left.

    The input list is not modified; if anything is expanded a new list is
    returned.
    """
    found_include = True
    while found_include:
        found_include = False
        include_positions = [
            pos
            for pos, text in enumerate(lines)
            if line_is_include(text) is not None
        ]

        # Expand from the last INCLUDE to the first, so that splicing in
        # the contents of one file does not shift the positions of the
        # INCLUDE statements still waiting to be expanded.
        for pos in include_positions[::-1]:
            found_include = True
            included_name = line_is_include(lines[pos])
            assert included_name is not None
            included_path = path_to_target(infile, included_name)
            included_lines = get_preprocessed_lines_from_file(included_path)
            before, after = lines[:pos], lines[pos + 1:]
            lines = before + included_lines + after
    return lines
def refactor_select_case(lines):
    """Give open-ended ranges such as those in CASE statements both bounds.

    A range with no lower bound, e.g. ``(:3)``, becomes ``('-Inf':3)`` and a
    range with no upper bound, e.g. ``(3:)``, becomes ``(3:'Inf')``, so that
    every range has the form i:j.  This is done so that the FortranOFP
    recognizes the <:3> and <3:> structures properly.

    Every open-ended range on a line is rewritten using its own bound.

    NOTE(review): the patterns are applied to every line; nothing here
    checks that the line is actually a CASE statement -- confirm that this
    is intended.

    Args:
        lines: list of source lines.

    Returns:
        The same list object, with its elements rewritten in place.
    """
    prefix_regex = re.compile(r"([(,])\s*:\s*(-?[\d\w+])", re.I)
    suffix_regex = re.compile(r"(-?[\d\w+])\s*:\s*([),])", re.I)

    for i, code_line in enumerate(lines):
        # Backreferences make each match keep its own delimiter and bound.
        # Building the replacement text from the first match only would
        # overwrite every later range on the line with the first one.
        code_line = prefix_regex.sub(r"\1'-Inf':\2", code_line)
        code_line = suffix_regex.sub(r"\1:'Inf'\2", code_line)
        lines[i] = code_line
    return lines
# The regular expressions defined below are used for processing implicit array
# declarations, which the preprocessor converts into explicit array
# declarations.  All of them are compiled case-insensitively.
#
# BASE_TYPES: a line that starts (after optional indentation) with one of the
# base type keywords followed by whitespace.  Groups: (1) the indentation,
# (2) the type keyword, (3) the remainder of the line.
BASE_TYPES = r"^(\s*)(integer|real|double\s+precision|complex|character|logical)\s+(.*)"
RE_BASE_TYPES = re.compile(BASE_TYPES, re.I)
# KWDS: text that begins with the keyword DIMENSION or FUNCTION; used to
# reject lines that are not implicit array declarations.
KWDS = r"\s*(DIMENSION|FUNCTION)\s*.*"
RE_KWDS = re.compile(KWDS, re.I)
# IMPLICIT_ARRAY: a name immediately followed by a single parenthesized word,
# e.g. "a(10)" or "b(n)".  Groups: (1) the name, (2) the size.
IMPLICIT_ARRAY = r"(\w+)\((\w+)\)"
RE_IMPLICIT_ARRAY = re.compile(IMPLICIT_ARRAY, re.I)
# VAR_OR_ARRAY: a name optionally followed by a parenthesized word.
# Groups: (1) the name, (2) the size including its parentheses (None for a
# plain variable), (3) the size without the parentheses.
VAR_OR_ARRAY = r"\s*(\w+)(\((\w+)\))?"
RE_VAR_OR_ARRAY = re.compile(VAR_OR_ARRAY, re.I)
# DECL_CONTINUATION: the comma (with optional surrounding whitespace) that
# separates two entries of a declaration list.
DECL_CONTINUATION = r"\s*,\s*"
RE_DECL_CONTINUATION = re.compile(DECL_CONTINUATION, re.I)
def implicit_array_decl_parameters(line):
    """Extract the parts of an implicit array declaration.

    Args:
        line: a single source line.

    Returns:
        A tuple (indentation, type, rest) if `line` contains an implicit
        array declaration, where `indentation` is the leading whitespace,
        `type` is the declared type (for CHARACTER, including a length
        selector such as ``(len=10)`` or ``*10`` when one follows) and
        `rest` is the text after the type.  Returns None otherwise.
    """
    match = RE_BASE_TYPES.match(line)
    if match is None:
        return None

    indentation, decl_type, rest = match.group(1, 2, 3)

    if decl_type.lower() == "character":
        # Fold a length selector into the type.  Fortran is not case
        # sensitive and every other pattern in this module is compiled with
        # re.I, so LEN= must be accepted in any letter case as well.
        length = re.match(
            r"\s*(\(\s*len\s*=\s*\d+\s*\)|\*\s*\d+)", rest, re.I
        )
        if length is not None:
            decl_type += length.group(1)
            rest = rest[length.end():]

    # If the rest of the string begins with specific keywords
    # like DIMENSION or FUNCTION, this is not an implicit declaration.
    if RE_KWDS.match(rest) is not None:
        return None

    # If the line does not match the pattern for an implicit array,
    # it does not have an implicit array declaration.
    if RE_IMPLICIT_ARRAY.search(rest) is None:
        return None

    return (indentation, decl_type, rest)
def fix_implicit_array_decls(lines):
    """Rewrite implicit array declarations as explicit DIMENSION declarations.

    For a line that implicit_array_decl_parameters() recognises, e.g.
    ``integer a(10), b, c(10)``, the declared names are grouped by array
    size and one line is emitted per size
    (``integer, DIMENSION(10) :: a, c``), plus one line for the names
    declared without a size (``integer :: b``).  All other lines are copied
    through unchanged.

    If the declaration list cannot be consumed completely by the simple
    ``name`` / ``name(size)`` patterns (for example ``integer :: x(10)`` or
    ``real a(10), b(2:5)``), the original line is kept untouched.  Emitting
    declarations from a partial parse would silently drop the unparsed
    text, or the whole line when nothing could be parsed.

    Args:
        lines: list of source lines.

    Returns:
        A new list of source lines; the input list is not modified.
    """
    out_lines = []
    for line in lines:
        params = implicit_array_decl_parameters(line)
        if params is None:
            out_lines.append(line)
            continue

        indentation, decl_type, rest = params

        # Maps an array size, as written in the source, to the
        # comma-separated names declared with that size.  The integer key 0
        # collects the names declared without a size.
        decls = {}
        match = RE_VAR_OR_ARRAY.match(rest)
        while match is not None:
            name, size = match.group(1), match.group(2)
            # group(2) still carries the surrounding parentheses
            size = size[1:-1] if size is not None else 0
            if size in decls:
                decls[size] += ", " + name
            else:
                decls[size] = name

            # move past this entry and any comma separator that follows it
            rest = rest[match.end():]
            separator = RE_DECL_CONTINUATION.match(rest)
            if separator is not None:
                rest = rest[separator.end():]
            match = RE_VAR_OR_ARRAY.match(rest)

        # Nothing parsed, or text left over that the patterns above do not
        # understand: leave the declaration exactly as it was written.
        if not decls or rest.strip():
            out_lines.append(line)
            continue

        # finally, construct the output lines with implicit declarations
        # replaced by explicit ones
        for size, names in decls.items():
            if size != 0:
                out_lines.append(
                    "{}{}, DIMENSION({}) :: {}\n".format(
                        indentation, decl_type, size, names
                    )
                )
            else:
                out_lines.append(
                    "{}{} :: {}\n".format(indentation, decl_type, names)
                )
    return out_lines
def preprocess_lines(lines, infile, forModLogGen=False):
    """Run the whole preprocessing pipeline over a list of source lines.

    The steps, in order: drop blank lines, separate_trailing_comments(),
    discard_comments(), merge_continued_lines(), fix_implicit_array_decls(),
    process_includes() (skipped when `forModLogGen` is true) and
    refactor_select_case().

    Args:
        lines: the source lines of `infile`.
        infile: path of the file the lines came from; its extension is
            passed to merge_continued_lines() and its location is passed to
            process_includes().
        forModLogGen: when true, INCLUDE statements are left unexpanded.

    Returns:
        The list of preprocessed lines.
    """
    f_ext = os.path.splitext(infile)[1]

    code = [text for text in lines if text.rstrip()]
    code = discard_comments(separate_trailing_comments(code))
    code = merge_continued_lines(code, f_ext)
    code = fix_implicit_array_decls(code)

    # Module log file generation does not need the contents of included
    # external files, so INCLUDE expansion is skipped for it.
    if not forModLogGen:
        code = process_includes(code, infile)

    return refactor_select_case(code)
def get_preprocessed_lines_from_file(infile, forModLogGen=False):
    """Read `infile` and return its lines after preprocessing.

    The file is read as latin-1 text and its lines are handed to
    preprocess_lines() together with `infile` and `forModLogGen`.
    """
    with open(infile, mode="r", encoding="latin-1") as source:
        raw_lines = list(source)
    return preprocess_lines(raw_lines, infile, forModLogGen)