Source code for delphi.translators.for2py.format

#!/usr/bin/env python3

"""File: format.py

Purpose: Process a string (obtained from a data file) according to
         a Fortran FORMAT.

         NOTE: Not all format specifiers are supported at this time.
         The input formats supported are shown in the method
         match_input_fmt_1(), while the output formats supported are
         shown in the method gen_output_fmt_1().

Usage:
        I. FORMATTED I/O

        Given a Fortran READ or WRITE statement that should be processed
        according to a format list fmt_list (where fmt_list is a Python
        list of Fortran FORMAT descriptors), do the following:

        (1) Create a Format object as follows:

                my_fmt_obj = Format(fmt_list)

        (2) INPUT: To process a line of input inp_ln according to this
            format and assign values to a tuple of variables var_list:

                var_list = my_fmt_obj.read_line(inp_ln)

            OUTPUT: To construct a line to be printed out given a set
            of values val1, ..., valN:

                out_string = my_fmt_obj.write_line([val1, ..., valN])

        II. LIST_DIRECTED I/O

        At this time only list-directed output has been implemented.

        For list-directed output, e.g.: WRITE (*,*) X, Y, Z do the following:

        (1) Construct a list of the types of the values to be written.
            Let this list be denoted by out_type_list.

        (2) Construct a list of format specifiers for these types using:

                fmt_list = list_output_formats(out_type_list)

        (3) Use fmt_list as described above.

        There are some examples towards the end of this file.
"""

import re
import sys
from . import For2PyError


class Format:
    """Read and write lines of text according to Fortran FORMAT descriptors.

    A Format object is built from a list of Fortran format descriptors such
    as ``['I5', '2X', 'F4.1']``.  The regular expressions used by
    read_line() and the Python format strings used by write_line() are
    derived lazily, the first time the respective method is called.
    """

    def __init__(self, format_list):
        """Store a private copy of format_list; no parsing happens here."""
        self._format_list = format_list.copy()
        self._read_line_init = False
        self._write_line_init = False

        # Fields populated by init_read_line()
        self._re_cvt = None
        self._regexp_str = None
        self._re = None
        self._match_exps = None
        self._divisors = None
        self._in_cvt_fns = None

        # Fields populated by init_write_line()
        self._output_fmt = None
        self._out_gen_fmt = None
        self._out_widths = None

    def init_read_line(self):
        """init_read_line() initializes fields relevant to input matching"""
        self._re_cvt = self.match_input_fmt(self._format_list)

        # The extraction expressions are plain regex text; concatenating them
        # gives one pattern with one capturing group per value field.
        regexp0_str = "".join(subs[0] for subs in self._re_cvt)
        self._regexp_str = regexp0_str
        self._re = re.compile(regexp0_str)

        # Skip descriptors (X) carry None in these positions and are dropped
        # so the lists line up with the capturing groups.
        self._match_exps = [
            subs[1] for subs in self._re_cvt if subs[1] is not None
        ]
        self._divisors = [
            subs[2] for subs in self._re_cvt if subs[2] is not None
        ]
        self._in_cvt_fns = [
            subs[3] for subs in self._re_cvt if subs[3] is not None
        ]
        self._read_line_init = True

    def init_write_line(self):
        """init_write_line() initializes fields relevant to output generation"""
        output_info = self.gen_output_fmt(self._format_list)
        self._output_fmt = "".join(sub[0] for sub in output_info)
        self._out_gen_fmt = [
            sub[1] for sub in output_info if sub[1] is not None
        ]
        self._out_widths = [
            sub[2] for sub in output_info if sub[2] is not None
        ]
        self._write_line_init = True

    def read_line(self, line):
        """
        Match a line of input according to the format specified and return a
        tuple of the resulting values.

        A field whose text cannot be converted is reported on stderr and
        yields None in the returned tuple.  An AssertionError is raised if
        the line as a whole does not match the format.
        """
        if not self._read_line_init:
            self.init_read_line()

        match = self._re.match(line)
        assert match is not None, f"Format mismatch (line = {line})"

        matched_values = []
        fields = zip(
            match.groups(),
            self._match_exps,
            self._divisors,
            self._in_cvt_fns,
        )
        for match_str, cvt_re, cvt_div, cvt_fn in fields:
            # Default for a field that cannot be converted, so a failure
            # never reuses the previous field's value.
            val = None
            if re.match(cvt_re, match_str) is None:
                sys.stderr.write(f"Format conversion failed: {match_str}\n")
            elif cvt_fn == "float":
                if "." in match_str:
                    val = float(match_str)
                else:
                    # No decimal point in the input: the format's precision
                    # determines where the implied decimal point goes.
                    val = int(match_str) / cvt_div
            elif cvt_fn == "int":
                val = int(match_str)
            else:
                sys.stderr.write(
                    f"Unrecognized conversion function: {cvt_fn}\n"
                )
            matched_values.append(val)

        return tuple(matched_values)

    def write_line(self, values):
        """
        Process a list of values according to the format specified to
        generate a line of output (terminated by a newline).

        Raises For2PyError if fewer values are supplied than the format has
        value fields.  Surplus values are ignored.
        """
        if not self._write_line_init:
            self.init_write_line()

        if len(self._out_widths) > len(values):
            raise For2PyError(f"ERROR: too few values for format"
                              f" {self._format_list}\n")

        out_strs = []
        fields = zip(self._out_gen_fmt, self._out_widths, values)
        for out_fmt, out_width, value in fields:
            if value is None:
                out_strs.append(value)
                continue

            out_val = out_fmt.format(value)
            # out_width == "*" indicates that the field can be
            # arbitrarily wide
            if out_width != "*" and len(out_val) > out_width:
                # value too big for field
                out_val = "*" * out_width
            out_strs.append(out_val)

        # Apply the assembled format string directly.  Building a source
        # string and eval()-ing it would execute text taken from the format.
        return self._output_fmt.format(*out_strs) + "\n"

    def __str__(self):
        return str(self._format_list)

    ###########################################################################
    #                                                                         #
    #                             INPUT MATCHING                              #
    #                                                                         #
    ###########################################################################

    def match_input_fmt(self, fmt_list):
        """Given a list of Fortran format specifiers, e.g., ['I5', '2X',
        'F4.1'], this function constructs a list of tuples for matching an
        input string against those format specifiers."""
        rexp_list = []
        for fmt in fmt_list:
            rexp_list.extend(self.match_input_fmt_1(fmt))
        return rexp_list

    def match_input_fmt_1(self, fmt):
        """
        Given a single format specifier, e.g., '2X', 'I5', etc., this
        function constructs a list of tuples for matching against that
        specifier.  Each element of this list is a tuple

            (xtract_re, cvt_re, divisor, cvt_fn)

        where:

            xtract_re is a regular expression that extracts an input field
                of the requisite width;
            cvt_re is a regular expression that matches the character
                sequence extracted by xtract_re against the specified
                format;
            divisor is the value to divide by in order to get the
                appropriate number of decimal places if a decimal point is
                not given in the input value (meaningful only for floats);
                and
            cvt_fn is a string denoting the function to be used to convert
                the matched string to a value.

        Supported specifiers are I (integer), F (floating point), X (skip)
        and parenthesized groups, each optionally preceded by a repetition
        count.  Raises For2PyError for anything else.
        """
        # first, remove any surrounding space
        fmt = fmt.strip()

        # get any leading digits indicating repetition
        match = re.match(r"(\d+)(.+)", fmt)
        if match is None:
            reps = 1
        else:
            reps = int(match.group(1))
            fmt = match.group(2)

        if fmt[0] == "(":
            # process parenthesized format list recursively
            fmt_list = fmt[1:-1].split(",")
            rexp = self.match_input_fmt(fmt_list)
        elif fmt[0] in "iI":  # integer
            sz = fmt[1:]
            xtract_rexp = "(.{" + sz + "})"  # r.e. for extraction
            rexp1 = r" *-?\d+"  # r.e. for matching
            rexp = [(xtract_rexp, rexp1, 1, "int")]
        elif fmt[0] in "xX":  # skip
            rexp = [(".", None, None, None)]
        elif fmt[0] in "fF":  # floating point
            idx0 = fmt.find(".")
            sz = fmt[1:idx0]
            divisor = 10 ** int(fmt[idx0 + 1:])
            # up to sz characters, so a short final field still matches
            xtract_rexp = "(.{," + sz + "})"  # r.e. for extraction
            rexp1 = r" *-?\d+(\.\d+)?"  # r.e. for matching
            rexp = [(xtract_rexp, rexp1, divisor, "float")]
        else:
            raise For2PyError(
                f"ERROR: Unrecognized format specifier {fmt}\n"
            )

        # replicate the regular expression by the repetition factor in the
        # format
        return rexp * reps

    ###########################################################################
    #                                                                         #
    #                            OUTPUT GENERATION                            #
    #                                                                         #
    ###########################################################################

    def gen_output_fmt(self, fmt_list):
        """given a list of Fortran format specifiers, e.g., ['I5', '2X',
        'F4.1'], this function constructs a list of tuples for constructing
        an output string based on those format specifiers."""
        rexp_list = []
        for fmt in fmt_list:
            rexp_list.extend(self.gen_output_fmt_1(fmt))
        return rexp_list

    def gen_output_fmt_1(self, fmt):
        """given a single format specifier, gen_output_fmt_1() constructs and
        returns a list of tuples for matching against that specifier.

        Each element of this list is a tuple

            (gen_fmt, cvt_fmt, sz)

        where:

            gen_fmt is the Python format specifier for assembling this value
                into the string constructed for output;
            cvt_fmt is the Python format specifier for converting this value
                into a string that will be assembled into the output string;
                and
            sz is the width of this field.

        For descriptors that do not consume a value (X, '/', quoted
        strings), cvt_fmt and sz are None.  Raises For2PyError for an
        unrecognized specifier.
        """
        # first, remove any surrounding space
        fmt = fmt.strip()

        # get any leading digits indicating repetition
        match = re.match(r"(\d+)(.+)", fmt)
        if match is None:
            reps = 1
        else:
            reps = int(match.group(1))
            fmt = match.group(2)

        if fmt[0] == "(":
            # process parenthesized format list recursively
            fmt_list = fmt[1:-1].split(",")
            rexp = self.gen_output_fmt(fmt_list)
        elif fmt[0] in "iI":  # integer
            sz = fmt[1:]
            rexp = [("{}", "{:" + str(sz) + "d}", int(sz))]
        elif fmt[0] in "xX":
            rexp = [(" ", None, None)]
        elif fmt[0] in "aA":
            # the '*' in the third position of the tuple (corresponding to
            # field width) indicates that the field can be arbitrarily wide
            rexp = [("{}", "{}", "*")]
        elif fmt[0] in "eEfFgG":  # various floating point formats
            idx0 = fmt.find(".")
            sz = fmt[1:idx0]
            suffix = fmt[idx0 + 1:]
            # The 'E' and 'G' formats can optionally specify the width of
            # the exponent, e.g.: 'E15.3E2'.  For now we ignore the exponent
            # width -- but if it's there, we need to extract the sequence of
            # digits before it.
            m = re.match(r"(\d+).*", suffix)
            assert m is not None, f"Improper format? '{fmt}'"
            prec = m.group(1)
            cvt_fmt = "{:" + sz + "." + prec + fmt[0] + "}"
            rexp = [("{}", cvt_fmt, int(sz))]
        elif fmt[0] in "pP":  # scaling factor
            # For now we ignore scaling: there are lots of other things we
            # need to spend time on.  To fix later if necessary.
            rexp = self.gen_output_fmt_1(fmt[1:])
            # The digits in front of 'P' are a scale factor, not a
            # repetition count, so they must not replicate the field.
            reps = 1
        elif fmt[0] in "'\"":  # character string
            # The text is spliced into a str.format() template, so literal
            # braces have to be doubled.
            gen_fmt = fmt[1:-1].replace("{", "{{").replace("}", "}}")
            rexp = [(gen_fmt, None, None)]
        elif fmt[0] == "/":  # newlines
            rexp = [("\n" * len(fmt), None, None)]
        else:
            raise For2PyError(
                f"ERROR: Unrecognized format specifier {fmt[0]}\n"
            )

        # replicate the tuples by the repetition factor in the format
        return rexp * reps
################################################################################
#                                                                              #
#                    DEFAULT FORMATS FOR LIST-DIRECTED I/O                     #
#                                                                              #
################################################################################

# LIST_WRITE_DEFAULTS specifies the default formats for list-directed output.
# Keys are upper-case Fortran type names; values are Fortran format
# descriptors.
# Source: https://software.intel.com/en-us/fortran-compiler-developer-guide-\
#         and-reference-rules-for-list-directed-sequential-write-statements
LIST_WRITE_DEFAULTS = {
    # byte and logical kinds
    "BYTE": "I5",
    "LOGICAL(1)": "L2",
    "LOGICAL(2)": "L2",
    "LOGICAL(4)": "L2",
    "LOGICAL(8)": "L2",
    # integer kinds
    "INTEGER": "I12",
    "INTEGER(1)": "I5",
    "INTEGER(2)": "I7",
    "INTEGER(4)": "I12",
    "INTEGER(8)": "I22",
    # real kinds
    "REAL": "1PE14.5E2",
    "REAL(4)": "1PG15.7E2",
    "REAL(8)": "1PG24.15E3",
    "REAL(16)": "1PG43.33E4",
}

# default_output_format() takes a type name and returns the default format
# specifier for list-directed output of a value of that type.
def default_output_format(type_item):
    """Return the default list-directed output format for type_item.

    type_item is either a Fortran type name (matched case-insensitively
    against LIST_WRITE_DEFAULTS) or a quoted string literal.  A quoted
    literal is returned exactly as given, so it can be used directly as a
    format item.  If no format is known, a warning is written to stderr and
    None is returned.
    """
    # Check for a literal first and return it untouched: upper-casing it
    # would change the text that gets printed.  startswith() also copes
    # with an empty string.
    if type_item.startswith(("'", '"')):
        return type_item

    type_name = type_item.upper()
    if type_name in LIST_WRITE_DEFAULTS:
        return LIST_WRITE_DEFAULTS[type_name]

    sys.stderr.write(
        f"WARNING: No output format found for type {type_name}\n"
    )
    return None
def list_output_formats(type_list):
    """Map each type name in type_list to its default list-directed output
    format specifier and return the resulting list, in the same order."""
    return [default_output_format(type_name) for type_name in type_list]
def list_input_formats(type_list):
    """Placeholder for list-directed input, which is not implemented.

    Writes a notice to stderr and returns an empty list regardless of
    type_list.
    """
    notice = "*** List-directed input not yet implemented\n"
    sys.stderr.write(notice)
    return list()
def _format_item_types(item):
    """Return the list of data types denoted by a single format item, which
    may carry a repetition count and/or be a parenthesized group."""
    item = item.strip()
    if not item:
        return []

    # get any leading digits indicating repetition
    match = re.match(r"(\d+)(.+)", item)
    if match is None:
        reps, fmt = 1, item
    else:
        reps, fmt = int(match.group(1)), match.group(2)

    if fmt[0] == "(":
        # parenthesized group: classify each member, honoring any
        # repetition count a member carries itself
        types = []
        for sub_item in fmt[1:-1].split(","):
            types.extend(_format_item_types(sub_item))
    elif fmt[0] in "pP":
        # the digits before 'P' are a scale factor, not a repetition count;
        # the descriptor that follows determines the type
        return _format_item_types(fmt[1:])
    elif fmt[0] in "FfEegG":
        types = ["REAL"]
    elif fmt[0] in "Ii":
        types = ["INTEGER"]
    else:
        # descriptors that carry no REAL/INTEGER value (e.g. X) add nothing
        types = []

    return types * reps


def list_data_type(type_list):
    """This function takes a list of format specifiers and returns a list of
    data types represented by the format specifiers.

    F, E and G descriptors yield "REAL" and I descriptors yield "INTEGER";
    all other descriptors contribute nothing.  Repetition counts are
    expanded, both in front of a group and in front of a descriptor inside
    a group.
    """
    data_type = []
    for item in type_list:
        data_type.extend(_format_item_types(item))
    return data_type
################################################################################ # # # EXAMPLE USAGE # # # ################################################################################
def example_1():
    """Read one line of weather data with a flat list of descriptors and
    print the format and the values obtained."""
    ################################# EXAMPLE 1 ###############################
    # Format from read statement in the file Weather.for
    # The relevant Fortran code is:
    #
    #         OPEN (4,FILE='WEATHER.INP',STATUS='UNKNOWN')
    #         ...
    #         READ(4,20) DATE,SRAD,TMAX,TMIN,RAIN,PAR
    #      20 FORMAT(I5,2X,F4.1,2X,F4.1,2X,F4.1,F6.1,14X,F4.1)
    #
    # The line of data shown (input1) is taken from the file WEATHER.INP

    format1 = [
        "I5",
        "2X",
        "F4.1",
        "2X",
        "F4.1",
        "2X",
        "F4.1",
        "F6.1",
        "14X",
        "F4.1",
    ]

    # Fixed-width input: every field must occupy exactly the columns the
    # format assigns to it, so the line is spelled out field by field.
    input1 = "".join([
        "87001",     # I5   -> DATE
        "  ",        # 2X
        "-5.1",      # F4.1 -> SRAD
        "  ",        # 2X
        "20.0",      # F4.1 -> TMAX
        "  ",        # 2X
        " 4.4",      # F4.1 -> TMIN
        " -23.9",    # F6.1 -> RAIN
        " " * 14,    # 14X
        "10.7",      # F4.1 -> PAR
    ])

    rexp1 = Format(format1)
    (DATE, SRAD, TMAX, TMIN, RAIN, PAR) = rexp1.read_line(input1)

    print(f"FORMAT: {format1}")
    print(f"regexp_str = '{rexp1}'")

    vars1 = (DATE, SRAD, TMAX, TMIN, RAIN, PAR)
    print(f"vars1 = {vars1}")
    print("")
def example_2():
    """Read one line of plant data with a repeated parenthesized group and
    print the format and the values obtained."""
    ################################# EXAMPLE 2 ###############################
    # Format based on a read statement in the file Plant.for
    # The relevant Fortran code is:
    #
    #         OPEN (2,FILE='PLANT.INP',STATUS='UNKNOWN')
    #         ...
    #         READ(2,10) Lfmax, EMP2,EMP1,PD,nb,rm,fc,tb,intot,n,lai,w,wr,wc
    #      &     ,p1,sla
    #      10 FORMAT(17(1X,F7.4))
    #
    # The line of data shown (input2) is taken from the file PLANT.INP

    format2 = ["3(1X,F7.4)"]

    # Three fields, each one skipped column (1X) followed by a value
    # right-justified in seven columns (F7.4).
    input2 = "".join([
        " ", "   12.0",   # -> Lfmax
        " ", "   0.64",   # -> EMP2
        " ", "  0.104",   # -> EMP1
    ])

    rexp2 = Format(format2)
    (Lfmax, EMP2, EMP1) = rexp2.read_line(input2)

    print("FORMAT: {}".format(format2))
    print('regexp_str = "{}"'.format(rexp2))

    vars2 = (Lfmax, EMP2, EMP1)
    print("vars2 = {}".format(vars2))
    print("")
def example_3():
    """Print the data types implied by a repeated group of descriptors."""
    descriptors = ["3(I5,2X,F5.2)"]
    implied_types = list_data_type(descriptors)
    print(implied_types)
if __name__ == "__main__":
    # Run the demonstrations in order when executed as a script.
    for run_example in (example_1, example_2, example_3):
        run_example()