#!/usr/bin/python3
"""Build fixed-width text or HTML tabular reports from CWMS time-series data.

A report is described with a small directive language (``column``,
``report``, ``stats``, ...) which :meth:`cwms_report.include` reads from a
file name or an open stream.
"""

import sys

sys.path.append('/usr/dd/common/python_lib')

import os
import re
import shlex
import struct
import textwrap
from datetime import datetime, timedelta

import pytz

import time_utils
from time_series import TimeSeries
from units import Units


# Maximum rendered width assumed for each strftime() conversion character.
STRFTIME_WIDTHS = {
    '%': 1, 'a': 3, 'A': 9, 'b': 3, 'B': 9, 'c': 28, 'C': 2, 'd': 2,
    'D': 8, 'e': 2, 'g': 2, 'G': 4, 'h': 3, 'H': 2, 'I': 2, 'j': 3,
    'k': 2, 'l': 2, 'm': 2, 'M': 2, 'p': 4, 'r': 10, 'R': 5, 's': 2,
    't': 8, 'T': 8, 'u': 1, 'U': 2, 'V': 2, 'w': 1, 'x': 8, 'X': 8,
    'y': 2, 'Y': 4, 'z': 3,
}

# Per-column statistics keys maintained while a report is built.
STAT_KEYS = ('avg', 'ct', 'delta', 'first', 'last', 'max', 'min', 'range',
             'tmax', 'tmin', 'tot')


###############################################################################
def ifdef(*values):
    """Return the first argument that is not None (None if there is none)."""
    return next((val for val in values if val is not None), None)


###############################################################################
def unescape(text):
    """Interpret backslash escape sequences in *text* and return a str.

    Encoding through latin-1 with ``backslashreplace`` (rather than UTF-8)
    keeps non-ASCII characters intact across the ``unicode_escape`` decode.
    """
    if isinstance(text, bytes):
        return text.decode('unicode_escape')
    return text.encode('latin-1', 'backslashreplace').decode('unicode_escape')


###############################################################################
def max_date_size(date_format):
    """Return the maximum text length of a given strftime() format.

    Unknown conversion characters are reported on stderr and contribute no
    width; literal text in the format counts at its own length.
    """
    width = 0
    for fmt in re.findall(r'%(.)', date_format):
        if fmt in STRFTIME_WIDTHS:
            width += STRFTIME_WIDTHS[fmt]
        else:
            print("Unknown strftime parameter: '%s'" % (fmt), file=sys.stderr)
    width += len(re.sub(r'%.', '', date_format))  # Add in any hard text
    return width


###############################################################################
def midnight_time(time, format, midnight):
    """Format *time* with strftime(), optionally showing midnight as 2400.

    When *midnight* is 24 and *time* falls exactly on 00:00:00, the hour
    directives are rendered as hour 24 of the previous day.
    """
    at_midnight = time.hour == 0 and time.minute == 0 and time.second == 0
    if midnight != 24 or not at_midnight:
        return time.strftime(format)

    # Pre-populate the 24-hour strftime() directives, then run the prior day
    # through what is left of the format.  12-hour representations read
    # 12 a.m. either way, and the locale-specific %c/%C/%X are left alone.
    format = re.sub(r'%[Hk]', '24', format)
    format = re.sub(r'%T', '24:00:00', format)
    format = re.sub(r'%R', '24:00', format)
    return (time - timedelta(days=1)).strftime(format)


###############################################################################
def read_to_end(fh, endstr='end'):
    """Consume lines from *fh* up to a terminator line; return the text read.

    *endstr* is used as a case-insensitive regular expression matched at the
    start of each line, so any line beginning with it ends the block.  The
    terminator line itself is consumed but not returned.
    """
    collected = []
    for line in fh:
        if re.match(endstr, line, re.I):
            break
        collected.append(line)
    return ''.join(collected)


###############################################################################
class cwms_report:
    """State and directive handlers for one report definition."""

    # Directive name -> name of the method that implements it.
    _DIRECTIVES = {
        'column': 'column',
        'comment': 'comment',
        'database': 'set_database',
        'date_format': 'set_date_format',
        'date_range': 'date_range',
        'define': 'define',
        'format': 'format',
        'include': 'include',
        'interval': 'set_interval',
        'literal': 'literal',
        'midnight': 'set_midnight',
        'missing': 'set_missing',
        'mode': 'set_mode',
        'multi_column': 'multi_column',
        'new_columns': 'new_columns',
        'new_page': 'new_page',
        'output': 'output',
        'report': 'report',
        'series': 'series',
        'separator': 'set_separator',
        'spacing': 'set_spacing',
        'text_time': 'text_time',
        'time_zone': 'set_time_zone',
        'units': 'set_units',
        'value_format': 'set_value_format',
        'window': 'window',
    }

    # Directives whose handler also reads following lines from the stream.
    _STREAM_DIRECTIVES = frozenset(('comment', 'format', 'literal',
                                    'text_time'))

    _DEFAULT_STATS = ('min:MIN', 'max:MAX', 'tmax:@', 'tot:TOT', 'ct:CT',
                      'avg:AVG')

    def __init__(self):
        self.columns = []
        self._all_columns = None   # Full column list while empties are hidden
        self.database = 'local'
        self.date_format = '%d-%b-%Y'
        self.date_width = max_date_size(self.date_format)
        self.date_title = 'Date'
        self.end = '1D'
        self.format = ''
        self.interval = timedelta(days=1)
        self.midnight = 0
        self.missing = 'M'
        self.mode = 'text'
        self.out = sys.stdout
        self._owns_out = False     # True when self.out was opened by output()
        self.separator = ' '
        self.spacing = 1
        self.start = '-14D'
        self.time_zone = pytz.timezone('US/Pacific')
        self.units = 'english'
        self.value_format = '%8.2f'
        self.value_width = self.format_width(self.value_format)
        self.var = {}
        self.w_after = None
        self.w_before = None
        self.db = None

    ###########################################################################
    # Small formatting helpers
    @staticmethod
    def _pad(text, width):
        """Right-justify *text* in exactly *width* characters (truncating)."""
        return '%*.*s' % (width, width, text)

    def _cell(self, text, width):
        """Return a text-mode cell: the separator followed by padded text."""
        return self.separator + self._pad(text, width)

    ###########################################################################
    # Statistics helpers shared by report() and multi_column()
    @staticmethod
    def _accumulate(col, val, time=None):
        """Fold one displayed value (and its timestamp) into *col*'s stats."""
        if 'min' not in col or val < col['min']:
            col['min'] = val
            col['tmin'] = time
        if 'max' not in col or val > col['max']:
            col['max'] = val
            col['tmax'] = time
        col['tot'] = col['tot'] + val if 'tot' in col else val
        col['ct'] = col.get('ct', 0) + 1
        if 'first' not in col:
            col['first'] = val
        col['last'] = val

    @staticmethod
    def _finalize_stats(col):
        """Derive avg, range and delta from the accumulated statistics."""
        if col.get('ct', 0) > 0:
            col['avg'] = col['tot'] / col['ct']
        if 'max' in col and 'min' in col:
            col['range'] = col['max'] - col['min']
        if 'last' in col and 'first' in col:
            col['delta'] = col['last'] - col['first']

    ###########################################################################
    def column(self, expr, title, format=None, which_stats=''):
        """Append a report column.

        expr         expression evaluated by set_series() for the column
        title        column heading (backslash escapes are interpreted)
        format       value format; None means use the report value format
        which_stats  non-word-separated list of statistics to show in stats()
        """
        stats = {}
        for stat in re.split(r'\W+', which_stats.lower()):
            if stat:
                stats['avg' if stat == 'ave' else stat] = 1
        if format is None:
            width = self.value_width
        else:
            width = self.format_width(format)
        col = {'format': format, 'expr': expr, 'stats': stats,
               'title': title, 'width': width}
        self.columns.append(col)
        # Keep the hidden full list in step while empty columns are dropped.
        if self._all_columns is not None:
            self._all_columns.append(col)

    ###########################################################################
    def column_formats(self, format):
        """Return the list of per-value format(s) for a column format.

        A leading '=' marks a multi-value format ("=PRE UNPACK FMT ...") and
        yields the trailing FMT tokens; '==' is a literal leading '='.
        """
        if format is not None and format.startswith('='):
            if format.startswith('=='):
                return [format[1:]]
            return shlex.split(format[1:])[2:]
        return [format]

    ###########################################################################
    def comment(self, fh, end_str='end'):
        """Skip a comment block: discard lines of *fh* up to *end_str*."""
        read_to_end(fh, end_str)

    ###########################################################################
    def set_database(self, id):
        """Select the database identifier used for CWMS lookups."""
        self.database = id

    ###########################################################################
    def set_date_format(self, format, title='Date'):
        """Set the strftime() format and heading of the date column."""
        self.date_format = format
        # Interpret escapes (e.g. an embedded newline) the same way column
        # titles are interpreted in wrap_titles().
        self.date_title = unescape(title)
        self.date_width = max_date_size(format)

    ###########################################################################
    def date_range(self, start, end):
        """Set the report start and end offsets (see increment_time())."""
        self.start = start
        self.end = end

    ###########################################################################
    def define(self, var, *expr):
        """Define variable *var* as the space-joined expression tokens."""
        self.var[var] = ' '.join(expr)

    ###########################################################################
    def dereference(self, var, stime, etime, units):
        """Resolve one operand of an expression.

        Returns database time-series data for a dotted-sextet CWMS id, an
        empty or all-zero TimeSeries for 'empty'/'zero', a float for a
        numeric literal, or the evaluation of a variable set by define().
        An undefined variable is reported on stderr and yields an empty
        TimeSeries.
        """
        if re.match(r'(\S+\.){5}\S+$', var):   # Dotted sextet ==> CWMS id
            # Only load the database module we need, in case the other is
            # not installed on this system.
            if '/' in self.database:           # pathname ==> SQLite
                from sqlite import SQLite
                if not isinstance(self.db, SQLite):
                    self.db = SQLite()
            else:
                from cwms import CWMS
                if not isinstance(self.db, CWMS):
                    self.db = CWMS()

            if not self.db.connection():
                self.db.connect(self.database, login='cwmsview',
                                password='cwmsview')
                self.database = self.db.connection()
            if self.db.connection() != self.database:
                self.db.disconnect()
                self.db.connect(self.database, login='cwmsview',
                                password='cwmsview')
            return self.db.ts_data(var, stime, etime,
                                   Units.units(var, units))

        if var == 'empty':
            return TimeSeries()
        if var == 'zero':
            return self.zero_data(stime, etime)
        # Scalar numeric value: optional sign, digits, optional decimal
        # point, and at least one digit somewhere (so '-5' and '.5' count).
        if re.match(r'^-?\d*\.?\d*$', var) and re.search(r'\d', var):
            return float(var)
        if var not in self.var:
            print("Undefined variable '%s' referenced" % (var),
                  file=sys.stderr)
            return TimeSeries()
        return self.evaluate(self.var[var], stime, etime, units)

    ###########################################################################
    # NOTE(review): the HTML literals in this file had no markup left (and
    # several had placeholder/argument mismatches that raise TypeError).  The
    # table tags below are reconstructed; confirm against the intended output.
    def empty_row(self, cols):
        """Return a blank row spanning *cols* columns."""
        if self.mode == 'html':
            return '<tr><td colspan="%d">&nbsp;</td></tr>\n' % (cols)
        return '\n' * self.spacing

    ###########################################################################
    def end_of_row(self):
        """Return the text that terminates one report row."""
        if self.mode == 'html':
            return '</tr>\n'
        return '\n' * self.spacing

    ###########################################################################
    def evaluate(self, expr, stime, etime, units=None):
        """Evaluate a whitespace-separated, left-to-right infix expression.

        Operands are resolved by dereference(); operators are + - * /.
        An operator stays in effect until the next operator token.
        """
        if units is None:
            units = self.units
        value = TimeSeries()
        operator = None
        for arg in expr.split():
            if arg in ('+', '-', '*', '/'):
                operator = arg
                continue
            operand = self.dereference(arg, stime, etime, units)
            if operator is not None:
                value = value.math(operator, operand)
            elif len(value) == 0:
                value = operand
            else:
                print("Syntax error near '%s': missing operator?" % (arg),
                      file=sys.stderr)
        return value

    ###########################################################################
    def format_field(self, format, value):
        """Format *value* as one or more cells.

        Usually this is just the % operator, with two exceptions:
          '==...'  a literal leading '=' in an ordinary format
          '=PRE UNPACK FMT ...'  multi-valued field: *value* is rendered with
                   PRE, cut into fixed-width pieces per UNPACK (Perl-style
                   'A<n>' fields), and each piece is formatted with its FMT.
        """
        if format.startswith('=='):
            return self._format_field(format[1:], value)
        if not format.startswith('='):
            return self._format_field(format, value)

        tokens = shlex.split(format[1:])
        pre_fmt = tokens.pop(0)
        unpack = re.sub(r'A(\d+)', r'\1s', tokens.pop(0))
        # Truncate the formatted value to the size required by unpack in case
        # the data are too large for the format specified.  struct works on
        # bytes, so encode on the way in and decode each piece on the way out.
        size = struct.calcsize(unpack)
        packed = (pre_fmt % value)[:size].encode('ascii', 'replace')
        pieces = struct.unpack(unpack, packed)
        if len(tokens) < len(pieces):
            raise ValueError("Format '%s' has fewer value formats than fields"
                             % (format))
        cells = []
        for index, piece in enumerate(pieces):
            cells.append(self._format_field(tokens[index],
                                            float(piece.decode('ascii'))))
        return ''.join(cells)

    def _format_field(self, format, value):
        """Format a single value as one HTML cell or one text cell."""
        if self.mode == 'html':
            return '<td>' + (format % (value)) + '</td>'
        return self.separator + (format % (value))

    ###########################################################################
    def format_width(self, format):
        """Return the rendered width of a value format.

        Interpreting % directives is harder than using the % operator, so be
        lazy: format a zero and measure the result.
        """
        if format.startswith('=='):            # Literal leading '='
            return len(format[1:] % (0))
        if format.startswith('='):             # Multi-valued field
            tokens = shlex.split(format)[2:]
            return len(self.separator.join(tok % (0) for tok in tokens))
        return len(format % (0))

    ###########################################################################
    def include(self, filename):
        """Read and execute report directives from a file name or stream."""
        if isinstance(filename, str):
            with open(filename) as fh:
                self._run_directives(fh)
        else:
            self._run_directives(filename)

    def _run_directives(self, fh):
        """Execute each directive line of the open stream *fh*."""
        for line in fh:
            line = line.rstrip('\n')
            while line.endswith('\\'):         # Concatenate continuations
                line = line[:-1] + next(fh, '').rstrip('\n')
            if not re.match(r'\w', line):      # Ignore empty/comment lines
                continue
            # comments=True strips trailing '#' comments without touching a
            # '#' that appears inside quotes.
            args = shlex.split(line, comments=True)
            directive = args.pop(0)
            name = directive.lower()

            if name == 'stats':                # Takes the list itself
                self.stats(args)
                continue
            method = self._DIRECTIVES.get(name)
            if method is None:
                print("Warning: Unknown report directive: '%s'"
                      % (directive), file=sys.stderr)
                continue
            handler = getattr(self, method, None)
            if not callable(handler):
                # 'format' and 'series' have no implementation in this class.
                print("Warning: Report directive '%s' is not implemented"
                      % (directive), file=sys.stderr)
            elif name in self._STREAM_DIRECTIVES:
                handler(fh, *args)
            else:
                handler(*args)

    ###########################################################################
    def increment_time(self, offset, time=None):
        """Apply *offset* to a timestamp (default: current time).

        *offset* is either an absolute 'YYYY-MM-DDTHH:MM:SS' time or an
        offset understood by time_utils.offset_to_time().

        Midnight being 2400 instead of 0000 changes not only how we display
        dates, but also how we refer to them.  So, if zero-time is actually
        the prior day, shift "current day" forward by whatever interval we're
        using.  That only happens for month or water-year offsets, or when
        the offset is capitalized; an absolute offset from the current time
        is left alone.
        """
        if time is None:
            time = datetime.now()    # Evaluated per call, not at import
        if re.match(r'\d{4}-\d\d-\d\dT\d\d:\d\d:\d\d', offset):
            res = datetime.strptime(offset, '%Y-%m-%dT%H:%M:%S')
        else:
            res = time_utils.offset_to_time(offset, time)
        if self.midnight == 24 and re.search(r'mo|MO|wy|WY|[A-Z]+$', offset):
            res += self.interval
        if res.tzinfo is not None:   # localize() rejects aware datetimes
            return res
        return self.time_zone.localize(res)

    ###########################################################################
    def set_interval(self, int):
        """Set the row interval; default the snap window to half of it."""
        self.interval = time_utils.interval_to_timedelta(int)
        if self.w_before is None:
            self.w_before = self.interval / 2
        if self.w_after is None:
            self.w_after = self.interval / 2

    ###########################################################################
    def line_row(self, pattern, columns):
        """Return a rule row; text mode repeats *pattern* under each column."""
        if self.mode == 'html':
            width = 1 + sum(len(self.column_formats(col.get('format')))
                            for col in columns)
            return '<tr><td colspan="%d"><hr></td></tr>\n' % (width)
        ret = self._pad(pattern * self.date_width, self.date_width)
        for col in columns:
            ret += self._cell(pattern * col['width'], col['width'])
        return ret + '\n'

    ###########################################################################
    def literal(self, fh, end_str='end'):
        """Print a literal text block.

        If the text contains '%', it is used as a strftime() format for the
        midpoint of the report date range.
        """
        lit = read_to_end(fh, end_str)
        if '%' in lit:
            stime, etime = self.time_range(self.start, self.end)
            lit = (stime + (etime - stime) / 2).strftime(lit)
        print(lit, end=' ', file=self.out)

    def text_time(self, fh, type, terminator='end'):
        """Print a text block as a strftime() format of a chosen time.

        *type* selects 'now', or the 'start', 'mid' or 'end' of the report
        date range; any other value prints the text unformatted.
        """
        lit = read_to_end(fh, terminator)
        which = type.lower()
        time = None
        if which == 'now':
            time = datetime.now()
        elif which in ('start', 'mid', 'end'):
            stime, etime = self.time_range(self.start, self.end)
            if which == 'start':
                time = stime
            elif which == 'mid':
                time = stime + (etime - stime) / 2
            else:
                time = etime
        if time is None:
            print(lit, end=' ', file=self.out)
        else:
            print(midnight_time(time, lit, self.midnight), end=' ',
                  file=self.out)

    ###########################################################################
    def set_mode(self, mode):
        """Select the output mode ('text' or 'html')."""
        self.mode = mode.lower()

    ###########################################################################
    def set_midnight(self, mid):
        """Set how midnight is expressed (24 shows 2400 of the prior day)."""
        self.midnight = int(mid)

    ###########################################################################
    def set_missing(self, str=''):
        """Set the text shown in place of a missing value."""
        self.missing = str

    ###########################################################################
    def multi_column(self, expr, col_interval, title_fmt):
        """Print one series folded into a column per *col_interval*.

        Rows advance by the report interval; a new column starts each
        *col_interval*.  If *title_fmt* is non-blank, it is the strftime()
        format of each column heading.  The columns replace self.columns so
        that stats() can be used afterwards.
        """
        stime, etime = self.time_range(self.start, self.end)
        # The window is unset until 'interval' or 'window' is given.
        w_before = ifdef(self.w_before, self.interval / 2)

        # Build the column headers at col_interval using title_fmt
        if re.search(r'\S', title_fmt):
            titles = []
            time = stime
            while time <= etime:
                titles.append({
                    'title': midnight_time(time, title_fmt, self.midnight),
                    'width': self.value_width,
                })
                time = self.increment_time(col_interval, time)
            print(self.wrap_titles(titles), file=self.out)

        ts = self.set_series(expr, stime, etime, self.interval)
        self.columns = []            # Reset columns as if by NEW_COLUMNS
        self._all_columns = None

        # Store columns as if they were built by COLUMN and REPORT, so that
        # the statistics code can be reused.
        def new_col(start):
            return {'start': start, 'stats': {'all': 1}, 'values': [],
                    'width': self.value_width}

        # Step through each interval, jumping to the next column when within
        # w_before of the next col_interval.
        missing_cell = self._cell(self.missing, self.value_width)
        time = stime
        col = new_col(time)
        while time <= etime:
            next_start = self.increment_time(col_interval, col['start'])
            if next_start - time <= w_before:
                self.columns.append(col)
                col = new_col(time)
            val = ts[time] if time in ts else None
            if val is not None:
                col['values'].append(
                    self.format_field(self.value_format, val))
                self._accumulate(col, val, time)
            else:
                col['values'].append(missing_cell)
            time += self.interval
        self.columns.append(col)

        # Finalize any columnar statistics
        for col in self.columns:
            self._finalize_stats(col)

        # The longest column determines the length of the report.  Its start
        # time labels the rows, so the end rows match the longest columns
        # (e.g. days 29, 30, and 31).
        report_len = 0
        long_col_start = stime
        for col in self.columns:
            if len(col['values']) > report_len:
                report_len = len(col['values'])
                long_col_start = col['start']

        # Display the report
        blank_cell = self.separator + ' ' * self.value_width
        rowtime = long_col_start
        lines = []
        for row in range(report_len):
            line = midnight_time(rowtime, self.date_format, self.midnight)
            for col in self.columns:
                if row < len(col['values']):
                    line += col['values'][row]
                else:
                    line += blank_cell
            lines.append(line)
            rowtime += self.interval
        print(('\n' * self.spacing).join(lines), file=self.out)

    ###########################################################################
    def new_columns(self):
        """Discard all defined columns."""
        self.columns = []
        self._all_columns = None

    ###########################################################################
    def new_page(self):
        """Emit a form feed."""
        print('\f', end=' ', file=self.out)

    ###########################################################################
    def output(self, filename):
        """Direct output to *filename* ('stdout' selects standard output)."""
        if self._owns_out:           # Close the file a prior call opened
            self.out.close()
        if filename.lower() == 'stdout':
            self.out = sys.stdout
            self._owns_out = False
        else:
            self.out = open(filename, 'w')
            self._owns_out = True

    ###########################################################################
    def report(self, *opts):
        """Print the title rows and one data row per interval.

        Options (case-insensitive):
          dropempty  omit columns that have no data in the date range
          notitle    do not print the column titles
        """
        flags = {opt.lower() for opt in opts}
        dropempty = 'dropempty' in flags
        notitle = 'notitle' in flags

        # Bring back columns hidden by a previous 'dropempty' report, in case
        # we are asked to show a new date range.  This is done here rather
        # than at the end of the previous report so that a stats directive
        # following that report lines up with the columns it displayed.
        if self._all_columns is not None:
            self.columns = self._all_columns
            self._all_columns = None

        stime, etime = self.time_range(self.start, self.end)

        # Get each column's data as a time series, noting which columns
        # actually have data, and clear statistics from any earlier report.
        populated = []
        for col in self.columns:
            col['ts'] = self.set_series(col['expr'], stime, etime,
                                        self.interval)
            if len(col['ts']) > 0:
                populated.append(col)
            for key in STAT_KEYS:
                col.pop(key, None)

        if dropempty and len(populated) != len(self.columns):
            self._all_columns = self.columns
            self.columns = populated

        # If requested, build (possibly multi-row) titles, then display them
        if not notitle:
            print(self.wrap_titles(self.columns), file=self.out)

        # Build columnar data, keeping statistics on the displayed values in
        # case they are needed.
        html = self.mode == 'html'
        rows = []
        time = stime
        while time <= etime:
            stamp = midnight_time(time, self.date_format, self.midnight)
            cells = ['<tr><td>%s</td>' % (stamp) if html else stamp]
            for col in self.columns:
                series = col['ts']
                val = series[time] if time in series else None
                if val is not None:
                    fmt = ifdef(col['format'], self.value_format)
                    cells.append(self.format_field(fmt, val))
                    self._accumulate(col, val, time)
                elif html:
                    for _ in self.column_formats(col['format']):
                        cells.append('<td>%s</td>' % (self.missing))
                else:
                    width = col.get('width', self.value_width)
                    cells.append(self._cell(self.missing, width))
            cells.append(self.end_of_row())
            rows.append(''.join(cells))
            time += self.interval

        for col in self.columns:
            self._finalize_stats(col)

        print(''.join(rows), end=' ', file=self.out)

    ###########################################################################
    def set_separator(self, sep):
        """Set the text placed before every value column."""
        self.separator = sep

    ###########################################################################
    def set_series(self, expr, stime, etime, interval):
        """Evaluate a column expression into a time-indexed series.

        *expr* may carry a ':type' suffix:
          min|max|tot|ct|avg|range|delta  per-interval statistic
          rollavg:<interval>              rolling average over <interval>
          off:<interval>                  fixed time offset
          offdst:<interval>               offset adjusted for daylight time
        Without a recognized type the data are snapped to the interval using
        the w_before/w_after window.
        """
        kind = ''
        m = re.search(r'(.*?):(.*)', expr)
        if m:
            expr, kind = m.group(1, 2)
            kind = kind.lower()
        if kind == 'ave':
            kind = 'avg'

        if re.match(r'(min|max|tot|ct|avg|range|delta)', kind):
            ts = self.evaluate(expr, stime - interval, etime)
            names = ('min', 'max', 'tot', 'ct', 'avg', 'range', 'delta')
            by_name = dict(zip(names, ts.stat(interval, stime)))
            # A type that merely starts with a statistic name selects none of
            # them and yields the evaluated series itself.
            return by_name.get(kind, ts)

        if re.match(r'rollav', kind):
            m = re.match(r'rollav[ge]:(\S+)', kind)
            if not m:
                print("Syntax error in '%s': expected rollavg:<interval>"
                      % (kind), file=sys.stderr)
                return {}
            lookback = time_utils.interval_to_timedelta(m.group(1))
            ts = self.evaluate(expr, stime - lookback, etime)
            return ts.rollavg(interval, lookback, stime)

        # The window is unset until 'interval' or 'window' is given.
        w_before = ifdef(self.w_before, self.interval / 2)
        w_after = ifdef(self.w_after, self.interval / 2)
        s_offset = e_offset = timedelta(seconds=0)
        dst = False
        m = re.match(r'off(dst)?:(\S+)', kind)
        if m:
            s_offset = e_offset = time_utils.interval_to_timedelta(m.group(2))
            if m.group(1) == 'dst':
                # NOTE(review): dst_offset is neither defined nor imported in
                # this file, so this branch raises NameError -- confirm where
                # it is meant to come from.
                s_offset -= dst_offset(stime)
                e_offset -= dst_offset(etime)
                dst = True
        ts = self.evaluate(expr, stime + s_offset - w_before,
                           etime + e_offset + w_after)
        if dst:
            ts = ts.offset_to_dst()
        snap = ts.snap(interval, w_before, w_after, s_offset, stime)
        return snap.dict()

    ###########################################################################
    def set_spacing(self, count):
        """Set the number of newlines that end each text row."""
        self.spacing = int(count)

    ###########################################################################
    def stats(self, lines=None):
        """Print statistics rows for the current columns.

        Each entry of *lines* is one of:
          'STAT[(FORMULA)]:NAME[:FORMAT]'  a row of one statistic, labelled
                                           NAME; FORMULA (e.g. '* 2') is
                                           appended to each value and
                                           evaluated
          'blank'                          an empty row
          'line[PATTERN]'                  a rule row (default pattern '-')
        """
        if lines is None:
            lines = self._DEFAULT_STATS
        rows = []
        for line in lines:
            if ':' in line:
                rows.append(self._stat_row(line))
            elif line == 'blank':
                rows.append(self.empty_row(len(self.columns) + 1))
            elif line.startswith('line'):
                pattern = re.match(r'line(\S+)?', line).group(1) or '-'
                rows.append(self.line_row(pattern, self.columns))
        print(''.join(rows), end=' ', file=self.out)

    def _stat_row(self, line):
        """Build one 'STAT:NAME[:FORMAT]' statistics row."""
        tokens = line.split(':', 2)
        stat = tokens[0]
        name = tokens[1]
        format = tokens[2] if len(tokens) > 2 and tokens[2] else None

        formula = None
        m = re.search(r'\((.*)\)', stat)
        if m:
            formula = m.group(1)
            stat = re.sub(r'\(.*\)', '', stat)
        stat = stat.lower()
        if stat == 'ave':
            stat = 'avg'
        is_time = stat in ('tmax', 'tmin')

        html = self.mode == 'html'
        if html:
            cells = ['<tr><td>%s</td>' % (name)]
        else:
            cells = [self._pad(name, self.date_width)]

        for column in self.columns:
            col_format = column.get('format')
            width = column.get('width', self.value_width)
            wanted = stat in column['stats'] or 'all' in column['stats']
            value = column.get(stat) if wanted else None
            if value is None:
                if html:
                    for _ in self.column_formats(col_format):
                        cells.append('<td>&nbsp;</td>')
                else:
                    cells.append(self.separator + ' ' * width)
            elif is_time:
                text = midnight_time(value, ifdef(format, self.date_format),
                                     self.midnight)
                if html:
                    cells.append('<td>%s</td>' % (text))
                else:
                    cells.append(self._cell(text, width))
            else:
                if formula is not None:
                    # SECURITY: eval() runs text from the report definition;
                    # only process definitions from trusted sources.  The
                    # result is used for display only, leaving the stored
                    # statistic untouched for later stats rows.
                    value = eval('%s %s' % (value, formula))
                fmt = ifdef(format, col_format, self.value_format)
                cells.append(self.format_field(fmt, value))
        cells.append(self.end_of_row())
        return ''.join(cells)

    ###########################################################################
    def time_range(self, start, end):
        """Convert start/end offsets to times; the end is made inclusive."""
        stime = self.increment_time(start)
        etime = self.increment_time(end) - timedelta(seconds=1)
        return (stime, etime)

    ###########################################################################
    def set_time_zone(self, zone):
        """Set the time zone (a pytz zone name) used to localize times."""
        self.time_zone = pytz.timezone(zone)

    ###########################################################################
    def set_units(self, units):
        """Set the unit system passed to Units.units() for lookups."""
        self.units = units

    ###########################################################################
    def set_value_format(self, fmt):
        """Set the default value format and the width derived from it."""
        self.value_format = fmt
        self.value_width = self.format_width(fmt)

    ###########################################################################
    def window(self, before, after=None):
        """Set the snap window; *after* defaults to the *before* interval."""
        self.w_before = time_utils.interval_to_timedelta(before)
        if after is None:
            self.w_after = self.w_before
        else:
            self.w_after = time_utils.interval_to_timedelta(after)

    ###########################################################################
    def zero_data(self, stime, etime):
        """Create a time series with all-zero data for the given range."""
        ts = TimeSeries()
        cur = stime
        while cur <= etime:
            ts.set_datum(cur, 0)
            cur += self.interval
        return ts

    ###########################################################################
    def wrap_titles(self, columns):
        """Return the heading row(s) for *columns*.

        In text mode titles are word-wrapped to the column width and aligned
        to the bottom heading row; the number of heading rows is set by the
        tallest column title.
        """
        if self.mode == 'html':
            ret = '<tr><th>%s</th>' % (self.date_title)
            for col in columns:
                format = col['format'] if 'format' in col else self.value_format
                width = len(self.column_formats(format))
                span = ' colspan="%d"' % (width) if width > 1 else ''
                title = unescape(col['title']) if 'title' in col else ''
                ret += '<th%s>%s</th>' % (span, title)
            return ret + '</tr>\n'

        wrapped = []
        for col in columns:
            title = unescape(col['title']) if 'title' in col else ''
            width = col.get('width', self.value_width)
            lines = []
            for line in title.splitlines():
                lines.extend(textwrap.wrap(line, width))
            wrapped.append((width, lines))
        maxrow = max((len(lines) for _, lines in wrapped), default=0)

        datelines = []
        for line in self.date_title.splitlines():
            datelines.extend(textwrap.wrap(line, self.date_width))

        def bottom_aligned(lines):
            # Keep at most the last maxrow lines, blank-filled from the top.
            kept = lines[-maxrow:] if maxrow else []
            return [''] * (maxrow - len(kept)) + kept

        date_rows = bottom_aligned(datelines)
        title_rows = [(width, bottom_aligned(lines))
                      for width, lines in wrapped]
        ret = ''
        for row in range(maxrow):
            header_line = self._pad(date_rows[row], self.date_width)
            for width, lines in title_rows:
                header_line += self._cell(lines[row], width)
            ret += header_line + '\n'
        return ret


###############################################################################
###############################################################################
if __name__ == '__main__':
    infile = sys.stdin
    if len(sys.argv) > 1:
        infile = sys.argv[1]

    report = cwms_report()
    report.include(infile)