#!/usr/bin/python3
import sys
sys.path.append( '/usr/dd/common/python_lib' )
import os, pytz, re, shlex, struct, textwrap, time_utils
from datetime import datetime, timedelta
from time_series import TimeSeries
from units import Units
###############################################################################
def ifdef(*values):
    """Return the first argument that is not None, or None if there is none.

    Falsy values such as 0 or '' count as defined; only None is skipped.
    """
    return next((candidate for candidate in values if candidate is not None),
                None)
###############################################################################
# Determine the maximum text length of a given strftime() format.
def max_date_size(date_format):
    """Estimate the widest text that strftime(date_format) can produce.

    Each '%x' directive contributes the width listed in the table below and
    every remaining literal character contributes one column.  Directives
    missing from the table are reported on stderr and counted as zero width.

    Args:
        date_format: a strftime() format string.

    Returns:
        The estimated maximum width, in characters.
    """
    widths = {'%': 1, 'a': 3, 'A': 9, 'b': 3, 'B': 9, 'c': 28, 'C': 2,
              'd': 2, 'D': 8, 'e': 2, 'f': 6, 'F': 10, 'g': 2, 'G': 4,
              'h': 3, 'H': 2, 'I': 2, 'j': 3, 'k': 2, 'l': 2, 'm': 2,
              'M': 2, 'p': 4, 'r': 10, 'R': 5,
              's': 10,  # seconds since the epoch (ten digits for current dates)
              'S': 2,   # seconds within the minute
              't': 8, 'T': 8, 'u': 1, 'U': 2, 'V': 2, 'w': 1, 'x': 8,
              'X': 8, 'y': 2, 'Y': 4,
              'z': 5,   # numeric UTC offset, e.g. -0800
              'Z': 3}   # typical zone abbreviation; longer names exceed this
    width = 0
    for fmt in re.findall(r'%(.)', date_format):
        if fmt in widths:
            width += widths[fmt]
        else:
            print("Unknown strftime parameter: '%s'" % (fmt), file=sys.stderr)
    # Whatever is left once the directives are removed is literal text.
    width += len(re.sub(r'%.', '', date_format))
    return width
###############################################################################
def midnight_time(time, format, midnight):
    """Format *time*, optionally writing midnight as hour 24 of the prior day.

    Args:
        time: the datetime to format.
        format: a strftime() format string.
        midnight: 24 to express 00:00:00 as '24' on the previous day; any
            other value formats the time unchanged.

    Returns:
        The formatted text.
    """
    if (midnight != 24 or
            time.hour != 0 or time.minute != 0 or time.second != 0):
        return time.strftime(format)
    # Ok, we have a midnight time, and we need to express it as '2400' instead
    # of zero-hour.  Pre-populate all the strftime() cases for hours, then
    # run the time through the remaining strftime() format.
    # Ignore 12-hour representations, as they'll be 12 a.m. either way.
    # Ignore %[cCX] and %Ox (locale-specifics) for convenience (we can
    # post-process them if we need to)
    hour_text = {'H': '24', 'k': '24', 'T': '24:00:00', 'R': '24:00'}

    def _substitute(match):
        # Directives are consumed one at a time, so a literal '%%' is taken
        # whole and the character after it is never mistaken for a directive.
        return hour_text.get(match.group(1), match.group(0))

    format = re.sub(r'%(.)', _substitute, format)
    time -= timedelta(days=1)  # It's 2400 for the prior day.
    return time.strftime(format)
###############################################################################
def read_to_end(fh, endstr='end'):
    """Collect lines from *fh* up to, but not including, the terminator line.

    A line terminates the block when *endstr*, used as a case-insensitive
    regular expression, matches at its start.  The terminator line itself is
    consumed from *fh*.  Reading stops at end of input if no line matches.

    Returns:
        The collected lines joined into one string.
    """
    collected = []
    for text in fh:
        if re.match(endstr, text, re.I):
            break
        collected.append(text)
    return ''.join(collected)
###############################################################################
class cwms_report:
    """Render tabular time-series reports from a line-oriented directive script.

    include() reads directives one per line and dispatches each to a method of
    this class.  Columns are declared with column(), rendered with report() or
    multi_column(), and summarised with stats().  Output goes to self.out as
    fixed-width text, or as table rows when mode is 'html'.

    NOTE(review): the HTML tag text inside the mode == 'html' string literals
    was missing from the source under review (several literals were left
    unterminated).  The tags used here are a reconstruction -- confirm them
    against the upstream file.
    """

    # Per-column statistics gathered while rendering; cleared by report().
    _STAT_KEYS = ('avg', 'ct', 'delta', 'first', 'last', 'max', 'min',
                  'range', 'tmax', 'tmin', 'tot')

    # Statistic lines shown when stats() is given none.
    _DEFAULT_STATS = ('min:MIN', 'max:MAX', 'tmax:@', 'tot:TOT', 'ct:CT',
                      'avg:AVG')

    # Directive name -> method name, for directives taking only their args.
    _PLAIN_DIRECTIVES = {
        'column': 'column',
        'database': 'set_database',
        'date_format': 'set_date_format',
        'date_range': 'date_range',
        'define': 'define',
        'include': 'include',
        'interval': 'set_interval',
        'midnight': 'set_midnight',
        'missing': 'set_missing',
        'mode': 'set_mode',
        'multi_column': 'multi_column',
        'new_columns': 'new_columns',
        'new_page': 'new_page',
        'output': 'output',
        'report': 'report',
        'series': 'series',
        'separator': 'set_separator',
        'spacing': 'set_spacing',
        'time_zone': 'set_time_zone',
        'units': 'set_units',
        'value_format': 'set_value_format',
        'window': 'window',
    }

    # Directives whose handler also receives the input stream, so that it can
    # consume the lines that follow the directive.
    _STREAM_DIRECTIVES = {
        'comment': 'comment',
        'format': 'format',
        'literal': 'literal',
        'text_time': 'text_time',
    }

    def __init__(self):
        """Set the defaults that directives may later override."""
        self.columns = []
        self.database = 'local'
        self.date_format = '%d-%b-%Y'
        self.date_width = max_date_size(self.date_format)
        self.date_title = 'Date'
        self.end = '1D'
        self.format = ''
        self.interval = timedelta(days=1)
        self.midnight = 0
        self.missing = 'M'
        self.mode = 'text'
        self.out = sys.stdout
        self.separator = ' '
        self.spacing = 1
        self.start = '-14D'
        self.time_zone = pytz.timezone('US/Pacific')
        self.units = 'english'
        self.value_format = '%8.2f'
        self.value_width = self.format_width(self.value_format)
        self.var = {}
        self.w_after = None
        self.w_before = None
        self.db = None
        # Full column list while a 'dropempty' report has narrowed
        # self.columns; None when self.columns is already complete.
        self._all_columns = None
        # True when self.out is a file opened by output().
        self._owns_out = False

    # ------------------------------------------------------------------
    # Private helpers
    # ------------------------------------------------------------------
    @staticmethod
    def _unescape(text):
        """Interpret backslash escapes (such as a newline escape) in *text*."""
        if isinstance(text, bytes):
            return text.decode('unicode_escape')
        # Going through latin-1 with backslashreplace keeps non-ASCII
        # characters intact; a utf-8 round trip would turn them into mojibake.
        return text.encode('latin-1', 'backslashreplace').decode(
            'unicode_escape')

    def _restore_columns(self):
        """Undo the column narrowing left behind by a 'dropempty' report."""
        full = getattr(self, '_all_columns', None)
        if full is not None:
            self.columns = full
            self._all_columns = None

    def _window(self, interval):
        """Return (before, after) snap windows, defaulting to half *interval*."""
        before = self.w_before if self.w_before is not None else interval / 2
        after = self.w_after if self.w_after is not None else interval / 2
        return before, after

    @staticmethod
    def _dst_offset(when):
        """Return the daylight-saving offset in effect at *when*.

        NOTE(review): the original called an undefined dst_offset(); this
        assumes it meant the datetime's own DST offset -- confirm.
        """
        return when.dst() or timedelta(0)

    @staticmethod
    def _accumulate(col, val, time):
        """Fold one displayed value into the running statistics of *col*."""
        if 'min' not in col or val < col['min']:
            col['min'] = val
            col['tmin'] = time
        if 'max' not in col or val > col['max']:
            col['max'] = val
            col['tmax'] = time
        col['tot'] = col['tot'] + val if 'tot' in col else val
        col['ct'] = col['ct'] + 1 if 'ct' in col else 1
        if 'first' not in col:
            col['first'] = val
        col['last'] = val

    @staticmethod
    def _finalize_stats(col):
        """Derive avg, range and delta from the accumulated statistics."""
        if 'ct' in col and col['ct'] > 0:
            col['avg'] = col['tot'] / col['ct']
        if 'max' in col and 'min' in col:
            col['range'] = col['max'] - col['min']
        if 'last' in col and 'first' in col:
            col['delta'] = col['last'] - col['first']

    # ------------------------------------------------------------------
    # Column definition and formatting
    # ------------------------------------------------------------------
    def column(self, expr, title, format=None, which_stats=''):
        """Append a report column.

        Args:
            expr: expression evaluated by set_series() for the column data.
            title: column heading; backslash escapes are interpreted when
                the heading is rendered.
            format: %-format for the values, or None for self.value_format.
            which_stats: names of the statistics stats() may show for this
                column, separated by non-word characters ('ave' means 'avg').
        """
        self._restore_columns()
        stats = {}
        for stat in re.split(r'\W+', which_stats.lower()):
            if not stat:
                continue
            if stat == 'ave':
                stat = 'avg'
            stats[stat] = 1
        if format is None:
            width = self.value_width
        else:
            width = self.format_width(format)
        self.columns.append({'format': format,
                             'expr': expr,
                             'stats': stats,
                             'title': title,
                             'width': width})

    def column_formats(self, format):
        """Return the list of per-cell formats that *format* expands to.

        A multi-valued format ('=pre unpack fmt1 fmt2 ...') yields its
        trailing formats; anything else yields a one-element list.  A leading
        '==' denotes a literal '=' and is reduced to a single '='.
        """
        if format is not None and format.startswith('='):
            format = format[1:]
            if not format.startswith('='):  # '==' => literal '='
                return shlex.split(format)[2:]
        return [format]

    def format_field(self, format, value):
        """Format *value* for one column, including the leading separator.

        Usually this is just the % operator.  A format beginning with a
        single '=' is multi-valued: *value* is first rendered with the first
        token, cut into fixed-width pieces described by the second token
        (struct-style, with 'A<n>' accepted for '<n>s'), and each piece is
        then rendered with the matching remaining token.
        """
        if format.startswith('=='):  # Double-equal -> leading literal equal
            format = format[1:]
        elif format.startswith('='):
            tokens = shlex.split(format[1:])
            pre_fmt = tokens.pop(0)
            unpack = re.sub(r'A(\d+)', r'\1s', tokens.pop(0))
            size = struct.calcsize(unpack)
            # struct works on bytes.  Truncate when the rendered value is too
            # large for the layout and pad it when it is too small.
            raw = (pre_fmt % value).encode('ascii', 'replace')
            raw = raw[:size].ljust(size)
            pieces = []
            for index, piece in enumerate(struct.unpack(unpack, raw)):
                if isinstance(piece, bytes):
                    piece = piece.decode('ascii')
                pieces.append(self._format_field(tokens[index], float(piece)))
            return ''.join(pieces)
        return self._format_field(format, value)

    def _format_field(self, format, value):
        """Render a single value as a text field or an HTML cell."""
        if self.mode == 'html':
            return ('<td>' + format + '</td>') % (value)
        return (self.separator + format) % (value)

    def format_width(self, format):
        """Return the printed width of *format*, measured by formatting zero.

        Interpreting % directives is harder than using the % operator, so be
        lazy.  For a multi-valued format the width covers all of its fields
        and the separators between them.
        """
        if format.startswith('=='):
            return len(format[1:] % (0))
        if format.startswith('='):
            tokens = shlex.split(format[1:])[2:]
            joined = self.separator.join(tokens)
            return len(joined % ((0,) * len(tokens)))
        return len(format % (0))

    # ------------------------------------------------------------------
    # Simple directives
    # ------------------------------------------------------------------
    def comment(self, fh, end_str='end'):
        """Skip input lines up to and including the *end_str* line."""
        read_to_end(fh, end_str)

    def set_database(self, id):
        """Select the database used to resolve time-series ids."""
        self.database = id

    def set_date_format(self, format, title='Date'):
        """Set the strftime() format and the heading of the date column."""
        self.date_format = format
        # wrap_titles() splits the heading into lines, so its escapes must
        # be interpreted here just as they are for column titles.
        self.date_title = self._unescape(title)
        self.date_width = max_date_size(format)

    def date_range(self, start, end):
        """Set the start and end of the report as offsets or timestamps."""
        self.start = start
        self.end = end

    def define(self, var, *expr):
        """Bind *var* to an expression for later use in other expressions."""
        self.var[var] = ' '.join(expr)

    def set_mode(self, mode):
        """Select the output mode ('text' or 'html')."""
        self.mode = mode.lower()

    def set_midnight(self, mid):
        """Choose how midnight is written; 24 selects the hour-24 style."""
        self.midnight = int(mid)

    def set_missing(self, str=''):
        """Set the text shown where a value is missing."""
        self.missing = str

    def set_separator(self, sep):
        """Set the text placed in front of every column in text mode."""
        self.separator = sep

    def set_spacing(self, count):
        """Set how many newlines end each text row."""
        self.spacing = int(count)

    def set_time_zone(self, zone):
        """Set the time zone used to localize report times."""
        self.time_zone = pytz.timezone(zone)

    def set_units(self, units):
        """Set the default unit system passed to Units.units()."""
        self.units = units

    def set_value_format(self, fmt):
        """Set the default value format and recompute its width."""
        self.value_format = fmt
        self.value_width = self.format_width(fmt)

    def set_interval(self, int):
        """Set the row interval; unset snap windows default to half of it."""
        self.interval = time_utils.interval_to_timedelta(int)
        if self.w_before is None:
            self.w_before = self.interval / 2
        if self.w_after is None:
            self.w_after = self.interval / 2

    def window(self, before, after=None):
        """Set the snap windows; *after* defaults to *before*."""
        self.w_before = time_utils.interval_to_timedelta(before)
        if after is None:
            self.w_after = self.w_before
        else:
            self.w_after = time_utils.interval_to_timedelta(after)

    def new_columns(self):
        """Discard all defined columns."""
        self.columns = []
        self._all_columns = None

    def new_page(self):
        """Write a form feed to the output."""
        print('\f', end='', file=self.out)

    def output(self, filename):
        """Redirect output to *filename*, or to stdout for 'stdout'."""
        previous = self.out
        owned = getattr(self, '_owns_out', False)
        if filename.lower() == 'stdout':
            self.out = sys.stdout
            self._owns_out = False
        else:
            self.out = open(filename, 'w')
            self._owns_out = True
        # Only close streams this object opened itself.
        if owned and previous is not self.out:
            previous.close()

    # ------------------------------------------------------------------
    # Expression evaluation
    # ------------------------------------------------------------------
    def dereference(self, var, stime, etime, units):
        """Resolve one operand of an expression.

        A dotted six-part name is fetched from the database, 'empty' and
        'zero' are built-in series, a numeric literal becomes a float, and
        anything else is looked up among the define()d variables.
        """
        if re.match(r'(\S+\.){5}\S+$', var):  # Dotted sextet ==> CWMS id
            # Determine what kind of database we're talking to.
            # Only load the module we need, in case others are not installed
            # on this system
            if '/' in self.database:  # pathname ==> SQLite
                from sqlite import SQLite
                if not isinstance(self.db, SQLite):
                    self.db = SQLite()
            else:
                from cwms import CWMS
                if not isinstance(self.db, CWMS):
                    self.db = CWMS()
            if not self.db.connection():
                self.db.connect(self.database, login='cwmsview',
                                password='cwmsview')
                self.database = self.db.connection()
            if self.db.connection() != self.database:
                self.db.disconnect()
                self.db.connect(self.database, login='cwmsview',
                                password='cwmsview')
            return self.db.ts_data(var, stime, etime,
                                   Units.units(var, units))
        if var == 'empty':
            return TimeSeries()
        if var == 'zero':
            return self.zero_data(stime, etime)
        # search(), not match(): the digit need not be the first character,
        # otherwise '-5' and '.5' would be rejected.
        if re.match(r'^-?\d*\.?\d*$', var) and re.search(r'\d', var):
            return float(var)  # var is a scalar numeric value
        if var not in self.var:
            print("Undefined variable '%s' referenced" % (var),
                  file=sys.stderr)
            return TimeSeries()
        return self.evaluate(self.var[var], stime, etime, units)

    def evaluate(self, expr, stime, etime, units=None):
        """Evaluate a whitespace-separated expression, strictly left to right.

        Tokens are operands (see dereference()) or one of + - * /.  There is
        no operator precedence.  Two operands in a row are reported on
        stderr and the second one is ignored.
        """
        if units is None:
            units = self.units
        value = TimeSeries()
        operator = None
        for arg in expr.split():
            if arg in ('+', '-', '*', '/'):
                operator = arg
                continue
            operand = self.dereference(arg, stime, etime, units)
            if operator is not None:
                value = value.math(operator, operand)
                operator = None  # each operator applies to one operand only
            elif len(value) == 0:
                value = operand
            else:
                print(("Syntax error near '%s': missing operator?" %
                       (arg)), file=sys.stderr)
        return value

    def zero_data(self, stime, etime):
        """Create a time series of zeros at each interval in the range."""
        ts = TimeSeries()
        cur = stime
        while cur <= etime:
            ts.set_datum(cur, 0)
            cur += self.interval
        return ts

    def set_series(self, expr, stime, etime, interval):
        """Evaluate a column expression with its optional ':type' suffix.

        The suffix selects a per-interval statistic (min, max, tot, ct, avg,
        range, delta), a rolling average ('rollavg:<lookback>'), or a time
        offset ('off:<interval>' / 'offdst:<interval>').  With no recognised
        suffix the data are snapped to the interval and returned through the
        snapped series' dict() method.
        """
        kind = ''
        m = re.search('(.*?):(.*)', expr)
        if m:
            expr, kind = m.group(1, 2)
        kind = kind.lower()
        if kind == 'ave':
            kind = 'avg'

        if re.match('(min|max|tot|ct|avg|range|delta)', kind):
            ts = self.evaluate(expr, stime - interval, etime)
            names = ('min', 'max', 'tot', 'ct', 'avg', 'range', 'delta')
            by_name = dict(zip(names, ts.stat(interval, stime)))
            # A suffix that merely starts with a statistic name selects
            # nothing and leaves the evaluated series as it is.
            return by_name.get(kind, ts)

        if re.match('rollav', kind):
            m = re.match(r'rollav[ge]:(\S+)', kind)
            if m is None:
                raise ValueError(
                    "Rolling average '%s' needs a lookback, e.g. rollavg:7D"
                    % (kind))
            lookback = time_utils.interval_to_timedelta(m.group(1))
            ts = self.evaluate(expr, stime - lookback, etime)
            return ts.rollavg(interval, lookback, stime)

        w_before, w_after = self._window(interval)
        s_offset = timedelta(seconds=0)
        e_offset = s_offset
        dst = False
        m = re.match(r'off(dst)?:(\S+)', kind)
        if m:
            s_offset = time_utils.interval_to_timedelta(m.group(2))
            e_offset = s_offset
            if m.group(1) == 'dst':
                s_offset -= self._dst_offset(stime)
                e_offset -= self._dst_offset(etime)
                dst = True
        ts = self.evaluate(expr, stime + s_offset - w_before,
                           etime + e_offset + w_after)
        if dst:
            ts = ts.offset_to_dst()
        snap = ts.snap(interval, w_before, w_after, s_offset, stime)
        return snap.dict()

    # ------------------------------------------------------------------
    # Row fragments
    # ------------------------------------------------------------------
    def empty_row(self, cols):
        """Return a blank row; *cols* is the column span used in HTML."""
        if self.mode == 'html':
            return '<tr><td colspan="%d">&nbsp;</td></tr>\n' % (cols)
        return '\n' * self.spacing

    def end_of_row(self):
        """Return the text that terminates a data row."""
        if self.mode == 'html':
            return '</tr>\n'
        return '\n' * self.spacing

    def line_row(self, pattern, columns):
        """Return a rule row built from *pattern* repeated under each column."""
        if self.mode == 'html':
            width = 1
            for col in columns:
                width += len(self.column_formats(col.get('format')))
            return '<tr><td colspan="%d"><hr></td></tr>\n' % (width)
        ret = ('%%%d.%ds' % (self.date_width, self.date_width)
               ) % (pattern * self.date_width)
        for col in columns:
            width = col['width']
            ret += ('%%s%%%d.%ds' % (width, width)
                    ) % (self.separator, pattern * width)
        return ret + '\n'

    # ------------------------------------------------------------------
    # Directive input
    # ------------------------------------------------------------------
    def include(self, filename):
        """Read and execute directives from a file name or an open stream.

        Lines ending in a backslash are joined with the next line.  Lines
        that do not start with a word character are ignored.  A '#' outside
        quotes starts a comment that runs to the end of the line.
        """
        if isinstance(filename, str):
            with open(filename) as fh:
                self._run_directives(fh)
        else:
            self._run_directives(filename)

    def _run_directives(self, fh):
        """Dispatch every directive read from the open stream *fh*."""
        for line in fh:
            line = line.rstrip('\n')  # Strip newlines
            while line.endswith('\\'):  # Concatenate continuation lines
                line = line[:-1] + next(fh, '').rstrip('\n')
            if not re.match(r'\w', line):  # Ignore empty lines
                continue
            # comments=True strips comments while honouring quoting, so a
            # '#' inside a quoted title or format is preserved.
            args = shlex.split(line, comments=True)
            if not args:
                continue
            directive = args.pop(0)
            d = directive.lower()
            if d == 'stats':
                self.stats(args)
                continue
            if d in self._STREAM_DIRECTIVES:
                method = self._STREAM_DIRECTIVES[d]
                call_args = [fh] + args
            elif d in self._PLAIN_DIRECTIVES:
                method = self._PLAIN_DIRECTIVES[d]
                call_args = args
            else:
                print(("Warning: Unknown report directive: '%s'" %
                       (directive)), file=sys.stderr)
                continue
            handler = getattr(self, method, None)
            if not callable(handler):
                # 'format' (a plain string attribute here) and 'series'
                # have no implementation in this class.
                print(("Warning: Report directive '%s' is not supported" %
                       (directive)), file=sys.stderr)
                continue
            handler(*call_args)

    # ------------------------------------------------------------------
    # Time handling
    # ------------------------------------------------------------------
    def increment_time(self, offset, time=None):
        """Apply *offset* to a timestamp (default: the current time).

        Midnight being 2400 instead of 0000 changes not only how we display
        dates, but also how we refer to them.  So, if zero-time is actually
        the prior day, shift "current day" forward by whatever interval we're
        using.  The only difficulty with doing this is if you've asked for a
        time that *isn't* based on the day, month, or year (that is, an
        absolute offset from current time).  So it only increments if you've
        asked for month or water-year, or if the offset is capitalized.

        Args:
            offset: an ISO 'YYYY-MM-DDTHH:MM:SS' timestamp, or an offset
                understood by time_utils.offset_to_time().
            time: base time for relative offsets; None means now.

        Returns:
            The resulting datetime, localized to self.time_zone when the
            computed value carries no time zone of its own.
        """
        if time is None:
            time = datetime.now()
        if re.match(r'\d{4}-\d\d-\d\dT\d\d:\d\d:\d\d', offset):
            res = datetime.strptime(offset, '%Y-%m-%dT%H:%M:%S')
        else:
            res = time_utils.offset_to_time(offset, time)
            if self.midnight == 24 and re.search('mo|MO|wy|WY|[A-Z]+$',
                                                 offset):
                res += self.interval
        if res.tzinfo is not None:
            # Already zone-aware (the base time was); localize() only
            # accepts naive datetimes.
            return res
        return self.time_zone.localize(res)

    def time_range(self, start, end):
        """Convert offsets to start and end times (end is inclusive)."""
        stime = self.increment_time(start)
        etime = self.increment_time(end) - timedelta(seconds=1)
        return (stime, etime)

    # ------------------------------------------------------------------
    # Free text
    # ------------------------------------------------------------------
    def literal(self, fh, end_str='end'):
        """Copy a block of text to the output.

        If the text contains '%', it is treated as a strftime() format and
        rendered for the midpoint of the report period.
        """
        lit = read_to_end(fh, end_str)
        if '%' in lit:
            stime = self.increment_time(self.start)
            etime = self.increment_time(self.end) - timedelta(seconds=1)
            median_time = stime + (etime - stime) / 2
            lit = median_time.strftime(lit)
        # end='' -- the text already carries its own line endings.
        print(lit, end='', file=self.out)

    def text_time(self, fh, type, terminator='end'):
        """Copy a block of text, formatted with a selected report time.

        *type* picks the time: 'now', 'start', 'mid' or 'end'.  Any other
        value copies the text unformatted.
        """
        lit = read_to_end(fh, terminator)
        kind = type.lower()
        when = None
        if kind == 'now':
            when = datetime.now()
        elif kind == 'start':
            when = self.increment_time(self.start)
        elif kind == 'mid':
            s = self.increment_time(self.start)
            e = self.increment_time(self.end) - timedelta(seconds=1)
            when = s + (e - s) / 2
        elif kind == 'end':
            when = self.increment_time(self.end) - timedelta(seconds=1)
        if when is not None:
            lit = midnight_time(when, lit, self.midnight)
        print(lit, end='', file=self.out)

    # ------------------------------------------------------------------
    # Reports
    # ------------------------------------------------------------------
    def multi_column(self, expr, col_interval, title_fmt):
        """Print one series folded into side-by-side columns.

        A new column starts every *col_interval* (for example one column per
        month with one row per day).  *title_fmt*, unless blank, is a
        strftime() format producing each column's heading.  The columns
        replace self.columns so that stats() can summarise them.
        """
        stime, etime = self.time_range(self.start, self.end)
        # Build the column headers at col_interval using title_fmt, if it
        # exists
        if re.search(r'\S', title_fmt):
            cols = []
            time = stime
            while time <= etime:
                cols.append(
                    {'title': midnight_time(time, title_fmt, self.midnight),
                     'width': self.value_width})
                time = self.increment_time(col_interval, time)
            print(self.wrap_titles(cols), file=self.out)
        ts = self.set_series(expr, stime, etime, self.interval)
        self.columns = []  # Reset columns as if by NEW_COLUMNS
        self._all_columns = None
        w_before = self._window(self.interval)[0]

        # Store columns as if they were built by COLUMN and REPORT, so that
        # the statistics code can be reused.
        def new_col(start):
            return {'format': None,
                    'start': start,
                    'stats': {'all': 1},
                    'values': [],
                    'width': self.value_width}

        missing_fmt = '%s%%%s.%ss' % (self.separator, self.value_width,
                                      self.value_width)
        # Step through each interval, jumping to the next column
        # when within w_before of the next col_interval.
        time = stime
        col = new_col(time)
        while time <= etime:
            if (self.increment_time(col_interval, col['start']) - time <=
                    w_before):
                self.columns.append(col)
                col = new_col(time)
            if time in ts and ts[time] is not None:
                val = ts[time]
                col['values'].append(
                    self.format_field(self.value_format, val))
                self._accumulate(col, val, time)
            else:
                col['values'].append(missing_fmt % (self.missing))
            time += self.interval
        self.columns.append(col)
        # Finalize any columnar statistics
        for col in self.columns:
            self._finalize_stats(col)
        # Determine the length of the longest column, which will determine
        # the length of the report.  Also preserve the start time of the
        # longest column for indexing it (so we can appropriately label the
        # end rows to match the longest columns, e.g. days 29, 30, and 31)
        report_len = 0
        long_col_start = stime
        for col in self.columns:
            column_len = len(col['values'])
            if column_len > report_len:
                report_len = column_len
                long_col_start = col['start']
        # Display the report
        blank = self.separator + ' ' * self.value_width
        rowtime = long_col_start
        lines = []
        for row in range(report_len):
            line = midnight_time(rowtime, self.date_format, self.midnight)
            for col in self.columns:
                if row < len(col['values']):
                    line += col['values'][row]
                else:
                    line += blank
            lines.append(line)
            rowtime += self.interval
        print(('\n' * self.spacing).join(lines), file=self.out)

    def report(self, *opts):
        """Print one row per interval for every defined column.

        Options (case-insensitive):
            dropempty: leave out columns that have no data in this period.
            notitle: do not print the heading rows.

        Statistics of the displayed values are stored on each column for a
        following stats() call.
        """
        flags = {opt.lower() for opt in opts}
        dropempty = 'dropempty' in flags
        notitle = 'notitle' in flags
        # Columns dropped by an earlier 'dropempty' report get another
        # chance, in case we are asked to show a new date range.
        self._restore_columns()
        stime, etime = self.time_range(self.start, self.end)
        # Get each column's data, transform it into an appropriate time
        # series.  Build a list of columns that actually have data, so we can
        # elide out those that do not, if requested.
        populated = []
        for col in self.columns:
            col['ts'] = self.set_series(col['expr'], stime, etime,
                                        self.interval)
            if len(col['ts']) > 0:
                populated.append(col)
            for key in self._STAT_KEYS:
                col.pop(key, None)
        if dropempty:
            # stats() must see exactly the columns that were printed, so the
            # narrowed list stays in place until the next column or report.
            self._all_columns = self.columns
            self.columns = populated
        # If requested, build (possibly multi-row) titles, then display them
        if not notitle:
            print(self.wrap_titles(self.columns), file=self.out)
        # Build columnar data.
        # Keep statistics on the displayed values in case they are needed.
        html = self.mode == 'html'
        time_sprintf = '<tr><td>%s</td>' if html else '%s'
        rows = []
        time = stime
        while time <= etime:
            cells = [time_sprintf % (midnight_time(time, self.date_format,
                                                   self.midnight))]
            for col in self.columns:
                fmt = ifdef(col['format'], self.value_format)
                val = col['ts'][time] if time in col['ts'] else None
                if val is not None:
                    cells.append(self.format_field(fmt, val))
                    self._accumulate(col, val, time)
                elif html:
                    for _ in self.column_formats(col['format']):
                        cells.append('<td>%s</td>' % (self.missing))
                else:
                    fw = col['width'] if 'width' in col else self.value_width
                    missing_fmt = '%s%%%s.%ss' % (self.separator, fw, fw)
                    cells.append(missing_fmt % (self.missing))
            cells.append(self.end_of_row())
            rows.append(''.join(cells))
            time += self.interval
        for col in self.columns:
            self._finalize_stats(col)
        print(''.join(rows), end='', file=self.out)

    def stats(self, lines=_DEFAULT_STATS):
        """Print summary rows for the most recent report.

        Each entry of *lines* is one of:
            'stat:label[:format]': a row of that statistic for every column
                that asked for it.  'stat(formula)' appends *formula* to the
                value and evaluates the result, e.g. 'tot(*1.9835):AcFt'.
            'blank': an empty row.
            'line<chars>': a rule row drawn with <chars> (default '-').

        An empty *lines* selects the default set of statistics.
        """
        if not lines:
            lines = self._DEFAULT_STATS
        html = self.mode == 'html'
        out = []
        for line in lines:
            if ':' in line:
                tokens = line.split(':', 2)
                stat = tokens.pop(0)
                name = tokens.pop(0) if tokens else None
                format = (tokens.pop(0) if tokens else None) or None
                formula = None
                m = re.search(r'\((.*)\)', stat)
                if m:
                    formula = m.group(1)
                    stat = re.sub(r'\(.*\)', '', stat)
                stat = stat.lower()
                if stat == 'ave':
                    stat = 'avg'
                if html:
                    out.append('<tr><td>%s</td>' % (name))
                else:
                    fmt = '%%%s.%ss' % (self.date_width, self.date_width)
                    out.append(fmt % (name))
                for column in self.columns:
                    col_format = column.get('format')
                    fw = column['width'] if 'width' in column \
                        else self.value_width
                    wanted = (stat in column['stats'] or
                              'all' in column['stats'])
                    if wanted and stat in column:
                        value = column[stat]
                        if stat == 'tmax' or stat == 'tmin':
                            time_fmt = ifdef(format, self.date_format)
                            text = midnight_time(value, time_fmt,
                                                 self.midnight)
                            if html:
                                out.append('<td>%s</td>' % (text))
                            else:
                                fmt = '%s%%%s.%ss' % (self.separator, fw, fw)
                                out.append(fmt % (text))
                        else:
                            if formula is not None:
                                # NOTE(security): eval() runs text taken
                                # from the report definition; only process
                                # trusted report files.  The result is kept
                                # local so that later rows still see the
                                # unmodified statistic.
                                value = eval('%s %s' % (value, formula))
                            fmt = ifdef(format, col_format,
                                        self.value_format)
                            out.append(self.format_field(fmt, value))
                    elif html:
                        for _ in self.column_formats(col_format):
                            out.append('<td>&nbsp;</td>')
                    else:
                        out.append(self.separator + ' ' * fw)
                out.append(self.end_of_row())
            elif line == 'blank':
                out.append(self.empty_row(len(self.columns) + 1))
            elif line.startswith('line'):
                pattern = line[len('line'):].split()
                pattern = pattern[0] if pattern else '-'
                out.append(self.line_row(pattern, self.columns))
        print(''.join(out), end='', file=self.out)

    def wrap_titles(self, columns):
        """Return the heading rows for *columns*.

        In text mode each title is wrapped to its column width and the
        headings are bottom-aligned, so that the last line of every title
        sits directly above the data.  Backslash escapes in titles (such as
        a newline escape) are interpreted.
        """
        if self.mode == 'html':
            cells = ['<tr><th>%s</th>' % (self.date_title)]
            for col in columns:
                fmt = col['format'] if 'format' in col else self.value_format
                span_count = len(self.column_formats(fmt))
                span = ' colspan="%d"' % (span_count) if span_count > 1 \
                    else ''
                title = self._unescape(col['title']) if 'title' in col \
                    else ''
                cells.append('<th%s>%s</th>' % (span, title))
            cells.append('</tr>\n')
            return ''.join(cells)

        # Wrap every title; keep the lines bottom-up so that rows can be
        # taken from the front while building the header from the bottom.
        wrapped = []
        maxrow = 0
        for col in columns:
            title = self._unescape(col['title']) if 'title' in col else ''
            width = col['width'] if 'width' in col else self.value_width
            lines = []
            for line in title.splitlines():
                lines.extend(textwrap.wrap(line, width))
            lines.reverse()
            maxrow = max(maxrow, len(lines))
            wrapped.append((width, lines))
        datelines = []
        for line in self.date_title.splitlines():
            datelines.extend(textwrap.wrap(line, self.date_width))
        datelines.reverse()

        header = []
        date_fmt = '%%%d.%ds' % (self.date_width, self.date_width)
        for row in range(maxrow):
            date = datelines[row] if row < len(datelines) else ''
            header_line = date_fmt % (date)
            for width, lines in wrapped:
                text = lines[row] if row < len(lines) else ''
                header_line += ('%s%%%d.%ds' % (self.separator,
                                                width, width)) % (text)
            header.append(header_line)
        header.reverse()
        return ''.join(line + '\n' for line in header)
###############################################################################
###############################################################################
if __name__ == '__main__':
    # Directives come from the file named as the first command-line
    # argument, or from standard input when no argument is given.
    source = sys.argv[1] if len(sys.argv) > 1 else sys.stdin
    report = cwms_report()
    report.include(source)