#This is a data conversion utility script which
#was used to convert ERBE g4 data into convenient
#ASCII and netCDF files for use with data labs and
#problem sets. The specific input data format is
#peculiar to the ERBE input data, so no attempt was
#made to make this conversion routine general. 
#
#The data was taken from the netCDF version available on
#    http://climserv.ipsl.polytechnique.fr/catalog.html
#rather than from the native NASA HDF version. The climserv
#version is a pretty straightforward translation of the NASA
#originals.  Only the years for which twelve full months
#of data are available have been converted. The output
#is in ascii files for zonal means, as well as netCDF files
#which will be readable with a wide variety of software.

#ToDo: *Put long names and data info into header of text files
#      *Write out netCDF 2D data


import cdms,MV,glob
from ClimateUtilities import *

#Location of the data
datapath = '/Users/rtp1/bigdata/ERBE_netcdf/'

#-------Define dictionaries and constants--------------------
months = ['Jan','Feb','Mar','Apr','May','Jun','Jul','Aug','Sept',
          'Oct','Nov','Dec']
#Data tag dictionary, giving mapping between
#data type and the 4-letter tags used in the
#ERBE file name conventions (Climserv netCDF version)
dataTagDict = {'alas':'ALB' ,'alcs':'ALBclear','lwas':'OLR',
               'lwcs':'OLRclear','ntas':'NET',
               'ntcs':'NETclear','swas':'SW','swcs':'SWclear',
               'lwcf':'CLOUDlwf','swcf':'CLOUDswf'}

#Dictionaries  mapping between short variable
#names (to be used in the output netCDF file) and
#the ascii output file to the corresponding long names
#
longNameDict = {'Clear Sky OLR':'OLRclear','All Sky OLR':'OLR',
             'Clear Sky Solar':'SWclear','All Sky Solar':'SW',
             'Clear Sky Albedo':'ALBclr','All Sky Albedo':'ALB',
             'Clear Sky Net Top of Atmosphere Budget':'NETclear',
             'All Sky Net Top of Atmosphere Budget':'NET',
             'Cloud Longwave Forcing':'CLOUDlwf',
             'Cloud Shortwave Forcing':'CLOUDswf'}
#Build the reverse dictionary
#  (Is there a way to directly do a reverse dictionary lookup?)
vNameDict = {}
for key in longNameDict.keys():
    val = longNameDict[key]
    vNameDict[val] = key

#-------Define the functions we need -----------------------
def fetch(name):
    f = cdms.open(name)
    #Get the data (it's in a 1D array)
    data = f['z']
    #Make a place to keep the cleaned-up data. Initialize
    #with the undefined data code.
    UNDEF = 1.e20
    data1 = MV.zeros(len(data),MV.Float) + UNDEF
    nMissing = 0
    #
    #The following loop is here because the input data uses
    #nan ('Not a Number') for the missing data code, which creates
    #some problems in Python when any operation is performed.
    #The try/except block traps this error, and leaves the
    #corresponding data item set to the UNDEF code.
    #
    #The strange-looking reference data[i][0] is because
    #the value is returned by cdms as an array containing just
    #a single element. That's a trick to allow information
    #about,e.g., floats versus doubles to be retained, needed
    #because all Python floats are double precision. This
    #kind of return is done by cdms variables, but NOT by
    #regular Numeric arrays
    #
    for i in range(len(data)):
        try:
            data1[i] = data[i][0]
        except:
            nMissing +=1
    #
    #The data is in a 1D array. Reshape to 2D, using what
    #we know about the latitude and longitude dimensions
    data1 = MV.reshape(data1,[72,144])
    print 'filename ',name,nMissing,' missing values'
    f.close()
    #
    #Mask the data so that cdms variable arithmetic can
    #deal with missing values properly, Note that masking
    #fills missing data with the default fill value
    #(1.e20), which happens to be the same as what we
    #used for UNDEF above, in this case. If you want
    #to retain a different value for UNDEF, you would
    #have to specify it as part of masked_where 
    data1 = MV.masked_where(data1 == UNDEF,data1)
    return data1

def name(tag,year,month):
    names = glob.glob(datapath+tag+'_s4g_*%d_%02d.netcdf'%(year,month))
    if len(names) > 1:
        print "Warning: multiple files ",names
        print "Selected ",names[0]
    if len(names) ==0:
        print "No files found for ",tag,year,month
        return None
    else:
        return names[0]

def zonalMean(data):
    months = ['Jan','Feb','Mar','April','May','Jun','Jul',
              'Aug','Sep','Oct','Nov','Dec']
    zmean = MV.average(data,axis = 2) #zonal mean
    annual = MV.average(zmean) #annual average
    #
    #Put it in a Curve and return it for writing
    c = Curve()
    c.addCurve(lat[:],'Latitude') #lat is a global
    for i in range(12):
        c.addCurve(zmean[i],months[i])
    c.addCurve(annual,'ann')
    return c

#----------Main script starts here ----------------------------
startyear = 1985
endyear = 1988

#Create axes, and a variable to hold 12 months of data
lat = cdms.createAxis([90.-1.25-2.5*i for i in range(72)],id='lat')
lon = cdms.createAxis([-180.+2.5*i for i in range(144)],id='lon')
month = cdms.createAxis([1.+i for i in range(12)],id='time')
outDat = cdms.createVariable(MV.zeros([12,72,144]),typecode = 'd',axes =[month,lat,lon])

#Read in a year of data, do zonal means, write to ascii and
#netCDF output files.

#ToDo: Figure out why, in new version of Curve object,
#c.dump() doesn't work if the column added is an MV
#(or presumably an MA).
#
#      *Change missing value fill value for zonal mean,
#       so  it isn't zero.

for year in [1989]:
    for tag in dataTagDict.keys():
        for month in range(12):
            fname = name(tag,year,month+1)
            if fname == None:
                outDat[month][:] = -1.e20
            else:
                outDat[month] = fetch(fname)
        #Compute zonal means for each month, and annual average
        #of these. Put the data in a curve and write out
        c = Curve()
        c.addCurve(lat[:],'lat')
        zmean = MV.average(outDat,axis=2)
        for month in range(12):
            c.addCurve(zmean[month]._data,months[month])
        zmeanAnn = MV.average(zmean)
        c.addCurve(zmeanAnn,'Ann')
        c.dump('%s%d.txt'%(dataTagDict[tag],year))