#This is a data conversion utility script which #was used to convert ERBE g4 data into convenient #ASCII and netCDF files for use with data labs and #problem sets. The specific input data format is #peculiar to the ERBE input data, so no attempt was #made to make this conversion routine general. # #The data was taken from the netCDF version available on # http://climserv.ipsl.polytechnique.fr/catalog.html #rather than from the native NASA HDF version. The climserv #version is a pretty straightforward translation of the NASA #originals. Only the years for which twelve full months #of data are available have been converted. The output #is in ascii files for zonal means, as well as netCDF files #which will be readable with a wide variety of software. #ToDo: *Put long names and data info into header of text files # *Write out netCDF 2D data import cdms,MV,glob from ClimateUtilities import * #Location of the data datapath = '/Users/rtp1/bigdata/ERBE_netcdf/' #-------Define dictionaries and constants-------------------- months = ['Jan','Feb','Mar','Apr','May','Jun','Jul','Aug','Sept', 'Oct','Nov','Dec'] #Data tag dictionary, giving mapping between #data type and the 4-letter tags used in the #ERBE file name conventions (Climserv netCDF version) dataTagDict = {'alas':'ALB' ,'alcs':'ALBclear','lwas':'OLR', 'lwcs':'OLRclear','ntas':'NET', 'ntcs':'NETclear','swas':'SW','swcs':'SWclear', 'lwcf':'CLOUDlwf','swcf':'CLOUDswf'} #Dictionaries mapping between short variable #names (to be used in the output netCDF file) and #the ascii output file to the corresponding long names # longNameDict = {'Clear Sky OLR':'OLRclear','All Sky OLR':'OLR', 'Clear Sky Solar':'SWclear','All Sky Solar':'SW', 'Clear Sky Albedo':'ALBclr','All Sky Albedo':'ALB', 'Clear Sky Net Top of Atmosphere Budget':'NETclear', 'All Sky Net Top of Atmosphere Budget':'NET', 'Cloud Longwave Forcing':'CLOUDlwf', 'Cloud Shortwave Forcing':'CLOUDswf'} #Build the reverse dictionary # (Is there a way to directly do a reverse dictionary lookup?) vNameDict = {} for key in longNameDict.keys(): val = longNameDict[key] vNameDict[val] = key #-------Define the functions we need ----------------------- def fetch(name): f = cdms.open(name) #Get the data (it's in a 1D array) data = f['z'] #Make a place to keep the cleaned-up data. Initialize #with the undefined data code. UNDEF = 1.e20 data1 = MV.zeros(len(data),MV.Float) + UNDEF nMissing = 0 # #The following loop is here because the input data uses #nan ('Not a Number') for the missing data code, which creates #some problems in Python when any operation is performed. #The try/except block traps this error, and leaves the #corresponding data item set to the UNDEF code. # #The strange-looking reference data[i][0] is because #the value is returned by cdms as an array containing just #a single element. That's a trick to allow information #about,e.g., floats versus doubles to be retained, needed #because all Python floats are double precision. This #kind of return is done by cdms variables, but NOT by #regular Numeric arrays # for i in range(len(data)): try: data1[i] = data[i][0] except: nMissing +=1 # #The data is in a 1D array. Reshape to 2D, using what #we know about the latitude and longitude dimensions data1 = MV.reshape(data1,[72,144]) print 'filename ',name,nMissing,' missing values' f.close() # #Mask the data so that cdms variable arithmetic can #deal with missing values properly, Note that masking #fills missing data with the default fill value #(1.e20), which happens to be the same as what we #used for UNDEF above, in this case. If you want #to retain a different value for UNDEF, you would #have to specify it as part of masked_where data1 = MV.masked_where(data1 == UNDEF,data1) return data1 def name(tag,year,month): names = glob.glob(datapath+tag+'_s4g_*%d_%02d.netcdf'%(year,month)) if len(names) > 1: print "Warning: multiple files ",names print "Selected ",names[0] if len(names) ==0: print "No files found for ",tag,year,month return None else: return names[0] def zonalMean(data): months = ['Jan','Feb','Mar','April','May','Jun','Jul', 'Aug','Sep','Oct','Nov','Dec'] zmean = MV.average(data,axis = 2) #zonal mean annual = MV.average(zmean) #annual average # #Put it in a Curve and return it for writing c = Curve() c.addCurve(lat[:],'Latitude') #lat is a global for i in range(12): c.addCurve(zmean[i],months[i]) c.addCurve(annual,'ann') return c #----------Main script starts here ---------------------------- startyear = 1985 endyear = 1988 #Create axes, and a variable to hold 12 months of data lat = cdms.createAxis([90.-1.25-2.5*i for i in range(72)],id='lat') lon = cdms.createAxis([-180.+2.5*i for i in range(144)],id='lon') month = cdms.createAxis([1.+i for i in range(12)],id='time') outDat = cdms.createVariable(MV.zeros([12,72,144]),typecode = 'd',axes =[month,lat,lon]) #Read in a year of data, do zonal means, write to ascii and #netCDF output files. #ToDo: Figure out why, in new version of Curve object, #c.dump() doesn't work if the column added is an MV #(or presumably an MA). # # *Change missing value fill value for zonal mean, # so it isn't zero. for year in [1989]: for tag in dataTagDict.keys(): for month in range(12): fname = name(tag,year,month+1) if fname == None: outDat[month][:] = -1.e20 else: outDat[month] = fetch(fname) #Compute zonal means for each month, and annual average #of these. Put the data in a curve and write out c = Curve() c.addCurve(lat[:],'lat') zmean = MV.average(outDat,axis=2) for month in range(12): c.addCurve(zmean[month]._data,months[month]) zmeanAnn = MV.average(zmean) c.addCurve(zmeanAnn,'Ann') c.dump('%s%d.txt'%(dataTagDict[tag],year))