##--------------------##
import numpy as np  # numeric arrays (BUGFIX: was 'from matplotlib import numpy')


# ---------- READING BINARY MIROC GEOPHYSICAL DATA ----------
def CCM_readmodel(datafile, datadir):
    """
    Read one binary file of MIROC output (2D or 3D data).

    Input: directory and file name (following the nomenclature of the
    online repository for the HFC data).
    Output: Python dictionary with geophysical data + ancillary information.

    ---
    AUTHOR: Eric Dupuy, National Institute for Environmental Studies (NIES)
    ---
    NOTES:
    1) Error management is not implemented: please make sure the directory
       path does not end in '/' and that both the last directory on path and
       the filenames match those of the online repository, or modify
       accordingly.
    2) To simplify the multi-member analysis (100 per experiment), the
       output array dimension is standardized to 366 days; a dummy entry is
       inserted at index 59 (60th position) for non-leap years.
    3) Geophysical parameters implemented are those used in the analysis
       described in the JGR paper: atmospheric number density, atmospheric
       temperature, zonal mean zonal wind, ozone volume mixing ratio, total
       column ozone, meridional and vertical components and divergence of
       the Eliassen-Palm flux, and residual mean meridional and vertical
       wind (v*, w*).
    ---
    VERSION HISTORY:
    ver.1.0, last update: 2021-06-09 (ED, NIES) - adapted for external users
    ver.1.1 - review fixes: numpy import, leap-year output array,
              'xo3_P' dictionary key
    """
    # ==========
    # INPUT: Initialization
    # -----
    # extract info from names of data directory and file
    parameter = datadir.split('/')[-1]   # assumes no trailing '/' in path
    year = int(datafile.split('_')[-1])  # year is the last '_'-field of name

    # skip day index for non-leap years
    idx_dayskip = 59      # 29 February missing if not leap year
    numtot_records = 366  # number of "records" (= days) in the standardized
                          # output, accounting for leap years

    # -----
    # file properties and sizes for sequential binary reading
    fltprec = np.float32  # reading precision from binary
    hdr_bytes = 1024      # header size in bytes: 64 lines x 16 chars
    elt_bytes = 4         # length of 1 elt (data/padding) in bytes
    num_padding = 4       # number of padding elements / record (FORTRAN
                          # record delimiters)

    # header handling
    nchars_hline = 16     # number of characters per header line
    nlines_header = 64    # number of lines in the header

    # date indices (binary file header-related)
    idx_sttdate = 47      # field index of start date (00h00m) in header
    idx_middate = 26      # field index of middle date (12h00m) in header
    idx_enddate = 48      # field index of end date (00h00m next day)

    # -----
    # GEOLOCATION: model grid values
    # T42 resolution: constant longitude step 2.8125; latitude step ~2.8
    # (!! same order as the elements of the final data array !!)
    geolat = np.array([
        87.864, 85.097, 82.313, 79.526, 76.737, 73.948, 71.158, 68.368,
        65.578, 62.787, 59.997, 57.207, 54.416, 51.626, 48.835, 46.045,
        43.254, 40.464, 37.673, 34.883, 32.092, 29.301, 26.511, 23.720,
        20.930, 18.139, 15.348, 12.558, 9.767, 6.977, 4.186, 1.395,
        -1.395, -4.186, -6.977, -9.767, -12.558, -15.348, -18.139, -20.930,
        -23.720, -26.511, -29.301, -32.092, -34.883, -37.673, -40.464,
        -43.254, -46.045, -48.835, -51.626, -54.416, -57.207, -59.997,
        -62.787, -65.578, -68.368, -71.158, -73.948, -76.737, -79.526,
        -82.313, -85.097, -87.864,
    ], dtype=fltprec)  # mean step 2.789, min 2.767, max 2.791
    geolon = np.array(np.r_[:128] * 2.8125, dtype=fltprec)
    geolev = np.array([
        1000., 850., 700., 500., 400., 300., 250., 200., 170., 150., 130.,
        115., 100., 90., 80., 70., 50., 30., 20., 15., 10., 7., 5., 3., 2.,
        1.5, 1., 0.5, 0.3, 0.2, 0.1,
    ], dtype=fltprec)

    # dimensions of the geolocation data
    numtot_levels = 31  # total number of pressure levels
    numtot_lats = 64    # total number of lat elements
    numtot_lons = 128   # total number of lon elements

    # GEOPHYSICAL PARAMETER DEFAULTS: variable-related quantities:
    # full name, array dimensions, and physical unit (LaTeX string).
    # Available geophysical parameters are
    defgeo = {
        'air_P':  {'name': 'Air number density',
                   'fdims': '3d',
                   # NOTE(review): unit string looks like it is missing the
                   # '^' superscript marker — kept as in the original
                   'unit': 'm${-3}$'},
        'dobson': {'name': 'Total column ozone',
                   'fdims': '2d',
                   'unit': 'DU'},
        'epfdiv': {'name': 'Eliassen-Palm flux divergence',
                   'fdims': '2z',
                   'unit': 'm.s$^{-2}$'},
        'epfy':   {'name': 'Eliassen-Palm flux vector meridional component',
                   'fdims': '2z',
                   'unit': 'kg.m$^{-1}$.s$^{-2}$'},
        'epfz':   {'name': 'Eliassen-Palm flux vector vertical component',
                   'fdims': '2z',
                   'unit': 'kg.m$^{-1}$.s$^{-2}$'},
        'T_P':    {'name': 'Temperature',
                   'fdims': '3d',
                   'unit': 'K'},
        'u_P':    {'name': 'Zonal mean zonal wind',
                   'fdims': '3d',
                   'unit': 'm.s$^{-1}$'},
        'v_sta':  {'name': 'Residual mean meridional wind',
                   'fdims': '2z',
                   'unit': 'm.s$^{-1}$'},
        'w_sta':  {'name': 'Residual mean vertical wind',
                   'fdims': '2z',
                   'unit': 'm.s$^{-1}$'},
        # BUGFIX: key was 'lname', which broke defgeo[parameter]['name']
        'xo3_P':  {'name': 'Ozone volume mixing ratio',
                   'fdims': '3d',
                   'unit': 'ppv'},
    }

    # -----
    # Define shape of parameter array (= "dimensions"):
    # number of data elements and array sizes depending on the dimensions
    nlat = numtot_lats  # all latitudes are read
    dims = defgeo[parameter]['fdims']
    if dims == '3d':
        # 3D --> latitude/longitude/pressure, full output
        nelts = numtot_lats * numtot_lons * numtot_levels
        nlon = numtot_lons
        nlev = numtot_levels          # pressure levels (= 31)
        idx_levels = np.r_[:nlev]     # pressure indices (= [0: 30])
    elif dims == '2z':
        # 2z --> latitude/pressure (zonal data, e.g., EP flux divergence)
        nelts = numtot_lats * numtot_levels
        nlon = 1
        nlev = numtot_levels          # pressure levels (= 31)
        idx_levels = np.r_[:nlev]     # pressure indices (= [0: 30])
    elif dims == '2d':
        # 2D --> latitude/longitude (integrated data, total O3)
        nelts = numtot_lats * numtot_lons
        nlon = numtot_lons
        nlev = 1                      # surface or integrated
        idx_levels = np.r_[0]         # only one level

    # -----
    # number of bytes per record (== 1 day): header + data + padding
    rec_bytes = hdr_bytes + elt_bytes * (nelts + num_padding)

    # effective number of records in the file, accounting for leap years
    # (Gregorian rule: divisible by 4, except centuries not divisible by 400)
    is_leap = (year % 4 == 0 and year % 100 != 0) or (year % 400 == 0)
    nreff = numtot_records if is_leap else numtot_records - 1

    # -----
    # Initialize output dictionary
    data_out = {'name': {'infile': parameter,
                         'full': defgeo[parameter]['name']},
                'unit': defgeo[parameter]['unit'],
                'directory': datadir,
                'filename': datafile,
                'year': year,
                'datadims': {'type': dims,
                             'array': (numtot_records, nlev, nlat, nlon)},
                }

    # Initialize temporary containers
    dtsfull = []  # list of date dictionaries per record
    hdrfull = []  # list of headers (nlines_header strings of nchars_hline)
    # temporary data array, all values preset to NaN
    tmpdata = np.full((nreff, nlev, nlat, nlon), np.nan, dtype=fltprec)

    # dummy entries used to pad non-leap years: data, header, dates
    dummyrec = np.full((1, nlev, nlat, nlon), np.nan, dtype=fltprec)
    dummystr = ['-' * nchars_hline] * nlines_header
    dummydate = {'start': 'n/a', 'middle': 'n/a', 'end': 'n/a'}

    #####
    # READING: Extraction from file
    # Open the binary file for sequential reading, model binary output
    with open('/'.join([datadir, datafile]), 'rb') as fbinary:
        # loop over all records (= days)
        for irec in range(nreff):
            # set position at start of current record and read it whole
            fbinary.seek(rec_bytes * irec, 0)
            rec_bin = fbinary.read(rec_bytes)

            # extract header as a list of fixed-width strings
            # (skip the leading 4-byte FORTRAN record marker)
            hdr_strings = rec_bin[elt_bytes:hdr_bytes + elt_bytes].decode(
                "utf-8")
            hdrfull.append(
                [hdr_strings[pos:pos + nchars_hline]
                 for pos in range(0, nlines_header * nchars_hline,
                                  nchars_hline)])

            # date/time strings for this record (fields of the header)
            hdrrec = hdrfull[irec]
            dtsfull.append({'start': hdrrec[idx_sttdate],
                            'middle': hdrrec[idx_middate],
                            'end': hdrrec[idx_enddate]})

            # read remainder of the record = data
            # (convert native binary to output array: swap bytes and
            # rearrange; the padding bytes are due to the FORTRAN writing of
            # the model output; reading order differs from FORTRAN)
            offset = 3 * elt_bytes + hdr_bytes
            recdata = np.frombuffer(
                rec_bin[offset:offset + nelts * elt_bytes],
                dtype=fltprec).byteswap().reshape(nlev, nlat, nlon)

            # set missing values (-999.0) to NaN
            # (byteswap() returned a writable copy of the buffer)
            recdata[recdata == -999.] = np.nan

            # assign to temporary data array
            tmpdata[irec, :, :, :] = recdata[idx_levels, :, :]

    # -----
    # assign to final array (skip 29 Feb index except leap year)
    # indices 0-30: January; 31-58: Feb 01 to 28; 59: February 29 or NaN
    if nreff != numtot_records:
        # non-leap year: insert a dummy 29 February at index 59
        fulldata = np.concatenate((tmpdata[:idx_dayskip, :, :, :],
                                   dummyrec,
                                   tmpdata[idx_dayskip:, :, :, :]), axis=0)
        hdrfull = hdrfull[:idx_dayskip] + [dummystr] + hdrfull[idx_dayskip:]
        dtsfull = dtsfull[:idx_dayskip] + [dummydate] + dtsfull[idx_dayskip:]
    else:
        # BUGFIX: leap year — the file already holds 366 records;
        # 'fulldata' was previously left unassigned (NameError)
        fulldata = tmpdata

    #####
    # OUTPUT: Update output structure/dictionary
    data_out['dates'] = dtsfull    # dates
    data_out['headers'] = hdrfull  # headers
    data_out['data'] = fulldata    # data

    # return the output
    return data_out