DKRZ Python example: read a netCDF file using xarray

Example script:

#
#  PyEarthScience:  read_netCDF_with_xarray.py
#
#  Description:
#    Demonstrate the use of xarray to open and read the content of
#    a netCDF file.
#
#  Author:
#    Karin Meier-Fleischer
#
#  Date of initial publication:
#    April, 2019
#
'''
  PyEarthScience:  read_netCDF_with_xarray.py

  Description:
    Demonstrate the use of xarray to open and read the content of
    a netCDF file.

  - xarray
  - netCDF

  2019-04-14  kmf
'''

import os
import numpy as np
import xarray as xr

#-----------------------------------------------------------------------
#-- Function: main
#-----------------------------------------------------------------------
def main(fname=None):
    """Open a netCDF file with xarray and print an overview of its content.

    The dataset is opened exactly once inside a ``with`` block, so the
    underlying file is closed again when the function returns. All
    selections and reductions below operate on that single dataset.

    Parameters
    ----------
    fname : str, optional
        Path of the netCDF file to read. The file is expected to contain a
        variable ``t`` with the dimensions ``time``, ``lev``, ``lat`` and
        ``lon`` (these names are used by the selections below). When
        omitted, the example file ``rectilinear_grid_3D.nc`` of the NCL
        User Guide is read from its default installation path.

    Returns
    -------
    None
        Everything is written to standard output.
    """
    if fname is None:
        #-- input file rectilinear_grid_3D.nc from the NCL User Guide;
        #-- the default path below points into an NCL installation
        fname = "/sw/spack-levante/ncl-6.6.2-r3hsef/lib/ncarg/data/nug/rectilinear_grid_3D.nc" #-- data file name

    def rule():
        #-- start a new output section: separator line plus an empty line
        print('------------------------------------------------------')
        print()

    #-- open the file once; the context manager closes it at the end
    with xr.open_dataset(fname) as ds:

        rule()
        print('--> ds        ', ds)
        print()

        #-- read variable t, first timestep, first level
        var = ds.t.isel(time=0, lev=0)

        #-- read variable latitude and longitude arrays
        lat = ds.lat
        lon = ds.lon

        #-- print the dataset once more (same content as above)
        rule()
        print('-->   ', ds)
        print()

        #-- print the size and shape of the variable
        rule()
        print('--> var.size           ',var.size)
        print('--> var.shape          ',var.shape)

        #-- index notation that gives the same selections as above:
        #--   var = ds['t'][0,0,:,:]      first time step, first level
        #--   lat = ds['lat'][:]          all latitudes
        #--   lon = ds['lon'][:]          all longitudes
        print()

        #-- print the minimum and maximum of lat and lon
        rule()
        print('--> lat min             ', lat.min().values)
        print('--> lat max             ', lat.max().values)
        print('--> lon min             ', lon.min().values)
        print('--> lon max             ', lon.max().values)

        #-- lat.min().item() etc. would return the same numbers as
        #-- plain Python scalars
        print()

        #-- print variable information
        rule()
        print('--> var')
        print()
        print(var)
        print()

        #-- retrieve the dimension names of the coordinates lat/lon and
        #-- their lengths
        dimslat = lat.dims[0]
        dimslon = lon.dims[0]
        nrlat   = lat.shape[0]
        nrlon   = lon.shape[0]

        rule()
        print('--> dimslat: ',dimslat, '  dimslon: ',dimslon,'  nrlat: ',nrlat,'  nrlon: ',nrlon)
        print()

        #-- print the variable attributes
        rule()
        print('--> attributes:       ',var.attrs)
        print()

        #-- print the variable values
        rule()
        print('--> values            ')
        print()
        print(var.values)
        print()

        #-- print the type of the variable (DataArray)
        rule()
        print('--> type(var)         ',type(var))
        print()

        #-- print the type of the variable values (numpy.ndarray)
        rule()
        print('--> type(var.values)  ',type(var.values))
        print()

        #-- select variable t from dataset: time index 0, level index 6
        rule()
        print('--> dataset variable t (time=0, lev=6)')
        print()
        print(ds.t.isel(time=0, lev=6).values)
        print()

        #-- select variable t from dataset, lat index 1 and lon index 2;
        #-- isel() selects by integer position, not by coordinate value,
        #-- so the message names indices
        rule()
        print('--> dataset variable t select data at index lat=1 and lon=2')
        print()
        print(ds.t.isel(lat=1, lon=2).values)
        print()

        #-- select variable t, timestep 2001-01-01
        rule()
        print('--> time="2001-01-01"')
        print()
        print(ds.t.sel(time='2001-01-01'))
        print()

        #-- select a sub-region (label based slices)
        rule()
        print('--> select sub-region')
        print()
        print(ds.t.sel(lat=slice(20, 0), lon=slice(-25, 0), time='2001-01-01'))
        print()

        #-- select the nearest grid point, accepting only matches within
        #-- the given tolerance
        rule()
        print('--> select slice nearest neighbor with tolerance')
        print()
        print(ds.t.sel(lat=5.0, lon=1.0, method='nearest', tolerance=2).values)
        print()

        #-- print dataset minimum/maximum. A Dataset is a mapping, so its
        #-- 'values' attribute is the dict-style method and not the data;
        #-- print the reduced dataset itself to show each variable with
        #-- its type and minimum/maximum value
        rule()
        print('--> print dataset min')
        print()
        print(ds.min())
        print()
        print('--> print dataset max')
        print()
        print(ds.max())
        print()

        #-- print median values of variable t, reduced over lat and lon
        rule()
        print('--> variable median')
        print()
        print(ds.t.median(dim=['lat', 'lon']).values)
        print()

        #-- compute the means of variable t, reduced over lat and lon
        rule()
        print('--> means')
        print()
        means = ds.t.mean(dim=['lat', 'lon'])
        print(means.values)
        print()

        #-- keep only the means greater than 273.15 K; where() masks
        #-- all other entries
        rule()
        print('--> only means greater than 273.15 K')
        print()
        print(means.where(means > 273.15).values)
        print()


#-- run the example only when this file is executed as a script,
#-- not when it is imported as a module
if __name__ == '__main__':
    main()