import numpy as np
from osgeo import gdal
from osgeo import osr
import os
import pyresample as pr
from satpy import Scene
import matplotlib.pyplot as plt
Translating EUMETSAT’s .nat files to GTiff
In this tutorial, I am using Python to translate a Meteosat Second Generation (MSG) Native Archive Format (.nat) file to GTiff. Conveniently, there exists a driver support for these files in both, the gdal and the satpy library. Here, we are going to use satpy because we also want to resample the data with its “associated” library pyresample. First, we will load all the libraries we are going to need into our Python session.
As opposed to BUFR files, reading .nat files is quite straightforward. All we need to do is handing the right reader to the satpy Scene function. We can then take a look at the available datasets.
file = "MSG1-SEVI-MSG15-0100-NA-20160531090417.660000000Z-20160531090437-1405098.nat"
# define reader
= "seviri_l1b_native"
reader # read the file
= Scene(filenames = {reader:[file]})
scn # extract data set names
= scn.all_dataset_names()
dataset_names # print available datasets
print('\n'.join(map(str, dataset_names)))
HRV
IR_016
IR_039
IR_087
IR_097
IR_108
IR_120
IR_134
VIS006
VIS008
WV_062
WV_073
The MSG data is provided as Full Disk, meaning that roughly the complete North-South extent of the globe from the Atlantic to the Indian Ocean is present in each file. For most applications and research questions, it is not necessary to process an extent that large. This is why as an example, we are going to resample the data to the extent of Spain. For this, we are using functionality from the pyresample library, which allows users to create customized area definitions.
# create some information on the reference system
= "Spain"
area_id = "Geographical Coordinate System clipped on Spain"
description = "Spain"
proj_id # specifing some parameters of the projection
= {"proj": "longlat", "ellps": "WGS84", "datum": "WGS84"}
proj_dict # calculate the width and height of the aoi in pixels
= -9.5 # lower left x coordinate in degrees
llx = 35.9 # lower left y coordinate in degrees
lly = 3.3 # upper right x coordinate in degrees
urx = 43.8 # upper right y coordinate in degrees
ury = 0.005 # target resolution in degrees
resolution # calculating the number of pixels
= int((urx - llx) / resolution)
width = int((ury - lly) / resolution)
height = (llx,lly,urx,ury)
area_extent # defining the area
= pr.geometry.AreaDefinition(area_id, proj_id, description, proj_dict, width, height, area_extent)
area_def print(area_def)
Area ID: Spain
Description: Spain
Projection ID: Geographical Coordinate System clipped on Spain
Projection: {'datum': 'WGS84', 'no_defs': 'None', 'proj': 'longlat', 'type': 'crs'}
Number of columns: 2560
Number of rows: 1579
Area extent: (-9.5, 35.9, 3.3, 43.8)
/home/darius/Desktop/website/new/py-env/lib/python3.10/site-packages/pyproj/crs/crs.py:1282: UserWarning: You will likely lose important projection information when converting to a PROJ string from another format. See: https://proj.org/faq.html#what-is-the-best-format-for-describing-coordinate-reference-systems
proj = self._crs.to_proj4(version=version)
We will show here how to proceed when we want to extract more than one specific data set. We can either apply a for loop over the desired datasets we need or write a general function that can extract the data for any specified variable. Here we are going forward with the latter approach because a function is more reusable than a simple script.
def nat2tif(file, calibration, area_def, dataset, reader, outdir, label, dtype, radius, epsilon, nodata):
# open the file
= Scene(filenames = {reader: [file]})
scn # let us check that the specified data set is actually available
= scn.all_dataset_names()
scn_names # raise exception if dataset is not present in available names
if dataset not in scn_names:
raise Exception("Specified dataset is not available.")
# we need to load the data, different calibration can be chosen
=calibration)
scn.load([dataset], calibration# let us extract the longitude and latitude data
= scn[dataset].area.get_lonlats()
lons, lats # now we can apply a swath definition for our output raster
= pr.geometry.SwathDefinition(lons=lons, lats=lats)
swath_def # and finally we also extract the data
= scn[dataset].values
values # we will now change the datatype of the arrays
# depending on the present data this can be changed
= lons.astype(dtype)
lons = lats.astype(dtype)
lats = values.astype(dtype)
values # now we can already resample our data to the area of interest
= pr.kd_tree.resample_nearest(swath_def, values,
values
area_def,=radius, # in meters
radius_of_influence=epsilon,
epsilon=False)
fill_value# we are going to check if the outdir exists and create it if it doesnt
if not os.path.exists(outdir):
os.makedirs(outdir)# let us join our filename based on the input file's basename
= os.path.join(outdir, os.path.basename(file)[:-4] + "_" + str(label) + ".tif")
outname # now we define some metadata for our raster file
= values.shape[1]
cols = values.shape[0]
rows = (area_def.area_extent[2] - area_def.area_extent[0]) / cols
pixelWidth = (area_def.area_extent[1] - area_def.area_extent[3]) / rows
pixelHeight = area_def.area_extent[0]
originX = area_def.area_extent[3]
originY # here we actually create the file
= gdal.GetDriverByName("GTiff")
driver = driver.Create(outname, cols, rows, 1)
outRaster # writing the metadata
0, originY, 0, pixelHeight))
outRaster.SetGeoTransform((originX, pixelWidth, # creating a new band and writting the data
= outRaster.GetRasterBand(1)
outband #specified no data value by user
outband.SetNoDataValue(nodata) # writting the values
outband.WriteArray(np.array(values)) = osr.SpatialReference() # create CRS instance
outRasterSRS 4326) # get info for EPSG 4326
outRasterSRS.ImportFromEPSG(# set CRS as WKT
outRaster.SetProjection(outRasterSRS.ExportToWkt()) # clean up
outband.FlushCache()= None
outband = None outRaster
Now we can apply this function to our input file and extract any available dataset. Note that some of the input variables need further explanation. The very first option which might not be self-evident is calibration. With this option we can tell satpy to pre-calibrate the data, for example, to reflectance in contrast to radiances. The option label appends the value of label to the ouput filename. With the dtype option, we can specifically choose which datatype is used for the output file. Accordingly, we should adopt the value for nodata, which flags no data values in the output file. The options radius and epsilon are options of the nearest neighbor resampling routine and can be specified to the user needs (see here for more information).
file = file,
nat2tif(= "radiance",
calibration = area_def,
area_def = "HRV",
dataset = reader,
reader = "./output",
outdir = "HRV",
label = "float32",
dtype = 16000,
radius = .5,
epsilon = -3.4E+38) nodata
Now we can read in the newly created file and take a look at a simple plot to visualize our result (Note that in the background, I am using R to generate this plot quickly).
file = "output/MSG1-SEVI-MSG15-0100-NA-20160531090417.660000000Z-20160531090437-1405098_HRV.tif"
= gdal.Open(file)
ds = ds.GetRasterBand(1)
band = band.ReadAsArray()
data plt.imshow(data)
<matplotlib.image.AxesImage at 0x7f54fa1e1c90>