Welcome to the eocis_chuk_api documentation

Creating, loading and exporting CHUK data

CHUKDataSetUtils

Provide utility functions for working with CHUK datasets, based on xarray data structures.

This class helps users to work with CHUK datasets, providing support for creating and converting data and metadata.

Examples:

>>> import xarray as xr
>>> import numpy as np
>>> from eocis_chuk_api import CHUKDataSetUtils
>>> utils = CHUKDataSetUtils("EOCIS-CHUK-GRID-100M-v1.0.nc")
>>> chuk_ds = utils.create_new_dataset(
     title="My CHUK dataset",
     institution = "EOCIS CHUK",
     Conventions = "CF-1.10",
     tracking_id = "12345678",
     product_version = "1.0",
     summary = "Shows estimates of the squirrel population in each CHUK grid cell",
     license = "Creative Commons Licence by attribution (https://creativecommons.org/licenses/by/4.0/)",
     history = "Developed from the squirrel population dataset",
     comment = "This is a made up example",
     creator_url = "https://www.example.com",
     creator_name = "Institute of Squirrel Studies",
     creator_email = "enquiries@squirrel-studies.org.uk",
     creator_processing_institution = "Institute of Squirrel Studies")
>>> # create an array to hold the data
>>> population_data = np.zeros(utils.get_grid_shape())
>>> # populate the data
>>> population_data[...] = ...
>>> # attach the data
>>> chuk_ds["squirrel_population"] = xr.DataArray(population_data,dims=("y","x"), attrs={
    "long_name":"estimated_squirrel_population",
    "coordinates": "lat lon",
    "grid_mapping": "crsOSGB"
})
>>> # save the dataset
>>> utils.save(chuk_ds, "EOCIS-CHUK-L4-SQUIRRELPOP-MERGED-20231204-v0.1.nc")
Source code in eocis_chuk_api/chuk_dataset_utils.py
class CHUKDataSetUtils:
    """Provide utility functions for working with CHUK datasets, based on xarray data structures.

    This class helps users to work with CHUK datasets, providing support for creating and converting data and metadata.

    Examples:

        >>> import xarray as xr
        >>> import numpy as np
        >>> from eocis_chuk_api import CHUKDataSetUtils
        >>> utils = CHUKDataSetUtils("EOCIS-CHUK-GRID-100M-v1.0.nc")
        >>> chuk_ds = utils.create_new_dataset(
             title="My CHUK dataset",
             institution = "EOCIS CHUK",
             Conventions = "CF-1.10",
             tracking_id = "12345678",
             product_version = "1.0",
             summary = "Shows estimates of the squirrel population in each CHUK grid cell",
             license = "Creative Commons Licence by attribution (https://creativecommons.org/licenses/by/4.0/)",
             history = "Developed from the squirrel population dataset",
             comment = "This is a made up example",
             creator_url = "https://www.example.com",
             creator_name = "Institute of Squirrel Studies",
             creator_email = "enquiries@squirrel-studies.org.uk",
             creator_processing_institution = "Institute of Squirrel Studies")
        >>> # create an array to hold the data
        >>> population_data = np.zeros(utils.get_grid_shape())
        >>> # populate the data
        >>> population_data[...] = ...
        >>> # attach the data
        >>> chuk_ds["squirrel_population"] = xr.DataArray(population_data,dims=("y","x"), attrs={
            "long_name":"estimated_squirrel_population",
            "coordinates": "lat lon",
            "grid_mapping": "crsOSGB"
        })
        >>> # save the dataset
        >>> utils.save(chuk_ds, "EOCIS-CHUK-L4-SQUIRRELPOP-MERGED-20231204-v0.1.nc")

    """

    def __init__(self, chuk_grid_path: str):
        """
        Initialise an instance with the path to the CHUK grid file

        Args:
            chuk_grid_path: path to a grid file

        Notes:
            grid files can be obtained from https://gws-access.jasmin.ac.uk/public/nceo_uor/eocis-chuk/

        Examples:
            >>> from eocis_chuk_api import CHUKDataSetUtils
            >>> utils = CHUKDataSetUtils("EOCIS-CHUK-GRID-100M-v0.4.nc")
        """
        self.chuk_grid_ds = xr.open_dataset(chuk_grid_path)
        self.grid_resolution = int(self.chuk_grid_ds.x.data[1]) - int(self.chuk_grid_ds.x.data[0])

    def get_grid_latlons(self) -> (xarray.DataArray, xarray.DataArray):
        """
        Obtain the chuk grid lats/lons

        Returns:
            2-tuple containing xarray.DataArray objects (lats,lons)
        """
        return (self.chuk_grid_ds.lat, self.chuk_grid_ds.lon)

    def get_grid_shape(self) -> (int, int):
        """
        Obtain the chuk grid shape (y,x)

        Returns:
            2-tuple containing the grid (height, width)
        """
        return self.chuk_grid_ds.lat.shape

    def create_filename(self, project: str, processing_level: str, product_type: str, product_string: str,
                        datetime: str, version: str, additional_segregator: str = None, suffix: str = ".nc") -> str:
        """
        Create an EOCIS standards compliant filename

        Args:
            project: the EOCIS project string (see the appropriate standards doc)
            processing_level: specify the processing level in (L0, L1A, L1B, L1C, L2, L2P, L3, L3U, L3C, L3S, L4, IND)
            product_type: standardised term to describe the main product type in the dataset, see standards doc
            product_string: descriptive name chosen by the team, should not contain hyphens, can contain underscores
            datetime: date and optionally time, format YYYY[MM[DD[HH[MM[SS]]]]]
            version: File version number one or more digits followed by an optional "." and another one or more digits
            additional_segregator: an optional extra segregator, to be used if otherwise different data sets
                                   would generate the same filename
            suffix: the file suffix, including the "."

        Returns:
            Formatted filename
        """
        segregator = "" if additional_segregator is None else "-" + additional_segregator
        return f"EOCIS-{project}-{processing_level}-{product_type}-{product_string}{segregator}-{datetime}-fv{version}{suffix}"

    def create_new_dataset(self,
                           title: str = "",
                           include_lon_lat: bool = False,
                           institution: str = "EOCIS CHUK",
                           source: str = "",
                           history: str = "",
                           references: str = "",
                           tracking_id: str = "",
                           Conventions: str = "CF-1.10",
                           product_version: str = "",
                           format_version: str = "",
                           summary: str = "",
                           keywords: str = "",
                           id: str = "",
                           naming_authority: str = "",
                           keywords_vocabulary: str = "",
                           cdm_data_type: str = "",
                           comment: str = "",
                           date_created: str = "",
                           creator_name: str = "",
                           creator_url: str = "",
                           creator_email: str = "",
                           project: str = "Earth Observation Climate Information Service (EOCIS)",
                           geospatial_lat_min: str = "47.089",
                           geospatial_lat_max: str = "61.133",
                           geospatial_lon_min: str = "-15.374",
                           geospatial_lon_max: str = "4.750",
                           geospatial_vertical_min: str = "0",
                           geospatial_vertical_max: str = "0",
                           time_coverage_start: str = "",
                           time_coverage_end: str = "",
                           time_coverage_duration: str = "",
                           time_coverage_resolution: str = "",
                           standard_name_vocabulary: str = "",
                           license: str = "Creative Commons Attribution 4.0 International (CC-BY 4.0 license)",
                           platform: str = "",
                           sensor: str = "",
                           spatial_resolution: str = "100m",
                           geospatial_lat_units: str = "degrees_north",
                           geospatial_lon_units: str = "degrees_east",
                           geospatial_lon_resolution: str = "0.0009",
                           geospatial_lat_resolution: str = "0.00086",
                           key_variables: str = "",
                           acknowledgement: str = "Funded by UK EOCIS. Use of these data should acknowledge EOCIS",
                           publisher_url: str = "https://eocis.org",
                           publisher_name: str = "EOCIS",
                           publisher_email: str = "EOCIS@reading.ac.uk",
                           **other_attributes: dict) -> xarray.Dataset:
        """
        Create a new CHUK dataset with expected global attributes.

        Args:
            include_lon_lat: True if lon and lat 2d variables should be included
            title: a title for the dataset
            institution: The institution that created the dataset
            source: Comma separated list of original data sources (+DOIs if available)
            history: Processing history of the dataset
            references: References to algorithm, ATBD, technical note describing dataset
            tracking_id: A UUID (Universal Unique Identifier) value
            Conventions: The CF Version e.g. CF-1.10
            product_version: The product version of this data file
            format_version: The EOCIS data format used e.g. “EOCIS Data Standards v1.x”
            summary: A paragraph describing the dataset
            keywords: A comma separated list of key words and phrases
            id: see naming_authority
            naming_authority: The combination of the naming authority and the id should be a globally unique identifier for the dataset
            keywords_vocabulary: If you are following a guideline for the words/phrases in your “keywords” attribute, put the name of that guideline here
            cdm_data_type: The THREDDS data type appropriate for this dataset
            comment: Miscellaneous information about the data
            date_created: The date on which the data was created
            creator_name: The person/organisation that created the data
            creator_url: A URL for the person/organisation that created the data
            creator_email: Contact email address for the person/organisation that created the data
            project: The scientific project that produced the data: “Earth Observation Climate Information Service (EOCIS)”
            geospatial_lat_min: Decimal degrees north, range -90 to +90
            geospatial_lat_max: Decimal degrees north, range -90 to +90
            geospatial_lon_min: Decimal degrees east, range -180 to +180
            geospatial_lon_max: Decimal degrees east, range -180 to +180
            geospatial_vertical_min: Assumed to be in metres above ground unless geospatial_vertical_units attribute defined otherwise
            geospatial_vertical_max: Assumed to be in metres above ground unless geospatial_vertical_units attribute defined otherwise
            time_coverage_start: Format yyyymmddThhmmssZ
            time_coverage_end: Format yyyymmddThhmmssZ
            time_coverage_duration: Should be an ISO8601 duration string, for example P1D
            time_coverage_resolution: Should be an ISO8601 duration string. For L2 data on the original satellite sampling it is acceptable to use 'satellite_orbit_frequency'
            standard_name_vocabulary: The name of the controlled vocabulary from which variable standard names are taken e.g. ‘CF Standard Name Table v82’
            license: Describe the restrictions to data access and distribution
            platform: Satellite name e.g. Sentinel-5. Separate lists by commas and use angled brackets for a platform series, e.g. ‘Envisat, NOAA-<12,14,16,17,18>, Metop-A’. The platform names used should follow the naming in the CCI controlled vocabulary
            sensor: Sensor name e.g. AATSR. Separate lists by commas. The sensor names used should follow the naming in the CCI controlled vocabulary
            spatial_resolution: A free-text string describing the approximate resolution of the product. For example, “1.1km at nadir”. This is intended to provide a useful indication to the user, so if more than one resolution is relevant e.g. the grid resolution and the data resolution, then both can be included.
            geospatial_lat_units: Geospatial latitude units used
            geospatial_lon_units: Geospatial longitude units used
            geospatial_lon_resolution: Geospatial longitude resolution used
            geospatial_lat_resolution: Geospatial latitude resolution used
            key_variables: A comma separated list of the key primary variables in the file i.e. those that have been scientifically validated.
            acknowledgement: Acknowledge funding sources and/or contributors
            publisher_name: The organisation publishing the data
            publisher_url: A URL for the organisation publishing the data
            publisher_email: Contact email address for the organisation publishing the data
            other_attributes: any other attributes to include

        Returns:
            An xarray.Dataset object
        """

        attrs = {}
        self.__extend_attrs(attrs, "title", title, required=True)
        self.__extend_attrs(attrs, "institution", institution, required=True)
        self.__extend_attrs(attrs, "source", source)
        self.__extend_attrs(attrs, "history", history)
        self.__extend_attrs(attrs, "references", references)
        self.__extend_attrs(attrs, "tracking_id", tracking_id, required=True),
        self.__extend_attrs(attrs, "Conventions", Conventions)
        self.__extend_attrs(attrs, "product_version", product_version, required=True)
        self.__extend_attrs(attrs, "format_version", format_version)
        self.__extend_attrs(attrs, "summary", summary)
        self.__extend_attrs(attrs, "keywords", keywords)
        self.__extend_attrs(attrs, "id", id),
        self.__extend_attrs(attrs, "naming_authority", naming_authority),
        self.__extend_attrs(attrs, "keywords_vocabulary", keywords_vocabulary, required=False),
        self.__extend_attrs(attrs, "cdm_data_type", cdm_data_type),
        self.__extend_attrs(attrs, "comment", comment),
        self.__extend_attrs(attrs, "date_created", date_created),
        self.__extend_attrs(attrs, "creator_name", creator_name),
        self.__extend_attrs(attrs, "creator_url", creator_url),
        self.__extend_attrs(attrs, "creator_email", creator_email),
        self.__extend_attrs(attrs, "project", project),
        self.__extend_attrs(attrs, "geospatial_lat_min", geospatial_lat_min),
        self.__extend_attrs(attrs, "geospatial_lat_max", geospatial_lat_max),
        self.__extend_attrs(attrs, "geospatial_lon_min", geospatial_lon_min),
        self.__extend_attrs(attrs, "geospatial_lon_max", geospatial_lon_max),
        self.__extend_attrs(attrs, "geospatial_vertical_min", geospatial_vertical_min),
        self.__extend_attrs(attrs, "geospatial_vertical_max", geospatial_vertical_max),
        self.__extend_attrs(attrs, "time_coverage_start", time_coverage_start),
        self.__extend_attrs(attrs, "time_coverage_end", time_coverage_end),
        self.__extend_attrs(attrs, "time_coverage_duration", time_coverage_duration),
        self.__extend_attrs(attrs, "time_coverage_resolution", time_coverage_resolution),
        self.__extend_attrs(attrs, "standard_name_vocabulary", standard_name_vocabulary),
        self.__extend_attrs(attrs, "license", license),
        self.__extend_attrs(attrs, "platform", platform),
        self.__extend_attrs(attrs, "sensor", sensor),
        self.__extend_attrs(attrs, "spatial_resolution", spatial_resolution),
        self.__extend_attrs(attrs, "geospatial_lat_units", geospatial_lat_units),
        self.__extend_attrs(attrs, "geospatial_lon_units", geospatial_lon_units),
        self.__extend_attrs(attrs, "geospatial_lon_resolution", geospatial_lon_resolution),
        self.__extend_attrs(attrs, "geospatial_lat_resolution", geospatial_lat_resolution),
        self.__extend_attrs(attrs, "key_variables", key_variables)
        self.__extend_attrs(attrs, "acknowledgement", acknowledgement)
        self.__extend_attrs(attrs, "publisher_name", publisher_name),
        self.__extend_attrs(attrs, "publisher_url", publisher_url),
        self.__extend_attrs(attrs, "publisher_email", publisher_email),

        attrs.update(other_attributes)
        ds = xr.Dataset(attrs=attrs)
        # copy the grid definition from the grid file
        copyvars = ["x", "y", "x_bnds", "y_bnds", "crsOSGB"]
        if include_lon_lat:
            copyvars += ["lon", "lat"]
        for copyvar in copyvars:
            ds[copyvar] = self.chuk_grid_ds[copyvar]

        ds = ds.rio.write_crs("EPSG:27700", grid_mapping_name="crsOSGB")

        return ds

    def add_variable(self, to_dataset: xr.Dataset, data: np.ndarray, variable_name: str, standard_name: str = None,
                     long_name: str = None, units: str = None, source: str = None, **other_attrs:dict):
        """
        Add a new variable to a dataset.  The dataset is updated in-place.

        Args:
            to_dataset: The xarray.Dataset to which the variable will be added
            data: a numpy array containing the data, organised by (y,x), (time,y,x) or (y,x,time)
            variable_name: the name of the variable to be added to the dataset
            standard_name: CF standard name (if appropriate)
            long_name:  A longer descriptive name of the variable
            units: units from UDUNITS
            source: a description of the source of the data (if appropriate)
            other_attrs: dictionary containing other attributes to add to the new variable

        Raises:
            ValueError: if the data parameter does not match the expected shape
        """
        expected_shape = self.get_grid_shape()
        if len(data.shape) == 2:
            dims = ("y", "x")
            if data.shape != expected_shape:
                raise ValueError("Bad data shape, expecting: " + str(expected_shape) + " was: " + str(data.shape))
        else:
            if data.shape[1:] == expected_shape:
                dims = ("time", "y", "x")
            elif data.shape[:2] == expected_shape:
                dims = ("y", "x", "time")
            else:
                raise ValueError("Bad data shape, expecting: " + str(expected_shape) + " was: " + str(data.shape))

        attrs = {
            "grid_mapping": "crsOSGB"
        }
        if standard_name is not None:
            attrs["standard_name"] = standard_name
        if long_name is not None:
            attrs["long_name"] = long_name
        if source is not None:
            attrs["source"] = source
        if units is not None:
            attrs["units"] = units
        attrs.update(other_attrs)

        to_dataset[variable_name] = xr.DataArray(data=data, dims=dims, attrs=attrs)

    def load(self, from_path: str, add_latlon: bool = False, add_latlon_bnds: bool = False) -> xarray.Dataset:
        """
        Load a CHUK dataset from file and return a dataset

        Args:
            from_path: path to a NetCDF4 file
            add_latlon: add lon and lat 2D arrays to the dataset
            add_latlon_bnds: add lon_bnds and lat_bnds 2D arrays to the dataset

        Returns:
            A dataset containing the loaded CHUK data
        """
        ds = xr.open_dataset(from_path, decode_coords="all")

        self.extend_latlon(ds, add_latlon=add_latlon, add_latlon_bnds=add_latlon_bnds)

        return ds

    def save(self, ds: xarray.Dataset, to_path: str, add_latlon: bool = False, add_latlon_bnds: bool = False,
             x_chunk_size: int = 1000, y_chunk_size: int = 1000,
             time_chunk_size: int = 1, custom_encodings: dict = {}, override_encodings: dict={}):
        """
        Save a CHUK dataset to file, applying the standard chunking and compression

        Args:
            ds: an xarray dataset containing CHUK data
            to_path: path to a NetCDF4 file
            add_latlon: add lon and lat 2D arrays to the dataset
            add_latlon_bnds: add lon_bnds and lat_bnds 2D arrays to the dataset
            x_chunk_size: size of chunking in the x-dimension
            y_chunk_size: size of chunking in the y-dimension
            time_chunk_size: size of chunking in the time dimension
            custom_encodings: dictionary mapping from variable names to a custom encoding to use by xarray
            override_encodings: dictionary mapping from variable names to encoding settings that override the defaults (a setting with value None is removed)
        """

        encodings = {}

        for v in ds.variables:
            if custom_encodings and v in custom_encodings:
                encodings[v] = custom_encodings[v]
            else:
                dims = ds[v].dims
                if "x" in dims and "y" in dims:

                    encodings[v] = {
                        "zlib": True,
                        "complevel": 5
                    }

                    if v in override_encodings:
                        for (name,value) in override_encodings[v].items():
                            if value is None:
                                if name in encodings[v]:
                                    del encodings[v][name]
                            else:
                                encodings[v][name] = value

                    chunk_sizes = []
                    for d in dims:
                        if d == "y":
                            chunk_sizes.append(y_chunk_size)
                        elif d == "x":
                            chunk_sizes.append(x_chunk_size)
                        elif d == "time":
                            chunk_sizes.append(time_chunk_size)
                        else:
                            chunk_sizes.append(0)
                    encodings[v]["chunksizes"] = chunk_sizes

        self.extend_latlon(ds, add_latlon=add_latlon, add_latlon_bnds=add_latlon_bnds)


        # ds = ds.rio.write_crs("EPSG:27700",grid_mapping_name="crsOSGB")

        ds.to_netcdf(to_path, encoding=encodings)

    def extend_latlon(self, ds, add_latlon=False, add_latlon_bnds=False):
        if add_latlon:
            self.add_latlon(ds)

        if add_latlon_bnds:
            self.add_latlon_bnds(ds)

        if add_latlon:
            for v in ["lat", "lon"]:
                # remove bounds if no such variable exists
                bounds = ds[v].attrs.get("bounds",None)
                if bounds and bounds not in ds.variables:
                    del ds[v].attrs["bounds"]

    def check(self, ds: xarray.Dataset) -> ([(str, str)], [(str, str)]):
        """
        Check a dataset against CHUK format, returning details of any problems found

        Args:
            ds: the xarray.Dataset to check

        Returns:
            2-tuple (warnings, errors) containing lists of (code,detail) tuples
        """

        # perform metadata checks
        warnings, errors = CHUKMetadata.check(ds)

        # check the dimensions are correct, compared to the grid
        for v in ["x", "y"]:
            actual_shape = ds[v].shape
            expected_shape = self.chuk_grid_ds[v].shape
            if actual_shape != expected_shape:
                errors.append(("bad_shape", (v, actual_shape, expected_shape)))

        return warnings, errors

    @staticmethod
    def sample(ds: xarray.Dataset, to_resolution: int) -> xarray.Dataset:
        """
        Create a lower resolution sample of a CHUK dataset

        Args:
            ds: the xarray.Dataset containing CHUK data to sample
            to_resolution: the resolution for the sampled output, must be a multiple of 100

        Returns:
            A dataset containing the sampled data
        """
        if to_resolution % 100 != 0:
            raise ValueError(f"Error - resolution requested ({to_resolution}) is not a multiple of 100")
        sample_step = int(to_resolution / 100)
        return ds.isel(x=slice(0, -1, sample_step), y=slice(0, -1, sample_step))

    def add_latlon(self, ds: xarray.Dataset):
        """
        Add lat and lon 2D arrays from the reference grid

        Args:
            ds: the dataset to modify in-place
        """
        ds["lon"] = self.chuk_grid_ds["lon"]
        ds["lat"] = self.chuk_grid_ds["lat"]

    def add_latlon_bnds(self, ds: xarray.Dataset):
        """
        Add lat and lon 2D bounds from the reference grid

        Args:
            ds: the dataset to modify in-place
        """
        ds["lon_bnds"] = self.chuk_grid_ds["lon_bnds"]
        ds["lat_bnds"] = self.chuk_grid_ds["lat_bnds"]

    @staticmethod
    def save_as_geotif(ds: xarray.Dataset, variable_name: str, to_path: str):
        """
        Save a CHUK dataset to a geotiff.  DEPRECATED - use save_as_geotiff

        Args:
            ds: the CHUK dataset
            variable_name: the name of the variable to save from the dataset
            to_path: the path to save the geotiff file to
        """
        return CHUKDataSetUtils.save_as_geotiff(ds, variable_name, to_path)

    @staticmethod
    def save_as_geotiff(ds: xarray.Dataset, variable_name: str, to_path: str):
        """
        Save a CHUK dataset to a geotiff

        Args:
            ds: the CHUK dataset
            variable_name: the name of the variable to save from the dataset
            to_path: the path to save the geotiff file to
        """
        ds_crs = ds.rio.write_crs("EPSG:27700")
        if "grid_mapping" in ds_crs[variable_name].attrs:
            # this seems to cause a problem, why?
            del ds_crs[variable_name].attrs["grid_mapping"]
        tags = CHUKMetadata.to_json(ds_crs, variable_name)
        ds_crs[variable_name].rio.to_raster(to_path, tags=tags, driver="COG")

    def __extend_attrs(self, attrs, key, value, required=None):
        if value == "" or value is None:
            if required:
                raise ValueError(f"attribute {key} is required")
            return
        attrs[key] = value

__init__(chuk_grid_path)

Initialise an instance with the path to the CHUK grid file

Parameters:

| Name | Type | Description | Default |
|------|------|-------------|---------|
| `chuk_grid_path` | `str` | path to a grid file | *required* |
Notes

grid files can be obtained from https://gws-access.jasmin.ac.uk/public/nceo_uor/eocis-chuk/

Examples:

>>> from eocis_chuk_api import CHUKDataSetUtils
>>> utils = CHUKDataSetUtils("EOCIS-CHUK-GRID-100M-v0.4.nc")
Source code in eocis_chuk_api/chuk_dataset_utils.py
def __init__(self, chuk_grid_path: str):
    """
    Initialise an instance with the path to the CHUK grid file

    Args:
        chuk_grid_path: path to a grid file

    Notes:
        grid files can be obtained from https://gws-access.jasmin.ac.uk/public/nceo_uor/eocis-chuk/

    Examples:
        >>> from eocis_chuk_api import CHUKDataSetUtils
        >>> utils = CHUKDataSetUtils("EOCIS-CHUK-GRID-100M-v0.4.nc")
    """
    self.chuk_grid_ds = xr.open_dataset(chuk_grid_path)
    self.grid_resolution = int(self.chuk_grid_ds.x.data[1]) - int(self.chuk_grid_ds.x.data[0])

add_latlon(ds)

Add lat and lon 2D arrays from the reference grid

Parameters:

| Name | Type | Description | Default |
|------|------|-------------|---------|
| `ds` | `Dataset` | the dataset to modify in-place | *required* |
Source code in eocis_chuk_api/chuk_dataset_utils.py
def add_latlon(self, ds: xarray.Dataset):
    """
    Add lat and lon 2D arrays from the reference grid

    Args:
        ds: the dataset to modify in-place
    """
    ds["lon"] = self.chuk_grid_ds["lon"]
    ds["lat"] = self.chuk_grid_ds["lat"]

add_latlon_bnds(ds)

Add lat and lon 2D bounds from the reference grid

Parameters:

| Name | Type | Description | Default |
|------|------|-------------|---------|
| `ds` | `Dataset` | the dataset to modify in-place | *required* |
Source code in eocis_chuk_api/chuk_dataset_utils.py
def add_latlon_bnds(self, ds: xarray.Dataset):
    """
    Add lat and lon 2D bounds from the reference grid

    Args:
        ds: the dataset to modify in-place
    """
    ds["lon_bnds"] = self.chuk_grid_ds["lon_bnds"]
    ds["lat_bnds"] = self.chuk_grid_ds["lat_bnds"]

add_variable(to_dataset, data, variable_name, standard_name=None, long_name=None, units=None, source=None, **other_attrs)

Add a new variable to a dataset. The dataset is updated in-place.

Parameters:

| Name | Type | Description | Default |
|------|------|-------------|---------|
| `to_dataset` | `Dataset` | The xarray.Dataset to which the variable will be added | *required* |
| `data` | `ndarray` | a numpy array containing the data, organised by (y,x), (time,y,x) or (y,x,time) | *required* |
| `variable_name` | `str` | the name of the variable to be added to the dataset | *required* |
| `standard_name` | `str` | CF standard name (if appropriate) | `None` |
| `long_name` | `str` | A longer descriptive name of the variable | `None` |
| `units` | `str` | units from UDUNITS | `None` |
| `source` | `str` | a description of the source of the data (if appropriate) | `None` |
| `other_attrs` | `dict` | dictionary containing other attributes to add to the new variable | `{}` |

Raises:

| Type | Description |
|------|-------------|
| `ValueError` | if the data parameter does not match the expected shape |

Source code in eocis_chuk_api/chuk_dataset_utils.py
def add_variable(self, to_dataset: xr.Dataset, data: np.ndarray, variable_name: str, standard_name: str = None,
                 long_name: str = None, units: str = None, source: str = None, **other_attrs:dict):
    """
    Add a new variable to a dataset.  The dataset is updated in-place.

    Args:
        to_dataset: The xarray.Dataset to which the variable will be added
        data: a numpy array containing the data, organised by (y,x), (time,y,x) or (y,x,time)
        variable_name: the name of the variable to be added to the dataset
        standard_name: CF standard name (if appropriate)
        long_name:  A longer descriptive name of the variable
        units: units from UDUNITS
        source: a description of the source of the data (if appropriate)
        other_attrs: dictionary containing other attributes to add to the new variable

    Raises:
        ValueError: if the data parameter does not match the expected shape
    """
    expected_shape = self.get_grid_shape()
    if len(data.shape) == 2:
        dims = ("y", "x")
        if data.shape != expected_shape:
            raise ValueError("Bad data shape, expecting: " + str(expected_shape) + " was: " + str(data.shape))
    else:
        if data.shape[1:] == expected_shape:
            dims = ("time", "y", "x")
        elif data.shape[:2] == expected_shape:
            dims = ("y", "x", "time")
        else:
            raise ValueError("Bad data shape, expecting: " + str(expected_shape) + " was: " + str(data.shape))

    attrs = {
        "grid_mapping": "crsOSGB"
    }
    if standard_name is not None:
        attrs["standard_name"] = standard_name
    if long_name is not None:
        attrs["long_name"] = long_name
    if source is not None:
        attrs["source"] = source
    if units is not None:
        attrs["units"] = units
    attrs.update(other_attrs)

    to_dataset[variable_name] = xr.DataArray(data=data, dims=dims, attrs=attrs)
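
A minimal usage sketch, assuming the utils and chuk_ds objects from the class-level example; the variable name and attributes below are made up for illustration:

>>> import numpy as np
>>> data = np.zeros(utils.get_grid_shape())  # one value per CHUK grid cell
>>> utils.add_variable(chuk_ds, data, "tree_cover_fraction",
...     long_name="fraction of each cell covered by trees", units="1")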

check(ds)

Check a dataset against CHUK format, returning details of any problems found

Parameters:

| Name | Type | Description | Default |
|------|------|-------------|---------|
| `ds` | `Dataset` | the xarray.Dataset to check | *required* |

Returns:

| Type | Description |
|------|-------------|
| `([(str, str)], [(str, str)])` | 2-tuple (warnings, errors) containing lists of (code, detail) tuples |

Source code in eocis_chuk_api/chuk_dataset_utils.py
def check(self, ds: xarray.Dataset) -> ([(str, str)], [(str, str)]):
    """
    Check a dataset against CHUK format, returning details of any problems found

    Args:
        ds: the xarray.Dataset to check

    Returns:
        2-tuple (warnings, errors) containing lists of (code,detail) tuples
    """

    # perform metadata checks
    warnings, errors = CHUKMetadata.check(ds)

    # check the dimensions are correct, compared to the grid
    for v in ["x", "y"]:
        actual_shape = ds[v].shape
        expected_shape = self.chuk_grid_ds[v].shape
        if actual_shape != expected_shape:
            errors.append(("bad_shape", (v, actual_shape, expected_shape)))

    return warnings, errors
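
A sketch of checking a dataset and printing any problems found, assuming chuk_ds was built as in the class-level example:

>>> warnings, errors = utils.check(chuk_ds)
>>> for (code, detail) in warnings:
...     print("WARNING", code, detail)
>>> for (code, detail) in errors:
...     print("ERROR", code, detail)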

create_filename(project, processing_level, product_type, product_string, datetime, version, additional_segregator=None, suffix='.nc')

Create an EOCIS standards compliant filename

Parameters:

| Name | Type | Description | Default |
|------|------|-------------|---------|
| `project` | `str` | the EOCIS project string (see the appropriate standards doc) | *required* |
| `processing_level` | `str` | the processing level, one of (L0, L1A, L1B, L1C, L2, L2P, L3, L3U, L3C, L3S, L4, IND) | *required* |
| `product_type` | `str` | standardised term to describe the main product type in the dataset, see standards doc | *required* |
| `product_string` | `str` | descriptive name chosen by the team; should not contain hyphens, can contain underscores | *required* |
| `datetime` | `str` | date and optionally time, format YYYY[MM[DD[HH[MM[SS]]]]] | *required* |
| `version` | `str` | file version number: one or more digits, optionally followed by "." and one or more further digits | *required* |
| `additional_segregator` | `str` | an optional extra segregator, to be used if otherwise different data sets would generate the same filename | `None` |
| `suffix` | `str` | the file suffix, including the "." | `'.nc'` |

Returns:

| Type | Description |
|------|-------------|
| `str` | Formatted filename |

Source code in eocis_chuk_api/chuk_dataset_utils.py
def create_filename(self, project: str, processing_level: str, product_type: str, product_string: str,
                    datetime: str, version: str, additional_segregator: str = None, suffix: str = ".nc") -> str:
    """
    Create an EOCIS standards compliant filename

    Args:
        project: the EOCIS project string (see the appropriate standards doc)
        processing_level: specify the processing level in (L0, L1A, L1B, L1C, L2, L2P, L3, L3U, L3C, L3S, L4, IND)
        product_type: standardised term to describe the main product type in the dataset, see standards doc
        product_string: descriptive name chosen by the team, should not contain hyphens, can contain underscores
        datetime: date and optionally time, format YYYY[MM[DD[HH[MM[SS]]]]]
        version: File version number one or more digits followed by an optional "." and another one or more digits
        additional_segregator: an optional extra segregator, to be used if otherwise different data sets
                               would generate the same filename
        suffix: the file suffix, including the "."

    Returns:
        Formatted filename
    """
    segregator = "" if additional_segregator is None else "-" + additional_segregator
    return f"EOCIS-{project}-{processing_level}-{product_type}-{product_string}{segregator}-{datetime}-fv{version}{suffix}"

create_new_dataset(title='', include_lon_lat=False, institution='EOCIS CHUK', source='', history='', references='', tracking_id='', Conventions='CF-1.10', product_version='', format_version='', summary='', keywords='', id='', naming_authority='', keywords_vocabulary='', cdm_data_type='', comment='', date_created='', creator_name='', creator_url='', creator_email='', project='Earth Observation Climate Information Service (EOCIS)', geospatial_lat_min='47.089', geospatial_lat_max='61.133', geospatial_lon_min='-15.374', geospatial_lon_max='4.750', geospatial_vertical_min='0', geospatial_vertical_max='0', time_coverage_start='', time_coverage_end='', time_coverage_duration='', time_coverage_resolution='', standard_name_vocabulary='', license='Creative Commons Attribution 4.0 International (CC-BY 4.0 license)', platform='', sensor='', spatial_resolution='100m', geospatial_lat_units='degrees_north', geospatial_lon_units='degrees_east', geospatial_lon_resolution='0.0009', geospatial_lat_resolution='0.00086', key_variables='', acknowledgement='Funded by UK EOCIS. Use of these data should acknowledge EOCIS', publisher_url='https://eocis.org', publisher_name='EOCIS', publisher_email='EOCIS@reading.ac.uk', **other_attributes)

Create a new CHUK dataset with expected global attributes.

Parameters:

| Name | Type | Description | Default |
|------|------|-------------|---------|
| `title` | `str` | a title for the dataset | `''` |
| `include_lon_lat` | `bool` | True if lon and lat 2d variables should be included | `False` |
| `institution` | `str` | The institution that created the dataset | `'EOCIS CHUK'` |
| `source` | `str` | Comma separated list of original data sources (+DOIs if available) | `''` |
| `history` | `str` | Processing history of the dataset | `''` |
| `references` | `str` | References to algorithm, ATBD, technical note describing dataset | `''` |
| `tracking_id` | `str` | A UUID (Universally Unique Identifier) value | `''` |
| `Conventions` | `str` | The CF version, e.g. CF-1.10 | `'CF-1.10'` |
| `product_version` | `str` | The product version of this data file | `''` |
| `format_version` | `str` | The EOCIS data format used, e.g. "EOCIS Data Standards v1.x" | `''` |
| `summary` | `str` | A paragraph describing the dataset | `''` |
| `keywords` | `str` | A comma separated list of key words and phrases | `''` |
| `id` | `str` | see naming_authority | `''` |
| `naming_authority` | `str` | The combination of the naming authority and the id should be a globally unique identifier for the dataset | `''` |
| `keywords_vocabulary` | `str` | If you are following a guideline for the words/phrases in your "keywords" attribute, put the name of that guideline here | `''` |
| `cdm_data_type` | `str` | The THREDDS data type appropriate for this dataset | `''` |
| `comment` | `str` | Miscellaneous information about the data | `''` |
| `date_created` | `str` | The date on which the data was created | `''` |
| `creator_name` | `str` | The person/organisation that created the data | `''` |
| `creator_url` | `str` | A URL for the person/organisation that created the data | `''` |
| `creator_email` | `str` | Contact email address for the person/organisation that created the data | `''` |
| `project` | `str` | The scientific project that produced the data | `'Earth Observation Climate Information Service (EOCIS)'` |
| `geospatial_lat_min` | `str` | Decimal degrees north, range -90 to +90 | `'47.089'` |
| `geospatial_lat_max` | `str` | Decimal degrees north, range -90 to +90 | `'61.133'` |
| `geospatial_lon_min` | `str` | Decimal degrees east, range -180 to +180 | `'-15.374'` |
| `geospatial_lon_max` | `str` | Decimal degrees east, range -180 to +180 | `'4.750'` |
| `geospatial_vertical_min` | `str` | Assumed to be in metres above ground unless the geospatial_vertical_units attribute is defined otherwise | `'0'` |
| `geospatial_vertical_max` | `str` | Assumed to be in metres above ground unless the geospatial_vertical_units attribute is defined otherwise | `'0'` |
| `time_coverage_start` | `str` | Format yyyymmddThhmmssZ | `''` |
| `time_coverage_end` | `str` | Format yyyymmddThhmmssZ | `''` |
| `time_coverage_duration` | `str` | Should be an ISO8601 duration string, for example P1D | `''` |
| `time_coverage_resolution` | `str` | Should be an ISO8601 duration string. For L2 data on the original satellite sampling it is acceptable to use 'satellite_orbit_frequency' | `''` |
| `standard_name_vocabulary` | `str` | The name of the controlled vocabulary from which variable standard names are taken, e.g. 'CF Standard Name Table v82' | `''` |
| `license` | `str` | Describe the restrictions to data access and distribution | `'Creative Commons Attribution 4.0 International (CC-BY 4.0 license)'` |
| `platform` | `str` | Satellite name, e.g. Sentinel-5. Separate lists by commas and use angled brackets for a platform series, e.g. 'Envisat, NOAA-<12,14,16,17,18>, Metop-A'. The platform names used should follow the naming in the CCI controlled vocabulary | `''` |
| `sensor` | `str` | Sensor name, e.g. AATSR. Separate lists by commas. The sensor names used should follow the naming in the CCI controlled vocabulary | `''` |
| `spatial_resolution` | `str` | A free-text string describing the approximate resolution of the product, for example "1.1km at nadir". This is intended to provide a useful indication to the user, so if more than one resolution is relevant, e.g. the grid resolution and the data resolution, then both can be included | `'100m'` |
| `geospatial_lat_units` | `str` | Geospatial latitude units used | `'degrees_north'` |
| `geospatial_lon_units` | `str` | Geospatial longitude units used | `'degrees_east'` |
| `geospatial_lon_resolution` | `str` | Geospatial longitude resolution used | `'0.0009'` |
| `geospatial_lat_resolution` | `str` | Geospatial latitude resolution used | `'0.00086'` |
| `key_variables` | `str` | A comma separated list of the key primary variables in the file, i.e. those that have been scientifically validated | `''` |
| `acknowledgement` | `str` | Acknowledge funding sources and/or contributors | `'Funded by UK EOCIS. Use of these data should acknowledge EOCIS'` |
| `publisher_name` | `str` | The organisation publishing the data | `'EOCIS'` |
| `publisher_url` | `str` | A URL for the organisation publishing the data | `'https://eocis.org'` |
| `publisher_email` | `str` | Contact email address for the organisation publishing the data | `'EOCIS@reading.ac.uk'` |
| `other_attributes` | `dict` | any other attributes to include | `{}` |

Returns:

| Type | Description |
|------|-------------|
| `Dataset` | An xarray.Dataset object |

Source code in eocis_chuk_api/chuk_dataset_utils.py
def create_new_dataset(self,
                       title: str = "",
                       include_lon_lat: bool = False,
                       institution: str = "EOCIS CHUK",
                       source: str = "",
                       history: str = "",
                       references: str = "",
                       tracking_id: str = "",
                       Conventions: str = "CF-1.10",
                       product_version: str = "",
                       format_version: str = "",
                       summary: str = "",
                       keywords: str = "",
                       id: str = "",
                       naming_authority: str = "",
                       keywords_vocabulary: str = "",
                       cdm_data_type: str = "",
                       comment: str = "",
                       date_created: str = "",
                       creator_name: str = "",
                       creator_url: str = "",
                       creator_email: str = "",
                       project: str = "Earth Observation Climate Information Service (EOCIS)",
                       geospatial_lat_min: str = "47.089",
                       geospatial_lat_max: str = "61.133",
                       geospatial_lon_min: str = "-15.374",
                       geospatial_lon_max: str = "4.750",
                       geospatial_vertical_min: str = "0",
                       geospatial_vertical_max: str = "0",
                       time_coverage_start: str = "",
                       time_coverage_end: str = "",
                       time_coverage_duration: str = "",
                       time_coverage_resolution: str = "",
                       standard_name_vocabulary: str = "",
                       license: str = "Creative Commons Attribution 4.0 International (CC-BY 4.0 license)",
                       platform: str = "",
                       sensor: str = "",
                       spatial_resolution: str = "100m",
                       geospatial_lat_units: str = "degrees_north",
                       geospatial_lon_units: str = "degrees_east",
                       geospatial_lon_resolution: str = "0.0009",
                       geospatial_lat_resolution: str = "0.00086",
                       key_variables: str = "",
                       acknowledgement: str = "Funded by UK EOCIS. Use of these data should acknowledge EOCIS",
                       publisher_url: str = "https://eocis.org",
                       publisher_name: str = "EOCIS",
                       publisher_email: str = "EOCIS@reading.ac.uk",
                       **other_attributes: dict) -> xarray.Dataset:
    """
    Create a new CHUK dataset with expected global attributes.

    Args:
        include_lon_lat: True if lon and lat 2d variables should be included
        title: a title for the dataset
        institution: The institution that created the dataset
        source: Comma separated list of original data sources (+DOIs if available)
        history: Processing history of the dataset
        references: References to algorithm, ATBD, technical note describing dataset
        tracking_id: A UUID (Universal Unique Identifier) value
        Conventions: The CF Version e.g. CF-1.10
        product_version: The product version of this data file
        format_version: The EOCIS data format used e.g. “EOCIS Data Standards v1.x”
        summary: A paragraph describing the dataset
        keywords: A comma separated list of key words and phrases
        id: see naming_authority
        naming_authority: The combination of the naming authority and the id should be a globally unique identifier for the dataset
        keywords_vocabulary: If you are following a guideline for the words/phrases in your “keywords” attribute, put the name of that guideline here
        cdm_data_type: The THREDDS data type appropriate for this dataset
        comment: Miscellaneous information about the data
        date_created: The date on which the data was created
        creator_name: The person/organisation that created the data
        creator_url: A URL for the person/organisation that created the data
        creator_email: Contact email address for the person/organisation that created the data
        project: The scientific project that produced the data: “Earth Observation Climate Information Service (EOCIS)”
        geospatial_lat_min: Decimal degrees north, range -90 to +90
        geospatial_lat_max: Decimal degrees north, range -90 to +90
        geospatial_lon_min: Decimal degrees east, range -180 to +180
        geospatial_lon_max: Decimal degrees east, range -180 to +180
        geospatial_vertical_min: Assumed to be in metres above ground unless geospatial_vertical_units attribute defined otherwise
        geospatial_vertical_max: Assumed to be in metres above ground unless geospatial_vertical_units attribute defined otherwise
        time_coverage_start: Format yyyymmddThhmmssZ
        time_coverage_end: Format yyyymmddThhmmssZ
        time_coverage_duration: Should be an ISO8601 duration string, for example P1D
        time_coverage_resolution: Should be an ISO8601 duration string. For L2 data on the original satellite sampling it is acceptable to use 'satellite_orbit_frequency'
        standard_name_vocabulary: The name of the controlled vocabulary from which variable standard names are taken e.g. ‘CF Standard Name Table v82’
        license: Describe the restrictions to data access and distribution
        platform: Satellite name e.g. Sentinel-5. Separate lists by commas and use angled brackets for a platform series, e.g. ‘Envisat, NOAA-<12,14,16,17,18>, Metop-A’. The platform names used should follow the naming in the CCI controlled vocabulary
        sensor: Sensor name e.g. AATSR. Separate lists by commas. The sensor names used should follow the naming in the CCI controlled vocabulary
        spatial_resolution: A free-text string describing the approximate resolution of the product. For example, “1.1km at nadir”. This is intended to provide a useful indication to the user, so if more than one resolution is relevant e.g. the grid resolution and the data resolution, then both can be included.
        geospatial_lat_units: Geospatial latitude units used
        geospatial_lon_units: Geospatial longitude units used
        geospatial_lon_resolution: Geospatial longitude resolution used
        geospatial_lat_resolution: Geospatial latitude resolution used
        key_variables: A comma separated list of the key primary variables in the file i.e. those that have been scientifically validated.
        acknowledgement: Acknowledge funding sources and/or contributors
        publisher_name: The organisation publishing the data
        publisher_url: A URL for the organisation publishing the data
        publisher_email: Contact email address for the organisation publishing the data
        other_attributes: any other attributes to include

    Returns:
        An xarray.Dataset object
    """

    attrs = {}
    self.__extend_attrs(attrs, "title", title, required=True)
    self.__extend_attrs(attrs, "institution", institution, required=True)
    self.__extend_attrs(attrs, "source", source)
    self.__extend_attrs(attrs, "history", history)
    self.__extend_attrs(attrs, "references", references)
    self.__extend_attrs(attrs, "tracking_id", tracking_id, required=True),
    self.__extend_attrs(attrs, "Conventions", Conventions)
    self.__extend_attrs(attrs, "product_version", product_version, required=True)
    self.__extend_attrs(attrs, "format_version", format_version)
    self.__extend_attrs(attrs, "summary", summary)
    self.__extend_attrs(attrs, "keywords", keywords)
    self.__extend_attrs(attrs, "id", id),
    self.__extend_attrs(attrs, "naming_authority", naming_authority),
    self.__extend_attrs(attrs, "keywords_vocabulary", keywords_vocabulary, required=False),
    self.__extend_attrs(attrs, "cdm_data_type", cdm_data_type),
    self.__extend_attrs(attrs, "comment", comment),
    self.__extend_attrs(attrs, "date_created", date_created),
    self.__extend_attrs(attrs, "creator_name", creator_name),
    self.__extend_attrs(attrs, "creator_url", creator_url),
    self.__extend_attrs(attrs, "creator_email", creator_email),
    self.__extend_attrs(attrs, "project", project),
    self.__extend_attrs(attrs, "geospatial_lat_min", geospatial_lat_min),
    self.__extend_attrs(attrs, "geospatial_lat_max", geospatial_lat_max),
    self.__extend_attrs(attrs, "geospatial_lon_min", geospatial_lon_min),
    self.__extend_attrs(attrs, "geospatial_lon_max", geospatial_lon_max),
    self.__extend_attrs(attrs, "geospatial_vertical_min", geospatial_vertical_min),
    self.__extend_attrs(attrs, "geospatial_vertical_max", geospatial_vertical_max),
    self.__extend_attrs(attrs, "time_coverage_start", time_coverage_start),
    self.__extend_attrs(attrs, "time_coverage_end", time_coverage_end),
    self.__extend_attrs(attrs, "time_coverage_duration", time_coverage_duration),
    self.__extend_attrs(attrs, "time_coverage_resolution", time_coverage_resolution),
    self.__extend_attrs(attrs, "standard_name_vocabulary", standard_name_vocabulary),
    self.__extend_attrs(attrs, "license", license),
    self.__extend_attrs(attrs, "platform", platform),
    self.__extend_attrs(attrs, "sensor", sensor),
    self.__extend_attrs(attrs, "spatial_resolution", spatial_resolution),
    self.__extend_attrs(attrs, "geospatial_lat_units", geospatial_lat_units),
    self.__extend_attrs(attrs, "geospatial_lon_units", geospatial_lon_units),
    self.__extend_attrs(attrs, "geospatial_lon_resolution", geospatial_lon_resolution),
    self.__extend_attrs(attrs, "geospatial_lat_resolution", geospatial_lat_resolution),
    self.__extend_attrs(attrs, "key_variables", key_variables)
    self.__extend_attrs(attrs, "acknowledgement", acknowledgement)
    self.__extend_attrs(attrs, "publisher_name", publisher_name),
    self.__extend_attrs(attrs, "publisher_url", publisher_url),
    self.__extend_attrs(attrs, "publisher_email", publisher_email),

    attrs.update(other_attributes)
    ds = xr.Dataset(attrs=attrs)
    # copy the grid definition from the grid file
    copyvars = ["x", "y", "x_bnds", "y_bnds", "crsOSGB"]
    if include_lon_lat:
        copyvars += ["lon", "lat"]
    for copyvar in copyvars:
        ds[copyvar] = self.chuk_grid_ds[copyvar]

    ds = ds.rio.write_crs("EPSG:27700", grid_mapping_name="crsOSGB")

    return ds
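
Any keyword argument not listed above is collected into **other_attributes and stored as an additional global attribute. A minimal sketch, assuming the utils object from the class-level example (my_custom_attribute is a made-up attribute name):

>>> ds = utils.create_new_dataset(
...     title="My CHUK dataset",
...     tracking_id="12345678",
...     product_version="1.0",
...     my_custom_attribute="some value")
>>> ds.attrs["my_custom_attribute"]
'some value'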

get_grid_latlons()

Obtain the chuk grid lats/lons

Returns:

| Type | Description |
|------|-------------|
| `(DataArray, DataArray)` | 2-tuple containing xarray.DataArray objects (lats, lons) |

Source code in eocis_chuk_api/chuk_dataset_utils.py
def get_grid_latlons(self) -> (xarray.DataArray, xarray.DataArray):
    """
    Obtain the chuk grid lats/lons

    Returns:
        2-tuple containing xarray.DataArray objects (lats,lons)
    """
    return (self.chuk_grid_ds.lat, self.chuk_grid_ds.lon)

get_grid_shape()

Obtain the chuk grid shape (y,x)

Returns:

| Type | Description |
|------|-------------|
| `(int, int)` | 2-tuple containing the grid (height, width) |

Source code in eocis_chuk_api/chuk_dataset_utils.py
def get_grid_shape(self) -> (int, int):
    """
    Obtain the chuk grid shape (y,x)

    Returns:
        2-tuple containing the grid (height, width)
    """
    return self.chuk_grid_ds.lat.shape

load(from_path, add_latlon=False, add_latlon_bnds=False)

Load a CHUK dataset from file and return a dataset

Parameters:

| Name | Type | Description | Default |
|------|------|-------------|---------|
| `from_path` | `str` | path to a NetCDF4 file | *required* |
| `add_latlon` | `bool` | add lon and lat 2D arrays to the dataset | `False` |
| `add_latlon_bnds` | `bool` | add lon_bnds and lat_bnds 2D arrays to the dataset | `False` |

Returns:

| Type | Description |
|------|-------------|
| `Dataset` | A dataset containing the loaded CHUK data |

Source code in eocis_chuk_api/chuk_dataset_utils.py
def load(self, from_path: str, add_latlon: bool = False, add_latlon_bnds: bool = False) -> xarray.Dataset:
    """
    Load a CHUK dataset from file and return a dataset

    Args:
        from_path: path to a NetCDF4 file
        add_latlon: add lon and lat 2D arrays to the dataset
        add_latlon_bnds: add lon_bnds and lat_bnds 2D arrays to the dataset

    Returns:
        A dataset containing the loaded CHUK data
    """
    ds = xr.open_dataset(from_path, decode_coords="all")

    self.extend_latlon(ds, add_latlon=add_latlon, add_latlon_bnds=add_latlon_bnds)

    return ds
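
A sketch of loading a previously saved CHUK file and attaching the grid lat/lon arrays, assuming the utils object and the file saved in the class-level example:

>>> ds = utils.load("EOCIS-CHUK-L4-SQUIRRELPOP-MERGED-20231204-v0.1.nc",
...     add_latlon=True, add_latlon_bnds=True)
>>> ds["lat"].shape == utils.get_grid_shape()
True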

sample(ds, to_resolution) staticmethod

Create a lower resolution sample of a CHUK dataset

Parameters:

| Name | Type | Description | Default |
|------|------|-------------|---------|
| `ds` | `Dataset` | the xarray.Dataset containing CHUK data to sample | *required* |
| `to_resolution` | `int` | the resolution for the sampled output, must be a multiple of 100 | *required* |

Returns:

| Type | Description |
|------|-------------|
| `Dataset` | A dataset containing the sampled data |

Source code in eocis_chuk_api/chuk_dataset_utils.py
@staticmethod
def sample(ds: xarray.Dataset, to_resolution: int) -> xarray.Dataset:
    """
    Create a lower resolution sample of a CHUK dataset

    Args:
        ds: the xarray.Dataset containing CHUK data to sample
        to_resolution: the resolution for the sampled output, must be a multiple of 100

    Returns:
        A dataset containing the sampled data
    """
    if to_resolution % 100 != 0:
        raise ValueError(f"Error - resolution requested ({to_resolution}) is not a multiple of 100")
    sample_step = int(to_resolution / 100)
    return ds.isel(x=slice(0, -1, sample_step), y=slice(0, -1, sample_step))
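
A sketch of downsampling a 100m dataset for quick inspection; as a staticmethod, sample can also be called on the class itself:

>>> ds_1km = CHUKDataSetUtils.sample(ds, 1000)  # keeps every 10th cell in x and y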

save(ds, to_path, add_latlon=False, add_latlon_bnds=False, x_chunk_size=1000, y_chunk_size=1000, time_chunk_size=1, custom_encodings={}, override_encodings={})

Save a CHUK dataset to file, applying the standard chunking and compression

Parameters:

| Name | Type | Description | Default |
|------|------|-------------|---------|
| `ds` | `Dataset` | an xarray dataset containing CHUK data | *required* |
| `to_path` | `str` | path to a NetCDF4 file | *required* |
| `add_latlon` | `bool` | add lon and lat 2D arrays to the dataset | `False` |
| `add_latlon_bnds` | `bool` | add lon_bnds and lat_bnds 2D arrays to the dataset | `False` |
| `x_chunk_size` | `int` | size of chunking in the x-dimension | `1000` |
| `y_chunk_size` | `int` | size of chunking in the y-dimension | `1000` |
| `time_chunk_size` | `int` | size of chunking in the time dimension | `1` |
| `custom_encodings` | `dict` | dictionary mapping from variable names to a custom encoding to use by xarray | `{}` |
| `override_encodings` | `dict` | dictionary mapping from variable names to encoding settings that override the defaults (a setting with value None is removed) | `{}` |
Source code in eocis_chuk_api/chuk_dataset_utils.py
def save(self, ds: xarray.Dataset, to_path: str, add_latlon: bool = False, add_latlon_bnds: bool = False,
         x_chunk_size: int = 1000, y_chunk_size: int = 1000,
         time_chunk_size: int = 1, custom_encodings: dict = {}, override_encodings: dict={}):
    """
    Save a CHUK dataset to file, applying the standard chunking and compression

    Args:
        ds: an xarray dataset containing CHUK data
        to_path: path to a NetCDF4 file
        add_latlon: add lon and lat 2D arrays to the dataset
        add_latlon_bnds: add lon_bnds and lat_bnds 2D arrays to the dataset
        x_chunk_size: size of chunking in the x-dimension
        y_chunk_size: size of chunking in the y-dimension
        time_chunk_size: size of chunking in the time dimension
        custom_encodings: dictionary mapping from variable names to a custom encoding to use by xarray
        override_encodings: dictionary mapping from variable names to encoding settings that override the defaults (a setting given the value None is removed)
    """

    encodings = {}

    for v in ds.variables:
        if custom_encodings and v in custom_encodings:
            encodings[v] = custom_encodings[v]
        else:
            dims = ds[v].dims
            if "x" in dims and "y" in dims:

                encodings[v] = {
                    "zlib": True,
                    "complevel": 5
                }

                if v in override_encodings:
                    for (name,value) in override_encodings[v].items():
                        if value is None:
                            # remove the default setting when the override value is None
                            if name in encodings[v]:
                                del encodings[v][name]
                        else:
                            encodings[v][name] = value

                chunk_sizes = []
                for d in dims:
                    if d == "y":
                        chunk_sizes.append(y_chunk_size)
                    elif d == "x":
                        chunk_sizes.append(x_chunk_size)
                    elif d == "time":
                        chunk_sizes.append(time_chunk_size)
                    else:
                        chunk_sizes.append(0)
                encodings[v]["chunksizes"] = chunk_sizes

    self.extend_latlon(ds, add_latlon=add_latlon, add_latlon_bnds=add_latlon_bnds)


    # ds = ds.rio.write_crs("EPSG:27700",grid_mapping_name="crsOSGB")

    ds.to_netcdf(to_path, encoding=encodings)
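
A sketch of a save call that overrides the default compression for one variable; chuk_ds and the variable name reuse the squirrel example at the top of this page and are otherwise assumptions. Per the code above, giving a setting the value None in override_encodings removes it from the default encoding:

>>> utils.save(chuk_ds, "output.nc",
     x_chunk_size=500, y_chunk_size=500,
     override_encodings={"squirrel_population": {"complevel": 9}})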

save_as_geotif(ds, variable_name, to_path) staticmethod

Save a CHUK dataset to a geotiff. DEPRECATED - use save_as_geotiff

Parameters:

Name             Type       Description                                           Default
ds               Dataset    the CHUK dataset                                      required
variable_name    str        the name of the variable to save from the dataset    required
to_path          str        the path to save the geotiff file to                  required
Source code in eocis_chuk_api/chuk_dataset_utils.py
@staticmethod
def save_as_geotif(ds: xarray.Dataset, variable_name: str, to_path: str):
    """
    Save a CHUK dataset to a geotiff.  DEPRECATED - use save_as_geotiff

    Args:
        ds: the CHUK dataset
        variable_name: the name of the variable to save from the dataset
        to_path: the path to save the geotiff file to
    """
    return CHUKDataSetUtils.save_as_geotiff(ds, variable_name, to_path)

save_as_geotiff(ds, variable_name, to_path) staticmethod

Save a CHUK dataset to a geotiff

Parameters:

Name             Type       Description                                           Default
ds               Dataset    the CHUK dataset                                      required
variable_name    str        the name of the variable to save from the dataset    required
to_path          str        the path to save the geotiff file to                  required
Source code in eocis_chuk_api/chuk_dataset_utils.py
@staticmethod
def save_as_geotiff(ds: xarray.Dataset, variable_name: str, to_path: str):
    """
    Save a CHUK dataset to a geotiff

    Args:
        ds: the CHUK dataset
        variable_name: the name of the variable to save from the dataset
        to_path: the path to save the geotiff file to
    """
    ds_crs = ds.rio.write_crs("EPSG:27700")
    if "grid_mapping" in ds_crs[variable_name].attrs:
        # this seems to cause a problem, why?
        del ds_crs[variable_name].attrs["grid_mapping"]
    tags = CHUKMetadata.to_json(ds_crs, variable_name)
    ds_crs[variable_name].rio.to_raster(to_path, tags=tags, driver="COG")
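
For example (the variable and output names follow the squirrel example and are assumptions), a single variable can be exported as a cloud-optimised GeoTIFF, since the raster is written with driver="COG":

>>> CHUKDataSetUtils.save_as_geotiff(chuk_ds, "squirrel_population", "EOCIS-CHUK-L4-SQUIRRELPOP.tif")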

Working with CHUK Auxiliary data

CHUKAuxilaryDataCombinedMask

Bases: Mask

Source code in eocis_chuk_api/chuk_auxilary_utils.py
class CHUKAuxilaryDataCombinedMask(Mask):

    def __init__(self, *masks: Mask, operator: str = "or"):
        """
        Create a mask derived from one or more other masks

        Args:
            masks: a list of one or more masks to be combined
            operator: the operator to use, should be "not", "or" or "and"

        Throws:
            ValueError: if the list of masks is empty, if operator is not one of "not", "and" or "or", or if more than one mask is supplied with the "not" operator
        """
        if len(masks) == 0:
            raise ValueError("masks must be a non-empty list")
        if operator not in ("and","or","not"):
            raise ValueError('operator must be one of "and", "or" or "not"')
        if operator == "not" and len(masks) > 1:
            raise ValueError("only one mask can be supplied for the not operator")
        self.input_masks = masks
        self.operator = operator

    def to_array(self):

        if self.operator == "not":
            m = self.input_masks[0].to_array()
            return xr.where(m,False,True)

        stacked = xr.concat([m.to_array() for m in self.input_masks], "layer")

        if self.operator == "or":
            return stacked.any(dim="layer")
        elif self.operator == "and":
            return stacked.all(dim="layer")
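
In practice a combined mask is normally obtained via the or_mask, and_mask and not_mask methods on Mask rather than constructed directly. A minimal sketch, assuming urban_mask and water_mask are CHUKAuxilaryDataMask instances built elsewhere:

>>> combined = CHUKAuxilaryDataCombinedMask(urban_mask, water_mask, operator="or")
>>> arr = combined.to_array()   # True wherever either input mask is True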

__init__(*masks, operator='or')

Create a mask derived from one or more other masks

Parameters:

Name        Type    Description                                             Default
masks       Mask    one or more masks to be combined                        ()
operator    str     the operator to use, should be "not", "or" or "and"    'or'

Throws:

ValueError if the list of masks is empty, if operator is not one of "not", "and" or "or", or if more than one mask is supplied with the "not" operator

Source code in eocis_chuk_api/chuk_auxilary_utils.py
def __init__(self, *masks: Mask, operator: str = "or"):
    """
    Create a mask derived from one or more other masks

    Args:
        masks: a list of one or more masks to be combined
        operator: the operator to use, should be "not", "or" or "and"

    Throws:
        ValueError: if the list of masks is empty, if operator is not one of "not", "and" or "or", or if more than one mask is supplied with the "not" operator
    """
    if len(masks) == 0:
        raise ValueError("masks must be a non-empty list")
    if operator not in ("and","or","not"):
        raise ValueError('operator must be one of "and", "or" or "not"')
    if operator == "not" and len(masks) > 1:
        raise ValueError("only one mask can be supplied for the not operator")
    self.input_masks = masks
    self.operator = operator

CHUKAuxilaryDataMask

Bases: Mask

Source code in eocis_chuk_api/chuk_auxilary_utils.py
class CHUKAuxilaryDataMask(Mask):

    def __init__(self, dataset_name:str, variable_name:str, include_missing:bool=False):
        """
        Construct a mask associated with a particular dataset

        Args:
            dataset_name: the name of the dataset
            variable_name: the name of the variable in the dataset to use to construct the mask
            include_missing: whether to also include missing data values (e.g. NaN) in the mask
        """
        self.dataset_name = dataset_name
        self.variable_name = variable_name
        self.da = xr.open_dataset(dataset_name)[variable_name]
        meanings = self.da.attrs["flag_meanings"].split(" ")
        values = self.da.attrs["flag_values"]
        self.value_lookup = {}
        for (meaning, value) in zip(meanings, values):
            self.value_lookup[meaning] = value
        self.mask_values = []
        self.cached_result = None
        self.include_missing = include_missing

    def get_all_mask_values(self) -> list[str]:
        """
        Get a list of all the values that could be included in this mask

        Returns:
             a list of values
        """
        return list(self.value_lookup.keys())

    def get_selected_mask_values(self) -> list[str]:
        """
        Get a list of all the values that are included in this mask

        Returns:
             a list of values included in this mask
        """
        keys = []
        for mask_value in self.mask_values:
            keys += self.__get_matching_keys(mask_value)
        return keys

    def add_mask_value(self, mask_value: str):
        """
        Add a value to the mask

        Args:
            mask_value: the category value to include in the mask
        Throws:
            ValueError if the specified value is not a valid value for this mask
        """
        matching_keys = self.__get_matching_keys(mask_value)
        if len(matching_keys) == 0:
            raise ValueError(f"Value {mask_value} does not match any values {','.join(self.value_lookup.keys())}")
        self.cached_result = None
        self.mask_values.append(mask_value)
        return matching_keys

    def to_array(self) -> xr.DataArray:
        """
        Obtain the evaluated mask values
        Returns:
            an xarray DataArray object
        """
        if self.cached_result is None:
            filter_keys = []
            for mask_value in self.mask_values:
                filter_keys += self.__get_matching_keys(mask_value)
            filter_values = [self.value_lookup[key] for key in filter_keys]
            self.cached_result = xr.where(self.da.isin(filter_values), True, False)
            if self.include_missing:
                self.cached_result = xr.where(np.isnan(self.da),True,self.cached_result)
        return self.cached_result

    def __get_matching_keys(self, value_or_pattern):
        if value_or_pattern in self.value_lookup:
            return [value_or_pattern]
        matches = []
        for key in self.value_lookup:
            if fnmatch.fnmatch(key, value_or_pattern):
                matches.append(key)
        return matches
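
A usage sketch; the file and variable names are hypothetical, and the variable is expected to carry CF flag_meanings and flag_values attributes:

>>> mask = CHUKAuxilaryDataMask("EOCIS-CHUK-LANDCOVER.nc", "land_cover_class")
>>> print(mask.get_all_mask_values())
>>> # values can be named exactly or matched as fnmatch wildcard patterns
>>> mask.add_mask_value("urban*")
>>> arr = mask.to_array()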

__init__(dataset_name, variable_name, include_missing=False)

Construct a mask associated with a particular dataset

Parameters:

Name               Type    Description                                                              Default
dataset_name       str     the name of the dataset                                                  required
variable_name      str     the name of the variable in the dataset to use to construct the mask    required
include_missing    bool    whether to also include missing data values (e.g. NaN) in the mask      False
Source code in eocis_chuk_api/chuk_auxilary_utils.py
def __init__(self, dataset_name:str, variable_name:str, include_missing:bool=False):
    """
    Construct a mask associated with a particular dataset

    Args:
        dataset_name: the name of the dataset
        variable_name: the name of the variable in the dataset to use to construct the mask
        include_missing: whether to also include missing data values (e.g. NaN) in the mask
    """
    self.dataset_name = dataset_name
    self.variable_name = variable_name
    self.da = xr.open_dataset(dataset_name)[variable_name]
    meanings = self.da.attrs["flag_meanings"].split(" ")
    values = self.da.attrs["flag_values"]
    self.value_lookup = {}
    for (meaning, value) in zip(meanings, values):
        self.value_lookup[meaning] = value
    self.mask_values = []
    self.cached_result = None
    self.include_missing = include_missing

add_mask_value(mask_value)

Add a value to the mask

Parameters:

Name          Type    Description                                  Default
mask_value    str     the category value to include in the mask    required

Throws:

ValueError if the specified value is not a valid value for this mask

Source code in eocis_chuk_api/chuk_auxilary_utils.py
def add_mask_value(self, mask_value: str):
    """
    Add a value to the mask

    Args:
        mask_value: the category value to include in the mask
    Throws:
        ValueError if the specified value is not a valid value for this mask
    """
    matching_keys = self.__get_matching_keys(mask_value)
    if len(matching_keys) == 0:
        raise ValueError(f"Value {mask_value} does not match any values {','.join(self.value_lookup.keys())}")
    self.cached_result = None
    self.mask_values.append(mask_value)
    return matching_keys

get_all_mask_values()

Get a list of all the values that could be included in this mask

Returns:

Type         Description
list[str]    a list of values

Source code in eocis_chuk_api/chuk_auxilary_utils.py
def get_all_mask_values(self) -> list[str]:
    """
    Get a list of all the values that could be included in this mask

    Returns:
         a list of values
    """
    return list(self.value_lookup.keys())

get_selected_mask_values()

Get a list of all the values that are included in this mask

Returns:

Type         Description
list[str]    a list of values included in this mask

Source code in eocis_chuk_api/chuk_auxilary_utils.py
def get_selected_mask_values(self) -> list[str]:
    """
    Get a list of all the values that are included in this mask

    Returns:
         a list of values included in this mask
    """
    keys = []
    for mask_value in self.mask_values:
        keys += self.__get_matching_keys(mask_value)
    return keys

to_array()

Obtain the evaluated mask values

Returns:

Type         Description
DataArray    an xarray DataArray object

Source code in eocis_chuk_api/chuk_auxilary_utils.py
def to_array(self) -> xr.DataArray:
    """
    Obtain the evaluated mask values
    Returns:
        an xarray DataArray object
    """
    if self.cached_result is None:
        filter_keys = []
        for mask_value in self.mask_values:
            filter_keys += self.__get_matching_keys(mask_value)
        filter_values = [self.value_lookup[key] for key in filter_keys]
        self.cached_result = xr.where(self.da.isin(filter_values), True, False)
        if self.include_missing:
            self.cached_result = xr.where(np.isnan(self.da),True,self.cached_result)
    return self.cached_result

CHUKAuxilaryUtils

Source code in eocis_chuk_api/chuk_auxilary_utils.py
class CHUKAuxilaryUtils:

    @staticmethod
    def create_mask(dataset_path: str, variable: str, mask_values: str | list[str], include_missing: bool = False) -> CHUKAuxilaryDataMask:
        """
        Create a mask

        Args:
            dataset_path: path to the NetCDF file containing the auxiliary data to use
            variable: the variable in the file to use in the mask
            mask_values: a string or list of strings
            include_missing: whether to include missing data values in the mask or not

        Returns:
            A mask object containing True or False values for every cell
        """
        mask = CHUKAuxilaryDataMask(dataset_path, variable, include_missing=include_missing)
        if isinstance(mask_values, str):
            mask_values = [mask_values]
        for mask_value in mask_values:
            mask.add_mask_value(mask_value)
        return mask

    """
    Construct the logical AND of a list of masks

    Args:
        masks: the masks to combine

    Returns:
        Resulting mask
    """
    @staticmethod
    def combine_masks_and(*masks:list[Mask]):
        return masks[0].and_mask(*masks[1:])

    """
    Construct the logical OR of a list of masks

    Args:
        masks: the masks to combine

    Returns:
        Resulting mask
    """
    @staticmethod
    def combine_masks_or(*masks:list[Mask]):
        return masks[0].or_mask(*masks[1:])

    """
    Construct the logical NOT of a mask

    Args:
        masks: the masks to combine

    Returns:
        Resulting mask
    """
    @staticmethod
    def not_mask(mask:Mask):
        return mask.not_mask()

create_mask(dataset_path, variable, mask_values, include_missing=False) staticmethod

Create a mask

Parameters:

Name               Type               Description                                                      Default
dataset_path       str                path to the NetCDF file containing the auxiliary data to use    required
variable           str                the variable in the file to use in the mask                     required
mask_values        str | list[str]    a string or list of strings                                     required
include_missing    bool               whether to include missing data values in the mask or not       False

Returns:

Type                    Description
CHUKAuxilaryDataMask    A mask object containing True or False values for every cell

Source code in eocis_chuk_api/chuk_auxilary_utils.py
@staticmethod
def create_mask(dataset_path: str, variable: str, mask_values: str | list[str], include_missing: bool = False) -> CHUKAuxilaryDataMask:
    """
    Create a mask

    Args:
        dataset_path: path to the NetCDF file containing the auxiliary data to use
        variable: the variable in the file to use in the mask
        mask_values: a string or list of strings
        include_missing: whether to include missing data values in the mask or not

    Returns:
        A mask object containing True or False values for every cell
    """
    mask = CHUKAuxilaryDataMask(dataset_path, variable, include_missing=include_missing)
    if isinstance(mask_values, str):
        mask_values = [mask_values]
    for mask_value in mask_values:
        mask.add_mask_value(mask_value)
    return mask
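
A sketch combining two masks built from the same auxiliary file (the file and variable names here are hypothetical):

>>> woodland = CHUKAuxilaryUtils.create_mask("EOCIS-CHUK-LANDCOVER.nc", "land_cover_class", "woodland*")
>>> urban = CHUKAuxilaryUtils.create_mask("EOCIS-CHUK-LANDCOVER.nc", "land_cover_class", "urban")
>>> either = CHUKAuxilaryUtils.combine_masks_or(woodland, urban)
>>> print(either.count())   # number of cells that are woodland or urban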

Mask

Bases: ABC

Source code in eocis_chuk_api/chuk_auxilary_utils.py
class Mask(abc.ABC):

    def __init__(self):
        """
        Abstract Base Class for masks, do not instantiate directly
        """
        pass

    def or_mask(self, *others: "Mask") -> "Mask":
        """
        OR this mask with other masks

        Args:
            others: a list of masks to be OR'd with this mask
        Returns:
            a combined mask
        """
        return CHUKAuxilaryDataCombinedMask(self, *others, operator="or")

    def and_mask(self, *others: "Mask") -> "Mask":
        """
        AND this mask with other masks

        Args:
            others: a list of masks to be AND'd with this mask
        Returns:
            a combined mask
        """
        return CHUKAuxilaryDataCombinedMask(self, *others, operator="and")


    def not_mask(self) -> "Mask":
        """
        Invert this mask

        Returns:
             a new mask that is the negation of this mask
        """
        return CHUKAuxilaryDataCombinedMask(self, operator="not")

    def count(self) -> int:
        """
        Count the number of True values in this mask

        Returns:
            the total number of True values
        """
        return int(self.to_array().sum())

    def fraction(self) -> float:
        """
        Calculate the fraction of values that are True in this mask

        Returns:
            the fraction of values that are True
        """
        m = self.to_array()
        # divide the number of True cells by the total number of cells
        return int(m.sum()) / m.size

    @abc.abstractmethod
    def to_array(self) -> xr.DataArray:
        """
        Convert this mask to an xarray.DataArray and return it

        Returns:
             an xarray.DataArray containing the mask values
        """
        pass # implemented in sub-classes
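
For any concrete mask (here mask is assumed to be an instance of one of the subclasses above), count and fraction give a quick summary of coverage:

>>> n = mask.count()       # number of True cells
>>> f = mask.fraction()    # proportion of all cells that are True
>>> print(f"{n} cells selected ({100 * f:.1f}% of the grid)")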

__init__()

Abstract Base Class for masks, do not instantiate directly

Source code in eocis_chuk_api/chuk_auxilary_utils.py
def __init__(self):
    """
    Abstract Base Class for masks, do not instantiate directly
    """
    pass

and_mask(*others)

AND this mask with other masks

Parameters:

Name      Type    Description                                   Default
others    Mask    a list of masks to be AND'd with this mask    ()

Returns:

Type    Description
Mask    a combined mask

Source code in eocis_chuk_api/chuk_auxilary_utils.py
def and_mask(self, *others: "Mask") -> "Mask":
    """
    AND this mask with other masks

    Args:
        others: a list of masks to be AND'd with this mask
    Returns:
        a combined mask
    """
    return CHUKAuxilaryDataCombinedMask(self, *others, operator="and")

count()

Count the number of True values in this mask

Returns:

Type    Description
int     the total number of True values

Source code in eocis_chuk_api/chuk_auxilary_utils.py
def count(self) -> int:
    """
    Count the number of True values in this mask

    Returns:
        the total number of True values
    """
    return int(self.to_array().sum())

fraction()

Calculate the fraction of values that are True in this mask

Returns:

Type     Description
float    the fraction of values that are True

Source code in eocis_chuk_api/chuk_auxilary_utils.py
def fraction(self) -> float:
    """
    Calculate the fraction of values that are True in this mask

    Returns:
        the fraction of values that are True
    """
    m = self.to_array()
    # divide the number of True cells by the total number of cells
    return int(m.sum()) / m.size

not_mask()

Invert this mask

Returns:

Type    Description
Mask    a new mask that is the negation of this mask

Source code in eocis_chuk_api/chuk_auxilary_utils.py
def not_mask(self) -> "Mask":
    """
    Invert this mask

    Returns:
         a new mask that is the negation of this mask
    """
    return CHUKAuxilaryDataCombinedMask(self, operator="not")

or_mask(*others)

OR this mask with other masks

Parameters:

Name      Type    Description                                  Default
others    Mask    a list of masks to be OR'd with this mask    ()

Returns:

Type    Description
Mask    a combined mask

Source code in eocis_chuk_api/chuk_auxilary_utils.py
def or_mask(self, *others: "Mask") -> "Mask":
    """
    OR this mask with other masks

    Args:
        others: a list of masks to be OR'd with this mask
    Returns:
        a combined mask
    """
    return CHUKAuxilaryDataCombinedMask(self, *others, operator="or")

to_array() abstractmethod

Convert this mask to an xarray.DataArray and return it

Returns:

Type         Description
DataArray    an xarray.DataArray containing the mask values

Source code in eocis_chuk_api/chuk_auxilary_utils.py
@abc.abstractmethod
def to_array(self) -> xr.DataArray:
    """
    Convert this mask to an xarray.DataArray and return it

    Returns:
         an xarray.DataArray containing the mask values
    """
    pass # implemented in sub-classes