Skip to content

pace module

This module contains functions to read and process PACE data.

extract_pace(dataset, latitude, longitude, delta=0.01, return_plot=False, **kwargs)

Extracts data from a PACE dataset for a given latitude and longitude range and calculates the mean over these dimensions.

Parameters:

Name Type Description Default
dataset Union[xr.Dataset, str]

The PACE dataset or path to the dataset file.

required
latitude Union[float, Tuple[float, float]]

The latitude or range of latitudes to extract data for.

required
longitude Union[float, Tuple[float, float]]

The longitude or range of longitudes to extract data for.

required
delta float

The range to add/subtract to the latitude and longitude if they are not ranges. Defaults to 0.01.

0.01
return_plot bool

Whether to return a plot of the data. Defaults to False.

False
**kwargs

Additional keyword arguments to pass to the plot function.

{}

Returns:

Type Description
Union[xr.DataArray, plt.figure.Figure]

The mean data over the latitude and longitude dimensions, or a plot of this data if return_plot is True.

Source code in hypercoast/pace.py
def extract_pace(
    dataset: Union[xr.Dataset, str],
    latitude: Union[float, Tuple[float, float]],
    longitude: Union[float, Tuple[float, float]],
    delta: float = 0.01,
    return_plot: bool = False,
    **kwargs,
) -> Union[xr.DataArray, plt.Figure]:
    """
    Averages PACE data over a latitude/longitude window.

    A scalar latitude or longitude is widened into the range
    ``(value - delta, value + delta)``; a list or tuple is used as given.
    The window is handed to `filter_pace` and the result is averaged over
    the "latitude" and "longitude" dimensions.

    Args:
        dataset (Union[xr.Dataset, str]): The PACE dataset, forwarded
            unchanged to `filter_pace`.
        latitude (Union[float, Tuple[float, float]]): A single latitude or a
            (min, max) range of latitudes.
        longitude (Union[float, Tuple[float, float]]): A single longitude or a
            (min, max) range of longitudes.
        delta (float, optional): Half-width of the window built around a
            scalar latitude/longitude. Defaults to 0.01.
        return_plot (bool, optional): If True, return the result of plotting
            the averaged data instead of the data itself. Defaults to False.
        **kwargs: Additional keyword arguments passed to `plot.line`.

    Returns:
        Union[xr.DataArray, plt.Figure]: The data averaged over the latitude
            and longitude dimensions, or the value returned by
            `plot.line(**kwargs)` when return_plot is True.
    """
    range_types = (list, tuple)

    # Scalars become a symmetric window; explicit ranges pass through as-is.
    if not isinstance(latitude, range_types):
        latitude = (latitude - delta, latitude + delta)
    if not isinstance(longitude, range_types):
        longitude = (longitude - delta, longitude + delta)

    subset = filter_pace(dataset, latitude, longitude, return_plot=False)
    averaged = subset.mean(dim=["latitude", "longitude"])

    return averaged.plot.line(**kwargs) if return_plot else averaged

filter_pace(dataset, latitude, longitude, drop=True, return_plot=False, **kwargs)

Filters a PACE dataset based on latitude and longitude.

Parameters:

Name Type Description Default
dataset xr.Dataset

The PACE dataset to filter.

required
latitude float or tuple

The latitude to filter by. If a tuple or list, it represents a range.

required
longitude float or tuple

The longitude to filter by. If a tuple or list, it represents a range.

required
drop bool

Whether to drop the filtered out data. Defaults to True.

True

Returns:

Type Description
xr.DataArray

The filtered PACE data.

Source code in hypercoast/pace.py
def filter_pace(dataset, latitude, longitude, drop=True, return_plot=False, **kwargs):
    """
    Filters a PACE dataset based on latitude and longitude.

    Args:
        dataset (xr.Dataset): The PACE dataset to filter. It must provide
            "latitude", "longitude" and "Rrs".
        latitude (float or tuple): The latitude to filter by. A tuple or list
            is treated as a (min, max) range with both bounds exclusive; a
            scalar must match exactly.
        longitude (float or tuple): The longitude to filter by. A tuple or
            list is treated as a (min, max) range with both bounds exclusive;
            a scalar must match exactly.
        drop (bool, optional): Whether to drop the filtered out data.
            Defaults to True.
        return_plot (bool, optional): If True, plot one line per remaining
            pixel and return the plot result instead of the data.
            Defaults to False.
        **kwargs: Additional keyword arguments forwarded to the `where` call
            on the "Rrs" variable.

    Returns:
        xr.DataArray: The filtered "Rrs" data with all-NaN latitude rows and
            longitude columns removed, or the value returned by
            `plot.line(hue="pixel")` when return_plot is True.
    """
    range_types = (list, tuple)

    if isinstance(latitude, range_types):
        lat_con = (dataset["latitude"] > latitude[0]) & (
            dataset["latitude"] < latitude[1]
        )
    else:
        lat_con = dataset["latitude"] == latitude

    if isinstance(longitude, range_types):
        lon_con = (dataset["longitude"] > longitude[0]) & (
            dataset["longitude"] < longitude[1]
        )
    else:
        lon_con = dataset["longitude"] == longitude

    da = dataset["Rrs"].where(lat_con & lon_con, drop=drop, **kwargs)
    da_filtered = da.dropna(dim="latitude", how="all")
    da_filtered = da_filtered.dropna(dim="longitude", how="all")

    if not return_plot:
        return da_filtered

    # Collapse the two spatial dimensions so each pixel becomes its own line.
    rrs_stack = da_filtered.stack(
        {"pixel": ["latitude", "longitude"]},
        create_index=False,
    )
    # Hand the plot result back to the caller (as extract_pace does) rather
    # than silently returning None.
    return rrs_stack.plot.line(hue="pixel")

grid_pace(dataset, wavelengths=None, method='nearest', **kwargs)

Grids a PACE dataset based on latitude and longitude.

Parameters:

Name Type Description Default
dataset xr.Dataset

The PACE dataset to grid.

required
wavelengths float or int

The wavelength to select.

None
method str

The method to use for griddata interpolation. Defaults to "nearest".

'nearest'
**kwargs

Additional keyword arguments to pass to the xr.Dataset constructor.

{}

Returns:

Type Description
xr.DataArray

The gridded PACE data.

Source code in hypercoast/pace.py
def grid_pace(dataset, wavelengths=None, method="nearest", **kwargs):
    """
    Grids a PACE dataset based on latitude and longitude.

    The "Rrs" values at each requested wavelength are interpolated with
    `scipy.interpolate.griddata` onto a regular grid spanning the minimum and
    maximum of the dataset's latitude and longitude arrays. The grid has as
    many rows/columns as those arrays have along their first/second axis.

    Args:
        dataset (xr.Dataset): The PACE dataset to grid. Its latitude and
            longitude arrays must be at least two-dimensional.
        wavelengths (float, int, list, tuple or np.ndarray, optional): The
            wavelength(s) to select; each is matched to the nearest available
            wavelength. If None, the first wavelength in the dataset is used.
        method (str, optional): The method to use for griddata interpolation.
            Defaults to "nearest".
        **kwargs: Additional keyword arguments to pass to the xr.Dataset constructor.

    Returns:
        xr.Dataset: A dataset holding the gridded "Rrs" variable with
            dimensions (latitude, longitude, wavelength) and CRS EPSG:4326.
    """
    # Imported for its side effect: it registers the `.rio` accessor used below.
    import rioxarray  # noqa: F401
    from scipy.interpolate import griddata

    if wavelengths is None:
        wavelengths = dataset.coords["wavelength"].values[0]

    # Normalise to a list. Tuples and arrays are unpacked element-wise so
    # that each wavelength is gridded separately instead of being wrapped as
    # one (invalid) multi-valued selection.
    if isinstance(wavelengths, (tuple, np.ndarray)):
        wavelengths = list(np.atleast_1d(wavelengths))
    elif not isinstance(wavelengths, list):
        wavelengths = [wavelengths]

    lat = dataset.latitude
    lon = dataset.longitude

    grid_lat = np.linspace(lat.min().values, lat.max().values, lat.shape[0])
    grid_lon = np.linspace(lon.min().values, lon.max().values, lon.shape[1])
    grid_lon_2d, grid_lat_2d = np.meshgrid(grid_lon, grid_lat)

    # The source sample locations are the same for every wavelength.
    sample_points = (lat.data.flatten(), lon.data.flatten())

    gridded_data_dict = {}
    for wavelength in wavelengths:
        data = dataset.sel(wavelength=wavelength, method="nearest")["Rrs"]
        gridded_data_dict[wavelength] = griddata(
            sample_points,
            data.data.flatten(),
            (grid_lat_2d, grid_lon_2d),
            method=method,
        )

    # Create a 3D array with dimensions latitude, longitude, and wavelength
    gridded_data_3d = np.dstack(list(gridded_data_dict.values()))

    dataset2 = xr.Dataset(
        {"Rrs": (("latitude", "longitude", "wavelength"), gridded_data_3d)},
        coords={
            "latitude": ("latitude", grid_lat),
            "longitude": ("longitude", grid_lon),
            "wavelength": ("wavelength", list(gridded_data_dict.keys())),
        },
        **kwargs,
    )

    dataset2["Rrs"].rio.write_crs("EPSG:4326", inplace=True)

    return dataset2

grid_pace_bgc(dataset, variable='chlor_a', method='nearest', **kwargs)

Grids PACE BGC data using specified interpolation method.

This function takes an xarray Dataset containing PACE BGC data, interpolates it onto a regular grid using the specified method, and returns the gridded data as an xarray DataArray with the specified variable.

Parameters:

Name Type Description Default
dataset xr.Dataset

The input dataset containing PACE BGC data with latitude and longitude coordinates.

required
variable str

The variable within the dataset to grid. Can be one of chlor_a, carbon_phyto, poc, chlor_a_unc, carbon_phyto_unc, and l2_flags. Defaults to "chlor_a".

'chlor_a'
method str

The interpolation method to use. Options include "nearest", "linear", and "cubic". Defaults to "nearest".

'nearest'
**kwargs Any

Additional keyword arguments to pass to the xr.Dataset creation.

{}

Returns:

Type Description
xr.DataArray

The gridded data as an xarray DataArray, with the specified variable and EPSG:4326 CRS.

Examples:

>>> dataset = hypercoast.read_pace_bgc("path_to_your_dataset.nc")
>>> gridded_data = grid_pace_bgc(dataset, variable="chlor_a", method="nearest")
>>> print(gridded_data)
Source code in hypercoast/pace.py
def grid_pace_bgc(
    dataset: xr.Dataset,
    variable: str = "chlor_a",
    method: str = "nearest",
    **kwargs: Any,
) -> xr.DataArray:
    """
    Interpolates one PACE BGC variable onto a regular latitude/longitude grid.

    The target grid spans the minimum and maximum of the dataset's latitude
    and longitude arrays, with as many rows/columns as those arrays have along
    their first/second axis. Values are resampled with
    `scipy.interpolate.griddata`.

    Args:
        dataset (xr.Dataset): Input dataset with latitude and longitude
            coordinates and the requested variable.
        variable (str, optional): Name of the variable to grid, e.g. chlor_a,
            carbon_phyto, poc, chlor_a_unc, carbon_phyto_unc or l2_flags.
            Defaults to "chlor_a".
        method (str, optional): Interpolation method passed to griddata
            ("nearest", "linear" or "cubic"). Defaults to "nearest".
        **kwargs (Any): Additional keyword arguments to pass to the xr.Dataset creation.

    Returns:
        xr.DataArray: The gridded variable with dimensions
            (latitude, longitude) and CRS EPSG:4326.

    Example:
        >>> dataset = hypercoast.read_pace_bgc("path_to_your_dataset.nc")
        >>> gridded_data = grid_pace_bgc(dataset, variable="chlor_a", method="nearest")
        >>> print(gridded_data)
    """
    import rioxarray
    from scipy.interpolate import griddata

    src_lat = dataset.latitude
    src_lon = dataset.longitude

    # Regular axes covering the full extent of the source coordinates.
    lat_axis = np.linspace(
        src_lat.min().values, src_lat.max().values, src_lat.shape[0]
    )
    lon_axis = np.linspace(
        src_lon.min().values, src_lon.max().values, src_lon.shape[1]
    )
    lon_mesh, lat_mesh = np.meshgrid(lon_axis, lat_axis)

    sample_points = (src_lat.data.flatten(), src_lon.data.flatten())
    sample_values = dataset[variable].data.flatten()
    interpolated = griddata(
        sample_points,
        sample_values,
        (lat_mesh, lon_mesh),
        method=method,
    )

    gridded = xr.Dataset(
        {variable: (("latitude", "longitude"), interpolated)},
        coords={
            "latitude": ("latitude", lat_axis),
            "longitude": ("longitude", lon_axis),
        },
        **kwargs,
    )

    return gridded[variable].rio.write_crs("EPSG:4326")

pace_chla_to_image(data, output=None, **kwargs)

Converts PACE chlorophyll-a data to an image.

Parameters:

Name Type Description Default
data xr.DataArray or str

The chlorophyll-a data or the file path to the data.

required
output str

The file path where the image will be saved. If None, the image will be returned as a PIL Image object. Defaults to None.

None
**kwargs

Additional keyword arguments to be passed to leafmap.array_to_image.

{}

Returns:

Type Description
rasterio.Dataset or None

The image converted from the data. If output is provided, the image is also written to the specified file via leafmap.image_to_geotiff; the image is returned in either case.

Source code in hypercoast/pace.py
def pace_chla_to_image(data, output=None, **kwargs):
    """
    Converts PACE chlorophyll-a data to an image.

    Args:
        data (xr.DataArray or str): The chlorophyll-a data, or a file path
            that is loaded with `read_pace_chla`.
        output (str, optional): File path to write the image to with
            `leafmap.image_to_geotiff`. If None, nothing is written.
            Defaults to None.
        **kwargs: Additional keyword arguments to be passed to `leafmap.array_to_image`.

    Returns:
        The image object produced by `leafmap.array_to_image`. It is returned
        whether or not `output` is given.

    Raises:
        ValueError: If `data` is neither a string nor an xarray DataArray.
    """
    from leafmap import array_to_image, image_to_geotiff

    if isinstance(data, str):
        chla = read_pace_chla(data)
    elif isinstance(data, xr.DataArray):
        chla = data
    else:
        raise ValueError("data must be an xarray DataArray")

    # Always build the image in memory first; writing to disk is a separate step.
    image = array_to_image(chla, transpose=False, output=None, **kwargs)

    if output is None:
        return image

    image_to_geotiff(image, output, dtype="float32")
    return image

pace_to_image(dataset, wavelengths=None, method='nearest', gridded=False, output=None, **kwargs)

Converts a PACE dataset to an image.

Parameters:

Name Type Description Default
dataset xarray.Dataset or str

The dataset containing the PACE data or the file path to the dataset.

required
wavelengths array-like

The specific wavelengths to select. If None, all wavelengths are selected. Defaults to None.

None
method str

The method to use for data interpolation. Defaults to "nearest".

'nearest'
gridded bool

Whether the dataset is a gridded dataset. Defaults to False.

False
output str

The file path where the image will be saved. If None, the image will be returned as a PIL Image object. Defaults to None.

None
**kwargs

Additional keyword arguments to be passed to leafmap.array_to_image.

{}

Returns:

Type Description
rasterio.Dataset or None

The image converted from the dataset. If output is provided, the image will be saved to the specified file and the function will return None.

Source code in hypercoast/pace.py
def pace_to_image(
    dataset, wavelengths=None, method="nearest", gridded=False, output=None, **kwargs
):
    """
    Converts a PACE dataset to an image.

    Args:
        dataset (xarray.Dataset or str): The dataset containing the PACE data,
            or a file path that is loaded with `read_pace`.
        wavelengths (array-like, optional): The specific wavelengths to
            select (nearest match). If None, no wavelength selection is
            applied here. Defaults to None.
        method (str, optional): The interpolation method handed to
            `grid_pace` when the dataset is not already gridded.
            Defaults to "nearest".
        gridded (bool, optional): Whether the dataset is already gridded, in
            which case `grid_pace` is skipped. Defaults to False.
        output (str, optional): The file path passed to
            `leafmap.array_to_image` as its `output` argument. Defaults to None.
        **kwargs: Additional keyword arguments to be passed to `leafmap.array_to_image`.

    Returns:
        Whatever `leafmap.array_to_image` returns for the "Rrs" data.
    """
    from leafmap import array_to_image

    if isinstance(dataset, str):
        dataset = read_pace(dataset, wavelengths=wavelengths, method="nearest")

    if wavelengths is not None:
        dataset = dataset.sel(wavelength=wavelengths, method="nearest")

    source = (
        dataset
        if gridded
        else grid_pace(dataset, wavelengths=wavelengths, method=method)
    )

    rrs = source["Rrs"]
    rrs.rio.write_crs("EPSG:4326", inplace=True)

    return array_to_image(rrs, transpose=False, output=output, **kwargs)

read_pace(filepath, wavelengths=None, method='nearest', engine='h5netcdf', **kwargs)

Reads PACE data from a given file and returns an xarray Dataset.

Parameters:

Name Type Description Default
filepath str

Path to the file to read.

required
wavelengths array-like

Specific wavelengths to select. If None, all wavelengths are selected.

None
method str

Method to use for selection when wavelengths is not None. Defaults to "nearest".

'nearest'
**kwargs

Additional keyword arguments to pass to the sel method when wavelengths is not None.

{}

Returns:

Type Description
xr.Dataset

An xarray Dataset containing the PACE data.

Source code in hypercoast/pace.py
def read_pace(
    filepath, wavelengths=None, method="nearest", engine="h5netcdf", **kwargs
):
    """
    Reads PACE "Rrs" data from a file into an xarray Dataset.

    The "Rrs" variable is taken from the "geophysical_data" group, the
    longitude/latitude coordinates from the "navigation_data" group and the
    wavelength coordinate from the "sensor_band_parameters" group. The
    dimensions are renamed to "latitude", "longitude" and "wavelength".

    Args:
        filepath (str): Path to the file to read.
        wavelengths (array-like, optional): Specific wavelengths to select. If None, all wavelengths are kept.
        method (str, optional): Method to use for selection when wavelengths is not None. Defaults to "nearest".
        engine (str, optional): Engine passed to `xr.open_dataset`. Defaults to "h5netcdf".
        **kwargs: Additional keyword arguments to pass to the `sel` method when wavelengths is not None.

    Returns:
        xr.Dataset: An xarray Dataset containing the PACE data.
    """

    geophysical = xr.open_dataset(filepath, engine=engine, group="geophysical_data")
    rrs = geophysical["Rrs"]
    band_params = xr.open_dataset(
        filepath, engine=engine, group="sensor_band_parameters"
    )
    navigation = xr.open_dataset(filepath, engine=engine, group="navigation_data")

    navigation = navigation.set_coords(("longitude", "latitude"))
    # Align the navigation dimension name with the one used by the Rrs data.
    if "pixel_control_points" in navigation.dims:
        navigation = navigation.rename({"pixel_control_points": "pixels_per_line"})

    merged = xr.merge([rrs, navigation.coords.to_dataset()])
    merged.coords["wavelength_3d"] = band_params.coords["wavelength_3d"]

    dimension_names = {
        "number_of_lines": "latitude",
        "pixels_per_line": "longitude",
        "wavelength_3d": "wavelength",
    }
    renamed = merged.rename(dimension_names)

    if wavelengths is None:
        return renamed
    return renamed.sel(wavelength=wavelengths, method=method, **kwargs)

read_pace_aop(filepath, engine='h5netcdf', **kwargs)

Reads PACE data from a given file and returns an xarray Dataset.

Parameters:

Name Type Description Default
filepath str

Path to the file to read.

required
wavelengths array-like

Specific wavelengths to select. If None, all wavelengths are selected.

required
method str

Method to use for selection when wavelengths is not None. Defaults to "nearest".

required
**kwargs

Additional keyword arguments to pass to xr.open_dataset for each group that is read.

{}

Returns:

Type Description
xr.Dataset

An xarray Dataset containing the PACE data.

Source code in hypercoast/pace.py
def read_pace_aop(filepath, engine="h5netcdf", **kwargs):
    """
    Reads PACE data from a given file and returns an xarray Dataset.

    The "Rrs" variable from the "geophysical_data" group is merged with the
    longitude/latitude coordinates from the "navigation_data" group, and the
    "wavelength_3d" coordinate from the "sensor_band_parameters" group is
    attached. Dimension names are left as stored in the file, apart from
    "pixel_control_points", which is renamed to "pixels_per_line".

    Args:
        filepath (str): Path to the file to read.
        engine (str, optional): Engine passed to `xr.open_dataset`. Defaults to "h5netcdf".
        **kwargs: Additional keyword arguments to pass to `xr.open_dataset`
            for each group that is read.

    Returns:
        xr.Dataset: An xarray Dataset containing the PACE data.
    """

    def open_group(group_name):
        # Every group is opened with the same engine and caller options.
        return xr.open_dataset(filepath, engine=engine, group=group_name, **kwargs)

    rrs = open_group("geophysical_data")["Rrs"]
    band_params = open_group("sensor_band_parameters")
    navigation = open_group("navigation_data")

    navigation = navigation.set_coords(("longitude", "latitude"))
    if "pixel_control_points" in navigation.dims:
        navigation = navigation.rename({"pixel_control_points": "pixels_per_line"})

    merged = xr.merge([rrs, navigation.coords.to_dataset()])
    merged.coords["wavelength_3d"] = band_params.coords["wavelength_3d"]

    return merged

read_pace_bgc(filepath, variable=None, engine='h5netcdf', **kwargs)

Reads PACE BGC data from a specified file and returns an xarray Dataset.

This function opens a dataset from a file using the specified engine, optionally selects a single variable, merges geophysical and navigation data, sets appropriate coordinates, and renames dimensions for easier use.

Parameters:

Name Type Description Default
filepath str

The path to the file containing the PACE BGC data.

required
variable Optional[str]

The specific variable to extract from the geophysical_data group. If None, all variables are read. Defaults to None.

None
engine str

The engine to use for reading the file. Defaults to "h5netcdf".

'h5netcdf'
**kwargs Any

Additional keyword arguments to pass to xr.open_dataset.

{}

Returns:

Type Description
xr.Dataset

An xarray Dataset containing the requested PACE BGC data, with merged geophysical and navigation data, set coordinates, and renamed dimensions.

Examples:

>>> dataset = read_pace_bgc("path/to/your/datafile.h5", variable="chlor_a")
>>> print(dataset)
Source code in hypercoast/pace.py
def read_pace_bgc(
    filepath: str,
    variable: Optional[str] = None,
    engine: str = "h5netcdf",
    **kwargs: Any,
) -> xr.Dataset:
    """
    Reads PACE BGC data from a file and returns an xarray Dataset.

    The "geophysical_data" group (or a single variable from it) is merged with
    the longitude/latitude coordinates of the "navigation_data" group. The
    "number_of_lines" and "pixels_per_line" dimensions are renamed to
    "latitude" and "longitude", and the attributes of the file's root group
    are copied onto the result.

    Args:
        filepath (str): The path to the file containing the PACE BGC data.
        variable (Optional[str], optional): The specific variable to extract
            from the geophysical_data group. If None, all variables are read. Defaults to None.
        engine (str, optional): The engine to use for reading the file. Defaults to "h5netcdf".
        **kwargs (Any): Additional keyword arguments to pass to `xr.open_dataset`.

    Returns:
        xr.Dataset: The merged geophysical and navigation data with renamed
        dimensions and the root-group attributes.

    Example:
        >>> dataset = read_pace_bgc("path/to/your/datafile.h5", variable="chlor_a")
        >>> print(dataset)
    """

    geophysical = xr.open_dataset(
        filepath, engine=engine, group="geophysical_data", **kwargs
    )
    selected = geophysical if variable is None else geophysical[variable]

    navigation = xr.open_dataset(
        filepath, engine=engine, group="navigation_data", **kwargs
    )
    navigation = navigation.set_coords(("longitude", "latitude"))
    # Align the navigation dimension name with the one used by the data group.
    if "pixel_control_points" in navigation.dims:
        navigation = navigation.rename({"pixel_control_points": "pixels_per_line"})

    dimension_names = {
        "number_of_lines": "latitude",
        "pixels_per_line": "longitude",
    }
    merged = xr.merge([selected, navigation.coords.to_dataset()])
    merged = merged.rename(dimension_names)

    # File-level metadata lives on the root group, opened without `group`.
    root_attrs = xr.open_dataset(filepath, engine=engine, **kwargs).attrs
    merged.attrs.update(root_attrs)

    return merged

read_pace_chla(filepaths, engine='h5netcdf', **kwargs)

Reads chlorophyll-a data from PACE files and applies a logarithmic transformation.

This function supports reading from a single file or multiple files. For multiple files, it combines them into a single dataset. It then extracts the chlorophyll-a variable, applies a logarithmic transformation, and sets the coordinate reference system to EPSG:4326.

Parameters:

Name Type Description Default
filepaths Union[str, List[str]]

A string or a list of strings containing the file path(s) to the PACE chlorophyll-a data files.

required
engine str

The backend engine to use for reading files. Defaults to "h5netcdf".

'h5netcdf'
**kwargs

Additional keyword arguments to pass to xr.open_dataset or xr.open_mfdataset.

{}

Returns:

Type Description
DataArray

An xarray DataArray containing the logarithmically transformed chlorophyll-a data with updated attributes.

Examples:

Read chlorophyll-a data from a single file:

>>> chla_data = read_pace_chla('path/to/single/file.nc')

Read and combine chlorophyll-a data from multiple files:

>>> chla_data = read_pace_chla(['path/to/file1.nc', 'path/to/file2.nc'], combine='by_coords')
Source code in hypercoast/pace.py
def read_pace_chla(
    filepaths: Union[str, List[str]], engine: str = "h5netcdf", **kwargs
) -> xr.DataArray:
    """
    Reads chlorophyll-a data from PACE files and applies a logarithmic transformation.

    The file(s) are opened with `xr.open_mfdataset`. A "date" coordinate is
    built from the dates that `extract_date_from_filename` extracts from the
    file names. The "chlor_a" variable is then log10-transformed, transposed
    to (lat, lon, date) and tagged with CRS EPSG:4326.

    Args:
        filepaths: A single file path, a glob pattern, or a list of file paths
            to the PACE chlorophyll-a data files.
        engine: The backend engine to use for reading files. Defaults to "h5netcdf".
        **kwargs: Additional keyword arguments to pass to `xr.open_mfdataset`.
            `combine` defaults to "nested". `concat_dim` defaults to "date",
            but only when `combine` is "nested", because xarray rejects
            `concat_dim` together with `combine="by_coords"`.

    Returns:
        An xarray DataArray containing the log10-transformed chlorophyll-a
        data. Its "units" attribute is rewritten as ``lg(<original units>)``
        and its "date" attribute holds the list of date strings.

    Examples:
        Read chlorophyll-a data from a single file:
        >>> chla_data = read_pace_chla('path/to/single/file.nc')

        Read and combine chlorophyll-a data from all files matching a pattern:
        >>> chla_data = read_pace_chla('path/to/files/*.nc')
    """

    import glob
    import os

    # Imported for its side effect: it registers the `.rio` accessor used below.
    import rioxarray  # noqa: F401

    if isinstance(filepaths, str) and os.path.isfile(filepaths):
        filepaths = [filepaths]

    kwargs.setdefault("combine", "nested")
    # open_mfdataset raises ValueError if concat_dim is supplied together
    # with combine="by_coords", so only default it for nested combining.
    if kwargs["combine"] == "nested":
        kwargs.setdefault("concat_dim", "date")
    dataset = xr.open_mfdataset(filepaths, engine=engine, **kwargs)

    # A remaining string is a glob pattern: expand it in sorted order so the
    # dates below line up with the files that were opened.
    if not isinstance(filepaths, list):
        filepaths = sorted(glob.glob(filepaths))

    dates = [
        extract_date_from_filename(path).strftime("%Y-%m-%d") for path in filepaths
    ]
    dataset = dataset.assign_coords(date=("date", dates))

    chla = np.log10(dataset["chlor_a"])
    chla.attrs.update(
        {
            "units": f'lg({dataset["chlor_a"].attrs["units"]})',
        }
    )
    chla.attrs["date"] = dates

    chla = chla.transpose("lat", "lon", "date")

    chla.rio.write_crs("EPSG:4326", inplace=True)

    return chla

view_pace_pixel_locations(filepath, step=20, figsize=(8, 6), **kwargs)

Visualizes a subset of PACE pixel locations on a scatter plot.

This function reads PACE AOP data from a specified file, subsamples the data according to a step size, and plots the longitude and latitude of the selected pixels using a scatter plot.

Parameters:

Name Type Description Default
filepath str

The path to the file containing the PACE AOP data.

required
step int

Controls the subsampling density: every (dimension size // step)-th pixel is plotted along each dimension, so a larger value results in more data points being plotted. Defaults to 20.

20
**kwargs Any

Additional keyword arguments to pass to the plot.scatter method.

{}

Returns:

Type Description
plt.Figure

A matplotlib figure object containing the scatter plot.

Examples:

>>> plot = view_pace_pixel_locations("path/to/your/datafile.h5", step=10)
>>> plt.show()
Source code in hypercoast/pace.py
def view_pace_pixel_locations(
    filepath: str, step: int = 20, figsize: Tuple[float, float] = (8, 6), **kwargs: Any
) -> plt.Figure:
    """
    Visualizes a subset of PACE pixel locations on a scatter plot.

    This function reads PACE AOP data with `read_pace_aop`, subsamples the
    "number_of_lines" and "pixels_per_line" dimensions, and plots the
    longitude and latitude of the selected pixels.

    Args:
        filepath (str): The path to the file containing the PACE AOP data.
        step (int, optional): Controls the subsampling density. Every
            ``size // step``-th pixel is kept along each dimension, so a
            larger value plots more points. If ``size // step`` is zero
            (step larger than the dimension), every pixel along that
            dimension is kept. Defaults to 20.
        figsize (Tuple[float, float], optional): Figure size passed to
            `plt.subplots`. Defaults to (8, 6).
        **kwargs (Any): Additional keyword arguments to pass to the `scatter` call.

    Returns:
        plt.Figure: A matplotlib figure object containing the scatter plot.

    Example:
        >>> plot = view_pace_pixel_locations("path/to/your/datafile.h5", step=10)
        >>> plt.show()
    """

    # Read the data first so a failed read does not leave an empty figure open.
    dataset = read_pace_aop(filepath)

    # A zero stride is not a valid slice step; fall back to keeping every pixel.
    line_stride = (dataset.sizes["number_of_lines"] // step) or 1
    pixel_stride = (dataset.sizes["pixels_per_line"] // step) or 1

    # Subsample once and reuse the result for both axes.
    subset = dataset.sel(
        {
            "number_of_lines": slice(None, None, line_stride),
            "pixels_per_line": slice(None, None, pixel_stride),
        }
    )

    fig, ax = plt.subplots(figsize=figsize)
    ax.scatter(subset.longitude, subset.latitude, **kwargs)

    # Set labels and title
    ax.set_xlabel("Longitude")
    ax.set_ylabel("Latitude")
    ax.set_title("PACE Pixel Locations")

    return fig

viz_pace(dataset, wavelengths=None, method='nearest', figsize=(6.4, 4.8), cmap='jet', vmin=0, vmax=0.02, ncols=1, crs=None, xlim=None, ylim=None, **kwargs)

Plots PACE data from a given xarray Dataset.

Parameters:

Name Type Description Default
dataset xr.Dataset

An xarray Dataset containing the PACE data.

required
wavelengths array-like

Specific wavelengths to select. If None, all wavelengths are selected.

None
method str

Method to use for selection when wavelengths is not None. Defaults to "nearest".

'nearest'
figsize tuple

Figure size. Defaults to (6.4, 4.8).

(6.4, 4.8)
cmap str

Colormap to use. Defaults to "jet".

'jet'
vmin float

Minimum value for the colormap. Defaults to 0.

0
vmax float

Maximum value for the colormap. Defaults to 0.02.

0.02
ncols int

Number of columns in the plot. Defaults to 1.

1
crs str or cartopy.crs.CRS

Coordinate reference system to use. If None, a simple plot is created. Defaults to None. See https://scitools.org.uk/cartopy/docs/latest/reference/projections.html

None
xlim array-like

Limits for the x-axis. Defaults to None.

None
ylim array-like

Limits for the y-axis. Defaults to None.

None
**kwargs

Additional keyword arguments to pass to the plt.subplots function.

{}
Source code in hypercoast/pace.py
def viz_pace(
    dataset: Union[xr.Dataset, str],
    wavelengths: Optional[Union[List[float], float]] = None,
    method: str = "nearest",
    figsize: Tuple[float, float] = (6.4, 4.8),
    cmap: str = "jet",
    vmin: float = 0,
    vmax: float = 0.02,
    ncols: int = 1,
    crs: Optional[str] = None,
    xlim: Optional[List[float]] = None,
    ylim: Optional[List[float]] = None,
    **kwargs,
):
    """
    Plots PACE data from a given xarray Dataset.

    One panel is drawn per wavelength, laid out on a grid with ``ncols``
    columns. The figure is shown with ``plt.show()``; nothing is returned.

    Args:
        dataset (xr.Dataset or str): An xarray Dataset containing the PACE data,
            or a path to a PACE file that is loaded with `read_pace`.
        wavelengths (array-like or float, optional): Specific wavelengths to select.
            A scalar, list, tuple or NumPy array is accepted. If None, all
            wavelengths in the dataset are plotted.
        method (str, optional): Method to use for selection when wavelengths is not None. Defaults to "nearest".
        figsize (tuple, optional): Size of a single panel; the full figure is scaled
            by the number of rows and columns. Defaults to (6.4, 4.8).
        cmap (str, optional): Colormap to use. Defaults to "jet".
        vmin (float, optional): Minimum value for the colormap. Defaults to 0.
        vmax (float, optional): Maximum value for the colormap. Defaults to 0.02.
        ncols (int, optional): Number of columns in the plot. Defaults to 1.
        crs (str or cartopy.crs.CRS, optional): Coordinate reference system used to
            place the axis ticks. If None, a simple plot is created. The string
            "default" is mapped to ``cartopy.crs.PlateCarree()``. The map axes
            themselves are always created with a PlateCarree projection. Defaults to None.
            See https://scitools.org.uk/cartopy/docs/latest/reference/projections.html
        xlim (array-like, optional): Limits for the x-axis ticks (cartopy plots only).
            Defaults to the longitude extent rounded outwards to whole degrees.
        ylim (array-like, optional): Limits for the y-axis ticks (cartopy plots only).
            Defaults to the latitude extent rounded outwards to whole degrees.
        **kwargs: Additional keyword arguments to pass to the `plt.subplots` function.
    """

    import math

    import matplotlib.pyplot as plt
    import numpy as np

    if isinstance(dataset, str):
        dataset = read_pace(dataset, wavelengths, method)

    if wavelengths is not None:
        # Accept scalars, lists, tuples and arrays alike instead of wrapping
        # every non-list value (e.g. a tuple) into a one-element list.
        wavelengths = np.atleast_1d(wavelengths).tolist()
        dataset = dataset.sel(wavelength=wavelengths, method=method)
    else:
        # Plot every wavelength, as documented. Indexing with [0] here would
        # yield a scalar and make the len() call below fail.
        wavelengths = np.atleast_1d(dataset.coords["wavelength"].values).tolist()

    lat = dataset.coords["latitude"]
    lon = dataset.coords["longitude"]

    nrows = math.ceil(len(wavelengths) / ncols)
    grid_figsize = (figsize[0] * ncols, figsize[1] * nrows)
    use_cartopy = crs is not None

    if not use_cartopy:
        fig, axes = plt.subplots(
            nrows=nrows,
            ncols=ncols,
            figsize=grid_figsize,
            **kwargs,
        )
    else:
        # Import the submodules explicitly; a bare ``import cartopy`` does not
        # guarantee that ``cartopy.feature`` is loaded.
        import cartopy.crs as ccrs
        import cartopy.feature as cfeature
        from cartopy.mpl.ticker import LatitudeFormatter, LongitudeFormatter

        if crs == "default":
            crs = ccrs.PlateCarree()

        if xlim is None:
            xlim = [math.floor(float(lon.min())), math.ceil(float(lon.max()))]

        if ylim is None:
            ylim = [math.floor(float(lat.min())), math.ceil(float(lat.max()))]

        fig, axes = plt.subplots(
            nrows=nrows,
            ncols=ncols,
            figsize=grid_figsize,
            subplot_kw={"projection": ccrs.PlateCarree()},
            **kwargs,
        )

    # plt.subplots returns a bare Axes for a 1x1 grid, a 1-D array for a single
    # row/column and a 2-D array otherwise. Flatten to one row-major sequence so
    # panel k is always flat_axes[k].
    flat_axes = np.atleast_1d(axes).ravel()

    # Wavelengths actually present in the (possibly sub-selected) dataset;
    # these are used for the panel titles.
    selected = dataset.coords["wavelength"].values

    for index, wavelength in enumerate(wavelengths):
        ax = flat_axes[index]
        data = dataset.sel(wavelength=wavelength, method=method)["Rrs"]

        # Honour the caller's colormap settings in both plot modes.
        im = ax.pcolormesh(
            lon, lat, np.squeeze(data), cmap=cmap, vmin=vmin, vmax=vmax
        )

        if use_cartopy:
            ax.coastlines()
            ax.add_feature(cfeature.STATES, linewidth=0.5)
            ax.set_xticks(np.linspace(xlim[0], xlim[1], 5), crs=crs)
            ax.set_yticks(np.linspace(ylim[0], ylim[1], 5), crs=crs)
            # Formatters are created per axes; a formatter instance is bound
            # to the axis it is installed on.
            ax.xaxis.set_major_formatter(
                LongitudeFormatter(zero_direction_label=True)
            )
            ax.yaxis.set_major_formatter(LatitudeFormatter())

        ax.set_xlabel("Longitude")
        ax.set_ylabel("Latitude")
        ax.set_title(f"wavelength = {selected[index]} [nm]")
        # Attach the colorbar to this panel explicitly rather than to whatever
        # pyplot considers the current axes.
        fig.colorbar(im, ax=ax, label="Reflectance")

    plt.tight_layout()
    plt.show()

viz_pace_chla(data, date=None, aspect=2, cmap='jet', size=6, **kwargs)

Visualizes PACE chlorophyll-a data using an xarray DataArray.

This function supports loading data from a file path (str) or directly using an xarray DataArray. It allows for selection of a specific date for visualization or averages over all dates if none is specified.

Parameters:

Name Type Description Default
data Union[str, xr.DataArray]

The chlorophyll-a data to visualize. Can be a file path or an xarray DataArray.

required
date Optional[str]

Specific date to visualize. If None, averages over all dates. Defaults to None.

None
aspect float

Aspect ratio of the plot. Defaults to 2.

2
cmap str

Colormap for the plot. Defaults to "jet".

'jet'
size int

Size of the plot. Defaults to 6.

6
**kwargs Any

Additional keyword arguments to pass to xarray.plot.

{}

Returns:

Type Description
xr.plot.facetgrid.FacetGrid

The plot generated from the chlorophyll-a data.

Exceptions:

Type Description
ValueError

If data is not a file path (str) or an xarray DataArray.

Source code in hypercoast/pace.py
def viz_pace_chla(
    data: Union[str, xr.DataArray],
    date: Optional[str] = None,
    aspect: float = 2,
    cmap: str = "jet",
    size: int = 6,
    **kwargs: Any,
) -> xr.plot.facetgrid.FacetGrid:
    """
    Plots PACE chlorophyll-a data held in an xarray DataArray.

    The input may be a file path, in which case it is loaded with
    `read_pace_chla`, or an already loaded DataArray. When a date is given,
    that date is selected; otherwise, if the data has a "date" coordinate,
    the mean over "date" is plotted.

    Args:
        data (Union[str, xr.DataArray]): Chlorophyll-a data to plot, as a file path or an xarray DataArray.
        date (Optional[str], optional): Date to select before plotting. If None, the data
            is averaged over the "date" dimension when present. Defaults to None.
        aspect (float, optional): Aspect ratio forwarded to the plot call. Defaults to 2.
        cmap (str, optional): Colormap forwarded to the plot call. Defaults to "jet".
        size (int, optional): Size forwarded to the plot call. Defaults to 6.
        **kwargs (Any): Extra keyword arguments forwarded to `xarray.plot`.

    Returns:
        xr.plot.facetgrid.FacetGrid: The object returned by the DataArray's plot call.

    Raises:
        ValueError: If `data` is neither a file path (str) nor an xarray DataArray.
    """
    # Resolve the input to a DataArray first.
    if isinstance(data, str):
        chla = read_pace_chla(data)
    elif isinstance(data, xr.DataArray):
        chla = data
    else:
        raise ValueError("data must be an xarray DataArray")

    # Reduce along the date axis: either pick the requested date or average.
    if date is None:
        has_date_coord = "date" in chla.coords
        if has_date_coord:
            chla = chla.mean(dim="date")
    else:
        chla = chla.sel(date=date)

    return chla.plot(aspect=aspect, cmap=cmap, size=size, **kwargs)