fire2a.clustering

👋🌎 🌲🔥 Clustering module: groups adjacent raster cells into clusters using agglomerative clustering.

View Source

  1#!python3
  2"""👋🌎 🌲🔥
  3Clustering module: groups adjacent raster cells into clusters using agglomerative clustering.
  4"""
  5__author__ = "Rodrigo Mahaluf-Recasens"
  6__revision__ = "$Format:%H$"
  7
  8import numpy as np
  9from scipy.sparse import dok_matrix, lil_matrix
 10from sklearn.cluster import AgglomerativeClustering
 11from typing import Union
 12
 13from .adjacency import adjacent_cells
 14
 15
 16def raster_clusters(
 17    stacked_rasters: np.ndarray,
 18    cellsize: float,
 19    min_surface: float,
 20    max_surface: float,
 21    distance_threshold: float = 50.0,
 22    total_clusters: Union[int, None] = None,
 23    connectivity: Union[int, None] = None,
 24) -> np.ndarray:
 25    """
 26    This function receives as arguments:
 27    1. An array with the raster paths, e.g. raster_paths=[elevation_path,fuel_path,slope_path,...]
 28    You can provide as many as you want, just make sure all the raster layers are numerically defined, 
 29    even if there are cathegorical variables, you can not use string, transform them into a numerical raster.
 30
 31    2. total_clusters: number of clusters defined by the user.
 32
 33    3. min_surface: minimum area to consider into the cells aggregation process.
 34
 35    4. min_surface: maximum area to condsider into the cells aggregation process.
 36    """  # fmt: skip
 37    if min_surface >= max_surface:
 38        raise ValueError("min_surface must be less than max_surface.")
 39
 40    else:
 41        _, nrows, ncols = stacked_rasters.shape
 42        ncells = nrows * ncols
 43        cell_area = cellsize**2
 44        connectivity = connectivity if connectivity else 4
 45        assert connectivity == 4 or connectivity == 8, "Connectivity mut be either 4 or 8"
 46
 47        flattened_data = stacked_rasters.T.reshape(-1, stacked_rasters.shape[0])  # validado
 48
 49        id_pixel = list(range(1, ncells + 1))  # to set and id to every cell
 50
 51        grid = lil_matrix((nrows, ncols), dtype=int)
 52        for idx, value in enumerate(id_pixel):
 53            row = idx // ncols
 54            col = idx % ncols
 55            grid[row, col] = value
 56
 57        forest_grid_adjCells = adjacent_cells(grid, connectivity=connectivity)
 58
 59        dict_forest_grid_adjCells = dict(
 60            zip(id_pixel, forest_grid_adjCells)
 61        )  # A dictionary of adjacents cells per id cell
 62
 63        adjacency_matrix = dok_matrix((ncells, ncells))  # Create an empty matrix to save binaries adjacencies
 64
 65        ## Iterate over the dictionary items and update the adjacency matrix with 1 when a cell is adjacent, 0 when is not.
 66        for key, values in dict_forest_grid_adjCells.items():
 67            for value in values:
 68                adjacency_matrix[key - 1, value - 1] = 1
 69
 70        # Create an instance for the Agglomerative Clustering Algorithm with connectivity from the adjacency matrix
 71        clustering = AgglomerativeClustering(
 72            n_clusters=total_clusters, connectivity=adjacency_matrix, distance_threshold=distance_threshold
 73        )
 74
 75        # Apply the algorithm over the whole data
 76        clustering.fit(flattened_data)
 77        # Reshape the cluster assignments to match the original raster shape
 78        cluster_raster = clustering.labels_.reshape((nrows, ncols))
 79
 80        counts = np.bincount(cluster_raster.flatten())
 81
 82        # Assuming square cells
 83        min_elements = min_surface / (cell_area)
 84        max_elements = max_surface / (cell_area)
 85
 86        # Apply minimum and maximum surface filtering
 87        smaller_clusters = np.where(counts < min_elements)[0]
 88        larger_clusters = np.where(counts > max_elements)[0]
 89
 90        for cluster in smaller_clusters:
 91            indices = np.where(cluster_raster == cluster)
 92            cluster_raster[indices] = -1
 93
 94        for cluster in larger_clusters:
 95            indices = np.where(cluster_raster == cluster)
 96            cluster_raster[indices] = -1
 97
 98        cluster_raster = cluster_raster.astype(np.int16)
 99
100        return cluster_raster

def raster_clusters( stacked_rasters: numpy.ndarray, cellsize: float, min_surface: float, max_surface: float, distance_threshold: float = 50.0, total_clusters: Optional[int] = None, connectivity: Optional[int] = None) -> numpy.ndarray: View Source

 17def raster_clusters(
 18    stacked_rasters: np.ndarray,
 19    cellsize: float,
 20    min_surface: float,
 21    max_surface: float,
 22    distance_threshold: float = 50.0,
 23    total_clusters: Union[int, None] = None,
 24    connectivity: Union[int, None] = None,
 25) -> np.ndarray:
 26    """
 27    This function receives as arguments:
 28    1. An array with the raster paths, e.g. raster_paths=[elevation_path,fuel_path,slope_path,...]
 29    You can provide as many as you want, just make sure all the raster layers are numerically defined, 
 30    even if there are cathegorical variables, you can not use string, transform them into a numerical raster.
 31
 32    2. total_clusters: number of clusters defined by the user.
 33
 34    3. min_surface: minimum area to consider into the cells aggregation process.
 35
 36    4. min_surface: maximum area to condsider into the cells aggregation process.
 37    """  # fmt: skip
 38    if min_surface >= max_surface:
 39        raise ValueError("min_surface must be less than max_surface.")
 40
 41    else:
 42        _, nrows, ncols = stacked_rasters.shape
 43        ncells = nrows * ncols
 44        cell_area = cellsize**2
 45        connectivity = connectivity if connectivity else 4
 46        assert connectivity == 4 or connectivity == 8, "Connectivity mut be either 4 or 8"
 47
 48        flattened_data = stacked_rasters.T.reshape(-1, stacked_rasters.shape[0])  # validado
 49
 50        id_pixel = list(range(1, ncells + 1))  # to set and id to every cell
 51
 52        grid = lil_matrix((nrows, ncols), dtype=int)
 53        for idx, value in enumerate(id_pixel):
 54            row = idx // ncols
 55            col = idx % ncols
 56            grid[row, col] = value
 57
 58        forest_grid_adjCells = adjacent_cells(grid, connectivity=connectivity)
 59
 60        dict_forest_grid_adjCells = dict(
 61            zip(id_pixel, forest_grid_adjCells)
 62        )  # A dictionary of adjacents cells per id cell
 63
 64        adjacency_matrix = dok_matrix((ncells, ncells))  # Create an empty matrix to save binaries adjacencies
 65
 66        ## Iterate over the dictionary items and update the adjacency matrix with 1 when a cell is adjacent, 0 when is not.
 67        for key, values in dict_forest_grid_adjCells.items():
 68            for value in values:
 69                adjacency_matrix[key - 1, value - 1] = 1
 70
 71        # Create an instance for the Agglomerative Clustering Algorithm with connectivity from the adjacency matrix
 72        clustering = AgglomerativeClustering(
 73            n_clusters=total_clusters, connectivity=adjacency_matrix, distance_threshold=distance_threshold
 74        )
 75
 76        # Apply the algorithm over the whole data
 77        clustering.fit(flattened_data)
 78        # Reshape the cluster assignments to match the original raster shape
 79        cluster_raster = clustering.labels_.reshape((nrows, ncols))
 80
 81        counts = np.bincount(cluster_raster.flatten())
 82
 83        # Assuming square cells
 84        min_elements = min_surface / (cell_area)
 85        max_elements = max_surface / (cell_area)
 86
 87        # Apply minimum and maximum surface filtering
 88        smaller_clusters = np.where(counts < min_elements)[0]
 89        larger_clusters = np.where(counts > max_elements)[0]
 90
 91        for cluster in smaller_clusters:
 92            indices = np.where(cluster_raster == cluster)
 93            cluster_raster[indices] = -1
 94
 95        for cluster in larger_clusters:
 96            indices = np.where(cluster_raster == cluster)
 97            cluster_raster[indices] = -1
 98
 99        cluster_raster = cluster_raster.astype(np.int16)
100
101        return cluster_raster

This function receives as arguments:

stacked_rasters: a 3-D array with the raster layers stacked along the first axis, e.g. [elevation, fuel, slope, ...]. You can provide as many layers as you want; just make sure every layer is numeric. Categorical variables cannot be passed as strings, so encode them as a numerical raster first.

total_clusters: number of clusters defined by the user.
min_surface: minimum area to consider into the cells aggregation process.
max_surface: maximum area to consider in the cell aggregation process.