fire2a.clustering
👋🌎 🌲🔥 This is the clustering module docstring
#!python3
"""👋🌎 🌲🔥
Clustering module: groups adjacent raster cells with similar values using
connectivity-constrained agglomerative clustering.
"""
__author__ = "Rodrigo Mahaluf-Recasens"
__revision__ = "$Format:%H$"

import numpy as np
from scipy.sparse import dok_matrix, lil_matrix
from sklearn.cluster import AgglomerativeClustering
from typing import Union

from .adjacency import adjacent_cells


def raster_clusters(
    stacked_rasters: np.ndarray,
    cellsize: float,
    min_surface: float,
    max_surface: float,
    distance_threshold: float = 50.0,
    total_clusters: Union[int, None] = None,
    connectivity: Union[int, None] = None,
) -> np.ndarray:
    """Cluster the cells of a raster stack into spatially connected groups.

    Every cell is described by its values across all layers; cells are merged
    with agglomerative clustering restricted to grid neighbours. Clusters
    whose area falls outside ``[min_surface, max_surface]`` are discarded.

    Args:
        stacked_rasters: 3-D array of shape (layers, rows, columns). Every
            layer must be numeric; encode categorical variables as numbers.
        cellsize: side length of a (square) cell; the cell area is
            ``cellsize**2``.
        min_surface: minimum area a cluster must cover to be kept.
        max_surface: maximum area a cluster may cover to be kept.
        distance_threshold: linkage distance above which clusters are not
            merged. Ignored when ``total_clusters`` is given, because
            scikit-learn accepts only one of the two criteria.
        total_clusters: number of clusters requested by the user, or None to
            let ``distance_threshold`` decide.
        connectivity: 4 or 8 neighbours per cell; None (or 0) means 4.

    Returns:
        2-D integer array of shape (rows, columns) with the cluster label of
        each cell, or -1 where the cell's cluster is smaller than
        ``min_surface`` or larger than ``max_surface``. The dtype is int16,
        widened to int32 only when a label would not fit in int16.

    Raises:
        ValueError: if ``min_surface >= max_surface`` or ``connectivity`` is
            neither 4 nor 8.
    """
    if min_surface >= max_surface:
        raise ValueError("min_surface must be less than max_surface.")

    nlayers, nrows, ncols = stacked_rasters.shape
    ncells = nrows * ncols
    cell_area = cellsize**2

    connectivity = connectivity if connectivity else 4
    # Explicit raise instead of assert: asserts are stripped under `python -O`.
    if connectivity not in (4, 8):
        raise ValueError("Connectivity must be either 4 or 8")

    # One row per cell, one column per layer, cells in ROW-MAJOR order.
    # The adjacency matrix below and the final reshape((nrows, ncols)) are both
    # row-major, so the samples must use the same ordering; transposing the
    # whole stack first would enumerate the cells column-major instead.
    flattened_data = stacked_rasters.reshape(nlayers, ncells).T

    # Grid holding the 1-based id of every cell, numbered row by row.
    cell_ids = np.arange(1, ncells + 1)
    grid = lil_matrix(cell_ids.reshape(nrows, ncols), dtype=int)

    # List of neighbour ids for each cell, in the same order as the ids.
    neighbours_per_cell = adjacent_cells(grid, connectivity=connectivity)

    # Binary adjacency matrix: entry (i, j) is 1 when cell j neighbours cell i.
    adjacency_matrix = dok_matrix((ncells, ncells))
    for cell_index, neighbours in zip(range(ncells), neighbours_per_cell):
        for neighbour_id in neighbours:
            adjacency_matrix[cell_index, neighbour_id - 1] = 1

    # scikit-learn requires exactly one of n_clusters / distance_threshold to
    # be set; an explicit cluster count takes precedence over the threshold.
    if total_clusters is not None:
        distance_threshold = None

    clustering = AgglomerativeClustering(
        n_clusters=total_clusters, connectivity=adjacency_matrix, distance_threshold=distance_threshold
    )
    clustering.fit(flattened_data)

    # Back to the raster layout (row-major, matching flattened_data).
    cluster_raster = clustering.labels_.reshape((nrows, ncols))

    # Cluster sizes in cells, compared against the area limits (square cells).
    counts = np.bincount(cluster_raster.ravel())
    min_elements = min_surface / cell_area
    max_elements = max_surface / cell_area
    out_of_range = (counts < min_elements) | (counts > max_elements)

    # Mark every cell of a too-small or too-large cluster with -1.
    cluster_raster = np.where(out_of_range[cluster_raster], -1, cluster_raster)

    # int16 as before, unless a surviving label would overflow it.
    fits_int16 = cluster_raster.max(initial=-1) <= np.iinfo(np.int16).max
    return cluster_raster.astype(np.int16 if fits_int16 else np.int32)
def
raster_clusters( stacked_rasters: numpy.ndarray, cellsize: float, min_surface: float, max_surface: float, distance_threshold: float = 50.0, total_clusters: Optional[int] = None, connectivity: Optional[int] = None) -> numpy.ndarray:
def raster_clusters(
    stacked_rasters: np.ndarray,
    cellsize: float,
    min_surface: float,
    max_surface: float,
    distance_threshold: float = 50.0,
    total_clusters: Union[int, None] = None,
    connectivity: Union[int, None] = None,
) -> np.ndarray:
    """Cluster the cells of a raster stack into spatially connected groups.

    Every cell is described by its values across all layers; cells are merged
    with agglomerative clustering restricted to grid neighbours. Clusters
    whose area falls outside ``[min_surface, max_surface]`` are discarded.

    Args:
        stacked_rasters: 3-D array of shape (layers, rows, columns). Every
            layer must be numeric; encode categorical variables as numbers.
        cellsize: side length of a (square) cell; the cell area is
            ``cellsize**2``.
        min_surface: minimum area a cluster must cover to be kept.
        max_surface: maximum area a cluster may cover to be kept.
        distance_threshold: linkage distance above which clusters are not
            merged. Ignored when ``total_clusters`` is given, because
            scikit-learn accepts only one of the two criteria.
        total_clusters: number of clusters requested by the user, or None to
            let ``distance_threshold`` decide.
        connectivity: 4 or 8 neighbours per cell; None (or 0) means 4.

    Returns:
        2-D integer array of shape (rows, columns) with the cluster label of
        each cell, or -1 where the cell's cluster is smaller than
        ``min_surface`` or larger than ``max_surface``. The dtype is int16,
        widened to int32 only when a label would not fit in int16.

    Raises:
        ValueError: if ``min_surface >= max_surface`` or ``connectivity`` is
            neither 4 nor 8.
    """
    if min_surface >= max_surface:
        raise ValueError("min_surface must be less than max_surface.")

    nlayers, nrows, ncols = stacked_rasters.shape
    ncells = nrows * ncols
    cell_area = cellsize**2

    connectivity = connectivity if connectivity else 4
    # Explicit raise instead of assert: asserts are stripped under `python -O`.
    if connectivity not in (4, 8):
        raise ValueError("Connectivity must be either 4 or 8")

    # One row per cell, one column per layer, cells in ROW-MAJOR order.
    # The adjacency matrix below and the final reshape((nrows, ncols)) are both
    # row-major, so the samples must use the same ordering; transposing the
    # whole stack first would enumerate the cells column-major instead.
    flattened_data = stacked_rasters.reshape(nlayers, ncells).T

    # Grid holding the 1-based id of every cell, numbered row by row.
    cell_ids = np.arange(1, ncells + 1)
    grid = lil_matrix(cell_ids.reshape(nrows, ncols), dtype=int)

    # List of neighbour ids for each cell, in the same order as the ids.
    neighbours_per_cell = adjacent_cells(grid, connectivity=connectivity)

    # Binary adjacency matrix: entry (i, j) is 1 when cell j neighbours cell i.
    adjacency_matrix = dok_matrix((ncells, ncells))
    for cell_index, neighbours in zip(range(ncells), neighbours_per_cell):
        for neighbour_id in neighbours:
            adjacency_matrix[cell_index, neighbour_id - 1] = 1

    # scikit-learn requires exactly one of n_clusters / distance_threshold to
    # be set; an explicit cluster count takes precedence over the threshold.
    if total_clusters is not None:
        distance_threshold = None

    clustering = AgglomerativeClustering(
        n_clusters=total_clusters, connectivity=adjacency_matrix, distance_threshold=distance_threshold
    )
    clustering.fit(flattened_data)

    # Back to the raster layout (row-major, matching flattened_data).
    cluster_raster = clustering.labels_.reshape((nrows, ncols))

    # Cluster sizes in cells, compared against the area limits (square cells).
    counts = np.bincount(cluster_raster.ravel())
    min_elements = min_surface / cell_area
    max_elements = max_surface / cell_area
    out_of_range = (counts < min_elements) | (counts > max_elements)

    # Mark every cell of a too-small or too-large cluster with -1.
    cluster_raster = np.where(out_of_range[cluster_raster], -1, cluster_raster)

    # int16 as before, unless a surviving label would overflow it.
    fits_int16 = cluster_raster.max(initial=-1) <= np.iinfo(np.int16).max
    return cluster_raster.astype(np.int16 if fits_int16 else np.int32)
This function receives as arguments:
- stacked_rasters: a 3-D array holding the stacked raster layers (e.g. elevation, fuel, slope, ...), with shape (layers, rows, columns). You can provide as many layers as you want; just make sure every layer is numerically defined. Categorical variables cannot be passed as strings; transform them into a numerical raster first.
total_clusters: number of clusters defined by the user.
min_surface: minimum area a cluster must cover to be kept by the cell aggregation process.
max_surface: maximum area a cluster may cover to be kept by the cell aggregation process.