fire2a.weathers
👋🌎 Some functions related to weather scenario creation.
#!python3
"""👋🌎
Some functions related to weather scenario creation.
"""
__author__ = "Rodrigo Mahaluf-Recasens"
__revision__ = "$Format:%H$"

from collections import Counter
from datetime import datetime, timedelta
from pathlib import Path
from random import choice, randint
from typing import List, Optional, Union

from numpy import vstack
from numpy.random import normal
from pandas import DataFrame


def re_size_durations(scenario_lengths: List[int], n_samples: Optional[int] = 100) -> List[int]:
    """Resize a list of scenario durations to 'n_samples' items, keeping every distinct
    duration represented.

    Args:
        scenario_lengths (List[int]): Desired lengths (in hours) for each weather scenario.
        n_samples (int): How many durations to return. Defaults to 100; None is treated as
            "not provided" and also yields 100.

    Returns:
        List[int]: A list with exactly 'n_samples' durations drawn from 'scenario_lengths'.

    Raises:
        ValueError: If 'scenario_lengths' is not a list of integers.
        ValueError: If 'n_samples' is not an integer or is negative.
        ValueError: If 'scenario_lengths' is empty while 'n_samples' is positive.
    """
    if n_samples is None:
        n_samples = 100

    if not all(isinstance(length, int) for length in scenario_lengths):
        raise ValueError("Input 'scenario_lengths' must be a list of integers.")

    if not isinstance(n_samples, int):
        raise ValueError("Input 'n_samples' must be an integer.")

    if n_samples < 0:
        raise ValueError("Input 'n_samples' must be non-negative.")

    # Without this guard the padding loop below would call choice([]) and raise IndexError
    if not scenario_lengths and n_samples > 0:
        raise ValueError("Input 'scenario_lengths' must not be empty.")

    total_scenarios = len(scenario_lengths)

    # Stratified seed: each distinct duration contributes between 1 and 10 items,
    # never more than its number of occurrences in the input
    new_list: List[int] = []
    for duration, count in Counter(scenario_lengths).items():
        quota = min(max(int(n_samples * count / total_scenarios), 1), 10)
        new_list.extend([duration] * min(count, quota))

    # Pad with random draws from the input (follows its empirical distribution)
    while len(new_list) < n_samples:
        new_list.append(choice(scenario_lengths))

    # Trim random items if the seed overshot the requested size
    while len(new_list) > n_samples:
        new_list.remove(choice(new_list))
    return new_list


def cut_weather_scenarios(
    weather_records: DataFrame,
    scenario_lengths: List[int],
    output_folder: Union[Path, str] = None,
    n_output_files: Union[int, None] = None,
) -> List[DataFrame]:
    """Split weather records into smaller scenarios following specified scenario lengths.

    Each scenario is a contiguous slice of 'weather_records' starting at a random row and
    is written to '<output_folder>/Weather<i>.csv' (i starting at 1).

    Args:
        weather_records (DataFrame): Weather records where each row represents an hour of data.
        scenario_lengths (List[int]): Desired lengths (in hours) for each weather scenario.
        output_folder (Union[Path, str], optional): Folder where the output will be stored.
            If not provided, the 'Weathers' directory is used.
        n_output_files (int, optional): How many weather files (scenarios) to create,
            following the distribution of 'scenario_lengths'. If not provided, 100.

    Returns:
        List[DataFrame]: The generated scenarios, in the same order as the written files.

    Raises:
        ValueError: If 'weather_records' is not a Pandas DataFrame.
        ValueError: If 'scenario_lengths' is not a list of integers.
        ValueError: If 'n_output_files' is not an integer.
        ValueError: If any scenario length is greater than the total length of weather_records.
    """
    if not isinstance(weather_records, DataFrame):
        raise ValueError("Input 'weather_records' must be a Pandas DataFrame.")

    if not all(isinstance(length, int) for length in scenario_lengths):
        raise ValueError("Input 'scenario_lengths' must be a list of integers.")

    # Apply the documented default here; forwarding None would be rejected downstream
    if n_output_files is None:
        n_output_files = 100

    # Create a representative sample of durations
    sample = re_size_durations(scenario_lengths, n_output_files)

    total_data_length = len(weather_records)
    if any(length > total_data_length for length in sample):
        raise ValueError("Scenario length cannot be greater than the total length of weather records")

    # Only touch the filesystem once every input has been validated
    output_folder = Path(output_folder) if output_folder else Path("Weathers")
    output_folder.mkdir(parents=True, exist_ok=True)

    scenarios: List[DataFrame] = []
    for index, length in enumerate(sample, start=1):
        # Random start such that the slice fits inside the records
        start_index = randint(0, total_data_length - length)
        scenario = weather_records.iloc[start_index : start_index + length]

        scenario.to_csv(output_folder / f"Weather{index}.csv", index=False)
        scenarios.append(scenario)

    return scenarios


# Example usage:
# Assuming 'weather_data' is your DataFrame and 'scenario_lengths' is a list of desired scenario lengths
# weather_data = pd.read_csv('your_weather_data.csv')
# scenario_lengths = [24, 48, 72]  # Example lengths
# weather_scenarios = cut_weather_scenarios(weather_data, scenario_lengths)


def random_weather_scenario_generator(
    n_scenarios: int,
    hr_limit: Optional[int] = None,
    lambda_ws: Optional[float] = None,
    lambda_wd: Optional[float] = None,
    output_folder: Optional[Union[Path, str]] = None,
):
    """Generate random weather scenarios and save them as CSV files.

    Each file has between 5 and 'hr_limit' hourly rows with the columns
    Instance, datetime, WD, WS and FireScenario. After two random initial rows, each
    series follows value[i] = lambda * value[i-1] + (1 - lambda) * value[i-2].

    Args:
        n_scenarios (int): Number of weather scenarios to generate.
        hr_limit (int, optional): Maximum number of hours per scenario (default 72, minimum 5).
        lambda_ws (float, optional): Weight of the previous hour in the wind speed
            recurrence (default 0.5). An explicit 0 is honoured.
        lambda_wd (float, optional): Weight of the previous hour in the wind direction
            recurrence (default 0.5). An explicit 0 is honoured.
        output_folder (Union[Path, str], optional): Folder where files are saved
            (default 'Weathers').

    Raises:
        ValueError: If 'hr_limit' is smaller than 5.
    """
    hr_limit = hr_limit if hr_limit else 72
    if hr_limit < 5:
        raise ValueError("Input 'hr_limit' must be at least 5.")
    # 'is None' (not truthiness) so that an explicit 0 is not replaced by the default
    lambda_ws = 0.5 if lambda_ws is None else lambda_ws
    lambda_wd = 0.5 if lambda_wd is None else lambda_wd
    output_folder = Path(output_folder) if output_folder else Path("Weathers")
    output_folder.mkdir(parents=True, exist_ok=True)

    for index in range(1, n_scenarios + 1):
        n_rows = randint(5, hr_limit)

        instance = ["NA"] * n_rows
        fire_scenario = [2] * n_rows

        wd_0 = randint(0, 359)
        ws_0 = randint(1, 100)

        # NOTE(review): abs() mirrors negative values and WD may exceed 359; confirm
        # whether wrapping modulo 360 is expected by consumers of these files.
        wd_1 = abs(wd_0 + normal(loc=0.0, scale=30.0, size=None))
        ws_1 = abs(ws_0 + normal(loc=0.0, scale=8.0, size=None))

        ws = [ws_0, ws_1]
        wd = [wd_0, wd_1]

        # Single clock read so every row of one scenario shares the same base time
        start = datetime.now()
        dt = [(start + timedelta(hours=i)).isoformat(timespec="minutes") for i in range(n_rows)]
        for row in range(2, n_rows):
            wd.append(wd[row - 1] * lambda_wd + wd[row - 2] * (1 - lambda_wd))
            ws.append(ws[row - 1] * lambda_ws + ws[row - 2] * (1 - lambda_ws))

        df = DataFrame(
            vstack((instance, dt, wd, ws, fire_scenario)).T,
            columns=["Instance", "datetime", "WD", "WS", "FireScenario"],
        )
        # NOTE(review): lowercase 'weather' differs from 'Weather{index}.csv' used by
        # cut_weather_scenarios; kept as-is to avoid breaking existing consumers.
        df.to_csv(output_folder / f"weather{index}.csv", index=False)
def re_size_durations(scenario_lengths: List[int], n_samples: Optional[int] = 100) -> List[int]:
    """Resize a list of scenario durations to 'n_samples' items, keeping every distinct
    duration represented.

    The result is seeded with a stratified pick (each distinct duration contributes between
    1 and 10 items, capped by its own occurrence count), then padded with random draws from
    'scenario_lengths' or trimmed at random until it has exactly 'n_samples' items.

    Args:
        scenario_lengths (List[int]): Desired lengths (in hours) for each weather scenario.
        n_samples (int): How many durations to return. Defaults to 100; None is treated as
            "not provided" and also yields 100.

    Returns:
        List[int]: A list with exactly 'n_samples' durations drawn from 'scenario_lengths'.

    Raises:
        ValueError: If 'scenario_lengths' is not a list of integers.
        ValueError: If 'n_samples' is not an integer or is negative.
        ValueError: If 'scenario_lengths' is empty while 'n_samples' is positive.
    """
    if n_samples is None:
        n_samples = 100

    # Every requested duration must be an integer number of hours
    if not all(isinstance(length, int) for length in scenario_lengths):
        raise ValueError("Input 'scenario_lengths' must be a list of integers.")

    # The sample size must be an integer (message names the actual parameter)
    if not isinstance(n_samples, int):
        raise ValueError("Input 'n_samples' must be an integer.")

    # A negative target would empty the list and then fail inside choice([])
    if n_samples < 0:
        raise ValueError("Input 'n_samples' must be non-negative.")

    # Without this guard the padding loop below would call choice([]) and raise IndexError
    if not scenario_lengths and n_samples > 0:
        raise ValueError("Input 'scenario_lengths' must not be empty.")

    total_scenarios = len(scenario_lengths)

    # Stratified seed: proportional share per duration, clamped to [1, 10] and to the
    # number of times the duration actually occurs in the input
    new_list: List[int] = []
    for duration, count in Counter(scenario_lengths).items():
        quota = min(max(int(n_samples * count / total_scenarios), 1), 10)
        new_list.extend([duration] * min(count, quota))

    # If the seed is shorter than required, pad with random draws from the input
    while len(new_list) < n_samples:
        new_list.append(choice(scenario_lengths))

    # If the seed is longer than required, drop random durations
    while len(new_list) > n_samples:
        new_list.remove(choice(new_list))
    return new_list
Resize a list of scenario durations to generate a new list maintaining representation while considering outliers.
Args: scenario_lengths (List[int]): A list of integers representing desired lengths (in hours) for each weather scenario. n_samples (int): Optional integer indicating how many weather files (scenarios) will be created following the distribution of 'scenario_lengths'. If not provided, defaults to 100.
Returns: List[int] A new list of durations, preserving the representation of the original list.
Raises: ValueError: If 'scenario_lengths' is not a list of integers. ValueError: If 'n_samples' is provided but not an integer.
def cut_weather_scenarios(
    weather_records: DataFrame,
    scenario_lengths: List[int],
    output_folder: Union[Path, str] = None,
    n_output_files: Union[int, None] = None,
) -> List[DataFrame]:
    """Split weather records into smaller scenarios following specified scenario lengths.

    The number of output weather scenarios can be customized using 'n_output_files'. Each
    scenario is a contiguous slice of 'weather_records' starting at a random row and is
    written to '<output_folder>/Weather<i>.csv' (i starting at 1).

    Args:
        weather_records (DataFrame): Weather records where each row represents an hour of data.
        scenario_lengths (List[int]): Desired lengths (in hours) for each weather scenario.
        output_folder (Union[Path, str], optional): Folder where the output will be stored.
            If not provided, the 'Weathers' directory is used.
        n_output_files (int, optional): How many weather files (scenarios) to create,
            following the distribution of 'scenario_lengths'. If not provided, 100.

    Returns:
        List[DataFrame]: The generated scenarios, in the same order as the written files.

    Raises:
        ValueError: If 'weather_records' is not a Pandas DataFrame.
        ValueError: If 'scenario_lengths' is not a list of integers.
        ValueError: If 'n_output_files' is not an integer.
        ValueError: If any scenario length is greater than the total length of weather_records.
    """
    # Check if input is a Pandas DataFrame
    if not isinstance(weather_records, DataFrame):
        raise ValueError("Input 'weather_records' must be a Pandas DataFrame.")

    # Check if input is a list of integers
    if not all(isinstance(length, int) for length in scenario_lengths):
        raise ValueError("Input 'scenario_lengths' must be a list of integers.")

    # Apply the documented default here; forwarding None to re_size_durations
    # made every call that omitted 'n_output_files' fail
    if n_output_files is None:
        n_output_files = 100

    # Create a representative sample of durations
    sample = re_size_durations(scenario_lengths, n_output_files)

    total_data_length = len(weather_records)

    # A scenario cannot be longer than the available records
    if any(length > total_data_length for length in sample):
        raise ValueError("Scenario length cannot be greater than the total length of weather records")

    # Only touch the filesystem once every input has been validated
    output_folder = Path(output_folder) if output_folder else Path("Weathers")
    output_folder.mkdir(parents=True, exist_ok=True)

    scenarios: List[DataFrame] = []

    for index, length in enumerate(sample, start=1):
        # Random start such that the whole slice fits inside the records
        start_index = randint(0, total_data_length - length)
        scenario = weather_records.iloc[start_index : start_index + length]

        # Save the weather scenario and keep it for the caller
        scenario.to_csv(output_folder / f"Weather{index}.csv", index=False)
        scenarios.append(scenario)

    return scenarios
Split weather records into smaller scenarios following specified scenario lengths. The number of output weather scenarios can be customized using the 'n_output_files' parameter.
Args: weather_records (DataFrame): weather records where each row represents an hour of data. scenario_lengths (List[int]): desired lengths (in hours) for each weather scenario.
- output_folder : Union[Path,str], optional A Path object or a string representing the folder path where the output will be stored. If not provided, 'Weathers' directory will be used.
- n_output_files : integer, optional An integer that indicates how many weather files (scenarios) will be created following the distribution of 'scenario_lengths'. If not provided, will be set to 100.
Output:
- Writes one CSV file per generated weather scenario, following the specified lengths.
Raises:
- ValueError If input 'weather_records' is not a Pandas DataFrame. If input 'scenario_lengths' is not a List of integers. If input 'n_output_files' is not an integer. If any scenario length is greater than the total length of weather_records.
def random_weather_scenario_generator(
    n_scenarios: int,
    hr_limit: Optional[int] = None,
    lambda_ws: Optional[float] = None,
    lambda_wd: Optional[float] = None,
    output_folder: Optional[Union[Path, str]] = None,
):
    """Generates random weather scenarios and saves them as CSV files.

    Each file '<output_folder>/weather<i>.csv' has between 5 and 'hr_limit' hourly rows with
    the columns Instance, datetime, WD, WS and FireScenario. After two random initial rows,
    each series follows value[i] = lambda * value[i-1] + (1 - lambda) * value[i-2].

    Parameters:
    - n_scenarios : int
        Number of weather scenarios to generate.
    - hr_limit : int, optional
        Limit for the number of hours for each scenario (default is 72, minimum is 5).
    - lambda_ws : float, optional
        Weight of the previous hour in the wind speed recurrence (default is 0.5).
        An explicit 0 is honoured, making each row repeat the value from two hours earlier.
    - lambda_wd : float, optional
        Weight of the previous hour in the wind direction recurrence (default is 0.5).
        An explicit 0 is honoured, making each row repeat the value from two hours earlier.
    - output_folder : Union[Path, str], optional
        Path to the folder where output files will be saved (default is 'Weathers').

    Output:
    - Saves generated weather scenarios as CSV files in the specified output folder.

    Raises:
    - ValueError
        If 'hr_limit' is smaller than 5 (the minimum number of rows per scenario).
    """
    hr_limit = hr_limit if hr_limit else 72
    if hr_limit < 5:
        raise ValueError("Input 'hr_limit' must be at least 5.")
    # 'is None' (not truthiness) so that an explicit 0 is not replaced by the default
    lambda_ws = 0.5 if lambda_ws is None else lambda_ws
    lambda_wd = 0.5 if lambda_wd is None else lambda_wd
    output_folder = Path(output_folder) if output_folder else Path("Weathers")
    output_folder.mkdir(parents=True, exist_ok=True)  # Create the output directory if it doesn't exist

    for index in range(1, n_scenarios + 1):
        n_rows = randint(5, hr_limit)

        instance = ["NA"] * n_rows
        fire_scenario = [2] * n_rows

        # Random initial wind direction and wind speed
        wd_0 = randint(0, 359)
        ws_0 = randint(1, 100)

        # Second row: first row plus Gaussian noise.
        # NOTE(review): abs() mirrors negative values and WD may exceed 359; confirm
        # whether wrapping modulo 360 is expected by consumers of these files.
        wd_1 = abs(wd_0 + normal(loc=0.0, scale=30.0, size=None))
        ws_1 = abs(ws_0 + normal(loc=0.0, scale=8.0, size=None))

        ws = [ws_0, ws_1]
        wd = [wd_0, wd_1]

        # Single clock read so every row of one scenario shares the same base time
        start = datetime.now()
        dt = [(start + timedelta(hours=i)).isoformat(timespec="minutes") for i in range(n_rows)]

        # Remaining rows: weighted mix of the two previous hours, each series with its own lambda
        for row in range(2, n_rows):
            wd.append(wd[row - 1] * lambda_wd + wd[row - 2] * (1 - lambda_wd))
            ws.append(ws[row - 1] * lambda_ws + ws[row - 2] * (1 - lambda_ws))

        df = DataFrame(
            vstack((instance, dt, wd, ws, fire_scenario)).T,
            columns=["Instance", "datetime", "WD", "WS", "FireScenario"],
        )
        output_path = output_folder / f"weather{index}.csv"
        df.to_csv(output_path, index=False)
Generates random weather scenarios and saves them as CSV files.
Parameters:
- n_scenarios : int Number of weather scenarios to generate.
- hr_limit : int, optional Limit for the number of hours for each scenario (default is 72).
- lambda_ws : float, optional Lambda parameter for wind speed variation (default is 0.5). If set to 0, all rows will have the same wind speed.
- lambda_wd : float, optional Lambda parameter for wind direction variation (default is 0.5). If set to 0, all rows will have the same wind direction.
- output_folder : Union[Path, str], optional Path to the folder where output files will be saved (default is 'Weathers').
Output:
- Saves generated weather scenarios as CSV files in the specified output folder.