fire2a.weathers
👋🌎 Some functions related to weather scenario creation.
#!python3
"""👋🌎
Some functions related to weather scenario creation.
"""
__author__ = "Rodrigo Mahaluf-Recasens"
__revision__ = "$Format:%H$"

from collections import Counter
from datetime import datetime, timedelta
from pathlib import Path
from random import choice, randint
from typing import List, Optional, Union

from numpy import vstack
from numpy.random import normal
from pandas import DataFrame


def re_size_durations(scenario_lengths: List[int], n_samples: int = 100) -> List[int]:
    """Resize a list of scenario durations into a new list holding exactly 'n_samples' durations.

    Every distinct duration is first seeded between 1 and 10 times (never more often than it
    occurs in the input). The list is then padded with random draws from 'scenario_lengths', or
    trimmed by removing randomly chosen entries, until it holds exactly 'n_samples' items.

    Args:

        scenario_lengths (List[int]): A list of integers representing desired lengths (in hours) for each weather scenario.
        n_samples (int): Optional integer indicating how many weather files (scenarios) will be created following the distribution of 'scenario_lengths'. If not provided, defaults to 100.

    Returns:

        List[int] : A new list of 'n_samples' durations, all taken from 'scenario_lengths'.

    Raises:

        ValueError: If 'scenario_lengths' is not a list of integers.
        ValueError: If 'n_samples' is not an integer or is negative.
        ValueError: If 'scenario_lengths' is empty while 'n_samples' is positive.
    """
    # Check if input is a list of integers
    if not all(isinstance(length, int) for length in scenario_lengths):
        raise ValueError("Input 'scenario_lengths' must be a list of integers.")

    # Check if the requested sample size is an integer
    if not isinstance(n_samples, int):
        raise ValueError("Input 'n_samples' must be an integer.")

    # A negative size can never be reached by trimming; fail early with a clear message
    if n_samples < 0:
        raise ValueError("Input 'n_samples' must not be negative.")

    # Padding draws from 'scenario_lengths', so it cannot be empty when samples are requested
    if n_samples > 0 and not scenario_lengths:
        raise ValueError("Input 'scenario_lengths' must not be empty.")

    # Calculate occurrences of each duration
    duration_counts = Counter(scenario_lengths)

    # Get the total number of scenarios
    total_scenarios = len(scenario_lengths)

    # Seed each duration proportionally to its share, clamped to 1..10 and to its own count
    new_list: List[int] = []
    for duration, count in duration_counts.items():
        proportional = int(n_samples * count / total_scenarios)
        occurrences = min(count, max(proportional, 1), 10)
        new_list.extend([duration] * occurrences)

    # If the new list is shorter than the required number of samples, add random durations
    while len(new_list) < n_samples:
        new_list.append(choice(scenario_lengths))

    # If the new list is longer than the required number of samples, remove random durations
    while len(new_list) > n_samples:
        new_list.remove(choice(new_list))
    return new_list


def cut_weather_scenarios(
    weather_records: DataFrame,
    scenario_lengths: List[int],
    output_folder: Union[Path, str] = Path("Weathers"),
    n_output_files: Optional[int] = None,
) -> List[DataFrame]:
    """Split weather records into smaller scenarios following specified scenario lengths. The
    number of output weather scenarios can be customized using the 'n_output_files' parameter.

    Each scenario is a run of consecutive rows starting at a random position of 'weather_records'
    and is written to '<output_folder>/Weather<i>.csv' (i starting at 1).

    Args:

        weather_records (DataFrame): weather records where each row represents an hour of data.
        scenario_lengths (List[int]): desired lengths (in hours) for each weather scenario.
        output_folder : Union[Path,str], optional
            A Path object or a string representing the folder path where the output will be stored.
            If not provided, 'Weathers' directory will be used.
        n_output_files : integer, optional
            An integer that indicates how many weather files (scenarios) will be created following the
            distribution of 'scenario_lengths'.
            If not provided (or 0), will be set to 100.

    Returns:

        List[DataFrame] : the generated scenarios, in the same order as the written files.

    Raises ValueError:

        If input 'weather_records' is not a Pandas DataFrame.
        If input 'scenario_lengths' is not a List of integers.
        If input 'n_output_files' is not an integer.
        If any scenario length is greater than the total length of weather_records.
    """
    # Check if input is a Pandas DataFrame
    if not isinstance(weather_records, DataFrame):
        raise ValueError("Input 'weather_records' must be a Pandas DataFrame.")

    # Check if input is a list of integers
    if not all(isinstance(length, int) for length in scenario_lengths):
        raise ValueError("Input 'scenario_lengths' must be a list of integers.")

    # Create a representative sample; a falsy 'n_output_files' keeps the default sample size
    if n_output_files:
        sample = re_size_durations(scenario_lengths, n_output_files)
    else:
        sample = re_size_durations(scenario_lengths)

    total_data_length = len(weather_records)

    # Validate before touching the filesystem so invalid input leaves no folder behind
    if any(length > total_data_length for length in sample):
        raise ValueError("Scenario length cannot be greater than the total length of weather records")

    # Define the output folder
    output_folder = Path(output_folder)  # Ensure output_folder is a Path object
    output_folder.mkdir(parents=True, exist_ok=True)  # Create the output directory if it doesn't exist

    scenarios: List[DataFrame] = []  # List to store weather scenarios

    # Generate scenarios based on specified lengths
    for index, length in enumerate(sample, start=1):
        # Randomly select a start index such that the whole window fits in the records
        start_index = randint(0, total_data_length - length)

        # Extract the scenario based on the start index and length
        scenario = weather_records.iloc[start_index : start_index + length]

        # Save the weather scenario and keep it for the caller
        scenario.to_csv(output_folder / f"Weather{index}.csv", index=False)
        scenarios.append(scenario)

    return scenarios


# Example usage:
# Assuming 'weather_data' is your DataFrame and 'scenario_lengths' is a list of desired scenario lengths
# weather_data = pd.read_csv('your_weather_data.csv')
# scenario_lengths = [24, 48, 72]  # Example lengths
# weather_scenarios = cut_weather_scenarios(weather_data, scenario_lengths)


def random_weather_scenario_generator(
    n_scenarios: int,
    hr_limit: int = 72,
    lambda_ws: float = 0.5,
    lambda_wd: float = 0.5,
    output_folder: Union[Path, str] = Path("Weathers"),
) -> None:
    """Generates random weather scenarios and saves them as CSV files in the specified output folder.

    Each scenario has a random number of hourly rows between 5 and 'hr_limit'. The first two wind
    values are random; every later value is a weighted mean of the two previous ones:
    value[i] = value[i-1] * lambda + value[i-2] * (1 - lambda).

    Args:

        n_scenarios (int): number of weather scenarios to generate.
        hr_limit (int, optional): limit for the number of hours for each scenario (default is 72).
            Must be at least 5, the minimum number of rows drawn.
        lambda_ws (float, optional): lambda parameter for wind speed variation (default is 0.5).
        lambda_wd (float, optional): lambda parameter for wind direction variation (default is 0.5).
        output_folder (Union[Path, str], optional): folder where 'Weather<i>.csv' files are written;
            created if missing (default is 'Weathers').

    Returns:
        None
    """
    output_folder = Path(output_folder)
    output_folder.mkdir(parents=True, exist_ok=True)  # Create the output directory if it doesn't exist

    for index in range(1, n_scenarios + 1):
        n_rows = randint(5, hr_limit)

        instance = ["NA"] * n_rows
        fire_scenario = [2] * n_rows

        # Random initial state, then a normally perturbed second value (abs keeps it non-negative)
        wd_0 = randint(0, 359)
        ws_0 = randint(1, 100)
        wd_1 = abs(wd_0 + normal(loc=0.0, scale=30.0, size=None))
        ws_1 = abs(ws_0 + normal(loc=0.0, scale=8.0, size=None))

        wd = [wd_0, wd_1]
        ws = [ws_0, ws_1]

        # Read the clock once so all rows of a scenario are exactly one hour apart
        start = datetime.now()
        dt = [(start + timedelta(hours=i)).isoformat(timespec="minutes") for i in range(n_rows)]

        for row in range(2, n_rows):
            wd.append(wd[row - 1] * lambda_wd + wd[row - 2] * (1 - lambda_wd))
            # Wind speed is smoothed with its own lambda
            ws.append(ws[row - 1] * lambda_ws + ws[row - 2] * (1 - lambda_ws))

        df = DataFrame(
            vstack((instance, dt, wd, ws, fire_scenario)).T,
            columns=["Instance", "datetime", "WD", "WS", "FireScenario"],
        )
        df.to_csv(output_folder / f"Weather{index}.csv", index=False)
def
re_size_durations(scenario_lengths: List[int], n_samples: int = 100) -> List[int]:
def re_size_durations(scenario_lengths: List[int], n_samples: int = 100) -> List[int]:
    """Resize a list of scenario durations into a new list holding exactly 'n_samples' durations.

    Every distinct duration is first seeded between 1 and 10 times (never more often than it
    occurs in the input). The list is then padded with random draws from 'scenario_lengths', or
    trimmed by removing randomly chosen entries, until it holds exactly 'n_samples' items.

    Args:

        scenario_lengths (List[int]): A list of integers representing desired lengths (in hours) for each weather scenario.
        n_samples (int): Optional integer indicating how many weather files (scenarios) will be created following the distribution of 'scenario_lengths'. If not provided, defaults to 100.

    Returns:

        List[int] : A new list of 'n_samples' durations, all taken from 'scenario_lengths'.

    Raises:

        ValueError: If 'scenario_lengths' is not a list of integers.
        ValueError: If 'n_samples' is not an integer or is negative.
        ValueError: If 'scenario_lengths' is empty while 'n_samples' is positive.
    """
    # Check if input is a list of integers
    if not all(isinstance(length, int) for length in scenario_lengths):
        raise ValueError("Input 'scenario_lengths' must be a list of integers.")

    # Check if the requested sample size is an integer
    if not isinstance(n_samples, int):
        raise ValueError("Input 'n_samples' must be an integer.")

    # A negative size can never be reached by trimming; fail early with a clear message
    if n_samples < 0:
        raise ValueError("Input 'n_samples' must not be negative.")

    # Padding draws from 'scenario_lengths', so it cannot be empty when samples are requested
    if n_samples > 0 and not scenario_lengths:
        raise ValueError("Input 'scenario_lengths' must not be empty.")

    # Calculate occurrences of each duration
    duration_counts = Counter(scenario_lengths)

    # Get the total number of scenarios
    total_scenarios = len(scenario_lengths)

    # Seed each duration proportionally to its share, clamped to 1..10 and to its own count
    new_list: List[int] = []
    for duration, count in duration_counts.items():
        proportional = int(n_samples * count / total_scenarios)
        occurrences = min(count, max(proportional, 1), 10)
        new_list.extend([duration] * occurrences)

    # If the new list is shorter than the required number of samples, add random durations
    while len(new_list) < n_samples:
        new_list.append(choice(scenario_lengths))

    # If the new list is longer than the required number of samples, remove random durations
    while len(new_list) > n_samples:
        new_list.remove(choice(new_list))
    return new_list
Resize a list of scenario durations to generate a new list maintaining representation while considering outliers.
Args:
scenario_lengths (List[int]): A list of integers representing desired lengths (in hours) for each weather scenario.
n_samples (int): Optional integer indicating how many weather files (scenarios) will be created following the distribution of 'scenario_lengths'. If not provided, defaults to 100.
Returns:
List[int] : A new list of durations, preserving the representation of the original list.
Raises:
ValueError: If 'scenario_lengths' is not a list of integers.
ValueError: If 'n_samples' is not an integer.
def
cut_weather_scenarios( weather_records: pandas.core.frame.DataFrame, scenario_lengths: List[int], output_folder: Union[pathlib.Path, str] = PosixPath('Weathers'), n_output_files: Optional[int] = None) -> pandas.core.frame.DataFrame:
def cut_weather_scenarios(
    weather_records: DataFrame,
    scenario_lengths: List[int],
    output_folder: Union[Path, str] = Path("Weathers"),
    n_output_files: Optional[int] = None,
) -> List[DataFrame]:
    """Split weather records into smaller scenarios following specified scenario lengths. The
    number of output weather scenarios can be customized using the 'n_output_files' parameter.

    Each scenario is a run of consecutive rows starting at a random position of 'weather_records'
    and is written to '<output_folder>/Weather<i>.csv' (i starting at 1).

    Args:

        weather_records (DataFrame): weather records where each row represents an hour of data.
        scenario_lengths (List[int]): desired lengths (in hours) for each weather scenario.
        output_folder : Union[Path,str], optional
            A Path object or a string representing the folder path where the output will be stored.
            If not provided, 'Weathers' directory will be used.
        n_output_files : integer, optional
            An integer that indicates how many weather files (scenarios) will be created following the
            distribution of 'scenario_lengths'.
            If not provided (or 0), will be set to 100.

    Returns:

        List[DataFrame] : the generated scenarios, in the same order as the written files.

    Raises ValueError:

        If input 'weather_records' is not a Pandas DataFrame.
        If input 'scenario_lengths' is not a List of integers.
        If input 'n_output_files' is not an integer.
        If any scenario length is greater than the total length of weather_records.
    """
    # Check if input is a Pandas DataFrame
    if not isinstance(weather_records, DataFrame):
        raise ValueError("Input 'weather_records' must be a Pandas DataFrame.")

    # Check if input is a list of integers
    if not all(isinstance(length, int) for length in scenario_lengths):
        raise ValueError("Input 'scenario_lengths' must be a list of integers.")

    # Create a representative sample; a falsy 'n_output_files' keeps the default sample size
    if n_output_files:
        sample = re_size_durations(scenario_lengths, n_output_files)
    else:
        sample = re_size_durations(scenario_lengths)

    total_data_length = len(weather_records)

    # Validate before touching the filesystem so invalid input leaves no folder behind
    if any(length > total_data_length for length in sample):
        raise ValueError("Scenario length cannot be greater than the total length of weather records")

    # Define the output folder
    output_folder = Path(output_folder)  # Ensure output_folder is a Path object
    output_folder.mkdir(parents=True, exist_ok=True)  # Create the output directory if it doesn't exist

    scenarios: List[DataFrame] = []  # List to store weather scenarios

    # Generate scenarios based on specified lengths
    for index, length in enumerate(sample, start=1):
        # Randomly select a start index such that the whole window fits in the records
        start_index = randint(0, total_data_length - length)

        # Extract the scenario based on the start index and length
        scenario = weather_records.iloc[start_index : start_index + length]

        # Save the weather scenario and keep it for the caller
        scenario.to_csv(output_folder / f"Weather{index}.csv", index=False)
        scenarios.append(scenario)

    return scenarios
Split weather records into smaller scenarios following specified scenario lengths. The number of output weather scenarios can be customized using the 'n_output_files' parameter.
Args:
weather_records (DataFrame): weather records where each row represents an hour of data.
scenario_lengths (List[int]): desired lengths (in hours) for each weather scenario.
output_folder : Union[Path,str], optional
A Path object or a string representing the folder path where the output will be stored.
If not provided, 'Weathers' directory will be used.
n_output_files : integer, optional
An integer that indicates how many weather files (scenarios) will be created following the
distribution of 'scenario_lengths'.
If not provided, will be set to 100.
Output:
- writes one CSV file per weather scenario generated from the specified lengths.
Raises ValueError:
If input 'weather_records' is not a Pandas DataFrame.
If input 'scenario_lengths' is not a List of integers.
If input 'n_output_files' is not an integer.
If any scenario length is greater than the total length of weather_records.
def
random_weather_scenario_generator( n_scenarios: int, hr_limit: int = 72, lambda_ws: float = 0.5, lambda_wd: float = 0.5, output_folder: Union[pathlib.Path, str] = PosixPath('Weathers')) -> None:
def random_weather_scenario_generator(
    n_scenarios: int,
    hr_limit: int = 72,
    lambda_ws: float = 0.5,
    lambda_wd: float = 0.5,
    output_folder: Union[Path, str] = Path("Weathers"),
) -> None:
    """Generates random weather scenarios and saves them as CSV files in the specified output folder.

    Each scenario has a random number of hourly rows between 5 and 'hr_limit'. The first two wind
    values are random; every later value is a weighted mean of the two previous ones:
    value[i] = value[i-1] * lambda + value[i-2] * (1 - lambda).

    Args:

        n_scenarios (int): number of weather scenarios to generate.
        hr_limit (int, optional): limit for the number of hours for each scenario (default is 72).
            Must be at least 5, the minimum number of rows drawn.
        lambda_ws (float, optional): lambda parameter for wind speed variation (default is 0.5).
        lambda_wd (float, optional): lambda parameter for wind direction variation (default is 0.5).
        output_folder (Union[Path, str], optional): folder where 'Weather<i>.csv' files are written;
            created if missing (default is 'Weathers').

    Returns:
        None
    """
    output_folder = Path(output_folder)
    output_folder.mkdir(parents=True, exist_ok=True)  # Create the output directory if it doesn't exist

    for index in range(1, n_scenarios + 1):
        n_rows = randint(5, hr_limit)

        instance = ["NA"] * n_rows
        fire_scenario = [2] * n_rows

        # Random initial state, then a normally perturbed second value (abs keeps it non-negative)
        wd_0 = randint(0, 359)
        ws_0 = randint(1, 100)
        wd_1 = abs(wd_0 + normal(loc=0.0, scale=30.0, size=None))
        ws_1 = abs(ws_0 + normal(loc=0.0, scale=8.0, size=None))

        wd = [wd_0, wd_1]
        ws = [ws_0, ws_1]

        # Read the clock once so all rows of a scenario are exactly one hour apart
        start = datetime.now()
        dt = [(start + timedelta(hours=i)).isoformat(timespec="minutes") for i in range(n_rows)]

        for row in range(2, n_rows):
            wd.append(wd[row - 1] * lambda_wd + wd[row - 2] * (1 - lambda_wd))
            # Wind speed is smoothed with its own lambda
            ws.append(ws[row - 1] * lambda_ws + ws[row - 2] * (1 - lambda_ws))

        df = DataFrame(
            vstack((instance, dt, wd, ws, fire_scenario)).T,
            columns=["Instance", "datetime", "WD", "WS", "FireScenario"],
        )
        df.to_csv(output_folder / f"Weather{index}.csv", index=False)
Generates random weather scenarios and saves them as CSV files in the specified output folder.
Args:
n_scenarios (int): number of weather scenarios to generate.
hr_limit (int, optional): limit for the number of hours for each scenario (default is 72).
lambda_ws (float, optional): lambda parameter for wind speed variation (default is 0.5).
lambda_wd (float, optional): lambda parameter for wind direction variation (default is 0.5).
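output_folder (Union[Path, str], optional): folder where the generated 'Weather<i>.csv' files are written; it is created if it does not exist (default is 'Weathers').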
Returns: None