fire2a.weathers

👋🌎 Some functions related to weather scenario creation.

  1#!python3
  2"""👋🌎
  3Some functions related to weather scenario creation.
  4"""
  5__author__ = "Rodrigo Mahaluf-Recasens"
  6__revision__ = "$Format:%H$"
  7
  8from collections import Counter
  9from datetime import datetime, timedelta
 10from pathlib import Path
 11from random import choice, randint
 12from typing import List, Optional, Union
 13
 14from numpy import vstack
 15from numpy.random import normal
 16from pandas import DataFrame
 17
 18
 19def re_size_durations(scenario_lengths: List[int], n_samples: int = 100) -> List[int]:
 20    """Resize a list of scenario durations to generate a new list maintaining representation
 21    while considering outliers.
 22
 23    Args:
 24        scenario_lengths (List[int]): A list of integers representing desired lengths (in hours) for each weather scenario.
 25        n_samples (int): Optional integer indicating how many weather files (scenarios) will be created following the distribution of 'scenario_lengths'. If not provided, defaults to 100.
 26
 27    Returns:
 28        List[int]
 29        A new list of durations, preserving the representation of the original list.
 30
 31    Raises:
 32        ValueError: If 'scenario_lengths' is not a list of integers.
 33        ValueError: If 'n_samples' is provided but not an integer.
 34    """
 35    # Check if input is a list of integers
 36    if not all(isinstance(length, int) for length in scenario_lengths):
 37        raise ValueError("Input 'scenario_lengths' must be a list of integers.")
 38
 39    # Check if input is a list of integers
 40    if not isinstance(n_samples, int):
 41        raise ValueError("Input 'total_samples' must be an integer.")
 42
 43    # Calculate occurrences of each duration
 44    duration_counts = Counter(scenario_lengths)
 45
 46    # Get the total number of scenarios
 47    total_scenarios = len(scenario_lengths)
 48
 49    # Determine the number of items to be sampled for each duration
 50    samples_per_duration = {
 51        duration: min(max(int(n_samples * count / total_scenarios), 1), 10)
 52        for duration, count in duration_counts.items()
 53    }
 54    # Generate a new list based on stratified sampling
 55    new_list = []
 56    for duration, count in duration_counts.items():
 57        occurrences = min(count, samples_per_duration[duration])
 58        new_list.extend([duration] * occurrences)
 59
 60    # If the new list is shorter than the required number of samples, add random durations
 61    while len(new_list) < n_samples:
 62        new_list.append(choice(scenario_lengths))
 63
 64    # If the new list is longer than the required number of samples, remove random durations
 65    while len(new_list) > n_samples:
 66        new_list.remove(choice(new_list))
 67    return new_list
 68
 69
 70def cut_weather_scenarios(
 71    weather_records: DataFrame,
 72    scenario_lengths: List[int],
 73    output_folder: Union[Path, str] = None,
 74    n_output_files: Union[int,None] = None,
 75) -> DataFrame:
 76    """Split weather records into smaller scenarios following specified scenario lengths. The
 77    number of output weather scenarios can be customized using the 'n_output_files' parameter.
 78
 79    Args:
 80        weather_records (DataFrame): weather records where each row represents an hour of data.
 81        scenario_lengths (List[int]): desired lengths (in hours) for each weather scenario.
 82    - output_folder : Union[Path,str], optional
 83        A Path object or a string representing the folder path where the output will be stored.
 84        If not provided, 'Weathers' directory will be used.
 85    - n_output_files : integer, optional
 86        An integer that indicates how many weather files (scenarios) will be created following the
 87        distribution of 'weather_records'.
 88        If not provided, will be set to 100.
 89
 90    Output:
 91    - write as many file as weather scenarios generated based on specified lengths.
 92
 93    Raises:
 94    - ValueError
 95        If input 'weather_records' is not a Pandas DataFrame.
 96        If input 'scenario_lengths' is not a List of integers.
 97        If input 'n_output_files' is not an integer.
 98        If any scenario length is greater than the total length of weather_records.
 99    """
100
101    # Check if input is a Pandas DataFrame
102    if not isinstance(weather_records, DataFrame):
103        raise ValueError("Input 'weather_records' must be a Pandas DataFrame.")
104
105    # Check if input is a list of integers
106    if not all(isinstance(length, int) for length in scenario_lengths):
107        raise ValueError("Input 'scenario_lengths' must be a list of integers.")
108
109    # Create a representative sample
110    sample = re_size_durations(scenario_lengths, n_output_files)
111
112    # Define the output folder
113    output_folder = output_folder if output_folder else Path("Weathers")
114    output_folder = Path(output_folder)  # Ensure output_folder is a Path object
115    output_folder.mkdir(parents=True, exist_ok=True)  # Create the output directory if it doesn't exist
116
117    total_data_length = len(weather_records)
118
119    # Check if any scenario length is greater than the total data length
120    if any(length > total_data_length for length in sample):
121        raise ValueError("Scenario length cannot be greater than the total length of weather records")
122
123    scenarios : DataFrame = []  # List to store weather scenarios
124
125    # Generate scenarios based on specified lengths
126    for index, length in enumerate(sample, start=1):
127
128        # Randomly select a start index for the scenario
129        start_index = randint(0, total_data_length - length)
130
131        # Extract the scenario based on the start index and length
132        scenario = weather_records.iloc[start_index : start_index + length]
133
134        # Save the weather scenario
135        output_path = output_folder / f"Weather{index}.csv"
136        scenario.to_csv(output_path, index=False)
137
138    return scenarios
139
140
141# Example usage:
142# Assuming 'weather_data' is your DataFrame and 'scenario_lengths' is a list of desired scenario lengths
143# weather_data = pd.read_csv('your_weather_data.csv')
144# scenario_lengths = [24, 48, 72]  # Example lengths
145# weather_scenarios = cut_weather_scenarios(weather_data, scenario_lengths)
146
147
def random_weather_scenario_generator(
    n_scenarios: int,
    hr_limit: Optional[int] = None,
    lambda_ws: Optional[float] = None,
    lambda_wd: Optional[float] = None,
    output_folder: Optional[Union[Path, str]] = None,
):
    """Generate random weather scenarios and save them as CSV files.

    Each scenario has a random number of rows between 5 and 'hr_limit'. The
    first wind direction and wind speed are drawn at random, the second is the
    first plus Gaussian noise (absolute value), and every later row is the
    weighted mean 'lambda * previous + (1 - lambda) * the one before'.
    Files are written as 'weather<index>.csv' (index starting at 1) with the
    columns Instance, datetime, WD, WS and FireScenario.

    Args:
        n_scenarios (int): Number of weather scenarios to generate.
        hr_limit (int, optional): Maximum number of hourly rows per scenario.
            None or 0 selects the default of 72. Values below 5 make the row-count
            draw fail with ValueError.
        lambda_ws (float, optional): Weight of the previous row when computing wind speed
            (default is 0.5). With 1 every row after the second repeats the second row;
            with 0 every row repeats the value from two rows earlier.
        lambda_wd (float, optional): Weight of the previous row when computing wind direction
            (default is 0.5). Same meaning as 'lambda_ws'.
        output_folder (Union[Path, str], optional): Folder where output files are saved
            (default is 'Weathers'). It is created if missing.

    Returns:
        None. The scenarios are only written to disk.
    """
    hr_limit = hr_limit if hr_limit else 72
    # Compare against None so that an explicit 0 is honoured instead of replaced by 0.5
    lambda_ws = 0.5 if lambda_ws is None else lambda_ws
    lambda_wd = 0.5 if lambda_wd is None else lambda_wd
    output_folder = Path(output_folder) if output_folder else Path("Weathers")
    output_folder.mkdir(parents=True, exist_ok=True)  # Create the output directory if it doesn't exist

    for index in range(1, n_scenarios + 1):
        n_rows = randint(5, hr_limit)

        wd_0 = randint(0, 359)
        ws_0 = randint(1, 100)

        # NOTE(review): abs() keeps the values non-negative, but WD is not wrapped to [0, 360)
        wd_1 = abs(wd_0 + normal(loc=0.0, scale=30.0, size=None))
        ws_1 = abs(ws_0 + normal(loc=0.0, scale=8.0, size=None))

        wd = [wd_0, wd_1]
        ws = [ws_0, ws_1]
        for row in range(2, n_rows):
            wd.append(wd[row - 1] * lambda_wd + wd[row - 2] * (1 - lambda_wd))
            # Wind speed uses its own weight
            ws.append(ws[row - 1] * lambda_ws + ws[row - 2] * (1 - lambda_ws))

        # Read the clock once so all rows of a scenario share the same reference time
        start = datetime.now()
        dt = [(start + timedelta(hours=hour)).isoformat(timespec="minutes") for hour in range(n_rows)]

        # Build from a dict so numeric columns keep their dtype
        df = DataFrame(
            {
                "Instance": ["NA"] * n_rows,
                "datetime": dt,
                "WD": wd,
                "WS": ws,
                "FireScenario": [2] * n_rows,
            }
        )
        df.to_csv(output_folder / f"weather{index}.csv", index=False)
def re_size_durations(scenario_lengths: List[int], n_samples: int = 100) -> List[int]:
def re_size_durations(scenario_lengths: List[int], n_samples: int = 100) -> List[int]:
    """Resample a list of scenario durations into a list of exactly 'n_samples' items.

    The initial draw gives every distinct duration at least one and at most ten
    entries, and never more entries than it has in 'scenario_lengths', so that
    rare durations are represented and frequent ones do not dominate. The list
    is then padded with random picks from 'scenario_lengths', or trimmed by
    removing randomly chosen values, until it holds exactly 'n_samples' items.
    Trimming may remove a duration entirely when 'n_samples' is smaller than
    the number of distinct durations.

    Args:
        scenario_lengths (List[int]): Desired lengths (in hours) for each weather scenario.
        n_samples (int): How many durations to return. Defaults to 100.

    Returns:
        List[int]: 'n_samples' durations, all taken from 'scenario_lengths'.

    Raises:
        ValueError: If 'scenario_lengths' contains a non-integer item.
        ValueError: If 'n_samples' is not an integer or is negative.
        ValueError: If 'scenario_lengths' is empty while 'n_samples' is positive.
    """
    # Check that every requested duration is an integer
    if not all(isinstance(length, int) for length in scenario_lengths):
        raise ValueError("Input 'scenario_lengths' must be a list of integers.")

    # Check that the requested sample size is a usable integer
    if not isinstance(n_samples, int):
        raise ValueError("Input 'n_samples' must be an integer.")
    if n_samples < 0:
        raise ValueError("Input 'n_samples' must not be negative.")

    total_scenarios = len(scenario_lengths)

    # Without this guard the padding loop below would fail inside choice([])
    if total_scenarios == 0 and n_samples > 0:
        raise ValueError("Input 'scenario_lengths' must not be empty.")

    # Calculate occurrences of each duration
    duration_counts = Counter(scenario_lengths)

    # Stratified draw: proportional share, clamped to [1, 10] and to the observed count
    new_list = []
    for duration, count in duration_counts.items():
        proportional_share = int(n_samples * count / total_scenarios)
        quota = min(max(proportional_share, 1), 10)
        new_list.extend([duration] * min(count, quota))

    # If the new list is shorter than the required number of samples, add random durations
    while len(new_list) < n_samples:
        new_list.append(choice(scenario_lengths))

    # If the new list is longer than the required number of samples, remove random durations
    while len(new_list) > n_samples:
        new_list.remove(choice(new_list))
    return new_list

Resize a list of scenario durations to generate a new list maintaining representation while considering outliers.

Args: scenario_lengths (List[int]): A list of integers representing desired lengths (in hours) for each weather scenario. n_samples (int): Optional integer indicating how many weather files (scenarios) will be created following the distribution of 'scenario_lengths'. If not provided, defaults to 100.

Returns: List[int] A new list of durations, preserving the representation of the original list.

Raises: ValueError: If 'scenario_lengths' is not a list of integers. ValueError: If 'n_samples' is provided but not an integer.

def cut_weather_scenarios( weather_records: pandas.core.frame.DataFrame, scenario_lengths: List[int], output_folder: Union[pathlib.Path, str] = None, n_output_files: Optional[int] = None) -> pandas.core.frame.DataFrame:
 71def cut_weather_scenarios(
 72    weather_records: DataFrame,
 73    scenario_lengths: List[int],
 74    output_folder: Union[Path, str] = None,
 75    n_output_files: Union[int,None] = None,
 76) -> DataFrame:
 77    """Split weather records into smaller scenarios following specified scenario lengths. The
 78    number of output weather scenarios can be customized using the 'n_output_files' parameter.
 79
 80    Args:
 81        weather_records (DataFrame): weather records where each row represents an hour of data.
 82        scenario_lengths (List[int]): desired lengths (in hours) for each weather scenario.
 83    - output_folder : Union[Path,str], optional
 84        A Path object or a string representing the folder path where the output will be stored.
 85        If not provided, 'Weathers' directory will be used.
 86    - n_output_files : integer, optional
 87        An integer that indicates how many weather files (scenarios) will be created following the
 88        distribution of 'weather_records'.
 89        If not provided, will be set to 100.
 90
 91    Output:
 92    - write as many file as weather scenarios generated based on specified lengths.
 93
 94    Raises:
 95    - ValueError
 96        If input 'weather_records' is not a Pandas DataFrame.
 97        If input 'scenario_lengths' is not a List of integers.
 98        If input 'n_output_files' is not an integer.
 99        If any scenario length is greater than the total length of weather_records.
100    """
101
102    # Check if input is a Pandas DataFrame
103    if not isinstance(weather_records, DataFrame):
104        raise ValueError("Input 'weather_records' must be a Pandas DataFrame.")
105
106    # Check if input is a list of integers
107    if not all(isinstance(length, int) for length in scenario_lengths):
108        raise ValueError("Input 'scenario_lengths' must be a list of integers.")
109
110    # Create a representative sample
111    sample = re_size_durations(scenario_lengths, n_output_files)
112
113    # Define the output folder
114    output_folder = output_folder if output_folder else Path("Weathers")
115    output_folder = Path(output_folder)  # Ensure output_folder is a Path object
116    output_folder.mkdir(parents=True, exist_ok=True)  # Create the output directory if it doesn't exist
117
118    total_data_length = len(weather_records)
119
120    # Check if any scenario length is greater than the total data length
121    if any(length > total_data_length for length in sample):
122        raise ValueError("Scenario length cannot be greater than the total length of weather records")
123
124    scenarios : DataFrame = []  # List to store weather scenarios
125
126    # Generate scenarios based on specified lengths
127    for index, length in enumerate(sample, start=1):
128
129        # Randomly select a start index for the scenario
130        start_index = randint(0, total_data_length - length)
131
132        # Extract the scenario based on the start index and length
133        scenario = weather_records.iloc[start_index : start_index + length]
134
135        # Save the weather scenario
136        output_path = output_folder / f"Weather{index}.csv"
137        scenario.to_csv(output_path, index=False)
138
139    return scenarios

Split weather records into smaller scenarios following specified scenario lengths. The number of output weather scenarios can be customized using the 'n_output_files' parameter.

Args: weather_records (DataFrame): weather records where each row represents an hour of data. scenario_lengths (List[int]): desired lengths (in hours) for each weather scenario.

  • output_folder : Union[Path,str], optional A Path object or a string representing the folder path where the output will be stored. If not provided, 'Weathers' directory will be used.
  • n_output_files : integer, optional An integer that indicates how many weather files (scenarios) will be created following the distribution of 'scenario_lengths'. If not provided, will be set to 100.

Output:

  • Writes one file per weather scenario generated from the specified lengths.

Raises:

  • ValueError If input 'weather_records' is not a Pandas DataFrame. If input 'scenario_lengths' is not a List of integers. If input 'n_output_files' is not an integer. If any scenario length is greater than the total length of weather_records.
def random_weather_scenario_generator( n_scenarios: int, hr_limit: Optional[int] = None, lambda_ws: Optional[float] = None, lambda_wd: Optional[float] = None, output_folder: Union[pathlib.Path, str, NoneType] = None):
def random_weather_scenario_generator(
    n_scenarios: int,
    hr_limit: Optional[int] = None,
    lambda_ws: Optional[float] = None,
    lambda_wd: Optional[float] = None,
    output_folder: Optional[Union[Path, str]] = None,
):
    """Generate random weather scenarios and save them as CSV files.

    Each scenario has a random number of rows between 5 and 'hr_limit'. The
    first wind direction and wind speed are drawn at random, the second is the
    first plus Gaussian noise (absolute value), and every later row is the
    weighted mean 'lambda * previous + (1 - lambda) * the one before'.
    Files are written as 'weather<index>.csv' (index starting at 1) with the
    columns Instance, datetime, WD, WS and FireScenario.

    Args:
        n_scenarios (int): Number of weather scenarios to generate.
        hr_limit (int, optional): Maximum number of hourly rows per scenario.
            None or 0 selects the default of 72. Values below 5 make the row-count
            draw fail with ValueError.
        lambda_ws (float, optional): Weight of the previous row when computing wind speed
            (default is 0.5). With 1 every row after the second repeats the second row;
            with 0 every row repeats the value from two rows earlier.
        lambda_wd (float, optional): Weight of the previous row when computing wind direction
            (default is 0.5). Same meaning as 'lambda_ws'.
        output_folder (Union[Path, str], optional): Folder where output files are saved
            (default is 'Weathers'). It is created if missing.

    Returns:
        None. The scenarios are only written to disk.
    """
    hr_limit = hr_limit if hr_limit else 72
    # Compare against None so that an explicit 0 is honoured instead of replaced by 0.5
    lambda_ws = 0.5 if lambda_ws is None else lambda_ws
    lambda_wd = 0.5 if lambda_wd is None else lambda_wd
    output_folder = Path(output_folder) if output_folder else Path("Weathers")
    output_folder.mkdir(parents=True, exist_ok=True)  # Create the output directory if it doesn't exist

    for index in range(1, n_scenarios + 1):
        n_rows = randint(5, hr_limit)

        wd_0 = randint(0, 359)
        ws_0 = randint(1, 100)

        # NOTE(review): abs() keeps the values non-negative, but WD is not wrapped to [0, 360)
        wd_1 = abs(wd_0 + normal(loc=0.0, scale=30.0, size=None))
        ws_1 = abs(ws_0 + normal(loc=0.0, scale=8.0, size=None))

        wd = [wd_0, wd_1]
        ws = [ws_0, ws_1]
        for row in range(2, n_rows):
            wd.append(wd[row - 1] * lambda_wd + wd[row - 2] * (1 - lambda_wd))
            # Wind speed uses its own weight
            ws.append(ws[row - 1] * lambda_ws + ws[row - 2] * (1 - lambda_ws))

        # Read the clock once so all rows of a scenario share the same reference time
        start = datetime.now()
        dt = [(start + timedelta(hours=hour)).isoformat(timespec="minutes") for hour in range(n_rows)]

        # Build from a dict so numeric columns keep their dtype
        df = DataFrame(
            {
                "Instance": ["NA"] * n_rows,
                "datetime": dt,
                "WD": wd,
                "WS": ws,
                "FireScenario": [2] * n_rows,
            }
        )
        df.to_csv(output_folder / f"weather{index}.csv", index=False)

Generates random weather scenarios and saves them as CSV files.

Parameters:

  • n_scenarios : int Number of weather scenarios to generate.
  • hr_limit : int, optional Limit for the number of hours for each scenario (default is 72).
  • lambda_ws : float, optional Lambda parameter for wind speed variation (default is 0.5). If set to 0, all rows will have the same wind speed.
  • lambda_wd : float, optional Lambda parameter for wind direction variation (default is 0.5). If set to 0, all rows will have the same wind direction.
  • output_folder : str, optional Path to the folder where output files will be saved (default is 'Weathers').

Output:

  • Saves generated weather scenarios as CSV files in the specified output folder.