fire2a.weathers

👋🌎 Some functions related to weather scenario creation.

View Source

  1#!python3
  2"""👋🌎
  3Some functions related to weather scenario creation.
  4"""
  5__author__ = "Rodrigo Mahaluf-Recasens"
  6__revision__ = "$Format:%H$"
  7
  8from collections import Counter
  9from datetime import datetime, timedelta
 10from pathlib import Path
 11from random import choice, randint
 12from typing import List, Optional, Union
 13
 14from numpy import vstack
 15from numpy.random import normal
 16from pandas import DataFrame
 17
 18
 19def re_size_durations(scenario_lengths: List[int], n_samples: int = 100) -> List[int]:
 20    """Resize a list of scenario durations to generate a new list maintaining representation while considering outliers.
 21
 22    Args:
 23
 24        scenario_lengths (List[int]): A list of integers representing desired lengths (in hours) for each weather scenario.
 25        n_samples (int): Optional integer indicating how many weather files (scenarios) will be created following the distribution of 'scenario_lengths'. If not provided, defaults to 100.
 26
 27    Returns:
 28
 29        List[int] : A new list of durations, preserving the representation of the original list.
 30
 31    Raises:
 32
 33        ValueError: If 'scenario_lengths' is not a list of integers.
 34        ValueError: If 'n_samples' is not an integer.
 35    """
 36    # Check if input is a list of integers
 37    if not all(isinstance(length, int) for length in scenario_lengths):
 38        raise ValueError("Input 'scenario_lengths' must be a list of integers.")
 39
 40    # Check if input is a list of integers
 41    if not isinstance(n_samples, int):
 42        raise ValueError("Input 'total_samples' must be an integer.")
 43
 44    # Calculate occurrences of each duration
 45    duration_counts = Counter(scenario_lengths)
 46
 47    # Get the total number of scenarios
 48    total_scenarios = len(scenario_lengths)
 49
 50    # Determine the number of items to be sampled for each duration
 51    samples_per_duration = {
 52        duration: min(max(int(n_samples * count / total_scenarios), 1), 10)
 53        for duration, count in duration_counts.items()
 54    }
 55    # Generate a new list based on stratified sampling
 56    new_list = []
 57    for duration, count in duration_counts.items():
 58        occurrences = min(count, samples_per_duration[duration])
 59        new_list.extend([duration] * occurrences)
 60
 61    # If the new list is shorter than the required number of samples, add random durations
 62    while len(new_list) < n_samples:
 63        new_list.append(choice(scenario_lengths))
 64
 65    # If the new list is longer than the required number of samples, remove random durations
 66    while len(new_list) > n_samples:
 67        new_list.remove(choice(new_list))
 68    return new_list
 69
 70
 71def cut_weather_scenarios(
 72    weather_records: DataFrame,
 73    scenario_lengths: List[int],
 74    output_folder: Union[Path, str] = Path("Weathers"),
 75    n_output_files: Optional[int] = None,
 76) -> DataFrame:
 77    """Split weather records into smaller scenarios following specified scenario lengths. The
 78    number of output weather scenarios can be customized using the 'n_output_files' parameter.
 79
 80    Args:
 81
 82        weather_records (DataFrame): weather records where each row represents an hour of data.
 83        scenario_lengths (List[int]): desired lengths (in hours) for each weather scenario.
 84        output_folder : Union[Path,str], optional
 85            A Path object or a string representing the folder path where the output will be stored.
 86            If not provided, 'Weathers' directory will be used.
 87        n_output_files : integer, optional
 88            An integer that indicates how many weather files (scenarios) will be created following the
 89            distribution of 'weather_records'.
 90            If not provided, will be set to 100.
 91
 92    Output:
 93    - write as many file as weather scenarios generated based on specified lengths.
 94
 95    Raises ValueError:
 96
 97        If input 'weather_records' is not a Pandas DataFrame.
 98        If input 'scenario_lengths' is not a List of integers.
 99        If input 'n_output_files' is not an integer.
100        If any scenario length is greater than the total length of weather_records.
101    """
102    # Check if input is a Pandas DataFrame
103    if not isinstance(weather_records, DataFrame):
104        raise ValueError("Input 'weather_records' must be a Pandas DataFrame.")
105
106    # Check if input is a list of integers
107    if not all(isinstance(length, int) for length in scenario_lengths):
108        raise ValueError("Input 'scenario_lengths' must be a list of integers.")
109
110    # Create a representative sample
111    if n_output_files:
112        sample = re_size_durations(scenario_lengths, n_output_files)
113    else:
114        sample = re_size_durations(scenario_lengths)
115
116    # Define the output folder
117    output_folder = Path(output_folder)  # Ensure output_folder is a Path object
118    output_folder.mkdir(parents=True, exist_ok=True)  # Create the output directory if it doesn't exist
119
120    total_data_length = len(weather_records)
121
122    # Check if any scenario length is greater than the total data length
123    if any(length > total_data_length for length in sample):
124        raise ValueError("Scenario length cannot be greater than the total length of weather records")
125
126    scenarios: DataFrame = []  # List to store weather scenarios
127
128    # Generate scenarios based on specified lengths
129    for index, length in enumerate(sample, start=1):
130
131        # Randomly select a start index for the scenario
132        start_index = randint(0, total_data_length - length)
133
134        # Extract the scenario based on the start index and length
135        scenario = weather_records.iloc[start_index : start_index + length]
136
137        # Save the weather scenario
138        output_path = output_folder / f"Weather{index}.csv"
139        scenario.to_csv(output_path, index=False)
140
141    return scenarios
142
143
144# Example usage:
145# Assuming 'weather_data' is your DataFrame and 'scenario_lengths' is a list of desired scenario lengths
146# weather_data = pd.read_csv('your_weather_data.csv')
147# scenario_lengths = [24, 48, 72]  # Example lengths
148# weather_scenarios = cut_weather_scenarios(weather_data, scenario_lengths)
149
150
def random_weather_scenario_generator(
    n_scenarios: int,
    hr_limit: int = 72,
    lambda_ws: float = 0.5,
    lambda_wd: float = 0.5,
    output_folder: Union[Path, str] = Path("Weathers"),
) -> None:
    """Generates random weather scenarios and saves them as CSV files in the specified output folder.

    Each scenario has between 5 and 'hr_limit' hourly rows and is written to
    '<output_folder>/Weather<i>.csv' (i starting at 1) with the columns
    Instance, datetime, WD, WS and FireScenario. The first two hours are drawn at
    random; every later hour is a weighted average of the two preceding hours.

    Args:

        n_scenarios (int): number of weather scenarios to generate.
        hr_limit (int, optional): limit for the number of hours for each scenario (default is 72). Must be at least 5.
        lambda_ws (float, optional): weight given to the previous hour's wind speed; the hour before that gets 1 - lambda_ws (default is 0.5).
        lambda_wd (float, optional): weight given to the previous hour's wind direction; the hour before that gets 1 - lambda_wd (default is 0.5).
        output_folder : Union[Path,str], optional
            Folder where the CSV files are written (default is 'Weathers'). Created if missing.

    Returns:
        None

    Raises:
        ValueError: If 'hr_limit' is lower than 5, the minimum number of rows per scenario.
    """
    # randint(5, hr_limit) below needs a non-empty range; fail early with a clear message
    if hr_limit < 5:
        raise ValueError("Input 'hr_limit' must be at least 5.")

    output_folder = Path(output_folder)
    output_folder.mkdir(parents=True, exist_ok=True)  # Create the output directory if it doesn't exist

    for index in range(1, n_scenarios + 1):
        n_rows = randint(5, hr_limit)

        # Hour 0: random direction (degrees) and speed
        wd = [randint(0, 359)]
        ws = [randint(1, 100)]

        # Hour 1: perturb hour 0 with Gaussian noise. Direction wraps around the
        # compass (e.g. -10 becomes 350); speed is reflected to stay non-negative.
        wd.append((wd[0] + normal(loc=0.0, scale=30.0, size=None)) % 360)
        ws.append(abs(ws[0] + normal(loc=0.0, scale=8.0, size=None)))

        # Remaining hours: weighted average of the two previous hours, each series
        # using its own lambda
        for row in range(2, n_rows):
            wd.append(wd[row - 1] * lambda_wd + wd[row - 2] * (1 - lambda_wd))
            ws.append(ws[row - 1] * lambda_ws + ws[row - 2] * (1 - lambda_ws))

        # Read the clock once so all timestamps of a scenario are exactly one hour apart
        start = datetime.now()
        dt = [(start + timedelta(hours=i)).isoformat(timespec="minutes") for i in range(n_rows)]

        df = DataFrame(
            {
                "Instance": ["NA"] * n_rows,
                "datetime": dt,
                "WD": [float(value) for value in wd],
                "WS": [float(value) for value in ws],
                "FireScenario": [2] * n_rows,
            }
        )
        df.to_csv(output_folder / f"Weather{index}.csv", index=False)

def re_size_durations(scenario_lengths: List[int], n_samples: int = 100) -> List[int]: View Source

def re_size_durations(scenario_lengths: List[int], n_samples: int = 100) -> List[int]:
    """Resize a list of scenario durations to generate a new list maintaining representation while considering outliers.

    Each distinct duration contributes between 1 and 10 entries (never more than its
    original count), roughly proportional to its share of 'scenario_lengths'. The
    result is then padded with random draws from 'scenario_lengths', or trimmed by
    removing random entries, until it holds exactly 'n_samples' items.

    Args:

        scenario_lengths (List[int]): A list of integers representing desired lengths (in hours) for each weather scenario.
        n_samples (int): Optional integer indicating how many weather files (scenarios) will be created following the distribution of 'scenario_lengths'. If not provided, defaults to 100.

    Returns:

        List[int] : A new list of exactly 'n_samples' durations, preserving the representation of the original list.

    Raises:

        ValueError: If 'scenario_lengths' is not a list of integers.
        ValueError: If 'n_samples' is not an integer or is negative.
        ValueError: If 'scenario_lengths' is empty while 'n_samples' is greater than zero.
    """
    # Check if input is a list of integers
    if not all(isinstance(length, int) for length in scenario_lengths):
        raise ValueError("Input 'scenario_lengths' must be a list of integers.")

    # Check that the requested sample size is a usable integer
    if not isinstance(n_samples, int):
        raise ValueError("Input 'n_samples' must be an integer.")
    if n_samples < 0:
        raise ValueError("Input 'n_samples' must be non-negative.")

    # Nothing to sample: short-circuit so an empty input stays valid for zero samples
    if n_samples == 0:
        return []

    # Padding below draws from 'scenario_lengths'; an empty list cannot supply any value
    if not scenario_lengths:
        raise ValueError("Input 'scenario_lengths' must not be empty.")

    total_scenarios = len(scenario_lengths)

    # Stratified pass: a proportional share per duration, clamped to [1, 10] so that
    # rare durations stay represented and frequent ones do not dominate
    new_list: List[int] = []
    for duration, count in Counter(scenario_lengths).items():
        share = min(max(int(n_samples * count / total_scenarios), 1), 10)
        new_list.extend([duration] * min(count, share))

    # If the new list is shorter than the required number of samples, add random durations
    while len(new_list) < n_samples:
        new_list.append(choice(scenario_lengths))

    # If the new list is longer than the required number of samples, remove random durations
    while len(new_list) > n_samples:
        new_list.remove(choice(new_list))
    return new_list

Resize a list of scenario durations to generate a new list maintaining representation while considering outliers.

Args:

scenario_lengths (List[int]): A list of integers representing desired lengths (in hours) for each weather scenario.
n_samples (int): Optional integer indicating how many weather files (scenarios) will be created following the distribution of 'scenario_lengths'. If not provided, defaults to 100.

Returns:

List[int] : A new list of durations, preserving the representation of the original list.

Raises:

ValueError: If 'scenario_lengths' is not a list of integers.
ValueError: If 'n_samples' is not an integer.

def cut_weather_scenarios( weather_records: pandas.core.frame.DataFrame, scenario_lengths: List[int], output_folder: Union[pathlib.Path, str] = PosixPath('Weathers'), n_output_files: Optional[int] = None) -> pandas.core.frame.DataFrame: View Source

 72def cut_weather_scenarios(
 73    weather_records: DataFrame,
 74    scenario_lengths: List[int],
 75    output_folder: Union[Path, str] = Path("Weathers"),
 76    n_output_files: Optional[int] = None,
 77) -> DataFrame:
 78    """Split weather records into smaller scenarios following specified scenario lengths. The
 79    number of output weather scenarios can be customized using the 'n_output_files' parameter.
 80
 81    Args:
 82
 83        weather_records (DataFrame): weather records where each row represents an hour of data.
 84        scenario_lengths (List[int]): desired lengths (in hours) for each weather scenario.
 85        output_folder : Union[Path,str], optional
 86            A Path object or a string representing the folder path where the output will be stored.
 87            If not provided, 'Weathers' directory will be used.
 88        n_output_files : integer, optional
 89            An integer that indicates how many weather files (scenarios) will be created following the
 90            distribution of 'weather_records'.
 91            If not provided, will be set to 100.
 92
 93    Output:
 94    - write as many file as weather scenarios generated based on specified lengths.
 95
 96    Raises ValueError:
 97
 98        If input 'weather_records' is not a Pandas DataFrame.
 99        If input 'scenario_lengths' is not a List of integers.
100        If input 'n_output_files' is not an integer.
101        If any scenario length is greater than the total length of weather_records.
102    """
103    # Check if input is a Pandas DataFrame
104    if not isinstance(weather_records, DataFrame):
105        raise ValueError("Input 'weather_records' must be a Pandas DataFrame.")
106
107    # Check if input is a list of integers
108    if not all(isinstance(length, int) for length in scenario_lengths):
109        raise ValueError("Input 'scenario_lengths' must be a list of integers.")
110
111    # Create a representative sample
112    if n_output_files:
113        sample = re_size_durations(scenario_lengths, n_output_files)
114    else:
115        sample = re_size_durations(scenario_lengths)
116
117    # Define the output folder
118    output_folder = Path(output_folder)  # Ensure output_folder is a Path object
119    output_folder.mkdir(parents=True, exist_ok=True)  # Create the output directory if it doesn't exist
120
121    total_data_length = len(weather_records)
122
123    # Check if any scenario length is greater than the total data length
124    if any(length > total_data_length for length in sample):
125        raise ValueError("Scenario length cannot be greater than the total length of weather records")
126
127    scenarios: DataFrame = []  # List to store weather scenarios
128
129    # Generate scenarios based on specified lengths
130    for index, length in enumerate(sample, start=1):
131
132        # Randomly select a start index for the scenario
133        start_index = randint(0, total_data_length - length)
134
135        # Extract the scenario based on the start index and length
136        scenario = weather_records.iloc[start_index : start_index + length]
137
138        # Save the weather scenario
139        output_path = output_folder / f"Weather{index}.csv"
140        scenario.to_csv(output_path, index=False)
141
142    return scenarios

Split weather records into smaller scenarios following specified scenario lengths. The number of output weather scenarios can be customized using the 'n_output_files' parameter.

Args:

weather_records (DataFrame): weather records where each row represents an hour of data.
scenario_lengths (List[int]): desired lengths (in hours) for each weather scenario.
output_folder : Union[Path,str], optional
    A Path object or a string representing the folder path where the output will be stored.
    If not provided, 'Weathers' directory will be used.
n_output_files : integer, optional
    An integer that indicates how many weather files (scenarios) will be created following the
    distribution of 'scenario_lengths'.
    If not provided, will be set to 100.

Output:

Writes one CSV file per generated weather scenario, based on the specified lengths.

Raises ValueError:

If input 'weather_records' is not a Pandas DataFrame.
If input 'scenario_lengths' is not a List of integers.
If input 'n_output_files' is not an integer.
If any scenario length is greater than the total length of weather_records.

def random_weather_scenario_generator( n_scenarios: int, hr_limit: int = 72, lambda_ws: float = 0.5, lambda_wd: float = 0.5, output_folder: Union[pathlib.Path, str] = PosixPath('Weathers')) -> None: View Source

def random_weather_scenario_generator(
    n_scenarios: int,
    hr_limit: int = 72,
    lambda_ws: float = 0.5,
    lambda_wd: float = 0.5,
    output_folder: Union[Path, str] = Path("Weathers"),
) -> None:
    """Generates random weather scenarios and saves them as CSV files in the specified output folder.

    Each scenario has between 5 and 'hr_limit' hourly rows and is written to
    '<output_folder>/Weather<i>.csv' (i starting at 1) with the columns
    Instance, datetime, WD, WS and FireScenario. The first two hours are drawn at
    random; every later hour is a weighted average of the two preceding hours.

    Args:

        n_scenarios (int): number of weather scenarios to generate.
        hr_limit (int, optional): limit for the number of hours for each scenario (default is 72). Must be at least 5.
        lambda_ws (float, optional): weight given to the previous hour's wind speed; the hour before that gets 1 - lambda_ws (default is 0.5).
        lambda_wd (float, optional): weight given to the previous hour's wind direction; the hour before that gets 1 - lambda_wd (default is 0.5).
        output_folder : Union[Path,str], optional
            Folder where the CSV files are written (default is 'Weathers'). Created if missing.

    Returns:
        None

    Raises:
        ValueError: If 'hr_limit' is lower than 5, the minimum number of rows per scenario.
    """
    # randint(5, hr_limit) below needs a non-empty range; fail early with a clear message
    if hr_limit < 5:
        raise ValueError("Input 'hr_limit' must be at least 5.")

    output_folder = Path(output_folder)
    output_folder.mkdir(parents=True, exist_ok=True)  # Create the output directory if it doesn't exist

    for index in range(1, n_scenarios + 1):
        n_rows = randint(5, hr_limit)

        # Hour 0: random direction (degrees) and speed
        wd = [randint(0, 359)]
        ws = [randint(1, 100)]

        # Hour 1: perturb hour 0 with Gaussian noise. Direction wraps around the
        # compass (e.g. -10 becomes 350); speed is reflected to stay non-negative.
        wd.append((wd[0] + normal(loc=0.0, scale=30.0, size=None)) % 360)
        ws.append(abs(ws[0] + normal(loc=0.0, scale=8.0, size=None)))

        # Remaining hours: weighted average of the two previous hours, each series
        # using its own lambda
        for row in range(2, n_rows):
            wd.append(wd[row - 1] * lambda_wd + wd[row - 2] * (1 - lambda_wd))
            ws.append(ws[row - 1] * lambda_ws + ws[row - 2] * (1 - lambda_ws))

        # Read the clock once so all timestamps of a scenario are exactly one hour apart
        start = datetime.now()
        dt = [(start + timedelta(hours=i)).isoformat(timespec="minutes") for i in range(n_rows)]

        df = DataFrame(
            {
                "Instance": ["NA"] * n_rows,
                "datetime": dt,
                "WD": [float(value) for value in wd],
                "WS": [float(value) for value in ws],
                "FireScenario": [2] * n_rows,
            }
        )
        df.to_csv(output_folder / f"Weather{index}.csv", index=False)

Generates random weather scenarios and saves them as CSV files in the specified output folder.

Args:

n_scenarios (int): number of weather scenarios to generate.
hr_limit (int, optional): limit for the number of hours for each scenario (default is 72).
lambda_ws (float, optional): lambda parameter for wind speed variation (default is 0.5).
lambda_wd (float, optional): lambda parameter for wind direction variation (default is 0.5).
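output_folder : Union[Path,str], optional
    A Path object or a string representing the folder where the CSV files are written (default is 'Weathers').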

Returns: None