Index: trunk/SDToolBox/extract_sea_level_pressure_era5.py
===================================================================
diff -u -r22 -r33
--- trunk/SDToolBox/extract_sea_level_pressure_era5.py (.../extract_sea_level_pressure_era5.py) (revision 22)
+++ trunk/SDToolBox/extract_sea_level_pressure_era5.py (.../extract_sea_level_pressure_era5.py) (revision 33)
@@ -7,6 +7,7 @@
 import sys
 import os
+from datetime import datetime, timedelta
 from SDToolBox import outputmessages as outputmessages
 from SDToolBox import data_acquisition
 from netCDF4 import Dataset
@@ -21,11 +22,16 @@
 class ExtractSeaLevelPressureERA5:
+    __ds_format = 'netCDF4'
     __lon_key = 'longitude'
     __lat_key = 'latitude'
+    __time_key = 'time'
     __input_lon = None
     __input_lat = None
-    __input_data = None
+    __out_lat_key = data_acquisition.OutputData.var_lat_key
+    __out_lon_key = data_acquisition.OutputData.var_lon_key
+    __out_time_key = data_acquisition.OutputData.var_time_key
+    __out_val_key = data_acquisition.OutputData.var_val_key
 
     def __init__(self, input_data: data_acquisition.InputData) :
         """Initialize the waves extraction.
@@ -42,7 +48,7 @@
         input_data.lon_key = self.__lon_key
         input_data.lat_key = self.__lat_key
         self.__input_data = input_data
-        self.__input_lon, self.__input_lat = input_data.coord_list[0]
+        self.__input_lon, self.__input_lat = zip(*input_data.coord_list)
 
     def subset_slp_era5(self, directory_path: str, year_from: int, year_to: int) :
@@ -63,39 +69,133 @@
             list(Dataset) -- collection of netCDF4 subsets per variable.
         """
-        variable_name = 'msl'
+        variable_dict = {
+            'msl': 'msl'
+        }
-        years = generate_years_array_slp(year_from, year_to)
-        case_dataset_list = []
-        #extracts for all the years
-        for year_idx, year in enumerate(years):
-            base_file_name = 'era5_Global_{}_p_{}.nc'.format(variable_name, year)
-            case_file_path = os.path.join(directory_path, base_file_name)
-
+        output_data = data_acquisition.OutputData(variable_dict.keys())
+        cases_dict = output_data.get_data_dict()
+
+        years = self.generate_years_array(year_from, year_to)
+        nn_lon_idx, nn_lat_idx = self.__get_corrected_lon_lat(
+            directory_path, cases_dict
+        )
+
+        for n_variable, variable_name in enumerate(variable_dict):
+            case_name_value = variable_dict[variable_name]
+            cases_dict[self.__out_val_key][variable_name]
+            #extracts for all the years
+            for year_idx, year in enumerate(years):
+                base_file_name = 'era5_Global_{}_p_{}.nc'.format(case_name_value, year)
+                case_file_path = os.path.join(directory_path, base_file_name)
+
                 # If file does not exist simply go to the next one
-            if not os.path.exists(case_file_path):
-                print('File {} does not exist or could not be found.'.format(case_file_path))
-                continue
+                if not os.path.exists(case_file_path):
+                    print('File {} does not exist or could not be found.'.format(case_file_path))
+                    continue
-            with Dataset(case_file_path, 'r', format='netCDF4') as case_dataset:
-                # Get dataset
-                # Find nearest point (considering we are only selecting a point)
+                with Dataset(case_file_path, 'r', self.__ds_format) \
+                        as case_dataset:
+                    # Get dataset
+                    # Find nearest point (considering we are only selecting a point)
+                    cases_dict[self.__out_val_key][variable_name] = \
+                        self.__get_variable_subset(
+                            cases_dict[self.__out_val_key][variable_name],
+                            case_dataset,
+                            variable_name,
+                            (nn_lon_idx, nn_lat_idx)
+                        )
+
+                    # Get the time for the variable.
+                    # add the lines to get the reference time
+                    # automatically just in case
+                    if n_variable == 0:
+                        reftime = case_dataset[self.__time_key].units.split(' ')
+                        # This is an assumption that all the grids have
+                        # the same scale with regard to time.
+                        cases_dict[self.__out_time_key].append(
+                            [datetime.strptime(
+                                reftime[2]+' '+reftime[3],
+                                '%Y-%m-%d %H:%M:%S.%f')
+                             + timedelta(hours=int(ti))
+                             for ti in case_dataset[self.__time_key][:]])
+
+        return output_data
-                lat_list = case_dataset.variables[self.__lat_key][:]
-                lon_list = case_dataset.variables[self.__lon_key][:]
-                corr_lon = data_acquisition.get_nearest_neighbor(self.__input_lon, lon_list)
-                corr_lat = data_acquisition.get_nearest_neighbor(self.__input_lat, lat_list)
-                # Get variable subset
-                variable_subset = case_dataset.variables[variable_name][:corr_lon:corr_lat]
-                case_dataset_list.append(variable_subset)
+    def __get_variable_subset(
+            self,
+            variable_values: list, netcdf_dataset: Dataset,
+            variable_name: str, nn_idx):
+        """Gets the subset of values for the given variable.
-        return case_dataset_list
+
+        Arguments:
+            variable_values {list} -- Stored values.
+            netcdf_dataset {Dataset} -- Input netCDF dataset.
+            variable_name {str} -- Name of the variable.
+            nn_idx {tuple} -- Tuple of lon and lat indices.
+
+        Returns:
+            Array -- Array of values.
+        """
+        nn_lon_idx, nn_lat_idx = nn_idx
+        if variable_values is None:
+            return self.get_case_subset(
+                netcdf_dataset,
+                variable_name,
+                nn_lon_idx,
+                nn_lat_idx)
+        return np.concatenate(
+            (variable_values,
+             self.get_case_subset(
+                 netcdf_dataset,
+                 variable_name,
+                 nn_lon_idx,
+                 nn_lat_idx)),
+            axis=0)
+
+    def __get_corrected_lon_lat(
+            self, directory_path: str, cases_dict: dict):
+        """Gets the corrected index and value for the given input coordinates.
+
+        Arguments:
+            directory_path {str} -- Parent directory.
+            cases_dict {dict} -- Dictionary with all output values to fill.
+
+        Returns:
+            tuple -- Lists of nearest-neighbor lon and lat indices.
+        """
+        nn_lon_idx = []
+        nn_lat_idx = []
+        base_file_name = 'era5_Global_msl_p_1981.nc'
+        ref_file_path = os.path.join(directory_path, base_file_name)
+
+        # Extract index and value for all input lat, lon.
+        with Dataset(ref_file_path, 'r', self.__ds_format) \
+                as ref_dataset:
+            lat_list = ref_dataset.variables[self.__lat_key][:]
+            lon_list = ref_dataset.variables[self.__lon_key][:]
+            for lon_point in self.__input_lon:
+                idx, value = data_acquisition.get_nearest_neighbor(
+                    lon_point,
+                    lon_list)
+                cases_dict[self.__out_lon_key].append(value)
+                nn_lon_idx.append(idx)
+            for lat_point in self.__input_lat:
+                idx, value = data_acquisition.get_nearest_neighbor(
+                    lat_point,
+                    lat_list)
+                cases_dict[self.__out_lat_key].append(value)
+                nn_lat_idx.append(idx)
+        return nn_lon_idx, nn_lat_idx
-# this method could be moved in a generic utils stript outside here
+    @staticmethod
+    def get_case_subset(dataset, variable_name, lon, lat):
+        return dataset[variable_name][:, lat, lon]
-def generate_years_array_slp(year_from, year_to):
-    years = []
-    for i in range(year_to - year_from):
-        years.append(year_from + i) # fills an array of years
-    years.append(year_to)
-    return years
\ No newline at end of file
+    @staticmethod
+    def generate_years_array(year_from, year_to):
+        years = []
+        for i in range(year_to - year_from):
+            years.append(year_from + i) # fills an array of years
+        years.append(year_to)
+        return years
\ No newline at end of file
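
Below is a minimal usage sketch of the revised class, illustrative only and not part of revision 33. It assumes data_acquisition.InputData can be instantiated without arguments and exposes a writable coord_list of (lon, lat) tuples, which this diff does not show, and that OutputData.get_data_dict() returns the dictionary keyed by the var_*_key attributes used above; the directory path, coordinates and year range are placeholders.

# Hypothetical usage sketch; the InputData construction below is an assumption.
from SDToolBox import data_acquisition
from SDToolBox.extract_sea_level_pressure_era5 import ExtractSeaLevelPressureERA5

input_data = data_acquisition.InputData()             # assumed no-arg constructor
input_data.coord_list = [(4.5, 52.0), (5.25, 53.0)]   # (lon, lat) points of interest

extractor = ExtractSeaLevelPressureERA5(input_data)
# Reads era5_Global_msl_p_<year>.nc files from the given directory for 1981-1985.
output_data = extractor.subset_slp_era5('/path/to/era5', 1981, 1985)

cases_dict = output_data.get_data_dict()
msl_values = cases_dict[data_acquisition.OutputData.var_val_key]['msl']
timestamps = cases_dict[data_acquisition.OutputData.var_time_key]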