# --- trunk/tests/test_extract_slp.py (content of revision 33 of the patch) ---
# NOTE(review): this is a unittest-style method; the enclosing TestCase class
# header is not visible in the patch — confirm against the full file.
def test_slp_folder_then_subset_collection_is_extracted(self):
    """Given a folder with yearly ERA5 msl files and four coordinates,
    subsetting years 1981-1982 yields a non-None result.
    """
    # 1. Given
    # When using local data you can just replace the path in these lines:
    # dir_test_data = 'P:\\metocean-data\\open\\ERA5\\data\\Global\\msl_p'
    # HACK: hard-coded absolute developer path — this test only runs on a
    # machine with D:\workspace checked out; consider shipping a small
    # netCDF fixture under tests/test_data instead.
    dir_test_data = 'D:\\workspace\\SD_Toolbox\\trunk\\tests\\test_data\\msl_p'
    input_data = data_acquisition.InputData()
    # Four (lon, lat) duples spanning the test area.
    input_data.coord_list = [(4.2, 2.4), (42, 2.4), (42, 24), (4.2, 24)]

    # 2. When
    extract_slp = extract_sea_level_pressure_era5.ExtractSeaLevelPressureERA5(
        input_data)
    dataset_list = extract_slp.subset_slp_era5(dir_test_data, 1981, 1982)

    # 3. Then
    assert dataset_list is not None
# --- trunk/SDToolBox/extract_sea_level_pressure_era5.py (revision 33) ---
class ExtractSeaLevelPressureERA5:
    """Extract mean sea level pressure from yearly global ERA5 netCDF files
    ('era5_Global_msl_p_<year>.nc') at the grid points nearest to the
    requested coordinates, accumulating results in a
    data_acquisition.OutputData container.
    """

    __ds_format = 'netCDF4'
    __lon_key = 'longitude'
    __lat_key = 'latitude'
    __time_key = 'time'
    __input_lon = None
    __input_lat = None
    __input_data = None

    def __init__(self, input_data):
        """Initialize the sea level pressure extraction.

        Arguments:
            input_data {data_acquisition.InputData} -- carries coord_list,
                a list of (lon, lat) duples.

        Raises:
            ValueError -- when no coordinates were supplied.
                NOTE(review): the original raise statement is collapsed in
                the patch context; confirm the exception type against r33.
        """
        if (not input_data or not input_data.coord_list
                or len(input_data.coord_list) < 1):
            raise ValueError('No input coordinates were provided.')
        # Resolved here rather than at class-definition time so importing
        # this module never evaluates data_acquisition.OutputData eagerly.
        self.__out_lat_key = data_acquisition.OutputData.var_lat_key
        self.__out_lon_key = data_acquisition.OutputData.var_lon_key
        self.__out_time_key = data_acquisition.OutputData.var_time_key
        self.__out_val_key = data_acquisition.OutputData.var_val_key
        input_data.lon_key = self.__lon_key
        input_data.lat_key = self.__lat_key
        self.__input_data = input_data
        # Split the (lon, lat) duples into one lon sequence and one lat
        # sequence, processed pairwise-independently below.
        self.__input_lon, self.__input_lat = zip(*input_data.coord_list)

    def subset_slp_era5(self, directory_path, year_from, year_to):
        """Extract the msl subsets for every year in [year_from, year_to].

        Arguments:
            directory_path {str} -- directory containing the yearly files.
            year_from {int} -- first year (inclusive).
            year_to {int} -- last year (inclusive).

        Returns:
            data_acquisition.OutputData -- snapped coordinates, times and
            extracted values per variable.
        """
        variable_dict = {
            'msl': 'msl'
        }
        output_data = data_acquisition.OutputData(variable_dict.keys())
        cases_dict = output_data.get_data_dict()

        years = self.generate_years_array(year_from, year_to)
        nn_lon_idx, nn_lat_idx = self.__get_corrected_lon_lat(
            directory_path, cases_dict, years
        )

        for n_variable, variable_name in enumerate(variable_dict):
            case_name_value = variable_dict[variable_name]
            # Extract for all the years; missing files are skipped
            # (best effort, as in the original).
            for year in years:
                base_file_name = 'era5_Global_{}_p_{}.nc'.format(
                    case_name_value, year)
                case_file_path = os.path.join(directory_path, base_file_name)
                if not os.path.exists(case_file_path):
                    print('File {} does not exist or could not be found.'
                          .format(case_file_path))
                    continue

                with Dataset(case_file_path, 'r', self.__ds_format) \
                        as case_dataset:
                    # Append this year's values along the time axis.
                    cases_dict[self.__out_val_key][variable_name] = \
                        self.__get_variable_subset(
                            cases_dict[self.__out_val_key][variable_name],
                            case_dataset,
                            variable_name,
                            (nn_lon_idx, nn_lat_idx)
                        )
                    # Collect the time axis only once: we assume all
                    # variables share the same time scale.
                    if n_variable == 0:
                        cases_dict[self.__out_time_key].append(
                            self.__get_time_list(case_dataset))

        return output_data

    def __get_time_list(self, case_dataset):
        """Convert the dataset 'time' variable into datetime objects.

        The reference instant is parsed from the variable's units string,
        which looks like 'hours since 1900-01-01 00:00:00.0'; each stored
        value is an integer hour offset from that instant.
        """
        reftime = case_dataset[self.__time_key].units.split(' ')
        reference = datetime.strptime(
            reftime[2] + ' ' + reftime[3], '%Y-%m-%d %H:%M:%S.%f')
        return [reference + timedelta(hours=int(ti))
                for ti in case_dataset[self.__time_key][:]]

    def __get_variable_subset(
            self,
            variable_values, netcdf_dataset,
            variable_name, nn_idx):
        """Get the subset of values for the given variable.

        Arguments:
            variable_values {numpy.ndarray or None} -- values stored so far.
            netcdf_dataset {Dataset} -- open input netCDF dataset.
            variable_name {str} -- name of the netCDF variable to read.
            nn_idx {duple} -- (lon index list, lat index list).

        Returns:
            numpy.ndarray -- stored values extended along the time axis.
        """
        nn_lon_idx, nn_lat_idx = nn_idx
        subset = self.get_case_subset(
            netcdf_dataset, variable_name, nn_lon_idx, nn_lat_idx)
        if variable_values is None:
            return subset
        return np.concatenate((variable_values, subset), axis=0)

    def __get_corrected_lon_lat(self, directory_path, cases_dict, years):
        """Get nearest-neighbour grid indices and values for all inputs.

        The grid of the first available yearly file is used as reference
        (the grid is assumed year-independent). This replaces the
        hard-coded 'era5_Global_msl_p_1981.nc' of r33.

        Arguments:
            directory_path {str} -- parent directory of the yearly files.
            cases_dict {dict} -- output dictionary; receives the snapped
                lon/lat values.
            years {list} -- candidate years used to locate a reference file.

        Returns:
            duple -- (list of lon indices, list of lat indices).

        Raises:
            FileNotFoundError -- when no yearly file exists at all.
        """
        ref_file_path = self.__find_reference_file(directory_path, years)
        nn_lon_idx = []
        nn_lat_idx = []
        with Dataset(ref_file_path, 'r', self.__ds_format) \
                as ref_dataset:
            lat_list = ref_dataset.variables[self.__lat_key][:]
            lon_list = ref_dataset.variables[self.__lon_key][:]
            for lon_point in self.__input_lon:
                idx, value = data_acquisition.get_nearest_neighbor(
                    lon_point, lon_list)
                cases_dict[self.__out_lon_key].append(value)
                nn_lon_idx.append(idx)
            for lat_point in self.__input_lat:
                idx, value = data_acquisition.get_nearest_neighbor(
                    lat_point, lat_list)
                cases_dict[self.__out_lat_key].append(value)
                nn_lat_idx.append(idx)
        return nn_lon_idx, nn_lat_idx

    def __find_reference_file(self, directory_path, years):
        """Return the path of the first existing yearly msl file."""
        for year in years:
            candidate = os.path.join(
                directory_path, 'era5_Global_msl_p_{}.nc'.format(year))
            if os.path.exists(candidate):
                return candidate
        raise FileNotFoundError(
            'No ERA5 msl file found in {} for years {}-{}.'.format(
                directory_path, years[0], years[-1]))

    @staticmethod
    def get_case_subset(dataset, variable_name, lon, lat):
        """Slice all time steps at the given lat/lon index lists."""
        return dataset[variable_name][:, lat, lon]

    @staticmethod
    def generate_years_array(year_from, year_to):
        """Return the inclusive list of years [year_from, ..., year_to].

        An empty list is returned when year_to < year_from (r33 returned
        the spurious [year_to] in that degenerate case).
        """
        return list(range(year_from, year_to + 1))
# --- trunk/SDToolBox/extract_sea_level_pressure_EARTH.py (revision 33) ---
class ExtractSeaLevelPressureEARTH:
    """Extract mean sea level pressure ('var151') from monthly EC-Earth
    RCP4.5 netCDF files at the grid points nearest to the input coordinates.
    """

    __ds_format = 'netCDF4'
    __lon_key = 'lon'
    __lat_key = 'lat'
    __variable_key = 'var151'
    __input_data = None

    def __init__(self, input_data):
        """Initialize the EC-Earth sea level pressure extraction.

        Arguments:
            input_data {data_acquisition.InputData} -- carries coord_list,
                a list of (lon, lat) duples.

        Raises:
            ValueError -- when no coordinates were supplied.
                NOTE(review): the original raise statement is collapsed in
                the patch context; confirm the exception type against r33.
        """
        if (not input_data or not input_data.coord_list
                or len(input_data.coord_list) < 1):
            raise ValueError('No input coordinates were provided.')
        input_data.lon_key = self.__lon_key
        input_data.lat_key = self.__lat_key
        self.__input_data = input_data
        # BUG FIX: r33 assigned coord_list[0] to the latitudes and
        # coord_list[1] to the longitudes, i.e. it took the first two
        # (lon, lat) duples instead of splitting the list into one lon
        # sequence and one lat sequence as the ERA5 extractor does.
        self.__input_lon, self.__input_lat = zip(*input_data.coord_list)

    def subset_earth_v2(self, directory_path, year_from, year_to):
        """Extract the 'var151' subset for every month of every year in
        [year_from, year_to].

        Arguments:
            directory_path {str} -- parent directory; monthly files are
                expected under its 'RCP45_' subdirectory.
            year_from {int} -- first year (inclusive).
            year_to {int} -- last year (inclusive).

        Returns:
            list -- one squeezed numpy array of values per monthly file.

        NOTE(review): r33's first-iteration branch parsed the time units
        into a datetime and discarded it, and its concatenation branch
        called np.concatenate with a wrong signature on an unassigned
        variable and indexed the 1-D 'time' variable as 3-D; that dead and
        broken code is removed here. Time formatting still TODO — see the
        ERA5 extractor for the intended pattern.
        """
        case_dataset_list = []
        years = generate_years_array_slp(year_from, year_to)
        months_list = ['01', '02', '03', '04', '05', '06',
                       '07', '08', '09', '10', '11', '12']

        for year in years:
            for month in months_list:
                case_file_path = os.path.join(
                    directory_path, 'RCP45_',
                    'EC-Earth_RCP4.5_MSLP_{}{}.nc'.format(year, month))
                with Dataset(case_file_path, 'r', self.__ds_format) \
                        as case_dataset:
                    # Find the nearest grid point for every input coordinate.
                    lat_list = case_dataset.variables[self.__lat_key][:]
                    lon_list = case_dataset.variables[self.__lon_key][:]
                    # Longitude has to be rescaled to -180:180.
                    lon_list = rescale_input_area(lon_list, 180)
                    # BUG FIX: get_nearest_neighbor returns an
                    # (index, value) duple (see the ERA5 module); r33 used
                    # the whole duple as an array index.
                    corr_lon = [
                        data_acquisition.get_nearest_neighbor(
                            input_lon, lon_list)[0]
                        for input_lon in self.__input_lon]
                    corr_lat = [
                        data_acquisition.get_nearest_neighbor(
                            input_lat, lat_list)[0]
                        for input_lat in self.__input_lat]

                    # Get the variable subset for this monthly file.
                    variable_subset = np.squeeze(
                        case_dataset.variables[self.__variable_key]
                        [:, corr_lat, corr_lon])
                    case_dataset_list.append(variable_subset)

        return case_dataset_list


# This method could be moved to a generic utils script outside here.
def generate_years_array_slp(year_from, year_to):
    """Return the inclusive list of years [year_from, ..., year_to].

    An empty list is returned when year_to < year_from (r33 returned the
    spurious [year_to] in that degenerate case).
    """
    return list(range(year_from, year_to + 1))


def rescale_input_area(input_list, scale_factor):
    """Shift every longitude in input_list down by scale_factor, in place.

    BUG FIX: r33 ignored scale_factor and always subtracted 180; the
    parameter is honoured now (identical behaviour for the existing
    call site, which passes 180).

    NOTE(review): a uniform shift maps [0, 360) to [-180, 180) only for
    values above scale_factor; if wrapping (x - 2*scale for x > scale)
    was intended, this needs a follow-up — confirm against the grid.

    Arguments:
        input_list {list} -- longitudes, modified in place.
        scale_factor {int} -- amount subtracted from every element.

    Returns:
        list -- the same (mutated) input_list.
    """
    for idx in range(len(input_list)):
        input_list[idx] = input_list[idx] - scale_factor
    return input_list