Index: trunk/tests/test_output_data.py =================================================================== diff -u -r43 -r44 --- trunk/tests/test_output_data.py (.../test_output_data.py) (revision 43) +++ trunk/tests/test_output_data.py (.../test_output_data.py) (revision 44) @@ -36,7 +36,6 @@ assert data_result[output_data.var_lon_key] is not None assert data_result[output_data.var_lat_key] is not None assert data_result[output_data.var_val_key] is not None - assert data_result[output_data.var_val_key] @pytest.mark.unittest def test_when_list_of_variables_given_then_sets_data_dict(self): @@ -68,8 +67,8 @@ def test_when_given_wave_cases_generates_output(self): # 1. Given # When using local data you can just replace the comment in these lines - dir_test_data = TestUtils.get_local_test_data_dir('netCDF_Waves_data') - # dir_test_data = 'P:\\metocean-data\\open\\ERA5\\data\\Global' + # dir_test_data = TestUtils.get_local_test_data_dir('netCDF_Waves_data') + dir_test_data = 'P:\\metocean-data\\open\\ERA5\\data\\Global' output_test_data = TestUtils.get_local_test_data_dir('output_data') input_data = InputData() input_data.input_variables = ['swh'] Index: trunk/SDToolBox/extract_data.py =================================================================== diff -u -r43 -r44 --- trunk/SDToolBox/extract_data.py (.../extract_data.py) (revision 43) +++ trunk/SDToolBox/extract_data.py (.../extract_data.py) (revision 44) @@ -22,17 +22,17 @@ class ExtractData: - __ds_format = 'netCDF4' - __time_key = 'time' - __lon_key = 'longitude' - __lat_key = 'latitude' - __input_lon = None - __input_lat = None - __input_variables = [] - __out_lat_key = data_acquisition.OutputData.var_lat_key - __out_lon_key = data_acquisition.OutputData.var_lon_key - __out_time_key = data_acquisition.OutputData.var_time_key - __out_val_key = data_acquisition.OutputData.var_val_key + _ds_format = 'netCDF4' + _time_key = 'time' + _lon_key = 'longitude' + _lat_key = 'latitude' + _input_lon = None + _input_lat = 
None + _input_variables = [] + _out_lat_key = data_acquisition.OutputData.var_lat_key + _out_lon_key = data_acquisition.OutputData.var_lon_key + _out_time_key = data_acquisition.OutputData.var_time_key + _out_val_key = data_acquisition.OutputData.var_val_key def __init__(self, input_data: data_acquisition.InputData): """Initialize the waves extraction. @@ -49,8 +49,8 @@ if not input_data.input_variables: raise IOError(om.error_no_valid_list_of_vars) - input_data.lon_key = self.__lon_key - input_data.lat_key = self.__lat_key + input_data.lon_key = self._lon_key + input_data.lat_key = self._lat_key - self.__input_variables = input_data.input_variables - self.__input_lon, self.__input_lat = zip(*input_data.coord_list) + self._input_variables = input_data.input_variables + self._input_lon, self._input_lat = zip(*input_data.coord_list) Index: trunk/tests/test_extract_data_era5.py =================================================================== diff -u -r43 -r44 --- trunk/tests/test_extract_data_era5.py (.../test_extract_data_era5.py) (revision 43) +++ trunk/tests/test_extract_data_era5.py (.../test_extract_data_era5.py) (revision 44) @@ -17,8 +17,8 @@ def test_given_waves_folder_then_subset_collection_is_extracted(self): # 1. Given # When using local data you can just replace the comment in these lines - dir_test_data = TestUtils.get_local_test_data_dir('netCDF_Waves_data') - # dir_test_data = 'P:\\metocean-data\\open\\ERA5\\data\\Global' + # dir_test_data = TestUtils.get_local_test_data_dir('netCDF_Waves_data') + dir_test_data = 'P:\\metocean-data\\open\\ERA5\\data\\Global' input_data = data_acquisition.InputData() input_data.input_variables = ['swh'] @@ -51,75 +51,14 @@ # 2. When extract_data = ExtractDataEra5(input_data) - dataset_list = extract_data.subset_waves(dir_test_data, 1981, 1982) + output_data = extract_data.subset_waves(dir_test_data, 1981, 1982) # 3. 
Then - assert dataset_list is not None - - """ - Checks that the longitude is normalized - if a value higher than 180 is passed - """ - @pytest.mark.unittest - def test_given_longitude_higher_than_180_returns_normalized_longitude(self): - # setup - longitude = 400 - normalized_longitude = longitude - 180 - dir_test_data = 'P:\\metocean-data\\open\\ERA5\\data\\Global\\Hs' - filename = 'era5_Global_Hs_1980.nc' - path = dir_test_data + filename - with Dataset(path, 'r', format='netCDF4') as case_dataset: - input_data = data_acquisition.InputData() - input_data.coord_list = [(4.2, 2.4)] - extract_data = ExtractDataEra5(input_data) - - # call - result = extract_data.check_for_longitude(longitude) - # assert - assert result == normalized_longitude - - """ - Checks that the longitude is unchanged if a value lower than 180 is passed - """ - @pytest.mark.unittest - def test_given_longitude_lower_than_180_returns_normalized_unchanged(self): - longitude = 30 - dir_test_data = 'P:\\metocean-data\\open\\ERA5\\data\\Global\\Hs' - filename = 'era5_Global_Hs_1980.nc' - path = dir_test_data + filename - with Dataset(path, 'r', format='netCDF4') as case_dataset: - input_data = data_acquisition.InputData() - input_data.input_variables = ['swh'] - input_data.coord_list = [(4.2, 2.4)] - extractwaves = ExtractDataEra5(input_data) - - # call - result = extractwaves.check_for_longitude(longitude) - # assert - assert result == 30 - - """ - Checks that array of years if correctly generated - """ - @pytest.mark.unittest - def test_years_array_is_correctly_generated(self): - # setup - year1 = 1980 - yearN = 1983 - result_array = [1980, 1981, 1982, 1983] - result = [] - dir_test_data = 'P:\\metocean-data\\open\\ERA5\\data\\Global\\Hs' - filename = 'era5_Global_Hs_1980.nc' - path = dir_test_data + filename - with Dataset(path, 'r', format='netCDF4') as case_dataset: - input_data = data_acquisition.InputData() - input_data.input_variables = ['swh'] - input_data.coord_list = [(4.2, 2.4)] - 
extractwaves = ExtractDataEra5(input_data) - # call - result = extractwaves.generate_years_array(year1, yearN) - # assert - assert result[0] == result_array[0] - assert result[1] == result_array[1] - assert result[2] == result_array[2] - assert result[3] == result_array[3] + assert output_data is not None + data_dict = output_data.get_data_dict() + assert data_dict is not None + assert data_dict[output_data.var_time_key] is not None + assert data_dict[output_data.var_lon_key] is not None + assert data_dict[output_data.var_lat_key] is not None + assert data_dict[output_data.var_val_key] is not None + assert data_dict[output_data.var_val_key]['swh'] is not None \ No newline at end of file Index: trunk/SDToolBox/extract_data_era5.py =================================================================== diff -u -r43 -r44 --- trunk/SDToolBox/extract_data_era5.py (.../extract_data_era5.py) (revision 43) +++ trunk/SDToolBox/extract_data_era5.py (.../extract_data_era5.py) (revision 44) @@ -6,11 +6,11 @@ # region // imports import sys import os - +from typing import Tuple from datetime import datetime, timedelta from SDToolBox import outputmessages as outputmessages from SDToolBox import data_acquisition -from SDToolBox import extract_data +from SDToolBox.extract_data import ExtractData from netCDF4 import Dataset import numpy as np @@ -21,7 +21,7 @@ # endregion -class ExtractDataEra5(extract_data.ExtractData): +class ExtractDataEra5(ExtractData): __lon_key = 'longitude' __lat_key = 'latitude' @@ -53,13 +53,14 @@ nn_idx = None for n_variable, variable_name in enumerate(filtered_dict): - case_name_value = variable_dict[variable_name] + case_name_value = filtered_dict[variable_name] for year in years: print(year, '-', variable_name) - base_file_name = 'era5_Global_{}_{}.nc'.format(case_name_value, year) + base_file_name = '' + \ + 'era5_Global_{}_{}.nc'.format(case_name_value, year) case_dir = os.path.join(directory_path, case_name_value) case_file_path = os.path.join(case_dir, 
base_file_name) - self.__get_case_subset_from_netcdf( + nn_idx = self.__get_case_subset_from_netcdf( case_file_path, cases_dict, nn_idx, @@ -102,7 +103,7 @@ for n_variable, variable_name in enumerate(filtered_dict): case_name_value = filtered_dict[variable_name] # Extracts for all the years - for year_idx, year in enumerate(years): + for year in years: base_file_name = \ 'era5_Global_{}_p_{}.nc'.format(case_name_value, year) case_file_path = os.path.join(directory_path, base_file_name) @@ -152,7 +153,7 @@ case_name_value = filtered_dict[variable_name] # Extracts for all the years - for year_idx, year in enumerate(years): + for year in years: # Format of the file name is like era5_Global_wind_v_1979.nc base_file_name = '' + \ 'era5_Global_{}_{}.nc'.format(case_name_value, year) @@ -172,7 +173,7 @@ return { k: v for k, v in input_dict.items() - if k in self._ExtractData__input_variables} + if k in self._input_variables} def __get_initial_extraction_data( self, variable_dict: dict, year_from: int, year_to: int): @@ -189,25 +190,39 @@ filtered_dict = self.__get_filtered_dict(variable_dict) output_data = data_acquisition.OutputData( - self._ExtractData__input_variables + self._input_variables ) cases_dict = output_data.get_data_dict() # longitude should be found as the 'x' in the first coordinate of self.__input_lon = [ self.__check_for_longitude(lon) - for lon in self._ExtractData__input_lon] + for lon in self._input_lon] years = self.__generate_years_array(year_from, year_to) return filtered_dict, output_data, cases_dict, years def __get_case_subset_from_netcdf( self, - case_file_path, - cases_dict, - nn_idx, - variable_name, - n_variable): + case_file_path: str, + cases_dict: str, + nn_idx: Tuple[int, int], + variable_name: str, + n_variable: int): + """Gets all the values from a netcdf for the given variable + and delimited nearest neighbors. + + Arguments: + case_file_path {str} -- Path to the netcdf file. + cases_dict {str} -- Output values. 
+ nn_idx {Tuple[int, int]} -- Nearest Neighbors lon/lat. + variable_name {str} -- Name of the variable to extract. + n_variable {int} -- Index of the variable to search. + + Returns: + Tuple[int, int] -- Nearest neighbors lon/lat. + """ + # If file does not exist simply go to the next one if not os.path.exists(case_file_path): print( @@ -221,11 +236,11 @@ ) # Lazy loading of the dataset. - with Dataset(case_file_path, 'r', self._ExtractData__ds_format) \ + with Dataset(case_file_path, 'r', self._ds_format) \ as case_dataset: - cases_dict[self._ExtractData__out_val_key][variable_name] = \ + cases_dict[self._out_val_key][variable_name] = \ self.__get_variable_subset( - cases_dict[self._ExtractData__out_val_key][variable_name], + cases_dict[self._out_val_key][variable_name], case_dataset, variable_name, nn_idx @@ -235,16 +250,17 @@ # add the lines to get the reference time # automatically just in case reftime = \ - case_dataset[self._ExtractData__out_time_key].units.split(' ') + case_dataset[self._out_time_key].units.split(' ') # This is an assumption that all the grids have # the same scale in regards of time. - cases_dict[self._ExtractData__out_time_key].append( + cases_dict[self._out_time_key].extend( [datetime.strptime( reftime[2]+' '+reftime[3], '%Y-%m-%d %H:%M:%S.%f') + timedelta(hours=int(ti)) - for ti in case_dataset[self._ExtractData__time_key][:]] + for ti in case_dataset[self._time_key][:]] ) + return nn_idx def __get_variable_subset( self, @@ -295,21 +311,21 @@ nn_lat_idx = [] # Extract index and value for all input lat, lon. 
- with Dataset(ref_file_path, 'r', self._ExtractData__ds_format) \ + with Dataset(ref_file_path, 'r', self._ds_format) \ as ref_dataset: - lat_list = ref_dataset.variables[self._ExtractData__lat_key][:] - lon_list = ref_dataset.variables[self._ExtractData__lon_key][:] - for lon_point in self._ExtractData__input_lon: + lat_list = ref_dataset.variables[self._lat_key][:] + lon_list = ref_dataset.variables[self._lon_key][:] + for lon_point in self._input_lon: idx, value = data_acquisition.get_nearest_neighbor( lon_point, lon_list) - cases_dict[self._ExtractData__out_lon_key].append(value) + cases_dict[self._out_lon_key].append(value) nn_lon_idx.append(idx) - for lat_point in self._ExtractData__input_lat: + for lat_point in self._input_lat: idx, value = data_acquisition.get_nearest_neighbor( lat_point, lat_list) - cases_dict[self._ExtractData__out_lat_key].append(value) + cases_dict[self._out_lat_key].append(value) nn_lat_idx.append(idx) return nn_lon_idx, nn_lat_idx