Index: trunk/SDToolBox/data_acquisition.py =================================================================== diff -u -r27 -r30 --- trunk/SDToolBox/data_acquisition.py (.../data_acquisition.py) (revision 27) +++ trunk/SDToolBox/data_acquisition.py (.../data_acquisition.py) (revision 30) @@ -2,6 +2,8 @@ from datetime import datetime import os +import SDToolBox.outputmessages as output_messages + from netCDF4 import Dataset from sklearn.neighbors import BallTree as BallTree @@ -152,15 +154,6 @@ __data_dict = {} def __init__(self, var_list: list): - """Creates an OutputData object based on an input - netCDF4 Dataset. - - Arguments: - input_netcdf {Dataset} -- Input Dataset. - """ - if not var_list: - raise IOError('List of variable names should be given.') - """Creates a proxy dictionary with a dictionary of values from the given list. @@ -170,6 +163,9 @@ Returns: dict -- Dictionary of formated input. """ + if not var_list: + raise IOError(output_messages.error_missing_list_of_variables) + values_dict = {} for var in var_list: # Set the values to None so we can @@ -190,36 +186,49 @@ def generate_wave_netcdf( self, - file_path: str, dataset_code: str, - file_format: str, - params: list, - set_wave_variables): - """Generates unified netcdf4 of type 'GTSM' + dir_path: str, + base_name: str, + dataset_code: str, + file_format: str): + """Generates an unified netcdf4 file from the self.__data_dict. Arguments: file_path {str} -- Target file path. + base_name {str} -- Base name for the output file. dataset_code {str} -- Code of the dataset file_format {str} -- Format for the dataset - params {list} -- Parameters to store. Returns: netCDF4.Dataset -- Created dataset. 
""" - file_name = 'NS_{}_{}.nc'.format(file_format, dataset_code) - nc_filename = os.path.join(file_path, file_name) + file_name = '{}_{}_{}.nc'.format( + base_name, file_format, dataset_code) + nc_filename = os.path.join(dir_path, file_name) print('Writing {}'.format(nc_filename)) with Dataset(nc_filename, mode='w') as netcdf: - ts_samples = len(self.__time_values) - n_stations = 42 - self.__set_dataset_structure(netcdf, n_stations, ts_samples) + self.__set_dataset_structure(netcdf) self.__set_wave_variables(netcdf, dataset_code) - self.__set_wave_params(netcdf, dataset_code, params) + self.__set_wave_params(netcdf, dataset_code) self.__set_global_data(netcdf, file_format, dataset_code) self.output_netcdf = netcdf return self.output_netcdf + def __get_station_number(self): + lon_keys = self.__data_dict[self.var_lon_key] + lat_keys = self.__data_dict[self.var_lat_key] + return len(lon_keys) * len(lat_keys) + + def __get_time_samples(self): + time_array = self.__data_dict[self.var_time_key] + if not time_array or len(time_array) == 0: + return 0 + return len(time_array[0]) + + def __get_time_values(self): + return self.__data_dict[self.var_time_key][0] + def __set_global_data( self, netcdf: Dataset, output_format: str, dataset_code: str): """Sets the global data for our new output dataset. 
@@ -252,14 +261,15 @@ # Bounds (spatial, temporal) netcdf.lat_bounds = [ - self.__lat_values.values.min(), - self.__lat_values.values.max()] + min(self.__data_dict[self.var_lat_key]), + max(self.__data_dict[self.var_lat_key])] netcdf.lon_bounds = [ - self.__lon_values.values.min(), - self.__lon_values.values.max()] + min(self.__data_dict[self.var_lon_key]), + max(self.__data_dict[self.var_lon_key])] + netcdf.time_bounds = [ - self.__time_values[0].strftime('%d/%m/%Y %H:%M:%S'), - self.__time_values[-1].strftime('%d/%m/%Y %H:%M:%S')] + self.__get_time_values()[0].strftime('%d/%m/%Y %H:%M:%S'), + self.__get_time_values()[-1].strftime('%d/%m/%Y %H:%M:%S')] netcdf.geospatial_lat_units = 'degrees_north' netcdf.geospatial_lon_units = 'degrees_east' netcdf.time_coverage_resolution = 'hourly' @@ -278,17 +288,18 @@ def __set_dataset_structure( self, - netcdf: Dataset, - number_of_stations: int, samples_in_ts: int): + netcdf: Dataset): """Sets the default dimensions for our standarize outpud netCDF4 file. Arguments: netcdf {Dataset} -- Extracted dataset. - number_of_stations {int} -- Number of observation stations. - samples_in_ts {int} -- Number of samples in timeseries. """ - netcdf.createDimension(self.dim_station_key, number_of_stations) - netcdf.createDimension(self.dim_time_key, samples_in_ts) + netcdf.createDimension( + self.dim_station_key, + self.__get_station_number()) + netcdf.createDimension( + self.dim_time_key, + self.__get_time_samples()) self.__set_time_and_stations(netcdf) self.__set_geo_data(netcdf) @@ -325,14 +336,24 @@ # As offset, # use the minimal occuring value in REAL UNITS, i.e. seconds. 
- time_delta_from_origin = df.index - dt.datetime(1970, 1, 1, 0, 0, 0) - netcdf.variables[self.var_time_key].add_offset = time_delta_from_origin.total_seconds().min() + time_delta_from_origin = [ + time - datetime(1970, 1, 1, 0, 0, 0) + for time in self.__get_time_values()] - # The data is given per hour, therefore: 3600 scale-factor is sufficient - # However: there are more than 65536 hours of data, so it should still be stored as an u4 - netcdf.variables[self.var_time_key].scale_factor = 1 # resolution of one second - netcdf.variables[self.var_time_key][:] = time_delta_from_origin.total_seconds().values # NO scaling when scale_factor is already applied + netcdf.variables[self.var_time_key].add_offset = \ + min(time_delta_from_origin).total_seconds() + # The data is given per hour, therefore: 3600 scale-factor + # is sufficient. However: + # there are more than 65536 hours of data, + # so it should still be stored as an u4 + # resolution of one second + netcdf.variables[self.var_time_key].scale_factor = 1 + # NO scaling when scale_factor is already applied + netcdf.variables[self.var_time_key][:] = [ + time.total_seconds() + for time in time_delta_from_origin] + return netcdf def __set_geo_data(self, netcdf: Dataset): @@ -349,14 +370,14 @@ netcdf, self.var_lat_key, 'station latitude', 'latitude', 'degrees_north', 'Y', - self.__lat_values[0].values + self.__data_dict[self.var_lat_key] ) self.__set_geo_variable( netcdf, self.var_lon_key, 'station longitude', 'longitude', 'degrees_east', 'X', - self.__lon_values[0].values + self.__data_dict[self.var_lon_key] ) # Set projection details. @@ -371,7 +392,7 @@ def __set_wave_params( self, - netcdf: Dataset, dataset_code: str, dict_value: dict): + netcdf: Dataset, dataset_code: str): """ Sets the variables and values for the waves. Arguments: @@ -380,7 +401,8 @@ dict_value {dict} -- Dictionary of input variables. """ # Add data. 
- for par in dict_value.keys: + variables = self.__data_dict[self.var_val_key] + for par in variables.keys(): if par not in netcdf.variables: pass # print( Index: trunk/tests/test_output_data.py =================================================================== diff -u -r23 -r30 --- trunk/tests/test_output_data.py (.../test_output_data.py) (revision 23) +++ trunk/tests/test_output_data.py (.../test_output_data.py) (revision 30) @@ -8,55 +8,78 @@ import string from tests.TestUtils import TestUtils + +from SDToolBox import extract_waves, data_acquisition from SDToolBox.data_acquisition import OutputData +import SDToolBox.outputmessages as output_messages class Test_create: @pytest.mark.unittest - def test_when_no_dataset_given_then_exception_risen(self): + def test_when_no_list_of_variables_given_then_exception_risen(self): output_result = None + expected_error_message = \ + output_messages.error_missing_list_of_variables with pytest.raises(IOError) as e_info: - output_result = OutputData(None, None, None, None) + output_result = OutputData(None) error_message = str(e_info.value) assert output_result is None - assert error_message == 'Original dataset should be given' + assert error_message == expected_error_message, \ + 'Expected {}'.format(expected_error_message) + \ + 'but got {}'.format(error_message) @pytest.mark.unittest - def test_when_no_lat_values_given_then_exception_risen(self): - output_result = None - with pytest.raises(IOError) as e_info: - output_result = OutputData(4.2, None, None, None) + def test_when_list_of_variables_given_then_sets_data_dict(self): + # 1. Given. + var_list = ['dum', 'my'] + data_result = [] + # 2. When + try: + output_data = OutputData(var_list) + data_result = output_data.get_data_dict() + except Exception as e_info: + pytest.fail( + 'Exception thrown but not expected. {}'.format(str(e_info))) - error_message = str(e_info.value) - assert output_result is None - assert error_message == 'Lateral values should be given' + # 3. 
Then + assert data_result is not None + assert data_result[output_data.var_time_key] is not None + assert data_result[output_data.var_lon_key] is not None + assert data_result[output_data.var_lat_key] is not None + assert data_result[output_data.var_val_key] is not None + variables_data = data_result[output_data.var_val_key] + assert variables_data is not None + assert set(variables_data.keys()) == set(var_list) - @pytest.mark.unittest - def test_when_no_lon_values_given_then_exception_risen(self): - output_result = None - with pytest.raises(IOError) as e_info: - output_result = OutputData(4.2, 42, None, None) - error_message = str(e_info.value) - assert output_result is None - assert error_message == 'Longitude values should be given' +class Test_generate_wave_netcdf: - @pytest.mark.unittest - def test_when_no_time_values_given_then_exception_risen(self): - output_result = None - with pytest.raises(IOError) as e_info: - output_result = OutputData(4.2, 42, 2.4, None) + @pytest.mark.systemtest + def test_when_given_wave_cases_generates_output(self): + # 1. Given + # When using local data you can just replace the comment in these lines + # dir_test_data = TestUtils.get_local_test_data_dir('netCDF_Waves_data') + dir_test_data = 'P:\\metocean-data\\open\\ERA5\\data\\Global' - error_message = str(e_info.value) - assert output_result is None - assert error_message == 'Time values should be given' + input_data = data_acquisition.InputData() + input_data.coord_list = [(4.2, 2.4), ] + # 2. When + extract_wave = extract_waves.ExtractWaves(input_data) + output_data = extract_wave.subset_era_5(dir_test_data, 1981, 1982) -class Test_generate_gtsm_netcdf: + try: + netcdf = output_data.generate_wave_netcdf( + dir_path=dir_test_data, + base_name='test_wave', + dataset_code=None, + file_format='CF' + ) + except Exception as e_info: + pytest.fail( + 'Exception thrown but not expected. 
{}'.format(str(e_info))) - @pytest.mark.systemtest - def test_when_given_wave_cases_generates_output(self): - pytest.faile('ToDo') - pass \ No newline at end of file + # 3. Then + assert netcdf is not None Index: trunk/SDToolBox/outputmessages.py =================================================================== diff -u -r2 -r30 --- trunk/SDToolBox/outputmessages.py (.../outputmessages.py) (revision 2) +++ trunk/SDToolBox/outputmessages.py (.../outputmessages.py) (revision 30) @@ -5,4 +5,5 @@ """ Warning messages """ """ Error messages """ -error_missing_arguments = 'Missing arguments -i -o. Run {0} -h for help.' \ No newline at end of file +error_missing_arguments = 'Missing arguments -i -o. Run {0} -h for help.' +error_missing_list_of_variables = 'List of variable names should be given.' \ No newline at end of file Index: trunk/SDToolBox/extract_waves.py =================================================================== diff -u -r29 -r30 --- trunk/SDToolBox/extract_waves.py (.../extract_waves.py) (revision 29) +++ trunk/SDToolBox/extract_waves.py (.../extract_waves.py) (revision 30) @@ -170,6 +170,7 @@ """ nn_lon_idx = [] nn_lat_idx = [] + # TODO: this should be dynamic, not hardcoded. base_file_name = 'Hs\\era5_Global_Hs_1981.nc' ref_file_path = os.path.join(directory_path, base_file_name)