Index: trunk/tests/test_output_data.py =================================================================== diff -u -r40 -r41 --- trunk/tests/test_output_data.py (.../test_output_data.py) (revision 40) +++ trunk/tests/test_output_data.py (.../test_output_data.py) (revision 41) @@ -87,7 +87,7 @@ output_data = extract_wave.subset_era_5(dir_test_data, 1981, 1982) try: - netcdf_filepath = output_data.generate_wave_netcdf( + netcdf_filepath = output_data.generate_netcdf( dir_path=output_test_data, base_name='test_wave', dataset_code=None, @@ -105,7 +105,59 @@ class Test_generate_output: + @pytest.mark.unittest + def test_when_no_arguments_given_no_valid_data_dict_then_raises(self): + # 1. Given + input_data = data_acquisition.InputData() + input_data.input_dict = test_wave_variable_dict + input_data.coord_list = [(4.2, 2.4), ] + output_data = None + generated_output = None + expected_message = output_messages.error_not_initialized_data_dict + + # 2. When + with pytest.raises(IOError) as e_info: + output_data = OutputData(None) + generated_output = output_data.generate_output() + + # 3. Then + assert generated_output is None + assert expected_message == str(e_info.value) + @pytest.mark.systemtest + def test_when_no_arguments_given_valid_data_dict_then_returns_expected_output(self): + # 1. Given + # When using local data you can just replace the comment in these lines + # dir_test_data = TestUtils.get_local_test_data_dir('netCDF_Waves_data') + dir_test_data = 'P:\\metocean-data\\open\\ERA5\\data\\Global' + + input_data = data_acquisition.InputData() + input_data.input_dict = test_wave_variable_dict + input_data.coord_list = [(4.2, 2.4), ] + return_value = None + + # 2. When + extract_wave = extract_waves.ExtractWaves(input_data) + output_data = extract_wave.subset_era_5(dir_test_data, 1981, 1982) + + try: + return_value = output_data.generate_output() + except Exception as e_info: + pytest.fail( + 'Exception thrown but not expected. {}'.format(str(e_info))) + + # 3. Then + output_xarray = output_data.get_xarray() + output_netcdf_filepath = output_data.get_netcdf_filepath() + + assert return_value is not None + assert output_netcdf_filepath is not None + assert os.path.exists(output_netcdf_filepath) + assert output_xarray is not None + + os.remove(output_netcdf_filepath) + + @pytest.mark.systemtest def test_when_given_all_arguments_does_not_raise(self): # 1. Given # When using local data you can just replace the comment in these lines Index: trunk/SDToolBox/data_acquisition.py =================================================================== diff -u -r40 -r41 --- trunk/SDToolBox/data_acquisition.py (.../data_acquisition.py) (revision 40) +++ trunk/SDToolBox/data_acquisition.py (.../data_acquisition.py) (revision 41) @@ -1,8 +1,10 @@ #! /usr/bin/env python from datetime import datetime import os +import random +import string -import SDToolBox.outputmessages as output_messages +import SDToolBox.outputmessages as om from netCDF4 import Dataset from sklearn.neighbors import BallTree as BallTree @@ -167,7 +169,7 @@ """ if not var_list: var_list = [] - print(output_messages.error_missing_list_of_variables) + print(om.error_missing_list_of_variables) values_dict = {} for var in var_list: @@ -192,28 +194,42 @@ def generate_output( self, - dir_path: str, - base_name: str, - dataset_code: str, - file_format: str): + dir_path: str = None, + base_name: str = None, + dataset_code: str = None): """Generates an unified netcdf4 file and its xarray equivalent from the self.__data_dict. - Arguments: - file_path {str} -- Target file path. - base_name {str} -- Base name for the output file. - dataset_code {str} -- Code of the dataset - file_format {str} -- Format for the dataset + Keyword Arguments: + dir_path {str} -- Target file path. (default: {None}) + base_name {str} -- Base name for the output file. (default: {None}) + dataset_code {str} -- Code of the dataset. (default: {None}) Returns: xarray {xarray.Dataset} -- Equivalent xarray of the dict. """ - netcdf_filepath = self.generate_wave_netcdf( + if not self.__data_dict or \ + not self.__data_dict[self.var_lat_key] or \ + not self.__data_dict[self.var_lon_key] or \ + not self.__data_dict[self.var_time_key] or \ + not self.__data_dict[self.var_val_key]: + raise IOError(om.error_not_initialized_data_dict) + + if not dir_path or \ + not base_name or \ + not dataset_code: + # Get current directory + dir_path = os.getcwd() + # temp_generated_netcdf + base_name = 'temp_generated_{}'.format(self.__get_random_str(4)) + dataset_code = 'CF' + + netcdf_filepath = self.generate_netcdf( dir_path=dir_path, base_name=base_name, - dataset_code=dataset_code, - file_format=file_format + dataset_code=dataset_code ) + self.__output_netcdf_file = netcdf_filepath return self.generate_xarray_from_netcdf(netcdf_filepath) @@ -233,33 +249,31 @@ # so basically an importer. return self.__output_xarray - def generate_wave_netcdf( + def generate_netcdf( self, dir_path: str, base_name: str, - dataset_code: str, - file_format: str): + dataset_code: str): """Generates an unified netcdf4 file from the self.__data_dict. Arguments: file_path {str} -- Target file path. base_name {str} -- Base name for the output file. dataset_code {str} -- Code of the dataset - file_format {str} -- Format for the dataset Returns: nc_filename {str} -- Filepath to the generated dataset. """ - file_name = '{}_{}_{}.nc'.format( - base_name, file_format, dataset_code) + file_name = '{}_{}.nc'.format( + base_name, dataset_code) nc_filename = os.path.join(dir_path, file_name) print('Writing {}'.format(nc_filename)) self.__cleanup_data_dict() with Dataset(nc_filename, mode='w', format="NETCDF4") as netcdf: self.__set_dataset_structure(netcdf) self.__set_wave_variables(netcdf, dataset_code) self.__set_wave_data(netcdf, dataset_code) - self.__set_global_data(netcdf, file_format, dataset_code) + self.__set_global_data(netcdf, dataset_code) self.__output_netcdf_file = nc_filename @@ -276,6 +290,11 @@ for key in remove_keys: del self.__data_dict[self.var_val_key][key] + def __get_random_str(self, length: int = 10): + """Generate a random string of fixed length """ + letters = string.ascii_lowercase + return ''.join(random.choice(letters) for i in range(length)) + def __get_station_number(self): lon_keys = self.__data_dict[self.var_lon_key] lat_keys = self.__data_dict[self.var_lat_key] @@ -291,7 +310,7 @@ return self.__data_dict[self.var_time_key] def __set_global_data( - self, netcdf: Dataset, output_format: str, dataset_code: str): + self, netcdf: Dataset, dataset_code: str): """Sets the global data for our new output dataset. see: http://www.unidata.ucar.edu/software/thredds/current/ netcdf-java/formats/DataDiscoveryAttConvention.html @@ -305,7 +324,7 @@ netcdf.Conventions = 'CF-1.4' netcdf.standard_name_vocabulary = 'CF Standard Name Table vNN' - netcdf.title = '{} {}'.format(dataset_code, output_format) + \ + netcdf.title = '{}'.format(dataset_code) + \ ' data North Sea Dutch coast' netcdf.summary = '' + \ 'North Sea Water Level data in coastal gauges for the Dutch coast' Index: trunk/SDToolBox/outputmessages.py =================================================================== diff -u -r40 -r41 --- trunk/SDToolBox/outputmessages.py (.../outputmessages.py) (revision 40) +++ trunk/SDToolBox/outputmessages.py (.../outputmessages.py) (revision 41) @@ -12,3 +12,6 @@ 'Not enough coordinates were provided as InputData.coord_list.' error_no_valid_dict_of_vars = '' + \ 'A dictionary of variables (key: value) should be provided.' +error_not_initialized_data_dict = '' + \ + 'Data dictionary has not been initialized, ' + \ + 'no xarray or netCDF can be created.' \ No newline at end of file Index: trunk/SDToolBox/extract_waves.py =================================================================== diff -u -r40 -r41 --- trunk/SDToolBox/extract_waves.py (.../extract_waves.py) (revision 40) +++ trunk/SDToolBox/extract_waves.py (.../extract_waves.py) (revision 41) @@ -169,11 +169,6 @@ nn_lon_idx = [] nn_lat_idx = [] - # ref_file_path = self.find_default_file(directory_path) - # Hardcoded: - # base_file_name = 'Hs\\era5_Global_Hs_1981.nc' - # ref_file_path = os.path.join(directory_path, base_file_name) - # Extract index and value for all input lat, lon. with Dataset(ref_file_path, 'r', self.__ds_format) \ as ref_dataset: @@ -194,13 +189,6 @@ return nn_lon_idx, nn_lat_idx @staticmethod - def find_default_file(dir: str): - for root, dirs, files in os.walk(dir): - for file in files: - if file.endswith('.nc'): - return os.path.join(root, file) - - @staticmethod def get_case_subset(dataset, variable_name, lon, lat): return dataset[variable_name][:, lat, lon]