Fisheye: Tag 58 refers to a dead (removed) revision in file `trunk/tests/test_main_unit.py'.
Fisheye: Tag 58 refers to a dead (removed) revision in file `trunk/tests/test_extract_wind.py'.
Index: trunk/SDToolBox/main.py
===================================================================
diff -u -r2 -r58
--- trunk/SDToolBox/main.py	(.../main.py)	(revision 2)
+++ trunk/SDToolBox/main.py	(.../main.py)	(revision 58)
@@ -6,7 +6,7 @@
 # region // imports
 import sys
 import argparse
-from SDToolBox import outputmessages as outputmessages
+from SDToolBox import output_messages as om
 # endregion

 # region // Variables
@@ -23,7 +23,8 @@
     __fileOutput = output_file

     if __fileInput == '' or __fileOutput == '':
-        __report_expected_arguments(outputmessages.error_missing_arguments.format('main.py'))
+        __report_expected_arguments(
+            om.error_missing_arguments.format('main.py'))
         return

     print('Input file: {0}'.format(__fileInput))
Fisheye: Tag 58 refers to a dead (removed) revision in file `trunk/SDToolBox/extract_data_era5.py'.
Fisheye: Tag 58 refers to a dead (removed) revision in file `trunk/SDToolBox/data_acquisition.py'.
Index: trunk/tests/test_extract_data.py
===================================================================
diff -u -r47 -r58
--- trunk/tests/test_extract_data.py	(.../test_extract_data.py)	(revision 47)
+++ trunk/tests/test_extract_data.py	(.../test_extract_data.py)	(revision 58)
@@ -2,117 +2,75 @@
 import os
 from os import path
 import netCDF4
+from typing import List, Set, Dict, Tuple, Optional
+
 from tests.TestUtils import TestUtils
 from netCDF4 import Dataset

 from SDToolBox import main as main
+from SDToolBox.input_data import InputData
 from SDToolBox.extract_data import ExtractData
-from SDToolBox.data_acquisition import InputData
-import SDToolBox.outputmessages as om
+import SDToolBox.output_messages as om


-class Test_create:
+class Test_get_era5:

-    @pytest.mark.unittest
-    def test_given_no_input_data_then_exception_is_risen(self):
+    @pytest.mark.systemtest
+    def test_given_list_of_coordinates_then_subset_is_extracted(self):
         # 1. Given
-        input_data = None
-        output_result = None
-        expected_error = om.error_no_valid_input_data
-        # 2. When
-        with pytest.raises(IOError) as e_info:
-            output_result = ExtractData(input_data)
+        # When using local data you can just replace the comment in these lines
+        dir_test_data = TestUtils.get_local_test_data_dir('era5_test_data')
+        # dir_test_data = 'P:\\metocean-data\\open\\ERA5\\data\\Global'

-        # 3. Then
-        error_message = str(e_info.value)
-        assert output_result is None
-        assert error_message == expected_error, '' + \
-            'Expected exception message {},'.format(expected_error) + \
-            'retrieved {}'.format(error_message)
-
-    @pytest.mark.unittest
-    def test_given_no_input_coords_then_exception_is_risen(self):
-        # 1. Given
         input_data = InputData()
-        input_data.input_coordinates = []
-        input_data.input_years = [2014]
-        input_data.input_variables = ['var']
-        data_extractor = None
-        expected_error = om.error_not_enough_coordinates
-        # 2. When
-        with pytest.raises(IOError) as e_info:
-            data_extractor = ExtractData(input_data)
+        input_data.input_variables = ['swh']
+        input_data.input_years = [1981, 1982]
+        input_data.input_coordinates = \
+            [(4.2, 2.4), (42, 2.4), (42, 24), (4.2, 24)]

-        # 3. Then
-        error_message = str(e_info.value)
-        assert data_extractor is None
-        assert error_message == expected_error, '' + \
-            'Expected exception message {},'.format(expected_error) + \
-            'retrieved {}'.format(error_message)
-
-    @pytest.mark.unittest
-    def test_given_no_list_of_vars_then_exception_is_risen(self):
-        # 1. Given
-        input_data = InputData()
-        input_data.input_coordinates = [(4.2, 42)]
-        input_data.input_years = [2014]
-        input_data.input_variables = None
-        data_extractor = None
-        expected_error = om.error_no_valid_list_of_vars

         # 2. When
-        with pytest.raises(IOError) as e_info:
-            data_extractor = ExtractData(input_data)
+        output_data = ExtractData.get_era_5(
+            dir_test_data,
+            input_data)

         # 3. Then
-        error_message = str(e_info.value)
-        assert data_extractor is None
-        assert error_message == expected_error, '' + \
-            'Expected exception message {},'.format(expected_error) + \
-            'retrieved {}'.format(error_message)
+        assert output_data is not None
+        data_dict = output_data.get_data_dict()
+        assert data_dict is not None
+        assert data_dict[output_data.var_time_key] is not None
+        assert data_dict[output_data.var_lon_key] is not None
+        assert data_dict[output_data.var_lat_key] is not None
+        assert data_dict[output_data.var_val_key] is not None
+        assert data_dict[output_data.var_val_key]['swh'] is not None

-    @pytest.mark.unittest
-    def test_given_no_list_of_years_then_exception_is_risen(self):
+    @pytest.mark.systemtest
+    @pytest.mark.parametrize(
+        "input_variables",
+        [(['swh']), (['u10']), (['msl_p'])],
+        ids=['Waves', 'Wind', 'SLP'])
+    def test_when_extract_single_point_from_era5_dir_then_returns_output(
+            self, input_variables: List[str]):
         # 1. Given
-        input_data = InputData()
-        input_data.input_coordinates = [(4.2, 42)]
-        input_data.input_variables = ['var']
-        input_data.input_years = None
-        data_extractor = None
-        expected_error = om.error_no_valid_list_of_years
-        # 2. When
-        with pytest.raises(IOError) as e_info:
-            data_extractor = ExtractData(input_data)
+        # When using local data you can just replace the comment in these lines
+        dir_test_data = TestUtils.get_local_test_data_dir('era5_test_data')
+        # dir_test_data = 'P:\\metocean-data\\open\\ERA5\\data\\Global'

-        # 3. Then
-        error_message = str(e_info.value)
-        assert data_extractor is None
-        assert error_message == expected_error, '' + \
-            'Expected exception message {},'.format(expected_error) + \
-            'retrieved {}'.format(error_message)
-
-
-    @pytest.mark.unittest
-    def test_given_valid_input_args_then_data_is_set(self):
-        # 1. Given
         input_data = InputData()
-        input_data.input_coordinates = [(42, 4.2)]
-        input_data.input_variables = ['swh']
-        input_data.input_years = [1942]
-        data_extractor = None
-        expected_lon_list, expected_lat_list = \
-            zip(*input_data.input_coordinates)
+        input_data.input_variables = input_variables
+        input_data.input_coordinates = [(4.2, 2.4), ]
+        input_data.input_years = [1981, 1982]

         # 2. When
-        try:
-            data_extractor = ExtractData(input_data)
-        except Exception as e_info:
-            pytest.fail(
-                'Exception risen {}'.format(str(e_info)) +
-                ' but not expected.')
+        output_data = ExtractData.get_era_5(
+            dir_test_data,
+            input_data)

         # 3. Then
Then - assert data_extractor is not None - assert data_extractor._input_variables == input_data.input_variables - assert data_extractor._input_years == input_data.input_years - assert data_extractor._input_lon == expected_lon_list - assert data_extractor._input_lat == expected_lat_list + assert output_data is not None + data_dict = output_data.get_data_dict() + assert data_dict is not None + assert data_dict[output_data.var_time_key] is not None + assert data_dict[output_data.var_lon_key] is not None + assert data_dict[output_data.var_lat_key] is not None + assert data_dict[output_data.var_val_key] is not None + assert data_dict[output_data.var_val_key]['swh'] is not None Fisheye: Tag 58 refers to a dead (removed) revision in file `trunk/tests/test_extract_slp.py'. Fisheye: No comparison available. Pass `N' to diff? Index: trunk/SDToolBox/extract_data.py =================================================================== diff -u -r52 -r58 --- trunk/SDToolBox/extract_data.py (.../extract_data.py) (revision 52) +++ trunk/SDToolBox/extract_data.py (.../extract_data.py) (revision 58) @@ -7,9 +7,12 @@ import sys import os +from typing import List, Set, Dict, Tuple, Optional from datetime import datetime, timedelta -from SDToolBox import outputmessages as om -from SDToolBox import data_acquisition +from SDToolBox import output_messages as om +from SDToolBox.input_data import InputData +from SDToolBox.output_data import OutputData + from netCDF4 import Dataset import numpy as np @@ -21,32 +24,322 @@ class ExtractData: + _era5_lon_key = 'longitude' + _era5_lat_key = 'latitude' - _ds_format = 'netCDF4' - _time_key = 'time' - _input_lon = None - _input_lat = None - _input_variables = [] - _input_years = [] - _out_lat_key = data_acquisition.OutputData.var_lat_key - _out_lon_key = data_acquisition.OutputData.var_lon_key - _out_time_key = data_acquisition.OutputData.var_time_key - _out_val_key = data_acquisition.OutputData.var_val_key - _input_EARTH_scenario = ' ' + _era5_var_dict = { + 'swh': 'Hs', + 'pp1d': 'Tp', + 'mwd': 'MWD', + 'mwp': 'Tm', + 'msl': 'msl', + 'u10': 'wind_u', + 'v10': 'wind_v' + } - def __init__(self, input_data: data_acquisition.InputData): - """Initialize the waves extraction. + @staticmethod + def get_era_5(directory_path: str, input_data: InputData): + """Extracts a collection of netCDF4 ERA5 subsets based on the + provided input_data. Arguments: - input_data {data_acquisition.InputData} -- Required. + directory_path {str} -- Location of all the variable diretories. + input_data {InputData} -- Data structure with input parameters. + + Returns: + OutputData -- Output data object to generate xarray or netCDF4. """ - # verify input_data not none - if not input_data: - raise IOError(om.error_no_valid_input_data) - input_data.validate() + # Set ERA5 min / max longitudes. + input_data.min_longitude = -180 + input_data.max_longitude = 180 - self._input_EARTH_scenario = input_data.input_EARTH_scenario - self._input_variables = input_data.input_variables - self._input_years = input_data.input_years - self._input_lon, self._input_lat = zip(*input_data.input_coordinates) + # Define extractor. + data_extractor = ExtractData.__Era5Extractor() + + return data_extractor.extract_subset( + directory_path=directory_path, + input_data=input_data + ) + + def get_era5_GTSM(self, directory_path: str): + """Extracts a collection of netCDF4 subsets based on the input data + set when creating the extract_sea_level_pressure object. 
+
+        Arguments:
+            directory_path {str} -- Location of all the variable directories.
+            area_latitude {list} -- List with x,y coordinate for the latitude
+            area_longitude {list} -- List with x,y coordinate for the longitude
+            year_from {int} -- Start of time data to subtract.
+            year_to {int} -- End of time data to subtract.
+
+        We suppose that the input is already a point of coordinates
+        lon and lat, since the point is
+        already calculated previously (see Slp_Grd_ERA5_extractR.m lines 11-41)
+
+        Returns:
+            list(Dataset) -- collection of netCDF4 subsets per variable.
+        """
+
+        # filtered_dict, output_data, cases_dict = \
+        #     self.__get_initial_extraction_data()
+        # nn_idx = None
+        # for n_variable, variable_name in enumerate(filtered_dict):
+        #     case_name_value = filtered_dict[variable_name]
+        #     # Extracts for all the years
+        #     for year in self._input_years:
+        #         # Format of the file name is like era5_Global_wind_v_1979.nc
+        #         base_file_name = \
+        #             'era5_Global_{}_{}.nc'.format(case_name_value, year)
+        #         case_file_path = os.path.join(directory_path, base_file_name)
+        #         self.__get_case_subset_from_netcdf(
+        #             case_file_path,
+        #             cases_dict,
+        #             nn_idx,
+        #             variable_name,
+        #             n_variable
+        #         )
+        # return output_data
+        pass
+
+    @staticmethod
+    def get_nearest_neighbor(value, data_array):
+        """
+        Search for the nearest decimal degree in an array of decimal degrees
+        and return the index.
+        np.argmin returns the indices of the minimum value along an axis,
+        so subtract value from all values in data_array, take the absolute
+        value and find the index of the minimum.
+        """
+        index_found = (np.abs(data_array - value)).argmin()
+        value_found = data_array[index_found]
+        return index_found, value_found
+
+    class __Era5Extractor:
+
+        def extract_subset(self, directory_path: str, input_data: InputData):
+            """Extracts an ERA5 subset given a directory path and
+            the input data.
+
+            Arguments:
+                directory_path {str}
+                    -- Location of the files organised as var/year_file.
+                input_data {InputData} -- Input data
+
+            Returns:
+                OutputData -- Object with the extracted data.
+            """
+            # Validate the input data.
+            input_data.validate()
+
+            # Get initial values.
+            filtered_dict = {
+                k: v
+                for k, v in ExtractData._era5_var_dict.items()
+                if k in input_data.input_variables}
+            output_data = OutputData(input_data.input_variables)
+            nn_idx = None
+            # Iterate over all possible combinations of variable-year.
+            for n_variable, variable_name in enumerate(filtered_dict):
+                for year in input_data.input_years:
+                    case_file_path = self.__get_netcdf_file_name(
+                        dir_path=directory_path,
+                        variable_key=filtered_dict.get(variable_name),
+                        year=year
+                    )
+                    # Process the subset for the file.
+                    print(
+                        'Extracting variable: {},'.format(variable_name) +
+                        ' year {}.'.format(year))
+                    nn_idx = self.__get_case_subset_from_netcdf(
+                        case_file_path=case_file_path,
+                        input_data=input_data,
+                        output_data=output_data,
+                        nn_idx=nn_idx,
+                        variable_name=variable_name,
+                        n_variable=n_variable
+                    )
+            return output_data
+
+        def __get_netcdf_file_name(
+                self,
+                dir_path: str,
+                variable_key: str,
+                year: int):
+            """Gets the era5 filepath.
+
+            Arguments:
+                dir_path {str} -- Parent directory.
+                variable_key {str} -- Variable in file name.
+                year {int} -- Year in file name.
+
+            Returns:
+                str -- File path location based on ERA5 format.
+ """ + # Find the matching file + base_file_name = '' + \ + 'era5_Global_{}_{}.nc'.format(variable_key, year) + case_dir = os.path.join(dir_path, variable_key) + file_path = os.path.join(case_dir, base_file_name) + return file_path + + def __get_case_subset_from_netcdf( + self, + case_file_path: str, + input_data: InputData, + output_data: OutputData, + nn_idx: Tuple[int, int], + variable_name: str, + n_variable: int): + """Gets all the values from a netcdf for the given variable + and delimited nearest neighbors. + + Arguments: + case_file_path {str} -- Path to the netcdf file. + input_data {InputData} -- Data structure with input params. + output_data {OutputData} -- Output values. + nn_idx {Tuple[int, int]} -- Nearest Neighbors lon/lat. + variable_name {str} -- Name of the variable to extract. + n_variable {int} -- Index of the variable to search. + + Returns: + Tuple[int, int] -- Nearest neigbors lon/lat. + """ + + # If file does not exist simply go to the next one + if not os.path.exists(case_file_path): + print( + 'File {}'.format(case_file_path) + + 'does not exist or could not be found.') + return + + cases_dict = output_data.get_data_dict() + + if not nn_idx: + nn_idx = self.__get_nearest_neighbors_lon_lat( + ref_file_path=case_file_path, + input_data=input_data, + cases_dict=cases_dict + ) + + # Lazy loading of the dataset. + with Dataset(case_file_path, 'r', output_data._ds_format) \ + as case_dataset: + cases_dict[OutputData.var_val_key][variable_name] = \ + self.__get_variable_subset( + cases_dict[OutputData.var_val_key][variable_name], + case_dataset, + variable_name, + nn_idx + ) + # Get the time for the variable. + if n_variable == 0: + # add the lines to get the reference time + # automatically just in case + reftime = \ + case_dataset[OutputData.var_time_key].units.split(' ') + # This is an assumption that all the grids have + # the same scale in regards of time. + cases_dict[OutputData.var_time_key].extend( + [datetime.strptime( + reftime[2]+' '+reftime[3], + '%Y-%m-%d %H:%M:%S.%f') + + timedelta(hours=int(ti)) + for ti in case_dataset['time'][:]] + ) + return nn_idx + + def __get_variable_subset( + self, + variable_values: list, + netcdf_dataset: Dataset, + variable_name: str, + nn_idx): + """Gets the subset of vaues for the given variable. + + Arguments: + variable_values {list} -- Stored values. + netcdf_dataset {Dataset} -- Input netCDF dataset. + variable_name {str} -- Name of the variable. + nn_idx {duple} -- Duple of lon or lat index. + + Returns: + Array -- Array of values. + """ + nn_lon_idx, nn_lat_idx = nn_idx + if variable_values is None: + return self.__get_case_subset( + dataset=netcdf_dataset, + variable_name=variable_name, + lon=nn_lon_idx, + lat=nn_lat_idx) + return np.concatenate( + (variable_values, + self.__get_case_subset( + dataset=netcdf_dataset, + variable_name=variable_name, + lon=nn_lon_idx, + lat=nn_lat_idx)), + axis=0) + + def __get_nearest_neighbors_lon_lat( + self, + ref_file_path: str, + input_data: InputData, + cases_dict: dict): + """Gets the corrected index and value for the given + input coordinates. + + Arguments: + directory_path {str} -- Parent directory. + input_data {InputData} -- Input data. + cases_dict {Dict[str,str]} + -- Dictionary with all values that need format. + + Returns: + Tuple[int, int] -- Indices of nearest neighbors. + """ + # Extract index and value for all input lat, lon. 
+            with Dataset(ref_file_path, 'r', OutputData._ds_format) \
+                    as ref_dataset:
+                nn_lat_idx = self.__set_nn(
+                    input_values=input_data._input_lat,
+                    reference_list=ref_dataset.variables[
+                        ExtractData._era5_lat_key][:],
+                    output_values=cases_dict[OutputData.var_lat_key]
+                )
+                nn_lon_idx = self.__set_nn(
+                    input_values=input_data._input_lon,
+                    reference_list=ref_dataset.variables[
+                        ExtractData._era5_lon_key][:],
+                    output_values=cases_dict[OutputData.var_lon_key],
+                )
+            return nn_lon_idx, nn_lat_idx
+
+        @staticmethod
+        def __set_nn(
+                input_values: List[float],
+                reference_list: List[float],
+                output_values: List[float]):
+            """Sets the nearest neighbor for all the elements
+            given in the points_list.
+
+            Arguments:
+                input_values {List[float]} -- List of elements to correct.
+                reference_list {List[float]} -- Available neighbor list.
+                output_values {List[float]} -- Corrected values.
+
+            Returns:
+                List[int] -- Indices of the nearest neighbor positions.
+            """
+            output_idx = []
+            for point in input_values:
+                idx, value = ExtractData.get_nearest_neighbor(
+                    point,
+                    reference_list)
+                output_values.append(value)
+                output_idx.append(idx)
+            return output_idx
+
+        def __get_case_subset(self, dataset, variable_name, lon, lat):
+            return dataset[variable_name][:, lat, lon]
Fisheye: Tag 58 refers to a dead (removed) revision in file `trunk/tests/test_extract_data_era5.py'.
Index: trunk/tests/test_input_data.py
===================================================================
diff -u -r50 -r58
--- trunk/tests/test_input_data.py	(.../test_input_data.py)	(revision 50)
+++ trunk/tests/test_input_data.py	(.../test_input_data.py)	(revision 58)
@@ -8,22 +8,42 @@
 import string

 from tests.TestUtils import TestUtils
-from SDToolBox.data_acquisition import InputData
-import SDToolBox.outputmessages as om
+from SDToolBox.input_data import InputData
+import SDToolBox.output_messages as om


-class Test_CreateDataAcquisition:
+class Test_CreateInputData:

     @pytest.mark.unittest
     def test_when_create_empty_dataacquistion_no_exception_risen(self):
         try:
             InputData()
         except Exception as e_info:
-            err_mssg = 'Error while creating DataAquisition object.' + \
+            err_mssg = 'Error while creating Input data object.' + \
                 '{}'.format(str(e_info))
             pytest.fail(err_mssg)

+    @pytest.mark.unittest
+    def test_when_given_args_sets_them(self):
+        # 1. Given
+        input_variables = ['du', 'du', 'mmy']
+        input_coordinates = [(42, 4.2)]
+        input_years = [1989, 1990, 1988, 1989]

+        # 2. When
+        input_data = InputData(
+            input_coordinates=input_coordinates,
+            input_variables=input_variables,
+            input_years=input_years
+        )
+
+        # 3. Then
+        assert input_data is not None
+        assert input_data.input_coordinates == input_coordinates
+        assert input_data.input_variables == input_variables
+        assert input_data.input_years == input_years
+
+
 class Test_Validate:

     @pytest.mark.unittest
@@ -135,6 +155,7 @@
         assert input_data.input_variables == expected_input_variables
         assert input_data.input_years == expected_input_years

+
 class Test_ValidateInputYears:

     @pytest.mark.unittest
@@ -317,3 +338,46 @@

         # 3. Then
         assert validation_result
+
+    @pytest.mark.integrationtest
+    def test_given_valid_args_extracts_lon_lat(self):
+        # 1. Given
+        input_data = InputData()
+        input_data.input_coordinates = [(42, 4.2), (24, 2.4)]
+        expected_lon = [42, 24]
+        expected_lat = [4.2, 2.4]
+        validation_result = None
+
+        # 2. When
+        try:
+            validation_result = input_data.validate_input_coordinates()
+        except Exception as e_info:
+            pytest.fail(
+                'Exception risen {}'.format(str(e_info)) +
+                ' but not expected.')
+
+        # 3. Then
+        assert validation_result
+        assert list(input_data._input_lat) == expected_lat
+        assert list(input_data._input_lon) == expected_lon
+
+    @pytest.mark.integrationtest
+    def test_given_valid_args_when_max_lon_set_then_corrects_lon(self):
+        pytest.fail('To Do, not sure about the validation rules yet.')
+
+    @pytest.mark.integrationtest
+    def test_given_no_max_min_lon_when_validate_then_raises(self):
+        # 1. Given
+        input_data = InputData()
+        input_data.input_coordinates = [(42, 4.2), (24, 2.4)]
+        input_data.max_longitude = None
+        validation_result = None
+        expected_message = om.error_max_lon_not_set
+
+        # 2. When
+        with pytest.raises(Exception) as e_info:
+            validation_result = input_data.validate_input_coordinates()
+
+        # 3. Then
+        assert validation_result is None
+        assert expected_message == str(e_info.value)
Index: trunk/SDToolBox/input_data.py
===================================================================
diff -u
--- trunk/SDToolBox/input_data.py	(revision 0)
+++ trunk/SDToolBox/input_data.py	(revision 58)
@@ -0,0 +1,167 @@
+#! /usr/bin/env python
+"""
+Input data structure for the SDToolBox extraction methods.
+"""
+
+# region // imports
+from typing import List, Set, Dict, Tuple, Optional
+from datetime import datetime
+import os
+import random
+import string
+
+import SDToolBox.output_messages as om
+
+# endregion
+
+# region // variables
+
+# endregion
+
+
+class InputData:
+
+    # input scenarios for check
+    input_coordinates = []
+    input_variables = []
+    input_years = []
+
+    # list of Lon Lat, for now they get extracted from the
+    # input_coordinates.
+    _input_lon = []
+    _input_lat = []
+
+    # Earth scenarios?
+    input_EARTH_scenario = None
+    values_EARTH_scenarios = ['RCP45', 'RCP85', 'HIST']
+
+    # These parameters are set in the extraction methods
+    # Default values [-180, 180]
+    min_longitude = -180
+    max_longitude = 180
+
+    def __init__(
+            self,
+            input_coordinates: Optional[List[Tuple[float, float]]] = None,
+            input_variables: Optional[List[str]] = None,
+            input_years: Optional[List[int]] = None):
+        """Constructor of InputData data structure that can
+        be used by the extract subset methods.
+
+        Keyword Arguments:
+            input_coordinates {List[Tuple[float, float]]}
+                -- List of coordinates (lon, lat) (default: {None})
+            input_variables {List[str]}
+                -- List of variables to extract. (default: {None})
+            input_years {List[int]}
+                -- List of years as integers. (default: {None})
+        """
+        # None instead of mutable default arguments, so that
+        # instances do not share the same list objects.
+        self.input_coordinates = input_coordinates or []
+        self.input_variables = input_variables or []
+        self.input_years = input_years or []
+
+    def validate(self):
+        """Validates the data structure based on the given input
+        parameters.
+
+        Returns:
+            Boolean -- Whether the validation succeeds or not.
+        """
+        valid_dates = self.validate_input_years()
+        if not valid_dates:
+            return False
+
+        valid_coords = self.validate_input_coordinates()
+        if not valid_coords:
+            return False
+
+        return self.validate_input_variables()
+
+    def validate_input_coordinates(self):
+        """Validates whether input coordinates have been
+        set in the format list(Tuple(float, float)).
+
+        Raises:
+            IOError: Not enough coordinates given.
+
+        Returns:
+            True -- True when there are values.
+ """ + if not self.input_coordinates or \ + len(self.input_coordinates) < 1: + raise IOError(om.error_not_enough_coordinates) + self.__extract_input_lon_lat() + return True + + def validate_input_variables(self): + """Validates whether the input variables list + contains elements or not. Removes duplicates. + + Raises: + IOError: Exception when no values are set. + + Returns: + True -- True when there are values. + """ + if not self.input_variables: + raise IOError(om.error_no_valid_list_of_vars) + self.input_variables = list(set(self.input_variables)) + return True + + def validate_input_years(self): + """Validates the input_years is valid and refines the list + to avoid duplications and unordered years. + + Returns: + Boolean -- Dates validation. + """ + # Verify date is valid. + if not self.input_years: + raise IOError(om.error_no_valid_list_of_years) + + self.input_years = sorted(set(self.input_years)) + return True + + def validate_input_scenario(self): + """Validates the EARTH_Scenario is valid and refines the list + to avoid duplications and unordered years. + + Arguments: + EART_ scenario {datetime} -- one of the scenarios. + + Returns: + Boolean -- Dates validation. + """ + # Verify date is valid. + if not self: + raise IOError(om.error_no_valid_list_of_years) + if self.input_EARTH_scenario not in self.values_EARTH_scenarios: + raise IOError(om.error_no_valid_list_of_years) + return True + + def __extract_input_lon_lat(self): + self._input_lon, self._input_lat = zip(*self.input_coordinates) + self._input_lat = list(self._input_lat) + self._input_lon = [ + self.__get_corrected_longitude(lon) + for lon in self._input_lon] + + def __get_corrected_longitude(self, longitude: int): + """Corrects a longitude if it is outside the + defined boundaries. + + Arguments: + longitude {int} -- Longitude to validate. + + Raises: + Exception: Value outside boundaries. + + Returns: + float -- Corrected longitude. + """ + if self.max_longitude is None: + raise Exception(om.error_max_lon_not_set) + + if longitude > self.max_longitude: + return longitude-self.max_longitude + return longitude Index: trunk/SDToolBox/extract_data_EARTH.py =================================================================== diff -u -r55 -r58 --- trunk/SDToolBox/extract_data_EARTH.py (.../extract_data_EARTH.py) (revision 55) +++ trunk/SDToolBox/extract_data_EARTH.py (.../extract_data_EARTH.py) (revision 58) @@ -6,11 +6,9 @@ # region // imports import sys import os -from typing import Tuple +from typing import List, Set, Dict, Tuple, Optional from datetime import datetime, timedelta -from SDToolBox import outputmessages as outputmessages -from SDToolBox import data_acquisition -from SDToolBox.extract_data import ExtractData +from SDToolBox.output_data import OutputData from netCDF4 import Dataset import numpy as np @@ -21,7 +19,7 @@ # endregion -class ExtractDataEARTH(ExtractData): +class ExtractDataEARTH(): __lon_key = 'lon' __lat_key = 'lat' @@ -47,8 +45,7 @@ nn_idx = None for n_variable, variable_name in enumerate(filtered_dict): - case_name_value = filtered_dict[variable_name] - for yearidx, year in enumerate(self._input_years): + for year in self._input_years: if month < 10: base_file_name = self._input_EARTH_scenario + '_\\' + 'EC-Earth_RCP4.5_MSLP_' + str(year) + '0' + str(month) + '.nc' else: @@ -68,16 +65,16 @@ return output_data def __get_filtered_dict(self): - """Filters the defined dictionary with only the - values provided by the user as input_variables. 
+ """Filters the defined dictionary with only the + values provided by the user as input_variables. - Returns: - dict -- Dictionary of type str: str. - """ - return { - k: v - for k, v in self._variable_dict.items() - if k in self._input_variables} + Returns: + dict -- Dictionary of type str: str. + """ + return { + k: v + for k, v in self._variable_dict.items() + if k in self._input_variables} def __get_initial_extraction_data(self): """Gets the basic elements for extracting ERA5 data. @@ -88,7 +85,7 @@ """ filtered_dict = self.__get_filtered_dict() - output_data = data_acquisition.OutputData( + output_data = OutputData( self._input_variables ) cases_dict = output_data.get_data_dict() @@ -136,9 +133,9 @@ # Lazy loading of the dataset. with Dataset(case_file_path, 'r', self._ds_format) \ as case_dataset: - cases_dict[self._out_val_key][variable_name] = \ + cases_dict[OutputData.var_val_key][variable_name] = \ self.__get_variable_subset( - cases_dict[self._out_val_key][variable_name], + cases_dict[OutputData.var_val_key][variable_name], case_dataset, variable_name, nn_idx @@ -148,10 +145,10 @@ # add the lines to get the reference time # automatically just in case reftime = \ - case_dataset[self._out_time_key].units.split(' ') + case_dataset[OutputData.var_time_key].units.split(' ') # This is an assumption that all the grids have # the same scale in regards of time. - cases_dict[self._out_time_key].extend( + cases_dict[OutputData.var_time_key].extend( [datetime.strptime( reftime[2]+' '+reftime[3], '%Y-%m-%d %H:%M:%S') @@ -213,16 +210,16 @@ lat_list = ref_dataset.variables[self.__lat_key][:] lon_list = ref_dataset.variables[self.__lon_key][:] for lon_point in self._input_lon: - idx, value = data_acquisition.get_nearest_neighbor( + idx, value = d_a.get_nearest_neighbor( lon_point, lon_list) - cases_dict[self._out_lon_key].append(value) + cases_dict[OutputData.var_lon_key].append(value) nn_lon_idx.append(idx) for lat_point in self._input_lat: - idx, value = data_acquisition.get_nearest_neighbor( + idx, value = d_a.get_nearest_neighbor( lat_point, lat_list) - cases_dict[self._out_lat_key].append(value) + cases_dict[OutputData.var_lat_key].append(value) nn_lat_idx.append(idx) return nn_lon_idx, nn_lat_idx Index: trunk/SDToolBox/output_messages.py =================================================================== diff -u --- trunk/SDToolBox/output_messages.py (revision 0) +++ trunk/SDToolBox/output_messages.py (revision 58) @@ -0,0 +1,22 @@ +#! /usr/bin/env python + +""" Info messages """ + +""" Warning messages """ + +""" Error messages """ +error_missing_arguments = 'Missing arguments -i -o. Run {0} -h for help.' +error_missing_list_of_variables = 'List of variable names should be given.' +error_no_valid_input_data = 'No InputData object was provided.' +error_not_enough_coordinates = '' + \ + 'Not enough coordinates were provided as InputData.input_coordinates.' +error_no_valid_list_of_years = '' + \ + 'A list of years (int) to extract should be provided.' +error_no_valid_list_of_vars = '' + \ + 'A list of variables (str) to extract should be provided.' +error_not_initialized_data_dict = '' + \ + 'Data dictionary has not been initialized, ' + \ + 'no xarray or netCDF can be created.' +error_no_dataset_in_output_data = 'No valid dataset in OutputData.' +error_all_arguments_required = 'All arguments are required for resampling.' +error_max_lon_not_set = 'Max longitud needs to be set.' Fisheye: Tag 58 refers to a dead (removed) revision in file `trunk/SDToolBox/outputmessages.py'. 
Fisheye: Tag 58 refers to a dead (removed) revision in file `trunk/tests/test_main_acceptance.py'.
Index: trunk/tests/test_output_data.py
===================================================================
diff -u -r47 -r58
--- trunk/tests/test_output_data.py	(.../test_output_data.py)	(revision 47)
+++ trunk/tests/test_output_data.py	(.../test_output_data.py)	(revision 58)
@@ -10,9 +10,10 @@

 from tests.TestUtils import TestUtils

-from SDToolBox.data_acquisition import InputData, OutputData
-from SDToolBox.extract_data_era5 import ExtractDataEra5
-import SDToolBox.outputmessages as output_messages
+from SDToolBox.input_data import InputData
+from SDToolBox.output_data import OutputData
+from SDToolBox.extract_data import ExtractData
+import SDToolBox.output_messages as om


 class Test_create:
@@ -61,14 +62,15 @@
         assert set(variables_data.keys()) == set(var_list)


-class Test_generate_wave_netcdf:
+class Test_generate_era5_netcdf:

     @pytest.mark.systemtest
-    def test_when_given_wave_cases_generates_output(self):
+    def test_when_given_era5_dir_generates_output(self):
         # 1. Given
         # When using local data you can just replace the comment in these lines
-        # dir_test_data = TestUtils.get_local_test_data_dir('netCDF_Waves_data')
-        dir_test_data = 'P:\\metocean-data\\open\\ERA5\\data\\Global'
+        dir_test_data = \
+            TestUtils.get_local_test_data_dir('era5_test_data')
+        # dir_test_data = 'P:\\metocean-data\\open\\ERA5\\data\\Global'
         output_test_data = TestUtils.get_local_test_data_dir('output_data')
         input_data = InputData()
         input_data.input_variables = ['swh']
@@ -77,10 +79,10 @@
         netcdf_filepath = None

         # 2. When
-        extract_data = ExtractDataEra5(input_data)
-        output_data = extract_data.subset_waves(dir_test_data)
-
         try:
+            output_data = ExtractData.get_era_5(
+                directory_path=dir_test_data,
+                input_data=input_data)
             netcdf_filepath = output_data.generate_netcdf(
                 dir_path=output_test_data,
                 base_name='test_wave',
@@ -107,7 +109,7 @@
         input_data.input_years = [1981, 1982]
         output_data = None
         generated_output = None
-        expected_message = output_messages.error_not_initialized_data_dict
+        expected_message = om.error_not_initialized_data_dict

         # 2. When
         with pytest.raises(IOError) as e_info:
@@ -119,11 +121,11 @@
         assert expected_message == str(e_info.value)

     @pytest.mark.systemtest
-    def test_when_no_arguments_given_valid_data_dict_then_returns_expected_output(self):
+    def test_when_no_arguments_given_and_valid_dict_then_returns_output(self):
         # 1. Given
         # When using local data you can just replace the comment in these lines
         # dir_test_data = \
-        #     TestUtils.get_local_test_data_dir('netCDF_Waves_data')
+        #     TestUtils.get_local_test_data_dir('era5_test_data')
         dir_test_data = 'P:\\metocean-data\\open\\ERA5\\data\\Global'

         input_data = InputData()
@@ -133,10 +135,10 @@
         return_value = None

         # 2. When
-        extract_data = ExtractDataEra5(input_data)
-        output_data = extract_data.subset_waves(dir_test_data)
-
         try:
+            output_data = ExtractData.get_era_5(
+                directory_path=dir_test_data,
+                input_data=input_data)
             return_value = output_data.generate_output()
         except Exception as e_info:
             pytest.fail(
@@ -158,7 +160,7 @@
         # 1. Given
         # When using local data you can just replace the comment in these lines
         # dir_test_data = \
-        #     TestUtils.get_local_test_data_dir('netCDF_Waves_data')
+        #     TestUtils.get_local_test_data_dir('era5_test_data')
         dir_test_data = 'P:\\metocean-data\\open\\ERA5\\data\\Global'
         output_test_data = TestUtils.get_local_test_data_dir('output_data')
@@ -168,10 +170,10 @@
         input_data.input_years = [1981, 1982]

         # 2. When
-        extract_data = ExtractDataEra5(input_data)
-        output_data = extract_data.subset_waves(dir_test_data)
-
         try:
+            output_data = ExtractData.get_era_5(
+                directory_path=dir_test_data,
+                input_data=input_data)
             output_data.generate_output(
                 dir_path=output_test_data,
                 base_name='test_wave',
Index: trunk/SDToolBox/data_processing.py
===================================================================
diff -u -r53 -r58
--- trunk/SDToolBox/data_processing.py	(.../data_processing.py)	(revision 53)
+++ trunk/SDToolBox/data_processing.py	(.../data_processing.py)	(revision 58)
@@ -9,8 +9,8 @@

 from datetime import datetime, timedelta

-from SDToolBox import outputmessages as om
-from SDToolBox.data_acquisition import OutputData
+from SDToolBox import output_messages as om
+from SDToolBox.output_data import OutputData

 import numpy as np
 import xarray as xr
@@ -81,8 +81,12 @@
         """
         if output_data is None:
             raise Exception(om.error_no_dataset_in_output_data)
-        return feature_resampling(
-            datset=output_data.get_xarray(),
+        return DataProcessing.feature_resampling(
+            dataset=output_data.get_xarray(),
             scale=scale,
             frequency_string=frequency_string
         )
+
+    @staticmethod
+    def spatial_resampling():
+        pass
Index: trunk/tests/test_data_processing.py
===================================================================
diff -u -r53 -r58
--- trunk/tests/test_data_processing.py	(.../test_data_processing.py)	(revision 53)
+++ trunk/tests/test_data_processing.py	(.../test_data_processing.py)	(revision 58)
@@ -12,8 +12,8 @@

 from tests.TestUtils import TestUtils

 from SDToolBox.data_processing import DataProcessing
-from SDToolBox.data_acquisition import OutputData
-import SDToolBox.outputmessages as om
+from SDToolBox.output_data import OutputData
+import SDToolBox.output_messages as om


 class Test_FeatureResampling:
Index: trunk/SDToolBox/output_data.py
===================================================================
diff -u
--- trunk/SDToolBox/output_data.py	(revision 0)
+++ trunk/SDToolBox/output_data.py	(revision 58)
@@ -0,0 +1,538 @@
+#! /usr/bin/env python
+"""
+Output data structure and netCDF4/xarray generation for the SDToolBox.
+"""
+
+# region // imports
+from datetime import datetime
+import os
+import random
+import string
+
+import SDToolBox.output_messages as om
+
+from netCDF4 import Dataset
+from sklearn.neighbors import BallTree as BallTree
+
+import xarray as xr
+import numpy as np
+# endregion
+
+# region // variables
+
+# endregion
+
+
+class OutputData:
+
+    _ds_format = 'netCDF4'
+
+    dim_station_key = 'station'
+    dim_time_key = 'time'
+
+    var_station_key = 'station'
+    var_time_key = 'time'
+    var_lat_key = 'lat'
+    var_lon_key = 'lon'
+    var_val_key = 'variables'
+    var_proj_key = 'projection'
+
+    epsg_code = 'EPSG:4326'
+
+    __output_netcdf_file = None
+    __output_xarray = None
+    __data_dict = {}
+
+    def __init__(self, var_list: list):
+        """Creates a proxy dictionary with a dictionary
+        of values from the given list.
+
+        Arguments:
+            var_list {list} -- List of variable names.
+
+        Returns:
+            dict -- Dictionary of formatted input.
+ """ + if not var_list: + var_list = [] + print(om.error_missing_list_of_variables) + + values_dict = {} + for var in var_list: + # Set the values to None so we can + # assign them later. + values_dict[var] = None + self.__data_dict = { + OutputData.var_lat_key: [], + OutputData.var_lon_key: [], + OutputData.var_time_key: [], + OutputData.var_val_key: values_dict + } + + def get_data_dict(self): + return self.__data_dict + + def get_xarray(self): + return self.__output_xarray + + def get_netcdf_filepath(self): + return self.__output_netcdf_file + + def generate_output( + self, + dir_path: str = None, + base_name: str = None, + dataset_code: str = None): + """Generates an unified netcdf4 file and its xarray equivalent + from the self.__data_dict. + + Keyword Arguments: + dir_path {str} -- Target file path. (default: {None}) + base_name {str} -- Base name for the output file. (default: {None}) + dataset_code {str} -- Code of the dataset. (default: {None}) + + Returns: + xarray {xarray.Dataset} -- Equivalent xarray of the dict. + """ + if not self.__data_dict or \ + not self.__data_dict[self.var_lat_key] or \ + not self.__data_dict[self.var_lon_key] or \ + not self.__data_dict[self.var_time_key] or \ + not self.__data_dict[self.var_val_key]: + raise IOError(om.error_not_initialized_data_dict) + + if not dir_path or \ + not base_name or \ + not dataset_code: + # Get current directory + dir_path = os.getcwd() + # temp_generated_netcdf + base_name = 'temp_generated_{}'.format(self.__get_random_str(4)) + dataset_code = 'CF' + + netcdf_filepath = self.generate_netcdf( + dir_path=dir_path, + base_name=base_name, + dataset_code=dataset_code + ) + + self.__output_netcdf_file = netcdf_filepath + return self.generate_xarray_from_netcdf(netcdf_filepath) + + def generate_xarray_from_netcdf(self, file_path: str): + """Generates an xarray and sets a new dataset + + Arguments: + file_path {str} -- path to the new netcdf. + + Returns: + xarray -- Xarray dataset. + """ + self.__output_xarray = xr.open_dataset(file_path) + self.__output_xarray.close() + # TODO: + # we should probably update the data dictionary + # so basically an importer. + return self.__output_xarray + + def generate_netcdf( + self, + dir_path: str, + base_name: str, + dataset_code: str): + """Generates an unified netcdf4 file from the self.__data_dict. + + Arguments: + file_path {str} -- Target file path. + base_name {str} -- Base name for the output file. + dataset_code {str} -- Code of the dataset + + Returns: + nc_filename {str} -- Filepath to the generated dataset. + """ + file_name = '{}_{}.nc'.format( + base_name, dataset_code) + nc_filename = os.path.join(dir_path, file_name) + print('Writing {}'.format(nc_filename)) + self.__cleanup_data_dict() + with Dataset(nc_filename, mode='w', format="NETCDF4") as netcdf: + self.__set_dataset_structure(netcdf) + self.__set_wave_variables(netcdf, dataset_code) + self.__set_wave_data(netcdf, dataset_code) + self.__set_global_data(netcdf, dataset_code) + + self.__output_netcdf_file = nc_filename + + return nc_filename + + def __cleanup_data_dict(self): + """Removes variable entries with no values. 
+ """ + remove_keys = [ + key + for key in self.__data_dict[self.var_val_key].keys() + if self.__data_dict[self.var_val_key][key] is None + ] + for key in remove_keys: + del self.__data_dict[self.var_val_key][key] + + def __get_random_str(self, length: int = 10): + """Generate a random string of fixed length """ + letters = string.ascii_lowercase + return ''.join(random.choice(letters) for i in range(length)) + + def __get_station_number(self): + lon_keys = self.__data_dict[self.var_lon_key] + lat_keys = self.__data_dict[self.var_lat_key] + return len(lon_keys) * len(lat_keys) + + def __get_time_samples(self): + time_array = self.__data_dict[self.var_time_key] + if not time_array: + return 0 + return len(time_array) + + def __get_time_values(self): + return self.__data_dict[self.var_time_key] + + def __set_global_data( + self, netcdf: Dataset, dataset_code: str): + """Sets the global data for our new output dataset. + see: http://www.unidata.ucar.edu/software/thredds/current/ + netcdf-java/formats/DataDiscoveryAttConvention.html + + Arguments: + netcdf {Dataset} -- Output dataset + output_format {str} -- Format for the netcdf file. + dataset_code {str} -- Input dataset format. + """ + netcdf.featureType = 'timeSeries' + netcdf.Conventions = 'CF-1.4' + netcdf.standard_name_vocabulary = 'CF Standard Name Table vNN' + + netcdf.title = '{}'.format(dataset_code) + \ + ' data North Sea Dutch coast' + netcdf.summary = '' + \ + 'North Sea Water Level data in coastal gauges for the Dutch coast' + netcdf.period = '{}'.format(dataset_code) + netcdf.keywords = '' + \ + 'water level, storm surge, astronomical tide, mean sea level' + + netcdf.institution = 'Deltares' + netcdf.publisher_name = 'Deltares' + netcdf.publisher_url = 'https://www.deltares.nl' + netcdf.publisher_email = 'jose.antolinez@deltares.nl' + + # Bounds (spatial, temporal) + netcdf.lat_bounds = [ + min(self.__data_dict[self.var_lat_key]), + max(self.__data_dict[self.var_lat_key])] + netcdf.lon_bounds = [ + min(self.__data_dict[self.var_lat_key]), + max(self.__data_dict[self.var_lat_key])] + + netcdf.time_bounds = [ + self.__get_time_values()[0].strftime('%d/%m/%Y %H:%M:%S'), + self.__get_time_values()[-1].strftime('%d/%m/%Y %H:%M:%S')] + netcdf.geospatial_lat_units = 'degrees_north' + netcdf.geospatial_lon_units = 'degrees_east' + netcdf.time_coverage_resolution = 'hourly' + + netcdf.date_created = datetime.strftime( + datetime.utcnow(), + format='%Y-%m-%dT%H:%MZ') + netcdf.date_modified = datetime.strftime( + datetime.utcnow(), + format='%Y-%m-%dT%H:%MZ') + netcdf.date_issued = datetime.strftime( + datetime.utcnow(), + format='%Y-%m-%dT%H:%MZ') + + netcdf.cdm_data_type = 'gridded' + + def __set_dataset_structure( + self, + netcdf: Dataset): + """Sets the default dimensions for our standarize outpud netCDF4 file. + + Arguments: + netcdf {Dataset} -- Extracted dataset. + """ + self.__set_time_and_stations(netcdf) + self.__set_geo_data(netcdf) + + def __set_time_and_stations(self, netcdf: Dataset): + """Creates the needed header defined by the dataset + time and station dimension and variables. + + Arguments: + netcdf {Dataset} -- Output netCDF4. + + Returns: + netCDF4.Dataset -- Output netCDF4 dataset. 
+ """ + # Create dimensions + netcdf.createDimension( + self.dim_station_key, + self.__get_station_number()) + netcdf.createDimension( + self.dim_time_key, + self.__get_time_samples()) + + # Stations and Time + station_variable = netcdf.createVariable( + self.var_station_key, 'u2', + (self.dim_station_key,), + zlib=True, complevel=5, shuffle=True) + station_variable.long_name = 'station name' + station_variable.cf_role = 'timeseries_id' + # TODO: not able to set correctly the dimensions. + + time_variable = netcdf.createVariable( + self.var_time_key, 'u4', + (self.dim_time_key,), + zlib=True, complevel=5, shuffle=True) + time_variable.long_name = 'time of simulation' + time_variable.standard_name = 'time' + # Start of epoch + time_variable.units = '' + \ + 'seconds since 1970-01-01 00:00:00' + time_variable.calendar = 'julian' + time_variable.axis = 'T' + + # As offset, + # use the minimal occuring value in REAL UNITS, i.e. seconds. + time_delta_from_origin = [ + time - datetime(1970, 1, 1, 0, 0, 0) + for time in self.__get_time_values()] + + netcdf.variables[self.var_time_key].add_offset = \ + min(time_delta_from_origin).total_seconds() + + # The data is given per hour, therefore: 3600 scale-factor + # is sufficient. However: + # there are more than 65536 hours of data, + # so it should still be stored as an u4 + # resolution of one second + time_variable.scale_factor = 1 + # NO scaling when scale_factor is already applied + time_variable[:] = list(map( + lambda t: t.total_seconds(), + time_delta_from_origin)) + + return netcdf + + def __set_geo_data(self, netcdf: Dataset): + """Sets the Geo variables needed in the Dataset. + + Arguments: + netcdf {Dataset} -- Output netCDF4 dataset. + + Returns: + netCDF4.Dataset -- Output netCDF4 dataset. + """ + + self.__set_geo_variable( + netcdf, self.var_lat_key, + 'station latitude', 'latitude', + 'degrees_north', 'Y', + self.__data_dict[self.var_lat_key] + ) + + self.__set_geo_variable( + netcdf, self.var_lon_key, + 'station longitude', 'longitude', + 'degrees_east', 'X', + self.__data_dict[self.var_lon_key] + ) + + # Set projection details. + netcdf.createVariable( + self.var_proj_key, + '|S1', + (), + zlib=True, complevel=5, shuffle=True) + netcdf.variables[self.var_proj_key].EPSG_code = 'EPSG:4326' + + return netcdf + + def __set_wave_data( + self, + netcdf: Dataset, dataset_code: str): + """ Sets the variables and values for the waves. + + Arguments: + netcdf {Dataset} -- Output dataset. + dataset_code {str} -- Data type code. + dict_value {dict} -- Dictionary of input variables. + """ + # Add data. + variables = self.__data_dict[self.var_val_key] + for var_name in variables.keys(): + if var_name.lower() not in map(str.lower, netcdf.variables.keys()): + print( + 'parameter {} '.format(var_name) + + 'not present in output netCDF') + # to next parameter + continue + # Assign the data from the frame towards the netCDF-parameter + print('writing variable {}'.format(var_name)) + netcdf.variables[var_name.upper()][:] = variables[var_name] + + def __set_wave_variables(self, netcdf: Dataset, dataset_code: str): + """Sets all sort of variables for the wave model. + + Arguments: + netcdf {Dataset} -- Dataset where to store the variables. + dataset_code {str} -- Auxiliar string with the input type. 
+ """ + self.__set_model_variable( + netcdf, + 'SWH', 'significant wave height', + 'sea_surface_wave_significant_height', + 'significant wave height of combined wind waves and swell', + 'lat lon', 'm', + 0.01, 0 + ) + self.__set_model_variable( + netcdf, + 'PP1D', 'peak wave period of 1D spectr', + 'sea_surface_wave_period_at_variance_spectral_density_maximum', + 'peak wave period', + 'lat lon', 's', + 0.01, 0 + ) + + self.__set_model_variable( + netcdf, + 'MP1', 'mean wave period based on first moment', + 'sea_surface_wave_mean_period_from_variance_' + + 'spectral_density_first_frequency_moment', + 'mean wave period', + 'lat lon', 's', + 0.01, 0 + ) + + self.__set_model_variable( + netcdf, + 'MWD', 'mean wave direction', + 'sea_surface_wave_from_direction', + 'mean wave direction', + 'lat lon', 'degree', + 0.01, 0 + ) + + self.__set_model_variable( + netcdf, + 'WDW', 'wave spectral directional width', + 'wave_spectral_directional_width', + 'wave spectral directional width', + 'lat lon', 'dimensionless', + 0.01, 0 + ) + + self.__set_model_variable( + netcdf, + 'WL', 'total water level', + 'total_water_level', 'total water level', + 'lat lon', 'm', + 0.01, 0 + ) + self.__set_model_variable( + netcdf, + 'AT', 'astronomical tide', + 'astronomical_tide', 'astronomical tide', + 'lat lon', 'm', + 0.01, 0 + ) + + self.__set_model_variable( + netcdf, + 'SS', 'non tidal residual', + 'non_tidal_residual', 'non tidal residual', + 'lat lon', 'm', + 0.01, 0 + ) + + if dataset_code in['RCP45', 'RCP85']: + self.__set_model_variable( + netcdf, + 'SLR', 'mean sea level', + 'mean_sea_level', 'mean sea level', + 'lat lon', 'm', + 0.01, 0 + ) + + def __set_model_variable( + self, + netcdf: Dataset, + var_name: str, description: str, + standard_name: str, long_name: str, + coordinates: str, units: str, + scale_factor: float, offset: float): + """Creates a new variable in the given netCDF dataset. + + Arguments: + netcdf {Dataset} -- Dataset for new variable. + var_name {str} -- Variable name. + description {str} -- Description of variable. + standard_name {str} -- Standard name for variable. + long_name {str} -- Long name for variable. + coordinates {str} -- Coordinates name for variable. + units {str} -- Units name for variable. + scale_factor {float} -- Scale factor. + offset {float} -- Offset for the given variable. + + Returns: + Dataset.Variable -- New created variable. + """ + keys_lower = map(str.lower, self.__data_dict[self.var_val_key].keys()) + if var_name.lower() not in keys_lower: + return + model_var = netcdf.createVariable( + var_name, 'i2', + (self.dim_time_key, self.dim_station_key), + zlib=True, complevel=5, shuffle=True) + model_var.coordinates = coordinates + model_var.description = description + model_var.standard_name = standard_name + model_var.units = units + model_var.long_name = long_name + model_var.scale_factor = scale_factor + model_var.add_offset = offset + + return model_var + + def __set_geo_variable( + self, + netcdf: Dataset, + variable_name: str, + long_name: str, standard_name: str, + units: str, axis: str, + values: list): + """Creates a given geo variable with the standard parameters. + + Arguments: + netcdf {Dataset} -- Output dataset. + variable_name {str} -- Name to set for the new variable. + long_name {str} -- Long name in NetCDF4. + standard_name {str} -- Standard name in NetCDF4. + units {str} -- Units for the given variable. + axis {str} -- Axis for the given variable. + values {list} -- Array of values for the given variable. 
+
+        Returns:
+            Dataset.Variable -- Newly created geo variable.
+        """
+        geo_variable = netcdf.createVariable(
+            variable_name,
+            'f4',
+            (self.dim_station_key,),
+            zlib=True, complevel=5, shuffle=True)
+        geo_variable.long_name = long_name
+        geo_variable.standard_name = standard_name
+        geo_variable.units = units
+        geo_variable.axis = axis
+        geo_variable[:] = values
+
+        return geo_variable
Index: trunk/tests/test_extract_data_EARTH.py
===================================================================
diff -u -r54 -r58
--- trunk/tests/test_extract_data_EARTH.py	(.../test_extract_data_EARTH.py)	(revision 54)
+++ trunk/tests/test_extract_data_EARTH.py	(.../test_extract_data_EARTH.py)	(revision 58)
@@ -7,8 +7,8 @@

 from SDToolBox import main as main
 from SDToolBox.extract_data_EARTH import ExtractDataEARTH
-from SDToolBox import data_acquisition
-from SDToolBox import outputmessages as om
+from SDToolBox.input_data import InputData
+from SDToolBox import output_messages as om


 class TestExtractDataEARTH:
@@ -20,7 +20,7 @@
         dir_test_data = TestUtils.get_local_test_data_dir('meteo_1_Global')
         # dir_test_data = 'P:\11200665-c3s-codec\2_Hydro\DMI_meteo\meteo_1_Global\RCP45_'

-        input_data = data_acquisition.InputData()
+        input_data = InputData()
         input_data.input_EARTH_scenario = 'RCP45'
         input_data.input_variables = ['var151']
         input_data.input_years = [2041, 2042]
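Usage sketch (not part of the changeset; the grid values are illustrative):
the nearest-neighbour helper shipped in extract_data.py can be exercised on
its own, independently of the extractors.

    import numpy as np
    from SDToolBox.extract_data import ExtractData

    grid = np.arange(-180.0, 180.0, 0.25)  # ERA5-style regular axis
    idx, value = ExtractData.get_nearest_neighbor(4.2, grid)
    # idx == 737, value == 4.25: the grid point closest to the requested 4.2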