Index: trunk/SDToolBox/extract_data.py =================================================================== diff -u -r65 -r69 --- trunk/SDToolBox/extract_data.py (.../extract_data.py) (revision 65) +++ trunk/SDToolBox/extract_data.py (.../extract_data.py) (revision 69) @@ -202,6 +202,7 @@ dir_path=directory_path) output_data = OutputData(input_data.input_variables) + output_data.is_gridded = input_data.is_gridded if len(self.__file_iterator) == 0: return output_data @@ -229,6 +230,7 @@ var_name=variable_key, value=self.__get_variable_subset( output_data.get_from_var_dict(variable_key), + input_data, input_dataset, variable_key, nn_idx @@ -260,6 +262,7 @@ def __get_variable_subset( self, variable_values: list, + input_data: InputData, netcdf_dataset: Dataset, variable_name: str, nn_idx): @@ -268,6 +271,7 @@ Arguments: variable_values {list} -- Stored values. netcdf_dataset {Dataset} -- Input netCDF dataset. + input_data {InputData} -- Input variables. variable_name {str} -- Name of the variable. nn_idx {duple} -- Duple of lon or lat index. @@ -278,17 +282,19 @@ if variable_values is None: return self.__get_case_subset( dataset=netcdf_dataset, + input_data=input_data, variable_name=variable_name, lon=nn_lon_idx, lat=nn_lat_idx) return np.concatenate( (variable_values, self.__get_case_subset( dataset=netcdf_dataset, + input_data=input_data, variable_name=variable_name, lon=nn_lon_idx, lat=nn_lat_idx)), - axis=0) + axis=int(not input_data.is_gridded)) def __get_nearest_neighbors_lon_lat( self, @@ -380,9 +386,20 @@ return index_found, value_found @staticmethod - def __get_case_subset(dataset, variable_name, lon, lat): - return dataset[variable_name][:, lat, lon] + def __get_case_subset(dataset, variable_name, input_data, lon, lat): + if not input_data.is_gridded: + vc = [] + # We access to the points individually. + for l in range(0, len(lat)): + vc.append(dataset[variable_name][:, lat[l], lon[l]]) + return vc + # Is gridded, we get only an ortogonal of the lat-lon. + return dataset[variable_name][ + :, + lat[0]:lat[-1], + lon[0]:lon[-1]] + class __Era5Extractor(BaseExtractor): __era5_lon_key = 'longitude' __era5_lat_key = 'latitude' Index: trunk/SDToolBox/input_data.py =================================================================== diff -u -r65 -r69 --- trunk/SDToolBox/input_data.py (.../input_data.py) (revision 65) +++ trunk/SDToolBox/input_data.py (.../input_data.py) (revision 69) @@ -26,6 +26,7 @@ input_variables = [] input_years = [] input_scenarios = [] + is_gridded = False # list of Lon Lat, for now they get extracted from the # input_coordinates. @@ -140,12 +141,16 @@ def __extract_input_lon_lat(self): """Extracts the longitude and latitude from the input_coordinates and corrects the longitude if needed. + In case of a gridded input, we only get """ self._input_lon, self._input_lat = zip(*self.input_coordinates) - self._input_lat = list(self._input_lat) self._input_lon = [ self.__get_corrected_longitude(lon) for lon in self._input_lon] + if self.is_gridded: + self._input_lat = set(self._input_lat) + self._input_lon = list(set(self._input_lon)) + self._input_lat = list(self._input_lat) def __get_corrected_longitude(self, longitude: int): """Corrects a longitude if it is outside the Index: trunk/SDToolBox/output_data.py =================================================================== diff -u -r65 -r69 --- trunk/SDToolBox/output_data.py (.../output_data.py) (revision 65) +++ trunk/SDToolBox/output_data.py (.../output_data.py) (revision 69) @@ -14,6 +14,7 @@ import SDToolBox.output_messages as om from netCDF4 import Dataset +import netCDF4 from sklearn.neighbors import BallTree as BallTree import xarray as xr @@ -39,6 +40,8 @@ var_val_key = 'variables' var_proj_key = 'projection' + is_gridded = False + epsg_code = 'EPSG:4326' __output_netcdf_file = None @@ -159,7 +162,8 @@ print('Writing {}'.format(nc_filename)) self.__cleanup_data_dict() with Dataset(nc_filename, mode='w', format="NETCDF4") as netcdf: - self.__set_dataset_structure(netcdf) + self.__set_time_and_stations(netcdf) + self.__set_geo_data(netcdf) self.__set_dataset_variables(netcdf, dataset_code) self.__set_variables_data(netcdf, dataset_code) self.__set_global_data(netcdf, dataset_code) @@ -208,7 +212,9 @@ def __get_station_number(self): lon_keys = self.__data_dict[self.var_lon_key] lat_keys = self.__data_dict[self.var_lat_key] - return len(lon_keys) * len(lat_keys) + if len(lon_keys) != len(lat_keys): + raise Exception('Lon / Lat dimensions do not match.') + return len(lon_keys) def __get_time_samples(self): time_array = self.__data_dict[self.var_time_key] @@ -255,8 +261,8 @@ min(self.__data_dict[self.var_lat_key]), max(self.__data_dict[self.var_lat_key])] netcdf.lon_bounds = [ - min(self.__data_dict[self.var_lat_key]), - max(self.__data_dict[self.var_lat_key])] + min(self.__data_dict[self.var_lon_key]), + max(self.__data_dict[self.var_lon_key])] netcdf.time_bounds = [ self.__get_time_values()[0].strftime('%d/%m/%Y %H:%M:%S'), @@ -277,17 +283,6 @@ netcdf.cdm_data_type = 'gridded' - def __set_dataset_structure( - self, - netcdf: Dataset): - """Sets the default dimensions for our standarize outpud netCDF4 file. - - Arguments: - netcdf {Dataset} -- Extracted dataset. - """ - self.__set_time_and_stations(netcdf) - self.__set_geo_data(netcdf) - def __set_time_and_stations(self, netcdf: Dataset): """Creates the needed header defined by the dataset time and station dimension and variables. @@ -299,22 +294,39 @@ netCDF4.Dataset -- Output netCDF4 dataset. """ # Create dimensions + if self.is_gridded: + netcdf.createDimension( + self.var_lat_key, + len(self.data_dict[self.var_lat_key]), + ) + netcdf.createDimension( + self.var_lon_key, + len(self.data_dict[self.var_lon_key]), + ) + else: + netcdf.createDimension( + self.dim_station_key, + self.__get_station_number()) + netcdf.createDimension( - self.dim_station_key, - self.__get_station_number()) - netcdf.createDimension( self.dim_time_key, self.__get_time_samples()) - # Stations and Time + # Stations station_variable = netcdf.createVariable( self.var_station_key, 'u2', (self.dim_station_key,), zlib=True, complevel=5, shuffle=True) station_variable.long_name = 'station name' station_variable.cf_role = 'timeseries_id' # TODO: not able to set correctly the dimensions. + # station_grid = np.meshgrid( + # self.data_dict[self.var_lon_key], + # self.data_dict[self.var_lat_key] + # ) + # station_variable[:] = station_grid + # Time time_variable = netcdf.createVariable( self.var_time_key, 'u4', (self.dim_time_key,), @@ -362,15 +374,13 @@ self.__set_geo_variable( netcdf, self.var_lat_key, 'station latitude', 'latitude', - 'degrees_north', 'Y', - self.__data_dict[self.var_lat_key] + 'degrees_north', 'Y' ) self.__set_geo_variable( netcdf, self.var_lon_key, 'station longitude', 'longitude', - 'degrees_east', 'X', - self.__data_dict[self.var_lon_key] + 'degrees_east', 'X' ) # Set projection details. @@ -383,29 +393,6 @@ return netcdf - def __set_variables_data( - self, - netcdf: Dataset, dataset_code: str): - """ Sets the variables and values for the waves. - - Arguments: - netcdf {Dataset} -- Output dataset. - dataset_code {str} -- Data type code. - dict_value {dict} -- Dictionary of input variables. - """ - # Add data. - variables = self.__data_dict[self.var_val_key] - for var_name in variables.keys(): - if var_name.lower() not in map(str.lower, netcdf.variables.keys()): - print( - 'parameter {} '.format(var_name) + - 'not present in output netCDF') - # to next parameter - continue - # Assign the data from the frame towards the netCDF-parameter - print('writing variable {}'.format(var_name)) - netcdf.variables[var_name.upper()][:] = variables[var_name] - def __set_dataset_variables( self, netcdf: Dataset, @@ -518,9 +505,15 @@ keys_lower = map(str.lower, self.__data_dict[self.var_val_key].keys()) if var_name.lower() not in keys_lower: return + variable_dimension = [self.dim_time_key] + if self.is_gridded: + variable_dimension.append(self.var_lon_key, self.var_lat_key) + else: + variable_dimension.append(self.dim_station_key) + model_var = netcdf.createVariable( var_name, 'i2', - (self.dim_time_key, self.dim_station_key), + variable_dimension, zlib=True, complevel=5, shuffle=True) model_var.coordinates = coordinates model_var.description = description @@ -537,8 +530,7 @@ netcdf: Dataset, variable_name: str, long_name: str, standard_name: str, - units: str, axis: str, - values: list): + units: str, axis: str): """Creates a given geo variable with the standard parameters. Arguments: @@ -553,15 +545,46 @@ Returns: Dataset.Variable -- New created geo variable. """ + dimension_name = variable_name + if not self.is_gridded: + dimension_name = self.dim_station_key geo_variable = netcdf.createVariable( variable_name, 'f4', - (self.dim_station_key,), + (dimension_name,), zlib=True, complevel=5, shuffle=True) geo_variable.long_name = long_name geo_variable.standard_name = standard_name geo_variable.units = units geo_variable.axis = axis - geo_variable[:] = values + # fill_val = netCDF4.default_fillvals[ + # geo_variable.dtype.str[1:]] + geo_variable[:] = self.__data_dict[variable_name] + geo_variable.original_scale = \ + np.diff(self.__data_dict[variable_name]).mean() return geo_variable + + def __set_variables_data( + self, + netcdf: Dataset, dataset_code: str): + """ Sets the variables and values for the waves. + + Arguments: + netcdf {Dataset} -- Output dataset. + dataset_code {str} -- Data type code. + dict_value {dict} -- Dictionary of input variables. + """ + # Add data. + variables = self.__data_dict[self.var_val_key] + for var_name in variables.keys(): + if var_name.lower() not in map(str.lower, netcdf.variables.keys()): + print( + 'parameter {} '.format(var_name) + + 'not present in output netCDF') + # to next parameter + continue + # Assign the data from the frame towards the netCDF-parameter + print('writing variable {}'.format(var_name)) + netcdf.variables[var_name.upper()][:] = \ + np.transpose(variables[var_name])