Index: trunk/tests/test_data_processing.py =================================================================== diff -u -r73 -r74 --- trunk/tests/test_data_processing.py (.../test_data_processing.py) (revision 73) +++ trunk/tests/test_data_processing.py (.../test_data_processing.py) (revision 74) @@ -183,7 +183,8 @@ def test_when_max_resampling_given_file_then_returns_new_xarray(self): # 1. Given dir_test_data = TestUtils.get_local_test_data_dir('system_tests_nc') - test_nc_file = os.path.join(dir_test_data, 'dummy_wave_multiple_file.nc') + test_nc_file = os.path.join( + dir_test_data, 'dummy_wave_multiple_file.nc') assert os.path.exists(test_nc_file), '' + \ 'Test file not found at {}'.format(test_nc_file) dataset = xr.open_dataset(test_nc_file) @@ -263,19 +264,35 @@ times = pd.date_range('2000-01-01', periods=4) coordinates = ['lon', 'lat'] - data = np.random.rand(len(times), len(lon), len(lat)) + dir_test_data = TestUtils.get_local_test_data_dir('processing_data') + test_nc_file = os.path.join(dir_test_data, 'test_wave_None.nc') + assert os.path.exists(test_nc_file) - data_array = xr.DataArray( - data, - coords=[times, lon, lat], - dims=['times', 'longitude', 'latitude']) - assert data_array is not None + # 2. When + try: + output_data = OutputData(None) + output_data.generate_xarray_from_netcdf(test_nc_file) + except Exception as e_info: + pytest.fail( + 'Exception thrown but not expected. {}'.format(str(e_info))) + # 3. 
Then + output_xarray = output_data.get_xarray() + assert output_xarray is not None + + # data = np.random.rand(len(times), len(lon), len(lat)) + + # data_array = xr.DataArray( + # data, + # coords=[times, lon, lat], + # dims=['times', 'longitude', 'latitude']) + # assert data_array is not None + data_processing = DataProcessing() - result = data_processing.compute_spatial_gradients(data_array) + result = data_processing.compute_spatial_gradients(output_xarray) result_array = result.attrs['gradient'] - #still need to test with consistent values + # still need to test with consistent values assert result_array is not None @@ -300,3 +317,58 @@ assert deviation == 0.699088962313868 assert variance[0] == 79.18030217025802 assert variance[1] == 20.819697829741987 + + +class TestAtmosphericPredictor: + + @pytest.mark.unittest + def test_given_input_data_aggregated_aggregation_is_performed(self): + grad2slp = np.array([[1.5, -1.2, 2.3], + [2.4, 0.5, 0.5], + [0.3, 1.5, -1.5]]) + + dir_test_data = TestUtils.get_local_test_data_dir('processing_data') + test_nc_file = os.path.join(dir_test_data, 'test_wave_None.nc') + assert os.path.exists(test_nc_file) + + # 2. When + try: + output_data = OutputData(None) + output_data.generate_xarray_from_netcdf(test_nc_file) + except Exception as e_info: + pytest.fail( + 'Exception thrown but not expected. {}'.format(str(e_info))) + + # 3. 
Then + output_xarray = output_data.get_xarray() + assert output_xarray is not None + + # data = np.random.rand(len(times), len(lon), len(lat)) + + # data_array = xr.DataArray( + # data, + # coords=[times, lon, lat], + # dims=['times', 'longitude', 'latitude']) + # assert data_array is not None + + data_processing = DataProcessing() + result = data_processing.compute_spatial_gradients(output_xarray) + + output_array = data_processing.build_atmospheric_data_predictor( + "aggregated", + 2, + result) + + assert output_array is not None + + @pytest.mark.unittest + def test_given_input_data_averaged_aggregation_is_performed(self): + grad2slp = np.array([[1.5, -1.2, 2.3], + [2.4, 0.5, 0.5], + [0.3, 1.5, -1.5]]) + + data_processing = DataProcessing() + output_array = data_processing.build_atmospheric_data_predictor( + "averaged") + + assert output_array is not None Index: trunk/tests/test_output_data.py =================================================================== diff -u -r73 -r74 --- trunk/tests/test_output_data.py (.../test_output_data.py) (revision 73) +++ trunk/tests/test_output_data.py (.../test_output_data.py) (revision 74) @@ -74,7 +74,11 @@ output_test_data = TestUtils.get_local_test_data_dir('output_data') input_data = InputData() input_data.input_variables = ['swh'] - input_data.input_coordinates = [(4.2, 2.4), ] + input_data.input_coordinates = [ + (4.2, 2.4), + (2.5, 42), + (4.2, 3.6)] + input_data.is_gridded = False input_data.input_years = [1981, 1982] netcdf_filepath = None @@ -166,6 +170,10 @@ input_data = InputData() input_data.input_variables = ['swh'] + input_data.input_coordinates = [ + (4.2, 2.4), + (2.5, 42), + (4.2, 3.6)] input_data.input_years = [1981, 1982] input_data.input_coordinates = [ (4.2, 2.6), Index: trunk/SDToolBox/data_processing.py =================================================================== diff -u -r73 -r74 --- trunk/SDToolBox/data_processing.py (.../data_processing.py) (revision 73) +++ 
trunk/SDToolBox/data_processing.py (.../data_processing.py) (revision 74) @@ -163,15 +163,30 @@ @staticmethod def compute_spatial_gradients(data_array: xr.DataArray): + """Computes the spatial gradients. + Arguments: + data array {xr.DataArray} -- DataArray for + the gradients computation. + + Raises: + Exception: When no arguments are given. + + Returns: + xarray -- Gradients points added to input DataArray. + """ + + if data_array is None: + raise Exception(om.error_no_valid_input_data) + meshed_latitudes, meshed_longitudes = \ np.meshgrid( - data_array.coords['latitude'], - data_array.coords['longitude']) + data_array.coords['lat'], + data_array.coords['lon']) - times = data_array.coords['times'] - latitude = data_array.coords['latitude'] - longitude = data_array.coords['longitude'] + times = data_array.coords['time'] + latitude = data_array.coords['lat'] + longitude = data_array.coords['lon'] latitude_size = len(latitude) longitude_size = len(longitude) @@ -236,31 +251,99 @@ data_array.attrs['gradient'] = derivative_points return data_array - # probably here we need to use .assign_coords function to add grad2slp to the existing array @staticmethod def compute_PCA(data_array: xr.DataArray): + """Computes the Pricipal Component Analysis. + Arguments: + data array {xr.DataArray} -- DataArray for + the gradients computation. + + Raises: + Exception: When no arguments are given. 
+ + Returns: + pca mean, std deviation and variance + """ + + if data_array is None: + raise Exception(om.error_no_valid_input_data) + grad2slp = data_array.attrs['grad2slp'] if grad2slp is None: raise Exception(om.error_no_gradient_was_calculated) scaler = StandardScaler(with_mean=True, with_std=True) scaler.fit(grad2slp) - transformed_grad2slp = scaler.transform(grad2slp) # normalizing the features + transformed_grad2slp = scaler.transform(grad2slp) exvar = 99.88/100 pca = skPCA(n_components=exvar) pca.fit(transformed_grad2slp) principal_components = pca.components_ temporal_indices = \ np.dot(transformed_grad2slp, principal_components.transpose()) standard_temporal_indices = temporal_indices.std(axis=0) - pca.mean = scaler.mean_ # probably the mean is on the transformed + pca.mean = scaler.mean_ pca.std = np.std(scaler.var_) pca.eof = principal_components pca.pc = temporal_indices pca.variance = \ standard_temporal_indices**2/np.sum(standard_temporal_indices**2)*100 return pca.mean, pca.std, pca.variance + + @staticmethod + def build_atmospheric_data_predictor(method: str, + input_timescale_window: int, + data_array: xr.DataArray): + """Builds an atmospheric data predictor. + + Arguments: + method -- used method (averaged or aggregated) + input_timescale_window -- input timescale + data array {xr.DataArray} -- DataArray with times data + + Raises: + Exception: When no arguments are given. 
+ + Returns: + aggregated day windows or the rolling-averaged data array + """ + times = data_array.coords['time'] + if method == "aggregated": + daysaggr = input_timescale_window + 1 + days = DataProcessing.get_aggregation_times(times, + daysaggr, + input_timescale_window) + mspday = [] + for idx, day in enumerate(days): + mspday.append( + times[ + day[0]:day[1], :, :] + ) + # NOTE(review): mspday is computed but never used, and the 3-axis slice of 1-D 'times' looks wrong -- verify intent + return days + if method == "averaged": + return data_array.rolling(time=input_timescale_window, center=False).mean() + + @staticmethod + def get_aggregation_times(times, daysaggr, input_timescale_window): + """Helper method to calculate the aggregation times. + + Arguments: + times -- times array extracted from the input data array + daysaggr -- parameter to create the time window + input_timescale_window -- input timescale + + Raises: + Exception: When no arguments are given. + + Returns: + days array with day start and end for each set + """ + days = [] + for i in range(0, daysaggr): + time_index_day_start = times[0] + daysaggr - i + time_index_day_end = times[-1] + daysaggr - i + days.append((time_index_day_start, time_index_day_end)) + return days