Index: trunk/SDToolBox/data_processing.py
===================================================================
diff -u -r74 -r77
--- trunk/SDToolBox/data_processing.py	(.../data_processing.py)	(revision 74)
+++ trunk/SDToolBox/data_processing.py	(.../data_processing.py)	(revision 77)
@@ -184,25 +184,17 @@
             data_array.coords['lat'],
             data_array.coords['lon'])
 
-        times = data_array.coords['time']
-        latitude = data_array.coords['lat']
-        longitude = data_array.coords['lon']
-
-        latitude_size = len(latitude)
-        longitude_size = len(longitude)
-        times_size = len(times)
-
         derivative_points = \
             np.nan*np.ones((
-                times_size,
-                latitude_size,
-                longitude_size),
+                data_array['time'].size,
+                data_array['lat'].size,
+                data_array['lon'].size),
                 dtype=float)
 
         gradient_slp = []
-        for time in range(1, len(times)):
-            for longitude_idx in range(2, longitude_size-1):
-                for latitude_idx in range(2, latitude_size-1):
+        for time in range(0, data_array.coords['time'].size - 1):
+            for longitude_idx in range(1, data_array.coords['lon'].size - 1):
+                for latitude_idx in range(1, data_array.coords['lat'].size - 1):
                     phi = np.pi * \
                         np.abs(
                             meshed_latitudes[latitude_idx, longitude_idx])/180
@@ -248,12 +240,10 @@
                         + (np.square(derivative_latitude_first)
                            + np.square(derivative_latitude_second))/2
 
-        data_array.attrs['gradient'] = derivative_points
+        return derivative_points
 
-        return data_array
-
     @staticmethod
-    def compute_PCA(data_array: xr.DataArray):
+    def compute_PCA(data_set: xr.Dataset):
         """Computes the Principal Component Analysis.
 
         Arguments:
@@ -267,17 +257,31 @@
             pca mean, std deviation and variance
         """
 
-        if data_array is None:
+        if data_set is None:
             raise Exception(om.error_no_valid_input_data)
 
-        grad2slp = data_array.attrs['grad2slp']
+        if 'grad2slp' not in data_set.attrs:
+            raise Exception(om.error_attribute_not_in_dataset)
+        if 'slp' not in data_set.attrs:
+            raise Exception(om.error_attribute_not_in_dataset)
+        grad2slp = data_set.attrs['grad2slp']
+        slp = data_set.attrs['slp']
+
+        if slp is None:
+            raise Exception(om.error_no_gradient_was_calculated)
         if grad2slp is None:
             raise Exception(om.error_no_gradient_was_calculated)
 
+        # Flatten the spatial dimensions so that each time step becomes
+        # one sample, then stack both predictors as features.
+        slpflat = np.reshape(slp, (slp.shape[0], -1))
+        grad2slpflat = np.reshape(grad2slp, (grad2slp.shape[0], -1))
+        slpgrad2slp = np.concatenate((slpflat, grad2slpflat), axis=1)
+
         scaler = StandardScaler(with_mean=True, with_std=True)
-        scaler.fit(grad2slp)
-        transformed_grad2slp = scaler.transform(grad2slp)
+        scaler.fit(slpgrad2slp)
+        transformed_grad2slp = scaler.transform(slpgrad2slp)
 
         exvar = 99.88/100
         pca = skPCA(n_components=exvar)
         pca.fit(transformed_grad2slp)
Index: trunk/SDToolBox/output_messages.py
===================================================================
diff -u -r73 -r77
--- trunk/SDToolBox/output_messages.py	(.../output_messages.py)	(revision 73)
+++ trunk/SDToolBox/output_messages.py	(.../output_messages.py)	(revision 77)
@@ -26,4 +26,5 @@
 error_no_gradient_was_calculated = 'No gradient was calculated, ' + \
     'please calculate the gradient on the xarray before proceeding'
 error_function_not_implemented = 'Functionality not implemented.'
-error_only_for_gridded_dataset = 'Functionality only available for gridded datasets.'
\ No newline at end of file
+error_only_for_gridded_dataset = 'Functionality only available for gridded datasets.'
+error_attribute_not_in_dataset = 'Attribute not in dataset.'
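
For reference, the revised compute_PCA flow in r77 boils down to the following standalone sketch. This is a minimal sketch, not the module itself: the array shapes are illustrative, and only numpy and scikit-learn are assumed.

    import numpy as np
    from sklearn.preprocessing import StandardScaler
    from sklearn.decomposition import PCA as skPCA

    # Illustrative inputs: 10 time steps on a 3x4 lat/lon grid.
    slp = np.random.rand(10, 3, 4)
    grad2slp = np.random.rand(10, 3, 4)

    # Flatten each field to (time, space) and stack them side by side,
    # so every time step is one sample carrying both predictors.
    slpflat = slp.reshape(slp.shape[0], -1)
    grad2slpflat = grad2slp.reshape(grad2slp.shape[0], -1)
    slpgrad2slp = np.concatenate((slpflat, grad2slpflat), axis=1)

    # Standardize, then let scikit-learn pick the number of components:
    # a float 0 < n_components < 1 is interpreted as a target
    # explained-variance ratio, here 99.88%.
    scaler = StandardScaler(with_mean=True, with_std=True)
    transformed = scaler.fit_transform(slpgrad2slp)
    pca = skPCA(n_components=99.88/100)
    pca.fit(transformed)
    print(pca.n_components_, pca.explained_variance_ratio_.sum())

Note that the flattening to 2-D is essential: StandardScaler and PCA both expect a (n_samples, n_features) matrix and would reject the raw 3-D (time, lat, lon) arrays.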
Index: trunk/tests/test_data_processing.py
===================================================================
diff -u -r74 -r77
--- trunk/tests/test_data_processing.py	(.../test_data_processing.py)	(revision 74)
+++ trunk/tests/test_data_processing.py	(.../test_data_processing.py)	(revision 77)
@@ -10,9 +10,10 @@
 import string
 
 from tests.TestUtils import TestUtils
-
+from SDToolBox.input_data import InputData
 from SDToolBox.data_processing import DataProcessing
 from SDToolBox.output_data import OutputData
+from SDToolBox.extract_data import ExtractData
 import SDToolBox.output_messages as om
 
 
@@ -23,13 +24,15 @@
     Returns:
         xarray.DataArray -- Test data array.
     """
+
     return xr.DataArray(
         np.linspace(0, 11, num=12),
         coords=[
             pd.date_range(
                 '15/12/1999',
                 periods=12,
-                freq=pd.DateOffset(months=1))],
+                freq=pd.DateOffset(months=1))
+            ],
         dims='time')
 
 
@@ -255,56 +258,35 @@
         assert resampled_data is not None
 
 
-class TestSpatialGradientsCalculation:
+class Test_SpatialGradientsCalculation:
     @pytest.mark.unittest
     def test_given_an_xarray_compute_slp_gradients(self):
         lat = [43.125, 50, 60.125, 13.5]
         lon = [43.125, 50, 60.125, 13.5]
         times = pd.date_range('2000-01-01', periods=4)
         coordinates = ['lon', 'lat']
+        data = np.random.rand(len(times), len(lon), len(lat))
+        data_array = xr.DataArray(
+            data,
+            coords=[times, lon, lat],
+            dims=['time', 'lon', 'lat'])
 
-        dir_test_data = TestUtils.get_local_test_data_dir('processing_data')
-        test_nc_file = os.path.join(dir_test_data, 'test_wave_None.nc')
-        assert os.path.exists(test_nc_file)
-
-        # 2. When
-        try:
-            output_data = OutputData(None)
-            output_data.generate_xarray_from_netcdf(test_nc_file)
-        except Exception as e_info:
-            pytest.fail(
-                'Exception thrown but not expected. {}'.format(str(e_info)))
-
-        # 3. Then
-        output_xarray = output_data.get_xarray()
-        assert output_xarray is not None
-
-        # data = np.random.rand(len(times), len(lon), len(lat))
-
-        # data_array = xr.DataArray(
-        #     data,
-        #     coords=[times, lon, lat],
-        #     dims=['times', 'longitude', 'latitude'])
-        # assert data_array is not None
-
         data_processing = DataProcessing()
-        result = data_processing.compute_spatial_gradients(output_xarray)
+        result = data_processing.compute_spatial_gradients(data_array)
 
-        result_array = result.attrs['gradient']
-        # still need to test with consistent values
-        assert result_array is not None
+        assert result is not None
 
 
-class TestComputePCA:
+class Test_ComputePCA:
     @pytest.mark.unittest
     def test_given_input_data_compute_PCA(self):
-        grad2slp = np.array([[1.5, -1.2, 2.3],
-                             [2.4, 0.5, 0.5],
-                             [0.3, 1.5, -1.5]])
+        grad2slp = np.random.rand(10, 3, 4)
+        slp = np.random.rand(10, 3, 4)
 
         array_dataset = get_default_xarray_dataset()
+        array_dataset.attrs['slp'] = slp
         array_dataset.attrs['grad2slp'] = grad2slp
 
         data_processing = DataProcessing()
         [mean, deviation, variance] = \
             data_processing.compute_PCA(array_dataset)
@@ -319,7 +301,7 @@
-        assert variance[1] == 20.819697829741987
+        assert variance is not None
 
 
-class TestAtmosphericPredictor:
+class Test_AtmosphericPredictor:
     @pytest.mark.unittest
     def test_given_input_data_aggregated_aggregation_is_performed(self):
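
The rewritten gradient test reduces to this usage pattern. It is a sketch assuming the SDToolBox package from this diff is importable; the coordinate values are the ones used in the test, and the input data is random.

    import numpy as np
    import pandas as pd
    import xarray as xr

    from SDToolBox.data_processing import DataProcessing

    times = pd.date_range('2000-01-01', periods=4)
    lon = [43.125, 50, 60.125, 13.5]
    lat = [43.125, 50, 60.125, 13.5]
    data_array = xr.DataArray(
        np.random.rand(len(times), len(lon), len(lat)),
        coords=[times, lon, lat],
        dims=['time', 'lon', 'lat'])

    # As of r77, compute_spatial_gradients returns the derivative field
    # directly instead of stashing it in data_array.attrs['gradient'].
    result = DataProcessing().compute_spatial_gradients(data_array)

    # The result is pre-filled with NaN and the loops only visit interior
    # points (range(1, size - 1)), so the outermost cells stay NaN.
    assert result is not None

Because the tests now feed random data into compute_PCA, exact-value assertions on the PCA outputs can no longer hold, which is why the final assertion checks only that a variance was produced.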