Index: trunk/SDToolBox/output_messages.py
===================================================================
diff -u -r60 -r62
--- trunk/SDToolBox/output_messages.py	(.../output_messages.py)	(revision 60)
+++ trunk/SDToolBox/output_messages.py	(.../output_messages.py)	(revision 62)
@@ -23,3 +23,4 @@
 error_needs_to_be_in_subclass = 'Needs to be implemented in subclasses.'
 error_no_gradient_was_calculated = 'No gradient was calculated, ' + \
                                     ' please calculate the gradient on the xarray before proceeding'
+error_function_not_implemented = 'Functionality not implemented.'
Fisheye: Tag 62 refers to a dead (removed) revision in file `trunk/SDToolBox/extract_data_EARTH.py'.
Fisheye: No comparison available.  Pass `N' to diff?
Index: trunk/SDToolBox/extract_data.py
===================================================================
diff -u -r61 -r62
--- trunk/SDToolBox/extract_data.py	(.../extract_data.py)	(revision 61)
+++ trunk/SDToolBox/extract_data.py	(.../extract_data.py)	(revision 62)
@@ -7,8 +7,9 @@
 import sys
 import os
 
-from typing import List, Set, Dict, Tuple, Optional
+from typing import List, Set, Dict, Tuple, Optional, Any
 from abc import ABC, abstractmethod
+import itertools
 
 from SDToolBox import output_messages as om
 from SDToolBox.input_data import InputData
@@ -58,7 +59,7 @@
 
     @staticmethod
     def get_era5_GTSM(directory_path: str, input_data: InputData):
-        pass
+        raise Exception(om.error_function_not_implemented)
 
     @staticmethod
     def get_earth(directory_path: str, input_data: InputData):
@@ -89,7 +90,16 @@
             )
 
     class BaseExtractor(ABC):
+        file_var_key = 'variable'
+        file_key_key = 'key'
+        file_fpath_key = 'filepath'
+        file_year_key = 'year'
+        file_month_key = 'month'
+        file_scenario_key = 'scenario'
+        netcdf_format = 'netCDF4'
 
+        __file_iterator = []
+
         # Region Abstract methods / properties.
         @property
         @abstractmethod
@@ -103,9 +113,17 @@
 
         @property
         @abstractmethod
-        def file_iterator(self) -> Dict[str, str]:
+        def var_dict(self) -> Dict[str, str]:
             raise Exception(om.error_needs_to_be_in_subclass)
 
+        @property
+        def __first_filepath(self) -> str:
+            return self.__file_iterator[0][self.file_fpath_key]
+
+        @property
+        def __first_variable_key(self) -> str:
+            return self.__file_iterator[0][self.file_key_key]
+
         @abstractmethod
         def get_case_time_values(
                 self,
@@ -115,16 +133,59 @@
             raise Exception(om.error_needs_to_be_in_subclass)
 
         @abstractmethod
-        def set_file_iterator(self, input_data: InputData, dir_path: str):
+        def get_file_combinations(**args) -> List[List[Any]]:
             raise Exception(om.error_needs_to_be_in_subclass)
 
+        @abstractmethod
+        def get_filepath(
+                self,
+                dir_path: str,
+                file_entry: List[List[Any]]) -> str:
+            raise Exception(om.error_needs_to_be_in_subclass)
+
+        @abstractmethod
+        def get_new_file_iter(self, file_entry, file_path) -> Dict[str, str]:
+            raise Exception(om.error_needs_to_be_in_subclass)
+
+        def set_file_iterator(
+                self,
+                input_data: InputData,
+                dir_path: str) -> List[Dict[str, str]]:
+            """Sets the file iterator based on the possible file
+            combinations.
+
+            Arguments:
+                input_data {InputData}
+                    -- Input used to build the file combinations.
+                dir_path {str} -- Path to the parent directory.
+
+            Returns:
+                None -- Existing files are appended to the file iterator.
+            """
+            # Get combinations
+            file_combinations = self.get_file_combinations(input_data)
+            # Iterate over f_combs
+            for file_entry in file_combinations:
+                file_path = self.get_filepath(
+                        dir_path=dir_path,
+                        file_entry=file_entry)
+                if not os.path.exists(file_path):
+                    print('File not found {}'.format(file_path))
+                    continue
+                self.__file_iterator.append(
+                    self.get_new_file_iter(
+                        file_entry=file_entry,
+                        file_path=file_path
+                    ))
+
         # End region
+
         def extract_subset(
                 self,
                 directory_path: str,
                 input_data: InputData):
-            """Extracts an ERA5 subset given a directory path and
-            the input data.
+            """Extracts a netCDF subset given a directory path,
+            with boundaries given through the input_data.
 
             Arguments:
                 directory_path {str}
@@ -139,24 +200,28 @@
                 dir_path=directory_path)
 
             output_data = OutputData(input_data.input_variables)
+            if len(self.__file_iterator) == 0:
+                return output_data
+
             # Set the nearest neighbors and the time reference.
             nn_idx = self.__get_nearest_neighbors_lon_lat(
                     ref_file_path=self.__first_filepath,
                     input_data=input_data,
                     cases_dict=output_data.data_dict
                 )
+            time_entry_refs = self.__get_time_ref_group()
 
             # Iterate over all possible combinations of variable-year.
-            for file_entry in self.file_iterator:
-                variable_name = file_entry.get('variable')
-                filepath = file_entry.get('filepath')
-                variable_key = file_entry.get('key')
+            for file_entry in self.__file_iterator:
+                variable_name = file_entry.get(self.file_var_key)
+                filepath = file_entry.get(self.file_fpath_key)
+                variable_key = file_entry.get(self.file_key_key)
                 # Process the subset for the file.
                 print(
                     'Extracting variable: {},'.format(variable_name) +
-                    ' year {}.'.format(file_entry['year']))
+                    ' year {}.'.format(file_entry.get(self.file_year_key)))
                 # Lazy loading of the dataset.
-                with Dataset(filepath, 'r', output_data._ds_format) \
+                with Dataset(filepath, 'r', self.netcdf_format) \
                         as input_dataset:
                     output_data.set_in_var_dict(
                         var_name=variable_key,
@@ -167,12 +232,11 @@
                             nn_idx
                         )
                     )
-                    # Set the time if needed.
-                    if variable_key == self.__first_variable_key:
+                    # Set the time if the file needs to be considered.
+                    if file_entry in time_entry_refs:
                         # add the lines to get the reference time
-                        reftime = \
-                            input_dataset[
-                                OutputData.var_time_key].units.split(' ')
+                        reftime = input_dataset[
+                            OutputData.var_time_key].units.split(' ')
                         # This is an assumption that all the grids have
                         # the same scale in regards of time.
                         output_data.data_dict[OutputData.var_time_key].extend(
@@ -184,14 +248,13 @@
 
             return output_data
 
-        @property
-        def __first_filepath(self) -> str:
-            return self.file_iterator[0]['filepath']
+        def __get_time_ref_group(self) -> List[str]:
+            return [
+                file_entry
+                for file_entry in self.__file_iterator
+                if file_entry[self.file_year_key] ==
+                self.__file_iterator[0][self.file_year_key]]
 
-        @property
-        def __first_variable_key(self) -> str:
-            return self.file_iterator[0]['key']
-
         def __get_variable_subset(
                 self,
                 variable_values: list,
@@ -229,7 +292,7 @@
                 self,
                 ref_file_path: str,
                 input_data: InputData,
-                cases_dict: dict):
+                cases_dict: dict) -> Tuple[List[int], List[int]]:
             """Gets the corrected index and value for the given
             input coordinates.
 
@@ -240,10 +303,11 @@
                     -- Dictionary with all values that need format.
 
             Returns:
-                Tuple[int, int] -- Indices of nearest neighbors.
+                Tuple[List[int], List[int]]
+                    -- Indices of nearest neighbors.
             """
             # Extract index and value for all input lat, lon.
-            with Dataset(ref_file_path, 'r', OutputData._ds_format) \
+            with Dataset(ref_file_path, 'r', self.netcdf_format) \
                     as ref_dataset:
                 nn_lat_idx = self.__set_nn(
                     input_values=input_data._input_lat,
@@ -279,7 +343,7 @@
                 self,
                 input_values: List[float],
                 reference_list: List[float],
-                output_values: List[float]):
+                output_values: List[float]) -> List[int]:
             """Sets the nearest neighbor for all the elements
             given in the points_list.
 
@@ -301,7 +365,7 @@
             return output_idx
 
         @staticmethod
-        def get_nearest_neighbor(value, data_array):
+        def get_nearest_neighbor(value, data_array) -> Tuple[int, int]:
             """
                 search for nearest decimal degree in an array of decimal
                 degrees and return the index.
@@ -331,8 +395,6 @@
             'v10': 'wind_v'
         }
 
-        __file_iterator = []
-
         @property
         def lon_key(self):
             return self.__era5_lon_key
@@ -342,36 +404,35 @@
             return self.__era5_lat_key
 
         @property
-        def file_iterator(self) -> Dict[str, str]:
-            return self.__file_iterator
+        def var_dict(self) -> Dict[str, str]:
+            return self.__era5_var_dict
 
-        def __get_netcdf_filepath(
+        def get_filepath(
                 self,
                 dir_path: str,
-                variable: str,
-                year: int):
-            """Gets the era5 filepath.
+                file_entry: Dict[str, str]) -> str:
+            """Gets the era5 filepath.
 
             Arguments:
                 dir_path {str} -- Parent directory.
-                variable {str} -- Variable in file name.
-                year {int} -- Year in file name.
+                file_entry {Tuple} -- ((key, variable), year) combination.
 
             Returns:
-                str -- File path location based on ERA5 format.
+                str -- File path location based on ERA5 format.
             """
             # Find the matching file
             base_file_name = '' + \
-                'era5_Global_{}_{}.nc'.format(variable, year)
-            case_dir = os.path.join(dir_path, variable)
-            file_path = os.path.join(case_dir, base_file_name)
-            return file_path
+                'era5_Global_{}_{}.nc'.format(
+                    file_entry[0][1],
+                    file_entry[1])
+            case_dir = os.path.join(dir_path, file_entry[0][1])
+            return os.path.join(case_dir, base_file_name)
 
         def get_case_time_values(
                 self,
                 ymd: str,
                 hmsmm: str,
-                case_values: Dataset):
+                case_values: Dataset) -> List[datetime]:
             """Returns a list of formatted datetime values from
             a given dataset.
 
@@ -389,31 +450,21 @@
                     + timedelta(hours=int(ti))
                     for ti in case_values['time'][:]]
 
-        def set_file_iterator(self, input_data: InputData, dir_path: str):
+        def get_file_combinations(self, input_data: InputData):
             filtered_dict = self._get_filtered_dict(
                 input_data.input_variables,
                 self.__era5_var_dict)
+            return itertools.product(
+                    filtered_dict.items(),
+                    input_data.input_years)
 
-            for key_name, variable_name in filtered_dict.items():
-                for year in input_data.input_years:
-                    file_path = self.__get_netcdf_filepath(
-                        dir_path=dir_path,
-                        variable=variable_name,
-                        year=year
-                    )
-                    # If file does not exist simply go to the next one
-                    if not os.path.exists(file_path):
-                        print(
-                            'File {}'.format(file_path) +
-                            'does not exist or could not be found.')
-                        continue
-                    self.__file_iterator.append(
-                        {
-                            'year': year,
-                            'filepath': file_path,
-                            'variable': variable_name,
-                            'key': key_name
-                        })
+        def get_new_file_iter(self, file_entry, file_path) -> Dict[str, str]:
+            return {
+                self.file_var_key: file_entry[0][1],
+                self.file_key_key: file_entry[0][0],
+                self.file_year_key: file_entry[1],
+                self.file_fpath_key: file_path
+            }
 
     class __EarthExtractor(BaseExtractor):
         __earth_lon_key = 'lon'
@@ -425,8 +476,6 @@
 
         possible_scenarios = ['RCP45', 'RCP85', 'HIST']
 
-        __file_iterator = {}
-
         @property
         def lon_key(self):
             return self.__earth_lon_key
@@ -436,8 +485,8 @@
             return self.__earth_lat_key
 
         @property
-        def file_iterator(self) -> Dict[str, str]:
-            return self.__file_iterator
+        def var_dict(self) -> Dict[str, str]:
+            return self.__earth_var_dict
 
         def get_case_time_values(
                 self,
@@ -461,35 +510,57 @@
                     + timedelta(hours=int(ti))
                     for ti in case_values['time'][:]]
 
-        def __get_files_for_scenario(self) -> List[Dict[str, str]]:
-            pass
+        def get_filepath(
+                self,
+                dir_path: str,
+                file_entry: Dict[str, str]) -> str:
+            """Gets the Earth filepath.
 
-        def set_file_iterator(self, input_data: InputData, dir_path: str):
-            filtered_vars = self._get_filtered_dict(
+            Arguments:
+                dir_path {str} -- Parent directory.
+                file_entry {Tuple} -- ((key, variable), year, month, scenario).
+
+            Returns:
+                str -- File path location based on EC-EARTH format.
+            """
+            # Find the matching file
+            base_file_name = '' + \
+                'EC-EARTH_{}_{}_{}{}.nc'.format(
+                    file_entry[3],
+                    file_entry[0][1],
+                    file_entry[1], file_entry[2])
+            case_dir = os.path.join(dir_path, file_entry[3])
+            return os.path.join(case_dir, base_file_name)
+
+        def get_file_combinations(self, input_data: InputData):
+            """Builds the (variable, year, month, scenario) file
+            combinations to be used by the parent class.
+
+            Arguments:
+                input_data {InputData} -- Input elements.
+            """
+            filtered_dict = self._get_filtered_dict(
                     input_data.input_variables,
                     self.__earth_var_dict)
 
-            for scenario in input_data.input_scenarios:
-                if scenario.upper() not in self.possible_scenarios:
-                    continue
-                files_for_scenario = self.__get_files_for_scenario()
-                for key_name, variable_name in filtered_vars.items():
-                    for year in input_data.input_years:
-                        file_path = self.__get_netcdf_filepath(
-                            dir_path=dir_path,
-                            variable=variable_name,
-                            year=year
-                        )
-                    # If file does not exist simply go to the next one
-                    if not os.path.exists(file_path):
-                        print(
-                            'File {}'.format(file_path) +
-                            'does not exist or could not be found.')
-                        continue
-                    self.__file_iterator.append(
-                        {
-                            'year': year,
-                            'filepath': file_path,
-                            'variable': variable_name,
-                            'key': key_name
-                        })
+            filtered_scenarios = [
+                scenario
+                for scenario in input_data.input_scenarios
+                if scenario.upper() in self.possible_scenarios
+            ]
+
+            return itertools.product(
+                    filtered_dict.items(),
+                    input_data.input_years,
+                    [str(mm).zfill(2) for mm in range(1, 13)],
+                    filtered_scenarios)
+
+        def get_new_file_iter(self, file_entry, file_path) -> Dict[str, str]:
+            return {
+                    self.file_var_key: file_entry[0][1],
+                    self.file_key_key: file_entry[0][0],
+                    self.file_year_key: file_entry[1],
+                    self.file_month_key: file_entry[2],
+                    self.file_scenario_key: file_entry[3],
+                    self.file_fpath_key: file_path
+                }
Fisheye: Tag 62 refers to a dead (removed) revision in file `trunk/tests/test_extract_data_EARTH.py'.
Fisheye: No comparison available.  Pass `N' to diff?
Index: trunk/tests/test_extract_data.py
===================================================================
diff -u -r59 -r62
--- trunk/tests/test_extract_data.py	(.../test_extract_data.py)	(revision 59)
+++ trunk/tests/test_extract_data.py	(.../test_extract_data.py)	(revision 62)
@@ -81,6 +81,37 @@
 
 class Test_get_earth:
 
-    @pytest.mark.unittest
-    def test_dummy(self):
-        ExtractData.BaseExtractor()
\ No newline at end of file
+    @pytest.mark.systemtest
+    @pytest.mark.parametrize(
+        "input_variables, input_scenarios",
+        [(['var151'], ['RCP45'])],
+        ids=['RCP45 simple'])
+    def test_when_extract_single_point_from_earth_dir_then_returns_output(
+            self, input_variables: List[str], input_scenarios: List[str]):
+        # 1. Given
+        # When using local data you can just replace the comment in these lines
+        dir_test_data = TestUtils.get_local_test_data_dir('earth_test_data')
+        # dir_test_data = 'P:\\metocean-data\\open\\ERA5\\data\\Global'
+
+        input_data = InputData()
+        input_data.input_variables = input_variables
+        input_data.input_coordinates = [(4.2, 2.4), ]
+        input_data.input_years = [1981, 1982]
+        input_data.input_scenarios = input_scenarios
+
+        # 2. When
+        try:
+            output_data = ExtractData.get_earth(
+                dir_test_data,
+                input_data)
+        except Exception as e_info:
+            pytest.fail('Exception {} thrown.'.format(str(e_info)))
+
+        # 3. Then
+        assert output_data is not None
+        data_dict = output_data.data_dict
+        assert data_dict is not None
+        assert data_dict[output_data.var_time_key] is not None
+        assert data_dict[output_data.var_lon_key] is not None
+        assert data_dict[output_data.var_lat_key] is not None
+        assert data_dict[output_data.var_val_key] is not None