Index: trunk/tests/testing_Etienne/SD_ToolBox_Testing.ipynb
===================================================================
diff -u -r114 -r115
--- trunk/tests/testing_Etienne/SD_ToolBox_Testing.ipynb	(.../SD_ToolBox_Testing.ipynb)	(revision 114)
+++ trunk/tests/testing_Etienne/SD_ToolBox_Testing.ipynb	(.../SD_ToolBox_Testing.ipynb)	(revision 115)
@@ -21,7 +21,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 10,
+   "execution_count": 1,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -79,23 +79,23 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "Make use of the following functions\n",
+    "Make use of the following functions:\n",
     "1. input_data (ind)\n",
     "2. extract_data (exd)\n",
     "3. output_data (oud)\n",
-    "4. output_messages (ome)"
+    "4. (output_messages (ome))"
    ]
   },
   {
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "1a. Extract wave parameters (wave height, peak period, wave direction) at locations in 20m depth along the Dutch coast"
+    "Task 1a. Extract wave parameters (wave height, peak period, wave direction) at locations in 20m depth along the Dutch coast"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 53,
+   "execution_count": 2,
    "metadata": {},
    "outputs": [
     {
@@ -138,7 +138,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 35,
+   "execution_count": 3,
    "metadata": {},
    "outputs": [
     {
@@ -147,7 +147,11 @@
      "text": [
       "Extracting variable: Hs, year 1986.\n",
       "Extracting variable: Tp, year 1986.\n",
-      "Extracting variable: MWD, year 1986.\n"
+      "Extracting variable: MWD, year 1986.\n",
+      "Writing C:\\checkouts\\trunk\\tests\\testing_Etienne\\datasets\\WAM_ERA5_WAVE.nc\n",
+      "writing variable pp1d\n",
+      "writing variable swh\n",
+      "writing variable mwd\n"
      ]
     }
    ],
@@ -169,6 +173,7 @@
     "Input_DataWAM = ind.InputData(\n",
     "    input_coordinates=coordsWAM,\n",
     "    input_variables=varWAM,\n",
+    "    input_scenarios=['era5'],\n",
     "    input_years=timeWAMy) # default is_gridded is false, referring to points\n",
     "\n",
     "# use the SDToolBox function to extract data\n",
@@ -179,7 +184,7 @@
     "    Extract_Data_ERA5WAM,\n",
     "    dir_path=r'C:\\checkouts\\trunk\\tests\\testing_Etienne\\datasets',\n",
     "    base_name='WAM',\n",
-    "    dataset_code='ERA5')"
+    "    dataset_code='ERA5_WAVE')"
    ]
   },
   {
@@ -316,171 +321,422 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "1b. Extract wave parameters for year 1986 in a gridded box"
+    "Task 1b. Extract wave parameters for year 1986 in a gridded box"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 82,
+   "execution_count": 91,
    "metadata": {},
    "outputs": [
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Extracting variable: Hs, year 1986.\n",
-      "Extracting variable: Tp, year 1986.\n",
-      "Extracting variable: MWD, year 1986.\n",
-      "Extracting variable: Hs, year 1986.\n",
-      "Extracting variable: Tp, year 1986.\n",
-      "Extracting variable: MWD, year 1986.\n",
-      "Extracting variable: Hs, year 1986.\n",
-      "Extracting variable: Tp, year 1986.\n",
-      "Extracting variable: MWD, year 1986.\n",
-      "Extracting variable: Hs, year 1986.\n",
-      "Extracting variable: Tp, year 1986.\n",
-      "Extracting variable: MWD, year 1986.\n",
-      "Extracting variable: Hs, year 1986.\n",
-      "Extracting variable: Tp, year 1986.\n",
-      "Extracting variable: MWD, year 1986.\n",
-      "Writing C:\\checkouts\\trunk\\tests\\testing_Etienne\\datasets\\BOX_ERA5.nc\n",
-      "writing variable swh\n"
+      "['msl']\n",
+      "Extracting variable: Hs, year 1986.\n"
      ]
     },
     {
-     "ename": "ValueError",
-     "evalue": "shape mismatch: objects cannot be broadcast to a single shape",
+     "ename": "KeyError",
+     "evalue": "'swh'",
      "output_type": "error",
      "traceback": [
       "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
-      "\u001b[1;31mValueError\u001b[0m                                Traceback (most recent call last)",
-      "\u001b[1;32m<ipython-input-82-c62bc5c52ccf>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m\u001b[0m\n\u001b[0;32m     25\u001b[0m     \u001b[0mdir_path\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;34mr'C:\\checkouts\\trunk\\tests\\testing_Etienne\\datasets'\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m     26\u001b[0m     \u001b[0mbase_name\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;34m'BOX'\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 27\u001b[1;33m     dataset_code='ERA5')\n\u001b[0m",
-      "\u001b[1;32mC:\\checkouts\\trunk\\SDToolBox\\output_data.py\u001b[0m in \u001b[0;36mgenerate_netcdf\u001b[1;34m(self, dir_path, base_name, dataset_code)\u001b[0m\n\u001b[0;32m    170\u001b[0m             \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m__set_geo_data\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mnetcdf\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    171\u001b[0m             \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m__set_dataset_variables\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mnetcdf\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mdataset_code\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 172\u001b[1;33m             \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m__set_variables_data\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mnetcdf\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mdataset_code\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m    173\u001b[0m             \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m__set_global_data\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mnetcdf\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mdataset_code\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    174\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n",
-      "\u001b[1;32mC:\\checkouts\\trunk\\SDToolBox\\output_data.py\u001b[0m in \u001b[0;36m__set_variables_data\u001b[1;34m(self, netcdf, dataset_code)\u001b[0m\n\u001b[0;32m    597\u001b[0m             \u001b[1;32mif\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mis_gridded\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    598\u001b[0m                 \u001b[0mnetcdf\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mvariables\u001b[0m\u001b[1;33m[\u001b[0m\u001b[0mvar_name\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mupper\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m]\u001b[0m \u001b[1;33m=\u001b[0m\u001b[0;31m \u001b[0m\u001b[0;31m\\\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 599\u001b[1;33m                     \u001b[0mvariables\u001b[0m\u001b[1;33m[\u001b[0m\u001b[0mvar_name\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m    600\u001b[0m             \u001b[1;32melse\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    601\u001b[0m                 \u001b[1;31m# When non-gridded we need to transpose the dimensions.\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
-      "\u001b[1;32mnetCDF4\\_netCDF4.pyx\u001b[0m in \u001b[0;36mnetCDF4._netCDF4.Variable.__setitem__\u001b[1;34m()\u001b[0m\n",
-      "\u001b[1;32m~\\anaconda3\\envs\\SDToolBox_env\\lib\\site-packages\\netCDF4\\utils.py\u001b[0m in \u001b[0;36m_StartCountStride\u001b[1;34m(elem, shape, dimensions, grp, datashape, put, use_get_vars)\u001b[0m\n\u001b[0;32m    363\u001b[0m         \u001b[0mfullslice\u001b[0m \u001b[1;33m=\u001b[0m \u001b[1;32mFalse\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    364\u001b[0m     \u001b[1;32mif\u001b[0m \u001b[0mfullslice\u001b[0m \u001b[1;32mand\u001b[0m \u001b[0mdatashape\u001b[0m \u001b[1;32mand\u001b[0m \u001b[0mput\u001b[0m \u001b[1;32mand\u001b[0m \u001b[1;32mnot\u001b[0m \u001b[0mhasunlim\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 365\u001b[1;33m         \u001b[0mdatashape\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mbroadcasted_shape\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mshape\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mdatashape\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m    366\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    367\u001b[0m     \u001b[1;31m# pad datashape with zeros for dimensions not being sliced (issue #906)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
-      "\u001b[1;32m~\\anaconda3\\envs\\SDToolBox_env\\lib\\site-packages\\netCDF4\\utils.py\u001b[0m in \u001b[0;36mbroadcasted_shape\u001b[1;34m(shp1, shp2)\u001b[0m\n\u001b[0;32m    971\u001b[0m     \u001b[0ma\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mas_strided\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mx\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mshape\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mshp1\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mstrides\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;36m0\u001b[0m\u001b[1;33m]\u001b[0m \u001b[1;33m*\u001b[0m \u001b[0mlen\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mshp1\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    972\u001b[0m     \u001b[0mb\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mas_strided\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mx\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mshape\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mshp2\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mstrides\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;36m0\u001b[0m\u001b[1;33m]\u001b[0m \u001b[1;33m*\u001b[0m \u001b[0mlen\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mshp2\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 973\u001b[1;33m     \u001b[1;32mreturn\u001b[0m \u001b[0mnp\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mbroadcast\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0ma\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mb\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mshape\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m",
-      "\u001b[1;31mValueError\u001b[0m: shape mismatch: objects cannot be broadcast to a single shape"
+      "\u001b[1;31mKeyError\u001b[0m                                  Traceback (most recent call last)",
+      "\u001b[1;32m<ipython-input-91-46658c84726f>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m\u001b[0m\n\u001b[0;32m     21\u001b[0m \u001b[1;31m# use the SDToolBox function to extract data\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m     22\u001b[0m \u001b[0mprint\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mInput_DataBOX\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0minput_variables\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 23\u001b[1;33m \u001b[0mExtract_Data_ERA5BOX\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mexd\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mExtractData\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mget_era_5\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mdir_ERA5\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mInput_DataBOX\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m     24\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m     25\u001b[0m \u001b[1;31m# generate output data for waves\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
+      "\u001b[1;32mC:\\checkouts\\trunk\\SDToolBox\\extract_data.py\u001b[0m in \u001b[0;36mget_era_5\u001b[1;34m(directory_path, input_data)\u001b[0m\n\u001b[0;32m     55\u001b[0m         return data_extractor.extract_subset(\n\u001b[0;32m     56\u001b[0m                 \u001b[0mdirectory_path\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mdirectory_path\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 57\u001b[1;33m                 \u001b[0minput_data\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0minput_data\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m     58\u001b[0m             )\n\u001b[0;32m     59\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n",
+      "\u001b[1;32mC:\\checkouts\\trunk\\SDToolBox\\extract_data.py\u001b[0m in \u001b[0;36mextract_subset\u001b[1;34m(self, directory_path, input_data)\u001b[0m\n\u001b[0;32m    245\u001b[0m                         \u001b[0mvar_name\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mvariable_key\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    246\u001b[0m                         value=self.__get_variable_subset(\n\u001b[1;32m--> 247\u001b[1;33m                             \u001b[0moutput_data\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mget_from_var_dict\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mvariable_key\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m    248\u001b[0m                             \u001b[0minput_data\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    249\u001b[0m                             \u001b[0minput_dataset\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
+      "\u001b[1;32mC:\\checkouts\\trunk\\SDToolBox\\output_data.py\u001b[0m in \u001b[0;36mget_from_var_dict\u001b[1;34m(self, var_name)\u001b[0m\n\u001b[0;32m    187\u001b[0m             \u001b[0mAny\u001b[0m \u001b[1;33m-\u001b[0m\u001b[1;33m-\u001b[0m \u001b[0mValue\u001b[0m \u001b[1;32mfor\u001b[0m \u001b[0mthe\u001b[0m \u001b[0mrequested\u001b[0m \u001b[0mvariable\u001b[0m\u001b[1;33m.\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    188\u001b[0m         \"\"\"\n\u001b[1;32m--> 189\u001b[1;33m         \u001b[1;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m__data_dict\u001b[0m\u001b[1;33m[\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mvar_val_key\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m[\u001b[0m\u001b[0mvar_name\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m    190\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    191\u001b[0m     \u001b[1;32mdef\u001b[0m \u001b[0mset_in_var_dict\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mvar_name\u001b[0m\u001b[1;33m:\u001b[0m \u001b[0mstr\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mvalue\u001b[0m\u001b[1;33m:\u001b[0m \u001b[0mAny\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
+      "\u001b[1;31mKeyError\u001b[0m: 'swh'"
      ]
     }
    ],
    "source": [
     "# select a spatial range (box) for which data is to be extracted\n",
-    "step = 2 # degrees\n",
-    "lonl = 170 # left lon value\n",
-    "lonr = 180 # right lon value\n",
-    "latl = -5 # lower lat value\n",
-    "latu = 5 # upper lat value\n",
-    "xrang = np.arange(lonl, lonr+step, step).tolist() # array or list?\n",
-    "yrang = np.arange(latl, latu+step, step).tolist() # array or list?\n",
+    "steplon = 0.5 # degrees\n",
+    "lonl = -5 # left lon value\n",
+    "lonr = 11 # right lon value\n",
+    "steplat = 0.5\n",
+    "latl = 50 # lower lat value\n",
+    "latu = 62 # upper lat value\n",
+    "xrang = np.arange(lonl, lonr+steplon, steplon).tolist() \n",
+    "yrang = np.arange(latl, latu+steplat, steplat).tolist() \n",
     "\n",
     "coordsBOX = {'LON': xrang, 'LAT': yrang}\n",
     "    \n",
     "# use the SDToolBox function to create input data\n",
     "Input_DataBOX = ind.InputData(\n",
     "    input_coordinates=coordsBOX, \n",
-    "    input_variables=varWAM, \n",
+    "    input_variables=['msl'], \n",
+    "    input_scenarios=['era5'],\n",
     "    input_years=timeWAMy, \n",
     "    is_gridded = True)\n",
     "\n",
     "# use the SDToolBox function to extract data\n",
+    "print(Input_DataBOX.input_variables)\n",
     "Extract_Data_ERA5BOX = exd.ExtractData.get_era_5(dir_ERA5, Input_DataBOX)\n",
     "\n",
     "# generate output data for waves\n",
     "ERA5BOX = oud.OutputData.generate_netcdf(\n",
     "    Extract_Data_ERA5BOX,\n",
     "    dir_path=r'C:\\checkouts\\trunk\\tests\\testing_Etienne\\datasets',\n",
     "    base_name='BOX',\n",
-    "    dataset_code='ERA5')"
+    "    dataset_code='ERA5_WAVE')"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 74,
+   "execution_count": null,
    "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "[50.  50.5 51.  51.5 52.  52.5 53.  53.5 54.  54.5 55.  55.5 56.  56.5\n",
-      " 57.  57.5 58.  58.5 59.  59.5 60.  60.5 61.  61.5 62. ]\n",
-      "[-5.  -4.5 -4.  -3.5 -3.  -2.5 -2.  -1.5 -1.  -0.5  0.   0.5  1.   1.5\n",
-      "  2.   2.5  3.   3.5  4.   4.5  5.   5.5  6.   6.5  7.   7.5  8.   8.5\n",
-      "  9.   9.5 10.  10.5 11. ]\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
-    "#%% Extract data gridded\n",
-    "lonmin=-5;lonmax=11;dlon=0.5\n",
-    "latmin=50;latmax=62;dlat=0.5\n",
-    "lon = np.arange(latmin,latmax+dlat,dlat)\n",
-    "lat = np.arange(lonmin,lonmax+dlon,dlon)\n",
-    "print(lon)\n",
-    "print(lat)\n",
-    "#lon,lat = np.meshgrid(lon,lat)\n",
-    "#plt.plot(lat,lon,'or');plt.axis('equal')\n",
-    "inda = ind.InputData(\n",
-    "    input_coordinates={'LON':lon,'LAT':lat},\n",
-    "    input_variables=varWAM,\n",
-    "    input_years=timeWAMy,\n",
-    "    is_gridded=True)"
+    "# verification: plot with spatial maps of the mean values and standard deviation\n",
+    "\n"
    ]
   },
   {
    "cell_type": "markdown",
    "metadata": {},
    "source": [
+    "Task 2. Extract mean sea level pressure from ERA5 dataset on a gridded box on the North Sea"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# select a spatial range (box) for which data is to be extracted\n",
+    "steplon = 0.5 # degrees\n",
+    "lonl = -5 # left lon value\n",
+    "lonr = 11 # right lon value\n",
+    "steplat = 0.5\n",
+    "latl = 50 # lower lat value\n",
+    "latu = 62 # upper lat value\n",
+    "xrang = np.arange(lonl, lonr+steplon, steplon).tolist() \n",
+    "yrang = np.arange(latl, latu+steplat, steplat).tolist() \n",
+    "\n",
+    "coordsBOX = {'LON': xrang, 'LAT': yrang}\n",
+    "    \n",
+    "# use the SDToolBox function to create input data\n",
+    "del(Input_DataBOX) # to clear up the previous one that was defined\n",
+    "Input_DataBOX = ind.InputData(\n",
+    "    input_coordinates=coordsBOX, \n",
+    "    input_variables=['msl_p'], \n",
+    "    input_scenarios=['era5'],\n",
+    "    input_years=timeWAMy, \n",
+    "    is_gridded = True)\n",
+    "\n",
+    "# use the SDToolBox function to extract data\n",
+    "print(Input_DataBOX.input_variables)\n",
+    "Extract_Data_ERA5BOX = exd.ExtractData.get_era_5(dir_ERA5, Input_DataBOX)\n",
+    "\n",
+    "# generate output data for mean sea level pressure\n",
+    "ERA5BOX = oud.OutputData.generate_netcdf(\n",
+    "    Extract_Data_ERA5BOX,\n",
+    "    dir_path=r'C:\\checkouts\\trunk\\tests\\testing_Etienne\\datasets',\n",
+    "    base_name='BOX',\n",
+    "    dataset_code='ERA5_MSL')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# verification: plot temporal series of a point in the grid"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# verification: plot spatial map of the mean sea level pressure"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Task 3. Test functionality to generate output netcdf files for waves and sealevel pressure"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# testing various different parameters to test the functionality of the input_data, extract_data and output_data functions"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
     "# 2. Data Preprocessing"
    ]
   },
   {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Make use of the following functions:\n",
+    "5. predictor_definition (pde)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Task 4. Compute the squared spatial gradients of mean sea level pressure"
+   ]
+  },
+  {
    "cell_type": "code",
    "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
-    "# data_processing function"
+    "# (data_processing function)?\n",
+    "# predictor_definition library -> compute_spatial_gradient"
    ]
   },
   {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# verification: for a point in the grid plot temporal series"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# verification: spatial map of the spatial gradient of mean sea level pressure"
+   ]
+  },
+  {
    "cell_type": "markdown",
    "metadata": {},
    "source": [
+    "Task 5. Temporal resampling"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# wave climate data: hourly to 3-hourly"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# mean sea level pressure and gradient: hourly to 6-hourly"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# verification: for a point in the grid plot temporal series"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# verification: spatial map of the mean sea level pressure and gradient"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Task 6. Spatial resampling"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# mean sea level pressure 0.5 degree to 1 degree"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# verification: for a point in the grid plot temporal series"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# verification: spatial map of the mean sea level pressure and gradient"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
     "# 3. Predictor Definition"
    ]
   },
   {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Make use of the following functions:\n",
+    "5. predictor_definition (pde)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Task 7a. Aggregation of atmospheric data correlated with wave generation (averaged during 1 day). For computing daily running means of the 6h spatial fields."
+   ]
+  },
+  {
    "cell_type": "code",
    "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
-    "# predictor_definition function"
+    "# predictor_definition library -> atmospheric_averaged_mean"
    ]
   },
   {
    "cell_type": "markdown",
    "metadata": {},
    "source": [
+    "Task 7b. If we are good with time, repeat task 7a with aggregation data option."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# perform aggregation data option"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# verification: for a point in the grid plot temporal series"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# verification: spatial map of the mean sea level pressure and gradient"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Task 8a. Perform Principal Component Analysis on the temporal and spatial fields of mean sea level pressure and the squared gradient. The function returns the empirical orthogonal functions (EOFs) and the principal components (PCs)."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# predictor_definition library -> compute_PCA"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# verification: plot EOFs (spatial map) and PCs (temporal series)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
     "# 4. Statistical Model"
    ]
   },
   {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Make use of the following functions:\n",
+    "5. predictor_definition (pde)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Task 8b. Compute Principal Component Analysis on the different coastal locations with wave climate (if we are doing well with time). If this task is not done, continue with only one wave station on the coast."
+   ]
+  },
+  {
    "cell_type": "code",
    "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
-    "# statistical model function"
+    "# (statistical model function)?"
    ]
   },
   {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# verification: plot EOFs (spatial map) and PCs (temporal series)"
+   ]
+  },
+  {
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "# 5. Application: Future Wave Projections"
+    "Task 9. Normalization of the predictor data (X, they will be the PCs) using Z score method"
    ]
   },
   {
@@ -489,6 +745,121 @@
    "metadata": {},
    "outputs": [],
    "source": []
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Task 10. Define calibration and validation subsets with stratified K folds, try with 5 splits (80% calibration; 20% validation) and 2 repetitions to start with."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# verification: Plot temporal series of wave climate selected for calibration and validation.\n",
+    "# (can be using different colors on the same plot)."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Task 11. Train the multivariate regression model (Y=alpha0+sum(alphai*Xi)), the predictand Y = Significant wave height, the predictor X= normalized PCs"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# verification: Check which PCs are significant (which numbers are in the model)\n",
+    "# Plot scatter X,Y empirical and X,Y* modelled, for the training subset and the validation subset \n",
+    "# (you can do this very simply using the average of the coefficients of the 8 models defined, (5-1)splits*2repetitions)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Task 12. Compute accuracy scores for the stratified folds taken for validating"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# (there are functions defined for this in the library statistical model, get_all_scorers())"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# verification: table with scores"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Task 13. Repeat 10, 11 and 12 but with wave direction, because the regression model for circular variables is different"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# .."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# verifications: ..."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# 5. Application: Future Wave Projections"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# to be continued"
+   ]
   }
  ],
  "metadata": {
Index: trunk/tests/testing_Etienne/.ipynb_checkpoints/SD_ToolBox_Testing-checkpoint.ipynb
===================================================================
diff -u -r114 -r115
--- trunk/tests/testing_Etienne/.ipynb_checkpoints/SD_ToolBox_Testing-checkpoint.ipynb	(.../SD_ToolBox_Testing-checkpoint.ipynb)	(revision 114)
+++ trunk/tests/testing_Etienne/.ipynb_checkpoints/SD_ToolBox_Testing-checkpoint.ipynb	(.../SD_ToolBox_Testing-checkpoint.ipynb)	(revision 115)
@@ -21,7 +21,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 10,
+   "execution_count": 1,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -79,23 +79,23 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "Make use of the following functions\n",
+    "Make use of the following functions:\n",
     "1. input_data (ind)\n",
     "2. extract_data (exd)\n",
     "3. output_data (oud)\n",
-    "4. output_messages (ome)"
+    "4. (output_messages (ome))"
    ]
   },
   {
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "1a. Extract wave parameters (wave height, peak period, wave direction) at locations in 20m depth along the Dutch coast"
+    "Task 1a. Extract wave parameters (wave height, peak period, wave direction) at locations in 20m depth along the Dutch coast"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 53,
+   "execution_count": 2,
    "metadata": {},
    "outputs": [
     {
@@ -138,7 +138,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 35,
+   "execution_count": 3,
    "metadata": {},
    "outputs": [
     {
@@ -147,7 +147,11 @@
      "text": [
       "Extracting variable: Hs, year 1986.\n",
       "Extracting variable: Tp, year 1986.\n",
-      "Extracting variable: MWD, year 1986.\n"
+      "Extracting variable: MWD, year 1986.\n",
+      "Writing C:\\checkouts\\trunk\\tests\\testing_Etienne\\datasets\\WAM_ERA5_WAVE.nc\n",
+      "writing variable pp1d\n",
+      "writing variable swh\n",
+      "writing variable mwd\n"
      ]
     }
    ],
@@ -169,6 +173,7 @@
     "Input_DataWAM = ind.InputData(\n",
     "    input_coordinates=coordsWAM,\n",
     "    input_variables=varWAM,\n",
+    "    input_scenarios=['era5'],\n",
     "    input_years=timeWAMy) # default is_gridded is false, referring to points\n",
     "\n",
     "# use the SDToolBox function to extract data\n",
@@ -179,7 +184,7 @@
     "    Extract_Data_ERA5WAM,\n",
     "    dir_path=r'C:\\checkouts\\trunk\\tests\\testing_Etienne\\datasets',\n",
     "    base_name='WAM',\n",
-    "    dataset_code='ERA5')"
+    "    dataset_code='ERA5_WAVE')"
    ]
   },
   {
@@ -316,171 +321,422 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "1b. Extract wave parameters for year 1986 in a gridded box"
+    "Task 1b. Extract wave parameters for year 1986 in a gridded box"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 82,
+   "execution_count": 91,
    "metadata": {},
    "outputs": [
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Extracting variable: Hs, year 1986.\n",
-      "Extracting variable: Tp, year 1986.\n",
-      "Extracting variable: MWD, year 1986.\n",
-      "Extracting variable: Hs, year 1986.\n",
-      "Extracting variable: Tp, year 1986.\n",
-      "Extracting variable: MWD, year 1986.\n",
-      "Extracting variable: Hs, year 1986.\n",
-      "Extracting variable: Tp, year 1986.\n",
-      "Extracting variable: MWD, year 1986.\n",
-      "Extracting variable: Hs, year 1986.\n",
-      "Extracting variable: Tp, year 1986.\n",
-      "Extracting variable: MWD, year 1986.\n",
-      "Extracting variable: Hs, year 1986.\n",
-      "Extracting variable: Tp, year 1986.\n",
-      "Extracting variable: MWD, year 1986.\n",
-      "Writing C:\\checkouts\\trunk\\tests\\testing_Etienne\\datasets\\BOX_ERA5.nc\n",
-      "writing variable swh\n"
+      "['msl']\n",
+      "Extracting variable: Hs, year 1986.\n"
      ]
     },
     {
-     "ename": "ValueError",
-     "evalue": "shape mismatch: objects cannot be broadcast to a single shape",
+     "ename": "KeyError",
+     "evalue": "'swh'",
      "output_type": "error",
      "traceback": [
       "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
-      "\u001b[1;31mValueError\u001b[0m                                Traceback (most recent call last)",
-      "\u001b[1;32m<ipython-input-82-c62bc5c52ccf>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m\u001b[0m\n\u001b[0;32m     25\u001b[0m     \u001b[0mdir_path\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;34mr'C:\\checkouts\\trunk\\tests\\testing_Etienne\\datasets'\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m     26\u001b[0m     \u001b[0mbase_name\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;34m'BOX'\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 27\u001b[1;33m     dataset_code='ERA5')\n\u001b[0m",
-      "\u001b[1;32mC:\\checkouts\\trunk\\SDToolBox\\output_data.py\u001b[0m in \u001b[0;36mgenerate_netcdf\u001b[1;34m(self, dir_path, base_name, dataset_code)\u001b[0m\n\u001b[0;32m    170\u001b[0m             \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m__set_geo_data\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mnetcdf\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    171\u001b[0m             \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m__set_dataset_variables\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mnetcdf\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mdataset_code\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 172\u001b[1;33m             \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m__set_variables_data\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mnetcdf\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mdataset_code\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m    173\u001b[0m             \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m__set_global_data\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mnetcdf\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mdataset_code\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    174\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n",
-      "\u001b[1;32mC:\\checkouts\\trunk\\SDToolBox\\output_data.py\u001b[0m in \u001b[0;36m__set_variables_data\u001b[1;34m(self, netcdf, dataset_code)\u001b[0m\n\u001b[0;32m    597\u001b[0m             \u001b[1;32mif\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mis_gridded\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    598\u001b[0m                 \u001b[0mnetcdf\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mvariables\u001b[0m\u001b[1;33m[\u001b[0m\u001b[0mvar_name\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mupper\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m]\u001b[0m \u001b[1;33m=\u001b[0m\u001b[0;31m \u001b[0m\u001b[0;31m\\\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 599\u001b[1;33m                     \u001b[0mvariables\u001b[0m\u001b[1;33m[\u001b[0m\u001b[0mvar_name\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m    600\u001b[0m             \u001b[1;32melse\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    601\u001b[0m                 \u001b[1;31m# When non-gridded we need to transpose the dimensions.\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
-      "\u001b[1;32mnetCDF4\\_netCDF4.pyx\u001b[0m in \u001b[0;36mnetCDF4._netCDF4.Variable.__setitem__\u001b[1;34m()\u001b[0m\n",
-      "\u001b[1;32m~\\anaconda3\\envs\\SDToolBox_env\\lib\\site-packages\\netCDF4\\utils.py\u001b[0m in \u001b[0;36m_StartCountStride\u001b[1;34m(elem, shape, dimensions, grp, datashape, put, use_get_vars)\u001b[0m\n\u001b[0;32m    363\u001b[0m         \u001b[0mfullslice\u001b[0m \u001b[1;33m=\u001b[0m \u001b[1;32mFalse\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    364\u001b[0m     \u001b[1;32mif\u001b[0m \u001b[0mfullslice\u001b[0m \u001b[1;32mand\u001b[0m \u001b[0mdatashape\u001b[0m \u001b[1;32mand\u001b[0m \u001b[0mput\u001b[0m \u001b[1;32mand\u001b[0m \u001b[1;32mnot\u001b[0m \u001b[0mhasunlim\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 365\u001b[1;33m         \u001b[0mdatashape\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mbroadcasted_shape\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mshape\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mdatashape\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m    366\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    367\u001b[0m     \u001b[1;31m# pad datashape with zeros for dimensions not being sliced (issue #906)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
-      "\u001b[1;32m~\\anaconda3\\envs\\SDToolBox_env\\lib\\site-packages\\netCDF4\\utils.py\u001b[0m in \u001b[0;36mbroadcasted_shape\u001b[1;34m(shp1, shp2)\u001b[0m\n\u001b[0;32m    971\u001b[0m     \u001b[0ma\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mas_strided\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mx\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mshape\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mshp1\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mstrides\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;36m0\u001b[0m\u001b[1;33m]\u001b[0m \u001b[1;33m*\u001b[0m \u001b[0mlen\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mshp1\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    972\u001b[0m     \u001b[0mb\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mas_strided\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mx\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mshape\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mshp2\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mstrides\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;36m0\u001b[0m\u001b[1;33m]\u001b[0m \u001b[1;33m*\u001b[0m \u001b[0mlen\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mshp2\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 973\u001b[1;33m     \u001b[1;32mreturn\u001b[0m \u001b[0mnp\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mbroadcast\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0ma\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mb\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mshape\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m",
-      "\u001b[1;31mValueError\u001b[0m: shape mismatch: objects cannot be broadcast to a single shape"
+      "\u001b[1;31mKeyError\u001b[0m                                  Traceback (most recent call last)",
+      "\u001b[1;32m<ipython-input-91-46658c84726f>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m\u001b[0m\n\u001b[0;32m     21\u001b[0m \u001b[1;31m# use the SDToolBox function to extract data\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m     22\u001b[0m \u001b[0mprint\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mInput_DataBOX\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0minput_variables\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 23\u001b[1;33m \u001b[0mExtract_Data_ERA5BOX\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mexd\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mExtractData\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mget_era_5\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mdir_ERA5\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mInput_DataBOX\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m     24\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m     25\u001b[0m \u001b[1;31m# generate output data for waves\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
+      "\u001b[1;32mC:\\checkouts\\trunk\\SDToolBox\\extract_data.py\u001b[0m in \u001b[0;36mget_era_5\u001b[1;34m(directory_path, input_data)\u001b[0m\n\u001b[0;32m     55\u001b[0m         return data_extractor.extract_subset(\n\u001b[0;32m     56\u001b[0m                 \u001b[0mdirectory_path\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mdirectory_path\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 57\u001b[1;33m                 \u001b[0minput_data\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0minput_data\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m     58\u001b[0m             )\n\u001b[0;32m     59\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n",
+      "\u001b[1;32mC:\\checkouts\\trunk\\SDToolBox\\extract_data.py\u001b[0m in \u001b[0;36mextract_subset\u001b[1;34m(self, directory_path, input_data)\u001b[0m\n\u001b[0;32m    245\u001b[0m                         \u001b[0mvar_name\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mvariable_key\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    246\u001b[0m                         value=self.__get_variable_subset(\n\u001b[1;32m--> 247\u001b[1;33m                             \u001b[0moutput_data\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mget_from_var_dict\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mvariable_key\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m    248\u001b[0m                             \u001b[0minput_data\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    249\u001b[0m                             \u001b[0minput_dataset\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
+      "\u001b[1;32mC:\\checkouts\\trunk\\SDToolBox\\output_data.py\u001b[0m in \u001b[0;36mget_from_var_dict\u001b[1;34m(self, var_name)\u001b[0m\n\u001b[0;32m    187\u001b[0m             \u001b[0mAny\u001b[0m \u001b[1;33m-\u001b[0m\u001b[1;33m-\u001b[0m \u001b[0mValue\u001b[0m \u001b[1;32mfor\u001b[0m \u001b[0mthe\u001b[0m \u001b[0mrequested\u001b[0m \u001b[0mvariable\u001b[0m\u001b[1;33m.\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    188\u001b[0m         \"\"\"\n\u001b[1;32m--> 189\u001b[1;33m         \u001b[1;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m__data_dict\u001b[0m\u001b[1;33m[\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mvar_val_key\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m[\u001b[0m\u001b[0mvar_name\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m    190\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    191\u001b[0m     \u001b[1;32mdef\u001b[0m \u001b[0mset_in_var_dict\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mvar_name\u001b[0m\u001b[1;33m:\u001b[0m \u001b[0mstr\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mvalue\u001b[0m\u001b[1;33m:\u001b[0m \u001b[0mAny\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
+      "\u001b[1;31mKeyError\u001b[0m: 'swh'"
      ]
     }
    ],
    "source": [
     "# select a spatial range (box) for which data is to be extracted\n",
-    "step = 2 # degrees\n",
-    "lonl = 170 # left lon value\n",
-    "lonr = 180 # right lon value\n",
-    "latl = -5 # lower lat value\n",
-    "latu = 5 # upper lat value\n",
-    "xrang = np.arange(lonl, lonr+step, step).tolist() # array or list?\n",
-    "yrang = np.arange(latl, latu+step, step).tolist() # array or list?\n",
+    "steplon = 0.5 # degrees\n",
+    "lonl = -5 # left lon value\n",
+    "lonr = 11 # right lon value\n",
+    "steplat = 0.5\n",
+    "latl = 50 # lower lat value\n",
+    "latu = 62 # upper lat value\n",
+    "xrang = np.arange(lonl, lonr+steplon, steplon).tolist() \n",
+    "yrang = np.arange(latl, latu+steplat, steplat).tolist() \n",
     "\n",
     "coordsBOX = {'LON': xrang, 'LAT': yrang}\n",
     "    \n",
     "# use the SDToolBox function to create input data\n",
     "Input_DataBOX = ind.InputData(\n",
     "    input_coordinates=coordsBOX, \n",
-    "    input_variables=varWAM, \n",
+    "    input_variables=['msl'], \n",
+    "    input_scenarios=['era5'],\n",
     "    input_years=timeWAMy, \n",
     "    is_gridded = True)\n",
     "\n",
     "# use the SDToolBox function to extract data\n",
+    "print(Input_DataBOX.input_variables)\n",
     "Extract_Data_ERA5BOX = exd.ExtractData.get_era_5(dir_ERA5, Input_DataBOX)\n",
     "\n",
     "# generate output data for waves\n",
     "ERA5BOX = oud.OutputData.generate_netcdf(\n",
     "    Extract_Data_ERA5BOX,\n",
     "    dir_path=r'C:\\checkouts\\trunk\\tests\\testing_Etienne\\datasets',\n",
     "    base_name='BOX',\n",
-    "    dataset_code='ERA5')"
+    "    dataset_code='ERA5_WAVE')"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 74,
+   "execution_count": null,
    "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "[50.  50.5 51.  51.5 52.  52.5 53.  53.5 54.  54.5 55.  55.5 56.  56.5\n",
-      " 57.  57.5 58.  58.5 59.  59.5 60.  60.5 61.  61.5 62. ]\n",
-      "[-5.  -4.5 -4.  -3.5 -3.  -2.5 -2.  -1.5 -1.  -0.5  0.   0.5  1.   1.5\n",
-      "  2.   2.5  3.   3.5  4.   4.5  5.   5.5  6.   6.5  7.   7.5  8.   8.5\n",
-      "  9.   9.5 10.  10.5 11. ]\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
-    "#%% Extract data gridded\n",
-    "lonmin=-5;lonmax=11;dlon=0.5\n",
-    "latmin=50;latmax=62;dlat=0.5\n",
-    "lon = np.arange(latmin,latmax+dlat,dlat)\n",
-    "lat = np.arange(lonmin,lonmax+dlon,dlon)\n",
-    "print(lon)\n",
-    "print(lat)\n",
-    "#lon,lat = np.meshgrid(lon,lat)\n",
-    "#plt.plot(lat,lon,'or');plt.axis('equal')\n",
-    "inda = ind.InputData(\n",
-    "    input_coordinates={'LON':lon,'LAT':lat},\n",
-    "    input_variables=varWAM,\n",
-    "    input_years=timeWAMy,\n",
-    "    is_gridded=True)"
+    "# verification: plot with spatial maps of the mean values and standard deviation\n",
+    "\n"
    ]
   },
   {
    "cell_type": "markdown",
    "metadata": {},
    "source": [
+    "Task 2. Extract mean sea level pressure from ERA5 dataset on a gridded box on the North Sea"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# select a spatial range (box) for which data is to be extracted\n",
+    "steplon = 0.5 # degrees\n",
+    "lonl = -5 # left lon value\n",
+    "lonr = 11 # right lon value\n",
+    "steplat = 0.5\n",
+    "latl = 50 # lower lat value\n",
+    "latu = 62 # upper lat value\n",
+    "xrang = np.arange(lonl, lonr+steplon, steplon).tolist() \n",
+    "yrang = np.arange(latl, latu+steplat, steplat).tolist() \n",
+    "\n",
+    "coordsBOX = {'LON': xrang, 'LAT': yrang}\n",
+    "    \n",
+    "# use the SDToolBox function to create input data\n",
+    "del(Input_DataBOX) # to clear up the previous one that was defined\n",
+    "Input_DataBOX = ind.InputData(\n",
+    "    input_coordinates=coordsBOX, \n",
+    "    input_variables=['msl_p'], \n",
+    "    input_scenarios=['era5'],\n",
+    "    input_years=timeWAMy, \n",
+    "    is_gridded = True)\n",
+    "\n",
+    "# use the SDToolBox function to extract data\n",
+    "print(Input_DataBOX.input_variables)\n",
+    "Extract_Data_ERA5BOX = exd.ExtractData.get_era_5(dir_ERA5, Input_DataBOX)\n",
+    "\n",
+    "# generate output data for mean sea level pressure\n",
+    "ERA5BOX = oud.OutputData.generate_netcdf(\n",
+    "    Extract_Data_ERA5BOX,\n",
+    "    dir_path=r'C:\\checkouts\\trunk\\tests\\testing_Etienne\\datasets',\n",
+    "    base_name='BOX',\n",
+    "    dataset_code='ERA5_MSL')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# verification: plot temporal series of a point in the grid"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# verification: plot spatial map of the mean sea level pressure"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Task 3. Test functionality to generate output netcdf files for waves and sealevel pressure"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# testing various different parameters to test the functionality of the input_data, extract_data and output_data functions"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
     "# 2. Data Preprocessing"
    ]
   },
   {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Make use of the following functions:\n",
+    "5. predictor_definition (pde)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Task 4. Compute the squared spatial gradients of mean sea level pressure"
+   ]
+  },
+  {
    "cell_type": "code",
    "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
-    "# data_processing function"
+    "# (data_processing function)?\n",
+    "# predictor_definition library -> compute_spatial_gradient"
    ]
   },
   {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# verification: for a point in the grid plot temporal series"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# verification: spatial map of the spatial gradient of mean sea level pressure"
+   ]
+  },
+  {
    "cell_type": "markdown",
    "metadata": {},
    "source": [
+    "Task 5. Temporal resampling"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# wave climate data: hourly to 3-hourly"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# mean sea level pressure and gradient: hourly to 6-hourly"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# verification: for a point in the grid plot temporal series"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# verification: spatial map of the mean sea level pressure and gradient"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Task 6. Spatial resampling"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# mean sea level pressure 0.5 degree to 1 degree"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# verification: for a point in the grid plot temporal series"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# verification: spatial map of the mean sea level pressure and gradient"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
     "# 3. Predictor Definition"
    ]
   },
   {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Make use of the following functions:\n",
+    "5. predictor_definition (pde)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Task 7a. Aggregation of atmospheric data correlated with wave generation (averaged over 1 day), i.e. computing daily running means of the 6-hourly spatial fields."
+   ]
+  },
+  {
    "cell_type": "code",
    "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
-    "# predictor_definition function"
+    "# predictor_definition library -> atmospheric_averaged_mean"
    ]
   },
   {
    "cell_type": "markdown",
    "metadata": {},
    "source": [
+    "Task 7b. If we are good with time, repeat task 7a with aggregation data option."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# perform aggregation data option"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# verification: for a point in the grid plot temporal series"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# verification: spatial map of the mean sea level pressure and gradient"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Task 8a. Perform Principal Component Analysis on the temporal and spatial fields of mean sea level pressure and the squared gradient. The function returns the Empirical orthogonal functions (EOF) and the principal components (PCs)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# predictor_definition library -> compute_PCA"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# verification: plot EOFs (spatial map) and PCs (temporal series)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
     "# 4. Statistical Model"
    ]
   },
   {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Make use of the following functions:\n",
+    "5. predictor_definition (pde)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Task 8b. Compute Principal Component Analysis on the different coastal locations with wave climate (if we are doing well with time). If this task is not done, continue by selecting only one wave station on the coast."
+   ]
+  },
+  {
    "cell_type": "code",
    "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
-    "# statistical model function"
+    "# (statistical model function)?"
    ]
   },
   {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# verification: plot EOFs (spatial map) and PCs (temporal series)"
+   ]
+  },
+  {
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "# 5. Application: Future Wave Projections"
+    "Task 9. Normalization of the predictor data (X, they will be the PCs) using Z score method"
    ]
   },
   {
@@ -489,6 +745,121 @@
    "metadata": {},
    "outputs": [],
    "source": []
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Task 10. Define calibration and validation subsets with stratified K folds, try with 5 splits (80% calibration; 20% validation) and 2 repetitions to start with."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# verification: Plot temporal series of wave climate selected for calibration and validation.\n",
+    "# (can be using different colors on the same plot)."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Task 11. Train the multivariate regression model (Y=alpha0+sum(alphai*Xi)), the predictand Y = Significant wave height, the predictor X= normalized PCs"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# verification: Check which PCs are significant (which numbers are in the model)\n",
+    "# Plot scatter X,Y empirical and X,Y* modelled, for the training subset and the validation subset \n",
+    "# (you can do this very simply using the average of the coefficients of the 8 models defined, (5-1)splits*2repetitions)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Task 12. Compute accuracy scores for the stratified folds taken for validating"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# (there are functions defined for this in the library statistical model, get_all_scorers())"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# verification: table with scores"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Task 13. Repeat 10, 11 and 12 but with wave direction, because the regression model for circular variables is different"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# .."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# verifications: ..."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# 5. Application: Future Wave Projections"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# to be continued"
+   ]
   }
  ],
  "metadata": {