Index: trunk/tests/testing_Etienne/SD_ToolBox_Testing.ipynb =================================================================== diff -u -r114 -r115 --- trunk/tests/testing_Etienne/SD_ToolBox_Testing.ipynb (.../SD_ToolBox_Testing.ipynb) (revision 114) +++ trunk/tests/testing_Etienne/SD_ToolBox_Testing.ipynb (.../SD_ToolBox_Testing.ipynb) (revision 115) @@ -21,7 +21,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 1, "metadata": {}, "outputs": [], "source": [ @@ -79,23 +79,23 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Make use of the following functions\n", + "Make use of the following functions:\n", "1. input_data (ind)\n", "2. extract_data (exd)\n", "3. output_data (oud)\n", - "4. output_messages (ome)" + "4. (output_messages (ome))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "1a. Extract wave parameters (wave height, peak period, wave direction) at locations in 20m depth along the Dutch coast" + "Task 1a. Extract wave parameters (wave height, peak period, wave direction) at locations in 20m depth along the Dutch coast" ] }, { "cell_type": "code", - "execution_count": 53, + "execution_count": 2, "metadata": {}, "outputs": [ { @@ -138,7 +138,7 @@ }, { "cell_type": "code", - "execution_count": 35, + "execution_count": 3, "metadata": {}, "outputs": [ { @@ -147,7 +147,11 @@ "text": [ "Extracting variable: Hs, year 1986.\n", "Extracting variable: Tp, year 1986.\n", - "Extracting variable: MWD, year 1986.\n" + "Extracting variable: MWD, year 1986.\n", + "Writing C:\\checkouts\\trunk\\tests\\testing_Etienne\\datasets\\WAM_ERA5_WAVE.nc\n", + "writing variable pp1d\n", + "writing variable swh\n", + "writing variable mwd\n" ] } ], @@ -169,6 +173,7 @@ "Input_DataWAM = ind.InputData(\n", " input_coordinates=coordsWAM,\n", " input_variables=varWAM,\n", + " input_scenarios=['era5'],\n", " input_years=timeWAMy) # default is_gridded is false, referring to points\n", "\n", "# use the SDToolBox function to extract 
data\n", @@ -179,7 +184,7 @@ " Extract_Data_ERA5WAM,\n", " dir_path=r'C:\\checkouts\\trunk\\tests\\testing_Etienne\\datasets',\n", " base_name='WAM',\n", - " dataset_code='ERA5')" + " dataset_code='ERA5_WAVE')" ] }, { @@ -316,171 +321,422 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "1b. Extract wave parameters for year 1986 in a gridded box" + "Task 1b. Extract wave parameters for year 1986 in a gridded box" ] }, { "cell_type": "code", - "execution_count": 82, + "execution_count": 91, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Extracting variable: Hs, year 1986.\n", - "Extracting variable: Tp, year 1986.\n", - "Extracting variable: MWD, year 1986.\n", - "Extracting variable: Hs, year 1986.\n", - "Extracting variable: Tp, year 1986.\n", - "Extracting variable: MWD, year 1986.\n", - "Extracting variable: Hs, year 1986.\n", - "Extracting variable: Tp, year 1986.\n", - "Extracting variable: MWD, year 1986.\n", - "Extracting variable: Hs, year 1986.\n", - "Extracting variable: Tp, year 1986.\n", - "Extracting variable: MWD, year 1986.\n", - "Extracting variable: Hs, year 1986.\n", - "Extracting variable: Tp, year 1986.\n", - "Extracting variable: MWD, year 1986.\n", - "Writing C:\\checkouts\\trunk\\tests\\testing_Etienne\\datasets\\BOX_ERA5.nc\n", - "writing variable swh\n" + "['msl']\n", + "Extracting variable: Hs, year 1986.\n" ] }, { - "ename": "ValueError", - "evalue": "shape mismatch: objects cannot be broadcast to a single shape", + "ename": "KeyError", + "evalue": "'swh'", "output_type": "error", "traceback": [ "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[1;31mValueError\u001b[0m Traceback (most recent call last)", - "\u001b[1;32m\u001b[0m in \u001b[0;36m\u001b[1;34m\u001b[0m\n\u001b[0;32m 25\u001b[0m 
\u001b[0mdir_path\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;34mr'C:\\checkouts\\trunk\\tests\\testing_Etienne\\datasets'\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 26\u001b[0m \u001b[0mbase_name\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;34m'BOX'\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 27\u001b[1;33m dataset_code='ERA5')\n\u001b[0m", - "\u001b[1;32mC:\\checkouts\\trunk\\SDToolBox\\output_data.py\u001b[0m in \u001b[0;36mgenerate_netcdf\u001b[1;34m(self, dir_path, base_name, dataset_code)\u001b[0m\n\u001b[0;32m 170\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m__set_geo_data\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mnetcdf\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 171\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m__set_dataset_variables\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mnetcdf\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mdataset_code\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 172\u001b[1;33m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m__set_variables_data\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mnetcdf\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mdataset_code\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 173\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m__set_global_data\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mnetcdf\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mdataset_code\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 174\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n", - "\u001b[1;32mC:\\checkouts\\trunk\\SDToolBox\\output_data.py\u001b[0m in \u001b[0;36m__set_variables_data\u001b[1;34m(self, netcdf, dataset_code)\u001b[0m\n\u001b[0;32m 597\u001b[0m \u001b[1;32mif\u001b[0m 
\u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mis_gridded\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 598\u001b[0m \u001b[0mnetcdf\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mvariables\u001b[0m\u001b[1;33m[\u001b[0m\u001b[0mvar_name\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mupper\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m]\u001b[0m \u001b[1;33m=\u001b[0m\u001b[0;31m \u001b[0m\u001b[0;31m\\\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 599\u001b[1;33m \u001b[0mvariables\u001b[0m\u001b[1;33m[\u001b[0m\u001b[0mvar_name\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 600\u001b[0m \u001b[1;32melse\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 601\u001b[0m \u001b[1;31m# When non-gridded we need to transpose the dimensions.\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", - "\u001b[1;32mnetCDF4\\_netCDF4.pyx\u001b[0m in \u001b[0;36mnetCDF4._netCDF4.Variable.__setitem__\u001b[1;34m()\u001b[0m\n", - "\u001b[1;32m~\\anaconda3\\envs\\SDToolBox_env\\lib\\site-packages\\netCDF4\\utils.py\u001b[0m in \u001b[0;36m_StartCountStride\u001b[1;34m(elem, shape, dimensions, grp, datashape, put, use_get_vars)\u001b[0m\n\u001b[0;32m 363\u001b[0m \u001b[0mfullslice\u001b[0m \u001b[1;33m=\u001b[0m \u001b[1;32mFalse\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 364\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mfullslice\u001b[0m \u001b[1;32mand\u001b[0m \u001b[0mdatashape\u001b[0m \u001b[1;32mand\u001b[0m \u001b[0mput\u001b[0m \u001b[1;32mand\u001b[0m \u001b[1;32mnot\u001b[0m \u001b[0mhasunlim\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 365\u001b[1;33m \u001b[0mdatashape\u001b[0m \u001b[1;33m=\u001b[0m 
\u001b[0mbroadcasted_shape\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mshape\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mdatashape\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 366\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 367\u001b[0m \u001b[1;31m# pad datashape with zeros for dimensions not being sliced (issue #906)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", - "\u001b[1;32m~\\anaconda3\\envs\\SDToolBox_env\\lib\\site-packages\\netCDF4\\utils.py\u001b[0m in \u001b[0;36mbroadcasted_shape\u001b[1;34m(shp1, shp2)\u001b[0m\n\u001b[0;32m 971\u001b[0m \u001b[0ma\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mas_strided\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mx\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mshape\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mshp1\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mstrides\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;36m0\u001b[0m\u001b[1;33m]\u001b[0m \u001b[1;33m*\u001b[0m \u001b[0mlen\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mshp1\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 972\u001b[0m \u001b[0mb\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mas_strided\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mx\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mshape\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mshp2\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mstrides\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;36m0\u001b[0m\u001b[1;33m]\u001b[0m \u001b[1;33m*\u001b[0m \u001b[0mlen\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mshp2\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 973\u001b[1;33m \u001b[1;32mreturn\u001b[0m \u001b[0mnp\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mbroadcast\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0ma\u001b[0m\u001b[1;33m,\u001b[0m 
\u001b[0mb\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mshape\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m", - "\u001b[1;31mValueError\u001b[0m: shape mismatch: objects cannot be broadcast to a single shape" + "\u001b[1;31mKeyError\u001b[0m Traceback (most recent call last)", + "\u001b[1;32m\u001b[0m in \u001b[0;36m\u001b[1;34m\u001b[0m\n\u001b[0;32m 21\u001b[0m \u001b[1;31m# use the SDToolBox function to extract data\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 22\u001b[0m \u001b[0mprint\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mInput_DataBOX\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0minput_variables\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 23\u001b[1;33m \u001b[0mExtract_Data_ERA5BOX\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mexd\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mExtractData\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mget_era_5\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mdir_ERA5\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mInput_DataBOX\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 24\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 25\u001b[0m \u001b[1;31m# generate output data for waves\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", + "\u001b[1;32mC:\\checkouts\\trunk\\SDToolBox\\extract_data.py\u001b[0m in \u001b[0;36mget_era_5\u001b[1;34m(directory_path, input_data)\u001b[0m\n\u001b[0;32m 55\u001b[0m return data_extractor.extract_subset(\n\u001b[0;32m 56\u001b[0m \u001b[0mdirectory_path\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mdirectory_path\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 57\u001b[1;33m \u001b[0minput_data\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0minput_data\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 58\u001b[0m 
)\n\u001b[0;32m 59\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n", + "\u001b[1;32mC:\\checkouts\\trunk\\SDToolBox\\extract_data.py\u001b[0m in \u001b[0;36mextract_subset\u001b[1;34m(self, directory_path, input_data)\u001b[0m\n\u001b[0;32m 245\u001b[0m \u001b[0mvar_name\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mvariable_key\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 246\u001b[0m value=self.__get_variable_subset(\n\u001b[1;32m--> 247\u001b[1;33m \u001b[0moutput_data\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mget_from_var_dict\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mvariable_key\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 248\u001b[0m \u001b[0minput_data\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 249\u001b[0m \u001b[0minput_dataset\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", + "\u001b[1;32mC:\\checkouts\\trunk\\SDToolBox\\output_data.py\u001b[0m in \u001b[0;36mget_from_var_dict\u001b[1;34m(self, var_name)\u001b[0m\n\u001b[0;32m 187\u001b[0m \u001b[0mAny\u001b[0m \u001b[1;33m-\u001b[0m\u001b[1;33m-\u001b[0m \u001b[0mValue\u001b[0m \u001b[1;32mfor\u001b[0m \u001b[0mthe\u001b[0m \u001b[0mrequested\u001b[0m \u001b[0mvariable\u001b[0m\u001b[1;33m.\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 188\u001b[0m \"\"\"\n\u001b[1;32m--> 189\u001b[1;33m \u001b[1;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m__data_dict\u001b[0m\u001b[1;33m[\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mvar_val_key\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m[\u001b[0m\u001b[0mvar_name\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 190\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 191\u001b[0m \u001b[1;32mdef\u001b[0m 
\u001b[0mset_in_var_dict\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mvar_name\u001b[0m\u001b[1;33m:\u001b[0m \u001b[0mstr\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mvalue\u001b[0m\u001b[1;33m:\u001b[0m \u001b[0mAny\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", + "\u001b[1;31mKeyError\u001b[0m: 'swh'" ] } ], "source": [ "# select a spatial range (box) for which data is to be extracted\n", - "step = 2 # degrees\n", - "lonl = 170 # left lon value\n", - "lonr = 180 # right lon value\n", - "latl = -5 # lower lat value\n", - "latu = 5 # upper lat value\n", - "xrang = np.arange(lonl, lonr+step, step).tolist() # array or list?\n", - "yrang = np.arange(latl, latu+step, step).tolist() # array or list?\n", + "steplon = 0.5 # degrees\n", + "lonl = -5 # left lon value\n", + "lonr = 11 # right lon value\n", + "steplat = 0.5\n", + "latl = 50 # lower lat value\n", + "latu = 62 # upper lat value\n", + "xrang = np.arange(lonl, lonr+steplon, steplon).tolist() \n", + "yrang = np.arange(latl, latu+steplat, steplat).tolist() \n", "\n", "coordsBOX = {'LON': xrang, 'LAT': yrang}\n", " \n", "# use the SDToolBox function to create input data\n", "Input_DataBOX = ind.InputData(\n", " input_coordinates=coordsBOX, \n", - " input_variables=varWAM, \n", + " input_variables=['msl'], \n", + " input_scenarios=['era5'],\n", " input_years=timeWAMy, \n", " is_gridded = True)\n", "\n", "# use the SDToolBox function to extract data\n", + "print(Input_DataBOX.input_variables)\n", "Extract_Data_ERA5BOX = exd.ExtractData.get_era_5(dir_ERA5, Input_DataBOX)\n", "\n", "# generate output data for waves\n", "ERA5BOX = oud.OutputData.generate_netcdf(\n", " Extract_Data_ERA5BOX,\n", " dir_path=r'C:\\checkouts\\trunk\\tests\\testing_Etienne\\datasets',\n", " base_name='BOX',\n", - " dataset_code='ERA5')" + " dataset_code='ERA5_WAVE')" ] }, { "cell_type": "code", - "execution_count": 74, + "execution_count": null, 
"metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[50. 50.5 51. 51.5 52. 52.5 53. 53.5 54. 54.5 55. 55.5 56. 56.5\n", - " 57. 57.5 58. 58.5 59. 59.5 60. 60.5 61. 61.5 62. ]\n", - "[-5. -4.5 -4. -3.5 -3. -2.5 -2. -1.5 -1. -0.5 0. 0.5 1. 1.5\n", - " 2. 2.5 3. 3.5 4. 4.5 5. 5.5 6. 6.5 7. 7.5 8. 8.5\n", - " 9. 9.5 10. 10.5 11. ]\n" - ] - } - ], + "outputs": [], "source": [ - "#%% Extract data gridded\n", - "lonmin=-5;lonmax=11;dlon=0.5\n", - "latmin=50;latmax=62;dlat=0.5\n", - "lon = np.arange(latmin,latmax+dlat,dlat)\n", - "lat = np.arange(lonmin,lonmax+dlon,dlon)\n", - "print(lon)\n", - "print(lat)\n", - "#lon,lat = np.meshgrid(lon,lat)\n", - "#plt.plot(lat,lon,'or');plt.axis('equal')\n", - "inda = ind.InputData(\n", - " input_coordinates={'LON':lon,'LAT':lat},\n", - " input_variables=varWAM,\n", - " input_years=timeWAMy,\n", - " is_gridded=True)" + "# verification: plot with spatial maps of the mean values and standard deviation\n", + "\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ + "Task 2. 
Extract mean sea level pressure from ERA5 dataset on a gridded box on the North Sea" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# select a spatial range (box) for which data is to be extracted\n", + "steplon = 0.5 # degrees\n", + "lonl = -5 # left lon value\n", + "lonr = 11 # right lon value\n", + "steplat = 0.5\n", + "latl = 50 # lower lat value\n", + "latu = 62 # upper lat value\n", + "xrang = np.arange(lonl, lonr+steplon, steplon).tolist() \n", + "yrang = np.arange(latl, latu+steplat, steplat).tolist() \n", + "\n", + "coordsBOX = {'LON': xrang, 'LAT': yrang}\n", + " \n", + "# use the SDToolBox function to create input data\n", + "del(Input_DataBOX) # to clear up the previous one that was defined\n", + "Input_DataBOX = ind.InputData(\n", + " input_coordinates=coordsBOX, \n", + " input_variables=['msl_p'], \n", + " input_scenarios=['era5'],\n", + " input_years=timeWAMy, \n", + " is_gridded = True)\n", + "\n", + "# use the SDToolBox function to extract data\n", + "print(Input_DataBOX.input_variables)\n", + "Extract_Data_ERA5BOX = exd.ExtractData.get_era_5(dir_ERA5, Input_DataBOX)\n", + "\n", + "# generate output data for mean sea level pressure\n", + "ERA5BOX = oud.OutputData.generate_netcdf(\n", + " Extract_Data_ERA5BOX,\n", + " dir_path=r'C:\\checkouts\\trunk\\tests\\testing_Etienne\\datasets',\n", + " base_name='BOX',\n", + " dataset_code='ERA5_MSL')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# verification: plot temporal series of a point in the grid" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# verification: plot spatial map of the mean sea level pressure" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Task 3.
Test functionality to generate output netcdf files for waves and sea level pressure" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# testing various different parameters to test the functionality of the input_data, extract_data and output_data functions" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ "# 2. Data Preprocessing" ] }, { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Make use of the following functions:\n", + "5. predictor_definition (pde)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Task 4. Compute the squared spatial gradients of mean sea level pressure" + ] + }, + { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ - "# data_processing function" + "# (data_processing function)?\n", + "# predictor_definition library -> compute_spatial_gradient" ] }, { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# verification: for a point in the grid plot temporal series" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# verification: spatial map of the spatial gradient of mean sea level pressure" + ] + }, + { "cell_type": "markdown", "metadata": {}, "source": [ + "Task 5.
Temporal resampling" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# wave climate data: hourly to 3 hourly" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# mean sea level pressure and gradient: hourly to 6 hourly" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# verification: for a point in the grid plot temporal series" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# verification: spatial map of the mean sea level pressure and gradient" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Task 6. Spatial resampling" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# mean sea level pressure 0.5 degree to 1 degree" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# verification: for a point in the grid plot temporal series" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# verification: spatial map of the mean sea level pressure and gradient" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ "# 3. Predictor Definition" ] }, { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Make use of the following functions:\n", + "5. predictor_definition (pde)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Task 7a. Aggregation of atmospheric data correlated with wave generation (averaged during 1 day). For computing daily running means of the 6h spatial fields."
+ ] + }, + { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ - "# predictor_definition function" + "# predictor_definition library -> atmospheric_averaged_mean" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ + "Task 7b. If we are good with time, repeat task 7a with aggregation data option." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# perform aggregation data option" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# verification: for a point in the grid plot temporal series" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# verification: spatial map of the mean sea level pressure and gradient" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Task 8a. Perform Principal Component Analysis on the temporal and spatial fields of mean sea level pressure and the squared gradient. The function returns the empirical orthogonal functions (EOFs) and the principal components (PCs)." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# predictor_definition library -> compute_PCA" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# verification: plot EOFs (spatial map) and PCs (temporal series)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ "# 4. Statistical Model" ] }, { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Make use of the following functions:\n", + "5. predictor_definition (pde)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Task 8b. Compute Principal Component Analysis on the different coastal locations with wave climate (if we are doing well with time).
If this task is not done, continue selecting only one wave station on the coast." + ] + }, + { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ - "# statistical model function" + "# (statistical model function)?" ] }, { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# verification: plot EOFs (spatial map) and PCs (temporal series)" + ] + }, + { "cell_type": "markdown", "metadata": {}, "source": [ - "# 5. Application: Future Wave Projections" + "Task 9. Normalization of the predictor data (X, they will be the PCs) using the Z-score method" ] }, { @@ -489,6 +745,121 @@ "metadata": {}, "outputs": [], "source": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Task 10. Define calibration and validation subsets with stratified K folds, try with 5 splits (80% calibration; 20% validation) and 2 repetitions to start with." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "# verification: Plot temporal series of wave climate selected for calibration and validation.\n", + "# (can be using different colors on the same plot)." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Task 11.
Train the multivariate regression model (Y=alpha0+sum(alphai*Xi)), the predictand Y = Significant wave height, the predictor X = normalized PCs" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# verification: Check which PCs are significant (which numbers are in the model)\n", + "# Plot scatter X,Y empirical and X,Y* modelled, for the training subset and the validation subset \n", + "# (you can do this very simply using the average of the coefficients of the 8 models defined, (5-1)splits*2repetitions)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Task 12. Compute accuracy scores for the stratified folds taken for validating" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# (there are functions defined for this in the library statistical model, get_all_scorers())" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# verification: table with scores" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Task 13. Repeat 10, 11 and 12 but with wave direction, because the regression model for circular variables is different" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# .." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# verifications: ..." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# 5.
Application: Future Wave Projections" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# to be continued" + ] } ], "metadata": { Index: trunk/tests/testing_Etienne/.ipynb_checkpoints/SD_ToolBox_Testing-checkpoint.ipynb =================================================================== diff -u -r114 -r115 --- trunk/tests/testing_Etienne/.ipynb_checkpoints/SD_ToolBox_Testing-checkpoint.ipynb (.../SD_ToolBox_Testing-checkpoint.ipynb) (revision 114) +++ trunk/tests/testing_Etienne/.ipynb_checkpoints/SD_ToolBox_Testing-checkpoint.ipynb (.../SD_ToolBox_Testing-checkpoint.ipynb) (revision 115) @@ -21,7 +21,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 1, "metadata": {}, "outputs": [], "source": [ @@ -79,23 +79,23 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Make use of the following functions\n", + "Make use of the following functions:\n", "1. input_data (ind)\n", "2. extract_data (exd)\n", "3. output_data (oud)\n", - "4. output_messages (ome)" + "4. (output_messages (ome))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "1a. Extract wave parameters (wave height, peak period, wave direction) at locations in 20m depth along the Dutch coast" + "Task 1a. 
Extract wave parameters (wave height, peak period, wave direction) at locations in 20m depth along the Dutch coast" ] }, { "cell_type": "code", - "execution_count": 53, + "execution_count": 2, "metadata": {}, "outputs": [ { @@ -138,7 +138,7 @@ }, { "cell_type": "code", - "execution_count": 35, + "execution_count": 3, "metadata": {}, "outputs": [ { @@ -147,7 +147,11 @@ "text": [ "Extracting variable: Hs, year 1986.\n", "Extracting variable: Tp, year 1986.\n", - "Extracting variable: MWD, year 1986.\n" + "Extracting variable: MWD, year 1986.\n", + "Writing C:\\checkouts\\trunk\\tests\\testing_Etienne\\datasets\\WAM_ERA5_WAVE.nc\n", + "writing variable pp1d\n", + "writing variable swh\n", + "writing variable mwd\n" ] } ], @@ -169,6 +173,7 @@ "Input_DataWAM = ind.InputData(\n", " input_coordinates=coordsWAM,\n", " input_variables=varWAM,\n", + " input_scenarios=['era5'],\n", " input_years=timeWAMy) # default is_gridded is false, referring to points\n", "\n", "# use the SDToolBox function to extract data\n", @@ -179,7 +184,7 @@ " Extract_Data_ERA5WAM,\n", " dir_path=r'C:\\checkouts\\trunk\\tests\\testing_Etienne\\datasets',\n", " base_name='WAM',\n", - " dataset_code='ERA5')" + " dataset_code='ERA5_WAVE')" ] }, { @@ -316,171 +321,422 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "1b. Extract wave parameters for year 1986 in a gridded box" + "Task 1b. 
Extract wave parameters for year 1986 in a gridded box" ] }, { "cell_type": "code", - "execution_count": 82, + "execution_count": 91, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Extracting variable: Hs, year 1986.\n", - "Extracting variable: Tp, year 1986.\n", - "Extracting variable: MWD, year 1986.\n", - "Extracting variable: Hs, year 1986.\n", - "Extracting variable: Tp, year 1986.\n", - "Extracting variable: MWD, year 1986.\n", - "Extracting variable: Hs, year 1986.\n", - "Extracting variable: Tp, year 1986.\n", - "Extracting variable: MWD, year 1986.\n", - "Extracting variable: Hs, year 1986.\n", - "Extracting variable: Tp, year 1986.\n", - "Extracting variable: MWD, year 1986.\n", - "Extracting variable: Hs, year 1986.\n", - "Extracting variable: Tp, year 1986.\n", - "Extracting variable: MWD, year 1986.\n", - "Writing C:\\checkouts\\trunk\\tests\\testing_Etienne\\datasets\\BOX_ERA5.nc\n", - "writing variable swh\n" + "['msl']\n", + "Extracting variable: Hs, year 1986.\n" ] }, { - "ename": "ValueError", - "evalue": "shape mismatch: objects cannot be broadcast to a single shape", + "ename": "KeyError", + "evalue": "'swh'", "output_type": "error", "traceback": [ "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[1;31mValueError\u001b[0m Traceback (most recent call last)", - "\u001b[1;32m\u001b[0m in \u001b[0;36m\u001b[1;34m\u001b[0m\n\u001b[0;32m 25\u001b[0m \u001b[0mdir_path\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;34mr'C:\\checkouts\\trunk\\tests\\testing_Etienne\\datasets'\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 26\u001b[0m \u001b[0mbase_name\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;34m'BOX'\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 27\u001b[1;33m dataset_code='ERA5')\n\u001b[0m", - "\u001b[1;32mC:\\checkouts\\trunk\\SDToolBox\\output_data.py\u001b[0m 
in \u001b[0;36mgenerate_netcdf\u001b[1;34m(self, dir_path, base_name, dataset_code)\u001b[0m\n\u001b[0;32m 170\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m__set_geo_data\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mnetcdf\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 171\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m__set_dataset_variables\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mnetcdf\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mdataset_code\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 172\u001b[1;33m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m__set_variables_data\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mnetcdf\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mdataset_code\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 173\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m__set_global_data\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mnetcdf\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mdataset_code\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 174\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n", - "\u001b[1;32mC:\\checkouts\\trunk\\SDToolBox\\output_data.py\u001b[0m in \u001b[0;36m__set_variables_data\u001b[1;34m(self, netcdf, dataset_code)\u001b[0m\n\u001b[0;32m 597\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mis_gridded\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 598\u001b[0m \u001b[0mnetcdf\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mvariables\u001b[0m\u001b[1;33m[\u001b[0m\u001b[0mvar_name\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mupper\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m]\u001b[0m \u001b[1;33m=\u001b[0m\u001b[0;31m 
\u001b[0m\u001b[0;31m\\\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 599\u001b[1;33m \u001b[0mvariables\u001b[0m\u001b[1;33m[\u001b[0m\u001b[0mvar_name\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 600\u001b[0m \u001b[1;32melse\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 601\u001b[0m \u001b[1;31m# When non-gridded we need to transpose the dimensions.\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", - "\u001b[1;32mnetCDF4\\_netCDF4.pyx\u001b[0m in \u001b[0;36mnetCDF4._netCDF4.Variable.__setitem__\u001b[1;34m()\u001b[0m\n", - "\u001b[1;32m~\\anaconda3\\envs\\SDToolBox_env\\lib\\site-packages\\netCDF4\\utils.py\u001b[0m in \u001b[0;36m_StartCountStride\u001b[1;34m(elem, shape, dimensions, grp, datashape, put, use_get_vars)\u001b[0m\n\u001b[0;32m 363\u001b[0m \u001b[0mfullslice\u001b[0m \u001b[1;33m=\u001b[0m \u001b[1;32mFalse\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 364\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mfullslice\u001b[0m \u001b[1;32mand\u001b[0m \u001b[0mdatashape\u001b[0m \u001b[1;32mand\u001b[0m \u001b[0mput\u001b[0m \u001b[1;32mand\u001b[0m \u001b[1;32mnot\u001b[0m \u001b[0mhasunlim\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 365\u001b[1;33m \u001b[0mdatashape\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mbroadcasted_shape\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mshape\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mdatashape\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 366\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 367\u001b[0m \u001b[1;31m# pad datashape with zeros for dimensions not being sliced (issue #906)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", - 
"\u001b[1;32m~\\anaconda3\\envs\\SDToolBox_env\\lib\\site-packages\\netCDF4\\utils.py\u001b[0m in \u001b[0;36mbroadcasted_shape\u001b[1;34m(shp1, shp2)\u001b[0m\n\u001b[0;32m 971\u001b[0m \u001b[0ma\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mas_strided\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mx\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mshape\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mshp1\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mstrides\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;36m0\u001b[0m\u001b[1;33m]\u001b[0m \u001b[1;33m*\u001b[0m \u001b[0mlen\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mshp1\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 972\u001b[0m \u001b[0mb\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mas_strided\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mx\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mshape\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mshp2\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mstrides\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;36m0\u001b[0m\u001b[1;33m]\u001b[0m \u001b[1;33m*\u001b[0m \u001b[0mlen\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mshp2\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 973\u001b[1;33m \u001b[1;32mreturn\u001b[0m \u001b[0mnp\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mbroadcast\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0ma\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mb\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mshape\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m", - "\u001b[1;31mValueError\u001b[0m: shape mismatch: objects cannot be broadcast to a single shape" + "\u001b[1;31mKeyError\u001b[0m Traceback (most recent call last)", + "\u001b[1;32m\u001b[0m in \u001b[0;36m\u001b[1;34m\u001b[0m\n\u001b[0;32m 21\u001b[0m \u001b[1;31m# use the SDToolBox function to extract 
data\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 22\u001b[0m \u001b[0mprint\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mInput_DataBOX\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0minput_variables\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 23\u001b[1;33m \u001b[0mExtract_Data_ERA5BOX\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mexd\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mExtractData\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mget_era_5\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mdir_ERA5\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mInput_DataBOX\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 24\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 25\u001b[0m \u001b[1;31m# generate output data for waves\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", + "\u001b[1;32mC:\\checkouts\\trunk\\SDToolBox\\extract_data.py\u001b[0m in \u001b[0;36mget_era_5\u001b[1;34m(directory_path, input_data)\u001b[0m\n\u001b[0;32m 55\u001b[0m return data_extractor.extract_subset(\n\u001b[0;32m 56\u001b[0m \u001b[0mdirectory_path\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mdirectory_path\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 57\u001b[1;33m \u001b[0minput_data\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0minput_data\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 58\u001b[0m )\n\u001b[0;32m 59\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n", + "\u001b[1;32mC:\\checkouts\\trunk\\SDToolBox\\extract_data.py\u001b[0m in \u001b[0;36mextract_subset\u001b[1;34m(self, directory_path, input_data)\u001b[0m\n\u001b[0;32m 245\u001b[0m \u001b[0mvar_name\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mvariable_key\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 246\u001b[0m 
value=self.__get_variable_subset(\n\u001b[1;32m--> 247\u001b[1;33m \u001b[0moutput_data\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mget_from_var_dict\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mvariable_key\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 248\u001b[0m \u001b[0minput_data\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 249\u001b[0m \u001b[0minput_dataset\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", + "\u001b[1;32mC:\\checkouts\\trunk\\SDToolBox\\output_data.py\u001b[0m in \u001b[0;36mget_from_var_dict\u001b[1;34m(self, var_name)\u001b[0m\n\u001b[0;32m 187\u001b[0m \u001b[0mAny\u001b[0m \u001b[1;33m-\u001b[0m\u001b[1;33m-\u001b[0m \u001b[0mValue\u001b[0m \u001b[1;32mfor\u001b[0m \u001b[0mthe\u001b[0m \u001b[0mrequested\u001b[0m \u001b[0mvariable\u001b[0m\u001b[1;33m.\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 188\u001b[0m \"\"\"\n\u001b[1;32m--> 189\u001b[1;33m \u001b[1;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m__data_dict\u001b[0m\u001b[1;33m[\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mvar_val_key\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m[\u001b[0m\u001b[0mvar_name\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 190\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 191\u001b[0m \u001b[1;32mdef\u001b[0m \u001b[0mset_in_var_dict\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mvar_name\u001b[0m\u001b[1;33m:\u001b[0m \u001b[0mstr\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mvalue\u001b[0m\u001b[1;33m:\u001b[0m \u001b[0mAny\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", + "\u001b[1;31mKeyError\u001b[0m: 'swh'" ] } ], "source": [ "# select a spatial range (box) for which 
data is to be extracted\n", - "step = 2 # degrees\n", - "lonl = 170 # left lon value\n", - "lonr = 180 # right lon value\n", - "latl = -5 # lower lat value\n", - "latu = 5 # upper lat value\n", - "xrang = np.arange(lonl, lonr+step, step).tolist() # array or list?\n", - "yrang = np.arange(latl, latu+step, step).tolist() # array or list?\n", + "steplon = 0.5 # degrees\n", + "lonl = -5 # left lon value\n", + "lonr = 11 # right lon value\n", + "steplat = 0.5\n", + "latl = 50 # lower lat value\n", + "latu = 62 # upper lat value\n", + "xrang = np.arange(lonl, lonr+steplon, steplon).tolist() \n", + "yrang = np.arange(latl, latu+steplat, steplat).tolist() \n", "\n", "coordsBOX = {'LON': xrang, 'LAT': yrang}\n", " \n", "# use the SDToolBox function to create input data\n", "Input_DataBOX = ind.InputData(\n", " input_coordinates=coordsBOX, \n", - " input_variables=varWAM, \n", + " input_variables=['msl'], \n", + " input_scenarios=['era5'],\n", " input_years=timeWAMy, \n", " is_gridded = True)\n", "\n", "# use the SDToolBox function to extract data\n", + "print(Input_DataBOX.input_variables)\n", "Extract_Data_ERA5BOX = exd.ExtractData.get_era_5(dir_ERA5, Input_DataBOX)\n", "\n", "# generate output data for waves\n", "ERA5BOX = oud.OutputData.generate_netcdf(\n", " Extract_Data_ERA5BOX,\n", " dir_path=r'C:\\checkouts\\trunk\\tests\\testing_Etienne\\datasets',\n", " base_name='BOX',\n", - " dataset_code='ERA5')" + " dataset_code='ERA5_WAVE')" ] }, { "cell_type": "code", - "execution_count": 74, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[50. 50.5 51. 51.5 52. 52.5 53. 53.5 54. 54.5 55. 55.5 56. 56.5\n", - " 57. 57.5 58. 58.5 59. 59.5 60. 60.5 61. 61.5 62. ]\n", - "[-5. -4.5 -4. -3.5 -3. -2.5 -2. -1.5 -1. -0.5 0. 0.5 1. 1.5\n", - " 2. 2.5 3. 3.5 4. 4.5 5. 5.5 6. 6.5 7. 7.5 8. 8.5\n", - " 9. 9.5 10. 10.5 11. 
]\n" - ] - } - ], + "outputs": [], "source": [ - "#%% Extract data gridded\n", - "lonmin=-5;lonmax=11;dlon=0.5\n", - "latmin=50;latmax=62;dlat=0.5\n", - "lon = np.arange(latmin,latmax+dlat,dlat)\n", - "lat = np.arange(lonmin,lonmax+dlon,dlon)\n", - "print(lon)\n", - "print(lat)\n", - "#lon,lat = np.meshgrid(lon,lat)\n", - "#plt.plot(lat,lon,'or');plt.axis('equal')\n", - "inda = ind.InputData(\n", - " input_coordinates={'LON':lon,'LAT':lat},\n", - " input_variables=varWAM,\n", - " input_years=timeWAMy,\n", - " is_gridded=True)" + "# verification: plot with spatial maps of the mean values and standard deviation\n", + "\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ + "Task 2. Extract mean sea level pressure from ERA5 dataset on a gridded box on the North Sea" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# select a spatial range (box) for which data is to be extracted\n", + "steplon = 0.5 # degrees\n", + "lonl = -5 # left lon value\n", + "lonr = 11 # right lon value\n", + "steplat = 0.5\n", + "latl = 50 # lower lat value\n", + "latu = 62 # upper lat value\n", + "xrang = np.arange(lonl, lonr+steplon, steplon).tolist() \n", + "yrang = np.arange(latl, latu+steplat, steplat).tolist() \n", + "\n", + "coordsBOX = {'LON': xrang, 'LAT': yrang}\n", + " \n", + "# use the SDToolBox function to create input data\n", + "del(Input_DataBOX) # to clear up the previous one that was defined\n", + "Input_DataBOX = ind.InputData(\n", + " input_coordinates=coordsBOX, \n", + " input_variables=['msl_p'], \n", + " input_scenarios=['era5'],\n", + " input_years=timeWAMy, \n", + " is_gridded = True)\n", + "\n", + "# use the SDToolBox function to extract data\n", + "print(Input_DataBOX.input_variables)\n", + "Extract_Data_ERA5BOX = exd.ExtractData.get_era_5(dir_ERA5, Input_DataBOX)\n", + "\n", + "# generate output data for mean sea level pressure\n", + "ERA5BOX = oud.OutputData.generate_netcdf(\n", + " Extract_Data_ERA5BOX,\n", 
+ " dir_path=r'C:\\checkouts\\trunk\\tests\\testing_Etienne\\datasets',\n", + " base_name='BOX',\n", + " dataset_code='ERA5_MSL')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# verification: plot temporal series of a point in the grid" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# verification: plot spatial map of the mean sea level pressure" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Task 3. Test functionality to generate output netcdf files for waves and sea level pressure" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# testing various different parameters to test the functionality of the input_data, extract_data and output_data functions" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ "# 2. Data Preprocessing" ] }, { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Make use of the following functions:\n", + "5. predictor_definition (pde)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Task 4. Compute the squared spatial gradients of mean sea level pressure" + ] + }, + { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ - "# data_processing function" + "# (data_processing function)?\n", + "# predictor_definition library -> compute_spatial_gradient" ] }, { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# verification: for a point in the grid plot temporal series" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# verification: spatial map of the spatial gradient of mean sea level pressure" + ] + }, + { "cell_type": "markdown", "metadata": {}, "source": [ + "Task 5. 
Temporal resampling" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# wave climate data: hourly to 3-hourly" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# mean sea level pressure and gradient: hourly to 6-hourly" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# verification: for a point in the grid plot temporal series" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# verification: spatial map of the mean sea level pressure and gradient" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Task 6. Spatial resampling" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# mean sea level pressure 0.5 degree to 1 degree" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# verification: for a point in the grid plot temporal series" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# verification: spatial map of the mean sea level pressure and gradient" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ "# 3. Predictor Definition" ] }, { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Make use of the following functions:\n", + "5. predictor_definition (pde)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Task 7a. Aggregation of atmospheric data correlated with wave generation (averaged during 1 day). For computing daily running means of the 6h spatial fields." 
+ ] + }, + { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ - "# predictor_definition function" + "# predictor_definition library -> atmospheric_averaged_mean" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ + "Task 7b. If time permits, repeat task 7a with aggregation data option." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# perform aggregation data option" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# verification: for a point in the grid plot temporal series" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# verification: spatial map of the mean sea level pressure and gradient" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Task 8a. Perform Principal Component Analysis on the temporal and spatial fields of mean sea level pressure and the squared gradient. The function returns the empirical orthogonal functions (EOFs) and the principal components (PCs)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# predictor_definition library -> compute_PCA" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# verification: plot EOFs (spatial map) and PCs (temporal series)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ "# 4. Statistical Model" ] }, { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Make use of the following functions:\n", + "5. predictor_definition (pde)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Task 8b. Compute Principal Component Analysis on the different coastal locations with wave climate (if we are doing well with time). 
If this task is not done, continue selecting only one wave station on the coast." + ] + }, + { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ - "# statistical model function" + "# (statistical model function)?" ] }, { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# verification: plot EOFs (spatial map) and PCs (temporal series)" + ] + }, + { "cell_type": "markdown", "metadata": {}, "source": [ - "# 5. Application: Future Wave Projections" + "Task 9. Normalization of the predictor data (X, i.e. the PCs) using the Z-score method" ] }, { @@ -489,6 +745,121 @@ "metadata": {}, "outputs": [], "source": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Task 10. Define calibration and validation subsets with stratified K folds, try with 5 splits (80% calibration; 20% validation) and 2 repetitions to start with." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "# verification: Plot temporal series of wave climate selected for calibration and validation.\n", + "# (can be using different colors on the same plot)." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Task 11. 
Train the multivariate regression model (Y=alpha0+sum(alphai*Xi)), the predictand Y = Significant wave height, the predictor X= normalized PCs" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# verification: Check which PCs are significant (which numbers are in the model)\n", + "# Plot scatter X,Y empirical and X,Y* modelled, for the training subset and the validation subset \n", + "# (you can do this very simply using the average of the coefficients of the 8 models defined, (5-1)splits*2repetitions)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Task 12. Compute accuracy scores for the stratified folds taken for validation" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# (there are functions defined for this in the library statistical model, get_all_scorers())" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# verification: table with scores" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Task 13. Repeat tasks 10, 11 and 12 but with wave direction, because the regression model for circular variables is different" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# .." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# verifications: ..." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# 5. Application: Future Wave Projections" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# to be continued" + ] } ], "metadata": {