Merge branch 'MLGlobalHealth:main' into main

MLGlobalHealth · Jul 22, 2024 · fe05010 · fe05010
2 parents 0a41cbe + f75b07d
commit fe05010
Show file tree

Hide file tree

Showing 2 changed files with 57 additions and 1 deletion.
diff --git a/README.md b/README.md
@@ -2,7 +2,7 @@
 
 ## Introduction
 
-This is a repository for code of NeurIPS benchmark and dataset submission 2024.
+This repository contains the code for **KidSat: satellite imagery to map childhood poverty dataset and benchmark**.
 
 
 ## Getting All DHS Data

diff --git a/modelling/mosaiks/ridge_fitting.ipynb b/modelling/mosaiks/ridge_fitting.ipynb
@@ -0,0 +1,56 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import pandas as pd\n",
+    "import numpy as np\n",
+    "from sklearn.model_selection import train_test_split\n",
+    "from sklearn.preprocessing import StandardScaler\n",
+    "from sklearn.linear_model import RidgeCV\n",
+    "from sklearn.pipeline import Pipeline\n",
+    "from sklearn.metrics import mean_absolute_error\n",
+    "\n",
+    "# Load the pre-split train/test tables (replace the placeholder paths)\n",
+    "train_df = pd.read_csv('path/to/your/train.csv')\n",
+    "test_df = pd.read_csv('path/to/your/test.csv')\n",
+    "target = 'target' # e.g. deprived_sev\n",
+    "\n",
+    "# Split features and target: every column except `target` is used as a feature\n",
+    "# NOTE(review): assumes all remaining columns are numeric - confirm against the CSVs\n",
+    "X_train = train_df.drop(columns=[target])\n",
+    "y_train = train_df[target]\n",
+    "X_test = test_df.drop(columns=[target])\n",
+    "y_test = test_df[target]\n",
+    "\n",
+    "# Candidate regularisation strengths: 13 log-spaced values from 1e-6 to 1e6\n",
+    "alphas = np.logspace(-6, 6, 13)\n",
+    "\n",
+    "# Standardise the features, then let RidgeCV choose alpha from `alphas`.\n",
+    "# The per-alpha CV values were never read afterwards, so they are not stored;\n",
+    "# this also avoids the `store_cv_values` keyword, which newer scikit-learn\n",
+    "# releases renamed to `store_cv_results`.\n",
+    "pipeline = Pipeline([\n",
+    "    ('scaler', StandardScaler()),\n",
+    "    ('ridge', RidgeCV(alphas=alphas))\n",
+    "])\n",
+    "\n",
+    "# Fit scaler and ridge on the training split only\n",
+    "pipeline.fit(X_train, y_train)\n",
+    "\n",
+    "# Predict on the held-out test split\n",
+    "predictions = pipeline.predict(X_test)\n",
+    "\n",
+    "# Evaluate using MAE\n",
+    "mae = mean_absolute_error(y_test, predictions)\n",
+    "print(f'Mean Absolute Error: {mae}')\n"
+   ]
+  }
+ ],
+ "metadata": {
+  "language_info": {
+   "name": "python"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}