From abab5abb6d4f5644f42c1eb820d9ee0243b8cc40 Mon Sep 17 00:00:00 2001
From: codebasics <learnpythonlanguage@gmail.com>
Date: Tue, 23 Mar 2021 19:44:48 -0400
Subject: [PATCH] word embeddings

---
 .../supervised_word_embeddings.ipynb          | 317 ++++++++++++++++++
 1 file changed, 317 insertions(+)
 create mode 100644 DeepLearningML/22_word_embedding/supervised_word_embeddings.ipynb

diff --git a/DeepLearningML/22_word_embedding/supervised_word_embeddings.ipynb b/DeepLearningML/22_word_embedding/supervised_word_embeddings.ipynb
new file mode 100644
index 00000000..b949e5ee
--- /dev/null
+++ b/DeepLearningML/22_word_embedding/supervised_word_embeddings.ipynb
@@ -0,0 +1,317 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 19,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import numpy as np\n",
+    "from tensorflow.keras.preprocessing.text import one_hot\n",
+    "from tensorflow.keras.preprocessing.sequence import pad_sequences\n",
+    "from tensorflow.keras.models import Sequential\n",
+    "from tensorflow.keras.layers import Dense\n",
+    "from tensorflow.keras.layers import Flatten\n",
+    "from tensorflow.keras.layers import Embedding\n",
+    "\n",
+    "reviews = ['nice food',\n",
+    "        'amazing restaurant',\n",
+    "        'too good',\n",
+    "        'just loved it!',\n",
+    "        'will go again',\n",
+    "        'horrible food',\n",
+    "        'never go there',\n",
+    "        'poor service',\n",
+    "        'poor quality',\n",
+    "        'needs improvement']\n",
+    "\n",
+    "sentiment = np.array([1,1,1,1,1,0,0,0,0,0])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 20,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "[4, 23]"
+      ]
+     },
+     "execution_count": 20,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "one_hot(\"amazing restaurant\",30)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 21,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[[13, 21], [4, 23], [14, 17], [8, 15, 16], [22, 15, 29], [8, 21], [26, 15, 24], [16, 4], [16, 12], [4, 29]]\n"
+     ]
+    }
+   ],
+   "source": [
+    "vocab_size = 30\n",
+    "encoded_reviews = [one_hot(d, vocab_size) for d in reviews]\n",
+    "print(encoded_reviews)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 22,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[[13 21  0  0]\n",
+      " [ 4 23  0  0]\n",
+      " [14 17  0  0]\n",
+      " [ 8 15 16  0]\n",
+      " [22 15 29  0]\n",
+      " [ 8 21  0  0]\n",
+      " [26 15 24  0]\n",
+      " [16  4  0  0]\n",
+      " [16 12  0  0]\n",
+      " [ 4 29  0  0]]\n"
+     ]
+    }
+   ],
+   "source": [
+    "max_length = 4\n",
+    "padded_reviews = pad_sequences(encoded_reviews, maxlen=max_length, padding='post')\n",
+    "print(padded_reviews)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 23,
+   "metadata": {
+    "scrolled": true
+   },
+   "outputs": [],
+   "source": [
+    "embeded_vector_size = 5\n",
+    "\n",
+    "model = Sequential()\n",
+    "model.add(Embedding(vocab_size, embeded_vector_size, input_length=max_length,name=\"embedding\"))\n",
+    "model.add(Flatten())\n",
+    "model.add(Dense(1, activation='sigmoid'))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 24,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "X = padded_reviews\n",
+    "y = sentiment"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 25,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Model: \"sequential_1\"\n",
+      "_________________________________________________________________\n",
+      "Layer (type)                 Output Shape              Param #   \n",
+      "=================================================================\n",
+      "embedding (Embedding)        (None, 4, 5)              150       \n",
+      "_________________________________________________________________\n",
+      "flatten_1 (Flatten)          (None, 20)                0         \n",
+      "_________________________________________________________________\n",
+      "dense_1 (Dense)              (None, 1)                 21        \n",
+      "=================================================================\n",
+      "Total params: 171\n",
+      "Trainable params: 171\n",
+      "Non-trainable params: 0\n",
+      "_________________________________________________________________\n",
+      "None\n"
+     ]
+    }
+   ],
+   "source": [
+    "model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])\n",
+    "print(model.summary())"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 26,
+   "metadata": {
+    "scrolled": true
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "<tensorflow.python.keras.callbacks.History at 0x1bb8daa5a30>"
+      ]
+     },
+     "execution_count": 26,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "model.fit(X, y, epochs=50, verbose=0)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 29,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "1/1 [==============================] - 0s 1ms/step - loss: 0.6384 - accuracy: 1.0000\n"
+     ]
+    },
+    {
+     "data": {
+      "text/plain": [
+       "1.0"
+      ]
+     },
+     "execution_count": 29,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# evaluate the model on the same data it was trained on (this toy example has no held-out test set)\n",
+    "loss, accuracy = model.evaluate(X, y)\n",
+    "accuracy"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 30,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "30"
+      ]
+     },
+     "execution_count": 30,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "weights = model.get_layer('embedding').get_weights()[0]\n",
+    "len(weights)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 31,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "array([-0.08330977, -0.06752131, -0.04629624, -0.00765801, -0.02024159],\n",
+       "      dtype=float32)"
+      ]
+     },
+     "execution_count": 31,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "weights[13]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 32,
+   "metadata": {
+    "scrolled": false
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "array([-0.07935128, -0.08574004,  0.06615968, -0.02349528,  0.00917289],\n",
+       "      dtype=float32)"
+      ]
+     },
+     "execution_count": 32,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "weights[4]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 33,
+   "metadata": {
+    "scrolled": true
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "array([ 0.0128377 ,  0.03549778,  0.05134471, -0.07147218,  0.03261041],\n",
+       "      dtype=float32)"
+      ]
+     },
+     "execution_count": 33,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "weights[16]"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.8.5"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}