From abab5abb6d4f5644f42c1eb820d9ee0243b8cc40 Mon Sep 17 00:00:00 2001 From: codebasics Date: Tue, 23 Mar 2021 19:44:48 -0400 Subject: [PATCH] word embeddings --- .../supervised_word_embeddings.ipynb | 317 ++++++++++++++++++ 1 file changed, 317 insertions(+) create mode 100644 DeepLearningML/22_word_embedding/supervised_word_embeddings.ipynb diff --git a/DeepLearningML/22_word_embedding/supervised_word_embeddings.ipynb b/DeepLearningML/22_word_embedding/supervised_word_embeddings.ipynb new file mode 100644 index 00000000..b949e5ee --- /dev/null +++ b/DeepLearningML/22_word_embedding/supervised_word_embeddings.ipynb @@ -0,0 +1,317 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "from tensorflow.keras.preprocessing.text import one_hot\n", + "from tensorflow.keras.preprocessing.sequence import pad_sequences\n", + "from tensorflow.keras.models import Sequential\n", + "from tensorflow.keras.layers import Dense\n", + "from tensorflow.keras.layers import Flatten\n", + "from tensorflow.keras.layers import Embedding\n", + "\n", + "reviews = ['nice food',\n", + " 'amazing restaurant',\n", + " 'too good',\n", + " 'just loved it!',\n", + " 'will go again',\n", + " 'horrible food',\n", + " 'never go there',\n", + " 'poor service',\n", + " 'poor quality',\n", + " 'needs improvement']\n", + "\n", + "sentiment = np.array([1,1,1,1,1,0,0,0,0,0])" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[4, 23]" + ] + }, + "execution_count": 20, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "one_hot(\"amazing restaurant\",30)" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[[13, 21], [4, 23], [14, 17], [8, 15, 16], [22, 15, 29], [8, 21], [26, 15, 24], [16, 4], [16, 12], 
[4, 29]]\n" + ] + } + ], + "source": [ + "vocab_size = 30\n", + "encoded_reviews = [one_hot(d, vocab_size) for d in reviews]\n", + "print(encoded_reviews)" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[[13 21 0 0]\n", + " [ 4 23 0 0]\n", + " [14 17 0 0]\n", + " [ 8 15 16 0]\n", + " [22 15 29 0]\n", + " [ 8 21 0 0]\n", + " [26 15 24 0]\n", + " [16 4 0 0]\n", + " [16 12 0 0]\n", + " [ 4 29 0 0]]\n" + ] + } + ], + "source": [ + "max_length = 4\n", + "padded_reviews = pad_sequences(encoded_reviews, maxlen=max_length, padding='post')\n", + "print(padded_reviews)" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "embedded_vector_size = 5\n", + "\n", + "model = Sequential()\n", + "model.add(Embedding(vocab_size, embedded_vector_size, input_length=max_length,name=\"embedding\"))\n", + "model.add(Flatten())\n", + "model.add(Dense(1, activation='sigmoid'))" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": {}, + "outputs": [], + "source": [ + "X = padded_reviews\n", + "y = sentiment" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Model: \"sequential_1\"\n", + "_________________________________________________________________\n", + "Layer (type) Output Shape Param # \n", + "=================================================================\n", + "embedding (Embedding) (None, 4, 5) 150 \n", + "_________________________________________________________________\n", + "flatten_1 (Flatten) (None, 20) 0 \n", + "_________________________________________________________________\n", + "dense_1 (Dense) (None, 1) 21 \n", + "=================================================================\n", + "Total params: 171\n", + "Trainable params: 171\n", + 
"Non-trainable params: 0\n", + "_________________________________________________________________\n", + "None\n" + ] + } + ], + "source": [ + "model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])\n", + "print(model.summary())" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 26, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "model.fit(X, y, epochs=50, verbose=0)" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "1/1 [==============================] - 0s 1ms/step - loss: 0.6384 - accuracy: 1.0000\n" + ] + }, + { + "data": { + "text/plain": [ + "1.0" + ] + }, + "execution_count": 29, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# evaluate the model\n", + "loss, accuracy = model.evaluate(X, y)\n", + "accuracy" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "30" + ] + }, + "execution_count": 30, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "weights = model.get_layer('embedding').get_weights()[0]\n", + "len(weights)" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([-0.08330977, -0.06752131, -0.04629624, -0.00765801, -0.02024159],\n", + " dtype=float32)" + ] + }, + "execution_count": 31, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "weights[13]" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "metadata": { + "scrolled": false + }, + "outputs": [ + { + "data": { + "text/plain": [ + "array([-0.07935128, -0.08574004, 0.06615968, -0.02349528, 0.00917289],\n", + " dtype=float32)" + ] + }, + 
"execution_count": 32, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "weights[4]" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/plain": [ + "array([ 0.0128377 , 0.03549778, 0.05134471, -0.07147218, 0.03261041],\n", + " dtype=float32)" + ] + }, + "execution_count": 33, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "weights[16]" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.5" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +}