forked from codebasics/py
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
codebasics
committed
Mar 23, 2021
1 parent
409e7e2
commit abab5ab
Showing
1 changed file
with
317 additions
and
0 deletions.
There are no files selected for viewing
317 changes: 317 additions & 0 deletions
317
DeepLearningML/22_word_embedding/supervised_word_embeddings.ipynb
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,317 @@ | ||
{ | ||
"cells": [ | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 19, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"import numpy as np\n", | ||
"from tensorflow.keras.preprocessing.text import one_hot\n", | ||
"from tensorflow.keras.preprocessing.sequence import pad_sequences\n", | ||
"from tensorflow.keras.models import Sequential\n", | ||
"from tensorflow.keras.layers import Dense\n", | ||
"from tensorflow.keras.layers import Flatten\n", | ||
"from tensorflow.keras.layers import Embedding\n", | ||
"\n", | ||
"reviews = ['nice food',\n", | ||
" 'amazing restaurant',\n", | ||
" 'too good',\n", | ||
" 'just loved it!',\n", | ||
" 'will go again',\n", | ||
" 'horrible food',\n", | ||
" 'never go there',\n", | ||
" 'poor service',\n", | ||
" 'poor quality',\n", | ||
" 'needs improvement']\n", | ||
"\n", | ||
"sentiment = np.array([1,1,1,1,1,0,0,0,0,0])" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 20, | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"data": { | ||
"text/plain": [ | ||
"[4, 23]" | ||
] | ||
}, | ||
"execution_count": 20, | ||
"metadata": {}, | ||
"output_type": "execute_result" | ||
} | ||
], | ||
"source": [ | ||
"one_hot(\"amazing restaurant\",30)" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 21, | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"name": "stdout", | ||
"output_type": "stream", | ||
"text": [ | ||
"[[13, 21], [4, 23], [14, 17], [8, 15, 16], [22, 15, 29], [8, 21], [26, 15, 24], [16, 4], [16, 12], [4, 29]]\n" | ||
] | ||
} | ||
], | ||
"source": [ | ||
"vocab_size = 30\n", | ||
"encoded_reviews = [one_hot(d, vocab_size) for d in reviews]\n", | ||
"print(encoded_reviews)" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 22, | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"name": "stdout", | ||
"output_type": "stream", | ||
"text": [ | ||
"[[13 21 0 0]\n", | ||
" [ 4 23 0 0]\n", | ||
" [14 17 0 0]\n", | ||
" [ 8 15 16 0]\n", | ||
" [22 15 29 0]\n", | ||
" [ 8 21 0 0]\n", | ||
" [26 15 24 0]\n", | ||
" [16 4 0 0]\n", | ||
" [16 12 0 0]\n", | ||
" [ 4 29 0 0]]\n" | ||
] | ||
} | ||
], | ||
"source": [ | ||
"max_length = 4\n", | ||
"padded_reviews = pad_sequences(encoded_reviews, maxlen=max_length, padding='post')\n", | ||
"print(padded_reviews)" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 23, | ||
"metadata": { | ||
"scrolled": true | ||
}, | ||
"outputs": [], | ||
"source": [ | ||
"embeded_vector_size = 5\n", | ||
"\n", | ||
"model = Sequential()\n", | ||
"model.add(Embedding(vocab_size, embeded_vector_size, input_length=max_length,name=\"embedding\"))\n", | ||
"model.add(Flatten())\n", | ||
"model.add(Dense(1, activation='sigmoid'))" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 24, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"X = padded_reviews\n", | ||
"y = sentiment" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 25, | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"name": "stdout", | ||
"output_type": "stream", | ||
"text": [ | ||
"Model: \"sequential_1\"\n", | ||
"_________________________________________________________________\n", | ||
"Layer (type) Output Shape Param # \n", | ||
"=================================================================\n", | ||
"embedding (Embedding) (None, 4, 5) 150 \n", | ||
"_________________________________________________________________\n", | ||
"flatten_1 (Flatten) (None, 20) 0 \n", | ||
"_________________________________________________________________\n", | ||
"dense_1 (Dense) (None, 1) 21 \n", | ||
"=================================================================\n", | ||
"Total params: 171\n", | ||
"Trainable params: 171\n", | ||
"Non-trainable params: 0\n", | ||
"_________________________________________________________________\n", | ||
"None\n" | ||
] | ||
} | ||
], | ||
"source": [ | ||
"model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])\n", | ||
"print(model.summary())" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 26, | ||
"metadata": { | ||
"scrolled": true | ||
}, | ||
"outputs": [ | ||
{ | ||
"data": { | ||
"text/plain": [ | ||
"<tensorflow.python.keras.callbacks.History at 0x1bb8daa5a30>" | ||
] | ||
}, | ||
"execution_count": 26, | ||
"metadata": {}, | ||
"output_type": "execute_result" | ||
} | ||
], | ||
"source": [ | ||
"model.fit(X, y, epochs=50, verbose=0)" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 29, | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"name": "stdout", | ||
"output_type": "stream", | ||
"text": [ | ||
"1/1 [==============================] - 0s 1ms/step - loss: 0.6384 - accuracy: 1.0000\n" | ||
] | ||
}, | ||
{ | ||
"data": { | ||
"text/plain": [ | ||
"1.0" | ||
] | ||
}, | ||
"execution_count": 29, | ||
"metadata": {}, | ||
"output_type": "execute_result" | ||
} | ||
], | ||
"source": [ | ||
"# evaluate the model on the same X, y it was trained on (training accuracy, not held-out performance)\n", | ||
"loss, accuracy = model.evaluate(X, y)\n", | ||
"accuracy" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 30, | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"data": { | ||
"text/plain": [ | ||
"30" | ||
] | ||
}, | ||
"execution_count": 30, | ||
"metadata": {}, | ||
"output_type": "execute_result" | ||
} | ||
], | ||
"source": [ | ||
"weights = model.get_layer('embedding').get_weights()[0]\n", | ||
"len(weights)" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 31, | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"data": { | ||
"text/plain": [ | ||
"array([-0.08330977, -0.06752131, -0.04629624, -0.00765801, -0.02024159],\n", | ||
" dtype=float32)" | ||
] | ||
}, | ||
"execution_count": 31, | ||
"metadata": {}, | ||
"output_type": "execute_result" | ||
} | ||
], | ||
"source": [ | ||
"weights[13]" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 32, | ||
"metadata": { | ||
"scrolled": false | ||
}, | ||
"outputs": [ | ||
{ | ||
"data": { | ||
"text/plain": [ | ||
"array([-0.07935128, -0.08574004, 0.06615968, -0.02349528, 0.00917289],\n", | ||
" dtype=float32)" | ||
] | ||
}, | ||
"execution_count": 32, | ||
"metadata": {}, | ||
"output_type": "execute_result" | ||
} | ||
], | ||
"source": [ | ||
"weights[4]" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 33, | ||
"metadata": { | ||
"scrolled": true | ||
}, | ||
"outputs": [ | ||
{ | ||
"data": { | ||
"text/plain": [ | ||
"array([ 0.0128377 , 0.03549778, 0.05134471, -0.07147218, 0.03261041],\n", | ||
" dtype=float32)" | ||
] | ||
}, | ||
"execution_count": 33, | ||
"metadata": {}, | ||
"output_type": "execute_result" | ||
} | ||
], | ||
"source": [ | ||
"weights[16]" | ||
] | ||
} | ||
], | ||
"metadata": { | ||
"kernelspec": { | ||
"display_name": "Python 3", | ||
"language": "python", | ||
"name": "python3" | ||
}, | ||
"language_info": { | ||
"codemirror_mode": { | ||
"name": "ipython", | ||
"version": 3 | ||
}, | ||
"file_extension": ".py", | ||
"mimetype": "text/x-python", | ||
"name": "python", | ||
"nbconvert_exporter": "python", | ||
"pygments_lexer": "ipython3", | ||
"version": "3.8.5" | ||
} | ||
}, | ||
"nbformat": 4, | ||
"nbformat_minor": 4 | ||
} |