Skip to content

Commit

Permalink
create more diverse value for name
Browse files Browse the repository at this point in the history
  • Loading branch information
mhilmiasyrofi committed Jul 2, 2020
1 parent 4e69f9e commit f8f8cff
Showing 1 changed file with 59 additions and 46 deletions.
105 changes: 59 additions & 46 deletions codes/prepare-data-from-gender-computer.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
},
{
"cell_type": "code",
"execution_count": 65,
"execution_count": 150,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -30,7 +30,7 @@
},
{
"cell_type": "code",
"execution_count": 119,
"execution_count": 151,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -49,7 +49,7 @@
},
{
"cell_type": "code",
"execution_count": 120,
"execution_count": 152,
"metadata": {},
"outputs": [
{
Expand Down Expand Up @@ -179,7 +179,7 @@
"[12739 rows x 4 columns]"
]
},
"execution_count": 120,
"execution_count": 152,
"metadata": {},
"output_type": "execute_result"
}
Expand All @@ -197,7 +197,7 @@
},
{
"cell_type": "code",
"execution_count": 139,
"execution_count": 153,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -220,7 +220,7 @@
},
{
"cell_type": "code",
"execution_count": 140,
"execution_count": 154,
"metadata": {},
"outputs": [
{
Expand Down Expand Up @@ -261,7 +261,7 @@
" '../data/gc_name/raw/AlbaniaMaleUTF8.csv']"
]
},
"execution_count": 140,
"execution_count": 154,
"metadata": {},
"output_type": "execute_result"
}
Expand All @@ -272,7 +272,7 @@
},
{
"cell_type": "code",
"execution_count": 123,
"execution_count": 155,
"metadata": {},
"outputs": [
{
Expand Down Expand Up @@ -313,7 +313,7 @@
" '../data/gc_name/raw/HungaryFemaleUTF8.csv']"
]
},
"execution_count": 123,
"execution_count": 155,
"metadata": {},
"output_type": "execute_result"
}
Expand All @@ -335,26 +335,32 @@
},
{
"cell_type": "code",
"execution_count": 124,
"execution_count": 156,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Afghanistan is discarded\n",
"Length data is less than 10\n",
"Length data is less than 30\n",
"Somalia is discarded\n",
"Length data is less than 30\n",
"Albania is discarded\n",
"Length data is less than 10\n",
"Length data is less than 30\n",
"Vietnam is discarded\n",
"Length data is less than 30\n",
"Albania is discarded\n",
"Length data is less than 10\n",
"Length data is less than 30\n",
"Afghanistan is discarded\n",
"Length data is less than 10\n"
"Length data is less than 30\n",
"Somalia is discarded\n",
"Length data is less than 30\n"
]
}
],
"source": [
"n = 10 # number of names to take from each country\n",
"n = 30 # number of names to take from each country\n",
"\n",
"# list of discarded country\n",
"discarded_country = set()\n",
Expand All @@ -380,7 +386,7 @@
},
{
"cell_type": "code",
"execution_count": 130,
"execution_count": 157,
"metadata": {},
"outputs": [],
"source": [
Expand Down Expand Up @@ -414,7 +420,7 @@
},
{
"cell_type": "code",
"execution_count": 131,
"execution_count": 158,
"metadata": {},
"outputs": [
{
Expand Down Expand Up @@ -481,58 +487,58 @@
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>615</th>\n",
" <td>Virág</td>\n",
" <th>1735</th>\n",
" <td>Valéria</td>\n",
" <td>female</td>\n",
" <td>Hungary</td>\n",
" </tr>\n",
" <tr>\n",
" <th>616</th>\n",
" <td>Adél</td>\n",
" <th>1736</th>\n",
" <td>Ilus</td>\n",
" <td>female</td>\n",
" <td>Hungary</td>\n",
" </tr>\n",
" <tr>\n",
" <th>617</th>\n",
" <td>Olga</td>\n",
" <th>1737</th>\n",
" <td>Kata</td>\n",
" <td>female</td>\n",
" <td>Hungary</td>\n",
" </tr>\n",
" <tr>\n",
" <th>618</th>\n",
" <td>Jolán</td>\n",
" <th>1738</th>\n",
" <td>Emmi</td>\n",
" <td>female</td>\n",
" <td>Hungary</td>\n",
" </tr>\n",
" <tr>\n",
" <th>619</th>\n",
" <td>Szeréna</td>\n",
" <th>1739</th>\n",
" <td>Irmus</td>\n",
" <td>female</td>\n",
" <td>Hungary</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>620 rows × 3 columns</p>\n",
"<p>1740 rows × 3 columns</p>\n",
"</div>"
],
"text/plain": [
" Name Gender Country\n",
"0 Roen male UK\n",
"1 Jeet male UK\n",
"2 Hagen male UK\n",
"3 Willow male UK\n",
"4 Belal male UK\n",
".. ... ... ...\n",
"615 Virág female Hungary\n",
"616 Adél female Hungary\n",
"617 Olga female Hungary\n",
"618 Jolán female Hungary\n",
"619 Szeréna female Hungary\n",
" Name Gender Country\n",
"0 Roen male UK\n",
"1 Jeet male UK\n",
"2 Hagen male UK\n",
"3 Willow male UK\n",
"4 Belal male UK\n",
"... ... ... ...\n",
"1735 Valéria female Hungary\n",
"1736 Ilus female Hungary\n",
"1737 Kata female Hungary\n",
"1738 Emmi female Hungary\n",
"1739 Irmus female Hungary\n",
"\n",
"[620 rows x 3 columns]"
"[1740 rows x 3 columns]"
]
},
"execution_count": 131,
"execution_count": 158,
"metadata": {},
"output_type": "execute_result"
}
Expand All @@ -543,7 +549,7 @@
},
{
"cell_type": "code",
"execution_count": 144,
"execution_count": 159,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -559,7 +565,7 @@
},
{
"cell_type": "code",
"execution_count": 145,
"execution_count": 160,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -572,7 +578,7 @@
},
{
"cell_type": "code",
"execution_count": 148,
"execution_count": 161,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -589,14 +595,21 @@
},
{
"cell_type": "code",
"execution_count": 149,
"execution_count": 162,
"metadata": {},
"outputs": [],
"source": [
"for c in used_country :\n",
" dfc = df[df[\"Country\"] == c]\n",
" dfc.to_csv(basedir + \"/country/\" + c + \".csv\", index=False)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
Expand Down

0 comments on commit f8f8cff

Please sign in to comment.