Skip to content
This repository has been archived by the owner on May 17, 2024. It is now read-only.

Commit

Permalink
Merge pull request #155 from Soujanya8977/master
Browse files: browse the repository at this point in the history
Fixed issue in clean_names()
  • Loading branch information
Soujanya8977 authored Oct 18, 2021
2 parents 56a41a9 + 3bb1755 commit 96b085a
Show file tree
Hide file tree
Showing 3 changed files with 7 additions and 7 deletions.
8 changes: 4 additions & 4 deletions dxc/ai/clean_data/clean_data.py
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,5 @@
import pandas as pd
#import janitor #data cleaning
import janitor #data cleaning
from ftfy import fix_text #data cleaning
import nltk #data cleaning
nltk.download('punkt') #data cleaning
Expand Down Expand Up @@ -34,10 +34,10 @@ def clean_dataframe(df, impute = False, text_fields = [], date_fields = [], nume
clean_df = (
df
#make the column names lower case and remove spaces
#.clean_names()
.clean_names()

#remove empty columns
#.remove_empty()
.remove_empty()

#remove empty rows and columns
.dropna(how='all')
Expand Down Expand Up @@ -69,7 +69,7 @@ def clean_dataframe(df, impute = False, text_fields = [], date_fields = [], nume
field = '_'.join(field.split()).lower()
clean_df[field] = clean_df[field].astype('category')

#clean_df=clean_df.clean_names()
clean_df=clean_df.clean_names()

globals_file.clean_data_used = True

Expand Down
2 changes: 1 addition & 1 deletion dxc/ai/pipeline/pipeline.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -27,7 +27,7 @@ def insert_collection(data_layer, collection_name, df):
def write_raw_data(data_layer, raw_data, arrow_date_fields = []):
##make the column names lower case and remove spaces
if globals_file.clean_data_used == True:
#raw_data = raw_data.clean_names()
raw_data = raw_data.clean_names()
globals_file.wrt_raw_data_used = True
globals_file.clean_data_used = False
##convert your raw data into writable data by converting Arrow dates to strings
Expand Down
4 changes: 2 additions & 2 deletions requirements.txt
Original file line number | Diff line number | Diff line change
Expand Up @@ -8,14 +8,14 @@ ftfy==6.0.3
interpret_community==0.19.3
missingno==0.5.0
arrow==1.1.1
pyjanitor==0.21.0
pyjanitor==0.20.0
pyaf==3.0
pandas_profiling==3.0.0
datacleaner==0.1.5
Algorithmia==1.10.0
GitPython==3.1.18
ipython>=7.16.1
janitor==0.1.1
#janitor==0.1.1
raiwidgets==0.9.4
scikit-learn
flatten-json==0.1.13
Expand Down

0 comments on commit 96b085a

Please sign in to comment.