import pandas as pd
# Load the customer list into a DataFrame; empty cells are read in as NaN.
df = pd.read_csv(filepath_or_buffer="e://data/CustomerList.csv")
# Display the raw data before any cleaning is applied.
print(df)
Output
    id  name    type     city
0  100  John     NaN   Boston
1  101   Bob  Online  Chicago
2  102   NaN  Retail      NaN
3  103     ?     NaN      NaN
# Report, for every column, whether it contains at least one null value.
# Wrapped in print() so the result is shown when this runs as a script too,
# not only when the expression is evaluated in an interactive session.
print(df.isnull().any())
Output
id      False
name     True
type     True
city     True
dtype: bool
# Show the rows whose 'type' value is null (head() limits this to the first five rows).
type_is_missing = df['type'].isnull()
rows_without_type = df[type_is_missing]
print(rows_without_type.head())
Output
    id  name  type    city
0  100  John   NaN  Boston
3  103     ?   NaN     NaN
# Per-column fill values: only nulls in the 'type' column are replaced;
# nulls in every other column are left as they are.
val = {'type': 'Regular'}
dfnew = df.fillna(val)
print(dfnew)
Output
    id  name     type     city
0  100  John  Regular   Boston
1  101   Bob   Online  Chicago
2  102   NaN   Retail      NaN
3  103     ?  Regular      NaN
# Drop every row that has fewer than 3 non-NA values.
df2 = dfnew.dropna(axis=0, thresh=3)
print(df2)
Output
    id  name     type     city
0  100  John  Regular   Boston
1  101   Bob   Online  Chicago
3  103     ?  Regular      NaN
# Per-column fill values: nulls in the 'city' column become 'Not Available'.
val = {'city': 'Not Available'}
df3 = df2.fillna(val)
# Swap the '?' marker in the 'name' column for 'Unknown' and show the result.
# The replaced frame is only printed here; df3 itself is not modified.
cleaned = df3.replace(to_replace={'name': '?'}, value='Unknown')
print(cleaned)
Output
    id     name     type           city
0  100     John  Regular         Boston
1  101      Bob   Online        Chicago
3  103  Unknown  Regular  Not Available