Second largest number

import sys


def _parse_number(token):
    """Convert one command-line token to a number.

    Integers stay integers so they print without a trailing ``.0``; anything
    else is parsed as a float. Values that are already numeric pass through.

    Raises:
        ValueError: if *token* is not a valid numeric string.
    """
    if isinstance(token, (int, float)):
        return token
    try:
        return int(token)
    except ValueError:
        return float(token)


def second_largest(values):
    """Return the second largest number among *values*.

    Args:
        values: iterable of numbers or numeric strings (e.g. ``sys.argv[1:]``).

    Returns:
        The element just below the maximum in sorted order. Duplicates count
        as separate entries, so ``["5", "5", "3"]`` yields ``5``.

    Raises:
        ValueError: if fewer than two values are supplied, or a value is not
            numeric.
    """
    # Compare as numbers: sorting the raw argv strings would rank "9" above
    # "100" because strings are ordered lexicographically.
    numbers = sorted(_parse_number(value) for value in values)
    if len(numbers) < 2:
        raise ValueError("at least two numbers are required")
    return numbers[-2]


def main(argv=None):
    """Print the second largest of the numbers given on the command line."""
    args = sys.argv[1:] if argv is None else argv
    try:
        result = second_largest(args)
    except ValueError as exc:
        # Exit with a readable message instead of a traceback on bad input.
        sys.exit("error: " + str(exc))
    print('Second largest number: ', result)


if __name__ == "__main__":
    main()

MySQL result set into JSON

import json


def rows_to_json(rows):
    """Serialize result-set rows as a compact JSON object.

    Args:
        rows: iterable of sequences; element 0 of each row becomes the key
            and element 1 the value. Any further columns are ignored.

    Returns:
        A JSON string with no whitespace after ``,`` or ``:``. If a key
        appears more than once, the last row wins.
    """
    mapping = {row[0]: row[1] for row in rows}
    return json.dumps(mapping, separators=(',', ':'))


def main():
    """Dump the first two columns of the ``product`` table to product.json."""
    # Imported here so rows_to_json() stays usable without the MySQL driver.
    from mysql import connector

    # NOTE: demo credentials from the tutorial; do not hard-code real ones.
    connection = connector.connect(host='localhost', database='testdb',
                                   user='root', password='root')
    try:
        cursor = connection.cursor()
        cursor.execute('select * from product')
        products = cursor.fetchall()
    finally:
        # Release the connection even if the query fails.
        connection.close()

    # The context manager guarantees the file is flushed and closed.
    with open('product.json', 'w') as out:
        out.write(rows_to_json(products))


if __name__ == "__main__":
    main()

# Output:
# {"A100":"PenDrive","A102":"TV","A103":"BlueRay Player","B100":"Mouse","B101":"Keyboard","B102":"Monitor","C300":"Speakers"}

Data cleaning

import pandas as pd

# Load the customer list and show it in full.
df = pd.read_csv("e://data/CustomerList.csv")
print(df)
# Output:
#     id  name    type     city
# 0  100  John     NaN   Boston
# 1  101   Bob  Online  Chicago
# 2  102   NaN  Retail      NaN
# 3  103     ?     NaN      NaN

# Checking for null values for all columns
# (wrapped in print() so the result is shown when run as a script, not only
# in an interactive session)
print(df.isnull().any())
# Output:
# id      False
# name     True
# type     True
# city     True
# dtype: bool

# Checking for null values for type column
print(df[df['type'].isnull()].head())
# Output:
#     id  name type    city
# 0  100  John  NaN  Boston
# 3  103     ?  NaN     NaN

# (Excerpt ends here; the original post continues under "Data cleaning".)

Dataframe metadata

import pandas as pd

# Create data frame from csv file
df = pd.read_csv("e://data/state-population.csv")

# Meta data about data frame (info() prints its report directly)
df.info()
# Output:
# <class 'pandas.core.frame.DataFrame'>
# RangeIndex: 2544 entries, 0 to 2543
# Data columns (total 4 columns):
# state/region    2544 non-null object
# ages            2544 non-null object
# year            2544 non-null int64
# population      2524 non-null float64
# dtypes: float64(1), int64(1), object(2)
# memory usage: 79.6+
#
# (Excerpt ends here; the original post continues under "Dataframe metadata".)

Dataframe index

import pandas as pd

# Create data frame from csv file
df = pd.read_csv("e://data/state-population.csv")

# Indexing on a single column; set_index returns a new frame, df is unchanged.
print(df.set_index('year').head(3))
# Output:
#      state/region     ages  population
# year
# 2012           AL  under18   1117489.0
# 2012           AL    total   4817528.0
# 2010           AL  under18   1130966.0

# indexing on multiple columns
print(df.set_index(['year', 'state/region']).head(3))
# Output:
#                       ages  population
# year state/region
# 2012 AL            under18   1117489.0
#      AL              total   4817528.0
# 2010 AL            under18   1130966.0

Dataframe rename columns

import pandas as pd

# Create data frame from csv file
df = pd.read_csv("e://data/state-population.csv")

# Rename column state/region to state-region
# (rename returns a new frame; df itself keeps its original column names)
print(df.rename(columns={'state/region': 'state-region'}).head(2))
# Output:
#   state-region     ages  year  population
# 0           AL  under18  2012   1117489.0
# 1           AL    total  2012   4817528.0

# Change column names in upper case
print(df.rename(str.upper, axis='columns').head(2))
# Output:
#   STATE/REGION     AGES  YEAR  POPULATION
# 0           AL  under18  2012   1117489.0
# 1           AL    total  2012   4817528.0