import numpy as np
import pandas as pd

# A one-dimensional Series built from a plain list; the NaN entry forces a
# floating-point dtype for the whole Series.
s = pd.Series(data=[1, 3, 5, np.nan, 6, 8])
print(s)

0    1.0
1    3.0
2    5.0
3    NaN
4    6.0
5    8.0
dtype: float64

# Six consecutive calendar days starting at 2013-01-01, used below as a
# row index.
dates = pd.date_range(start='20130101', periods=6)
print(dates)

DatetimeIndex(['2013-01-01', '2013-01-02', '2013-01-03', '2013-01-04',
               '2013-01-05', '2013-01-06'],
              dtype='datetime64[ns]', freq='D')

# 6x4 frame of standard-normal samples built from a NumPy array:
# rows are labelled by `dates`, columns are named A-D.
df = pd.DataFrame(
    np.random.randn(6, 4),
    index=dates,
    columns=list('ABCD'),
)
print(df)

                   A         B         C         D
2013-01-01  0.375465  1.208336  0.402344  0.620279
2013-01-02 -1.013392  0.697856 -1.108243  0.556585
2013-01-03  1.972607  0.126715 -1.203313  1.335513
2013-01-04  0.898955  1.949423 -1.090135  2.268246
2013-01-05 -1.569889 -0.062766 -0.289277  0.351344
2013-01-06  0.369853 -0.490518  0.654728 -0.683796

# Build a frame from a dict of per-column specifications.  The scalar
# entries (A, B, F) are repeated for every row; the array-like entries
# (C, D, E) each supply four values, so the frame has four rows.
df = pd.DataFrame(
    {
        'A': 1.,
        'B': pd.Timestamp('20130102'),
        'C': pd.Series(1, index=[0, 1, 2, 3], dtype='float32'),
        'D': np.array([3, 3, 3, 3], dtype='int32'),
        'E': pd.Categorical(["test", "train"] * 2),
        'F': 'foo',
    }
)
print(df)

# head() shows the first rows; with only four rows it shows the whole frame.
print(df.head())

# Row labels of the frame.
print(df.index)

     A          B    C  D      E    F
0  1.0 2013-01-02  1.0  3   test  foo
1  1.0 2013-01-02  1.0  3  train  foo
2  1.0 2013-01-02  1.0  3   test  foo
3  1.0 2013-01-02  1.0  3  train  foo
     A          B    C  D      E    F
0  1.0 2013-01-02  1.0  3   test  foo
1  1.0 2013-01-02  1.0  3  train  foo
2  1.0 2013-01-02  1.0  3   test  foo
3  1.0 2013-01-02  1.0  3  train  foo
Int64Index([0, 1, 2, 3], dtype='int64')

# Column labels of df.
print(df.columns)

Index(['A', 'B', 'C', 'D', 'E', 'F'], dtype='object')

# The frame's data as a NumPy array.  A bare expression is only echoed by an
# interactive session, so print it explicitly to see it when run as a script.
print(df.values)

array([[1.0, Timestamp('2013-01-02 00:00:00'), 1.0, 3, 'test', 'foo'],
       [1.0, Timestamp('2013-01-02 00:00:00'), 1.0, 3, 'train', 'foo'],
       [1.0, Timestamp('2013-01-02 00:00:00'), 1.0, 3, 'test', 'foo'],
       [1.0, Timestamp('2013-01-02 00:00:00'), 1.0, 3, 'train', 'foo']], dtype=object)

# list() over a string yields its individual characters.
print(list('1234'))

['1', '2', '3', '4']

# Fresh 6x4 frame of standard-normal samples built from a NumPy array:
# rows are labelled by `dates`, columns are named A-D.
df = pd.DataFrame(np.random.randn(6, 4), index=dates, columns=('A', 'B', 'C', 'D'))
print(df)

# sort_index returns a new, sorted frame rather than changing df, so the
# result has to be printed (or assigned) to be of any use in a script.
# axis=0 sorts by the row labels; ascending=False puts the latest date first.
print(df.sort_index(axis=0, ascending=False))

                   A         B         C         D
2013-01-01 -0.027837  0.077297 -0.124679 -0.089361
2013-01-02 -0.333209 -0.524610 -0.968841  0.705234
2013-01-03 -0.097520 -0.280874  0.468379  0.520813
2013-01-04  0.047438  1.877535 -1.117088  1.213645
2013-01-05 -0.866087 -0.381429 -0.650145 -1.208026
2013-01-06  0.509621  1.447343 -0.089048 -0.320007

# Every statement below produces a new object and leaves df unchanged.
# Bare expressions are only echoed by an interactive session, so each
# result is printed explicitly to be visible when run as a script.

# axis=1 sorts by the column labels, here in descending order.
print(df.sort_index(axis=1, ascending=False))

# Order the rows by the values in column B.
print(df.sort_values(by='B'))

print(df[0:2])             #slices rows

# Label-based selection: a date-string row slice, restricted to columns A and B.
print(df.loc['20130102':'20130104', ['A', 'B']])

# All rows, columns A and B only.
print(df.loc[:, ['A', 'B']])

print(df.loc[dates[0], 'A'])     #pointing to a number

-0.027837176903767264

# Regenerate the 6x4 frame of standard-normal samples (built from a NumPy
# array), with `dates` as row labels and A-D as column names.
df = pd.DataFrame(
    np.random.randn(6, 4),
    index=dates,
    columns=list('ABCD'),
)
print(df)

# The same row fetched two ways: by its label (the fourth date) and by its
# integer position.
print(df.loc[dates[3]])
print(df.iloc[3])

                   A         B         C         D
2013-01-01  0.674418  0.850490 -1.928506 -0.055898
2013-01-02 -1.070952 -0.908185  0.915426 -0.196263
2013-01-03 -0.557899 -0.624848  0.119355  0.773706
2013-01-04 -1.605233  0.367749 -1.345935 -0.776756
2013-01-05  1.054863  0.585973 -0.512838  1.924100
2013-01-06 -0.374367 -0.420607  0.536889  2.099798
A   -1.605233
B    0.367749
C   -1.345935
D   -0.776756
Name: 2013-01-04 00:00:00, dtype: float64
A   -1.605233
B    0.367749
C   -1.345935
D   -0.776756
Name: 2013-01-04 00:00:00, dtype: float64

# Positional slice: the rows at positions 3 and 4, first two columns.
print(df.iloc[3:5, :2])

                   A         B
2013-01-04 -1.605233  0.367749
2013-01-05  1.054863  0.585973

# Positional selections with iloc.  Every statement starts at column 0
# (an indented top-level statement is an IndentationError), and each result
# is printed because a bare expression shows nothing when run as a script.
print(df.iloc[1:3, :])       # rows at positions 1 and 2, every column

print(df.iloc[:, 1:3])       # every row, columns at positions 1 and 2

print(df.iloc[1, 1])         #for getting value explicitly

-0.90818478724555773

# Boolean indexing.  The results are printed explicitly because a bare
# expression shows nothing when run as a script.
print(df[df.A > 0])      # keep only the rows whose column A is positive

print(df[df > 0])        # element-wise condition applied to the whole frame

# Work on a copy so that adding column E leaves df itself untouched.
df2 = df.copy()
df2['E'] = ['one', 'one', 'two', 'three', 'four', 'three']
print(df2)

                   A         B         C         D      E
2013-01-01  0.674418  0.850490 -1.928506 -0.055898    one
2013-01-02 -1.070952 -0.908185  0.915426 -0.196263    one
2013-01-03 -0.557899 -0.624848  0.119355  0.773706    two
2013-01-04 -1.605233  0.367749 -1.345935 -0.776756  three
2013-01-05  1.054863  0.585973 -0.512838  1.924100   four
2013-01-06 -0.374367 -0.420607  0.536889  2.099798  three

# df still has only columns A-D: column E was added to the copy df2, not to df.
print(df)

                   A         B         C         D
2013-01-01  0.674418  0.850490 -1.928506 -0.055898
2013-01-02 -1.070952 -0.908185  0.915426 -0.196263
2013-01-03 -0.557899 -0.624848  0.119355  0.773706
2013-01-04 -1.605233  0.367749 -1.345935 -0.776756
2013-01-05  1.054863  0.585973 -0.512838  1.924100
2013-01-06 -0.374367 -0.420607  0.536889  2.099798

# New 6x4 frame of standard-normal samples built from a NumPy array:
# rows are labelled by `dates`, columns are named A-D.
df = pd.DataFrame(
    np.random.randn(6, 4),
    index=dates,
    columns=list('ABCD'),
)
print(df)

                   A         B         C         D
2013-01-01  0.165240 -0.446295 -1.473960 -2.272753
2013-01-02  0.849150 -0.848962 -1.255791  0.883076
2013-01-03 -2.605466  0.538571 -1.187388  0.386836
2013-01-04  0.622302 -1.006722  1.116170 -0.826446
2013-01-05  0.444221  1.084396  0.510132 -0.234789
2013-01-06  2.091659  0.969032 -0.167117  0.191899

# reindex: keep the same row labels and the existing columns, and request
# one additional column 'E'.  The new column is then filled with 1 through
# .loc, which assigns into the column that reindex created.
df2 = df.reindex(index=dates, columns=[*df.columns, 'E'])
df2.loc[:, ['E']] = 1
print(df2)

                   A         B         C         D    E
2013-01-01  0.165240 -0.446295 -1.473960 -2.272753  1.0
2013-01-02  0.849150 -0.848962 -1.255791  0.883076  1.0
2013-01-03 -2.605466  0.538571 -1.187388  0.386836  1.0
2013-01-04  0.622302 -1.006722  1.116170 -0.826446  1.0
2013-01-05  0.444221  1.084396  0.510132 -0.234789  1.0
2013-01-06  2.091659  0.969032 -0.167117  0.191899  1.0

# Mean of each column (one value per column label A-D).  Printed explicitly
# because a bare expression shows nothing when run as a script.
print(df.mean())

A    0.261184
B    0.048337
C   -0.409659
D   -0.312030
dtype: float64

# Mean of each row (one value per date).  The axis is passed by keyword for
# readability, and the result is printed because a bare expression shows
# nothing when run as a script.
print(df.mean(axis=1))

2013-01-01   -1.006942
2013-01-02   -0.093132
2013-01-03   -0.716862
2013-01-04   -0.023674
2013-01-05    0.450990
2013-01-06    0.771368
Freq: D, dtype: float64

# Split a 10x4 frame of standard-normal samples into three row-wise pieces
# and stitch them back together with concat.  The frame and the concatenated
# result are printed explicitly because bare expressions show nothing when
# run as a script.
df = pd.DataFrame(np.random.randn(10, 4))
print(df)

# Rows 0-2, rows 3-6 and rows 7-9.
pieces = [df[:3], df[3:7], df[7:]]

# Concatenating the pieces in order reproduces the original frame.
print(pd.concat(pieces))

	A	B	C	D
2013-01-01	-0.431875	2.015969	0.634258	0.862669
2013-01-02	2.086694	0.829265	-0.702303	0.657043

	0	1	2	3
0	0.034300	0.562577	-0.793628	0.266447
1	1.164551	-0.889588	0.512698	-0.374190
2	-1.815902	0.887592	-1.588327	0.129538
3	-0.126049	0.082714	-0.000536	0.513339
4	-0.025417	0.451445	0.629116	0.772384
5	0.011039	1.621303	0.234002	0.945682
6	-0.823008	0.771252	-0.295291	0.638264
7	-2.560043	-1.152196	-0.410878	-0.967670
8	-0.474092	0.549937	1.142994	-0.170030
9	1.174293	-0.158292	0.823994	0.285797

	0	1	2	3
0	0.034300	0.562577	-0.793628	0.266447
1	1.164551	-0.889588	0.512698	-0.374190
2	-1.815902	0.887592	-1.588327	0.129538
3	-0.126049	0.082714	-0.000536	0.513339
4	-0.025417	0.451445	0.629116	0.772384
5	0.011039	1.621303	0.234002	0.945682
6	-0.823008	0.771252	-0.295291	0.638264
7	-2.560043	-1.152196	-0.410878	-0.967670
8	-0.474092	0.549937	1.142994	-0.170030
9	1.174293	-0.158292	0.823994	0.285797

Search This Blog

Analyzing the Analytics

Working with Pandas

Comments

Post a Comment

Popular posts from this blog

Playing with Tableau

Visualizing with Tableau-1

	A	B	C	D
2013-01-06	0.509621	1.447343	-0.089048	-0.320007
2013-01-05	-0.866087	-0.381429	-0.650145	-1.208026
2013-01-04	0.047438	1.877535	-1.117088	1.213645
2013-01-03	-0.097520	-0.280874	0.468379	0.520813
2013-01-02	-0.333209	-0.524610	-0.968841	0.705234
2013-01-01	-0.027837	0.077297	-0.124679	-0.089361

	A	B	C	D
2013-01-02	-1.070952	-0.908185	0.915426	-0.196263
2013-01-03	-0.557899	-0.624848	0.119355	0.773706

	B	C
2013-01-01	0.850490	-1.928506
2013-01-02	-0.908185	0.915426
2013-01-03	-0.624848	0.119355
2013-01-04	0.367749	-1.345935
2013-01-05	0.585973	-0.512838
2013-01-06	-0.420607	0.536889

	A	B	C	D
2013-01-01	0.674418	0.850490	-1.928506	-0.055898
2013-01-05	1.054863	0.585973	-0.512838	1.924100