Current position: Home > Python - Pandas - data processing

Python - Pandas - data processing

2022-02-02 12:04:05 wlkkhhxx

Data merging - join:

import pandas as pd
import numpy as np

# Left frame: 2 rows (A, B) x 5 float columns (V..Z).
df1 = pd.DataFrame(
    np.arange(10, dtype=float).reshape(2, 5),
    index=list("AB"),
    columns=list("VWXYZ"),
)
'''
     V    W    X    Y    Z
A  0.0  1.0  2.0  3.0  4.0
B  5.0  6.0  7.0  8.0  9.0
'''
# Right frame: 3 rows (A, B, C) x 4 float columns labelled 0..3.
df2 = pd.DataFrame(
    np.arange(12, dtype=float).reshape(3, 4),
    index=list("ABC"),
    columns=range(4),
)
'''
     0    1     2     3
A  0.0  1.0   2.0   3.0
B  4.0  5.0   6.0   7.0
C  8.0  9.0  10.0  11.0
'''

# join aligns on the row index and keeps the caller's rows:
# df1 has only A and B, so row C of df2 is dropped.
df1.join(df2)
'''
     V    W    X    Y    Z    0    1    2    3
A  0.0  1.0  2.0  3.0  4.0  0.0  1.0  2.0  3.0
B  5.0  6.0  7.0  8.0  9.0  4.0  5.0  6.0  7.0
'''

# Called the other way round, all three rows of df2 are kept and the
# columns coming from df1 are NaN for row C, which df1 does not have.
df2.join(df1)
'''
     0    1     2     3    V    W    X    Y    Z
A  0.0  1.0   2.0   3.0  0.0  1.0  2.0  3.0  4.0
B  4.0  5.0   6.0   7.0  5.0  6.0  7.0  8.0  9.0
C  8.0  9.0  10.0  11.0  NaN  NaN  NaN  NaN  NaN
'''

Data merging - merge:

import pandas as pd
import numpy as np

# Left frame: column O holds the merge keys 2, 6 and 10.
df1 = pd.DataFrame(
    np.arange(12, dtype=float).reshape(3, 4),
    index=list("ABC"),
    columns=list("MNOP"),
)
'''
     M    N     O     P
A  0.0  1.0   2.0   3.0
B  4.0  5.0   6.0   7.0
C  8.0  9.0  10.0  11.0
'''

# Right frame: column X holds the merge keys 2 and 7.
df2 = pd.DataFrame(
    np.arange(10, dtype=float).reshape(2, 5),
    index=list("AB"),
    columns=list("VWXYZ"),
)
'''
     V    W    X    Y    Z
A  0.0  1.0  2.0  3.0  4.0
B  5.0  6.0  7.0  8.0  9.0
'''


# merge matches rows by column values (df1.O against df2.X), not by index.
# With no `how` argument the default is 'inner': only matching keys are kept.
df1.merge(df2, left_on="O", right_on="X")
'''
     M    N    O    P    V    W    X    Y    Z
0  0.0  1.0  2.0  3.0  0.0  1.0  2.0  3.0  4.0
'''

# inner: intersection of the keys -- only the key 2.0 exists on both sides.
df1.merge(df2, left_on="O", right_on="X", how='inner')
'''
     M    N    O    P    V    W    X    Y    Z
0  0.0  1.0  2.0  3.0  0.0  1.0  2.0  3.0  4.0
'''

# outer: union of the keys -- every row from both frames is kept and the
# columns of the side without a match are filled with NaN.
# NOTE(review): the row order below is the one shown in the original post;
# newer pandas releases may order outer-merge rows by key -- confirm.
df1.merge(df2, left_on="O", right_on="X", how='outer')
'''
     M    N     O     P    V    W    X    Y    Z
0  0.0  1.0   2.0   3.0  0.0  1.0  2.0  3.0  4.0
1  4.0  5.0   6.0   7.0  NaN  NaN  NaN  NaN  NaN
2  8.0  9.0  10.0  11.0  NaN  NaN  NaN  NaN  NaN
3  NaN  NaN   NaN   NaN  5.0  6.0  7.0  8.0  9.0
'''

# left: keep every row of the left frame (df1); the df2 columns are NaN
# where df1.O has no matching df2.X.
df1.merge(df2, left_on="O", right_on="X", how='left')
'''
     M    N     O     P    V    W    X    Y    Z
0  0.0  1.0   2.0   3.0  0.0  1.0  2.0  3.0  4.0
1  4.0  5.0   6.0   7.0  NaN  NaN  NaN  NaN  NaN
2  8.0  9.0  10.0  11.0  NaN  NaN  NaN  NaN  NaN
'''

# right: keep every row of the right frame (df2); the df1 columns are NaN
# where df2.X has no matching df1.O.
df1.merge(df2, left_on="O", right_on="X", how='right')
'''
     M    N    O    P    V    W    X    Y    Z
0  0.0  1.0  2.0  3.0  0.0  1.0  2.0  3.0  4.0
1  NaN  NaN  NaN  NaN  5.0  6.0  7.0  8.0  9.0
'''

Series with a composite (multi-level) index:

import pandas as pd
import numpy as np

# Sample frame: 'c' and 'd' will become the two index levels.
a = pd.DataFrame(
    {
        'a': range(7),
        'b': range(7, 0, -1),
        'c': ['one'] * 3 + ['two'] * 4,
        'd': list("hjklmno"),
    }
)
'''
   a  b    c  d
0  0  7  one  h
1  1  6  one  j
2  2  5  one  k
3  3  4  two  l
4  4  3  two  m
5  5  2  two  n
6  6  1  two  o
'''

# Index the frame by (c, d), then pick column 'a': the result is a Series
# with a two-level composite index.
x = a.set_index(['c', 'd'])['a']
'''
c    d
one  h    0
     j    1
     k    2
two  l    3
     m    4
     n    5
     o    6
Name: a, dtype: int64
'''

# Giving one label per level selects a single scalar value.
x['one', 'h']
'''
0
'''
x['two', 'n']
'''
5
'''

# swaplevel: swap the inner and outer levels of the composite index,
# so 'd' becomes the outer level and 'c' the inner one.
x.swaplevel()
'''
d  c  
h  one    0
j  one    1
k  one    2
l  two    3
m  two    4
n  two    5
o  two    6
Name: a, dtype: int64
'''

# After the swap, a 'd' label can be used directly as the outer key;
# the result is still indexed by the remaining level 'c'.
x.swaplevel()['h']
'''
c
one    0
Name: a, dtype: int64
'''

x.swaplevel()['k']
'''
c
one    2
Name: a, dtype: int64
'''

Composite index built from a DataFrame - selecting with .loc:

import pandas as pd
import numpy as np

# Sample frame: 'c' and 'd' will become the two index levels.
a = pd.DataFrame(
    {
        'a': range(7),
        'b': range(7, 0, -1),
        'c': ['one'] * 3 + ['two'] * 4,
        'd': list("hjklmno"),
    }
)
'''
   a  b    c  d
0  0  7  one  h
1  1  6  one  j
2  2  5  one  k
3  3  4  two  l
4  4  3  two  m
5  5  2  two  n
6  6  1  two  o
'''

# Index the frame by (c, d) and keep column 'a' as a Series.
x = a.set_index(['c', 'd'])['a']
'''
c    d
one  h    0
     j    1
     k    2
two  l    3
     m    4
     n    5
     o    6
Name: a, dtype: int64
'''

# .loc with an outer-level label returns the sub-Series for that label,
# indexed by the remaining inner level 'd'.
x.loc['one']
'''
d
h    0
j    1
k    2
Name: a, dtype: int64
'''

# Chaining a second .loc on the inner level yields the scalar.
x.loc['one'].loc['h']
'''
0
'''

# To select by the inner level first, swap the levels and then use .loc.
x.swaplevel().loc['h']
'''
c
one    0
Name: a, dtype: int64
'''

Copyright notice
Author: [wlkkhhxx]. Please include the original link when reprinting, thank you.
https://en.pythonmana.com/2022/02/202202021204039001.html

Random recommendations