33  update

33.1 基础用法

update 以 index(行标签)和列名作为对齐依据,用另一个 DataFrame 中的值原地更新当前 DataFrame(不需要重新赋值)。

import pandas as pd

# Two sample frames with the same default index (0..2) and the same columns.
# Each inner list is one row, matching the way the frames are printed.
df1 = pd.DataFrame([[1, 4], [2, 5], [3, 6]], columns=['A', 'B'])
df2 = pd.DataFrame([[7, 10], [8, 11], [9, 12]], columns=['A', 'B'])

df1
A B
0 1 4
1 2 5
2 3 6
df2
A B
0 7 10
1 8 11
2 9 12
# Overwrite df1 in place with df2's values, matched on index and column labels.
# The call is not assigned to anything: df1 itself is modified.
df1.update(other=df2)
df1
A B
0 7 10
1 8 11
2 9 12
# df1 has an extra column C that df2 does not have
df1 = pd.DataFrame(dict(A=[1, 2, 3], B=[4, 5, 6], C=[7, 8, 9]))
df2 = pd.DataFrame(dict(A=[10, 11, 12], B=[13, 14, 15]))

df1
A B C
0 1 4 7
1 2 5 8
2 3 6 9
df2
A B
0 10 13
1 11 14
2 12 15
# Pass only columns A and B of df2 to update(); column C of df1 has no
# counterpart in the argument and therefore keeps its values
df1.update(other=df2.loc[:, ['A', 'B']])
print(df1)
    A   B  C
0  10  13  7
1  11  14  8
2  12  15  9

33.2 高级用法

以某列作为键更新:update 是按 index 对齐的,因此需要先用 set_index 把作为键的列设为 index,更新完成后再用 reset_index 把它还原为普通列。

33.2.1 用旧数据更新新数据中 key 相同的行

应用场景:每日得到的运行病人清单中,新数据里的某些字段是空的,而旧数据(比如已经编辑并保存在数据库里的数据)里对应的字段不是空的。这时可以用旧数据更新新数据,把旧数据中已有的内容带到新数据里。

# Old data: keys A-C. The 'key' column becomes the index because update()
# matches rows by index label.
old_df = pd.DataFrame(
    {'key': ['A', 'B', 'C'], 'col1': [1, 2, 3], 'col2': [4, 5, 6]}
).set_index('key')

# New data: the same keys plus an extra key D, indexed the same way
new_df = pd.DataFrame(
    {'key': ['A', 'B', 'C', 'D'], 'col1': [10, 20, 30, 40], 'col2': [40, 50, 60, 70]}
).set_index('key')

old_df
col1 col2
key
A 1 4
B 2 5
C 3 6
new_df
col1 col2
key
A 10 40
B 20 50
C 30 60
D 40 70
# Copy old_df's values into new_df, in place, for every key present in both.
# Key D exists only in new_df, so its values (40, 70) are kept.
new_df.update(other=old_df)

# Turn 'key' back into an ordinary column for display
new_df.reset_index()
/tmp/ipykernel_4236/922394229.py:2: FutureWarning: In a future version, `df.iloc[:, i] = newvals` will attempt to set the values inplace instead of always setting a new array. To retain the old behavior, use either `df[df.columns[i]] = newvals` or, if columns are non-unique, `df.isetitem(i, newvals)`
  new_df.update(old_df)
key col1 col2
0 A 1.0 4.0
1 B 2.0 5.0
2 C 3.0 6.0
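3 D 40.0 70.0

注意:在上面的输出中,col1、col2 由整数变成了浮点数(如 1 → 1.0)。原因是 update 内部会先把 old_df 按 new_df 的 index 对齐,key 为 D 的行在 old_df 中不存在,被填成 NaN,参与更新的列因此被提升为 float(具体表现与 pandas 版本有关)。如需保持整数类型,可在更新后执行 new_df = new_df.astype(int)。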

33.2.2 只更新缺失值

import numpy as np
# df1 has one missing cell (row 1 of column B); df2 is fully populated
df1 = pd.DataFrame(dict(A=[1, 2, 3], B=[4, np.nan, 6]))
df2 = pd.DataFrame(dict(A=[7, 8, 9], B=[10, 11, 12]))

df1 
A B
0 1 4.0
1 2 NaN
2 3 6.0
df2
A B
0 7 10
1 8 11
2 9 12
# overwrite=True: every cell of df1 is replaced by df2's value, whether it
# held a number or NaN
df1.update(other=df2, overwrite=True)
df1
A B
0 7 10.0
1 8 11.0
2 9 12.0
# Rebuild both frames so this example starts from the original data again
df1 = pd.DataFrame(dict(A=[1, 2, 3], B=[4, np.nan, 6]))
df2 = pd.DataFrame(dict(A=[7, 8, 9], B=[10, 11, 12]))

# overwrite=False: cells of df1 that already hold a value are kept;
# only the NaN cell is filled from df2
df1.update(other=df2, overwrite=False)
df1
A B
0 1 4.0
1 2 11.0
2 3 6.0