import pandas as pd
# Create two sample DataFrames that share the same default index (0..2)
# and the same column labels, so every cell of one has a match in the other.
df1 = pd.DataFrame({'A': [1, 2, 3], 'B': [4, 5, 6]})
df2 = pd.DataFrame({'A': [7, 8, 9], 'B': [10, 11, 12]})
# Bare expression: in a notebook this renders df1 as the cell output.
df1
| | A | B |
|---|---|---|
| 0 | 1 | 4 |
| 1 | 2 | 5 |
| 2 | 3 | 6 |
以index作为依据来update
import pandas as pd
# Create two sample DataFrames with identical index and column labels;
# df1 is the frame to be updated and df2 supplies the replacement values.
df1 = pd.DataFrame({'A': [1, 2, 3], 'B': [4, 5, 6]})
df2 = pd.DataFrame({'A': [7, 8, 9], 'B': [10, 11, 12]})
# Bare expression: in a notebook this renders df1 as the cell output.
df1
| | A | B |
|---|---|---|
| 0 | 1 | 4 |
| 1 | 2 | 5 |
| 2 | 3 | 6 |
df2
| | A | B |
|---|---|---|
| 0 | 7 | 10 |
| 1 | 8 | 11 |
| 2 | 9 | 12 |
# Update values in df1 with values from df2.
# DataFrame.update modifies df1 in place (it returns None) and aligns the two
# frames on index and column labels, so each matching cell of df1 is replaced.
df1.update(df2)
# Bare expression: in a notebook this renders the updated df1.
df1
| | A | B |
|---|---|---|
| 0 | 7 | 10 |
| 1 | 8 | 11 |
| 2 | 9 | 12 |
# Create two sample DataFrames; df1 carries an extra column 'C' that df2
# does not have, so 'C' has nothing to be updated from.
df1 = pd.DataFrame({'A': [1, 2, 3], 'B': [4, 5, 6], 'C': [7, 8, 9]})
df2 = pd.DataFrame({'A': [10, 11, 12], 'B': [13, 14, 15]})
# Bare expression: in a notebook this renders df1 as the cell output.
df1
| | A | B | C |
|---|---|---|---|
| 0 | 1 | 4 | 7 |
| 1 | 2 | 5 | 8 |
| 2 | 3 | 6 | 9 |
df2
| | A | B |
|---|---|---|
| 0 | 10 | 13 |
| 1 | 11 | 14 |
| 2 | 12 | 15 |
# Update specific columns in df1 with values from df2.
# Only the columns selected from df2 ('A' and 'B') are passed to update(),
# so those are the only df1 columns that can change; 'C' is left as it was.
df1.update(df2[['A', 'B']])
print(df1)
A B C
0 10 13 7
1 11 14 8
2 12 15 9
以某列作为键更新
应用场景:每日得到的运行病人清单,新数据里的字段是空的,但旧数据(比如已经编辑保存在数据库里的数据)里的字段不是空的,可以用旧数据更新新数据,以保证数据的更新。
# Create the old DataFrame.
old_df = pd.DataFrame({'key': ['A', 'B', 'C'], 'col1': [1, 2, 3], 'col2': [4, 5, 6]})
# update() aligns rows on the index, so move the 'key' column into the index
# to make it act as the join key.
old_df.set_index('key', inplace=True)

# Create the new DataFrame; it has one extra key ('D') that old_df lacks.
new_df = pd.DataFrame({'key': ['A', 'B', 'C', 'D'], 'col1': [10, 20, 30, 40], 'col2': [40, 50, 60, 70]})
new_df.set_index('key', inplace=True)
old_df
| key | col1 | col2 |
|---|---|---|
| A | 1 | 4 |
| B | 2 | 5 |
| C | 3 | 6 |
new_df
| key | col1 | col2 |
|---|---|---|
| A | 10 | 40 |
| B | 20 | 50 |
| C | 30 | 60 |
| D | 40 | 70 |
# Update the new DataFrame with the old DataFrame.
# Both frames are indexed by 'key', so update() matches rows by key: in the
# recorded output A, B and C take the old values, while D (which has no row
# in old_df) keeps its new values.
new_df.update(old_df)
# reset_index() returns a new frame with 'key' turned back into a regular
# column; it is not assigned here, so new_df itself stays indexed by 'key'.
# NOTE(review): the recorded output shows the integer columns as floats
# (1.0, 40.0, ...); this may differ between pandas versions - confirm.
new_df.reset_index()
/tmp/ipykernel_4236/922394229.py:2: FutureWarning: In a future version, `df.iloc[:, i] = newvals` will attempt to set the values inplace instead of always setting a new array. To retain the old behavior, use either `df[df.columns[i]] = newvals` or, if columns are non-unique, `df.isetitem(i, newvals)`
new_df.update(old_df)
| | key | col1 | col2 |
|---|---|---|---|
| 0 | A | 1.0 | 4.0 |
| 1 | B | 2.0 | 5.0 |
| 2 | C | 3.0 | 6.0 |
| 3 | D | 40.0 | 70.0 |
import numpy as np
# Create two sample DataFrames; df1 has a missing value (NaN) in column 'B',
# which makes the effect of update()'s `overwrite` flag visible.
df1 = pd.DataFrame({'A': [1, 2, 3], 'B': [4, np.nan, 6]})
df2 = pd.DataFrame({'A': [7, 8, 9], 'B': [10, 11, 12]})
# Bare expression: in a notebook this renders df1 as the cell output.
df1
| | A | B |
|---|---|---|
| 0 | 1 | 4.0 |
| 1 | 2 | NaN |
| 2 | 3 | 6.0 |
df2
| | A | B |
|---|---|---|
| 0 | 7 | 10 |
| 1 | 8 | 11 |
| 2 | 9 | 12 |
# Update df1 with df2, overwriting existing values.
# With overwrite=True (the default) every df1 cell that has a non-NaN
# counterpart in df2 is replaced, whether or not df1 already held a value.
df1.update(df2, overwrite=True)
# Bare expression: in a notebook this renders the updated df1.
df1
| | A | B |
|---|---|---|
| 0 | 7 | 10.0 |
| 1 | 8 | 11.0 |
| 2 | 9 | 12.0 |
# Update df1 with df2, without overwriting existing values.
# The frames are rebuilt first because the previous update changed df1.
df1 = pd.DataFrame({'A': [1, 2, 3], 'B': [4, np.nan, 6]})
df2 = pd.DataFrame({'A': [7, 8, 9], 'B': [10, 11, 12]})
# With overwrite=False only the NaN cells of df1 are filled from df2;
# cells that already hold a value are left untouched.
df1.update(df2, overwrite=False)
# Bare expression: in a notebook this renders the updated df1.
df1
| | A | B |
|---|---|---|
| 0 | 1 | 4.0 |
| 1 | 2 | 11.0 |
| 2 | 3 | 6.0 |