import pandas as pd
# Build two 3-row sample frames that share the column labels 'A' and 'B'
df1 = pd.DataFrame([[1, 4], [2, 5], [3, 6]], columns=['A', 'B'])
df2 = pd.DataFrame([[7, 10], [8, 11], [9, 12]], columns=['A', 'B'])
df1
| | A | B |
|---|---|---|
| 0 | 1 | 4 |
| 1 | 2 | 5 |
| 2 | 3 | 6 |
以 index(行索引)作为依据来 update:`df1.update(df2)` 按行索引和列名对齐,并就地修改 `df1`
import pandas as pd
# Two sample frames with the same default index (0..2) and the same columns
first_cols = {'A': [1, 2, 3], 'B': [4, 5, 6]}
second_cols = {'A': [7, 8, 9], 'B': [10, 11, 12]}
df1 = pd.DataFrame(data=first_cols)
df2 = pd.DataFrame(data=second_cols)
df1
| | A | B |
|---|---|---|
| 0 | 1 | 4 |
| 1 | 2 | 5 |
| 2 | 3 | 6 |
df2
| | A | B |
|---|---|---|
| 0 | 7 | 10 |
| 1 | 8 | 11 |
| 2 | 9 | 12 |
# Overwrite df1 in place with the matching cells of df2
df1.update(other=df2)
df1
| | A | B |
|---|---|---|
| 0 | 7 | 10 |
| 1 | 8 | 11 |
| 2 | 9 | 12 |
# Sample frames: df1 carries an extra column 'C' that df2 does not have
df1 = pd.DataFrame([[1, 4, 7], [2, 5, 8], [3, 6, 9]], columns=['A', 'B', 'C'])
df2 = pd.DataFrame([[10, 13], [11, 14], [12, 15]], columns=['A', 'B'])
df1
| | A | B | C |
|---|---|---|---|
| 0 | 1 | 4 | 7 |
| 1 | 2 | 5 | 8 |
| 2 | 3 | 6 | 9 |
df2
| | A | B |
|---|---|---|
| 0 | 10 | 13 |
| 1 | 11 | 14 |
| 2 | 12 | 15 |
# Push only df2's 'A' and 'B' columns into df1; the printed result shows
# df1's 'C' column unchanged
shared_cols = ['A', 'B']
df1.update(df2[shared_cols])
print(df1)
    A   B  C
0  10  13  7
1  11  14  8
2  12  15  9
以某列作为键更新
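应用场景:每日得到的运行病人清单,新数据里的字段是空的,但旧数据(比如已经编辑保存在数据库里的数据)里的字段不是空的,可以用旧数据更新新数据,以保证数据的更新。注意:`update` 默认 `overwrite=True`,会用旧数据中的非空值覆盖新数据中对应位置的值(下面的例子就是这种情况);如果只想填补新数据里为空的字段、保留新数据已有的值,应使用 `overwrite=False`。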
# Old records, indexed by the 'key' column
old_df = pd.DataFrame(
    {'key': ['A', 'B', 'C'], 'col1': [1, 2, 3], 'col2': [4, 5, 6]}
).set_index('key')
# New records, indexed the same way; row 'D' has no counterpart in old_df
new_df = pd.DataFrame(
    {'key': ['A', 'B', 'C', 'D'], 'col1': [10, 20, 30, 40], 'col2': [40, 50, 60, 70]}
).set_index('key')
old_df
| key | col1 | col2 |
|---|---|---|
| A | 1 | 4 |
| B | 2 | 5 |
| C | 3 | 6 |
new_df
| key | col1 | col2 |
|---|---|---|
| A | 10 | 40 |
| B | 20 | 50 |
| C | 30 | 60 |
| D | 40 | 70 |
# Update the new DataFrame with the old DataFrame.
# Both frames are indexed by 'key', so rows are matched on that key: in the
# recorded output A, B and C take old_df's values, while D, which old_df
# does not contain, keeps its own values.
new_df.update(old_df)
new_df.reset_index()
/tmp/ipykernel_4236/922394229.py:2: FutureWarning: In a future version, `df.iloc[:, i] = newvals` will attempt to set the values inplace instead of always setting a new array. To retain the old behavior, use either `df[df.columns[i]] = newvals` or, if columns are non-unique, `df.isetitem(i, newvals)`
  new_df.update(old_df)
| | key | col1 | col2 |
|---|---|---|---|
| 0 | A | 1.0 | 4.0 |
| 1 | B | 2.0 | 5.0 |
| 2 | C | 3.0 | 6.0 |
| 3 | D | 40.0 | 70.0 |
import numpy as np
# Sample frames: df1 has a missing value (NaN) in column 'B', df2 has none
df1 = pd.DataFrame(dict(A=[1, 2, 3], B=[4, np.nan, 6]))
df2 = pd.DataFrame(dict(A=[7, 8, 9], B=[10, 11, 12]))
df1
| | A | B |
|---|---|---|
| 0 | 1 | 4.0 |
| 1 | 2 | NaN |
| 2 | 3 | 6.0 |
df2
| | A | B |
|---|---|---|
| 0 | 7 | 10 |
| 1 | 8 | 11 |
| 2 | 9 | 12 |
# Take df2's value for every matching cell of df1, including cells that
# already held a value
df1.update(other=df2, overwrite=True)
df1
| | A | B |
|---|---|---|
| 0 | 7 | 10.0 |
| 1 | 8 | 11.0 |
| 2 | 9 | 12.0 |
# Rebuild the samples, then only fill the gaps: with overwrite=False the
# cells of df1 that already hold a value are kept and just the NaN in 'B'
# is taken from df2
df1 = pd.DataFrame(dict(A=[1, 2, 3], B=[4, np.nan, 6]))
df2 = pd.DataFrame(dict(A=[7, 8, 9], B=[10, 11, 12]))
df1.update(other=df2, overwrite=False)
df1
| | A | B |
|---|---|---|
| 0 | 1 | 4.0 |
| 1 | 2 | 11.0 |
| 2 | 3 | 6.0 |