25  na

25.1 删除空值行

25.1.1 删除A列有空值的行

import pandas as pd

# Build a small sample DataFrame. The None in column 'A' is the missing
# value; it is displayed as NaN when the frame is shown.
data = dict(
    A=[1, 2, None, 4, 5],
    B=['apple', 'banana', 'cherry', 'date', 'elderberry'],
    C=[1, 2, 3, 4, 3],
    D=['dog', 'elephant', 'fox', 'dog', 'fox'],
)

df = pd.DataFrame(data)

df
A B C D
0 1.0 apple 1 dog
1 2.0 banana 2 elephant
2 NaN cherry 3 fox
3 4.0 date 4 dog
4 5.0 elderberry 3 fox
# Drop only the rows whose column 'A' is null (subset=['A']);
# the other columns are not checked.
df = df.dropna(subset=['A'])

# Display the result
df
A B C D
0 1.0 apple 1 dog
1 2.0 banana 2 elephant
3 4.0 date 4 dog
4 5.0 elderberry 3 fox

25.2 删除空值列

25.2.1 删除所有行都为空值的列

注意:这里的空值是指 np.nan(缺失值),不是空字符串 ''。

from faker import Faker
import pandas as pd
import numpy as np
# Create the Faker instance used to generate the sample values
fake = Faker()

# Build 10 (name, email, NaN) tuples; the third field is NaN in every row,
# so the resulting 'Address' column is entirely null.
data = [(fake.name(), fake.email(), np.nan) for _ in range(10)]

# Convert the list of tuples into a DataFrame
df = pd.DataFrame(data, columns=['Name', 'Email', 'Address'])

# Null out a single cell (row 2, column 1 = 'Email') so that 'Email'
# contains a null value without being entirely null.
df.iloc[2,1] = np.nan

df
Name Email Address
0 Gregory Jones kimberly07@example.com NaN
1 Monica Leonard leeerin@example.com NaN
2 Jessica Richards NaN NaN
3 Jeremiah Espinoza christina73@example.net NaN
4 Jessica Sparks kathleen86@example.org NaN
5 Peter Frank mmcdonald@example.org NaN
6 Douglas Myers yjones@example.org NaN
7 John Martin moorearthur@example.com NaN
8 Claudia Acosta pamelaross@example.net NaN
9 Anthony Spencer anna28@example.com NaN
# Drop every column in which all values are null; a column with only some
# nulls (such as 'Email') is kept.
df1 = df.dropna(axis='columns', how='all')

df1
Name Email
0 Gregory Jones kimberly07@example.com
1 Monica Leonard leeerin@example.com
2 Jessica Richards NaN
3 Jeremiah Espinoza christina73@example.net
4 Jessica Sparks kathleen86@example.org
5 Peter Frank mmcdonald@example.org
6 Douglas Myers yjones@example.org
7 John Martin moorearthur@example.com
8 Claudia Acosta pamelaross@example.net
9 Anthony Spencer anna28@example.com

25.3 判断某个值是否为空值

import pandas as pd

# Sample data. The None in column 'A' shows up as nan once the frame is
# converted to a list of per-row dicts (see the printed result).
data = dict(
    A=[1, 2, None, 4, 5],
    B=['apple', 'banana', 'cherry', 'date', 'elderberry'],
    C=[1, 2, 3, 4, 3],
    D=['dog', 'elephant', 'fox', 'dog', 'fox'],
)

# One dict per row, keyed by column name
d_dict = pd.DataFrame(data).to_dict(orient='records')
print(d_dict)
[{'A': 1.0, 'B': 'apple', 'C': 1, 'D': 'dog'}, {'A': 2.0, 'B': 'banana', 'C': 2, 'D': 'elephant'}, {'A': nan, 'B': 'cherry', 'C': 3, 'D': 'fox'}, {'A': 4.0, 'B': 'date', 'C': 4, 'D': 'dog'}, {'A': 5.0, 'B': 'elderberry', 'C': 3, 'D': 'fox'}]
# Walk the records and report every one whose 'A' value is null,
# using pd.isnull() as the null test.
# NOTE(review): the ")" right before the closing quote is part of the
# message text, so it is printed literally after the value.
for d in d_dict:
    if pd.isnull(d.get('A')):
        print(f"pd.isnull()判断是空值:{d['A']})")

# Alternative: the same check done with np.isnan().
# NOTE(review): np.isnan() presumably only accepts numeric input; confirm
# before applying it to values that may be strings or None.
import numpy as np
for d in d_dict:
    if np.isnan(d.get('A')):
        print(f"np.isnan()判断是空值:{d['A']})")

## Incorrect example: do NOT test for null this way.
# NaN never compares equal to anything (not even to np.nan itself), and the
# stored value is a float rather than the string "nan", so this condition is
# never true and nothing is printed.
for d in d_dict:
    if d.get('A') == np.nan or d.get('A') == "nan":
        print(d['A'])
pd.isnull()判断是空值:nan)
np.isnan()判断是空值:nan)