import re
text = '这个单词是由-EabcdxyxG共11个字符组成的吗?'

# Single characters: each match of \w is exactly one word character.
# Punctuation such as "-" and "?" is not a word character and is skipped.
pattern = r'\w'
print(re.findall(pattern, text))
['这', '个', '单', '词', '是', '由', 'E', 'a', 'b', 'c', 'd', 'x', 'y', 'x', 'G', '共', '1', '1', '个', '字', '符', '组', '成', '的', '吗']
不包含标点符号
import re
text = '这个单词是由-EabcdxyxG共11个字符组成的吗?'

# Single characters: each match of \w is exactly one word character.
pattern = r'\w'
print(re.findall(pattern, text))
['这', '个', '单', '词', '是', '由', 'E', 'a', 'b', 'c', 'd', 'x', 'y', 'x', 'G', '共', '1', '1', '个', '字', '符', '组', '成', '的', '吗']
# Multiple characters: \w+ matches each maximal run of word characters,
# so the text is cut only where a non-word character appears.
pattern = r'\w+'
print(re.findall(pattern, text))
['这个单词是由', 'EabcdxyxG共11个字符组成的吗']
# Non-word characters (\W); this class includes punctuation marks.
pattern = r'\W'
print(re.findall(pattern, text))
['-', '?']
# Text to split
text = "Hello,world!How are you?"

# Split the text on commas, periods, exclamation marks and question marks
result = re.split(r'[,.!?]', text)

# Print the pieces produced by the split
print(result)

# `maxsplit` limits how many splits are performed
result = re.split(r'[,.!?]', text, maxsplit=1)
print(result)

# maxsplit=0 is the default: split at every possible position
result = re.split(r'[,.!?]', text, maxsplit=0)
print(result)
['Hello', 'world', 'How are you', '']
['Hello', 'world!How are you?']
['Hello', 'world', 'How are you', '']
text = "Hello|World@How|Are|You"

# The separator is "|" or "@". The lookbehind matches the empty position
# right after a separator, so each separator stays attached to the piece
# before it. Splitting on empty matches needs Python 3.7 or newer.
result = re.split(r"(?<=[@|])", text)
print(result)
['Hello|', 'World@', 'How|', 'Are|', 'You']
?re.split
import re
# Define a string
text = "apple banana cherry"

# Use re.sub() to replace "banana" with "orange"
new_text = re.sub(r"banana", "orange", text)
print(new_text)  # Output: "apple orange cherry"
apple orange cherry
# Replacement function: converts the matched string to upper case
def to_uppercase(match):
    """Return the whole matched text of *match* in upper case.

    Intended to be passed as the ``repl`` callback of ``re.sub()``, which
    calls it once per match with the corresponding match object.
    """
    return match.group(0).upper()
# Define a string
text = "apple banana cherry"

# Use re.sub() with a callback to upper-case every lowercase word
new_text = re.sub(r"\b[a-z]+\b", to_uppercase, text)
print(new_text)  # Output: "APPLE BANANA CHERRY"
APPLE BANANA CHERRY
Check if a string contains the word word in it (case insensitive). If you have no idea, I guess you could try /word/.
import re
ptn = r'\bword\b'
txt = "I have no word to say, because i always have words with him using Word."

# Case-sensitive search (bare expression: the value is shown only in an
# interactive session)
re.findall(ptn, txt)

# Case-insensitive search: re.I is the short form of re.IGNORECASE
re.findall(re.compile(ptn, re.I), txt)
['word', 'Word']
Use substitution to replace every occurrence of the word i with the word I (uppercase, I as in me). E.g.: i'm replacing it. am i not? -> I'm replacing it. am I not?
txt = '''i''ll use it on input to fix my lazy spelling! i mean... i just wanted to check if you understood how it worked.'''

# \b on both sides restricts the match to a standalone "i"; the "i" inside
# words such as "input" or "if" is left alone.
ptn = r'\bi\b'
re.sub(ptn, "I", txt)
"I''ll use it on input to fix my lazy spelling! I mean... I just wanted to check if you understood how it worked."
With regex you can count the number of matches. Can you make it return the number of uppercase consonants (B,C,D,F,..,X,Y,Z) in a given string? E.g.: it should return 3 with the text ABcDeFO!. Note: Only ASCII. We consider Y to be a consonant!
txt = 'You should use the WHO guide to treat COVID-19.'

# Uppercase ASCII consonants: the ranges cover A-Z with A, E, I, O, U omitted
ptn = r'[B-DF-HJ-NP-TV-Z]'

# Lists every match; len() of this list is the number of consonants
re.findall(ptn, txt)
['Y', 'W', 'H', 'C', 'V', 'D']
Oh no! It seems my friends spilled beer all over my keyboard last night and my keys are super sticky now. Some of the time whennn I press a key, I get two duplicates.
Can you ppplease help me fix thhhis?
txt = '''Oh no! It seems my friends spilled beer all over my keyboard last night and my keys are super sticky now. Some of the time whennn I press a key, I get two duplicates.

Can you ppplease help me fix thhhis?'''

# One of n/p/h followed by two more copies of that same letter. The
# backreference \1 is what forces the repeats to be identical; a plain
# ([nph]){3} would also collapse mixed runs such as "php".
ptn = r'([nph])\1{2}'

# r"\1" in the replacement is the text captured by group 1 (n, p or h); some
# languages write this as $1. count=0 replaces every match. count and flags
# are passed by keyword because passing them positionally is deprecated.
re.sub(ptn, r"\1", txt, count=0, flags=re.M)
'Oh no! It seems my friends spilled beer all over my keyboard last night and my keys are super sticky now. Some of the time when I press a key, I get two duplicates.\n\nCan you please help me fix this?'
Match non-negative integers less than or equal to 255
import re
# Integers 0-255, tried as 250-255 | 200-249 | 100-199 | 0-99; the \b on
# each side sets the boundaries so that parts of longer numbers do not match.
ptn = r"\b(?:25[0-5]|2[0-4][0-9]|1[0-9][0-9]|[0-9]{1,2})\b"

txt = "11 123 501 9 10 31 30 2530 0 255 -1 256 -125 100 199"

txt = re.sub(r'-\d+', '', txt)  # Remove the negative integers

re.findall(ptn, txt)
['11', '123', '9', '10', '31', '30', '0', '255', '100', '199']
Validate IPv4 address
import re
# ptn1: octets with a leading zero are rejected
ptn1 = r'^((25[0-5]|2[0-4][0-9]|1[0-9][0-9]|[1-9]?[0-9])\.){3}(25[0-5]|2[0-4][0-9]|1[0-9][0-9]|[1-9]?[0-9])$'
# ptn2: octets with a leading zero are accepted as well
ptn2 = r'\b^((25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)(\.)){3}(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)$\b'

txt1 = "1.1.23.269"
txt2 = "1.1.23.169"
txt3 = "01.1.023.169"

for txt in [txt1, txt2, txt3]:
    # Both patterns are anchored, so one search per pattern decides validity.
    valid1 = re.search(ptn1, txt) is not None
    valid2 = re.search(ptn2, txt) is not None

    if valid1:
        print("{} : Valid IPv4 address using ptn1.".format(txt))
    else:
        print("{} : Invalid IPv4 address using ptn1.".format(txt))

    if valid2:
        print("{} : Valid IPv4 address using ptn2.".format(txt))
    else:
        print("{} : Invalid IPv4 address using ptn2.".format(txt))

    # Extra summary line when neither pattern accepts the address
    if not valid1 and not valid2:
        print("{} : Invalid IPv4 address using ptn1 and ptn2.".format(txt))
1.1.23.269 : Invalid IPv4 address using ptn1.
1.1.23.269 : Invalid IPv4 address using ptn2.
1.1.23.269 : Invalid IPv4 address using ptn1 and ptn2.
1.1.23.169 : Valid IPv4 address using ptn1.
1.1.23.169 : Valid IPv4 address using ptn2.
01.1.023.169 : Invalid IPv4 address using ptn1.
01.1.023.169 : Valid IPv4 address using ptn2.
Find IPv4 address
import re
# Unanchored variants for searching inside longer text.
# ptn1: octets with a leading zero are rejected
ptn1 = r'((25[0-5]|2[0-4][0-9]|1[0-9][0-9]|[1-9]?[0-9])\.){3}(25[0-5]|2[0-4][0-9]|1[0-9][0-9]|[1-9]?[0-9])'
# ptn2: octets with a leading zero are accepted as well
ptn2 = r'((25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)(\.)){3}(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)'

txt1 = "1.1.23.269是IP地址"
txt2 = "判断10.1.23.169是IP地址,203.245.25.67也是IP地址"
txt3 = "01.1.023.169"

# re.search() only finds the first occurrence
re.search(ptn2, txt2)
# Find every occurrence. Each result is the whole match (group 0), not the
# contents of the capturing groups.
def regexMatchAny(pattern: str, rawText):
    """Return the full text of every non-overlapping match of *pattern*.

    Args:
        pattern: Regular expression source to compile and search with.
        rawText: String to scan from left to right.

    Returns:
        A list with ``group(0)`` of each match in order of appearance; an
        empty list when nothing matches.
    """
    # finditer() resumes exactly where the previous match ended, so matches
    # that sit directly next to each other are all reported, and it always
    # advances past an empty match instead of looping on it.
    return [found.group(0) for found in re.compile(pattern).finditer(rawText)]
regexMatchAny(r'((25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)(\.)){3}(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)', txt2)
['10.1.23.169', '203.245.25.67']