from docx import Document
import re
from urllib.parse import quote, unquote
from IPython.display import Markdown
# Captures the DOI part of a doi.org / dx.doi.org link.
# - `https?` accepts http or https (the old `https{0,}` meant "http" + any number of "s").
# - The host is pinned to doi.org so an unrelated URL earlier in the paragraph is not picked up.
# - The capture is limited to printable, non-space ASCII and must end on an
#   alphanumeric, so trailing punctuation or following sentence text is dropped.
DOI_URL_PATTERN = re.compile(r"https?://(?:dx\.)?doi\.org/([!-~]*[0-9A-Za-z])")


def extract_dois(text):
    """Return every DOI found as a doi.org URL in *text*, in order of appearance.

    Percent-escapes are decoded (e.g. ``%2F`` becomes ``/``). An empty list is
    returned when *text* contains no DOI link, so callers never index into an
    empty match list.
    """
    return [unquote(raw_doi) for raw_doi in DOI_URL_PATTERN.findall(text)]


doc = Document('../写作与投稿/data/her2/bmc-her2.docx')

search_term_list = []
for p in doc.paragraphs:
    for doi in extract_dois(p.text):
        # The DOI must be wrapped in double quotes, otherwise the search gets truncated.
        search_term_list.append(f'"{doi}" [doi]')

print(f"总共{len(search_term_list)}条记录")

# Join with a full " OR " separator so the query carries no dangling trailing space.
search_str = " OR ".join(search_term_list)
search_url = f"https://pubmed.ncbi.nlm.nih.gov/?term={quote(search_str)}"
print(search_url)

# Last expression of the cell: rendered by the notebook as a clickable link.
Markdown(f"[Pubmed搜索结果链接]({search_url})")
总共32条记录
https://pubmed.ncbi.nlm.nih.gov/?term=%2210.1038/nrc3775%22%20%5Bdoi%5D%20OR%20%2210.1038/s41591-021-01450-2%22%20%5Bdoi%5D%20OR%20%2210.1016/S0140-6736%2810%2962101-0%22%20%5Bdoi%5D%20OR%20%2210.1002/cac2.12359%22%20%5Bdoi%5D%20OR%20%2210.1001/jamaoncol.2018.0049%22%20%5Bdoi%5D%20OR%20%2210.2217/fon-2021-0455%22%20%5Bdoi%5D%20OR%20%2210.1016/j.ctrv.2015.03.009%22%20%5Bdoi%5D%20OR%20%2210.1016/j.pharmthera.2019.06.005%22%20%5Bdoi%5D%20OR%20%2210.1093/neuonc/nou146%22%20%5Bdoi%5D%20OR%20%2210.1186/s40880-019-0354-z%22%20%5Bdoi%5D%20OR%20%2210.1016/j.esmoop.2022.100395%22%20%5Bdoi%5D%20OR%20%2210.1016/j.ctrv.2023.102520%22%20%5Bdoi%5D%20OR%20%2210.3390/cancers14174155%22%20%5Bdoi%5D%20OR%20%2210.1097/JTO.0000000000000379%22%20%5Bdoi%5D%20OR%20%2210.1007/s00432-022-04196-7%22%20%5Bdoi%5D%20OR%20%2210.1016/j.bbcan.2021.188605%22%20%5Bdoi%5D%20OR%20%2210.1093/annonc/12.suppl_1.s23%22%20%5Bdoi%5D%20OR%20%2210.1016/S0140-6736%2816%2931473-8%22%20%5Bdoi%5D%20OR%20%2210.1016/j.lungcan.2023.107414%22%20%5Bdoi%5D%20OR%20%2210.1186/s12967-015-0622-x%22%20%5Bdoi%5D%20OR%20%2210.1158/1078-0432.ccr-0373-3%22%20%5Bdoi%5D%20OR%20%2210.1007/s00432-023-05218-8%22%20%5Bdoi%5D%20OR%20%2210.3390/cancers12123804%22%20%5Bdoi%5D%20OR%20%2210.1001/jamaoncol.2018.4304%22%20%5Bdoi%5D%20OR%20%2210.1016/j.pulmoe.2019.05.003%22%20%5Bdoi%5D%20OR%20%2210.1056/NEJMsb1901642%22%20%5Bdoi%5D%20OR%20%2210.1038/s41571-021-00558-1%22%20%5Bdoi%5D%20OR%20%2210.1016/j.ccell.2022.06.002%22%20%5Bdoi%5D%20OR%20%2210.3390/cancers15041286%22%20%5Bdoi%5D%20OR%20%2210.1186/s13062-015-0086-1%22%20%5Bdoi%5D%20OR%20%2210.1136/bmj.39335.541782.AD%22%20%5Bdoi%5D%20OR%20%2210.1136/bmjopen-2016-012799%22%20%5Bdoi%5D%20