1 导入代码

可以通过 from hulinhui import * 将 hulinhui.py 中的函数全部导入

2 字符

2.1 判断是否是中文

Code

def is_chinese(string):
    """
    Report whether the string contains at least one Chinese character.

    A character counts as Chinese when it lies in the range
    U+4E00..U+9FFF (both ends inclusive).

    :param string: the string to inspect
    :return: True if any character falls in that range, otherwise False
    """
    return any('\u4e00' <= char <= '\u9fff' for char in string)

is_chinese('额尔古那河右岸')

True

2.2 哈希转换

Code

import hashlib
def md5(raw_str: str):
    m = hashlib.md5()
    m.update(raw_str.encode("utf8"))
    return m.hexdigest()

md5('额尔古那河右岸')

'456406450f077ba22a683e2ccf35164b'

MD5（Message Digest Algorithm 5）是一种常用的哈希算法，用于将任意长度的输入转换为固定长度（128位，即32个十六进制字符）的哈希值。

函数的作用是将输入的字符串进行MD5哈希运算，并返回运算结果的十六进制表示。具体的实现步骤如下：

创建一个hashlib.md5()对象，用于进行MD5哈希运算。
先用raw_str.encode("utf8")将输入的字符串编码为UTF-8字节串，再使用m.update()方法把这些字节送入哈希对象，更新其状态。
使用m.hexdigest()方法获取哈希对象的十六进制表示，即MD5哈希值。
将MD5哈希值作为函数的返回值。

3 文件

3.1 路径

Code

from os import path

file_path = 'data/NEJM-Timing of Initiation of RRT in AKI.pdf'

# Split the extension off the full path: (path without extension, extension).
file_path_name, file_extension = path.splitext(file_path)
# File name with neither the directory part nor the extension.
file_base_name_without_extension = path.basename(file_path_name)

# File name without the directory part, extension kept.
file_base_name_with_extension = path.basename(file_path)

# Display all four pieces. The third name previously referred to an undefined
# variable (file_path_name_without_extension), which raised NameError.
file_path_name, file_extension, file_base_name_without_extension, file_base_name_with_extension

('data/NEJM-Timing of Initiation of RRT in AKI',
 '.pdf',
 'NEJM-Timing of Initiation of RRT in AKI',
 'NEJM-Timing of Initiation of RRT in AKI.pdf')

4 PDF

4.1 按页码范围提取PDF

Code

from PyPDF2 import PdfWriter, PdfReader
from os import path

def extract_pdf_pages(pdfPath:str, startPage:int, endPage:int, outputPdfPath:str=""):
    """
    Copy an inclusive, 1-based page range of a PDF into a new PDF file.

    Page n of the document has page number n, so exporting pages 1 through 5
    means passing startPage=1, endPage=5.

    :param pdfPath: path of the source PDF
    :param startPage: first page to export (1-based, inclusive)
    :param endPage: last page to export (1-based, inclusive)
    :param outputPdfPath: destination path; when empty, the output is written
        next to the source as "<name>_extracted_p<start>-<end><ext>"
    :return: None; a message is printed on success or when the range is invalid
    """
    output = PdfWriter()
    with open(pdfPath, "rb") as file:
        pdf_file = PdfReader(file)
        pdf_pages_len = len(pdf_file.pages)
        # Page numbers are 1-based, so endPage == total page count is valid;
        # only a value strictly greater than the total is out of range.
        if endPage > pdf_pages_len:
            print("结束页码不能大于总页数")
            return
        # A start below 1 would become a negative index and silently pick
        # pages from the end; a start past the end would write an empty PDF.
        if startPage < 1 or startPage > endPage:
            print("起始页码必须不小于1且不大于结束页码")
            return
        for page_number in range(startPage, endPage + 1):
            # Convert the 1-based page number to the 0-based page index.
            output.add_page(pdf_file.pages[page_number - 1])
        if not outputPdfPath:
            # Only `from os import path` is imported in this file, so use
            # `path` directly rather than `os.path`.
            file_name, file_extension = path.splitext(pdfPath)
            outputPdfPath = f"{file_name}_extracted_p{startPage}-{endPage}{file_extension}"
        # NOTE(review): the output is written while the source file is still
        # open; PyPDF2 appears to read page data lazily from the open stream,
        # so confirm before moving this outside the `with` block.
        with open(outputPdfPath, "wb") as output_file:
            output.write(output_file)
        print("总页数{}，导出第{}页至第{}页".format(pdf_pages_len, startPage, endPage))

extract_pdf_pages('data/NEJM-Timing of Initiation of RRT in AKI.pdf', 1, 1)

总页数12，导出第1页至第1页