Python常用标准库模块和类

8 minute read

Python 常用标准库模块和类详解

除了基本数据类型，Python 标准库提供了丰富的内置模块和类，涵盖各种功能。以下是详细的分类介绍：

一、数据处理与集合工具

1. `collections` 模块

提供高性能容器数据类型：

from collections import defaultdict, Counter, deque, OrderedDict, namedtuple, ChainMap

# 默认字典 - 访问不存在的键时返回默认值
dd = defaultdict(list)
dd['key'].append(1)  # 自动创建空列表

# 计数器 - 统计可哈希对象
cnt = Counter(['a', 'b', 'a', 'c'])
print(cnt.most_common(2))  # [('a', 2), ('b', 1)]

# 双端队列 - 线程安全，两端高效操作
dq = deque([1, 2, 3])
dq.appendleft(0)  # 左端添加
dq.pop()  # 右端弹出

# 命名元组 - 创建有名字段的元组
Point = namedtuple('Point', ['x', 'y'])
p = Point(10, 20)
print(p.x, p.y)  # 10 20

# 链式映射 - 将多个映射链接为一个
dict1 = {'a': 1, 'b': 2}
dict2 = {'b': 3, 'c': 4}
cm = ChainMap(dict1, dict2)
print(cm['b'])  # 2 (从dict1获取)
print(cm['c'])  # 4 (从dict2获取)

2. `itertools` 模块

迭代器工具，用于高效循环：

import itertools

# 无限迭代器
count = itertools.count(10, 2)  # 10, 12, 14...
cycle = itertools.cycle(['A', 'B'])  # A, B, A, B...
repeat = itertools.repeat(5, 3)  # 5, 5, 5

# 排列组合
list(itertools.combinations('ABC', 2))  # [('A','B'), ('A','C'), ('B','C')]
list(itertools.permutations('AB', 2))   # [('A','B'), ('B','A')]
list(itertools.product('AB', repeat=2)) # [('A','A'), ('A','B'), ('B','A'), ('B','B')]

# 分组
for key, group in itertools.groupby('AAABBBCCAAA'):
    print(key, list(group))
# A ['A', 'A', 'A']
# B ['B', 'B', 'B']
# C ['C', 'C']
# A ['A', 'A', 'A']

# 链式迭代
chain = itertools.chain('ABC', 'DEF')  # A B C D E F

3. `functools` 模块

高阶函数工具：

import functools

# 偏函数 - 固定部分参数
int2 = functools.partial(int, base=2)
int2('1010')  # 10

# LRU缓存 - 缓存函数结果
@functools.lru_cache(maxsize=128)
def fibonacci(n):
    """Return the n-th Fibonacci number (fibonacci(0) == 0, fibonacci(1) == 1).

    Results are memoized by ``lru_cache`` (up to 128 entries), so the naive
    double recursion does not recompute values that are still cached.
    Any ``n`` below 2 is returned unchanged.
    """
    return n if n < 2 else fibonacci(n - 1) + fibonacci(n - 2)

# 排序键函数
sorted([(1, 2), (3, 1), (2, 3)], key=functools.cmp_to_key(lambda x, y: x[1] - y[1]))

# 包装器 - 保留原始函数的元数据
def passthrough(original_func):
    """Decorator that forwards every call to ``original_func`` unchanged.

    ``functools.wraps`` copies ``__name__``, ``__doc__`` and related metadata
    from ``original_func`` onto ``wrapper`` and records the wrapped function
    as ``wrapper.__wrapped__``, so the decorated function still looks like
    the original to introspection tools.
    """
    @functools.wraps(original_func)
    def wrapper(*args, **kwargs):
        # Delegate to the wrapped function and hand its result back.
        return original_func(*args, **kwargs)
    return wrapper

4. `heapq` 模块

堆队列算法（优先队列）：

import heapq

heap = []
heapq.heappush(heap, 3)
heapq.heappush(heap, 1)
heapq.heappush(heap, 2)
heapq.heappop(heap)  # 1 (总是弹出最小的)

# 堆化列表
data = [5, 3, 8, 1]
heapq.heapify(data)  # 原地转换为堆

# 获取n个最大/最小元素
heapq.nlargest(3, [1, 5, 2, 8, 3])  # [8, 5, 3]
heapq.nsmallest(2, [1, 5, 2, 8, 3])  # [1, 2]

5. `bisect` 模块

二分查找算法：

import bisect

sorted_list = [1, 3, 5, 7, 9]

# Find the insertion position
pos = bisect.bisect_left(sorted_list, 4)  # 2 (index that keeps the list sorted)
bisect.insort_left(sorted_list, 4)  # insert in place, keeping order -> [1, 3, 4, 5, 7, 9]

# Right-hand insertion point
pos = bisect.bisect_right(sorted_list, 5)  # 4 (right of the equal element; the list now also contains 4)

二、数学与随机数

6. `math` 模块

数学运算：

import math

# 常数
math.pi, math.e, math.inf, math.nan

# 基本运算
math.sqrt(16)      # 4.0
math.pow(2, 3)     # 8.0
math.log(100, 10)  # 2.0
math.exp(1)        # e ≈ 2.718

# 三角函数
math.sin(math.pi/2)  # 1.0
math.degrees(math.pi)  # 180.0
math.radians(180)     # π

# 特殊函数
math.gcd(12, 8)    # 4
math.factorial(5)  # 120
math.comb(5, 2)    # 10 (组合数)
math.perm(5, 2)    # 20 (排列数)

# 取整函数
math.floor(3.7)   # 3
math.ceil(3.1)    # 4
math.trunc(-3.7)  # -3

7. `random` 模块

随机数生成：

import random

# 基本随机
random.random()           # [0.0, 1.0) 的随机浮点数
random.uniform(1, 10)     # 指定范围的随机浮点数
random.randint(1, 6)      # 随机整数，包含端点
random.randrange(0, 10, 2)  # 随机偶数

# 序列操作
items = ['a', 'b', 'c', 'd']
random.choice(items)      # 随机选择一个
random.choices(items, k=3)  # 有放回抽样
random.sample(items, 2)   # 无放回抽样
random.shuffle(items)     # 原地打乱

# 分布
random.gauss(mu=0, sigma=1)  # 正态分布
random.expovariate(1.0)      # 指数分布

8. `statistics` 模块

统计学计算：

import statistics

data = [1, 2, 3, 4, 5, 6, 7, 8, 9]

statistics.mean(data)      # mean: 5 (an int here: the inputs are ints and the result is exact)
statistics.median(data)    # median: 5
statistics.mode([1, 1, 2, 3])  # mode: 1
statistics.stdev(data)     # sample standard deviation
statistics.variance(data)  # sample variance
statistics.quantiles(data, n=4)  # quartile cut points

9. `decimal` 模块

精确十进制运算：

from decimal import Decimal, getcontext, ROUND_HALF_UP

# 精确计算
Decimal('0.1') + Decimal('0.2')  # Decimal('0.3')

# 设置精度
getcontext().prec = 6
Decimal(1) / Decimal(7)  # Decimal('0.142857')

# 四舍五入
d = Decimal('3.14159')
d.quantize(Decimal('0.001'), rounding=ROUND_HALF_UP)  # Decimal('3.142')

10. `fractions` 模块

有理数运算：

from fractions import Fraction

Fraction(3, 4)  # 3/4
Fraction(0.25)  # 1/4
Fraction('3/7')  # 3/7

Fraction(1, 3) + Fraction(1, 6)  # 1/2
Fraction(3, 4) * 2  # 3/2

三、日期与时间

11. `datetime` 模块

日期时间处理：

from datetime import datetime, date, time, timedelta, timezone

# 当前时间
now = datetime.now()  # 2023-10-01 12:30:45.123456
today = date.today()  # 2023-10-01

# 创建特定时间
dt = datetime(2023, 10, 1, 12, 30, 45)
d = date(2023, 10, 1)
t = time(12, 30, 45)

# 时间运算
dt + timedelta(days=1, hours=3)  # add 1 day and 3 hours
dt - datetime(2023, 9, 1)  # difference: timedelta(days=30, seconds=45045), i.e. 30 days, 12:30:45

# 时区处理
utc_time = datetime.now(timezone.utc)
beijing_time = utc_time.astimezone(timezone(timedelta(hours=8)))

# 格式化
dt.strftime('%Y-%m-%d %H:%M:%S')  # '2023-10-01 12:30:45'
datetime.strptime('2023-10-01', '%Y-%m-%d')

12. `time` 模块

时间相关函数：

import time

# 时间戳
time.time()  # 1696141845.123456 (秒)
time.time_ns()  # 纳秒

# 等待
time.sleep(0.5)  # 暂停0.5秒

# 性能计时
start = time.perf_counter()  # 高精度计时器
# ... 执行代码 ...
elapsed = time.perf_counter() - start

# 结构化时间
struct_time = time.localtime()  # 本地时间结构
time.strftime('%Y-%m-%d', struct_time)
time.strptime('2023-10-01', '%Y-%m-%d')

四、文件与目录

13. `os` 模块

操作系统接口：

import os

# 路径操作
os.getcwd()  # 当前工作目录
os.chdir('/path/to/dir')  # 改变目录
os.listdir('.')  # 列出目录内容

# 文件和目录
os.mkdir('new_dir')
os.makedirs('path/to/new/dirs', exist_ok=True)
os.remove('file.txt')
os.rmdir('empty_dir')

# 路径检查
os.path.exists('path')
os.path.isfile('file.txt')
os.path.isdir('directory')
os.path.getsize('file.txt')

# 环境变量
os.environ.get('HOME')
os.getenv('PATH', 'default')

# 系统命令
os.system('ls -l')
os.popen('ls').read()

14. `pathlib` 模块

面向对象的路径操作：

from pathlib import Path

# 创建Path对象
p = Path('/home/user/file.txt')
p = Path.cwd() / 'subdir' / 'file.txt'

# 路径信息
p.name  # 'file.txt'
p.stem  # 'file'
p.suffix  # '.txt'
p.parent  # Path('/home/user')
p.parts  # ('/', 'home', 'user', 'file.txt')

# 文件操作
p.exists()
p.is_file()
p.is_dir()
p.stat().st_size  # 文件大小

# 读写
p.read_text()
p.write_text('content')
p.read_bytes()

# 通配符
list(Path('.').glob('*.py'))
list(Path('.').rglob('*.py'))  # 递归

15. `shutil` 模块

高级文件操作：

import shutil

# 复制文件
shutil.copy('src.txt', 'dst.txt')
shutil.copy2('src.txt', 'dst.txt')  # 保留元数据
shutil.copytree('src_dir', 'dst_dir')  # 递归复制目录

# 移动文件
shutil.move('src.txt', 'dst.txt')

# 删除目录树
shutil.rmtree('directory')

# 压缩
shutil.make_archive('archive', 'zip', 'directory')
shutil.unpack_archive('archive.zip', 'extract_dir')

# 磁盘使用
total, used, free = shutil.disk_usage('/')

五、系统与进程

16. `sys` 模块

系统相关参数和函数：

import sys

# 命令行参数
sys.argv  # 脚本参数列表
sys.argv[0]  # 脚本名称

# 模块路径
sys.path  # Python搜索路径
sys.path.append('/custom/path')  # 添加路径

# 标准流
sys.stdin.read()
sys.stdout.write('text')
sys.stderr.write('error')

# 系统信息
sys.platform  # platform identifier string (e.g. 'linux', 'win32', 'darwin')
sys.version  # Python version string
sys.maxsize  # largest Py_ssize_t value (max container size/index); Python ints themselves are unbounded
sys.getsizeof(object)  # size of the object in bytes (shallow: referenced objects are not counted)

# 退出程序
sys.exit(0)  # 退出码0表示成功
sys.exit('Error message')  # 打印消息并退出

17. `subprocess` 模块

创建子进程：

import subprocess

# 运行命令
result = subprocess.run(['ls', '-l'], capture_output=True, text=True)
print(result.returncode)  # 返回码
print(result.stdout)  # 标准输出
print(result.stderr)  # 标准错误

# 检查输出
subprocess.check_output(['echo', 'hello'], text=True)

# 管道
p1 = subprocess.Popen(['ls'], stdout=subprocess.PIPE)
p2 = subprocess.Popen(['grep', '.py'], stdin=p1.stdout, stdout=subprocess.PIPE)
output = p2.communicate()[0]

# shell命令
subprocess.run('ls -l | grep .py', shell=True)

六、数据序列化

18. `json` 模块

JSON数据处理：

import json

data = {
    'name': 'Alice',
    'age': 30,
    'skills': ['Python', 'JavaScript']
}

# Serialize
json_str = json.dumps(data, indent=2)  # to a JSON string
# `with` closes the file deterministically, so the content is flushed to
# disk before it is read back below.
with open('data.json', 'w', encoding='utf-8') as f:
    json.dump(data, f)  # write to a file

# Deserialize
data2 = json.loads(json_str)  # from a string
with open('data.json', encoding='utf-8') as f:
    data3 = json.load(f)  # from a file object

# 自定义编码/解码
class Person:
    """Minimal example type that the default JSON encoder cannot serialize."""

    def __init__(self, name, age):
        self.name = name
        self.age = age


class PersonEncoder(json.JSONEncoder):
    """JSON encoder that serializes ``Person`` objects as plain dicts."""

    def default(self, obj):
        """Return a JSON-serializable stand-in for ``obj``.

        ``Person`` instances become ``{'name': ..., 'age': ...}``; anything
        else is delegated to ``json.JSONEncoder.default``.
        """
        if isinstance(obj, Person):
            return {'name': obj.name, 'age': obj.age}
        return super().default(obj)


person = Person('Alice', 30)
json.dumps(person, cls=PersonEncoder)  # '{"name": "Alice", "age": 30}'

注意：
1. 从字符串加载用loads()方法，从文件对象加载用load()方法
2. 加载JSON的返回值类型取决于JSON数据最外层的结构：最外层是 {}（对象）时返回字典（最常见的情况），最外层是 []（数组）时返回列表；最外层也可以是字符串、数字、布尔值或 null，此时分别返回 str、int/float、bool 或 None

19. `pickle` 模块

Python对象序列化：

import pickle

data = {'a': 1, 'b': [2, 3, 4]}

# Serialize
pickled = pickle.dumps(data)  # bytes
# `with` closes the file deterministically instead of leaking the handle.
with open('data.pkl', 'wb') as f:
    pickle.dump(data, f)

# Deserialize
# NOTE: only unpickle data you trust; loading a pickle can execute arbitrary code.
data2 = pickle.loads(pickled)
with open('data.pkl', 'rb') as f:
    data3 = pickle.load(f)

七、文本处理

20. `re` 模块

正则表达式：

import re

# 编译正则
pattern = re.compile(r'\b\w+\b')

# 匹配
match = re.search(r'\d+', 'abc123def')
if match:
    print(match.group())  # '123'
    print(match.start())  # 3
    print(match.end())    # 6

# 查找所有
re.findall(r'\d+', 'a1b22c333')  # ['1', '22', '333']

# 分割
re.split(r'\s+', 'a  b   c')  # ['a', 'b', 'c']

# 替换
re.sub(r'\d+', 'X', 'a1b22c333')  # 'aXbXcX'

# 分组
match = re.match(r'(\d{4})-(\d{2})-(\d{2})', '2023-10-01')
year, month, day = match.groups()

# 编译标志
re.I  # 忽略大小写
re.M  # 多行模式
re.S  # 点匹配所有
re.X  # 详细模式

21. `csv` 模块

CSV文件处理：

import csv

# 读取
with open('data.csv', 'r', newline='') as f:
    reader = csv.reader(f)
    for row in reader:
        print(row)

# 字典方式读取
# Open with newline='' so the csv module handles line endings itself;
# otherwise newlines embedded in quoted fields may be misinterpreted.
with open('data.csv', 'r', newline='') as f:
    reader = csv.DictReader(f)  # the first row supplies the field names
    for row in reader:
        print(row['name'], row['age'])

# 写入
with open('output.csv', 'w', newline='') as f:
    writer = csv.writer(f)
    writer.writerow(['Name', 'Age', 'City'])
    writer.writerow(['Alice', 30, 'NYC'])
    writer.writerows([
        ['Bob', 25, 'LA'],
        ['Charlie', 35, 'Chicago']
    ])

八、并发与异步

22. `threading` 模块

线程操作：

import threading
import time

def worker(name, delay):
    print(f'{name} starting')
    time.sleep(delay)
    print(f'{name} finished')

# 创建线程
t1 = threading.Thread(target=worker, args=('Thread-1', 2))
t2 = threading.Thread(target=worker, args=('Thread-2', 1))

# 启动
t1.start()
t2.start()

# 等待完成
t1.join()
t2.join()

# 线程锁
lock = threading.Lock()
with lock:
    # 临界区
    pass

# 信号量
semaphore = threading.Semaphore(3)  # 最多3个线程同时访问

23. `multiprocessing` 模块

多进程处理：

import multiprocessing
import time

def worker(name, q):
    """Sleep for one second, then put a greeting containing ``name`` on ``q``."""
    time.sleep(1)
    q.put(f'Hello from {name}')


def square(x):
    """Return ``x`` squared; the task function handed to ``Pool.map``."""
    return x * x


# The guard keeps child processes from re-running this section when they
# import the main module (required with the "spawn" start method).
if __name__ == '__main__':
    # Process pool
    with multiprocessing.Pool(4) as pool:
        # Pool.map needs the function first, then the iterable of inputs.
        results = pool.map(square, range(10))  # processed in parallel

    # Inter-process communication
    queue = multiprocessing.Queue()
    processes = []
    for i in range(3):
        p = multiprocessing.Process(target=worker, args=(f'Process-{i}', queue))
        processes.append(p)
        p.start()

    # Read exactly one message per process BEFORE joining: a child that has
    # put data on a Queue may not exit until that data is consumed, and
    # Queue.empty() is not a reliable end-of-data test.
    for _ in processes:
        print(queue.get())

    for p in processes:
        p.join()

九、网络与互联网

24. `urllib` 模块

URL处理：

from urllib import request, parse, error

# 发送请求
response = request.urlopen('https://www.example.com')
content = response.read().decode('utf-8')

# 带参数的请求
data = parse.urlencode({'q': 'python', 'page': 1}).encode()
req = request.Request('https://www.example.com/search', data=data)
response = request.urlopen(req)

# 错误处理
try:
    response = request.urlopen('https://www.example.com/404')
except error.HTTPError as e:
    print(f'HTTP Error: {e.code} {e.reason}')
except error.URLError as e:
    print(f'URL Error: {e.reason}')

# URL解析
parsed = parse.urlparse('https://www.example.com/path?query=1#fragment')
print(parsed.scheme, parsed.netloc, parsed.path, parsed.query)

十、测试与调试

25. `unittest` 模块

单元测试：

import unittest

def add(a, b):
    return a + b

class TestMathFunctions(unittest.TestCase):
    
    def setUp(self):
        # 每个测试前的设置
        self.data = [1, 2, 3]
    
    def test_add_positive(self):
        self.assertEqual(add(1, 2), 3)
        self.assertEqual(add(0, 0), 0)
    
    def test_add_negative(self):
        self.assertEqual(add(-1, -1), -2)
    
    def test_list_length(self):
        self.assertEqual(len(self.data), 3)
    
    def tearDown(self):
        # 每个测试后的清理
        self.data = []

if __name__ == '__main__':
    unittest.main()

26. `logging` 模块

日志记录：

import logging

# 基本配置
logging.basicConfig(
    level=logging.DEBUG,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    handlers=[
        logging.FileHandler('app.log'),
        logging.StreamHandler()
    ]
)

# 记录日志
logging.debug('Debug message')
logging.info('Info message')
logging.warning('Warning message')
logging.error('Error message')
logging.critical('Critical message')

# 高级用法
logger = logging.getLogger('my_app')
logger.setLevel(logging.DEBUG)

# 处理器
file_handler = logging.FileHandler('app.log')
file_handler.setLevel(logging.WARNING)

# 格式化
formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
file_handler.setFormatter(formatter)

logger.addHandler(file_handler)

十一、类型注解

27. `typing` 模块

类型提示：

from typing import List, Dict, Tuple, Optional, Union, Any, Callable, TypeVar, Generic

# 基本类型注解
def greet(name: str) -> str:
    return f'Hello {name}'

# 容器类型
Vector = List[float]
Matrix = List[Vector]

def scale(scalar: float, vector: Vector) -> Vector:
    """Return a new list with each element of ``vector`` multiplied by ``scalar``."""
    return list(map(lambda component: scalar * component, vector))

# 可选类型
def get_user(id: int) -> Optional[Dict[str, Any]]:
    """Return a sample user record for a positive ``id``, otherwise ``None``."""
    # Guard clause: anything that is not strictly positive has no user.
    if not id > 0:
        return None
    return {'id': id, 'name': 'Alice'}

# 联合类型
def process(data: Union[str, bytes]) -> None:
    """Print ``data`` with a label saying whether it is a ``str`` or not."""
    # Handle the non-str case first and return early.
    if not isinstance(data, str):
        print(f'Bytes: {data}')
        return
    print(f'String: {data}')

# 可调用对象
def apply(func: Callable[[int, int], int], x: int, y: int) -> int:
    return func(x, y)

# 类型变量
T = TypeVar('T')

def first(items: List[T]) -> T:
    """Return the element at index 0 of ``items``."""
    return items[0]

# Generic class
class Stack(Generic[T]):
    """Last-in, first-out stack of ``T`` backed by the ``items`` list."""

    def __init__(self) -> None:
        # The end of the list is the top of the stack.
        self.items: List[T] = []

    def push(self, item: T) -> None:
        """Place ``item`` on top of the stack."""
        self.items.append(item)

    def pop(self) -> T:
        """Remove and return the top item (``list.pop`` raises IndexError when empty)."""
        top = self.items.pop()
        return top

总结表

类别	关键模块/类	主要用途
数据处理	`collections`, `itertools`, `functools`, `heapq`, `bisect`	高级容器、迭代器工具、函数式编程、堆队列、二分查找
数学计算	`math`, `random`, `statistics`, `decimal`, `fractions`	数学函数、随机数、统计、精确计算、有理数运算
日期时间	`datetime`, `time`	日期时间处理
文件系统	`os`, `pathlib`, `shutil`	文件、目录操作
系统	`sys`, `subprocess`	系统参数、进程管理
序列化	`json`, `pickle`	数据序列化
文本处理	`re`, `csv`	正则表达式、CSV文件
并发	`threading`, `multiprocessing`	多线程、多进程
网络	`urllib`	URL处理
测试调试	`unittest`, `logging`	单元测试、日志记录
类型系统	`typing`	类型注解

这些模块构成了Python标准库的核心，熟练掌握它们能极大提高开发效率。建议在实际项目中根据需要深入学习相关模块的细节。

Share on

X Facebook LinkedIn Bluesky

xdh2580

Python 常用标准库模块和类详解

一、数据处理与集合工具

1. collections 模块

2. itertools 模块

3. functools 模块

4. heapq 模块

5. bisect 模块

二、数学与随机数

6. math 模块

7. random 模块

8. statistics 模块

9. decimal 模块

10. fractions 模块

三、日期与时间

11. datetime 模块

12. time 模块

四、文件与目录

13. os 模块

14. pathlib 模块

15. shutil 模块

五、系统与进程

16. sys 模块

17. subprocess 模块

六、数据序列化

18. json 模块

19. pickle 模块