NumPy

创建数组

# np.zeros: arrays filled with zeros.
# Every snippet in these notes uses the `np` alias, but numpy was never
# imported; import it here so the snippets can actually run.
import numpy as np

x = np.zeros(5)  # float zeros (the default dtype)
print(x)
y = np.zeros((5,), dtype=int)  # same length, integer zeros
print(y)

# np.ones: arrays filled with ones.
x = np.ones(5)
print(x)
y = np.ones((2, 2), dtype=int)
print(y)

# np.zeros_like / np.ones_like: new arrays that take their shape and dtype
# from an existing array.
arr = np.array([
    [1, 2, 3],
    [4, 5, 6],
    [7, 8, 9],
])
zeros_arr = np.zeros_like(arr)
ones_arr = np.ones_like(arr)
print(zeros_arr)
print(ones_arr)

随机数生成

# Fix the seed so every run draws the same numbers (reproducibility).
np.random.seed(42)

# Uniform floats in [0, 1) -- commonly used to initialise weights.
uniform_sample = np.random.rand(3, 3)
print(uniform_sample)

# Standard normal distribution (mean 0, variance 1).
normal_sample = np.random.randn(3, 3)
print(normal_sample)

# Random integers from 0 up to, but not including, 10.
int_sample = np.random.randint(0, 10, size=(2, 5))
print(int_sample)

从已有的数组创建数组

# np.asarray: convert an existing sequence into an ndarray.
x = [1, 2, 3]  # from a list
a = np.asarray(x)
print(a)

x = (1, 2, 3)  # from a tuple
a = np.asarray(x)
print(a)

# Ragged input (rows of different lengths) cannot form a regular 2-D array.
# NumPy >= 1.24 raises ValueError for it unless dtype=object is requested
# explicitly; the result is then a 1-D array holding the two tuples.
x = [(1, 2, 3), (4, 5)]
a = np.asarray(x, dtype=object)
print(a)

# np.frombuffer: interpret a bytes buffer as a 1-D array.
# dtype 'S1' makes every element a one-byte string.
s = b'Hello World'
a = np.frombuffer(s, dtype=np.dtype('S1'))
print(a)

# np.fromiter: build a 1-D array from any iterable.
# The source range used to be bound to the name `list`, which shadowed the
# builtin for the rest of the script; it now has a non-clashing name.
values = range(5)
it = iter(values)
x = np.fromiter(it, dtype=float)
print(x)

从数值范围创建数组

# np.arange: evenly spaced values in the half-open interval [start, stop).
x = np.arange(5, dtype=float)
print(x)

x = np.arange(10, 20, 2)
print(x)

# np.linspace: arithmetic sequence; endpoint=False leaves out the stop value.
a = np.linspace(1, 10, num=10, endpoint=False)
print(a)

# np.logspace: geometric sequence, here 2**0 through 2**9.
a = np.logspace(0, 9, num=10, base=2)
print(a)

切片和索引

# Slicing with an explicit slice object: start=2, stop=7, step=2.
a = np.arange(10)
s = slice(2, 7, 2)
print(a[s])

# The same selection written with the start:stop:step shorthand.
a = np.arange(10)
b = a[2:7:2]
print(b)

高级索引

# Integer-array indexing: row and column indices are paired element-wise.
x = np.array([[1, 2], [3, 4], [5, 6]])
y = x[[0, 1, 2], [0, 1, 0]]  # picks (0,0), (1,1), (2,0)
print(y)

# Pick the four corner elements of a 4x3 array; the result takes the
# shape of the index arrays (2x2).
x = np.array([
    [0, 1, 2],
    [3, 4, 5],
    [6, 7, 8],
    [9, 10, 11],
])
print(x)
print('\n')
rows = np.array([[0, 0], [3, 3]])
cols = np.array([[0, 2], [0, 2]])
y = x[rows, cols]
print(y)

# Combined example: slices, index lists and Ellipsis in one place.
a = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
b = a[1:3, 1:3]     # rows 1-2, columns 1-2
c = a[1:3, [1, 2]]  # same elements, columns chosen with an index list
d = a[..., 1:]      # every row, from column 1 onward
for selection in (b, c, d):
    print(selection)

# Boolean indexing: keep only the elements greater than 5.
x = np.array([[0, 1, 2], [3, 4, 5], [6, 7, 8], [9, 10, 11]])
print(x[x > 5])

# Fancy indexing: select rows 4, 2, 1, 7 in that order.
x = np.arange(32).reshape((8, 4))
print(x[[4, 2, 1, 7]])

# np.ix_ combines the row list and the column list as a Cartesian product.
x = np.arange(32).reshape((8, 4))
print(x[np.ix_([1, 5, 7, 2], [0, 3, 1, 2])])

广播

当参与运算的两个数组形状不同但相互兼容时（从最后一个维度起逐一比较，每个维度要么相等，要么其中一个为 1），NumPy 会自动触发广播机制；形状不兼容时则会抛出 ValueError。

# `b` has shape (3,), so it is broadcast across each of the 4 rows of `a`.
a = np.array([
    [0, 0, 0],
    [10, 10, 10],
    [20, 20, 20],
    [30, 30, 30],
])
b = np.array([0, 1, 2])
print(a + b)

数组操作

# reshape: -1 lets NumPy infer that dimension (4 rows -> 3 columns here).
arr = np.arange(12)
auto_arr = arr.reshape(4, -1)

# transpose
arr = np.array([[1, 2], [3, 4]])
print(arr.T)

# concatenate: axis=0 stacks the rows, axis=1 joins the columns.
team_a = np.array([[1, 2, 3]])
team_b = np.array([[4, 5, 6]])
teams = (team_a, team_b)
result1 = np.concatenate(teams, axis=0)
result2 = np.concatenate(teams, axis=1)
print(result1)
print(result2)

# unique: the distinct values, sorted.
arr = np.array([1, 1, 2, 2, 3, 3])
u = np.unique(arr)

# expand_dims: insert a leading axis, (224, 224) -> (1, 224, 224).
img = np.random.rand(224, 224)
img_batch = np.expand_dims(img, axis=0)

位运算

np.bitwise_and(x1, x2)、np.bitwise_or(x1, x2)、np.bitwise_xor(x1, x2)
np.invert(x)  # 按位取反
np.left_shift(x1, x2)、np.right_shift(x1, x2)

数学函数

np.sin(x)、np.cos(x)、np.tan(x)、np.arcsin(x)、np.arccos(x)、np.arctan(x)
np.degrees(x)  # 弧度转角度
np.around(x)、np.floor(x)、np.ceil(x)

统计函数

mean、min、max、std、var、median、percentile、average（加权平均）

重点关注 axis 参数的用法：axis=0 按列计算（压缩行），axis=1 按行计算（压缩列）。

# Three students (rows), each with scores for two courses (columns).
scores = np.array([
    [80, 90],   # student A
    [70, 75],   # student B
    [60, 100],  # student C
])

print("--- 第一梯队 (AI 核心) ---")
# 1. Mean over every score in the class.
print(f"全班总平均: {np.mean(scores)}")

# 2. Mean per student: reduce along the columns of each row (axis=1).
print(f"每个学生的平均分: {np.mean(scores, axis=1)}")
# Result: [85.  72.5 80. ]

# 3. Standard deviation per course: reduce down each column (axis=0).
stds = scores.std(axis=0)
print(f"两门课的波动(标准差): {stds}")
# The larger number marks the course whose scores are more spread out.

print("\n--- 归一化演示 (MinMax) ---")
# Min-max scaling: map every score into the range 0..1.
lowest = scores.min()
highest = scores.max()
normalized_scores = (scores - lowest) / (highest - lowest)
print(normalized_scores)

排序，条件筛选

# np.sort(a, axis, kind, order)

# np.argmax / np.argmin return the index of the largest / smallest value;
# without an axis the index refers to the flattened array.
a = np.array([[30, 40, 70], [80, 20, 10], [50, 90, 60]])
print('argmax():')
print(a.argmax())
print('沿轴 0 的最大值索引：')
maxindex = a.argmax(axis=0)
print(maxindex)
print('沿轴 1 的最大值索引：')
maxindex = a.argmax(axis=1)
print(maxindex)
print('argmin():')
minindex = a.argmin()
print(minindex)
print('沿轴 0 的最小值索引：')
minindex = a.argmin(axis=0)
print(minindex)
print('沿轴 1 的最小值索引：')
minindex = a.argmin(axis=1)
print(minindex)

# np.argsort: the indices that would sort the array in ascending order.
x = np.array([3, 1, 2])
print('argsort(x)：')
y = np.argsort(x)
print(y)
print('以排序后的顺序重构原数组：')
print(x[y])
print('使用循环重构原数组：')
for i in y:
    print(x[i], end=" ")
# The loop keeps the cursor on the same line; end that line so whatever
# prints next does not get glued onto it.
print()

# np.where with only a condition returns a tuple of index arrays, one per
# dimension, locating the elements where the condition holds.
x = np.arange(9).reshape(3, 3)
mask = x > 3
y = np.where(mask)
print(y)     # the indices
print(x[y])  # the elements found at those indices

副本和视图

# .copy(): basic slicing returns a view that shares memory with its source.
# `data` was never defined in these notes, so a small sample array is
# created here to keep the snippet runnable.
data = np.arange(200)

# Risky: the slice is a view, so writing to my_subset also changes data.
my_subset = data[:100]
# Safe (recommended): copy() gives my_subset its own memory.
my_subset = data[:100].copy()

线性代数

# np.dot    -- dot product
# np.matmul -- matrix multiplication; the @ operator is shorthand for it
# (for 2-D arrays np.dot and np.matmul give the same result).
# The x and y left over from the earlier snippets are not valid operands
# for @, so two small 2x2 matrices are defined for the demonstration.
x = np.array([[1, 2], [3, 4]])
y = np.array([[5, 6], [7, 8]])
z = x @ y

# np.linalg.inv: matrix inverse

# np.linalg.det: determinant

# np.linalg.solve: solve the linear system A @ solution = b
A = np.array([[3, 1], [1, 2]])
b = np.array([14, 8])
solution = np.linalg.solve(A, b)
first, second = solution
print(first)
print(second)

# np.linalg.eig: eigenvalues and eigenvectors (one eigenvector per column)
A = np.array([[1, 2], [2, 1]])
eigenvalues, eigenvectors = np.linalg.eig(A)
print("特征值 (重要程度):")
print(eigenvalues)
print("特征向量 (方向):")
print(eigenvectors)

IO

# np.save / np.load: persist an array as a binary .npy file.
# np.save appends ".npy" to the name when it is missing, which is why the
# load call below uses the full file name.
features = np.random.rand(1000, 1000)
np.save(file='my_data', arr=features)
loaded_data = np.load(file='my_data.npy')

np.save / np.load 适合用来保存中间计算结果；如果是表格型数据，pandas 的读写接口（如 read_csv / to_csv）更好用。

Matplotlib

from matplotlib import pyplot as plt

# plt.plot()    -- line chart
# plt.scatter() -- scatter plot
# plt.hist()    -- histogram

# Example: a sine curve with noisy sample points drawn over it.
x = np.arange(0, 3 * np.pi, 0.1)          # values from 0 up to 3*pi, step 0.1
y = np.sin(x)                             # sine of each x (a wavy signal)
noise = np.random.normal(0, 0.1, x.size)  # Gaussian noise, one value per point
plt.title("NumPy & Matplotlib Demo")
plt.plot(x, y, color='blue', label='Line')
plt.scatter(x, y + noise, color='red', label='Points', s=10)
plt.legend()  # show the legend built from the label= values
plt.show()    # display the figure