The model:
import torch
from torch import nn
from typing import Final
import coremltools as ct
from coremltools.converters.mil.mil import types

# A model whose output is all zeros. torch.zeros_like is the op whose
# MIL lowering (mb.sub vs. mb.mul) is being compared.
class Model(nn.Module):
    def forward(self, x):
        return torch.zeros_like(x)

model = Model().eval()
x = torch.randn(100)
traced_model = torch.jit.trace(model, (x,))

# Flexible input length from 1 to 10,000, fp16 input, iOS 16 target.
var_dim: Final[ct.RangeDim] = ct.RangeDim(1, 10_000)
mlmodel = ct.convert(
    traced_model,
    inputs=[ct.TensorType(name="x", shape=ct.Shape([var_dim]), dtype=types.fp16)],
    outputs=[ct.TensorType(name="y")],
    minimum_deployment_target=ct.target.iOS16,
)
mlmodel.save("Sub.mlpackage")
# mlmodel.save("Mul.mlpackage")
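For context, a minimal sketch of the two lowerings being compared, written with the public MIL Builder; this is only an illustration of the two variants, not the actual zeros_like lowering in the torch frontend:

from coremltools.converters.mil import Builder as mb

@mb.program(input_specs=[mb.TensorSpec(shape=(100,))])
def prog_sub(x):
    # zeros produced as x - x (the mb.sub variant)
    return mb.sub(x=x, y=x)

@mb.program(input_specs=[mb.TensorSpec(shape=(100,))])
def prog_mul(x):
    # zeros produced as x * 0 (the mb.mul variant)
    return mb.mul(x=x, y=0.0)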
Then compile each mlpackage to an mlmodelc and generate its Swift interface:
xcrun coremlcompiler compile Sub.mlpackage .
xcrun coremlcompiler generate Sub.mlpackage . --language Swift
xcrun coremlcompiler compile Mul.mlpackage .
xcrun coremlcompiler generate Mul.mlpackage . --language Swift
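The generated Sub/Mul classes are used in the test below. Alternatively, a compiled mlmodelc can be loaded without the generated code; a minimal sketch, assuming Sub.mlmodelc has been added to the app bundle:

import CoreML

let cfg = MLModelConfiguration()
cfg.computeUnits = .cpuAndGPU
// Bundle lookup assumes the compiled Sub.mlmodelc was added to the app target.
let url = Bundle.main.url(forResource: "Sub", withExtension: "mlmodelc")!
let model = try MLModel(contentsOf: url, configuration: cfg)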
The test case:
func runTest() {
    struct Msg: Codable {
        let infers: [TimeInterval]
    }
    // Note: both tests are dispatched concurrently on the global queue,
    // so they may contend for the same compute units while timing.
    DispatchQueue.global().async {
        let infers = try! testSub(cnt: 10_000)
        let data = try! JSONEncoder().encode(Msg(infers: infers))
        let url = URL.documentsDirectory.appending(path: "./sub.json")
        try! data.write(to: url)
    }
    DispatchQueue.global().async {
        let infers = try! testMul(cnt: 10_000)
        let data = try! JSONEncoder().encode(Msg(infers: infers))
        let url = URL.documentsDirectory.appending(path: "./mul.json")
        try! data.write(to: url)
    }
}

func testMul(cnt: Int) throws -> [TimeInterval] {
    let cfg = MLModelConfiguration()
    cfg.computeUnits = .cpuAndGPU
    let st = Date()
    let model = try Mul(configuration: consume cfg)
    let load = Date().timeIntervalSince(consume st)
    print("mul:", load)
    return try [TimeInterval](unsafeUninitializedCapacity: cnt) { buffer, initializedCount in
        // A shape of [0] is invalid, so index 0 holds a placeholder value.
        buffer[0] = 1
        for i in 1..<cnt {
            // One prediction per input length to exercise the flexible shape (1...10_000).
            let x = try MLMultiArray(shape: [i as NSNumber], dataType: .float16)
            let st = Date()
            _ = try model.prediction(x: consume x)
            buffer[i] = Date().timeIntervalSince(consume st)
        }
        initializedCount = cnt
    }
}

func testSub(cnt: Int) throws -> [TimeInterval] {
    let cfg = MLModelConfiguration()
    cfg.computeUnits = .cpuAndGPU
    let st = Date()
    let model = try Sub(configuration: consume cfg)
    let load = Date().timeIntervalSince(consume st)
    print("sub:", load)
    return try [TimeInterval](unsafeUninitializedCapacity: cnt) { buffer, initializedCount in
        // A shape of [0] is invalid, so index 0 holds a placeholder value.
        buffer[0] = 1
        for i in 1..<cnt {
            let x = try MLMultiArray(shape: [i as NSNumber], dataType: .float16)
            let st = Date()
            _ = try model.prediction(x: consume x)
            buffer[i] = Date().timeIntervalSince(consume st)
        }
        initializedCount = cnt
    }
}
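One caveat: Date() is a wall clock and can shift while the benchmark runs. A sketch of the same loop body using a monotonic clock instead (ContinuousClock, iOS 16+, which the deployment target already requires):

// Sketch only: replaces the Date()-based timing inside the loop.
let clock = ContinuousClock()
let elapsed = try clock.measure {
    _ = try model.prediction(x: x)
}
buffer[i] = TimeInterval(elapsed.components.seconds)
    + TimeInterval(elapsed.components.attoseconds) / 1e18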
The result (iPhone XR, iOS 18.0.1, Low Power Mode off):
import json
import numpy as np
import matplotlib.pyplot as plt

# Load the per-inference timings exported by the test above
# (sub.json / mul.json, copied from the app's Documents directory).
with open("mul.json") as f:
    mul_infers = json.load(f)["infers"]
with open("sub.json") as f:
    sub_infers = json.load(f)["infers"]

avg_mul, std_mul = np.mean(mul_infers), np.std(mul_infers)
avg_sub, std_sub = np.mean(sub_infers), np.std(sub_infers)
print(f'mul, avg:{avg_mul: .6f}, std:{std_mul: .6f}')
print(f'sub, avg:{avg_sub: .6f}, std:{std_sub: .6f}')

# Statistics over the second half only (input lengths 5000...9999), after warm-up.
avg_mul_5k, std_mul_5k = np.mean(mul_infers[5000:]), np.std(mul_infers[5000:])
avg_sub_5k, std_sub_5k = np.mean(sub_infers[5000:]), np.std(sub_infers[5000:])
print(f'mul_5k, avg:{avg_mul_5k: .6f}, std:{std_mul_5k: .6f}')
print(f'sub_5k, avg:{avg_sub_5k: .6f}, std:{std_sub_5k: .6f}')
mul, avg: 0.017661, std: 0.010109
sub, avg: 0.017655, std: 0.010110
mul_5k, avg: 0.019093, std: 0.001480
sub_5k, avg: 0.019091, std: 0.001475
# Model load times in seconds over 10 runs, collected from the
# "mul:" / "sub:" prints in the tests above.
mul_loads = [
0.5651620626449585,
0.5372270345687866,
0.5468639135360718,
0.6307599544525146,
0.5672019720077515,
0.5513859987258911,
0.564581036567688,
0.5799169540405273,
0.5120859146118164,
0.6692310571670532,
]
sub_loads = [
0.5627679824829102,
0.5371979475021362,
0.5452049970626831,
0.6308039426803589,
0.5673099756240845,
0.5501949787139893,
0.5646369457244873,
0.5798110961914062,
0.5121839046478271,
0.6110190153121948,
]
avg_mul_loads, std_mul_loads = np.mean(mul_loads), np.std(mul_loads)
avg_sub_loads, std_sub_loads = np.mean(sub_loads), np.std(sub_loads)
print(f'mul_loads, avg:{avg_mul_loads: .6f}, std:{std_mul_loads: .6f}')
print(f'sub_loads, avg:{avg_sub_loads: .6f}, std:{std_sub_loads: .6f}')
mul_loads, avg: 0.572442, std: 0.043529
sub_loads, avg: 0.566113, std: 0.032922
# Rolling mean/std over a 100-sample window to visualize the trend by input length.
window_size = 100
avg_mul_infers = [np.mean(mul_infers[i-window_size:i]) for i in range(window_size, len(mul_infers) + 1)]
avg_sub_infers = [np.mean(sub_infers[i-window_size:i]) for i in range(window_size, len(sub_infers) + 1)]
x = range(window_size, window_size+len(avg_mul_infers))
plt.figure(figsize=(10, 6))
plt.plot(x, avg_sub_infers, label='sub-avg')
plt.plot(x, avg_mul_infers, label='mul-avg')
plt.legend()
plt.show()
std_mul_infers = [np.std(mul_infers[i-window_size:i]) for i in range(window_size, len(mul_infers) + 1)]
std_sub_infers = [np.std(sub_infers[i-window_size:i]) for i in range(window_size, len(sub_infers) + 1)]
x = range(window_size, window_size+len(std_mul_infers))
plt.figure(figsize=(10, 6))
plt.plot(x, std_sub_infers, label='sub-std')
plt.plot(x, std_mul_infers, label='mul-std')
plt.legend()
plt.show()
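The list comprehensions above recompute each window from scratch; an equivalent vectorized rolling mean, as a sketch using np.convolve, produces the same values:

kernel = np.ones(window_size) / window_size
# mode='valid' yields one mean per full window, matching the comprehension above.
avg_mul_infers_vec = np.convolve(mul_infers, kernel, mode='valid')
assert np.allclose(avg_mul_infers_vec, avg_mul_infers)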
Although the performance difference is minimal (avg 0.017655 s for sub vs. 0.017661 s for mul), mb.sub demonstrates a slight edge over mb.mul.
Sounds good. Will merge once CI is green.
Add dtype support and a unit test. The old PR #2345 was an incorrect implementation.
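For reference, a minimal sketch of the kind of check such a unit test might perform (hypothetical, not the PR's actual test; uses the mlmodel converted above via coremltools' predict, which requires macOS):

import numpy as np

# Hypothetical check: the converted model returns all zeros for several
# input lengths within the flexible range (1 to 10,000).
for n in (1, 100, 10_000):
    x = np.random.rand(n).astype(np.float16)
    y = mlmodel.predict({"x": x})["y"]
    assert y.shape == (n,) and not np.any(y)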