To reduce the cost of our inference server, I ran some experiments on how to speed up prediction for our model.
import time

import cv2
import torch
import torch.nn as nn
import pycls.core.builders as model_builder
from pycls.core.config import cfg

# Turns the model's raw outputs into probabilities along dim 1. Defined at
# module level so pressure_predict() does not depend on a name that only
# exists when this file is run as a script.
softmax = nn.Softmax(dim=1).eval()


def pressure_predict(net, tensor_img):
    """Run ``net`` on ``tensor_img`` ten times and print how long it took.

    The timed region covers the ten forward passes (each followed by the
    module-level ``softmax``) plus a single top-10 selection on the last
    result. The elapsed seconds and the top-10 values are printed; nothing
    is returned.

    Args:
        net: Callable model; it is invoked as ``net(tensor_img)``.
        tensor_img: Input passed to ``net`` unchanged on every iteration.
    """
    # perf_counter() is a monotonic, high-resolution clock intended for
    # measuring short durations; time.time() can jump if the wall clock is
    # adjusted.
    t0 = time.perf_counter()
    for _ in range(10):
        result = net(tensor_img)
        result = softmax(result)
    values, _indices = torch.topk(result, 10)
    t1 = time.perf_counter()
    print("time:", t1 - t0)
    print(values)


if __name__ == "__main__":
    cfg.MODEL.TYPE = "regnet"
    # RegNetY-8.0GF
    cfg.REGNET.DEPTH = 17
    cfg.REGNET.SE_ON = False
    cfg.REGNET.W0 = 192
    cfg.REGNET.WA = 76.82
    cfg.REGNET.WM = 2.19
    cfg.REGNET.GROUP_W = 56
    cfg.BN.NUM_GROUPS = 4
    cfg.MODEL.NUM_CLASSES = 11120

    net = model_builder.build_model()
    net.load_state_dict(torch.load("bird_cls_2754696.pth", map_location="cpu"))
    net.eval()
    net = net.float()

    # read image
    img = cv2.imread("blujay.jpg")
    if img is None:
        # cv2.imread() reports an unreadable/missing file by returning None
        # rather than raising, which would otherwise surface as an obscure
        # error inside cv2.resize().
        raise FileNotFoundError("could not read image: blujay.jpg")
    img = cv2.resize(img, (300, 300))
    # HWC uint8 image -> 1 x C x H x W float tensor.
    tensor_img = torch.from_numpy(img).unsqueeze(0).permute(0, 3, 1, 2).float()

    # NOTE(review): runs 1 and 2 below execute outside torch.no_grad() while
    # run 3 executes inside it, so the three timings are not taken under
    # identical conditions. There is also no warm-up pass before timing.

    # Run 1: the model as loaded.
    pressure_predict(net, tensor_img)

    # Run 2: trace to TorchScript, then apply optimize_for_inference().
    dummy_input = torch.randn(1, 3, 300, 300)
    with torch.jit.optimized_execution(True):
        traced_script_module = torch.jit.trace(net, dummy_input)
    net = torch.jit.optimize_for_inference(traced_script_module)
    pressure_predict(net, tensor_img)

    # Run 3: channels_last memory format + Intel Extension for PyTorch.
    # Imported here so the first two runs work without the package installed.
    import intel_extension_for_pytorch as ipex

    # NOTE(review): at this point ``net`` is the TorchScript module produced
    # for run 2, not the original eager model -- confirm that applying
    # ipex.optimize() to it is intended.
    net = net.to(memory_format=torch.channels_last)
    net = ipex.optimize(net)
    tensor_img = tensor_img.to(memory_format=torch.channels_last)
    with torch.no_grad():
        pressure_predict(net, tensor_img)
Here is the output on my Intel i5-12400 CPU:
| Method | Inference time (seconds per 10 runs) |
| --- | --- |
| Directly use model | 1.6 |
| After PyTorch’s torch.jit.optimize_for_inference() | 1.4 |
| After Intel’s ipex.optimize() | 0.8 |
It looks like Intel has worked hard to make neural network models run fast on their CPUs. The only problem is that the intel_extension_for_pytorch
package is hard to install (I ran into a lot of broken dependencies when trying to install and run it), so the best way to use it is through the Docker image intel/intel-optimized-pytorch:latest.