Intel – Robin on Linux

Accelerate inference speed of DNN on Intel CPU

To reduce the cost of our inference server, I ran some experiments on how to speed up prediction for our model.

import time

import cv2
import torch
import torch.nn as nn

import pycls.core.builders as model_builder
from pycls.core.config import cfg

def pressure_predict(net, tensor_img, iterations=10, top_k=10):
    """Time repeated forward passes of ``net`` and print the outcome.

    Every pass calls ``net(tensor_img)``, applies softmax over dimension 1
    and keeps the highest scores along the last dimension.  After the loop
    the total elapsed time of all passes and the top scores of the final
    pass are printed to stdout.

    Args:
        net: Callable model, invoked as ``net(tensor_img)``.  Its output
            must be a tensor with at least two dimensions, because the
            softmax is taken over dimension 1.
        tensor_img: Input handed to ``net`` unchanged.
        iterations: Number of timed forward passes.
        top_k: Maximum number of top scores to keep.  Clamped to the size
            of the last output dimension so that small models do not make
            ``torch.topk`` fail.

    Raises:
        ValueError: If ``iterations`` is smaller than 1 (there would be no
            result to print).
    """
    if iterations < 1:
        raise ValueError("iterations must be at least 1")

    # No gradients are needed for a prediction benchmark.  Disabling
    # autograd here means every caller is measured under the same
    # conditions, whether or not it wrapped the call in no_grad() itself.
    with torch.no_grad():
        # perf_counter() is a monotonic, high-resolution clock intended for
        # measuring short durations; time.time() can jump with wall-clock
        # adjustments.
        t0 = time.perf_counter()
        for _ in range(iterations):
            # torch.softmax keeps this helper independent of any
            # module-level softmax object.
            result = torch.softmax(net(tensor_img), dim=1)
            k = min(top_k, result.shape[-1])
            values = torch.topk(result, k).values
        t1 = time.perf_counter()

    print("time:", t1 - t0)
    print(values)

if __name__ == "__main__":
    # Benchmark script: builds the classifier, loads its weights and times
    # the same prediction loop for three variants of the model
    # (eager, TorchScript, Intel Extension for PyTorch).

    # Architecture settings for the pycls model factory.  They have to match
    # the checkpoint loaded below, otherwise load_state_dict() fails.
    cfg.MODEL.TYPE = "regnet"
    # RegNetY-8.0GF
    # NOTE(review): SE_ON is False although RegNetY variants normally use
    # squeeze-and-excitation -- presumably the checkpoint was trained this
    # way; confirm before changing either the comment or the flag.
    cfg.REGNET.DEPTH = 17
    cfg.REGNET.SE_ON = False
    cfg.REGNET.W0 = 192
    cfg.REGNET.WA = 76.82
    cfg.REGNET.WM = 2.19
    cfg.REGNET.GROUP_W = 56
    cfg.BN.NUM_GROUPS = 4
    cfg.MODEL.NUM_CLASSES = 11120
    net = model_builder.build_model()
    net.load_state_dict(torch.load("bird_cls_2754696.pth", map_location="cpu"))
    net.eval()
    net = net.float()
    # Kept as a module-level name: pressure_predict() may look `softmax` up
    # as a global.
    softmax = nn.Softmax(dim=1).eval()

    # read image
    image_path = "blujay.jpg"
    img = cv2.imread(image_path)
    if img is None:
        # cv2.imread() signals a missing or undecodable file by returning
        # None instead of raising; fail here with a clear message rather
        # than inside cv2.resize().
        raise FileNotFoundError(f"could not read image: {image_path}")
    img = cv2.resize(img, (300, 300))
    # HWC uint8 array -> 1 x C x H x W float tensor.
    tensor_img = torch.from_numpy(img).unsqueeze(0).permute(0, 3, 1, 2).float()

    # Run every stage without autograd so the three timings are comparable.
    with torch.no_grad():
        # 1) Plain eager model.
        pressure_predict(net, tensor_img)

        # 2) TorchScript trace + PyTorch's inference optimizations.
        dummy_input = torch.randn(1, 3, 300, 300)
        with torch.jit.optimized_execution(True):
            traced_script_module = torch.jit.trace(net, dummy_input)
        scripted_net = torch.jit.optimize_for_inference(traced_script_module)
        pressure_predict(scripted_net, tensor_img)

        # 3) Intel Extension for PyTorch.
        # Imported only now so that loading the extension cannot influence
        # the two measurements above.
        import intel_extension_for_pytorch as ipex

        # The flow documented by IPEX is: optimize the *eager* module in
        # channels_last layout, then trace and freeze it.
        # NOTE(review): handing an already traced ScriptModule to
        # ipex.optimize() appears to return it unchanged -- confirm against
        # the installed IPEX version.
        ipex_net = ipex.optimize(net.to(memory_format=torch.channels_last))
        ipex_input = tensor_img.to(memory_format=torch.channels_last)
        ipex_net = torch.jit.trace(
            ipex_net, dummy_input.to(memory_format=torch.channels_last)
        )
        ipex_net = torch.jit.freeze(ipex_net)
        pressure_predict(ipex_net, ipex_input)

Here is the output on my Intel i5-12400 CPU:

| Configuration | Inference time (seconds per 10 runs) |
| --- | --- |
| Using the model directly | 1.6 |
| After PyTorch’s torch.jit.optimize_for_inference() | 1.4 |
| After Intel’s ipex.optimize() | 0.8 |

It looks like Intel has worked hard to optimize neural network inference on their CPUs. The only problem is that the intel_extension_for_pytorch package is hard to install (I hit a lot of broken dependencies when trying to install and run it), so the best way to use it is through the Docker image intel/intel-optimized-pytorch:latest.