Reply to Dynamic coreml model inference is significantly slower than static model
Here is a simple example:

import torch
import torch.nn as nn
import coremltools as ct

class Model(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv_pre1 = nn.ConvTranspose2d(128, 256, kernel_size=3, stride=2, padding=1, output_padding=1)
        self.conv_pre2 = nn.ConvTranspose2d(256, 256, kernel_size=3, stride=2, padding=1, output_padding=1)
        self.conv1 = nn.ConvTranspose2d(256, 256, kernel_size=3, stride=2, padding=1, output_padding=1)
        self.conv2 = nn.ConvTranspose2d(256, 256, kernel_size=3, stride=2, padding=1, output_padding=1)
        self.conv3 = nn.ConvTranspose2d(256, 256, kernel_size=3, stride=2, padding=1, output_padding=1)
        self.conv4 = nn.ConvTranspose2d(256, 3, kernel_size=3, stride=2, padding=1, output_padding=1)

    def forward(self, input1, input2):
        # input2 is upsampled by 4x (two stride-2 transposed convolutions) and added to input1.
        y = self.conv_pre1(input2)
        y = self.conv_pre2(y)
        x = input1 + y
        x = self.conv1(x)
        x = self.conv2(x)
        x = self.conv3(x)
        x = self.conv4(x)
        nn_output = torch.clip(x, 0.0, 1.0)
        recon_img_out = torch.ceil(nn_output * 255.0 - 0.5)
        return recon_img_out

model = Model()
model.cuda()

dummy_input_f = torch.randn(1, 256, 68, 120, device='cuda')
dummy_input_z = torch.randn(1, 128, 17, 30, device='cuda')

torch_model = model.eval()
trace_model = torch.jit.trace(torch_model, (dummy_input_f, dummy_input_z))

# Use EnumeratedShapes for each input; input_x2 is always 1/4 the spatial size of input_x1.
input_x1_shape = ct.EnumeratedShapes(shapes=[[1, 256, 8, 8],
                                             [1, 256, 16, 16],
                                             [1, 256, 24, 24]],
                                     default=[1, 256, 16, 16])
input_x2_shape = ct.EnumeratedShapes(shapes=[[1, 128, 2, 2],
                                             [1, 128, 4, 4],
                                             [1, 128, 6, 6]],
                                     default=[1, 128, 4, 4])

input_1 = ct.TensorType(name="input_x1", shape=input_x1_shape)
input_2 = ct.TensorType(name="input_x2", shape=input_x2_shape)
outputs = ct.TensorType(name="output_img")
# outputs = ct.ImageType(name="output_img", color_layout=ct.colorlayout.RGB)

mlmodel = ct.convert(
    trace_model,
    inputs=[input_1, input_2],
    outputs=[outputs],
)
mlmodel.save("check.mlmodel")

Except for the default shape, the other two shapes are still far too slow:

input1 8x8, input2 2x2: 50 ms
input1 24x24, input2 6x6: 50 ms
input1 16x16, input2 4x4 (default): 1.8 ms

Then I changed the model to take a single input by removing input2. The non-default shapes speed up a bit, but the times are still unusual.

Enumerated-shape model inference speed:

input1 8x8: 1.9 ms
input1 24x24: 12.14 ms
input1 16x16 (default): 1.8 ms

For comparison, fixed-size models run the 8x8 and 24x24 cases in about 0.5 ms and 4 ms.

Are these results normal? Why does even the single-input enumerated-shape model slow down by 3 to 4 times?
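For reference, here is a minimal sketch of one way such per-shape timings can be collected with coremltools' predict API on the Mac (the warm-up and iteration counts are arbitrary choices, and timings through coremltools may not exactly match on-device compute-unit behavior):

import time
import numpy as np
import coremltools as ct

mlmodel = ct.models.MLModel("check.mlmodel")

# (input_x1 HxW, input_x2 HxW) pairs matching the enumerated shapes above.
shape_pairs = [((8, 8), (2, 2)), ((16, 16), (4, 4)), ((24, 24), (6, 6))]

for (h1, w1), (h2, w2) in shape_pairs:
    x1 = np.random.rand(1, 256, h1, w1).astype(np.float32)
    x2 = np.random.rand(1, 128, h2, w2).astype(np.float32)

    # Warm-up runs so first-call specialization cost is excluded from the average.
    for _ in range(5):
        mlmodel.predict({"input_x1": x1, "input_x2": x2})

    n = 50
    start = time.perf_counter()
    for _ in range(n):
        mlmodel.predict({"input_x1": x1, "input_x2": x2})
    elapsed_ms = (time.perf_counter() - start) / n * 1000.0
    print(f"input_x1 {h1}x{w1}, input_x2 {h2}x{w2}: {elapsed_ms:.2f} ms")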
Feb ’23