Here is a simple example:
import torch
import torch.nn as nn
import coremltools as ct

class Model(nn.Module):
    def __init__(self):
        super().__init__()
        # Two stride-2 deconvs upsample input2 by 4x so it matches input1.
        self.conv_pre1 = nn.ConvTranspose2d(128, 256, kernel_size=3, stride=2, padding=1, output_padding=1)
        self.conv_pre2 = nn.ConvTranspose2d(256, 256, kernel_size=3, stride=2, padding=1, output_padding=1)
        self.conv1 = nn.ConvTranspose2d(256, 256, kernel_size=3, stride=2, padding=1, output_padding=1)
        self.conv2 = nn.ConvTranspose2d(256, 256, kernel_size=3, stride=2, padding=1, output_padding=1)
        self.conv3 = nn.ConvTranspose2d(256, 256, kernel_size=3, stride=2, padding=1, output_padding=1)
        self.conv4 = nn.ConvTranspose2d(256, 3, kernel_size=3, stride=2, padding=1, output_padding=1)

    def forward(self, input1, input2):
        y = self.conv_pre1(input2)
        y = self.conv_pre2(y)
        x = input1 + y
        x = self.conv1(x)
        x = self.conv2(x)
        x = self.conv3(x)
        x = self.conv4(x)
        nn_output = torch.clip(x, 0.0, 1.0)
        # Map [0, 1] to rounded pixel values in [0, 255].
        recon_img_out = torch.ceil(nn_output * 255.0 - 0.5)
        return recon_img_out

model = Model()
model.cuda()
dummy_input_f = torch.randn(1, 256, 68, 120, device='cuda')
dummy_input_z = torch.randn(1, 128, 17, 30, device='cuda')
torch_model = model.eval()
trace_model = torch.jit.trace(torch_model, (dummy_input_f, dummy_input_z))
# Set the input shapes with EnumeratedShapes. The spatial sizes of the two
# inputs must stay in a 4:1 ratio, and the default must be one of the
# enumerated shapes (16x16 / 4x4 is the default pair here).
input_x1_shape = ct.EnumeratedShapes(shapes=[[1, 256, 8, 8],
                                             [1, 256, 16, 16],
                                             [1, 256, 24, 24]],
                                     default=[1, 256, 16, 16])
input_x2_shape = ct.EnumeratedShapes(shapes=[[1, 128, 2, 2],
                                             [1, 128, 4, 4],
                                             [1, 128, 6, 6]],
                                     default=[1, 128, 4, 4])
input_1 = ct.TensorType(name="input_x1", shape=input_x1_shape)
input_2 = ct.TensorType(name="input_x2", shape=input_x2_shape)
output = ct.TensorType(name="output_img")
# output = ct.ImageType(name="output_img", color_layout=ct.colorlayout.RGB)
mlmodel = ct.convert(
    trace_model,
    inputs=[input_1, input_2],
    outputs=[output],
)
mlmodel.save("check.mlmodel")
Except for the default shape, the other two are still far too slow:

input1 8x8,   input2 2x2:           50 ms
input1 24x24, input2 6x6:           50 ms
input1 16x16, input2 4x4 (default): 1.8 ms
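For reference, these per-shape latencies can be reproduced with a simple loop over predict, along these lines (a sketch; the bench helper, iteration count, and random inputs are my own, and the input names match the conversion above):

import time
import numpy as np

def bench(mlmodel, hw, n=100):
    # input_x2 is 1/4 the spatial size of input_x1 (the two stride-2 deconvs upsample 4x).
    x1 = np.random.rand(1, 256, hw, hw).astype(np.float32)
    x2 = np.random.rand(1, 128, hw // 4, hw // 4).astype(np.float32)
    feed = {"input_x1": x1, "input_x2": x2}
    mlmodel.predict(feed)  # warm-up call, excluded from timing
    start = time.perf_counter()
    for _ in range(n):
        mlmodel.predict(feed)
    return (time.perf_counter() - start) / n * 1000.0  # ms per call

for hw in (8, 24, 16):
    print(f"input1 {hw}x{hw}: {bench(mlmodel, hw):.1f} ms")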
I then changed the model to a single input by removing input2. The non-default shapes speed up somewhat, but the times still look unusual. The variant I tested is roughly the sketch below.
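(A sketch; SingleInputModel is just a name for this post. The pre-branch and the add are dropped, the rest of the deconv stack is unchanged.)

class SingleInputModel(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1 = nn.ConvTranspose2d(256, 256, kernel_size=3, stride=2, padding=1, output_padding=1)
        self.conv2 = nn.ConvTranspose2d(256, 256, kernel_size=3, stride=2, padding=1, output_padding=1)
        self.conv3 = nn.ConvTranspose2d(256, 256, kernel_size=3, stride=2, padding=1, output_padding=1)
        self.conv4 = nn.ConvTranspose2d(256, 3, kernel_size=3, stride=2, padding=1, output_padding=1)

    def forward(self, input1):
        x = self.conv1(input1)
        x = self.conv2(x)
        x = self.conv3(x)
        x = self.conv4(x)
        nn_output = torch.clip(x, 0.0, 1.0)
        return torch.ceil(nn_output * 255.0 - 0.5)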
Enumerated-shapes model inference speed:

input1 8x8:             1.9 ms
input1 24x24:           12.14 ms
input1 16x16 (default): 1.8 ms
For comparison, fixed-shape models run the 8x8 and 24x24 inputs in about 0.5 ms and 4 ms respectively.
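Those baselines come from converting with one concrete shape instead of EnumeratedShapes, along these lines (a sketch; trace_model_single stands for the traced single-input variant above):

# Fixed-shape baseline: one concrete shape instead of EnumeratedShapes.
fixed_in = ct.TensorType(name="input_x1", shape=(1, 256, 24, 24))
mlmodel_fixed = ct.convert(
    trace_model_single,
    inputs=[fixed_in],
    outputs=[ct.TensorType(name="output_img")],
)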
Are these results normal? Should the single-input enumerated-shapes model also be 3 to 4 times slower than the fixed-shape one?