I was trying the latest coremltools-8.0b1 beta on macOS 15 Beta with the intent to try using the new stateful models api in CoreML.
But the conversion would always fail with the error:
/AppleInternal/Library/BuildRoots/<snip>/Library/Caches/com.apple.xbs/Sources/MetalPerformanceShadersGraph/mpsgraph/MetalPerformanceShadersGraph/Core/Files/MPSGraphExecutable.mm:162: failed assertion `Error: the minimum deployment target for macOS is 14.0.0'
Here's a minimal repro, which works fine with both the stable version of coremltools (7.2) and the beta version (8.0b1) on macOS Sonoma 14.5, but fails with both versions of coremltools on macOS 15.0 Beta and Xcode 16.0 Beta. Which means that this most likely isn't an issue with coremltools, but with the native compilation toolchain.
from collections import OrderedDict
import coremltools as ct
import numpy as np
import torch
import torch.nn as nn
class ResidualAttentionBlock(nn.Module):
def __init__(self, d_model: int, n_head: int, attn_mask: torch.Tensor = None):
super().__init__()
self.attn = nn.MultiheadAttention(d_model, n_head)
self.ln_1 = nn.LayerNorm(d_model)
self.mlp = nn.Sequential(
OrderedDict(
[
("c_fc", nn.Linear(d_model, d_model * 4)),
("gelu", nn.GELU()),
("c_proj", nn.Linear(d_model * 4, d_model)),
]
)
)
self.ln_2 = nn.LayerNorm(d_model)
self.attn_mask = attn_mask
def attention(self, x: torch.Tensor):
self.attn_mask = (
self.attn_mask.to(dtype=x.dtype, device=x.device)
if self.attn_mask is not None
else None
)
return self.attn(x, x, x, need_weights=False, attn_mask=self.attn_mask)[0]
def forward(self, x: torch.Tensor):
x = x + self.attention(self.ln_1(x))
x = x + self.mlp(self.ln_2(x))
return x
class Transformer(nn.Module):
def __init__(
self, width: int, layers: int, heads: int, attn_mask: torch.Tensor = None
):
super().__init__()
self.width = width
self.layers = layers
self.resblocks = nn.Sequential(
*[ResidualAttentionBlock(width, heads, attn_mask) for _ in range(layers)]
)
def forward(self, x: torch.Tensor):
return self.resblocks(x)
transformer = Transformer(width=512, layers=12, heads=8)
emb_tokens = torch.rand((1, 512))
ct_model = ct.convert(
torch.jit.trace(transformer.eval(), emb_tokens),
convert_to="mlprogram",
minimum_deployment_target=ct.target.macOS14,
inputs=[ct.TensorType(name="embIn", shape=[1, 512])],
outputs=[ct.TensorType(name="embOutput", dtype=np.float32)],
)