Hi, the following model does not run on ANE. Inspecting with deCoreML I see the error ane: Failed to retrieved zero_point.
import numpy as np
import coremltools as ct
from coremltools.converters.mil import Builder as mb
import coremltools.converters.mil as mil
B, CIN, COUT = 512, 1024, 1024 * 4
@mb.program(
input_specs=[
mb.TensorSpec((B, CIN), mil.input_types.types.fp16),
],
opset_version=mil.builder.AvailableTarget.iOS18
)
def prog_manual_dequant(
x,
):
qw = np.random.randint(0, 2 ** 4, size=(COUT, CIN), dtype=np.int8).astype(mil.mil.types.np_uint4_dtype)
scale = np.random.randn(COUT, 1).astype(np.float16)
offset = np.random.randn(COUT, 1).astype(np.float16)
# offset = np.random.randint(0, 2 ** 4, size=(COUT, 1), dtype=np.uint8).astype(mil.mil.types.np_uint4_dtype)
dqw = mb.constexpr_blockwise_shift_scale(data=qw, scale=scale, offset=offset)
return mb.linear(x=x, weight=dqw)
cml_qmodel = ct.convert(
prog_manual_dequant,
compute_units=ct.ComputeUnit.CPU_AND_NE,
compute_precision=ct.precision.FLOAT16,
minimum_deployment_target=ct.target.iOS18,
)
Whereas if I use an offset with the same dtype as the weights (uint4 in this case), it does run on ANE
Tested on coremltools 8.0b1, on macOS 15.0 beta 2/Xcode 15 beta 2, and macOS 15.0 beta 3/Xcode 15 beta 3.
Post
Replies
Boosts
Views
Activity
Error when trying to generate CoreML performance report, message says
The data couldn't be written because it isn't in the correct format.
Here is the code to replicate the issue
import numpy as np
import coremltools as ct
from coremltools.converters.mil import Builder as mb
import coremltools.converters.mil as mil
w = np.random.normal(size=(256, 128, 1))
wemb = np.random.normal(size=(1, 32000, 128)) # .astype(np.float16)
rope_emb = np.random.normal(size=(1, 2048, 128))
shapes = [(1, seqlen) for seqlen in (32, 64)]
enum_shape = mil.input_types.EnumeratedShapes(shapes=shapes)
fixed_shape = (1, 128)
max_length = 2048
dtype = np.float32
@mb.program(
input_specs=[
# mb.TensorSpec(enum_shape.symbolic_shape, dtype=mil.input_types.types.int32),
mb.TensorSpec(enum_shape.symbolic_shape, dtype=mil.input_types.types.int32),
],
opset_version=mil.builder.AvailableTarget.iOS17,
)
def flex_like(input_ids):
indices = mb.fill_like(ref_tensor=input_ids, value=np.array(1, dtype=np.int32))
causal_mask = np.expand_dims(
np.triu(np.full((max_length, max_length), -np.inf, dtype=dtype), 1),
axis=0,
)
mask = mb.gather(
x=causal_mask,
indices=indices,
axis=2,
batch_dims=1,
name="mask_gather_0",
)
# mask = mb.gather(
# x=mask, indices=indices, axis=1, batch_dims=1, name="mask_gather_1"
# )
rope = mb.gather(x=rope_emb.astype(dtype), indices=indices, axis=1, batch_dims=1, name="rope")
hidden_states = mb.gather(x=wemb.astype(dtype), indices=input_ids, axis=1, batch_dims=1, name="embedding")
return (
hidden_states,
mask,
rope,
)
cml_flex_like = ct.convert(
flex_like,
compute_units=ct.ComputeUnit.ALL,
compute_precision=ct.precision.FLOAT32,
minimum_deployment_target=ct.target.iOS17,
inputs=[
ct.TensorType(name="input_ids", shape=enum_shape),
],
)
cml_flex_like.save("flex_like_32")
If I remove the hidden states from the return it does work, and it also works if I keep the hidden states, but remove both mask, and rope, i.e, the report is generated for both programs with either these returns:
return (
# hidden_states,
mask,
rope,
)
and
return (
hidden_states,
# mask,
# rope,
)
It also works if I use a static shape instead of an EnumeratedShape
I'm using macOS 15.0 and Xcode 16.0
Edit 1:
Forgot to mention that although the performance report fails, the model is still able to make predictions