CoreML dynamic shapes excessive memory use
Hi everyone, I'm developing an iOS app that uses a PyTorch GPT2 model converted to Core ML via the Python coremltools package. When I use dynamic shapes (either RangeDim or EnumeratedShapes) I get huge memory spikes when creating the model instance, whereas I don't get them when I specify a fixed shape. Specifically, the model has a single vector input of a given length. Below is the peak and sustained memory usage with different settings. The difference between a fixed input and a dynamic input is really big, and I can't explain why or what to do about it.

static (length 64): 15.5 MB
RangeDim (max length 64): 227 MB peak, then 22.8 MB
RangeDim (max length 512): 281 MB peak, then 25.8 MB
EnumeratedShapes (from 0 to 64 with step size 10): 328 MB peak, then 24.7 MB

Code to reproduce:

import coremltools as ct
import torch
from pathlib import Path
from transformers import GPT2LMHeadModel, GPT2Config
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained(Path(model_folder))  # model_folder points to the local tokenizer files


# Create a module that just extracts the logits
class JustLogits(torch.nn.Module):
    def __init__(self, model):
        super().__init__()
        self.model = model

    def forward(self, input_ids):
        return self.model(input_ids, return_dict=True).logits


def create_models_for(n_embd, n_layer, n_head, n_inner):
    model_str = f"n_embd_{n_embd}_n_layer_{n_layer}_n_head_{n_head}_n_inner_{n_inner}"
    model_out = f"models/gpt2_{model_str}.mlpackage"

    configuration = GPT2Config(
        vocab_size=tokenizer.vocab_size,
        n_embd=n_embd,
        n_layer=n_layer,
        n_head=n_head,
        n_inner=n_inner,
        bos_token_id=tokenizer.bos_token_id,
        eos_token_id=tokenizer.eos_token_id,
    )
    model = GPT2LMHeadModel(configuration)
    model.init_weights()
    model.eval()

    logit_model = JustLogits(model)
    logit_model.eval()

    max_sequence_length = 64
    all_input_ids = [torch.ones((seq_len,), dtype=torch.int32) for seq_len in [1, 64, max_sequence_length]]
    out = model(all_input_ids[1], return_dict=True)

    traced_model = torch.jit.trace(logit_model, all_input_ids[-1])

    from coremltools.converters.mil.mil import types

    # Fixed-shape conversion (length 64)
    mlmodel = ct.convert(
        traced_model,
        convert_to="mlprogram",
        minimum_deployment_target=ct.target.iOS15,
        inputs=[ct.TensorType(shape=[64], name="input_ids", dtype=types.int32)],
        outputs=[ct.TensorType(name="output_logits")],
    )
    mlmodel.save(model_out)
    out = mlmodel.predict({"input_ids": all_input_ids[-1]})

    # Dynamic-shape conversions (RangeDim with max lengths 64 and 512)
    for inference_max_len in [64, 512]:
        mlmodel = ct.convert(
            traced_model,
            convert_to="mlprogram",
            minimum_deployment_target=ct.target.iOS15,
            inputs=[ct.TensorType(
                shape=[ct.RangeDim(1, inference_max_len, default=inference_max_len)],
                name="input_ids",
                dtype=types.int32)],
            outputs=[ct.TensorType(name="output_logits")],
        )
        mlmodel.save(f"models/gpt2_{model_str}_dynamic_{inference_max_len}.mlpackage")


create_models_for(192, 1, 6, 4*192)
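For completeness, the EnumeratedShapes measurement above came from a conversion along these lines. This is a minimal sketch rather than the exact script: the shape list (lengths in steps of 10 up to 64), the default shape, and the output filename are assumptions, and it reuses traced_model, model_str, and types from the function above.

    # Sketch of the EnumeratedShapes conversion behind the "enum" measurement.
    # The exact shape list is an assumption (lengths in steps of 10 up to 64).
    enumerated_lengths = [10, 20, 30, 40, 50, 60, 64]
    mlmodel_enum = ct.convert(
        traced_model,
        convert_to="mlprogram",
        minimum_deployment_target=ct.target.iOS15,
        inputs=[ct.TensorType(
            shape=ct.EnumeratedShapes(shapes=[[n] for n in enumerated_lengths], default=[64]),
            name="input_ids",
            dtype=types.int32)],
        outputs=[ct.TensorType(name="output_logits")],
    )
    mlmodel_enum.save(f"models/gpt2_{model_str}_enumerated.mlpackage")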