How do you integrate a custom image classification model created in Keras into your iOS app?

Using the tutorial found at blog.keras.io/building-powerful-image-classification-models-using-very-little-data.html, I trained a Keras model to recognize the difference between cats and dogs.


''' Directory structure: 
     data/ 
          train/ 
               dogs/ 
                 dog001.jpg 
                 dog002.jpg 
                 ...
               cats/ 
                 cat001.jpg 
                 cat002.jpg 
                  ... 
          validation/ 
               dogs/ 
                 dog001.jpg 
                 dog002.jpg
                 ... 
               cats/ 
                  cat001.jpg 
                  cat002.jpg
                  ... 
'''


from keras.preprocessing.image import ImageDataGenerator 
from keras.models import Sequential 
from keras.layers import Conv2D, MaxPooling2D 
from keras.layers import Activation, Dropout, Flatten, Dense 
from keras import backend as K 
from PIL import Image
import numpy as np

# dimensions of our images. 
img_width, img_height = 150, 150 

train_data_dir = 'data/train' 
validation_data_dir = 'data/validation' 
nb_train_samples = 2000 
nb_validation_samples = 800 
epochs = 50 
batch_size = 16 

if K.image_data_format() == 'channels_first': 
     input_shape = (3, img_width, img_height) 
else: 
     input_shape = (img_width, img_height, 3) 

model = Sequential() 
model.add(Conv2D(32, (3, 3), input_shape=input_shape)) 
model.add(Activation('relu')) 
model.add(MaxPooling2D(pool_size=(2, 2))) 

model.add(Conv2D(32, (3, 3))) 
model.add(Activation('relu')) 
model.add(MaxPooling2D(pool_size=(2, 2))) 

model.add(Conv2D(64, (3, 3))) 
model.add(Activation('relu')) 
model.add(MaxPooling2D(pool_size=(2, 2))) 

model.add(Flatten())
model.add(Dense(64))
model.add(Activation('relu')) 
model.add(Dropout(0.5)) 
model.add(Dense(1)) 
model.add(Activation('sigmoid'))

model.compile(loss='binary_crossentropy', optimizer='rmsprop', metrics=['accuracy']) 

# this is the augmentation configuration we will use for training
train_datagen = ImageDataGenerator( rescale=1. / 255, shear_range=0.2, zoom_range=0.2, horizontal_flip=True) 

# this is the augmentation configuration we will use for testing: 
# only rescaling 
test_datagen = ImageDataGenerator(rescale=1. / 255) 

train_generator = train_datagen.flow_from_directory( train_data_dir, target_size=(img_width, img_height), batch_size=batch_size, class_mode='binary') 

validation_generator = test_datagen.flow_from_directory( validation_data_dir, target_size=(img_width, img_height), batch_size=batch_size, class_mode='binary') 

model.fit_generator( train_generator, steps_per_epoch=nb_train_samples // batch_size, epochs=epochs, validation_data=validation_generator, validation_steps=nb_validation_samples // batch_size)

model.save('first_try.h5')


Using the coremltools documentation as a guide, I tried converting my model to the Core ML format:


import coremltools 
import h5py 
coreml_model = coremltools.converters.keras.convert('first_try.h5',input_names='image',output_names='class',image_input_names = 'image',class_labels = ['cat', 'dog'], is_bgr=True) 
coreml_model.save('cats_dogs.mlmodel')


When I import the model into Xcode and run it with the following code (which works with the ResNet50 and Inception v3 models found on Apple's website), the statement "guard let prediction = try? model.prediction(image: pixelBuffer!) else { print("Error!"); return }" prints "Error!", and the line "textView.text = "I think this is a \(prediction.classLabel)."" never gets executed.


import UIKit
import Vision
import CoreML
class ViewController: UIViewController, UINavigationControllerDelegate {

    var model: cats_dogs!

    override func viewWillAppear(_ animated: Bool) {
        super.viewWillAppear(animated)
        model = cats_dogs()
    }

    @IBOutlet weak var imageView: UIImageView!
    @IBOutlet weak var textView: UITextView!

    let imagePicker = UIImagePickerController()


    override func viewDidLoad() {
        super.viewDidLoad()
        self.imagePicker.delegate = self
    }

    @IBAction func openImagePicker(_ sender: Any) {
        imagePicker.allowsEditing = false
        imagePicker.sourceType = .photoLibrary
        present(imagePicker, animated: true, completion: nil)
    }

    @IBAction func camera(_ sender: Any) {
    
        if !UIImagePickerController.isSourceTypeAvailable(.camera) {
            return
        }
    
        let cameraPicker = UIImagePickerController()
        cameraPicker.delegate = self
        cameraPicker.sourceType = .camera
        cameraPicker.allowsEditing = false
    
        present(cameraPicker, animated: true)
    }

}
extension ViewController: UIImagePickerControllerDelegate {
    func imagePickerControllerDidCancel(_ picker: UIImagePickerController) {
        dismiss(animated: true, completion: nil)
    }

    func imagePickerController(_ picker: UIImagePickerController, didFinishPickingMediaWithInfo info: [String : Any]) {
        picker.dismiss(animated: true)
        textView.text = "Analyzing Image..."
        guard let image = info["UIImagePickerControllerOriginalImage"] as? UIImage else {
            return
        }
    
        UIGraphicsBeginImageContextWithOptions(CGSize(width: 150, height: 150), true, 2.0)
        image.draw(in: CGRect(x: 0, y: 0, width: 150, height: 150))
        let newImage = UIGraphicsGetImageFromCurrentImageContext()!
        UIGraphicsEndImageContext()
    
        let attrs = [kCVPixelBufferCGImageCompatibilityKey: kCFBooleanTrue, kCVPixelBufferCGBitmapContextCompatibilityKey: kCFBooleanTrue] as CFDictionary
        var pixelBuffer : CVPixelBuffer?
        let status = CVPixelBufferCreate(kCFAllocatorDefault, Int(newImage.size.width), Int(newImage.size.height), kCVPixelFormatType_32ARGB, attrs, &pixelBuffer)
        guard (status == kCVReturnSuccess) else {
            return
        }
    
        CVPixelBufferLockBaseAddress(pixelBuffer!, CVPixelBufferLockFlags(rawValue: 0))
        let pixelData = CVPixelBufferGetBaseAddress(pixelBuffer!)
    
        let rgbColorSpace = CGColorSpaceCreateDeviceRGB()
        let context = CGContext(data: pixelData, width: Int(newImage.size.width), height: Int(newImage.size.height), bitsPerComponent: 8, bytesPerRow: CVPixelBufferGetBytesPerRow(pixelBuffer!), space: rgbColorSpace, bitmapInfo: CGImageAlphaInfo.noneSkipFirst.rawValue)
    
        context?.translateBy(x: 0, y: newImage.size.height)
        context?.scaleBy(x: 1.0, y: -1.0)
    
        UIGraphicsPushContext(context!)
        newImage.draw(in: CGRect(x: 0, y: 0, width: newImage.size.width, height: newImage.size.height))
        UIGraphicsPopContext()
        CVPixelBufferUnlockBaseAddress(pixelBuffer!, CVPixelBufferLockFlags(rawValue: 0))
        imageView.image = newImage
    
        guard let prediction = try? model.prediction(image: pixelBuffer!) else {
            print("Error!")
            return
        }
        textView.text = "I think this is a \(prediction.classLabel)."
    }
}



I have searched the web extensively to solve this issue. Help to fix this issue would be much appreciated!

Replies

Hmm... It's really hard to tell from the code alone. And I also couldn't find the screenshot you wanted to attach. Can you maybe show us the stack trace when the error occurs?

I reformatted the code slightly. The app no longer crashes, but when I run the code above, the statement "guard let prediction = try? model.prediction(image: pixelBuffer!) else { print("Error!"); return }" prints "Error!" and the line "textView.text = "I think this is a \(prediction.classLabel)."" never gets executed. Is there a way I can see what the actual error is?

Well, the guard let ... try? ... catches the exception. Try without the guard and with try! instead and you should run into the exception (and get it printed).

Using "print(try! model.prediction(image: pixelBuffer!) as Any)" I printed the following error:

fatal error: 'try!' expression unexpectedly raised an error:

"Dimensions of layer 'output' is not the same size as the number of class labels."

NSLocalizedDescription=Dimensions of layer 'output' is not the same size as the number of class labels.:

Not sure what "Dimensions of layer 'output' is not the same size as the number of class labels" means.

That means that the output of your neural network has unexpected dimensions. Since you provided only two class labels ['cat', 'dog'], CoreML assumes that the output of the network is an array with size two. So the issue is probably somewhere in the shape of the network itself.
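
To see the mismatch directly, you can load the trained Keras model and compare its output shape with the number of class labels passed to the converter. A minimal sketch, assuming 'first_try.h5' is in the working directory:

from keras.models import load_model

# Load the model saved by model.save('first_try.h5')
model = load_model('first_try.h5')

# The last dimension of the output shape must match the number of
# class labels given to coremltools (2 for ['cat', 'dog']).
print(model.output_shape)  # (None, 1) for the tutorial model: a single sigmoid unit

class_labels = ['cat', 'dog']
if model.output_shape[-1] != len(class_labels):
    print('Mismatch: %d output unit(s) vs. %d class label(s)'
          % (model.output_shape[-1], len(class_labels)))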


To help find the issue, I recommend inspecting the compiled network topology and the shapes of the (intermediate) results. You can find those in the compiled app bundle. There should be a folder with the same name as your network; it contains JSON files describing the network. Look for the layer named 'output' in particular. Its output resource should have shape [2] if everything is correct.
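
An alternative that does not require digging into the app bundle (a sketch, assuming the coremltools Python API is available on the Mac used for conversion): load the saved .mlmodel spec and check the output size of the last Dense (innerProduct) layer, which should match the number of class labels.

import coremltools

# Load the protobuf spec of the converted model
spec = coremltools.utils.load_spec('cats_dogs.mlmodel')

# The converted Keras model is a neural network classifier;
# list its layers and check each Dense (innerProduct) layer
for layer in spec.neuralNetworkClassifier.layers:
    kind = layer.WhichOneof('layer')
    print(layer.name, kind)
    if kind == 'innerProduct':
        # should equal the number of class labels (2 for ['cat', 'dog'])
        print('  outputChannels = %d' % layer.innerProduct.outputChannels)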

How do you find the compiled app bundle? I found a folder with the model name in Xcode's Derived Data, but it does not contain any JSON files, so I probably looked at the wrong thing.

This is the code I use to convert the model to the .mlmodel format


import coremltools
import h5py
output_labels = ['cat','dog']
coreml_model = coremltools.converters.keras.convert('first_try.h5',input_names='image',image_input_names = 'image',class_labels = output_labels, is_bgr=False)
coreml_model.author = ''
coreml_model.short_description = 'Model to classify images as either cats or dogs'
coreml_model.input_description['image'] = 'Image of a cat or dog'
print(coreml_model)


coreml_model.save('cats_dogs.mlmodel')


This is the terminal output:


0 : conv2d_1_input, <keras.engine.topology.InputLayer object at 0x1194c6c50>
1 : conv2d_1, <keras.layers.convolutional.Conv2D object at 0x1194c6c90>
2 : activation_1, <keras.layers.core.Activation object at 0x119515b90>
3 : max_pooling2d_1, <keras.layers.pooling.MaxPooling2D object at 0x119501e50>
4 : conv2d_2, <keras.layers.convolutional.Conv2D object at 0x119520cd0>
5 : activation_2, <keras.layers.core.Activation object at 0x1194e8150>
6 : max_pooling2d_2, <keras.layers.pooling.MaxPooling2D object at 0x11955cc50>
7 : conv2d_3, <keras.layers.convolutional.Conv2D object at 0x11955ce50>
8 : activation_3, <keras.layers.core.Activation object at 0x11954d9d0>
9 : max_pooling2d_3, <keras.layers.pooling.MaxPooling2D object at 0x119594cd0>
10 : flatten_1, <keras.layers.core.Flatten object at 0x1195a08d0>
11 : dense_1, <keras.layers.core.Dense object at 0x119579f10>
12 : activation_4, <keras.layers.core.Activation object at 0x1195c94d0>
13 : dense_2, <keras.layers.core.Dense object at 0x1195ea450>
14 : activation_5, <keras.layers.core.Activation object at 0x119614b10>

input {
  name: "image"
  shortDescription: "Image of a cat or dog"
  type {
    imageType {
      width: 150
      height: 150
      colorSpace: RGB
    }
  }
}
output {
  name: "output1"
  type {
    dictionaryType {
      stringKeyType {
      }
    }
  }
}
output {
  name: "classLabel"
  type {
    stringType {
    }
  }
}
predictedFeatureName: "classLabel"
predictedProbabilitiesName: "output1"
metadata {
  shortDescription: "Model to classify images as either cats or dogs"
  author: ""
}

Yes, those files. The extension isn't "json", but the format is. The file "model.espresso.net" describes the topology, "model.espresso.shape" describes the shapes of the resources. Maybe they'll help you find which layer doesn't produce the right output.
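
Since the format is JSON, those files can also be pretty-printed with a small script. A sketch, using a hypothetical path to the compiled model folder inside the built app bundle (the actual location under Derived Data will differ):

import json

# Hypothetical path: adjust it to wherever Xcode placed the compiled model
shape_file = 'cats_dogs.mlmodelc/model.espresso.shape'

# Pretty-print the recorded shapes so the layer named 'output'
# can be checked against the expected shape [2]
with open(shape_file) as f:
    print(json.dumps(json.load(f), indent=2))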

I ran into exactly the same problem.


In the model definition, a Dense layer with a single output unit is added, followed by a sigmoid activation. This function outputs one value only, namely the probability that an image belongs to the first label (a cat). The probability that the image is an image of a dog is 1 minus that value.


When converting the model to a Core ML model you define two class labels, ['cat', 'dog']. Hence the error message: "Dimensions of layer 'output' is not the same size as the number of class labels."


I managed to make this work by changing the Dense layer to have 2 units, using a 'softmax' activation function, and using the 'categorical_crossentropy' loss function. You also need to change the class_mode of the training and validation data generators to 'categorical'.


These are the adapted lines of code

…
model.add(Dense(2))
model.add(Activation('softmax'))

model.compile(loss='categorical_crossentropy', optimizer='rmsprop', metrics=['accuracy'])

…

train_generator = train_datagen.flow_from_directory( train_data_dir, target_size=(img_width, img_height), batch_size=batch_size, class_mode='categorical')
validation_generator = test_datagen.flow_from_directory( validation_data_dir, target_size=(img_width, img_height), batch_size=batch_size, class_mode='categorical')



There is another, easier way to address the issue: simply set the class labels to ['cat']. That way the number of output dimensions matches the number of class labels. Then, if the model outputs a value lower than 0.5 it's a dog, otherwise it's a cat.
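
As a sketch, the conversion call for that single-label variant looks the same as before, with only class_labels changed:

import coremltools

output_labels = ['cat']  # one label, matching the single sigmoid output unit
coreml_model = coremltools.converters.keras.convert('first_try.h5', input_names='image', image_input_names='image', class_labels=output_labels, is_bgr=False)
coreml_model.save('cats_dogs.mlmodel')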


After this, I was faced with one more problem: the model only returned 0's and 1's. The reason is that the images are rescaled in the Keras image data generators (by a factor of 1./255), so the Core ML model also needs to apply this scaling factor. For the tutorial to work you therefore also need to adapt the conversion script as follows:

coreml_model = coremltools.converters.keras.convert('first_try.h5',image_scale=1./255,input_names='image',image_input_names = 'image',class_labels = output_labels, is_bgr=False)


Thanks to Matthijs Hollemans for pointing me in the right direction through his blog post http://machinethink.net/blog/help-core-ml-gives-wrong-output/