9 Replies
      Latest reply: Aug 29, 2017 1:43 PM by Beav
      jfantell Level 1 Level 1 (0 points)

        Using the tutorial found at blog.keras.io/building-powerful-image-classification-models-using-very-little-data.html, I trained a Keras model to recognize the difference between cats and dogs.

         

        ''' Directory structure: 
             data/ 
                  train/ 
                       dogs/ 
                         dog001.jpg 
                         dog002.jpg 
                         ...
                       cats/ 
                         cat001.jpg 
                         cat002.jpg 
                          ... 
                  validation/ 
                       dogs/ 
                         dog001.jpg 
                         dog002.jpg
                         ... 
                       cats/ 
                          cat001.jpg 
                          cat002.jpg
                          ... 
        '''
        
        
        from keras.preprocessing.image import ImageDataGenerator 
        from keras.models import Sequential 
        from keras.layers import Conv2D, MaxPooling2D 
        from keras.layers import Activation, Dropout, Flatten, Dense 
        from keras import backend as K 
        from PIL import Image
        import numpy as np
        
        # dimensions of our images. 
        img_width, img_height = 150, 150 
        
        train_data_dir = 'data/train' 
        validation_data_dir = 'data/validation' 
        nb_train_samples = 2000 
        nb_validation_samples = 800 
        epochs = 50 
        batch_size = 16 
        
        if K.image_data_format() == 'channels_first': 
             input_shape = (3, img_width, img_height) 
        else: 
             input_shape = (img_width, img_height, 3) 
        
        model = Sequential() 
        model.add(Conv2D(32, (3, 3), input_shape=input_shape)) 
        model.add(Activation('relu')) 
        model.add(MaxPooling2D(pool_size=(2, 2))) 
        
        model.add(Conv2D(32, (3, 3))) 
        model.add(Activation('relu')) 
        model.add(MaxPooling2D(pool_size=(2, 2))) 
        
        model.add(Conv2D(64, (3, 3))) 
        model.add(Activation('relu')) 
        model.add(MaxPooling2D(pool_size=(2, 2))) 
        
        model.add(Flatten())
        model.add(Dense(64))
        model.add(Activation('relu')) 
        model.add(Dropout(0.5)) 
        model.add(Dense(1)) 
        model.add(Activation('sigmoid'))
        
        model.compile(loss='binary_crossentropy', optimizer='rmsprop', metrics=['accuracy']) 
        
        # this is the augmentation configuration we will use for training
        train_datagen = ImageDataGenerator(
            rescale=1. / 255,
            shear_range=0.2,
            zoom_range=0.2,
            horizontal_flip=True)
        
        # this is the augmentation configuration we will use for testing: 
        # only rescaling 
        test_datagen = ImageDataGenerator(rescale=1. / 255) 
        
        train_generator = train_datagen.flow_from_directory(
            train_data_dir,
            target_size=(img_width, img_height),
            batch_size=batch_size,
            class_mode='binary')
        
        validation_generator = test_datagen.flow_from_directory(
            validation_data_dir,
            target_size=(img_width, img_height),
            batch_size=batch_size,
            class_mode='binary')
        
        model.fit_generator(
            train_generator,
            steps_per_epoch=nb_train_samples // batch_size,
            epochs=epochs,
            validation_data=validation_generator,
            validation_steps=nb_validation_samples // batch_size)
        
        model.save('first_try.h5')
        
        
        
        

         

        Using the coremltools documentation as a guide, I tried converting my model to the coreml format:

         

        import coremltools 
        import h5py 
        coreml_model = coremltools.converters.keras.convert(
            'first_try.h5',
            input_names='image',
            output_names='class',
            image_input_names='image',
            class_labels=['cat', 'dog'],
            is_bgr=True)
        coreml_model.save('cats_dogs.mlmodel')
        
        
        
        

         

        When I import the model into Xcode and run it with the following code (which works with the ResNet50 and Inception v3 models found on Apple's website), the snippet at line 86, guard let prediction = try? model.prediction(image: pixelBuffer!) else { print("Error!"); return }, prints "Error!" and the line textView.text = "I think this is a \(prediction.classLabel)." never gets executed.

         

        
        import UIKit
        import Vision
        import CoreML
        class ViewController: UIViewController, UINavigationControllerDelegate {
        
            var model: cats_dogs!
        
            override func viewWillAppear(_ animated: Bool) {
                model = cats_dogs()
            }
        
            @IBOutlet weak var imageView: UIImageView!
            @IBOutlet weak var textView: UITextView!
        
            let imagePicker = UIImagePickerController()
        
        
            override func viewDidLoad() {
                super.viewDidLoad()
                self.imagePicker.delegate = self
            }
        
            @IBAction func openImagePicker(_ sender: Any) {
                imagePicker.allowsEditing = false
                imagePicker.sourceType = .photoLibrary
                present(imagePicker, animated: true, completion: nil)
            }
        
            @IBAction func camera(_ sender: Any) {
            
                if !UIImagePickerController.isSourceTypeAvailable(.camera) {
                    return
                }
            
                let cameraPicker = UIImagePickerController()
                cameraPicker.delegate = self
                cameraPicker.sourceType = .camera
                cameraPicker.allowsEditing = false
            
                present(cameraPicker, animated: true)
            }
        
        }
        extension ViewController: UIImagePickerControllerDelegate {
            func imagePickerControllerDidCancel(_ picker: UIImagePickerController) {
                dismiss(animated: true, completion: nil)
            }
        
            func imagePickerController(_ picker: UIImagePickerController, didFinishPickingMediaWithInfo info: [String : Any]) {
                picker.dismiss(animated: true)
                textView.text = "Analyzing Image..."
                guard let image = info["UIImagePickerControllerOriginalImage"] as? UIImage else {
                    return
                }
            
                UIGraphicsBeginImageContextWithOptions(CGSize(width: 150, height: 150), true, 2.0)
                image.draw(in: CGRect(x: 0, y: 0, width: 150, height: 150))
                let newImage = UIGraphicsGetImageFromCurrentImageContext()!
                UIGraphicsEndImageContext()
            
                let attrs = [kCVPixelBufferCGImageCompatibilityKey: kCFBooleanTrue, kCVPixelBufferCGBitmapContextCompatibilityKey: kCFBooleanTrue] as CFDictionary
                var pixelBuffer : CVPixelBuffer?
                let status = CVPixelBufferCreate(kCFAllocatorDefault, Int(newImage.size.width), Int(newImage.size.height), kCVPixelFormatType_32ARGB, attrs, &pixelBuffer)
                guard (status == kCVReturnSuccess) else {
                    return
                }
            
                CVPixelBufferLockBaseAddress(pixelBuffer!, CVPixelBufferLockFlags(rawValue: 0))
                let pixelData = CVPixelBufferGetBaseAddress(pixelBuffer!)
            
                let rgbColorSpace = CGColorSpaceCreateDeviceRGB()
                let context = CGContext(data: pixelData, width: Int(newImage.size.width), height: Int(newImage.size.height), bitsPerComponent: 8, bytesPerRow: CVPixelBufferGetBytesPerRow(pixelBuffer!), space: rgbColorSpace, bitmapInfo: CGImageAlphaInfo.noneSkipFirst.rawValue)
            
                context?.translateBy(x: 0, y: newImage.size.height)
                context?.scaleBy(x: 1.0, y: -1.0)
            
                UIGraphicsPushContext(context!)
                newImage.draw(in: CGRect(x: 0, y: 0, width: newImage.size.width, height: newImage.size.height))
                UIGraphicsPopContext()
                CVPixelBufferUnlockBaseAddress(pixelBuffer!, CVPixelBufferLockFlags(rawValue: 0))
                imageView.image = newImage
            
                guard let prediction = try? model.prediction(image: pixelBuffer!) else {
                    print("Error!")
                    return
                }
                textView.text = "I think this is a \(prediction.classLabel)."
            }
        }
        
        
        
        
        

         

         

        I have searched the web extensively to solve this issue. Help to fix this issue would be much appreciated!

        • Re: How do you integrate a custom image classification model created in keras in your iOS App?
          FrankSchlegel Level 1 Level 1 (20 points)

          Hmm... It's really hard to tell from the code alone. And I also couldn't find the screenshot you wanted to attach. Can you maybe show us the stack trace when the error occurs?

            • Re: How do you integrate a custom image classification model created in keras in your iOS App?
              jfantell Level 1 Level 1 (0 points)

              I reformatted the code slightly. The app no longer crashes, but when I run the code above, the snippet at line 86, guard let prediction = try? model.prediction(image: pixelBuffer!) else { print("Error!"); return }, prints "Error!" and the line textView.text = "I think this is a \(prediction.classLabel)." never gets executed. Is there a way I can see what the actual error is?

                • Re: How do you integrate a custom image classification model created in keras in your iOS App?
                  FrankSchlegel Level 1 Level 1 (20 points)

                  Well, the guard let ... try? ... swallows the error. Try it without the guard, using try! instead, and you should run into the error (and get it printed).
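
                  For example, a do/catch variant of the same call (just a minimal sketch) prints the underlying error instead of swallowing it:

                  do {
                      let prediction = try model.prediction(image: pixelBuffer!)
                      textView.text = "I think this is a \(prediction.classLabel)."
                  } catch {
                      // The thrown error carries the description of what went wrong
                      print("Prediction failed: \(error)")
                  }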

                    • Re: How do you integrate a custom image classification model created in keras in your iOS App?
                      jfantell Level 1 Level 1 (0 points)

                      Using "print(try! model.prediction(image: pixelBuffer!) as Any)" I printed the following error:


                      fatal error: 'try!' expression unexpectedly raised an error:

                      "Dimensions of layer 'output' is not the same size as the number of class labels."

                      NSLocalizedDescription=Dimensions of layer 'output' is not the same size as the number of class labels.:


                      Not sure what "Dimensions of layer 'output' is not the same size as the number of class labels" means.

                        • Re: How do you integrate a custom image classification model created in keras in your iOS App?
                          FrankSchlegel Level 1 Level 1 (20 points)

                          That means that the output of your neural network has unexpected dimensions. Since you provided only two class labels, ['cat', 'dog'], CoreML assumes that the output of the network is an array of size two. So the issue is probably somewhere in the shape of the network itself.

                           

                          To help find the issue, I recommend inspecting the compiled network topology and the shapes of the (intermediate) results. You can find those in the compiled app bundle: there should be a folder with the same name as your network, containing JSON files that describe the network. Look for the layer named 'output' in particular. Its output resource should have shape [2] if everything is correct.
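
                          Alternatively, you can inspect the converted model before it ever reaches Xcode. This is just a sketch, assuming you still have coremltools at hand; it loads the saved spec and prints the declared outputs and each layer's input/output blobs:

                          import coremltools

                          # Load the protobuf spec of the converted model and print the
                          # declared inputs/outputs (names, types, image sizes).
                          spec = coremltools.utils.load_spec('cats_dogs.mlmodel')
                          print(spec.description)

                          # For a classifier, list every layer with its input/output blob
                          # names; the blob feeding the class labels should carry one
                          # dimension per label, i.e. two for ['cat', 'dog'].
                          for layer in spec.neuralNetworkClassifier.layers:
                              print(layer.name, list(layer.input), '->', list(layer.output))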

                            • Re: How do you integrate a custom image classification model created in keras in your iOS App?
                              jfantell Level 1 Level 1 (0 points)

                              How do you find the compiled app bundle? I found a folder with the model name in Xcode's Derived Data, but it does not contain any JSON files, so I probably looked in the wrong place.

                              • Re: How do you integrate a custom image classification model created in keras in your iOS App?
                                jfantell Level 1 Level 1 (0 points)

                                This is the code I use to convert the model to the .mlmodel format

                                 

                                import coremltools
                                import h5py

                                output_labels = ['cat', 'dog']
                                coreml_model = coremltools.converters.keras.convert(
                                    'first_try.h5',
                                    input_names='image',
                                    image_input_names='image',
                                    class_labels=output_labels,
                                    is_bgr=False)
                                coreml_model.author = ''
                                coreml_model.short_description = 'Model to classify images as either cats or dogs'
                                coreml_model.input_description['image'] = 'Image of a cat or dog'
                                print(coreml_model)

                                coreml_model.save('cats_dogs.mlmodel')
                                
                                

                                 

                                This is the terminal output:

                                 

                                0 : conv2d_1_input, <keras.engine.topology.InputLayer object at 0x1194c6c50>
                                1 : conv2d_1, <keras.layers.convolutional.Conv2D object at 0x1194c6c90>
                                2 : activation_1, <keras.layers.core.Activation object at 0x119515b90>
                                3 : max_pooling2d_1, <keras.layers.pooling.MaxPooling2D object at 0x119501e50>
                                4 : conv2d_2, <keras.layers.convolutional.Conv2D object at 0x119520cd0>
                                5 : activation_2, <keras.layers.core.Activation object at 0x1194e8150>
                                6 : max_pooling2d_2, <keras.layers.pooling.MaxPooling2D object at 0x11955cc50>
                                7 : conv2d_3, <keras.layers.convolutional.Conv2D object at 0x11955ce50>
                                8 : activation_3, <keras.layers.core.Activation object at 0x11954d9d0>
                                9 : max_pooling2d_3, <keras.layers.pooling.MaxPooling2D object at 0x119594cd0>
                                10 : flatten_1, <keras.layers.core.Flatten object at 0x1195a08d0>
                                11 : dense_1, <keras.layers.core.Dense object at 0x119579f10>
                                12 : activation_4, <keras.layers.core.Activation object at 0x1195c94d0>
                                13 : dense_2, <keras.layers.core.Dense object at 0x1195ea450>
                                14 : activation_5, <keras.layers.core.Activation object at 0x119614b10>

                                input {
                                  name: "image"
                                  shortDescription: "Image of a cat or dog"
                                  type {
                                    imageType {
                                      width: 150
                                      height: 150
                                      colorSpace: RGB
                                    }
                                  }
                                }
                                output {
                                  name: "output1"
                                  type {
                                    dictionaryType {
                                      stringKeyType {
                                      }
                                    }
                                  }
                                }
                                output {
                                  name: "classLabel"
                                  type {
                                    stringType {
                                    }
                                  }
                                }
                                predictedFeatureName: "classLabel"
                                predictedProbabilitiesName: "output1"
                                metadata {
                                  shortDescription: "Model to classify images as either cats or dogs"
                                  author: ""
                                }

                      • Re: How do you integrate a custom image classification model created in keras in your iOS App?
                        Beav Level 1 Level 1 (0 points)

                        I ran into exactly the same problem.

                         

                        In line 62 a Dense layer with one output unit is defined, and in the next line the sigmoid activation function is used. This function outputs a single value: the probability that the image is an image of the first label (a cat). The probability that the image is an image of a dog is 1 minus that value.

                         

                        When converting the model to a CoreML model you define two class labels (['cat', 'dog']). Hence the error message: "Dimensions of layer 'output' is not the same size as the number of class labels."
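
                        To see the mismatch concretely, here is a minimal standalone sketch (with a random stand-in for the flattened features) comparing the output shapes of the two head variants:

                        import numpy as np
                        from keras.models import Sequential
                        from keras.layers import Dense, Activation

                        x = np.random.rand(1, 64)  # stand-in for the flattened features

                        # 1-unit sigmoid head: one value per image, but two class labels
                        binary_head = Sequential([Dense(1, input_dim=64), Activation('sigmoid')])
                        print(binary_head.predict(x).shape)       # (1, 1) -> mismatch

                        # 2-unit softmax head: one probability per class
                        categorical_head = Sequential([Dense(2, input_dim=64), Activation('softmax')])
                        print(categorical_head.predict(x).shape)  # (1, 2) -> matches ['cat', 'dog']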

                         

                        I managed to make this work by changing the Dense layer to have 2 units, using a 'softmax' activation function, and using the 'categorical_crossentropy' loss function. You also need to change the class_mode of the training and validation data generators to 'categorical'.

                         

                        These are the adapted lines of code:

                        …
                        model.add(Dense(2))
                        model.add(Activation('softmax'))
                        
                        model.compile(loss='categorical_crossentropy', optimizer='rmsprop', metrics=['accuracy'])
                        
                        …
                        
                        train_generator = train_datagen.flow_from_directory(
                            train_data_dir,
                            target_size=(img_width, img_height),
                            batch_size=batch_size,
                            class_mode='categorical')
                        validation_generator = test_datagen.flow_from_directory(
                            validation_data_dir,
                            target_size=(img_width, img_height),
                            batch_size=batch_size,
                            class_mode='categorical')
                        
                        
                        
                        
                        

                         

                         

                        There is another, easier way to address the issue: simply set the class labels to ['cat']. This way you have the same number of output dimensions as class labels. Now, if the model outputs a value lower than 0.5 it's a dog; otherwise it's a cat.
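
                        The conversion call would then look like this (same script as above, only the label list is changed):

                        # Sketch: a single class label, so the one sigmoid output matches
                        # the label count; values below 0.5 then mean 'dog'.
                        coreml_model = coremltools.converters.keras.convert(
                            'first_try.h5',
                            input_names='image',
                            image_input_names='image',
                            class_labels=['cat'],
                            is_bgr=False)
                        coreml_model.save('cats_dogs.mlmodel')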

                         

                        After this, I was faced with one more problem: the model only returned 0s and 1s. The reason is that the images are rescaled in the Keras image data generators (by a factor of 1./255), so the CoreML model needs to apply the same scaling factor. For the tutorial to work, you therefore also need to adapt the conversion script as follows:

                        coreml_model = coremltools.converters.keras.convert(
                            'first_try.h5',
                            image_scale=1. / 255,
                            input_names='image',
                            image_input_names='image',
                            class_labels=output_labels,
                            is_bgr=False)
                        
                        

                         

                        Thanks to Matthijs Hollemans for pointing me in the right direction through his blog post http://machinethink.net/blog/help-core-ml-gives-wrong-output/