While the NeuralNetworkBuilder currently does not have options for image outputs, you can use coremltools to modify the resulting model so that the desired multiarray output is treated as an image.
Here is an example helper function:
def convert_multiarray_output_to_image(spec, feature_name, is_bgr=False):
    """
    Convert an output multiarray to be represented as an image.

    This modifies the Model_pb spec passed in, in place. Outputs whose name
    does not match ``feature_name`` are left untouched.

    Example:
        model = coremltools.models.MLModel('MyNeuralNetwork.mlmodel')
        spec = model.get_spec()
        convert_multiarray_output_to_image(spec, 'imageOutput', is_bgr=False)
        newModel = coremltools.models.MLModel(spec)
        newModel.save('MyNeuralNetworkWithImageOutput.mlmodel')

    Parameters
    ----------
    spec: Model_pb
        The specification containing the output feature to convert
    feature_name: str
        The name of the multiarray output feature you want to convert
    is_bgr: boolean
        If the multiarray has 3 channels, set to True for BGR pixel order
        or False for RGB. Ignored for single-channel (grayscale) outputs.

    Raises
    ------
    ValueError
        If the named output is not a multiarray, if its shape is not
        (channels, height, width), or if its channel count is not 1 or 3.
    """
    # Function-scope import keeps coremltools a dependency of this helper
    # only, and avoids re-running the import statement per matching output.
    from coremltools.proto import FeatureTypes_pb2 as ft
    color_space = ft.ImageFeatureType.ColorSpace

    for output in spec.description.output:
        if output.name != feature_name:
            continue
        if output.type.WhichOneof('Type') != 'multiArrayType':
            raise ValueError("%s is not a multiarray type" % output.name)

        # Capture the shape before touching imageType: imageType is expected
        # to share the 'Type' oneof with multiArrayType, so assigning to it
        # would discard the multiarray description.
        array_shape = tuple(output.type.multiArrayType.shape)
        if len(array_shape) != 3:
            raise ValueError(
                "%s has shape %s; expected (channels, height, width)"
                % (output.name, array_shape))
        channels, height, width = array_shape

        # Validate the channel count before mutating the spec so a failure
        # leaves the output unchanged.
        if channels == 1:
            color_name = 'GRAYSCALE'
        elif channels == 3:
            color_name = 'BGR' if is_bgr else 'RGB'
        else:
            raise ValueError(
                "Channel Value %d not supported for image outputs" % channels)

        output.type.imageType.colorSpace = color_space.Value(color_name)
        output.type.imageType.width = width
        output.type.imageType.height = height
Note: Neural networks can output images from a layer (as a CVPixelBuffer), but the values are clamped to the range 0 to 255, i.e. values < 0 become 0 and values > 255 become 255.
You can also just keep the output as an MLMultiArray and index into its pixels with something like this in Swift:
// Reads individual pixel values straight out of an MLMultiArray.
// NOTE(review): assumes imageMultiArray has shape (channels, height, width)
// and stores Double elements; confirm its dataType before using this with a
// model that emits Float32.
let imageData = imageMultiArray.dataPointer.assumingMemoryBound(to: Double.self)

// Strides are expressed in elements, one entry per dimension.
let channelStride = imageMultiArray.strides[0].intValue
let yStride = imageMultiArray.strides[1].intValue
let xStride = imageMultiArray.strides[2].intValue

/// Returns the flat element index of the value at (`channel`, `y`, `x`).
func pixelOffset(_ channel: Int, _ y: Int, _ x: Int) -> Int {
    return channel * channelStride + y * yStride + x * xStride
}

// Channel 1 is green for a 3-channel RGB/BGR layout. Raw multiarray values
// are not limited to 0...255, and UInt8(_:) traps on out-of-range input, so
// clamp before converting.
let greenValue = imageData[pixelOffset(1, 0, 0)]
let topLeftGreenPixel = UInt8(max(0, min(255, greenValue)))