I've created an app that can detect faces using Dlib's facial landmark predictor. I'm creating an AVCaptureSession to use the iphone camera and for it to work with the Dlib framework I'm using AVCaptureVideoDataOutput and system face detection via AVCaptureMetadataoutput. I got the face detection working so now I want to be able to record a video of the face being detected and have it save to file using AVAssetWriter.
When I do this just using AVCaptureVideoDataOutput and AVCaptureMetadataOutput it works fine. But when I try to add an audio output using AVCaptureAudioDataOutput it causes the app to crash. I can't think of a way around this as I need to have the audio recorded as well. Is there some problem trying to use AudioDataOutptu with MetadataOutput together? It's almost like the captureOutput function can't handle audio output while there's a face being detected.
Here's some of my code (this code works in another project where I'm just recording video output using AVCaptureVideoDataOutput and AVCaptureAudioOutput):
// MARK: Set up camera
func openSession() {
// size of the output video will be 720x1280
session.sessionPreset = .hd1280x720
// set up camera
videoCaptureDevice = AVCaptureDevice.default(.builtInWideAngleCamera, for: AVMediaType.video, position: .front)
if videoCaptureDevice != nil {
do {
// add the input from the device
try session.addInput(AVCaptureDeviceInput(device: videoCaptureDevice!))
// set up the microphone
if let audioInput = AVCaptureDevice.default(for: AVMediaType.audio) {
try session.addInput(AVCaptureDeviceInput(device: audioInput))
}
// define video output
videoDataOutput.videoSettings = [kCVPixelBufferPixelFormatTypeKey as String: kCVPixelFormatType_32BGRA,
]
videoDataOutput.alwaysDiscardsLateVideoFrames = true
let metaOutput = AVCaptureMetadataOutput()
if session.canAddOutput(videoDataOutput) {
videoDataOutput.setSampleBufferDelegate(self, queue: sampleQueue)
print("videodataoutput added")
session.addOutput(videoDataOutput)
}
// define audio output *** THIS IS THE BLOCK OF CODE RESULTING IN CRASH ***
if session.canAddOutput(audioDataOutput) {
audioDataOutput.setSampleBufferDelegate(self, queue: sampleQueue)
session.addOutput(audioDataOutput)
print("audiodataoutput added")
}
// define metadata output
if session.canAddOutput(metaOutput) {
metaOutput.setMetadataObjectsDelegate(self, queue: faceQueue)
session.addOutput(metaOutput)
print("metaoutput added")
}
// availableMetadataObjectTypes change when output is added to session.
// before it is added, availableMetadataObjectTypes is empty
metaOutput.metadataObjectTypes = [AVMetadataObject.ObjectType.face]
session.commitConfiguration()
// prepare the dlib face detection
wrapper?.prepare()
// start the session
session.startRunning()
} catch {
print(error)
}
// Mark: Create AVAssetWriter
func createAssetWriter() {
do {
outputUrl = videoFileLocation()
videoWriter = try AVAssetWriter(outputURL: outputUrl!, fileType: AVFileType.mov)
// add video input
videoWriterInput = AVAssetWriterInput(mediaType: AVMediaType.video, outputSettings: [
AVVideoCodecKey : AVVideoCodecType.h264,
AVVideoWidthKey : 720,
AVVideoHeightKey : 1280,
AVVideoCompressionPropertiesKey : [
AVVideoAverageBitRateKey : 2300000,
],
])
// var pixelBufferAdaptor = AVAssetWriterInputPixelBufferAdaptor(assetWriterInput: videoWriterInput!, sourcePixelBufferAttributes: [ kCVPixelBufferPixelFormatTypeKey as String : Int(kCVPixelFormatType_32BGRA)])
videoWriterInput.expectsMediaDataInRealTime = true
if videoWriter.canAdd(videoWriterInput) {
videoWriter.add(videoWriterInput)
print("video input added")
} else {
print("no input added")
}
// add audio input
audioWriterInput = AVAssetWriterInput(mediaType: AVMediaType.audio, outputSettings: [
AVFormatIDKey: kAudioFormatMPEG4AAC,
AVNumberOfChannelsKey: 1,
AVSampleRateKey: 44100,
AVEncoderBitRateKey: 64000,
])
audioWriterInput.expectsMediaDataInRealTime = true
if videoWriter.canAdd(audioWriterInput!) {
videoWriter.add(audioWriterInput!)
print("audio input added")
}
videoWriter.startWriting()
} catch let error {
debugPrint(error.localizedDescription)
}
}
// MARK: AVCaptureVideoDataOutputSampleBufferDelegate
func captureOutput(_ output: AVCaptureOutput, didOutput sampleBuffer: CMSampleBuffer, from connection: AVCaptureConnection) {
if !currentMetadata.isEmpty {
let boundsArray = currentMetadata
.compactMap { $0 as? AVMetadataFaceObject }
.map { (faceObject) -> NSValue in
// *** CONVERTEDOBJECT IS PRODUCING A FATAL ERROR AS IT'S NIL ***
let convertedObject = output.transformedMetadataObject(for: faceObject, connection: connection)
return NSValue(cgRect: convertedObject!.bounds)
}
wrapper?.doWork(on: sampleBuffer, inRects: boundsArray)
}
layer.enqueue(sampleBuffer)
let writable = canWrite()
if writable,
sessionAtSourceTime == nil {
// start writing
sessionAtSourceTime = CMSampleBufferGetPresentationTimeStamp(sampleBuffer)
videoWriter.startSession(atSourceTime: sessionAtSourceTime!)
//print("Writing")
}
if writable,
output == videoDataOutput,
(videoWriterInput.isReadyForMoreMediaData) {
// write video buffer
videoWriterInput.append(sampleBuffer)
print("video buffering")
} else if writable,
output == audioDataOutput,
(audioWriterInput.isReadyForMoreMediaData) {
// write audio buffer
audioWriterInput?.append(sampleBuffer)
print("audio buffering")
}
}
}
}
Thanks in advance