Run ARSession along with AVCaptureSession in SwiftUI app

I want to capture the RGB frame and the depth map of the current frame on a button tap. The RGB frame is sent to an image classifier and the result is spoken over the audio channel. I'm able to capture the RGB data using AVCaptureSession and it works as intended. However, as soon as I configure and run the ARSession, the camera preview freezes on the first frame. The depth map is still captured and saved, but the RGB image isn't captured anymore.


    @Published var isTaken = false
    @Published var session = AVCaptureSession()
    @Published var ARsession = ARSession()
    @Published var alert = false
    @Published var output = AVCapturePhotoOutput()
    @Published var depthMap: [Float32] = []

    // preview layer of the camera
    @Published var preview: AVCaptureVideoPreviewLayer!
    @Published var isSaved = false

    // to hold the captured image data
    @Published var picData = Data(count: 0)

    var classifier = Classifier()
    var speachSynthesizer = SpeechSynthesizer()

    // handle permissions
    func check(){
        switch AVCaptureDevice.authorizationStatus(for: .video){
        case .notDetermined:
            // ask for permission
            AVCaptureDevice.requestAccess(for: .video) { status in
                if status{
                    self.setup()
                }
            }
        case .denied:
            self.alert.toggle()
            return
        case .authorized:
            // set up both sessions
            setup()
            ARSetup()
            return
        @unknown default:
            return
        }
    }

    // set up the AVCaptureSession
    func setup(){
        do{
            let device = AVCaptureDevice.default(.builtInDualCamera, for: .video, position: .back)
            self.session.beginConfiguration()

            // .builtInDualCamera can be changed to the microphone for audio; for: can be changed for depth data
            let input = try AVCaptureDeviceInput(device: device!)

            if self.session.canAddInput(input){
                self.session.addInput(input)
            }
            if self.session.canAddOutput(output){
                self.session.addOutput(output)
            }
            self.session.commitConfiguration()
        }catch{
            print(error.localizedDescription)
        }
    }

    // set up the ARSession with scene depth
    func ARSetup(){
        self.ARsession.delegate = self
        let configuration = ARWorldTrackingConfiguration()
        if ARWorldTrackingConfiguration.supportsFrameSemantics(.sceneDepth){
            configuration.frameSemantics = .sceneDepth
            print("AR configured...")
        }
        self.ARsession.run(configuration)
    }

    func takePic(){
        DispatchQueue.global(qos: .background).async {
            self.output.capturePhoto(with: AVCapturePhotoSettings(), delegate: self)
            self.captureDepth()
            //self.session.stopRunning()
        }
    }

    func photoOutput(_ output: AVCapturePhotoOutput, didFinishProcessingPhoto photo: AVCapturePhoto, error: Error?) {
        if error != nil{
            return
        }
        print("Picture Taken....")

        guard let imageData = photo.fileDataRepresentation() else {return}
        self.picData = imageData
        predict()
    }

    func predict(){
        let ciImage = CIImage(data: self.picData)
        classifier.detect(ciImage: ciImage!)
        print(classifier.result)
        self.speachSynthesizer.speak(text: classifier.result!)
        //uploadImage(data: self.picData, filename: classifier.result!)
    }

    func captureDepth(){
        if let currentFrame = self.ARsession.currentFrame{
            print("depth captured...")
            guard let depthData = currentFrame.sceneDepth?.depthMap else {return}
            self.saveDepth(depthData: depthData)
        }
    }

    func saveDepth(depthData: CVPixelBuffer){
        let width = CVPixelBufferGetWidth(depthData)
        let height = CVPixelBufferGetHeight(depthData)
        //let depthSize = CGSize(width: width, height: height)

        let ciImage = CIImage(cvPixelBuffer: depthData)
        let context = CIContext.init(options: nil)
        guard let cgImageRef = context.createCGImage(ciImage, from: CGRect(x: 0, y: 0, width: width, height: height)) else {return}
        let uiImage = UIImage(cgImage: cgImageRef)
        print("depth saved...")
        UIImageWriteToSavedPhotosAlbum(uiImage, nil, nil, nil)
    }
}

You're asking two distinct frameworks (ARKit's ARSession and AVFoundation's AVCaptureSession) to access the camera at the same time, and they can't both own it, which is why the preview freezes as soon as the ARSession starts. Look at using the ARSession.currentFrame.capturedImage property for the RGB data instead of the AVCapturePhotoOutput.
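As a rough, untested sketch of what takePic() could look like when everything comes from a single ARFrame, reusing your own Classifier, SpeechSynthesizer and saveDepth(depthData:) helpers (the .oriented(.right) call is an assumption that the device is held in portrait; adjust as needed for your model):

    func takePic() {
        // ARSession owns the camera now, so read both images from the same ARFrame
        guard let frame = self.ARsession.currentFrame else { return }

        // RGB data: capturedImage is a CVPixelBuffer; wrap it in a CIImage for the classifier.
        // The buffer is delivered in landscape, so rotate it if your model expects portrait.
        let rgbImage = CIImage(cvPixelBuffer: frame.capturedImage)
            .oriented(.right)  // assumption: device held in portrait

        classifier.detect(ciImage: rgbImage)
        if let result = classifier.result {
            print(result)
            self.speachSynthesizer.speak(text: result)
        }

        // Depth data: same as before, but taken from the very same frame as the RGB image
        if let depthData = frame.sceneDepth?.depthMap {
            self.saveDepth(depthData: depthData)
        }
    }

With this approach you can drop the AVCaptureSession / AVCapturePhotoOutput path entirely and drive the on-screen preview from ARKit as well (for example an ARSCNView, or by rendering capturedImage yourself), so the two frameworks no longer compete for the camera.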
