We are creating an online book reading app in which we are initiating video call (group call:- for video call. we are using agora SDK) and at the join of call we start book reading and highlight words at other members' end also and recording/recognition text we are using SFSpeechRecognizer but whenever call kit start and video call start SFSpeechRecognizer start to record audio at others end it's getting failed always, can you please provide any solution to record audio during the video call.
//
// Speech.swift
// Edsoma
//
// Created by Kapil on 16/02/22.
//
import Foundation
import AVFoundation
import Speech
protocol SpeechRecognizerDelegate {
func didSpoke(speechRecognizer : SpeechRecognizer , word : String?)
}
class SpeechRecognizer: NSObject {
private let speechRecognizer = SFSpeechRecognizer(locale: Locale.init(identifier: "en-US")) //1
private var recognitionRequest: SFSpeechAudioBufferRecognitionRequest?
private var recognitionTask: SFSpeechRecognitionTask?
private let audioEngine = AVAudioEngine()
var delegate : SpeechRecognizerDelegate?
static let shared = SpeechRecognizer()
var isOn = false
func setup(){
speechRecognizer?.delegate = self //3
SFSpeechRecognizer.requestAuthorization { (authStatus) in //4
var isButtonEnabled = false
switch authStatus { //5
case .authorized:
isButtonEnabled = true
case .denied:
isButtonEnabled = false
print("User denied access to speech recognition")
case .restricted:
isButtonEnabled = false
print("Speech recognition restricted on this device" )
case .notDetermined:
isButtonEnabled = false
print("Speech recognition not yet authorized")
@unknown default:
break;
}
OperationQueue.main.addOperation() {
// self.microphoneButton.isEnabled = isButtonEnabled
}
}
}
func transcribeAudio(url: URL) {
// create a new recognizer and point it at our audio
let recognizer = SFSpeechRecognizer()
let request = SFSpeechURLRecognitionRequest(url: url)
// start recognition!
recognizer?.recognitionTask(with: request) { [unowned self] (result, error) in
// abort if we didn't get any transcription back
guard let result = result else {
print("There was an error: \(error!)")
return
}
// if we got the final transcription back, print it
if result.isFinal {
// pull out the best transcription...
print(result.bestTranscription.formattedString)
}
}
}
func startRecording() {
isOn = true
let inputNode = audioEngine.inputNode
if recognitionTask != nil {
inputNode.removeTap(onBus: 0)
self.audioEngine.stop()
self.recognitionRequest = nil
self.recognitionTask = nil
DispatchQueue.main.asyncAfter(deadline: DispatchTime.now() + 1) {
self.startRecording()
}
return
debugPrint("****** recognitionTask != nil *************")
}
let audioSession = AVAudioSession.sharedInstance()
do {
try audioSession.setCategory(AVAudioSession.Category.multiRoute)
try audioSession.setMode(AVAudioSession.Mode.measurement)
try audioSession.setActive(true, options: .notifyOthersOnDeactivation)
} catch {
print("audioSession properties weren't set because of an error.")
}
recognitionRequest = SFSpeechAudioBufferRecognitionRequest()
guard let recognitionRequest = recognitionRequest else {
fatalError("Unable to create an SFSpeechAudioBufferRecognitionRequest object")
}
recognitionRequest.shouldReportPartialResults = true
recognitionRequest.taskHint = .search
recognitionTask = speechRecognizer?.recognitionTask(with: recognitionRequest, resultHandler: { (result, error) in
var isFinal = false
if result != nil {
self.delegate?.didSpoke(speechRecognizer: self, word: result?.bestTranscription.formattedString)
debugPrint(result?.bestTranscription.formattedString)
isFinal = (result?.isFinal)!
}
if error != nil {
debugPrint("Speech Error ====>",error)
inputNode.removeTap(onBus: 0)
self.audioEngine.stop()
self.recognitionRequest = nil
self.recognitionTask = nil
if BookReadingSettings.isSTTEnable{
DispatchQueue.main.asyncAfter(deadline: DispatchTime.now() + 1) {
self.startRecording()
}
}
// self.microphoneButton.isEnabled = true
}
})
// let recordingFormat = AVAudioFormat.init(commonFormat: .pcmFormatFloat32, sampleRate: <#T##Double#>, interleaved: <#T##Bool#>, channelLayout: <#T##AVAudioChannelLayout#>)//inputNode.outputFormat(forBus: 0)
inputNode.removeTap(onBus: 0)
let sampleRate = AVAudioSession.sharedInstance().sampleRate
let recordingFormat = AVAudioFormat(standardFormatWithSampleRate: sampleRate, channels: 1)
inputNode.installTap(onBus: 0, bufferSize: 1024, format: recordingFormat) { (buffer, when) in
self.recognitionRequest?.append(buffer)
}
audioEngine.prepare()
do {
try audioEngine.start()
} catch {
print("audioEngine couldn't start because of an error.")
}
debugPrint("Say something, I'm listening!")
//textView.text = "Say something, I'm listening!"
}
/* func stopRecording(){
isOn = false
debugPrint("Recording stoped")
self.audioEngine.stop()
recognitionTask?.cancel()
let inputNode = audioEngine.inputNode
inputNode.removeTap(onBus: 0)
self.recognitionRequest = nil
self.recognitionTask = nil
}*/
func stopRecording(){
isOn = false
debugPrint("Recording stoped")
let inputNode = audioEngine.inputNode
inputNode.removeTap(onBus: 0)
self.audioEngine.stop()
recognitionTask?.cancel()
self.recognitionRequest = nil
self.recognitionTask = nil
}
}
extension SpeechRecognizer : SFSpeechRecognizerDelegate {
}