I faced this same problem. Clicked on ParticleEmitter and it crashed. Diorama crashes, and WWDC23 session 10083 is not workable on the platform. Please take a look and plan to address it as soon as possible. The platform is:
MacBook Pro (2020) -- 2 GHz Quad-Core Intel Core i5 -- Intel Iris Plus Graphics 1536 MB
macOS: Sonoma 14.0 Beta (23A5286i) -- Before this I tried on macOS Ventura 13.4 and 13.4.1 as well, and faced the same problem
launchd.log
realityComposerPro.crashReport
Post
Replies
Boosts
Views
Activity
/////////////////////////
/////////////////////////
// ensures all operations happen off of the `@MainActor`.
// Excerpted actor that owns the capture session; an actor so all capture
// work stays off the main actor (per the comment above this excerpt).
actor CaptureService {
...
// NOTE(review): `nonisolated` lets `resultsObserver1` be reached without an
// actor hop, but it also means its mutable internals are not protected by
// this actor — confirm ResultsObserver1 is safe to touch from any context.
nonisolated private let resultsObserver1 = ResultsObserver1()
...
private func setUpSession() throws { .. }
...
// NOTE(review): the `func` keyword is missing in this excerpt — presumably
// `private func setVideoCaptureMode() throws` in the full file.
setVideoCaptureMode() throws {
captureSession.beginConfiguration()
// Guarantee the configuration is committed on every exit path, including throws.
defer { captureSession.commitConfiguration() }
/* -- Works fine (analyzeAudio is printed)
Task {
self.resultsObserver1.startAudioEngine()
}
*/
// Direct (synchronous, inside the session configuration transaction) call —
// reported not to produce the analyzeAudio logs, unlike the Task {} variant above.
self.resultsObserver1.startAudioEngine() // Does not work - analyzeAudio not printed
captureSession.sessionPreset = .high
try addOutput(movieCapture.output)
if isHDRVideoEnabled {
setHDRVideoEnabled(true)
}
updateCaptureCapabilities()
}
/////////////////////////
/////////////////////////
class ResultsObserver1 {
    let resultsObserver2 = ResultsObserver2()
    var classifiedText: String = ""
    var confidence: Double = 0.0
    private var audioEngine: AVAudioEngine?
    private var soundAnalyzer: SNAudioStreamAnalyzer?
    private var inputFormat: AVAudioFormat?
    // Serial queue: buffers are handed to the analyzer off the audio render thread.
    let analysisQueue = DispatchQueue(label: "com.example.AnalysisQueue")

    /// Forwards one captured buffer to the stream analyzer on `analysisQueue`.
    /// - Parameters:
    ///   - buffer: Audio data delivered by the input-node tap.
    ///   - time: Capture time; `sampleTime` anchors the analyzer's frame position.
    func analyzeAudio(buffer: AVAudioBuffer, at time: AVAudioTime) {
        print("analyzeAudio")
        analysisQueue.async {
            print("analyze")
            self.soundAnalyzer?.analyze(buffer,
                                        atAudioFramePosition: time.sampleTime)
        }
    }

    /// Removes the input tap, stops the engine, and releases all audio state.
    func stopAudioEngine() {
        print("stopAudioEngine")
        soundAnalyzer?.removeAllRequests()
        audioEngine?.inputNode.removeTap(onBus: 0)
        audioEngine?.stop()
        soundAnalyzer = nil
        audioEngine = nil
        inputFormat = nil
    }

    /// Builds the engine and `SNAudioStreamAnalyzer`, installs the input tap,
    /// then prepares and starts the engine exactly once.
    ///
    /// Fix: the original implementation called `audioEngine?.start()` twice —
    /// once *before* the analyzer was created and the tap installed, and again
    /// at the end. The premature start is removed so the engine is only started
    /// after the tap is in place; this is the ordering the tap callback (and
    /// hence the `analyzeAudio` logging) depends on.
    func startAudioEngine() {
        print("startAudioEngine")
        // Create a new audio engine.
        audioEngine = AVAudioEngine()
        print("audioEngine: \(String(describing: audioEngine))")
        // Get the native audio format of the engine's input bus.
        let inputBus = AVAudioNodeBus(0)
        inputFormat = audioEngine?.inputNode.inputFormat(forBus: inputBus)
        print("inputFormat: \(String(describing: inputFormat))")
        guard let inputFormat = inputFormat else {
            print("Failed to get input format")
            return
        }
        // Create a new stream analyzer for the input format.
        soundAnalyzer = SNAudioStreamAnalyzer(format: inputFormat)
        print("soundAnalyzer: \(String(describing: soundAnalyzer))")
        // Use Apple's built-in classifier version 1.
        let version1 = SNClassifierIdentifier.version1
        do {
            // Create a classification request for version 1.
            let request = try SNClassifySoundRequest(classifierIdentifier: version1)
            // Add a sound classification request that reports to an observer.
            try soundAnalyzer?.add(request,
                                   withObserver: resultsObserver2)
            print("Added request to soundAnalyzer with the specified observer")
        } catch {
            print("Error setting up sound analysis: \(error)")
            return
        }
        // Install the tap BEFORE starting the engine so no buffers are dropped.
        audioEngine?.inputNode.installTap(onBus: 0, bufferSize: 8192, format: inputFormat, block: { buffer, when in
            self.analyzeAudio(buffer: buffer, at: when)
        })
        do {
            // Prepare and start the audio engine — the single, post-tap start.
            audioEngine?.prepare()
            try audioEngine?.start()
        } catch {
            print("Error starting audio engine: \(error)")
        }
    }
}
class ResultsObserver2: NSObject, SNResultsObserving {
    /// Notifies the observer when a request generates a prediction.
    func request(_ request: SNRequest, didProduce result: SNResult) {
        // Only classification results carry labels; take the top-confidence one.
        guard let classificationResult = result as? SNClassificationResult,
              let topClassification = classificationResult.classifications.first else {
            return
        }
        // Starting time of the analyzed window, rendered as a readable string.
        let formattedTime = String(format: "%.2f", classificationResult.timeRange.start.seconds)
        print("Analysis result for audio at time: \(formattedTime)")
        // Report the winning label with its confidence as a percentage.
        let percentString = String(format: "%.2f%%", topClassification.confidence * 100.0)
        print("\(topClassification.identifier): \(percentString) confidence.\n")
    }

    /// Notifies the observer when a request generates an error.
    func request(_ request: SNRequest, didFailWithError error: Error) {
        print("The analysis failed: \(error.localizedDescription)")
    }

    /// Notifies the observer when a request is complete.
    func requestDidComplete(_ request: SNRequest) {
        print("The request completed successfully!")
    }
}
Hi Greg,
Thank you for taking the time to look into this.
Unfortunately, I wasn't able to upload the file directly due to the "Add File" option not allowing me to select the 19.9 MB compressed zip file. To make it accessible, I’ve uploaded the file to an S3 bucket, and you can download it here:
https://curvsort-public.s3.ap-south-1.amazonaws.com/AVCamBuildingACameraApp_1.zip. Please let me know once you've downloaded it, and I will delete it from the bucket.
The key files with the focused issue are ESoundClassifier and CaptureService (specifically resultsObserver1 and the sections marked Works 1/2/3).
To Reproduce the Issue:
I am running the app on an iPhone 11 with Swift 5 and Xcode 16
Problem Case: In this case, there are no logs for the analyze and analyzeAudio methods.
Working Case: To observe the working flow, comment out the code at // Does not work 1 and uncomment the three lines at // Works 3. You should then see logs for both analyze and analyzeAudio.
Swift 6 Concurrency Issues:
After switching the build setting to Swift 6, the following concurrency-related errors are reported:
ThumbnailButton Issue: The PhotosPicker throws an error when displaying the thumbnail: "Main actor-isolated property 'thumbnail' cannot be referenced from a Sendable closure."
AVAudioBuffer: The AVAudioBuffer is also flagged as a non-sendable type, which causes concurrency errors.
Apple Source Code References:
AVCam: Building a Camera App
Classifying Sounds in an Audio Stream
I hope this provides the necessary context. Let me know if you need any further details.
Best regards
Thank you for the insights on fine-tuning SNSoundClassifier with AudioFeaturePrint and logistic regression.
However, I’m still unclear on how to effectively integrate embeddings from SNSoundClassifier into this pipeline, given that they aren’t directly accessible.
Are there specific steps or methodologies to consider for augmenting the base model with user-supplied audio data, and how can I ensure the classifier accurately reflects custom sound classes?
What specific pipeline do you recommend? Base model seems to be necessary while fine-tuning on CreateML. If SNSoundClassifier can be used then how? If it cannot be used as base model then its going to be either TF or PyTorch model (which one)
Any additional guidance would be greatly appreciated!
Conversion Script:
import tensorflow as tf
import tensorflow_hub as hub
import coremltools as ct
import numpy as np

# --- Load the pretrained YAMNet model from TensorFlow Hub (weights frozen). ---
hub_layer = hub.KerasLayer("https://tfhub.dev/google/yamnet/1", trainable=False, dtype=tf.float32)

# --- Wrap the hub layer in a Keras model with a fixed-length waveform input. ---
# YAMNet consumes a flat mono waveform, so the batched input is flattened first.
waveform = tf.keras.Input(shape=(15600,), name="audio_waveform", dtype=tf.float32)
predictions = hub_layer(tf.reshape(waveform, [-1]))
keras_model = tf.keras.Model(waveform, predictions)

# --- Persist as a TensorFlow SavedModel for the converter to read back. ---
keras_model.save("yamnet_saved_model")

# --- Convert the SavedModel to Core ML, pinning the input tensor's shape and dtype. ---
coreml_model = ct.convert(
    "yamnet_saved_model",
    source="tensorflow",
    inputs=[ct.TensorType(shape=(1, 15600), name="audio_waveform", dtype=np.float32)]
)

# --- Write out the converted model. ---
coreml_model.save("YAMNet.mlmodel")
print("Conversion successful! Model saved as YAMNet.mlmodel")
Execution Error:
√ yamnet-tensorflow2-yamnet-v1 % python convertItScript.py
2024-11-09 15:41:35.807906: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
Running TensorFlow Graph Passes: 100%
Converting TF Frontend ==> MIL Ops: 56%
Traceback (most recent call last):
File "~/yamnet-tensorflow2-yamnet-v1/convertItScript.py", line 19, in <module>
mlmodel = ct.convert(
File "~/.pyenv/versions/3.10.12/lib/python3.10/site-packages/coremltools/converters/_converters_entry.py", line 444, in convert
mlmodel = mil_convert(
File "~/.pyenv/versions/3.10.12/lib/python3.10/site-packages/coremltools/converters/mil/converter.py", line 190, in mil_convert
return _mil_convert(model, convert_from, convert_to, ConverterRegistry, MLModel, compute_units, **kwargs)
File "~/.pyenv/versions/3.10.12/lib/python3.10/site-packages/coremltools/converters/mil/converter.py", line 217, in _mil_convert
...
File "~/.pyenv/versions/3.10.12/lib/python3.10/site-packages/coremltools/converters/mil/mil/ops/defs/iOS15/elementwise_unary.py", line 870, in value_inference
return self.get_cast_value(self.x, self.dtype.val)
File "~/.pyenv/versions/3.10.12/lib/python3.10/site-packages/coremltools/converters/mil/mil/ops/defs/iOS15/elementwise_unary.py", line 896, in get_cast_value
return input_var.val.astype(dtype=type_map[dtype_val])
AttributeError: 'float' object has no attribute 'astype'
In WWDC 2024 session 10223, Apple demonstrated the availability of multiple Core ML modules, including Vision, Natural Language, Sound, Speech, and Translation. These modules can be leveraged concurrently within an app, allowing tasks to run in parallel.
Could you provide guidance on best practices for performing multiple Core ML tasks simultaneously on iPhone and MacBook hardware? Specifically, I’d like to know about the feasibility and efficiency of running three parallel tasks on different Apple chipsets (A-series for iPhone and M-series for MacBook). Are there particular recommendations for optimizing parallel tasks on these chips to maintain performance and battery efficiency?
Thank you!
Highlighted difference indicating code used from the sample.
---
>
68,69d67
< private let videoDataOutputQueue = DispatchQueue(label: "VideoDataOutput", qos: .userInitiated, attributes: [], autoreleaseFrequency: .workItem)
< var bufferSize: CGSize = .zero
---
>
102d99
< print("captureOutput error")
114,142d110
<
< private func setForVideoClassify() {
< print("setForVideoClassify()")
< // Add the video data output for classification if it's not already added
< let videoDataOutput = AVCaptureVideoDataOutput()
< videoDataOutput.alwaysDiscardsLateVideoFrames = true
< videoDataOutput.videoSettings = [
< kCVPixelBufferPixelFormatTypeKey as String: Int(kCVPixelFormatType_420YpCbCr8BiPlanarFullRange)
< ]
< videoDataOutput.setSampleBufferDelegate(self, queue: videoDataOutputQueue)
<
< if captureSession.canAddOutput(videoDataOutput) {
< captureSession.addOutput(videoDataOutput)
< } else {
< print("Could not add video data output")
< }
< let captureConnection = videoDataOutput.connection(with: .video)
< // Always process the frames
< captureConnection?.isEnabled = true
< do {
< let videoDevice = currentDevice
< try videoDevice.lockForConfiguration()
< let dimensions = CMVideoFormatDescriptionGetDimensions((videoDevice.activeFormat.formatDescription))
< bufferSize.width = CGFloat(dimensions.width)
< bufferSize.height = CGFloat(dimensions.height)
< videoDevice.unlockForConfiguration()
< } catch {
< print("setForVideoClassify error: \(error)")
< }
144,145d111
< }
<
262,265c228
< setupVision()
< setForVideoClassify()
< captureSession.sessionPreset = .vga640x480
< //captureSession.sessionPreset = .high
---
> captureSession.sessionPreset = .high
526a490
> setForVideoClassify()
529a494,510
> private func setForVideoClassify() {
> print("setForVideoClassify()")
> // Add the video data output for classification if it's not already added
> let videoDataOutput = AVCaptureVideoDataOutput()
> videoDataOutput.alwaysDiscardsLateVideoFrames = true
> videoDataOutput.videoSettings = [
> kCVPixelBufferPixelFormatTypeKey as String: Int(kCVPixelFormatType_420YpCbCr8BiPlanarFullRange)
> ]
> videoDataOutput.setSampleBufferDelegate(self, queue: DispatchQueue(label: "VideoDataOutputQueue"))
>
> if captureSession.canAddOutput(videoDataOutput) {
> captureSession.addOutput(videoDataOutput)
> } else {
> print("Could not add video data output")
> }
> }
>
567a549
>