I have an app that has the camera continuously running, as it is doing its own AI. I have zero need for Apple's video effects, and am seeing a 200% performance hit after updating to Sonoma. The video effects are the "heaviest stack trace" when profiling my app with Instruments CPU profiler (see below).
Is forcing your software onto developers not something Microsoft would do? Is there really no way to opt out?
6671 Jamscape_exp (23038)
2697 start_wqthread
2697 _pthread_wqthread
2183 _dispatch_workloop_worker_thread
2156 _dispatch_root_queue_drain_deferred_wlh
2153 _dispatch_lane_invoke
2146 _dispatch_lane_serial_drain
1527 _dispatch_client_callout
1493 _dispatch_call_block_and_release
777 __88-[PTHandGestureDetector initWithFrameSize:asyncInitQueue:externalHandDetectionsEnabled:]_block_invoke
777 -[VCPHandGestureVideoRequest initWithOptions:]
508 -[VCPHandGestureClassifier initWithMinHandSize:]
508 -[VCPCoreMLRequest initWithModelName:]
506 +[MLModel modelWithContentsOfURL:configuration:error:]
506 -[MLModelAsset modelWithError:]
506 -[MLModelAsset load:]
506 +[MLLoader loadModelFromAssetAtURL:configuration:error:]
506 +[MLLoader _loadModelFromAssetAtURL:configuration:loaderEvent:error:]
505 +[MLLoader _loadModelFromArchive:configuration:loaderEvent:useUpdatableModelLoaders:error:]
505 +[MLLoader _loadWithModelLoaderFromArchive:configuration:loaderEvent:useUpdatableModelLoaders:error:]
505 +[MLLoader _loadModelFromArchive:configuration:modelVersion:compilerVersion:loaderEvent:useUpdatableModelLoaders:loadingClasses:error:]
505 +[MLLoader _loadModelWithClass:fromArchive:modelVersionInfo:compilerVersionInfo:configuration:error:]
445 +[MLMultiFunctionProgramEngine loadModelFromCompiledArchive:modelVersionInfo:compilerVersionInfo:configuration:error:]
333 -[MLMultiFunctionProgramEngine initWithProgramContainer:configuration:error:]
333 -[MLNeuralNetworkEngine initWithContainer:configuration:error:]
318 -[MLNeuralNetworkEngine _setupContextAndPlanWithConfiguration:usingCPU:reshapeWithContainer:error:]
313 -[MLNeuralNetworkEngine _addNetworkToPlan:error:]
313 espresso_plan_add_network
313 EspressoLight::espresso_plan::add_network(char const*, espresso_storage_type_t)
313 EspressoLight::espresso_plan::add_network(char const*, espresso_storage_type_t, std::__1::shared_ptrEspresso::net)
313 Espresso::load_network(std::__1::basic_string<char, std::__1::char_traits, std::__1::allocator> const&, std::__1::shared_ptrEspresso::abstract_context const&, Espresso::compute_path, bool)
235 Espresso::reload_network_on_context(std::__1::shared_ptrEspresso::net const&, std::__1::shared_ptrEspresso::abstract_context const&, Espresso::compute_path)
226 Espresso::load_and_shape_network(std::__1::shared_ptrEspresso::SerDes::generic_serdes_object const&, std::__1::basic_string<char, std::__1::char_traits, std::__1::allocator> const&, std::__1::shared_ptrEspresso::abstract_context const&, Espresso::network_shape const&, Espresso::compute_path, std::__1::shared_ptrEspresso::blob_storage_abstract const&, std::__1::basic_string<char, std::__1::char_traits, std::__1::allocator> const&)
214 Espresso::load_network_layers_internal(std::__1::shared_ptrEspresso::SerDes::generic_serdes_object, std::__1::basic_string<char, std::__1::char_traits, std::__1::allocator> const&, std::__1::basic_string<char, std::__1::char_traits, std::__1::allocator> const&, std::__1::shared_ptrEspresso::abstract_context const&, Espresso::network_shape const&, std::__1::basic_istream<char, std::__1::char_traits>, Espresso::compute_path, bool, std::__1::shared_ptrEspresso::blob_storage_abstract const&)
208 Espresso::run_dispatch_v2(std::__1::shared_ptrEspresso::abstract_context, std::__1::shared_ptrEspresso::net, std::__1::vector<std::__1::shared_ptrEspresso::SerDes::generic_serdes_object, std::__1::allocator<std::__1::shared_ptrEspresso::SerDes::generic_serdes_object>> const&, Espresso::network_shape const&, Espresso::compute_path const&, std::__1::basic_istream<char, std::__1::char_traits>)
141 try_dispatch(std::__1::shared_ptrEspresso::abstract_context, std::__1::shared_ptrEspresso::net, std::__1::vector<std::__1::shared_ptrEspresso::SerDes::generic_serdes_object, std::__1::allocator<std::__1::shared_ptrEspresso::SerDes::generic_serdes_object>> const&, Espresso::network_shape const&, Espresso::compute_path const&, std::__1::basic_istream<char, std::__1::char_traits>, Espresso::platform const&, Espresso::compute_path const&)
131 Espresso::get_net_info_ir(std::__1::shared_ptrEspresso::abstract_context, std::__1::shared_ptrEspresso::net, std::__1::vector<std::__1::shared_ptrEspresso::SerDes::generic_serdes_object, std::__1::allocator<std::__1::shared_ptrEspresso::SerDes::generic_serdes_object>> const&, Espresso::network_shape const&, Espresso::compute_path const&, Espresso::platform const&, Espresso::compute_path const&, std::__1::shared_ptrEspresso::cpu_context_transfer_algo_t&, std::__1::shared_ptrEspresso::net_info_ir_t&, std::__1::shared_ptrEspresso::kernels_validation_status_t&)
131 Espresso::cpu_context_transfer_algo_t::create_net_info_ir(std::__1::vector<std::__1::shared_ptrEspresso::SerDes::generic_serdes_object, std::__1::allocator<std::__1::shared_ptrEspresso::SerDes::generic_serdes_object>> const&, std::__1::shared_ptrEspresso::abstract_context, Espresso::network_shape const&, Espresso::compute_path, std::__1::shared_ptrEspresso::net_info_ir_t)
120 Espresso::cpu_context_transfer_algo_t::check_all_kernels_availability_on_context(std::__1::vector<std::__1::shared_ptrEspresso::SerDes::generic_serdes_object, std::__1::allocator<std::__1::shared_ptrEspresso::SerDes::generic_serdes_object>> const&, std::__1::shared_ptrEspresso::abstract_context&, Espresso::compute_path, std::__1::shared_ptrEspresso::net_info_ir_t&)
120 is_kernel_available_on_engine(unsigned long, std::__1::shared_ptrEspresso::base_kernel, Espresso::kernel_info_t const&, std::__1::shared_ptrEspresso::SerDes::generic_serdes_object, std::__1::shared_ptrEspresso::abstract_context, Espresso::compute_path, std::__1::shared_ptrEspresso::net_info_ir_t, std::__1::shared_ptrEspresso::kernels_validation_status_t)
83 Espresso::ANECompilerEngine::mix_reshape_kernel::is_valid_for_engine(std::__1::shared_ptrEspresso::kernels_validation_status_t, Espresso::base_kernel::validate_for_engine_args_t const&) const
45 int ValidateLayer<ANECReshapeLayerDesc, ZinIrReshapeUnit, ZinIrReshapeUnitInfo, ANECReshapeLayerDescAlternate>(void*, ANECReshapeLayerDesc const*, ANECTensorDesc const*, unsigned long, unsigned long*, ANECReshapeLayerDescAlternate**, ANECTensorValueDesc const*)
45 void ValidateLayer_Impl<ANECReshapeLayerDesc, ZinIrReshapeUnit, ZinIrReshapeUnitInfo, ANECReshapeLayerDescAlternate>(void*, ANECReshapeLayerDesc const*, ANECTensorDesc const*, unsigned long, unsigned long*, ANECReshapeLayerDescAlternate**, ANECTensorValueDesc const*)
(...)
Post
Replies
Boosts
Views
Activity
So I've spent the last five years optimizing my video AI system so that it runs with less than 5% CPU while processing a 30fps video feed on a Macbook Pro M2, and everything is great, until Sonoma comes out, and I find myself consuming 40% CPU for the exact same workload.
So I fire up Instruments, and the "heaviest stack trace" (see screenshot) turns out to be Espresso doing some completely unasked-for and absolutely useless processing on my video frames. I turn off Reactions, but nothing helps - the CPU consumption stays at 40%.
"Reactions" is nothing but a useless toy to please some WWDC keynote fanboys, I don't want it anywhere near my app or my users, and I especially do not want to take the blame for it pissing away the user's CPU cycles and battery.
Now, how do I make it go away, for ever?
Best regards
Jacob
In Instruments' CPU Profiling tool, I've noticed that a significant portion (22.5%) of the CPU-side overhead related to MPS matrix multiplication (GEMM) is in a call to getenv(). Please see attached screenshot.
It seems unnecessary to perform this same check over and over, as whatever hack that needs this should be able to perform the getenv() only once and cache the result for future use.
I am developing a MacOS video/audio chat app that uses the audio input + audio only intermittently. The rest of the time I need to stop and tear down AVAudioEngine to allow other applications such as music players to use audio.
I have found that just pausing or stopping the engine is not enough; I need to completely tear it down and force a deinit by setting engine = nil in my Objective-C code (with ARC enabled).
What I have learned is that I have to make sure to tear down and detach absolutely everything, otherwise AVAudioEngine will fail to start the next time, especially when using a bluetooth headset. After months of trial and error, I have something that appears to be almost stable. However, I am sometimes hitting the crash shown below after alloc + init of the AVAudioEngine instance, when enabling voice processing. The crash is found when building with address-sanitizer enabled, and the logging above the line is my own:
stopping audio engine
disabling voice processing...
voice processing disabled
engine stopped
waiting for engine...
starting audio engine...
enabling voice processing...
==75508==ERROR: AddressSanitizer: heap-buffer-overflow on address 0x000111e11be0 at pc 0x000103123360 bp 0x00016d231c90 sp 0x00016d231450
WRITE of size 52 at 0x000111e11be0 thread T218
#0 0x10312335c in wrap_memcpy+0x244 (libclang_rt.asan_osx_dynamic.dylib:arm64e+0x1b35c) (BuildId: f0a7ac5c49bc3abc851181b6f92b308a32000000200000000100000000000b00)
#1 0x1077f407c (CoreAudio:arm64e+0xc07c) (BuildId: 3318bd64e64f3e69991d605d1bc10d7d32000000200000000100000000030d00)
#2 0x1078f1484 (CoreAudio:arm64e+0x109484) (BuildId: 3318bd64e64f3e69991d605d1bc10d7d32000000200000000100000000030d00)
#3 0x1a3d661a0 in AudioUnitGetProperty+0x1c0 (AudioToolboxCore:arm64e+0x2101a0) (BuildId: 3a76e12cd37d3545bb42d52848e0bd7032000000200000000100000000030d00)
#4 0x207d8be38 in AVAudioIOUnit_OSX::_GetHWFormat(unsigned int, unsigned int*)+0x76c (AVFAudio:arm64e+0xbde38) (BuildId: 4a3f05007b8c35c98be4e78396ca9eeb32000000200000000100000000030d00)
#5 0x207d8aea4 in invocation function for block in AVAudioIOUnit::IOUnitPropertyListener(void*, ComponentInstanceRecord*, unsigned int, unsigned int, unsigned int)+0x15c (AVFAudio:arm64e+0xbcea4) (BuildId: 4a3f05007b8c35c98be4e78396ca9eeb32000000200000000100000000030d00)
#6 0x103149f74 in __wrap_dispatch_async_block_invoke+0xc0 (libclang_rt.asan_osx_dynamic.dylib:arm64e+0x41f74) (BuildId: f0a7ac5c49bc3abc851181b6f92b308a32000000200000000100000000000b00)
#7 0x1a1d4a870 in _dispatch_call_block_and_release+0x1c (libdispatch.dylib:arm64e+0x2870) (BuildId: 8e87dc0ea5703933b37d5e05ad51620632000000200000000100000000030d00)
#8 0x1a1d4c3fc in _dispatch_client_callout+0x10 (libdispatch.dylib:arm64e+0x43fc) (BuildId: 8e87dc0ea5703933b37d5e05ad51620632000000200000000100000000030d00)
#9 0x1a1d53a84 in _dispatch_lane_serial_drain+0x298 (libdispatch.dylib:arm64e+0xba84) (BuildId: 8e87dc0ea5703933b37d5e05ad51620632000000200000000100000000030d00)
#10 0x1a1d545f4 in _dispatch_lane_invoke+0x17c (libdispatch.dylib:arm64e+0xc5f4) (BuildId: 8e87dc0ea5703933b37d5e05ad51620632000000200000000100000000030d00)
#11 0x1a1d5f240 in _dispatch_workloop_worker_thread+0x284 (libdispatch.dylib:arm64e+0x17240) (BuildId: 8e87dc0ea5703933b37d5e05ad51620632000000200000000100000000030d00)
#12 0x1a1ef8070 in _pthread_wqthread+0x11c (libsystem_pthread.dylib:arm64e+0x3070) (BuildId: b401cfb38dfe32db92b3ba8af0f8ca6e32000000200000000100000000030d00)
#13 0x1a1ef6d90 in start_wqthread+0x4 (libsystem_pthread.dylib:arm64e+0x1d90) (BuildId: b401cfb38dfe32db92b3ba8af0f8ca6e32000000200000000100000000030d00)
0x000111e11be0 is located 0 bytes to the right of 32-byte region [0x000111e11bc0,0x000111e11be0)
allocated by thread T218 here:
#0 0x10314ae68 in wrap_malloc+0x94 (libclang_rt.asan_osx_dynamic.dylib:arm64e+0x42e68) (BuildId: f0a7ac5c49bc3abc851181b6f92b308a32000000200000000100000000000b00)
#1 0x207d8bdd4 in AVAudioIOUnit_OSX::_GetHWFormat(unsigned int, unsigned int*)+0x708 (AVFAudio:arm64e+0xbddd4) (BuildId: 4a3f05007b8c35c98be4e78396ca9eeb32000000200000000100000000030d00)
#2 0x207d8aea4 in invocation function for block in AVAudioIOUnit::IOUnitPropertyListener(void*, ComponentInstanceRecord*, unsigned int, unsigned int, unsigned int)+0x15c (AVFAudio:arm64e+0xbcea4) (BuildId: 4a3f05007b8c35c98be4e78396ca9eeb32000000200000000100000000030d00)
#3 0x103149f74 in __wrap_dispatch_async_block_invoke+0xc0 (libclang_rt.asan_osx_dynamic.dylib:arm64e+0x41f74) (BuildId: f0a7ac5c49bc3abc851181b6f92b308a32000000200000000100000000000b00)
#4 0x1a1d4a870 in _dispatch_call_block_and_release+0x1c (libdispatch.dylib:arm64e+0x2870) (BuildId: 8e87dc0ea5703933b37d5e05ad51620632000000200000000100000000030d00)
#5 0x1a1d4c3fc in _dispatch_client_callout+0x10 (libdispatch.dylib:arm64e+0x43fc) (BuildId: 8e87dc0ea5703933b37d5e05ad51620632000000200000000100000000030d00)
#6 0x1a1d53a84 in _dispatch_lane_serial_drain+0x298 (libdispatch.dylib:arm64e+0xba84) (BuildId: 8e87dc0ea5703933b37d5e05ad51620632000000200000000100000000030d00)
#7 0x1a1d545f4 in _dispatch_lane_invoke+0x17c (libdispatch.dylib:arm64e+0xc5f4) (BuildId: 8e87dc0ea5703933b37d5e05ad51620632000000200000000100000000030d00)
#8 0x1a1d5f240 in _dispatch_workloop_worker_thread+0x284 (libdispatch.dylib:arm64e+0x17240) (BuildId: 8e87dc0ea5703933b37d5e05ad51620632000000200000000100000000030d00)
#9 0x1a1ef8070 in _pthread_wqthread+0x11c (libsystem_pthread.dylib:arm64e+0x3070) (BuildId: b401cfb38dfe32db92b3ba8af0f8ca6e32000000200000000100000000030d00)
#10 0x1a1ef6d90 in start_wqthread+0x4 (libsystem_pthread.dylib:arm64e+0x1d90) (BuildId: b401cfb38dfe32db92b3ba8af0f8ca6e32000000200000000100000000030d00)
This is on a Macbook M2 Pro running MacOS 13.3.1 (a) (22E772610a).
What is the best way to proceed with this? It looks to me like a bug in AVAudioEngine/CoreAudio.
Best regards,
Jacob Gorm Hansen
I've noticed that my Catalyst app uses around 65% CPU when capturing from the camera, even when everything else is idle. Disabling camera capture drops the CPU usage to 0 (middle of graph in screenshot). I profiled using Instruments, and found that the heaviest stack trace involved face tracking (see attached screenshot), even though no metadata output was added for the camera. Is this a bug in AVFoundation?