Hi, I'm trying to modify the ScreenCaptureKit sample code by implementing an actor for Metal rendering, but I'm running into issues with the order in which frames are rendered.
My app workflow is:
ScreenCapture -> createFrame -> setRenderData
Metal draw callback -> renderAsync (getData from renderData)
I've added timestamps to verify frame ordering, and I also use a binary search to insert each frame by its timestamp. While the timestamps appear to be in sequence, the actual rendered output still looks out of order.
// ScreenCaptureKit sample
func createFrame(for sampleBuffer: CMSampleBuffer) async {
    if let surface: IOSurface = getIOSurface(for: sampleBuffer) {
        await renderer.setRenderData(surface, timeStamp: sampleBuffer.presentationTimeStamp.seconds)
    }
}
class Renderer {
    ...
    func setRenderData(_ surface: IOSurface, timeStamp: Double) async {
        _ = await renderSemaphore.getSetBuffers(
            isGet: false,
            surface: surface,
            timeStamp: timeStamp
        )
    }

    func draw(in view: MTKView) {
        Task {
            await renderAsync(view)
        }
    }
    func renderAsync(_ view: MTKView) async {
        guard await renderSemaphore.beginRender() else { return }

        guard let frame = await renderSemaphore.getSetBuffers(
            isGet: true, surface: nil, timeStamp: nil
        ) else {
            await renderSemaphore.endRender()
            return
        }

        guard let texture = await renderSemaphore.getRenderData(
            device: self.device,
            surface: frame.surface) else {
            await renderSemaphore.endRender()
            return
        }

        guard let commandBuffer = _commandQueue.makeCommandBuffer(),
              let renderPassDescriptor = await view.currentRenderPassDescriptor,
              let renderEncoder = commandBuffer.makeRenderCommandEncoder(descriptor: renderPassDescriptor) else {
            await renderSemaphore.endRender()
            return
        }

        // Shaders ..
        renderEncoder.endEncoding()

        commandBuffer.addCompletedHandler { @Sendable _ in
            updateFPS()
        }

        // commit frame in actor
        let success = await renderSemaphore.commitFrame(
            timeStamp: frame.timeStamp,
            commandBuffer: commandBuffer,
            drawable: view.currentDrawable!
        )
        if !success {
            print("Frame dropped due to out-of-order timestamp")
        }
        await renderSemaphore.endRender()
    }
}
actor RenderSemaphore {
    private var frameBuffers: [FrameData] = []
    private var lastReadTimeStamp: Double = 0.0
    private var lastCommittedTimeStamp: Double = 0
    private var activeTaskCount = 0
    private var activeRenderCount = 0
    private let maxTasks = 3
    private var textureCache: CVMetalTextureCache?
    private var isTextureLoaded = false

    init() {
    }

    func initTextureCache(device: MTLDevice) {
        CVMetalTextureCacheCreate(kCFAllocatorDefault, nil, device, nil, &self.textureCache)
    }

    func beginRender() -> Bool {
        guard activeRenderCount < maxTasks else { return false }
        activeRenderCount += 1
        return true
    }

    func endRender() {
        if activeRenderCount > 0 {
            activeRenderCount -= 1
        }
    }

    func setTextureLoaded(_ loaded: Bool) {
        isTextureLoaded = loaded
    }
    func getSetBuffers(isGet: Bool, surface: IOSurface?, timeStamp: Double?) -> FrameData? {
        if isGet {
            if !frameBuffers.isEmpty {
                let frame = frameBuffers.removeFirst()
                if frame.timeStamp > lastReadTimeStamp {
                    lastReadTimeStamp = frame.timeStamp
                    print(frame.timeStamp)
                    return frame
                }
            }
            return nil
        } else {
            // Set
            let frameData = FrameData(
                surface: surface!,
                timeStamp: timeStamp!
            )
            // insert at the right position
            let insertIndex = binarySearch(for: timeStamp!)
            frameBuffers.insert(frameData, at: insertIndex)
            return frameData
        }
    }

    private func binarySearch(for timeStamp: Double) -> Int {
        var left = 0
        var right = frameBuffers.count
        while left < right {
            let mid = (left + right) / 2
            if frameBuffers[mid].timeStamp > timeStamp {
                right = mid
            } else {
                left = mid + 1
            }
        }
        return left
    }

    // for setRenderDataNormalized
    func tryEnterTask() -> Bool {
        guard activeTaskCount < maxTasks else { return false }
        activeTaskCount += 1
        return true
    }

    func exitTask() {
        activeTaskCount -= 1
    }
    func commitFrame(timeStamp: Double,
                     commandBuffer: MTLCommandBuffer,
                     drawable: MTLDrawable) async -> Bool {
        guard timeStamp > lastCommittedTimeStamp else {
            print("Drop frame at commit: \(timeStamp) <= \(lastCommittedTimeStamp)")
            return false
        }
        commandBuffer.present(drawable)
        commandBuffer.commit()
        lastCommittedTimeStamp = timeStamp
        return true
    }

    func getRenderData(
        device: MTLDevice,
        surface: IOSurface,
        depthData: [Float]
    ) -> (MTLTexture, MTLBuffer)? {
        let _textureName = "RenderData"
        var px: Unmanaged<CVPixelBuffer>?
        let status = CVPixelBufferCreateWithIOSurface(kCFAllocatorDefault, surface, nil, &px)
        guard status == kCVReturnSuccess, let screenImage = px?.takeRetainedValue() else {
            return nil
        }

        CVMetalTextureCacheFlush(textureCache!, 0)
        var texture: CVMetalTexture? = nil
        let width = CVPixelBufferGetWidthOfPlane(screenImage, 0)
        let height = CVPixelBufferGetHeightOfPlane(screenImage, 0)
        let result2 = CVMetalTextureCacheCreateTextureFromImage(
            kCFAllocatorDefault,
            self.textureCache!,
            screenImage,
            nil,
            MTLPixelFormat.bgra8Unorm,
            width,
            height,
            0, &texture)
        guard result2 == kCVReturnSuccess,
              let cvTexture = texture,
              let mtlTexture = CVMetalTextureGetTexture(cvTexture) else {
            return nil
        }
        mtlTexture.label = _textureName
        let depthBuffer = device.makeBuffer(bytes: depthData, length: depthData.count * MemoryLayout<Float>.stride)!
        return (mtlTexture, depthBuffer)
    }
}
Above is my code; could someone point out what might be wrong?
Hi, I've found that continuously running predictions with the same Core ML model at 120 FPS is faster than at 60 FPS.
I'm using a MacBook Pro (M2). With ProMotion turned on, running Core ML prediction on a 120 FPS video gives an average prediction time of 7.46 ms, as shown below:
But when I turn ProMotion off, set a 60 Hz refresh rate, and run the same prediction on a 60 FPS video, the average prediction time is 10.91 ms, as shown below:
What could be the technical explanation for these results? Is there any documentation or technical literature that addresses this behavior?
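For reference, a minimal sketch of how the average prediction time could be measured in a plain loop, decoupled from any video or display refresh rate; model and input are passed in and stand for the model and one prepared feature provider:
import CoreML
import Foundation

// Time predictions in a tight loop so the display refresh rate cannot affect the numbers.
func averagePredictionTime(model: MLModel, input: MLFeatureProvider, iterations: Int = 200) throws -> Double {
    // Warm up once so first-run setup cost is not counted.
    _ = try model.prediction(from: input)

    let start = CFAbsoluteTimeGetCurrent()
    for _ in 0..<iterations {
        _ = try model.prediction(from: input)
    }
    let elapsed = CFAbsoluteTimeGetCurrent() - start
    return elapsed / Double(iterations) * 1000.0   // milliseconds per prediction
}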
I'm experiencing issues with the Core ML Async API, as it doesn't seem to be working correctly. It consistently hangs during the
"03 performInference, after get smallInput, before prediction" part,
as shown in the attached:
log1.txt
log2.txt
Below is my code. Could you please advise on how I should modify it?
private func createFrameAsync(for sampleBuffer: CMSampleBuffer) {
    guard let pixelBuffer = sampleBuffer.imageBuffer else { return }
    Task {
        print("**** createFrameAsync before performInference")
        do {
            try await runModelAsync(on: pixelBuffer)
        } catch {
            print("Error processing frame: \(error)")
        }
        print("**** createFrameAsync after performInference")
    }
}

func runModelAsync(on pixelbuffer: CVPixelBuffer) async {
    print("01 performInference, before resizeFrame")
    guard let data = metalResizeFrame(sourcePixelFrame: pixelbuffer, targetSize: MTLSize(width: InputWidth, height: InputHeight, depth: 1), resizeMode: .scaleToFill) else {
        os_log("Preprocessing failed", type: .error)
        return
    }

    print("02 performInference, after resizeFrame, before get smallInput")
    let input = model_smallInput(input: data)

    print("03 performInference, after get smallInput, before prediction")
    if let prediction = try? await mlmodel!.model.prediction(from: input) {
        print("04 performInference, after prediction, before get result")
        var results: [Float] = []
        let output = prediction.featureValue(for: "output")?.multiArrayValue
        if let bufferPointer = try? UnsafeBufferPointer<Float>(output!) {
            results = Array(bufferPointer)
        }

        print("05 performInference, after get result, before setRenderData")
        let localResults = results
        await MainActor.run {
            ScreenRecorder.shared
                .setRenderDataNormalized(
                    screenImage: pixelbuffer,
                    depthData: localResults
                )
        }
        print("06 performInference, after setRenderData")
    }
}
Hi all, I'm tuning my app's prediction speed with a Core ML model. I watched and tried the methods from the sessions Improve Core ML integration with async prediction and Optimize your Core ML usage. I also used Instruments to look for the bottleneck that keeps my predictions from getting faster.
Below is the Instruments result for my app; its prediction duration is 10.29 ms.
And below is the performance report, which shows an average prediction time of 5.55 ms, about half the time my app takes!
Below is part of my Instruments trace. I think the predictions should count as quite frequent. Could they be faster?
How can I get the same prediction speed as the performance report? The prediction speed on a MacBook Pro M2 is nearly the same as on a MacBook Air M1!
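For reference, a minimal sketch of what I understood from the async prediction session, keeping a few predictions in flight at once; model is an already-loaded MLModel, the inputs are whatever feature providers the model takes, and I haven't confirmed this matches the performance report's setup:
import CoreML

// Keep up to maxInFlight predictions running concurrently using the async API.
func predictConcurrently(inputs: [any MLFeatureProvider], model: MLModel) async {
    let maxInFlight = 3
    await withTaskGroup(of: Void.self) { group in
        var inFlight = 0
        for input in inputs {
            // Throttle so no more than maxInFlight predictions are queued at a time.
            if inFlight >= maxInFlight {
                _ = await group.next()
                inFlight -= 1
            }
            group.addTask {
                // Async prediction; errors are ignored in this sketch.
                _ = try? await model.prediction(from: input)
            }
            inFlight += 1
        }
    }
}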
I tried "WWDC24: Build compelling spatial photo and video experiences | Apple" and it can successfully capture spatial video.
But I found the video by my app differs from the iPhone build-in camera app in:
Videos captured with the iPhone's build-in camera app tend to have a more natural or warmer tone, while videos taken with my app appear whiter or cooler in color temperature.
In videos recorded using the iPhone's built-in camera app, the left eye image is typically sharper than the right eye image. However, in my app, this is reversed: the right eye image is clearer than the left eye image.
I've noticed that when I cover the wide-angle lens while shooting, the entire preview screen in my app becomes brighter. However, this doesn't occur when using the iPhone's built-in camera app.
Is there any api or parameters to make my app more close to the iPhone build-in app? I have tried "whiteBalanceMode" and "exposureMode" but no luck.
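For reference, a minimal sketch of the kind of adjustment I mean by trying whiteBalanceMode and exposureMode; device is the AVCaptureDevice used for spatial capture, and locking at the current gains with zero exposure bias is only a placeholder:
import AVFoundation

// Freeze white balance at the current gains and keep exposure continuous,
// so the color temperature stops drifting relative to the Camera app.
func lockColorAndExposure(on device: AVCaptureDevice) {
    do {
        try device.lockForConfiguration()

        if device.isWhiteBalanceModeSupported(.locked) {
            // Lock white balance at whatever gains the device is currently using.
            device.setWhiteBalanceModeLocked(with: device.deviceWhiteBalanceGains, completionHandler: nil)
        }

        if device.isExposureModeSupported(.continuousAutoExposure) {
            device.exposureMode = .continuousAutoExposure
            // Placeholder bias; adjust if the frames look too bright or too dark.
            device.setExposureTargetBias(0.0, completionHandler: nil)
        }

        device.unlockForConfiguration()
    } catch {
        print("Could not lock configuration: \(error)")
    }
}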
Hi there, I'm trying to work through "Drawing fully immersive content using Metal", but when I select Language: Swift, some of the sample code still shows Objective-C.
Please check and update the document's Swift code, thank you.
I wrote an app that shows a live stream. On iPad, I can connect an external display over a cable and move the app to that display, so I can do what I want on the iPad while watching the live stream on the external display.
But moving the app to the display is not always full screen, and on some displays the image extends beyond the visible area of the external display, so I have to go to Settings -> Display & Brightness -> External Display and turn on "Allow Display Mode Changes" to fix it.
Are there APIs to "move to display" programmatically, keep the app full screen, and match the external display's resolution on the external display?
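For reference, a sketch of the kind of API I'm hoping for, assuming iOS 16+ and a scene-based app; StreamViewController is a placeholder for my streaming UI, and I haven't verified this is the intended approach:
import UIKit

// Find the external (non-interactive) window scene and attach a full-screen window to it.
func makeExternalDisplayWindow() -> UIWindow? {
    guard let externalScene = UIApplication.shared.connectedScenes
        .compactMap({ $0 as? UIWindowScene })
        .first(where: { $0.session.role == .windowExternalDisplayNonInteractive })
    else { return nil }

    // Ask for the screen's preferred mode so the window matches its native resolution.
    // (This may still depend on "Allow Display Mode Changes" being enabled.)
    externalScene.screen.currentMode = externalScene.screen.preferredMode

    let window = UIWindow(windowScene: externalScene)
    window.frame = externalScene.screen.bounds
    window.rootViewController = StreamViewController()   // placeholder view controller
    window.isHidden = false
    return window   // caller must keep a strong reference, or the window is torn down
}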
I have read Converting side-by-side 3D video to multi-view HEVC and spatial video, and now I want to convert back to side-by-side 3D video. On an iPhone 15 Pro Max, the conversion time is about 1:1 with the original video length.
I do almost the same as the article mentioned above; the only difference is that I take the frames from the spatial video and merge them into side-by-side. My current frame-merging code is below. Are there any suggestions to speed up the process? Or, within the official article's approach, is there anything that can be done to speed up the conversion?
// Merge frame
let leftCI = resizeCVPixelBufferFill(bufferLeft, targetSize: targetSize)
let rightCI = resizeCVPixelBufferFill(bufferRight, targetSize: targetSize)
let lbuffer = convertCIImageToCVPixelBuffer(leftCI!)!
let rbuffer = convertCIImageToCVPixelBuffer(rightCI!)!
pixelBuffer = mergeFrames(lbuffer, rbuffer)
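For reference, a sketch of one possible variation: compositing both eyes in a single Core Image pass and rendering into a pixel buffer from a pool, instead of converting each resized CIImage back to a CVPixelBuffer separately. ciContext and pool are assumed to be created once outside the frame loop, and I'm not sure this matches the article's intended pipeline:
import CoreImage
import CoreVideo

// Merge the two eye buffers side by side with one Core Image render per frame.
func mergeSideBySide(left: CVPixelBuffer,
                     right: CVPixelBuffer,
                     ciContext: CIContext,
                     pool: CVPixelBufferPool) -> CVPixelBuffer? {
    let leftImage = CIImage(cvPixelBuffer: left)
    let rightImage = CIImage(cvPixelBuffer: right)

    // Shift the right eye next to the left eye and composite them into one image.
    let shiftedRight = rightImage.transformed(by: CGAffineTransform(translationX: leftImage.extent.width, y: 0))
    let sideBySide = shiftedRight.composited(over: leftImage)

    // Reuse buffers from a pool sized to the side-by-side output.
    var output: CVPixelBuffer?
    CVPixelBufferPoolCreatePixelBuffer(kCFAllocatorDefault, pool, &output)
    guard let outputBuffer = output else { return nil }

    ciContext.render(sideBySide, to: outputBuffer)
    return outputBuffer
}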
I'm trying to understand the view matrix.
The relevant part of the original template code is below:
private func updateGameState() {
    /// Update any game state before rendering
    uniforms[0].projectionMatrix = projectionMatrix

    let rotationAxis = SIMD3<Float>(1, 1, 0)
    let modelMatrix = matrix4x4_rotation(radians: rotation, axis: rotationAxis)
    let viewMatrix = matrix4x4_translation(0.0, 0.0, -8.0)
    uniforms[0].modelViewMatrix = simd_mul(viewMatrix, modelMatrix)

    rotation += 0.01
}
If the view matrix is initialized with x = -0.5, as in let viewMatrix = matrix4x4_translation(-0.5, 0.0, -8.0),
the cube in the Metal view moves to the left.
I think it should move to the right-hand side, because the view matrix is the camera position. Am I wrong?
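For reference, a small sketch (using the template's matrix4x4_translation helper) of what that translation does to a point:
import simd

// Apply the translated view matrix to the cube's origin and see where it lands in eye space.
let viewMatrix = matrix4x4_translation(-0.5, 0.0, -8.0)
let worldPoint = SIMD4<Float>(0, 0, 0, 1)        // cube centered at the world origin
let eyePoint = viewMatrix * worldPoint           // (-0.5, 0, -8, 1)
// x = -0.5 in eye space is left of center, which matches what the screen shows.
// If the camera itself were meant to sit at x = -0.5, the view matrix would be the
// inverse of that camera transform, i.e. matrix4x4_translation(+0.5, 0.0, +8.0).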
I tried the ScreenCaptureKit sample code from Apple:
ScreenCaptureKit Sample Code
When I ran it for a while, it crashed at a strange place, as shown in the attached screenshot.
The value array is not empty and has a value at index 0, but it still crashed.