Hello,
I am trying to read video frames using AVAssetReaderTrackOutput. Here is the sample code:
//prepare assets
let asset = AVURLAsset(url: some_url)
let assetReader = try AVAssetReader(asset: asset)
guard let videoTrack = try await asset.loadTracks(withMediaCharacteristic: .visual).first else {
    throw SomeErrorCode.error
}
var readerSettings: [String: Any] = [
    kCVPixelBufferIOSurfacePropertiesKey as String: [String: String]()
]
//check if HDR video
var isHDRDetected: Bool = false
let hdrTracks = try await asset.loadTracks(withMediaCharacteristic: .containsHDRVideo)
if hdrTracks.count > 0 {
    readerSettings[AVVideoAllowWideColorKey as String] = true
    readerSettings[kCVPixelBufferPixelFormatTypeKey as String] =
        kCVPixelFormatType_420YpCbCr10BiPlanarFullRange
    isHDRDetected = true
}
//add output to assetReader
let output = AVAssetReaderTrackOutput(track: videoTrack, outputSettings: readerSettings)
guard assetReader.canAdd(output) else {
    throw SomeErrorCode.error
}
assetReader.add(output)
guard assetReader.startReading() else {
    throw SomeErrorCode.error
}
//add writer output settings
let videoOutputSettings: [String: Any] = [
    AVVideoCodecKey: AVVideoCodecType.hevc,
    AVVideoWidthKey: 1920,
    AVVideoHeightKey: 1080,
]
let finalPath = URL(fileURLWithPath: "/some/url/path.mov") //placeholder output URL
let assetWriter = try AVAssetWriter(outputURL: finalPath, fileType: AVFileType.mov)
guard assetWriter.canApply(outputSettings: videoOutputSettings, forMediaType: AVMediaType.video)
else {
    throw SomeErrorCode.error
}
let assetWriterInput = AVAssetWriterInput(mediaType: .video, outputSettings: videoOutputSettings)
let sourcePixelAttributes: [String: Any] = [
    kCVPixelBufferPixelFormatTypeKey as String: isHDRDetected
        ? kCVPixelFormatType_420YpCbCr10BiPlanarFullRange : kCVPixelFormatType_32ARGB,
    kCVPixelBufferWidthKey as String: 1920,
    kCVPixelBufferHeightKey as String: 1080,
]
//create assetAdaptor
let assetAdaptor = AVAssetWriterInputTaggedPixelBufferGroupAdaptor(
    assetWriterInput: assetWriterInput, sourcePixelBufferAttributes: sourcePixelAttributes)
guard assetWriter.canAdd(assetWriterInput) else {
    throw SomeErrorCode.error
}
assetWriter.add(assetWriterInput)
guard assetWriter.startWriting() else {
    throw SomeErrorCode.error
}
assetWriter.startSession(atSourceTime: CMTime.zero)
//prepare transfer session
var session: VTPixelTransferSession? = nil
guard
    VTPixelTransferSessionCreate(allocator: kCFAllocatorDefault, pixelTransferSessionOut: &session)
        == noErr, let session
else {
    throw SomeErrorCode.error
}
guard let pixelBufferPool = assetAdaptor.pixelBufferPool else {
    throw SomeErrorCode.error
}
//read through frames
while let nextSampleBuffer = output.copyNextSampleBuffer() {
    autoreleasepool {
        guard let imageBuffer = CMSampleBufferGetImageBuffer(nextSampleBuffer) else {
            return
        }
        //this part copied from (https://developer.apple.com/videos/play/wwdc2023/10181) at 23:58 timestamp
        let attachment = [
            kCVImageBufferYCbCrMatrixKey: kCVImageBufferYCbCrMatrix_ITU_R_2020,
            kCVImageBufferColorPrimariesKey: kCVImageBufferColorPrimaries_ITU_R_2020,
            kCVImageBufferTransferFunctionKey: kCVImageBufferTransferFunction_SMPTE_ST_2084_PQ,
        ]
        CVBufferSetAttachments(imageBuffer, attachment as CFDictionary, .shouldPropagate)
        //now convert to CIImage with HDR data
        let image = CIImage(cvPixelBuffer: imageBuffer)
        //here perform some actions like cropping, flipping, etc. and preserve these changes by converting the extent to CGImage first:
        let cropped = image //placeholder: the cropped/flipped CIImage
        //this part copied from (https://developer.apple.com/videos/play/wwdc2023/10181) at 24:30 timestamp
        guard
            let cgImage = context.createCGImage(
                cropped, from: cropped.extent, format: .RGBA16,
                colorSpace: CGColorSpace(name: CGColorSpace.itur_2100_PQ)!)
        else {
            return //returning from the autoreleasepool closure skips this frame
        }
        //finally convert it back to CIImage
        let newScaledImage = CIImage(cgImage: cgImage)
        //now write it to a new pixelBuffer
        let pixelBufferAttributes: [String: Any] = [
            kCVPixelBufferCGImageCompatibilityKey as String: true,
            kCVPixelBufferCGBitmapContextCompatibilityKey as String: true,
        ]
        var pixelBuffer: CVPixelBuffer?
        CVPixelBufferCreate(
            kCFAllocatorDefault, Int(newScaledImage.extent.width), Int(newScaledImage.extent.height),
            kCVPixelFormatType_420YpCbCr10BiPlanarFullRange, pixelBufferAttributes as CFDictionary,
            &pixelBuffer)
        guard let pixelBuffer else {
            return
        }
        context.render(newScaledImage, to: pixelBuffer) //context is a CIContext reference
        var pixelTransferBuffer: CVPixelBuffer?
        CVPixelBufferPoolCreatePixelBuffer(kCFAllocatorDefault, pixelBufferPool, &pixelTransferBuffer)
        guard let pixelTransferBuffer else {
            return
        }
        // Transfer the image to the pixel buffer.
        guard
            VTPixelTransferSessionTransferImage(session, from: pixelBuffer, to: pixelTransferBuffer)
                == noErr
        else {
            return
        }
        //finally append to taggedBuffer
    }
}
assetWriterInput.markAsFinished()
await assetWriter.finishWriting()
The resulting video does not match the colors of the original; it turns out too bright. If I play around with the attachment values, it can be either too dim or too bright, but never exactly like the original video. What am I missing in my setup? I did find that kCVPixelFormatType_4444AYpCbCr16 produces correct colors, but then I can't convert it to a CIImage, so I can't perform the CIImage operations I need, mainly cropping and resizing.
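For reference, one variation I am considering is to skip the CGImage round trip and let the CIContext render the adjusted image straight into the adaptor's 10-bit pool buffer with an explicit PQ color space, re-attaching the BT.2020/PQ attachments to that buffer. This is only a minimal sketch under my own assumptions (same context, cropped, and pixelBufferPool as above, and a CIContext set up with a half-float working format), not a confirmed fix:
//inside the while loop, after computing `cropped`
//(assumption: `context` was created with options [.workingFormat: CIFormat.RGBAh])
var outBuffer: CVPixelBuffer?
CVPixelBufferPoolCreatePixelBuffer(kCFAllocatorDefault, pixelBufferPool, &outBuffer)
guard let outBuffer else { return }
//re-attach the HDR metadata to the destination buffer as well
let hdrAttachments = [
    kCVImageBufferYCbCrMatrixKey: kCVImageBufferYCbCrMatrix_ITU_R_2020,
    kCVImageBufferColorPrimariesKey: kCVImageBufferColorPrimaries_ITU_R_2020,
    kCVImageBufferTransferFunctionKey: kCVImageBufferTransferFunction_SMPTE_ST_2084_PQ,
]
CVBufferSetAttachments(outBuffer, hdrAttachments as CFDictionary, .shouldPropagate)
//render with an explicit PQ color space instead of going through CGImage
let pqColorSpace = CGColorSpace(name: CGColorSpace.itur_2100_PQ)!
context.render(cropped, to: outBuffer, bounds: cropped.extent, colorSpace: pqColorSpace)
//then append outBuffer to the adaptor exactly as in the original code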
Hello, I am using MTKView to display a camera preview and video playback. I am testing on an iPhone 16. The app crashes at a random moment whenever the MTKView renders a CIImage.
MetalView:
public enum MetalActionType {
    case image(CIImage)
    case buffer(CVPixelBuffer)
}
public struct MetalView: UIViewRepresentable {
    let mtkView = MTKView()
    public let actionPublisher: any Publisher<MetalActionType, Never>
    public func makeCoordinator() -> Coordinator {
        Coordinator(self)
    }
    public func makeUIView(context: UIViewRepresentableContext<MetalView>) -> MTKView {
        guard let metalDevice = MTLCreateSystemDefaultDevice() else {
            return mtkView
        }
        mtkView.device = metalDevice
        mtkView.framebufferOnly = false
        mtkView.clearColor = MTLClearColor(red: 0, green: 0, blue: 0, alpha: 0)
        mtkView.drawableSize = mtkView.frame.size
        mtkView.delegate = context.coordinator
        mtkView.isPaused = true
        mtkView.enableSetNeedsDisplay = true
        mtkView.preferredFramesPerSecond = 60
        context.coordinator.ciContext = CIContext(
            mtlDevice: metalDevice, options: [.priorityRequestLow: true, .highQualityDownsample: false])
        context.coordinator.metalCommandQueue = metalDevice.makeCommandQueue()
        context.coordinator.actionSubscriber = actionPublisher.sink { type in
            switch type {
            case .buffer(let pixelBuffer):
                context.coordinator.updateCIImage(pixelBuffer)
            case .image(let image):
                context.coordinator.updateCIImage(image)
            }
        }
        return mtkView
    }
    public func updateUIView(_ nsView: MTKView, context: UIViewRepresentableContext<MetalView>) {
    }
    public class Coordinator: NSObject, MTKViewDelegate {
        var parent: MetalView
        var metalCommandQueue: MTLCommandQueue!
        var ciContext: CIContext!
        private var image: CIImage? {
            didSet {
                Task { @MainActor in
                    self.parent.mtkView.setNeedsDisplay() //<--- call Draw method
                }
            }
        }
        var actionSubscriber: (any Combine.Cancellable)?
        private let operationQueue = OperationQueue()
        init(_ parent: MetalView) {
            self.parent = parent
            operationQueue.qualityOfService = .background
            super.init()
        }
        public func mtkView(_ view: MTKView, drawableSizeWillChange size: CGSize) {
        }
        public func draw(in view: MTKView) {
            guard let drawable = view.currentDrawable, let ciImage = image,
                let commandBuffer = metalCommandQueue.makeCommandBuffer(), let ci = ciContext
            else {
                return
            }
            //making sure nothing is nil, now we can add the current frame to the operationQueue for processing
            operationQueue.addOperation(
                MetalOperation(
                    drawable: drawable, drawableSize: view.drawableSize, ciImage: ciImage,
                    commandBuffer: commandBuffer, pixelFormat: view.colorPixelFormat, ciContext: ci))
        }
        //consumed by Subscriber
        func updateCIImage(_ img: CIImage) {
            image = img
        }
        //consumed by Subscriber
        func updateCIImage(_ buffer: CVPixelBuffer) {
            image = CIImage(cvPixelBuffer: buffer)
        }
    }
}
Now the MetalOperation class:
private class MetalOperation: Operation, @unchecked Sendable {
    let drawable: CAMetalDrawable
    let drawableSize: CGSize
    let ciImage: CIImage
    let commandBuffer: MTLCommandBuffer
    let pixelFormat: MTLPixelFormat
    let ciContext: CIContext
    init(
        drawable: CAMetalDrawable, drawableSize: CGSize, ciImage: CIImage,
        commandBuffer: MTLCommandBuffer, pixelFormat: MTLPixelFormat, ciContext: CIContext
    ) {
        self.drawable = drawable
        self.drawableSize = drawableSize
        self.ciImage = ciImage
        self.commandBuffer = commandBuffer
        self.pixelFormat = pixelFormat
        self.ciContext = ciContext
    }
    override func main() {
        let width = Int(drawableSize.width)
        let height = Int(drawableSize.height)
        let ciWidth = Int(ciImage.extent.width) //<-- Thread 22: EXC_BAD_ACCESS (code=1, address=0x5e71f5490) A bad access to memory terminated the process.
        let ciHeight = Int(ciImage.extent.height)
        let destination = CIRenderDestination(
            width: width, height: height, pixelFormat: pixelFormat, commandBuffer: commandBuffer,
            mtlTextureProvider: { [self] () -> MTLTexture in
                return drawable.texture
            })
        let transform = CGAffineTransform(
            scaleX: CGFloat(width) / CGFloat(ciWidth), y: CGFloat(height) / CGFloat(ciHeight))
        do {
            try ciContext.startTask(toClear: destination)
            try ciContext.startTask(toRender: ciImage.transformed(by: transform), to: destination)
        } catch {
        }
        commandBuffer.present(drawable)
        commandBuffer.commit()
        commandBuffer.waitUntilCompleted()
    }
}
Now, I am no Metal expert, but I believe this is a very simple pipeline that shouldn't cause a memory problem, especially since we have already checked that the CIImage is not nil. I have also tried running this code without the OperationQueue, and with @autoreleasepool, but neither solved the problem.
Am I missing something?
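For completeness, when I tried it without the OperationQueue, the draw(in:) body looked roughly like the sketch below, rendering synchronously on the thread MTKView calls the delegate on, so the drawable, command buffer, and current CIImage are never handed to another queue (same ciContext, metalCommandQueue, and image properties as above; treat it as a sketch, not a verified fix):
public func draw(in view: MTKView) {
    guard let drawable = view.currentDrawable, let ciImage = image,
        let commandBuffer = metalCommandQueue.makeCommandBuffer(), let ci = ciContext
    else {
        return
    }
    //render on the delegate's thread instead of handing the drawable/CIImage to an Operation
    let destination = CIRenderDestination(
        width: Int(view.drawableSize.width), height: Int(view.drawableSize.height),
        pixelFormat: view.colorPixelFormat, commandBuffer: commandBuffer,
        mtlTextureProvider: { drawable.texture })
    let scale = CGAffineTransform(
        scaleX: view.drawableSize.width / ciImage.extent.width,
        y: view.drawableSize.height / ciImage.extent.height)
    do {
        try ci.startTask(toClear: destination)
        try ci.startTask(toRender: ciImage.transformed(by: scale), to: destination)
    } catch {
        print("CIRenderDestination error: \(error)")
    }
    commandBuffer.present(drawable)
    commandBuffer.commit() //I also skip waitUntilCompleted() in this variant
}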
Hi, I am trying to stream spatial video in real time from my iPhone 16.
I am able to record spatial video as a file output using:
let videoDeviceOutput = AVCaptureMovieFileOutput()
However, when I try to grab the raw sample buffer, it doesn't include any spatial information:
let captureOutput = AVCaptureVideoDataOutput()
//when initializing the camera
session.addOutput(captureOutput)
captureOutput.setSampleBufferDelegate(self, queue: sessionQueue)
//finally
func captureOutput(_ output: AVCaptureOutput, didOutput sampleBuffer: CMSampleBuffer, from connection: AVCaptureConnection) {
    //use sample buffer (but no spatial data available here)
}
Is this how it's supposed to work, or am I missing something?
This video: https://developer.apple.com/videos/play/wwdc2023/10071 gives a clue towards setting up spatial streaming, and I've got the backend ready for 3D HLS streaming. Now I am only stuck on how to send the video stream to my server.
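For reference, the file-output path that does work for me looks roughly like the sketch below. The isSpatialVideoCaptureSupported / isSpatialVideoCaptureEnabled properties are my understanding of the iOS 18 API on AVCaptureMovieFileOutput, and outputURL is a placeholder, so please treat this as a sketch rather than confirmed code:
let movieOutput = AVCaptureMovieFileOutput()
if session.canAddOutput(movieOutput) {
    session.addOutput(movieOutput)
}
//assumption: iOS 18 exposes a spatial capture opt-in on the movie file output
if movieOutput.isSpatialVideoCaptureSupported {
    movieOutput.isSpatialVideoCaptureEnabled = true
}
movieOutput.startRecording(to: outputURL, recordingDelegate: self) //outputURL is a placeholder
The question is how to get the same spatial information on a per-sample-buffer basis so I can push it to my server instead of a file.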
Hi, I love the VideoMaterial API that gives so much power to play video on any mesh. But I am trying to play a side-by-side 3D video using VideoMaterial:
RealityView { content in
    let mesh = MeshResource.generatePlane(width: 300.0, height: 300.0, cornerRadius: 0) //generate mesh
    let vidMaterial = VideoMaterial(avPlayer: AVPlayer(url: URL(string: "https://someurl/test/master.m3u8")!)) //VideoMaterial
    vidMaterial.controller.preferredViewingMode = .stereo //<-- no idea why it doesn't work for SBS video in simulator
    vidMaterial.avPlayer?.play()
    let planeEntity = Entity() //new entity
    planeEntity.components.set(ModelComponent(mesh: mesh, materials: [vidMaterial])) //set a new ModelComponent to the entity
    content.add(planeEntity)
}
This code works well for plain 2D video playback, but how do I display a side-by-side or top-bottom 3D video?
I found GeometrySwitchCameraIndex in a custom ShaderGraphMaterial, but if I use an image texture as the input node, how do I pass the video frame as a texture into my custom shader to achieve the 3D effect? Or maybe there is an even better way to deal with this?
There is also the .preferredViewingMode API on the VideoMaterial's controller that can be set to .stereo, but it doesn't give any stereo effect. Perhaps it only applies to MV-HEVC media playback?
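In case it helps frame the question: the only route I can sketch for feeding per-frame textures into a custom ShaderGraphMaterial is to pull frames off the player myself with AVPlayerItemVideoOutput, and then somehow upload them to the material's texture (that upload half, e.g. via TextureResource, is exactly the part I have not figured out). A minimal sketch of the frame-pulling half, with playerItem and a CADisplayLink-driven pullFrame() call assumed:
let videoOutput = AVPlayerItemVideoOutput(pixelBufferAttributes: [
    kCVPixelBufferPixelFormatTypeKey as String: kCVPixelFormatType_32BGRA
])
playerItem.add(videoOutput) //playerItem is the AVPlayerItem backing the AVPlayer above

//assumed to be called from a CADisplayLink once per display refresh
func pullFrame() {
    let itemTime = videoOutput.itemTime(forHostTime: CACurrentMediaTime())
    guard videoOutput.hasNewPixelBuffer(forItemTime: itemTime),
        let pixelBuffer = videoOutput.copyPixelBuffer(forItemTime: itemTime, itemTimeForDisplay: nil)
    else {
        return
    }
    //pixelBuffer now holds the full side-by-side frame; splitting the left/right halves
    //and pushing them into the ShaderGraphMaterial texture is the open question
    _ = pixelBuffer
}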
Hi, I notice a very slow transfer rate when I try to import a file picked via .photosPicker. This happens especially when I try to import a 4K/60fps video.
SwiftUI:
VStack {
    Button("Pick a video") {
        model.isPhotoPickerView = true
    }
    .photosPicker(isPresented: $model.isPhotoPickerView, selection: $model.selectedImageList, maxSelectionCount: 1, matching: .videos)
    .onChange(of: model.selectedImageList, { old, new in
        model.handlePhotoPicker()
    })
}
View Model to handle Photo Picker action:
private class PageModel: ObservableObject {
    //other methods
    @MainActor
    public func handlePhotoPicker() {
        if selectedImageList.isEmpty {
            return
        }
        guard let item = selectedImageList.first else {
            return
        }
        Task {
            do {
                if let video = try await item.loadTransferable(type: VideoTransferable.self) {
                    let file = video.url
                    //video url arrived
                }
            } catch {
                //handle error
            }
        }
    }
}
Now the VideoTransferable:
struct VideoTransferable: Transferable {
    let url: URL
    static var transferRepresentation: some TransferRepresentation {
        FileRepresentation(contentType: .movie) { video in
            SentTransferredFile(video.url)
        } importing: { received in
            //takes too much time to import large 4K video recorded from iPhone's main camera
            let copy = FileManager.documentsDirectory.appendingPathComponent(FolderType.temp.rawValue).appendingPathComponent("video_\(Int64(Date().timeIntervalSince1970 * 1000)).MOV")
            if FileManager.default.fileExists(atPath: copy.path) {
                try FileManager.default.removeItem(at: copy)
            }
            try FileManager.default.copyItem(at: received.file, to: copy)
            return Self.init(url: copy)
        }
    }
}
To my surprise, this issue doesn't happen when I use a custom UIViewControllerRepresentable to wrap UIImagePickerController() and set the picker's videoExportPreset property to AVAssetExportPresetPassthrough.
Can someone point out where I am going wrong?
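For reference, the wrapper I compared against looks roughly like this sketch; LegacyVideoPicker and onPicked are my own placeholder names, and the relevant part is the videoExportPreset setting mentioned above:
import AVFoundation
import SwiftUI
import UniformTypeIdentifiers

struct LegacyVideoPicker: UIViewControllerRepresentable {
    var onPicked: (URL) -> Void

    func makeCoordinator() -> Coordinator { Coordinator(onPicked: onPicked) }

    func makeUIViewController(context: Context) -> UIImagePickerController {
        let picker = UIImagePickerController()
        picker.sourceType = .photoLibrary
        picker.mediaTypes = [UTType.movie.identifier]
        picker.videoExportPreset = AVAssetExportPresetPassthrough //no re-encode, so the import is fast
        picker.delegate = context.coordinator
        return picker
    }

    func updateUIViewController(_ uiViewController: UIImagePickerController, context: Context) {}

    class Coordinator: NSObject, UIImagePickerControllerDelegate, UINavigationControllerDelegate {
        let onPicked: (URL) -> Void
        init(onPicked: @escaping (URL) -> Void) { self.onPicked = onPicked }

        func imagePickerController(
            _ picker: UIImagePickerController,
            didFinishPickingMediaWithInfo info: [UIImagePickerController.InfoKey: Any]
        ) {
            if let url = info[.mediaURL] as? URL {
                onPicked(url) //hands back the passthrough-exported movie URL
            }
            picker.dismiss(animated: true)
        }
    }
}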
Testing on an iPhone 12 mini, I have encountered a weird situation. I am trying to take a snapshot of my view, which works fine, but the memory is never released after the snapshot is taken.
func screenshot(view: UIView, scale: Double) -> URL? {
    guard let containerView = view.superview, let containerSuperview = containerView.superview else { return nil }
    let rendererFormat = UIGraphicsImageRendererFormat()
    rendererFormat.scale = scale
    let renderer = UIGraphicsImageRenderer(bounds: containerView.frame, format: rendererFormat)
    let image = autoreleasepool {
        return renderer.image { context in
            containerSuperview.drawHierarchy(in: containerSuperview.layer.frame, afterScreenUpdates: true) //memory hog starts from here
        }
    }
    guard let data = image.heicData() else {
        return nil
    }
    //more code to save data to file URL and return it
}
Initially it appears to work normally, but as soon as I change the scale to rendererFormat.scale = 10, I can see a spike in memory, and the problem is that the memory is never released even after the image is saved. Initially the app uses about 35 MB of memory; while processing, usage jumps to an expected 250-300 MB for the large image; after processing it drops to around 90-120 MB, but it never returns to its original 35 MB.
Is this a bug, or is this expected?
If this is expected behaviour, is there any low-level API to free the memory after the job is done?
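The only mitigation I have sketched so far is to keep the UIImage and the encoded data inside one autoreleasepool and let only the file URL escape it, on the assumption that the retained memory is autoreleased bitmap/encoder storage; saveToTempFile is a placeholder of mine for the existing "save data to file URL" code:
func screenshot(view: UIView, scale: Double) -> URL? {
    guard let containerView = view.superview, let containerSuperview = containerView.superview else { return nil }
    let rendererFormat = UIGraphicsImageRendererFormat()
    rendererFormat.scale = scale
    let renderer = UIGraphicsImageRenderer(bounds: containerView.frame, format: rendererFormat)
    //keep the UIImage and the HEIC data inside a single pool so only the URL escapes
    return autoreleasepool { () -> URL? in
        let image = renderer.image { _ in
            containerSuperview.drawHierarchy(in: containerSuperview.layer.frame, afterScreenUpdates: true)
        }
        guard let data = image.heicData() else { return nil }
        return saveToTempFile(data) //placeholder for the existing save-to-file code
    }
}
Even with this, I am not sure whether the remaining 90-120 MB is something I can release myself or caching that the system reclaims under pressure.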