Decode HLS livestream with VideoToolbox

Hi, I'm trying to decode a HLS livestream with VideoToolbox. The CMSampleBuffer is successfully created (OSStatus == noErr). When I enqueue the CMSampleBuffer to a AVSampleBufferDisplayLayer the view isn't displaying anything and the status of the AVSampleBufferDisplayLayer is 1 (rendering). When I use a VTDecompressionSession to convert the CMSampleBuffer to a CVPixelBuffer the VTDecompressionOutputCallback returns a -8969 (bad data error). What do I need to fix in my code? Do I incorrectly parse the data from the segment for the CMSampleBuffer?

let segmentData = try await downloadSegment(from: segment.url)
let (sps, pps, idr) = try parseH264FromTSSegment(tsData: segmentData)
if self.formatDescription == nil {
       self.formatDescription = try CMFormatDescription(h264ParameterSets: [sps, pps])
}
    
if let sampleBuffer = try createSampleBuffer(from: idr, segment: segment) {
     try self.decodeSampleBuffer(sampleBuffer)
} 

func parseH264FromTSSegment(tsData: Data) throws -> (sps: Data, pps: Data, idr: Data) {
    let tsSize = 188
    var pesData = Data()

    for i in stride(from: 0, to: tsData.count, by: tsSize) {
        let tsPacket = tsData.subdata(in: i..<min(i + tsSize, tsData.count))

       guard let payload = extractPayloadFromTSPacket(tsPacket) else { continue }
        pesData.append(payload)
    }

    let nalUnits = parseNalUnits(from: pesData)

    var sps: Data?
    var pps: Data?
    var idr: Data?

    for nalUnit in nalUnits {
        guard let firstByte = nalUnit.first else { continue }
        let nalType = firstByte & 0x1F
        switch nalType {
        case 7: // SPS
           sps = nalUnit
        case 8: // PPS
            pps = nalUnit
        case 5: // IDR
            idr = nalUnit
        default:
            break
        }

        if sps != nil, pps != nil, idr != nil {
            break
        }
    }

    guard let validSPS = sps, let validPPS = pps, let validIDR = idr else {
        throw NSError()
     }
    return (validSPS, validPPS, validIDR)
}

func extractPayloadFromTSPacket(_ tsPacket: Data) -> Data? {
  let syncByte: UInt8 = 0x47
  guard tsPacket.count == 188, tsPacket[0] == syncByte else {
      return nil
  }
    let payloadStart = (tsPacket[1] & 0x40) != 0
    let adaptationFieldControl = (tsPacket[3] & 0x30) >> 4

    var payloadOffset = 4
    if adaptationFieldControl == 2 || adaptationFieldControl == 3 {
        let adaptationFieldLength = Int(tsPacket[4])
        payloadOffset += 1 + adaptationFieldLength
    }
    guard adaptationFieldControl == 1 || adaptationFieldControl == 3 else {
        return nil
    }
    let payload = tsPacket.subdata(in: payloadOffset..<tsPacket.count)
    return payloadStart ? payload : nil
}

func parseNalUnits(from h264Data: Data) -> [Data] {
     let startCode = Data([0x00, 0x00, 0x00, 0x01])
     var nalUnits: [Data] = []
     var searchRange = h264Data.startIndex..<h264Data.endIndex

    while let range = h264Data.range(of: startCode, options: [], in: searchRange) {
        let nextStart = h264Data.range(of: startCode, options: [], in: range.upperBound..<h264Data.endIndex)?.lowerBound ?? h264Data.endIndex
        let nalUnit = h264Data.subdata(in: range.upperBound..<nextStart)
        nalUnits.append(nalUnit)
        searchRange = nextStart..<h264Data.endIndex
     }
     return nalUnits
}

private func createSampleBuffer(from data: Data, segment: HLSSegment) throws -> CMSampleBuffer? {
    var blockBuffer: CMBlockBuffer?
    let alignedData = UnsafeMutableRawPointer.allocate(byteCount: data.count, alignment: MemoryLayout<UInt8>.alignment)
    data.copyBytes(to: alignedData.assumingMemoryBound(to: UInt8.self), count: data.count)
    
    let blockStatus = CMBlockBufferCreateWithMemoryBlock(
        allocator: kCFAllocatorDefault,
        memoryBlock: alignedData,
        blockLength: data.count,
        blockAllocator: nil,
        customBlockSource: nil,
       offsetToData: 0,
       dataLength: data.count,
       flags: 0,
       blockBufferOut: &blockBuffer
    )

    guard blockStatus == kCMBlockBufferNoErr, let validBlockBuffer = blockBuffer else {
        alignedData.deallocate()
        throw NSError()
    }

    var sampleBuffer: CMSampleBuffer?
    var timing = [calculateTiming(for: segment)]
    var sampleSizes = [data.count]
    let sampleStatus = CMSampleBufferCreate(
        allocator: kCFAllocatorDefault,
        dataBuffer: validBlockBuffer,
        dataReady: true,
        makeDataReadyCallback: nil,
        refcon: nil,
       formatDescription: formatDescription,
       sampleCount: 1,
       sampleTimingEntryCount: 1,
       sampleTimingArray: &timing,
       sampleSizeEntryCount: sampleSizes.count,
       sampleSizeArray: &sampleSizes,
       sampleBufferOut: &sampleBuffer
    )
    guard sampleStatus == noErr else {
        alignedData.deallocate()
        throw NSError()
    }
    return sampleBuffer
}

private func decodeSampleBuffer(_ sampleBuffer: CMSampleBuffer) throws {
    guard let formatDescription = CMSampleBufferGetFormatDescription(sampleBuffer) else {
        throw NSError()
    }
 
    if decompressionSession == nil {
        try setupDecompressionSession(formatDescription: formatDescription)
    }

    guard let session = decompressionSession else {
        throw NSError()
    }
 
    let flags: VTDecodeFrameFlags = [._EnableAsynchronousDecompression, ._EnableTemporalProcessing]
    var flagOut = VTDecodeInfoFlags()
     
   let status = VTDecompressionSessionDecodeFrame(
       session,
       sampleBuffer: sampleBuffer,
       flags: flags,
       frameRefcon: nil,
       infoFlagsOut: nil)

    if status != noErr {
        throw NSError()
    }
}

private func setupDecompressionSession(formatDescription: CMFormatDescription) throws {
   self.formatDescription = formatDescription
   if let session = decompressionSession {
       VTDecompressionSessionInvalidate(session)
       self.decompressionSession = nil
   }
   var decompressionSession: VTDecompressionSession?
   var callback = VTDecompressionOutputCallbackRecord(
      decompressionOutputCallback: decompressionOutputCallback,
      decompressionOutputRefCon: Unmanaged.passUnretained(self).toOpaque())

   let status = VTDecompressionSessionCreate(
        allocator: kCFAllocatorDefault,
        formatDescription: formatDescription,
        decoderSpecification: nil,
        imageBufferAttributes: nil,
        outputCallback: &callback,
        decompressionSessionOut: &decompressionSession
    )
    if status != noErr {
        throw NSError()
    }
    self.decompressionSession = decompressionSession
}

let decompressionOutputCallback: VTDecompressionOutputCallback = { (
    decompressionOutputRefCon,
    sourceFrameRefCon,
    status,
    infoFlags,
    imageBuffer,
    presentationTimeStamp,
    presentationDuration
) in
    guard status == noErr else {
        print("Callback: \(status)")
        return
     }
   
    if let imageBuffer = imageBuffer {
    }
}

Can I ask a dumb question? Is there a reason you need to customize the decoding of an HLS stream yourself? Does your stream work with AVURLAsset -> url of HLS playlist -> AVPlayer?

Just curious!

As for the error, are you certain you are vending samples to the decoder in DTS order, and that your CMSampleBuffers contain Block Buffers with fully demarcated NAL units, sync info and extra data?

I dont know how to resolve your issue specifically to be honest, but ive absolutely seen that not fully setup sample buffers can cause the decoder to fail in unexpected ways.

Decode HLS livestream with VideoToolbox
 
 
Q