CGAffineTransform - How to Align a Video in the Center of the Screen

I've been able to center the middle of a 16:9 landscape video, crop the video, and then create a 9:16 portrait version of the video similar to how Apple does it in the Photos album.

The only issue is the resulting portrait video isn't centered in the middle of the screen (images below).

How can I get the resulting portrait video in the center of the screen?

/// Exports `videoURL` as a portrait .mp4 using the composition built by
/// `turnHorizontalVideoToPortraitVideo(asset:)`.
func createExportSession(for videoURL: URL) {

    let asset = AVURLAsset(url: videoURL)

    // Don't force-unwrap: the initializer returns nil when the preset
    // cannot be applied to this asset.
    guard let exporter = AVAssetExportSession(asset: asset, presetName: AVAssetExportPresetHighestQuality) else {
        return
    }

    exporter.videoComposition = turnHorizontalVideoToPortraitVideo(asset: asset)

    exporter.outputURL = // ...
    exporter.outputFileType = AVFileType.mp4
    exporter.shouldOptimizeForNetworkUse = true

    exporter.exportAsynchronously { [weak self] in

        // ...

       // the exporter.url is eventually added to an AVURLAsset and played inside an AVPlayer
    }
}

/// Builds a 720x1280 portrait AVVideoComposition from the asset's first
/// video track by rotating the landscape frames 90° and translating them
/// into the render rectangle.
/// NOTE(review): the replies in this thread point out that this version
/// only behaves correctly for landscapeRight sources.
func turnHorizontalVideoToPortraitVideo(asset: AVURLAsset) -> AVVideoComposition {

    // Assumes the asset has at least one video track.
    let track = asset.tracks(withMediaType: AVMediaType.video)[0]

    // Target portrait size (9:16).
    let renderSize = CGSize(width: 720, height: 1280)

    // Rotate 90° and shift right by the natural width so the rotated frame
    // lands back in positive coordinates.
    var transform1 = track.preferredTransform
    transform1 = transform1.concatenating(CGAffineTransform(rotationAngle: CGFloat(90.0 * .pi / 180)))
    transform1 = transform1.concatenating(CGAffineTransform(translationX: track.naturalSize.width, y: 0))

    // Offset used to pull a horizontally centered region out of the wide frame.
    let transform2 = CGAffineTransform(translationX: track.naturalSize.height, y: (track.naturalSize.width - track.naturalSize.height) / 2)

    let transform3 = transform2.rotated(by: CGFloat(Double.pi/2)).concatenating(transform1)

    // Rotate 180° about the far corner of the render rectangle to flip the
    // output right side up.
    let translate = CGAffineTransform(translationX: renderSize.width, y: renderSize.height)
    let rotateFromUpsideDown = translate.rotated(by: CGFloat(Double.pi)) // without this the portrait video is always upside down 

    let finalTransform = transform3.concatenating(rotateFromUpsideDown)

    let transformer = AVMutableVideoCompositionLayerInstruction(assetTrack: track)
    transformer.setTransform(finalTransform, at: .zero)

    let instruction = AVMutableVideoCompositionInstruction()
    instruction.timeRange = CMTimeRange(start: .zero, duration: asset.duration)
    instruction.layerInstructions = [transformer]

    let videoComposition = AVMutableVideoComposition()
    videoComposition.frameDuration = CMTime(value: 1, timescale: 30) // 30 fps
    videoComposition.renderSize = renderSize
    videoComposition.instructions = [instruction]
        
    return videoComposition
}

Initial horizontal video:

Resulting portrait video after running the above code. The portrait video is incorrectly centered:

This is the way that it should be centered:

Accepted Reply

I'm the same person who posted the other question and the 2 previous answers. This is what I came up with for both landscapeRight and landscapeLeft videos:

/// Converts a landscape video track into a centered 720x1280 portrait
/// composition, handling both landscapeRight and landscapeLeft recordings
/// by inspecting the track's preferredTransform.
func turnHorizontalVideoToPortraitVideo(asset: AVURLAsset) -> AVVideoComposition {

    // Assumes the asset has at least one video track.
    let track = asset.tracks(withMediaType: AVMediaType.video)[0]

    let renderSize = CGSize(width: 720, height: 1280)

    let t = track.preferredTransform

    // Identity preferredTransform (a = d = 1) indicates landscapeRight.
    if (t.a == 1.0 && t.b == 0 && t.c == 0 && t.d == 1.0) {
        print("landscapeRight")
    }

    // a = d = -1 is a 180° rotation, i.e. landscapeLeft.
    var isLandscapeLeft = false
    if (t.a == -1.0 && t.b == 0 && t.c == 0 && t.d == -1.0) {
        print("landscapeLeft")
        isLandscapeLeft = true
    }

    // Rotate the frame 90° and shift right by its natural width so the
    // rotated frame stays in positive coordinates.
    var transform1 = t
    transform1 = transform1.concatenating(CGAffineTransform(rotationAngle: CGFloat(90.0 * .pi / 180)))
    transform1 = transform1.concatenating(CGAffineTransform(translationX: track.naturalSize.width, y: 0))

    // Offset used to pull a horizontally centered region out of the wide frame.
    let transform2 = CGAffineTransform(translationX: track.naturalSize.height, y: (track.naturalSize.width - track.naturalSize.height) / 2)

    // landscapeLeft needs the opposite rotation direction.
    var p = Double.pi/2
    if isLandscapeLeft {
        p = -Double.pi/2
    }
    let transform3 = transform2.rotated(by: CGFloat(p)).concatenating(transform1)

    let finalTransform = transform3

    let transformer = AVMutableVideoCompositionLayerInstruction(assetTrack: track)

    if isLandscapeLeft {
            
        // Re-center vertically using a fraction of the transform's
        // y-translation. NOTE(review): the 2.5 divisor appears to be
        // empirical — confirm it holds for all source dimensions.
        let ty = finalTransform.ty
        let dividedNum = ty/2.5
            
        let translation = CGAffineTransform(translationX: 0, y: dividedNum)
        let new_finalTransform = finalTransform.concatenating(translation)
            
        transformer.setTransform(new_finalTransform, at: .zero)
    }

    if !isLandscapeLeft {
            
        // landscapeRight comes out upside down; rotate 180° about the far
        // corner of the render rectangle to flip it right side up.
        let translate = CGAffineTransform(translationX: renderSize.width, y: renderSize.height)
        let rotateFromUpsideDown = translate.rotated(by: CGFloat(Double.pi))
        let transformRotated = finalTransform.concatenating(rotateFromUpsideDown)
            
        // Halve the vertical offset to center the frame; clamp at 0 so the
        // video is never pushed above the top edge.
        let ty = transformRotated.ty
            
        var dividedNum = ty/2
            
        if dividedNum < 0 {
            dividedNum = 0
        }
            
        let translation = CGAffineTransform(translationX: 0, y: -dividedNum)
        let new_finalTransform = transformRotated.concatenating(translation)
            
        transformer.setTransform(new_finalTransform, at: .zero)
    }

    let instruction = AVMutableVideoCompositionInstruction()
    // Uncomment to visualize the empty (letterbox) space in yellow.
    //instruction.backgroundColor = UIColor.yellow.cgColor
    instruction.timeRange = CMTimeRange(start: .zero, duration: asset.duration)
    instruction.layerInstructions = [transformer]
        
    let videoComposition = AVMutableVideoComposition()
    videoComposition.frameDuration = CMTime(value: 1, timescale: 30) // 30 fps
    videoComposition.renderSize = renderSize
    videoComposition.instructions = [instruction]
        
    return videoComposition
}

Replies

If anyone has a better answer please post, I'll check and accept it

Unbeknownst to me, the video was in the correct position, but the negative black bar space was causing the video to appear misaligned. Setting the AVMutableVideoCompositionInstruction's .backgroundColor shows the negative black bar space issue in yellow:

instruction.backgroundColor = UIColor.yellow.cgColor

To fix it, I divided the finalTransform.ty in half and subtracted that from a translation-y-value, so now the code is:

// ...
// After composing the rotate/translate transforms, finalTransform.ty holds
// the frame's vertical offset inside the render rectangle.
let finalTransform = transform3.concatenating(rotateFromUpsideDown)

let ty = finalTransform.ty
        
// Shift up by half the offset to center the frame vertically; clamp at 0
// so the video is never pushed above the top edge.
var divided = ty/2
        
if divided < 0 {
    divided = 0
}
        
let translation = CGAffineTransform(translationX: 0, y: -divided)
let new_finalTransform = finalTransform.concatenating(translation)

let transformer = AVMutableVideoCompositionLayerInstruction(assetTrack: track)

transformer.setTransform(new_finalTransform, at: .zero)

// ...

The fix:

The fix with the negative black bar space in yellow to show how it's now centered:

I posted the above answer and it should be noted that the answer only works for videos that have an orientation of .landscapeRight.

You must check the following before using the above code:

// Inspect the track's preferredTransform to detect the recorded orientation.
let t = track.preferredTransform

// LandscapeRight: identity transform (a = d = 1).
if (t.a == 1.0 && t.b == 0 && t.c == 0 && t.d == 1.0) {
    print(" *** the above answer will only work for landscapeRight *** ")
    transform1 = transform1.concatenating(CGAffineTransform(rotationAngle: CGFloat(90.0 * .pi / 180)))
}

// LandscapeLeft: 180° rotation (a = d = -1).
if (t.a == -1.0 && t.b == 0 && t.c == 0 && t.d == -1.0) {
    print(" *** the above answer will NOT work for landscapeLeft *** ")
}

I'm the same person who posted the other question and the 2 previous answers. This is what I came up with for both landscapeRight and landscapeLeft videos:

/// Turns a landscape video track into a centered 720x1280 portrait
/// composition. Works for both landscapeRight and landscapeLeft sources,
/// detected via the track's preferredTransform.
func turnHorizontalVideoToPortraitVideo(asset: AVURLAsset) -> AVVideoComposition {

    // Assumes the asset has at least one video track.
    let track = asset.tracks(withMediaType: AVMediaType.video)[0]

    let renderSize = CGSize(width: 720, height: 1280)

    let t = track.preferredTransform

    // Identity transform (a = d = 1) means landscapeRight.
    if (t.a == 1.0 && t.b == 0 && t.c == 0 && t.d == 1.0) {
        print("landscapeRight")
    }

    // a = d = -1 is a 180° rotation, i.e. landscapeLeft.
    var isLandscapeLeft = false
    if (t.a == -1.0 && t.b == 0 && t.c == 0 && t.d == -1.0) {
        print("landscapeLeft")
        isLandscapeLeft = true
    }

    // Rotate 90° and translate by the natural width so the rotated frame
    // remains in positive coordinates.
    var transform1 = t
    transform1 = transform1.concatenating(CGAffineTransform(rotationAngle: CGFloat(90.0 * .pi / 180)))
    transform1 = transform1.concatenating(CGAffineTransform(translationX: track.naturalSize.width, y: 0))

    // Offset that selects a horizontally centered region of the wide frame.
    let transform2 = CGAffineTransform(translationX: track.naturalSize.height, y: (track.naturalSize.width - track.naturalSize.height) / 2)

    // Rotation direction flips for landscapeLeft.
    var p = Double.pi/2
    if isLandscapeLeft {
        p = -Double.pi/2
    }
    let transform3 = transform2.rotated(by: CGFloat(p)).concatenating(transform1)

    let finalTransform = transform3

    let transformer = AVMutableVideoCompositionLayerInstruction(assetTrack: track)

    if isLandscapeLeft {
            
        // Re-center vertically using a fraction of ty. NOTE(review): the
        // 2.5 divisor looks empirical — verify for other source sizes.
        let ty = finalTransform.ty
        let dividedNum = ty/2.5
            
        let translation = CGAffineTransform(translationX: 0, y: dividedNum)
        let new_finalTransform = finalTransform.concatenating(translation)
            
        transformer.setTransform(new_finalTransform, at: .zero)
    }

    if !isLandscapeLeft {
            
        // landscapeRight renders upside down; rotate 180° about the far
        // corner of the render rectangle to correct it.
        let translate = CGAffineTransform(translationX: renderSize.width, y: renderSize.height)
        let rotateFromUpsideDown = translate.rotated(by: CGFloat(Double.pi))
        let transformRotated = finalTransform.concatenating(rotateFromUpsideDown)
            
        // Halve the vertical offset to center; clamp at 0 so the frame is
        // never moved above the top edge.
        let ty = transformRotated.ty
            
        var dividedNum = ty/2
            
        if dividedNum < 0 {
            dividedNum = 0
        }
            
        let translation = CGAffineTransform(translationX: 0, y: -dividedNum)
        let new_finalTransform = transformRotated.concatenating(translation)
            
        transformer.setTransform(new_finalTransform, at: .zero)
    }

    let instruction = AVMutableVideoCompositionInstruction()
    // Uncomment to show the empty (letterbox) space in yellow.
    //instruction.backgroundColor = UIColor.yellow.cgColor
    instruction.timeRange = CMTimeRange(start: .zero, duration: asset.duration)
    instruction.layerInstructions = [transformer]
        
    let videoComposition = AVMutableVideoComposition()
    videoComposition.frameDuration = CMTime(value: 1, timescale: 30) // 30 fps
    videoComposition.renderSize = renderSize
    videoComposition.instructions = [instruction]
        
    return videoComposition
}

I came across this solution as a recommended result, so thought to share my solution here, as well.

What you seem to be describing is equivalent to fitting an image inside its frame: image.contentMode = .scaleAspectFit Here's the CGAffineTransform equivalent for an AVAssetTrack:

/// Returns a transform that aspect-fits `assetTrack` inside `renderSize`,
/// centering the scaled video — the CGAffineTransform equivalent of
/// `contentMode = .scaleAspectFit`.
/// - Parameters:
///   - assetTrack: The video track to fit.
///   - renderSize: The target render rectangle.
/// - Returns: The combined scale + center transform to apply to the track.
func scaleAspectFitTransform(for assetTrack: AVAssetTrack, into renderSize: CGSize) -> CGAffineTransform {
  // Apply the preferred transform so width/height reflect the display
  // orientation; abs() because the transform can negate components.
  let naturalSize = assetTrack.naturalSize.applying(assetTrack.preferredTransform)
  let absoluteSize = CGSize(width: abs(naturalSize.width),
               height: abs(naturalSize.height))

  // Aspect-fit: scale by whichever axis is the tighter constraint.
  let xFactor = renderSize.width / absoluteSize.width
  let yFactor = renderSize.height / absoluteSize.height

  let scaleFactor = min(xFactor, yFactor)

  // Scale the orientation transform, rescaling its translation as well so
  // a rotated frame stays anchored correctly.
  var output = assetTrack.preferredTransform.scaledBy(x: scaleFactor, y: scaleFactor)
  output.tx = assetTrack.preferredTransform.tx * scaleFactor
  output.ty = assetTrack.preferredTransform.ty * scaleFactor

  let compressedSize = CGSize(width: absoluteSize.width * scaleFactor,
                height: absoluteSize.height * scaleFactor)

  // Center the fitted video inside the render rectangle.
  let widthDiff = renderSize.width - compressedSize.width
  let heightDiff = renderSize.height - compressedSize.height
  let plainTranslation = CGAffineTransform.identity.translatedBy(x: widthDiff / 2,
                                  y: heightDiff / 2)

  return output.concatenating(plainTranslation)
} // fix: the original snippet was missing this closing brace

Then, apply it to your layer instructions:

// Use the first video track of the source asset as the transform's reference.
// NOTE(review): `renderSize`, `outputVideoTrack` and `videoAsset` are assumed
// to be defined by the surrounding code; `videoAsset` is presumably the same
// asset as `sourceAVAsset` — confirm.
guard let sourceVideoTrack = sourceAVAsset.tracks(withMediaType: .video).first else { return }
let transform = scaleAspectFitTransform(for: sourceVideoTrack, into: renderSize)

// NOTE(review): `var` could be `let` here — the instruction reference itself
// is never reassigned.
var layerInstruction = AVMutableVideoCompositionLayerInstruction(assetTrack: outputVideoTrack)
layerInstruction.setTransform(transform, at: .zero)

// One instruction covering the whole duration of the asset.
let compositionInstruction = AVMutableVideoCompositionInstruction()
compositionInstruction.timeRange = CMTimeRangeMake(start: .zero, duration: videoAsset.duration)
compositionInstruction.layerInstructions = [layerInstruction]
   
let videoComposition = AVMutableVideoComposition()
videoComposition.renderSize = renderSize
videoComposition.instructions = [compositionInstruction]