I am trying to determine the corners of a RoomPlan-detected wall using the information available in the ARView session's frame, but can't quite figure out what I'm doing wrong. The corners appear to be correct relative to each other, but the wall appears too large when I render it. (I'm also not sure I'm handling the image rotation correctly either, which may be compounding my problem). Here is the code I currently have, along with a sample image, and the resulting image when I pass it through the perspective filter. it is close but isn't cropping the walls and floors correctly.
func captureSession(_ session: RoomCaptureSession, didChange room: CapturedRoom) {
for surface in room.walls {
if let frame = self.arView.session.currentFrame {
var image: CGImage? = nil
VTCreateCGImageFromCVPixelBuffer(frame.capturedImage, options: nil, imageOut: &image)
let wallTransform = surface.transform
let cameraTransform = frame.camera.transform
let intrinsics = frame.camera.intrinsics
let projectionMatrix = frame.camera.projectionMatrix
let width = surface.dimensions.y
let height = surface.dimensions.x
let inverseCameraTransform = simd_inverse(cameraTransform)
let wallTopRight = simd_float4(width/2, height/2, 0, 1)
let wallTopLeft = simd_float4(-width/2, height/2, 0, 1)
let wallBottomRight = simd_float4(width/2, -height/2, 0, 1)
let wallBottomLeft = simd_float4(-width/2, -height/2, 0, 1)
let worldTopRight = wallTransform * wallTopRight
let worldTopLeft = wallTransform * wallTopLeft
let worldBottomRight = wallTransform * wallBottomRight
let worldBottomLeft = wallTransform * wallBottomLeft
let cameraTopRight = projectionMatrix * inverseCameraTransform * worldTopRight
let cameraTopLeft = projectionMatrix * inverseCameraTransform * worldTopLeft
let cameraBottomRight = projectionMatrix * inverseCameraTransform * worldBottomRight
let cameraBottomLeft = projectionMatrix * inverseCameraTransform * worldBottomLeft
let imageTopRight = intrinsics * simd_float3(cameraTopRight.x / cameraTopRight.w, cameraTopRight.y / cameraTopRight.w, cameraTopRight.z / cameraTopRight.w)
let imageTopLeft = intrinsics * simd_float3(cameraTopLeft.x / cameraTopLeft.w, cameraTopLeft.y / cameraTopLeft.w, cameraTopLeft.z / cameraTopLeft.w)
let imageBottomRight = intrinsics * simd_float3(cameraBottomRight.x / cameraBottomRight.w, cameraBottomRight.y / cameraBottomRight.w, cameraBottomRight.z / cameraBottomRight.w)
let imageBottomLeft = intrinsics * simd_float3(cameraBottomLeft.x / cameraBottomLeft.w, cameraBottomLeft.y / cameraBottomLeft.w, cameraBottomLeft.z / cameraBottomLeft.w)
let topRight = CGPoint(x: CGFloat(imageTopRight.x), y: CGFloat(imageTopRight.y))
let topLeft = CGPoint(x: CGFloat(imageTopLeft.x), y: CGFloat(imageTopLeft.y))
let bottomRight = CGPoint(x: CGFloat(imageBottomRight.x), y: CGFloat(imageBottomRight.y))
let bottomLeft = CGPoint(x: CGFloat(imageBottomLeft.x), y: CGFloat(imageBottomLeft.y))
if let image {
let filter = CIFilter.perspectiveCorrection()
filter.inputImage = CIImage(image: UIImage(cgImage: image))
filter.topRight = topRight
filter.topLeft = topLeft
filter.bottomRight = bottomRight
filter.bottomLeft = bottomLeft
let transformedImage = filter.outputImage
if let transformedImage {
let context = CIContext()
if let outputImage = context.createCGImage(transformedImage, from: transformedImage.extent) {
let wall = Wall(id: surface.identifier, image: outputImage, surface: surface)
self.walls.append(wall)
}
}
}
}
}
}