tucan9389 / TFLiteSwift-Vision


TOBE preprocessing flow #18

Open tucan9389 opened 3 years ago

tucan9389 commented 3 years ago

Overall processing

Image type → Array → Normalization → Data
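
For concreteness, a minimal end-to-end sketch of this flow, composed from the extensions drafted in the comments below (the preprocess wrapper itself and the scaled range are hypothetical, not part of the repo):

import UIKit
import TensorFlowLite

// Hypothetical wrapper: UIImage → CVPixelBuffer → resized buffer → normalized Data.
// Assumes the source and target aspect ratios match (see resize(from:to:) below).
func preprocess(_ image: UIImage, inputSize: CGSize) -> Data? {
    guard let pixelBuffer = image.pixelBufferFromImage() else { return nil }
    let sourceRect = CGRect(origin: .zero, size: pixelBuffer.size)
    guard let resized = pixelBuffer.resize(from: sourceRect, to: inputSize) else { return nil }
    return resized.rgbData(normalization: .scaled(from: 0.0, to: 1.0),
                           isModelQuantized: false,
                           dataType: .float32)
}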

Image processing

Type flow

Primitive image types are CVPixelBuffer and CGImage. If you start from a UIImage or CIImage, convert to one of the primitive types first (a conversion sketch follows the list below).

  1. CVPixelBuffer → vImage_Buffer → CVPixelBufferCreateWithBytes
    1. UIImage → CVPixelBuffer
  2. CGImage → .dataProvider?.data
    1. UIImage → CGImage
    2. CIImage → CGImage
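
A minimal sketch of the UIImage/CIImage → CGImage paths above (the primitiveCGImage helper name is an assumption):

import UIKit

extension UIImage {
    // Hypothetical helper covering paths 2-1 and 2-2 from the list above.
    var primitiveCGImage: CGImage? {
        if let cgImage = self.cgImage { return cgImage }  // UIImage → CGImage
        if let ciImage = self.ciImage {                   // CIImage → CGImage
            return CIContext(options: nil).createCGImage(ciImage, from: ciImage.extent)
        }
        return nil
    }
}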

Resizing

Converting colorspace

tucan9389 commented 3 years ago

CGImage

Resize and Convert Colorspace to gray

Source code from the linked TFLite example (the URL is in the code comment); still needs to be verified.

import CoreGraphics

extension CGImage {
    /// source: https://github.com/tensorflow/examples/blob/master/lite/examples/digit_classifier/ios/DigitClassifier/TFLiteExtensions.swift
    public func grayImage(width: Int, height: Int) -> CGImage? {
        let cgImage = self
        guard cgImage.width > 0, cgImage.height > 0 else { return nil }

        let size = CGSize(width: width, height: height)

        let bitmapInfo = CGBitmapInfo(
            rawValue: CGImageAlphaInfo.none.rawValue
        )

        guard let context = CGContext(
                data: nil,
                width: width,
                height: height,
                bitsPerComponent: 8, // 8-bit gray, matching bytesPerRow of width * 1
                bytesPerRow: width * 1,
                space: CGColorSpaceCreateDeviceGray(),
                bitmapInfo: bitmapInfo.rawValue)
          else {
            return nil
        }
        context.draw(cgImage, in: CGRect(origin: .zero, size: size))
        let outputCGImage = context.makeImage()
        return outputCGImage
    }
}
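
Hypothetical usage, assuming a CGImage-backed UIImage and a stand-in 28×28 input size:

import UIKit

func grayInput(from image: UIImage) -> CGImage? {
    return image.cgImage?.grayImage(width: 28, height: 28) // stand-in model input size
}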

Convert gray CGImage to Data

Source code from the linked TFLite example (the URL is in the code comment); still needs to be verified.

import CoreGraphics
import TensorFlowLite

extension CGImage {
    /// source: https://github.com/tensorflow/examples/blob/master/lite/examples/digit_classifier/ios/DigitClassifier/TFLiteExtensions.swift
    /// `UIImage` → `CGContext`(resize and make gray data) → `CGImage` → Byte `Array`(and normalization) → `Data`
    public func grayData(
        width: Int,
        height: Int,
        normalization: TFLiteVisionInterpreter.NormalizationOptions = .none,
        dataType: Tensor.DataType = .float32) -> Data? {

        guard let pixelBytes = grayImage(width: width, height: height)?.dataProvider?.data as Data? else { return nil }

        switch dataType {
        case .uInt8:
            return Data(copyingBufferOf: pixelBytes.map { UInt8($0) })
        case .float32:
            switch normalization {
            case .none:
                return Data(copyingBufferOf: pixelBytes.map { Float($0) })
            case .scaled(from: let from, to: let to):
                return Data(copyingBufferOf: pixelBytes.map { element -> Float in ((Float(element) * (1.0 / 255.0)) * (to - from)) + from })
            case .meanStd(mean: let mean, std: let std):
                var bytes = pixelBytes.map { Float($0) } // normalization
                for i in 0 ..< width * height {
                    bytes[i] = (bytes[i] - mean[0]) / std[0] // single gray channel
                }
                return Data(copyingBufferOf: bytes)
            }
        default:
            fatalError("Unsupported data type: \(dataType)")
        }
    }
}
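
Hypothetical usage producing a mean/std-normalized Float32 input (the 127.5 values are stand-ins, not from the repo):

import UIKit
import TensorFlowLite

func grayTensorData(from image: UIImage) -> Data? {
    return image.cgImage?.grayData(
        width: 28, height: 28, // stand-in model input size
        normalization: .meanStd(mean: [127.5], std: [127.5]),
        dataType: .float32)
}
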
tucan9389 commented 3 years ago

CVPixelBuffer

Resize RGB CVPixelBuffer

Checked; it works.

import Accelerate
import Foundation
import TensorFlowLite

extension CVPixelBuffer {
    var size: CGSize {
        return CGSize(width: CVPixelBufferGetWidth(self), height: CVPixelBufferGetHeight(self))
    }

    /// Returns a new `CVPixelBuffer` created by cropping the given source area of this buffer
    /// and resizing it to the specified target size. The aspect ratios of the source area and
    /// the destination size are expected to match.
    ///
    /// - Parameters:
    ///   - source: Source area of the image to be cropped and resized.
    ///   - size: Size to scale the image to (i.e. the input size used while training the model).
    /// - Returns: The cropped and resized image.
    func resize(from source: CGRect, to size: CGSize) -> CVPixelBuffer? {
        let inputImageRowBytes = CVPixelBufferGetBytesPerRow(self)
        let imageChannels = 4

        CVPixelBufferLockBaseAddress(self, CVPixelBufferLockFlags(rawValue: 0))
        defer { CVPixelBufferUnlockBaseAddress(self, CVPixelBufferLockFlags(rawValue: 0)) }

        // Finds the address of the upper leftmost pixel of the source area.
        guard
            let inputBaseAddress = CVPixelBufferGetBaseAddress(self)?.advanced(
                by: Int(source.minY) * inputImageRowBytes + Int(source.minX) * imageChannels)
            else {
                return nil
        }

        // Crops given area as vImage Buffer.
        var croppedImage = vImage_Buffer(
            data: inputBaseAddress, height: UInt(source.height), width: UInt(source.width),
            rowBytes: inputImageRowBytes)

        let resultRowBytes = Int(size.width) * imageChannels
        guard let resultAddress = malloc(Int(size.height) * resultRowBytes) else {
            return nil
        }

        // Allocates a vacant vImage buffer for resized image.
        var resizedImage = vImage_Buffer(
            data: resultAddress,
            height: UInt(size.height), width: UInt(size.width),
            rowBytes: resultRowBytes
        )

        // Performs the scale operation on cropped image and stores it in result image buffer.
        guard vImageScale_ARGB8888(&croppedImage, &resizedImage, nil, vImage_Flags(0)) == kvImageNoError
            else {
                return nil
        }

        let releaseCallBack: CVPixelBufferReleaseBytesCallback = { mutablePointer, pointer in
            if let pointer = pointer {
                free(UnsafeMutableRawPointer(mutating: pointer))
            }
        }

        var result: CVPixelBuffer?

        // Converts the thumbnail vImage buffer to CVPixelBuffer
        let conversionStatus = CVPixelBufferCreateWithBytes(
            nil,
            Int(size.width), Int(size.height),
            CVPixelBufferGetPixelFormatType(self),
            resultAddress,
            resultRowBytes,
            releaseCallBack,
            nil,
            nil,
            &result
        )

        guard conversionStatus == kCVReturnSuccess else {
            free(resultAddress)
            return nil
        }

        return result
    }
}
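
Because the method expects matching aspect ratios, a typical call site center-crops to a square first. A hypothetical sketch (assumes a square inputSize):

import CoreGraphics
import CoreVideo

func centerCropAndResize(_ buffer: CVPixelBuffer, to inputSize: CGSize) -> CVPixelBuffer? {
    // Center-crop to a square so the source and target aspect ratios match.
    let side = min(buffer.size.width, buffer.size.height)
    let cropRect = CGRect(x: (buffer.size.width - side) / 2,
                          y: (buffer.size.height - side) / 2,
                          width: side, height: side)
    return buffer.resize(from: cropRect, to: inputSize)
}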

Convert RGB CVPixelBuffer to Data

Checked; it works.

import Accelerate
import Foundation
import os.log
import TensorFlowLite

extension CVPixelBuffer {
    func rgbData(
        normalization: TFLiteVisionInterpreter.NormalizationOptions = .none,
        isModelQuantized: Bool,
        dataType: Tensor.DataType = .float32) -> Data? {
        CVPixelBufferLockBaseAddress(self, .readOnly)
        defer { CVPixelBufferUnlockBaseAddress(self, .readOnly) }
        guard let sourceData = CVPixelBufferGetBaseAddress(self) else {
            return nil
        }

        let width = CVPixelBufferGetWidth(self)
        let height = CVPixelBufferGetHeight(self)
        let sourceBytesPerRow = CVPixelBufferGetBytesPerRow(self)
        let destinationBytesPerRow = 3 * width

        // Assign input image to `sourceBuffer` to convert it.
        var sourceBuffer = vImage_Buffer(
            data: sourceData,
            height: vImagePixelCount(height),
            width: vImagePixelCount(width),
            rowBytes: sourceBytesPerRow
        )

        // Make `destinationBuffer` and `destinationData` for its data to be assigned.
        guard let destinationData = malloc(height * destinationBytesPerRow) else {
            os_log("Error: out of memory", type: .error)
            return nil
        }
        defer { free(destinationData) }
        var destinationBuffer = vImage_Buffer(
            data: destinationData,
            height: vImagePixelCount(height),
            width: vImagePixelCount(width),
            rowBytes: destinationBytesPerRow)

        // Convert image type.
        switch CVPixelBufferGetPixelFormatType(self) {
        case kCVPixelFormatType_32BGRA:
            vImageConvert_BGRA8888toRGB888(&sourceBuffer, &destinationBuffer, UInt32(kvImageNoFlags))
        case kCVPixelFormatType_32ARGB:
            vImageConvert_ARGB8888toRGB888(&sourceBuffer, &destinationBuffer, UInt32(kvImageNoFlags))
        default:
            os_log("The type of this image is not supported.", type: .error)
            return nil
        }

        // Make `Data` with converted image.
        let imageByteData = Data(
            bytes: destinationBuffer.data, count: destinationBuffer.rowBytes * height)

        if isModelQuantized { return imageByteData }

        let imageBytes = [UInt8](imageByteData)

        switch dataType {
        case .uInt8:
            return Data(copyingBufferOf: imageBytes)
        case .float32:
            switch normalization {
            case .none:
                return Data(copyingBufferOf: imageBytes.map { Float($0) })
            case .scaled(from: let from, to: let to):
                return Data(copyingBufferOf: imageBytes.map { element -> Float in ((Float(element) * (1.0 / 255.0)) * (to - from)) + from })
            case .meanStd(mean: let mean, std: let std):
                var bytes = imageBytes.map { Float($0) } // normalization
                // Note: this writes channel-planar output (all R, then all G, then all B),
                // while the .none and .scaled cases keep the interleaved RGB layout.
                for i in 0 ..< width * height {
                    bytes[width * height * 0 + i] = (Float32(imageBytes[i * 3 + 0]) - mean[0]) / std[0] // R
                    bytes[width * height * 1 + i] = (Float32(imageBytes[i * 3 + 1]) - mean[1]) / std[1] // G
                    bytes[width * height * 2 + i] = (Float32(imageBytes[i * 3 + 2]) - mean[2]) / std[2] // B
                }
                return Data(copyingBufferOf: bytes)
            }
        default:
            fatalError("Unsupported data type: \(dataType)")
        }
    }
}
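
Hypothetical usage for a quantized model, where the raw bytes are returned before any normalization:

import CoreVideo
import TensorFlowLite

func quantizedRGBData(from buffer: CVPixelBuffer) -> Data? {
    // isModelQuantized short-circuits the dataType switch and returns UInt8 RGB bytes.
    return buffer.rgbData(normalization: .none, isModelQuantized: true, dataType: .uInt8)
}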

Convert gray CVPixelBuffer to Data

Not verified yet.

import Accelerate
import Foundation
import TensorFlowLite

extension CVPixelBuffer {
    func grayData(
        normalization: TFLiteVisionInterpreter.NormalizationOptions = .none,
        dataType: Tensor.DataType = .float32) -> Data? {

        CVPixelBufferLockBaseAddress(self, .readOnly)
        defer { CVPixelBufferUnlockBaseAddress(self, .readOnly) }
        guard let baseAddress = CVPixelBufferGetBaseAddress(self) else { return nil }

        let width = CVPixelBufferGetWidth(self)
        let height = CVPixelBufferGetHeight(self)
        let bytesPerRow = CVPixelBufferGetBytesPerRow(self)

        let buffer = baseAddress.assumingMemoryBound(to: UInt8.self)
        var imageBytes = [UInt8](repeating: 0, count: width * height)
        // Copy row by row so any row padding (bytesPerRow > width) is skipped.
        for row in 0 ..< height {
            for col in 0 ..< width {
                imageBytes[row * width + col] = buffer[row * bytesPerRow + col]
            }
        }

        switch dataType {
        case .uInt8:
            return Data(copyingBufferOf: imageBytes)
        case .float32:
            switch normalization {
            case .none:
                return Data(copyingBufferOf: imageBytes.map { Float($0) })
            case .scaled(from: let from, to: let to):
                return Data(copyingBufferOf: imageBytes.map { element -> Float in ((Float(element) * (1.0 / 255.0)) * (to - from)) + from })
            case .meanStd(mean: let mean, std: let std):
                var bytes = imageBytes.map { Float($0) } // normalization
                for i in 0 ..< width * height {
                    bytes[i] = (bytes[i] - mean[0]) / std[0] // single gray channel
                }
                return Data(copyingBufferOf: bytes)
            }
        default:
            fatalError("Unsupported data type: \(dataType)")
        }
    }
}
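
Hypothetical usage. The extension reads one byte per pixel, so it only makes sense for single-channel 8-bit buffers; checking the pixel format first seems prudent:

import CoreVideo
import TensorFlowLite

func grayTensorData(from buffer: CVPixelBuffer) -> Data? {
    // Guard against non-gray formats (the 127.5 values are stand-ins).
    guard CVPixelBufferGetPixelFormatType(buffer) == kCVPixelFormatType_OneComponent8 else { return nil }
    return buffer.grayData(normalization: .meanStd(mean: [127.5], std: [127.5]),
                           dataType: .float32)
}
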
tucan9389 commented 3 years ago

Data extension

Source code from tensorflow/examples.

import Foundation

extension Data {
    /// Creates a new buffer by copying the buffer pointer of the given array.
    ///
    /// - Warning: The given array's element type `T` must be trivial in that it can be copied bit
    ///     for bit with no indirection or reference-counting operations; otherwise, reinterpreting
    ///     data from the resulting buffer has undefined behavior.
    /// - Parameter array: An array with elements of type `T`.
    init<T>(copyingBufferOf array: [T]) {
        self = array.withUnsafeBufferPointer(Data.init)
    }

    /// Convert a Data instance to Array representation.
    func toArray<T>(type: T.Type) -> [T] where T: AdditiveArithmetic {
        var array = [T](repeating: T.zero, count: self.count / MemoryLayout<T>.stride)
        _ = array.withUnsafeMutableBytes { self.copyBytes(to: $0) }
        return array
    }
}
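
A quick round-trip sanity check of the two helpers (hypothetical):

import Foundation

let floats: [Float] = [0.25, 0.5, 0.75]
let data = Data(copyingBufferOf: floats)          // [Float] → Data
assert(data.toArray(type: Float.self) == floats)  // Data → [Float]
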
tucan9389 commented 3 years ago

UIImage

Resize

import UIKit

extension UIImage {
    func resized(targetSize: CGSize) -> UIImage {
        // Scale 1.0 makes the pixel dimensions equal `targetSize`,
        // independent of the device's screen scale.
        UIGraphicsBeginImageContextWithOptions(targetSize, false, 1.0)
        self.draw(in: CGRect(origin: .zero, size: targetSize))
        let resizedImage = UIGraphicsGetImageFromCurrentImageContext()!
        UIGraphicsEndImageContext()
        return resizedImage
    }
}

Convert RGB UIImage to CVPixelBuffer

import UIKit

extension UIImage {
    func pixelBufferFromImage() -> CVPixelBuffer? {
        // UIImage → CIImage → CGImage
        guard let ciimage = CIImage(image: self) else { return nil }
        let tmpcontext = CIContext(options: nil)
        guard let cgimage = tmpcontext.createCGImage(ciimage, from: ciimage.extent) else { return nil }

        // Pixel buffer attributes, bridged from a Swift dictionary.
        let options = [
            kCVPixelBufferCGImageCompatibilityKey: true,
            kCVPixelBufferCGBitmapContextCompatibilityKey: true
        ] as CFDictionary

        let width = cgimage.width
        let height = cgimage.height

        var pxbuffer: CVPixelBuffer?
        // -6661 (kCVReturnInvalidArgument) is returned for invalid arguments.
        let status = CVPixelBufferCreate(kCFAllocatorDefault, width, height,
                                         kCVPixelFormatType_32BGRA, options, &pxbuffer)
        guard status == kCVReturnSuccess, let buffer = pxbuffer else { return nil }

        CVPixelBufferLockBaseAddress(buffer, CVPixelBufferLockFlags(rawValue: 0))
        defer { CVPixelBufferUnlockBaseAddress(buffer, CVPixelBufferLockFlags(rawValue: 0)) }

        let bufferAddress = CVPixelBufferGetBaseAddress(buffer)
        let rgbColorSpace = CGColorSpaceCreateDeviceRGB()
        let bytesperrow = CVPixelBufferGetBytesPerRow(buffer)
        let context = CGContext(data: bufferAddress,
                                width: width,
                                height: height,
                                bitsPerComponent: 8,
                                bytesPerRow: bytesperrow,
                                space: rgbColorSpace,
                                bitmapInfo: CGImageAlphaInfo.premultipliedFirst.rawValue | CGBitmapInfo.byteOrder32Little.rawValue)
        // context?.concatenate(CGAffineTransform(scaleX: 1, y: -1).translatedBy(x: 0, y: CGFloat(-height))) // flip vertically
        // context?.concatenate(CGAffineTransform(scaleX: -1, y: 1).translatedBy(x: CGFloat(-width), y: 0)) // flip horizontally
        context?.draw(cgimage, in: CGRect(x: 0, y: 0, width: CGFloat(width), height: CGFloat(height)))
        return buffer
    }
}