Extract integers from binary based on number of bits used for each integer

Hello.


I have an array of bytes, e.g.


let arrayToCheck1 : [UInt8] = [0x47,0x52,0x49,0x42,0x0,0x0,0x0,0x2,0x0,0x0,0x0,0x0,0x0,0x0,0x81,0x35]


I have to extract an array of Integers from arrayToCheck1 where each integer is represented by a certain number of bits. For example, if the number of bits is 5, my values will be like this:


01000 11101 01001 00100 10010 10000 etc.


I.e., the whole array of bytes will be transfered into the following:


[8, 29, 9, 4, 18, 16, 16, 0, 0, 0, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 1, 6]


I do this by implementing the following code:


// Method to convert an arry of bytes into an array of integers based on number of bits for one integer
static func getValuesArrayFromBits(bytes byteArray:[UInt8], width fieldWidth:Int, totalBits bits:Int) -> [Int64]
    {
        // Convert the array of bytes into a binary string
        let convertedString = byteArrayToBinaryString(bytes: byteArray);
     
        // Convert the binary string into an arry of integers
        let finalResult = bitString2Ints(from: convertedString, width: fieldWidth, totalBits: bits)

        return finalResult;
    }

// Converts an array of bytes to a String.
static func byteArrayToBinaryString(bytes byteArray:[UInt8]) -> String
{
        var arrayOfStrings = [String]()

        for i in 0..<byteArray.count
        {
            arrayOfStrings.append(byteToBinaryString(byte: byteArray[i]));
        }

        var bitsetString = ""
     
        for stringElement in arrayOfStrings
        {
            bitsetString += stringElement;
        }

        return bitsetString;
    }


// Converts one byte to a binary string.
static func byteToBinaryString(byte byteIn:UInt8) -> String
{
        var sb = "00000000"
     
        func replace(myString: String, _ index: UInt8, _ newChar: Character) -> String
        {
            var chars = Array(myString.characters)     /
            chars[Int(index)] = newChar
            let modifiedString = String(chars)
            return modifiedString
        }
     
        let bits : [UInt8] = [0,1,2,3,4,5,6,7]

        for bit in bits
        {
            if (((byteIn >> bit) & 1) > 0)
            {
                sb = replace(myString: sb, (7 - bit), "1")
            }
        }

        return sb;
}

//  Convert the binary string into an arry of integers
static func bitString2Ints(from binaryString : String, width bitWidth:Int, totalBits bits:Int) -> [Int64]
{
        var finalArrayList = [Int64](repeating: 0, count: (bits/bitWidth))
  
        if(binaryString.characters.count < bits)
        {
            let errorMessage = "Error!"
              
            print(errorMessage)
              
            finalArrayList[0] = -1;
        }
        else
        {
            var counterIndex = 0;
          
            for i in stride(from: 0, to: bits, by: bitWidth)
            {
                let binarySubstring = binaryString.substring(index: i, length: bitWidth)
              
                finalArrayList[counterIndex] = Int64(strtoul(binarySubstring, nil, 2))
              
                counterIndex += 1
            }
        }
  
        return finalArrayList;
}


The code works. However, the problem is that it is VERY slow when I have an array of, for example, 30000 and more bytes.


Is there a more efficient way to extract integers from binary data based on a certain number of bits used for each integer?


Thanks a lot!

Accepted Reply

You can define a bit-sequence retrieval function like this:

func bits(_ array: [UInt8], offset: Int, bitSize: Int) -> UInt64 {
    assert(1...64 ~= bitSize)
    func arrayValue(_ offset: Int) -> UInt64 {
        if offset >= array.count {
            return 0
        } else {
            return UInt64(array[offset])
        }
    }
  
    let startByteOffset = offset / 8
    let startBitOffset = UInt64(offset % 8)
    let firstByteMask = 0xFF >> startBitOffset
    let endByteOffset = (offset + bitSize - 1) / 8
    let endByteBits = UInt64((offset + bitSize - 1) % 8 + 1)
    if startByteOffset == endByteOffset {
        return (arrayValue(startByteOffset) & firstByteMask) >> (8 - endByteBits)
    } else {
        var result: UInt64 = arrayValue(startByteOffset) & firstByteMask
        for i in startByteOffset+1 ..< endByteOffset {
            result = result << 8 + arrayValue(i)
        }
        result = (result << endByteBits) + (arrayValue(endByteOffset) >> (8 - endByteBits))
        return result
    }
}


Using the function above, you can write something like this:

let arrayToCheck1 : [UInt8] = [0x47,0x52,0x49,0x42,0x0,0x0,0x0,0x2,0x0,0x0,0x0,0x0,0x0,0x0,0x81,0x35]
let arr = (0..<arrayToCheck1.count*8/5).map {Int64(bitPattern: bits(arrayToCheck1, offset: $0*5, bitSize: 5))}
print(arr) //->[8, 29, 9, 4, 18, 16, 16, 0, 0, 0, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 1, 6]

Replies

Certainly there are more efficient ways. You can, for example, write a method that is given the index of a 5-bit group and returns the 5-bit value at that index. What this method needs to do internally is to take the index, multiply it by 5 (making it a single-bit index into your array), then divide it by 8 to get a byte index (quotient) and bit index within the byte (remainder). You can use these indexes to retrieve the 5 bits you want.


In general, the 5-bit group will cross a byte boundary. You can either construct the value in two parts, shifting and ORing together the partial values from two successive bytes in the original array. Or, you can start by combining 2 successive bytes into a single 16-bit value, and using the bit index to locate the 5-bit group you want within the 16-bit value.


All of this takes care to get it right, but it's not extremely difficult — it's a combination of shifting, ANDing and ORing bits.

How about this, any faster than your code? I think it is a compromise between performance and memory footprint on one side and readability and avoidance of Playground crashing constructs on the other.


let arrayToCheck1: [UInt8] = [0x47,0x52,0x49,0x42,0x0,0x0,0x0,0x2,0x0,0x0,0x0,0x0,0x0,0x0,0x81,0x35]
let bitCount = 5
var bitArray: [UInt8] = []
bitArray.reserveCapacity(arrayToCheck1.count * 8)
for byte in arrayToCheck1 {
    bitArray.append(byte >> 7 & 1)
    bitArray.append(byte >> 6 & 1)
    bitArray.append(byte >> 5 & 1)
    bitArray.append(byte >> 4 & 1)
    bitArray.append(byte >> 3 & 1)
    bitArray.append(byte >> 2 & 1)
    bitArray.append(byte >> 1 & 1)
    bitArray.append(byte >> 0 & 1)
}
var results: [UInt8] = []
results.reserveCapacity(arrayToCheck1.count / bitCount * 8)
mainCalcLoop: for i in stride(from: 0, to: bitArray.count - 1, by: bitCount) {
    var result: UInt8 = 0
    for j in 0..<bitCount {
        if i + j == bitArray.count {
            break mainCalcLoop
        }
        let shift = UInt8(bitCount - j - 1)
        result += bitArray[i + j] << shift
    }
    results.append(result)
}
results.forEach {
    print(String($0, radix: 2), $0)
}

MirekE. You suggestion is faster, however it works only when the number of bits used for an integer is less than 8. If it is greater than 8, I get an error "fatal error: shift amount is larger than type in bits" in line:


result += bitArray[i + j] << shift


Thank you, anyways.

My code uses Int8, so the max size is 8 bit. If you need larger, change the types of variables that are involved in creation of the result. For example, if 64 bits is required, use UInt64:


let arrayToCheck1: [UInt8] = [0x47,0x52,0x49,0x42,0x0,0x0,0x0,0x2,0x0,0x0,0x0,0x0,0x0,0x0,0x81,0x35]
let bitCount = 64
var bitArray: [UInt8] = []
bitArray.reserveCapacity(arrayToCheck1.count * 8)
for byte in arrayToCheck1 {
    bitArray.append(byte >> 7 & 1)
    bitArray.append(byte >> 6 & 1)
    bitArray.append(byte >> 5 & 1)
    bitArray.append(byte >> 4 & 1)
    bitArray.append(byte >> 3 & 1)
    bitArray.append(byte >> 2 & 1)
    bitArray.append(byte >> 1 & 1)
    bitArray.append(byte >> 0 & 1)
}
var results: [UInt64] = []
results.reserveCapacity(arrayToCheck1.count / bitCount * 8)
mainCalcLoop: for i in stride(from: 0, to: bitArray.count - 1, by: bitCount) {
    var result: UInt64 = 0
    for j in 0..<bitCount {
        if i + j == bitArray.count {
            break mainCalcLoop
        }
        let shift = UInt64(bitCount - j - 1)
        let bitValue: UInt64 = UInt64(bitArray[i + j])
        result += bitValue << shift
    }
    results.append(result)
}
results.forEach {
    print(String($0, radix: 2), $0)
}

You can define a bit-sequence retrieval function like this:

func bits(_ array: [UInt8], offset: Int, bitSize: Int) -> UInt64 {
    assert(1...64 ~= bitSize)
    func arrayValue(_ offset: Int) -> UInt64 {
        if offset >= array.count {
            return 0
        } else {
            return UInt64(array[offset])
        }
    }
  
    let startByteOffset = offset / 8
    let startBitOffset = UInt64(offset % 8)
    let firstByteMask = 0xFF >> startBitOffset
    let endByteOffset = (offset + bitSize - 1) / 8
    let endByteBits = UInt64((offset + bitSize - 1) % 8 + 1)
    if startByteOffset == endByteOffset {
        return (arrayValue(startByteOffset) & firstByteMask) >> (8 - endByteBits)
    } else {
        var result: UInt64 = arrayValue(startByteOffset) & firstByteMask
        for i in startByteOffset+1 ..< endByteOffset {
            result = result << 8 + arrayValue(i)
        }
        result = (result << endByteBits) + (arrayValue(endByteOffset) >> (8 - endByteBits))
        return result
    }
}


Using the function above, you can write something like this:

let arrayToCheck1 : [UInt8] = [0x47,0x52,0x49,0x42,0x0,0x0,0x0,0x2,0x0,0x0,0x0,0x0,0x0,0x0,0x81,0x35]
let arr = (0..<arrayToCheck1.count*8/5).map {Int64(bitPattern: bits(arrayToCheck1, offset: $0*5, bitSize: 5))}
print(arr) //->[8, 29, 9, 4, 18, 16, 16, 0, 0, 0, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 1, 6]