Searching data using firstRange(of:)

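The performance of DataProtocol.firstRange(of:) and Collection.firstRange(of:) differs noticeably: in the test below, the search that goes through the generic Collection extension takes about four times as long as calling firstRange(of:) directly on Data.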

/// Compares calling `firstRange(of:)` directly on `Data` slices with calling it
/// through the generic `Collection.search(for:)` wrapper.
///
/// A zero-filled buffer is cut into consecutive slices, each slice is searched
/// for the bytes `[1, 1, 1]`, the elapsed seconds of each pass are printed, and
/// the two result lists are asserted to be equal.
///
/// - Parameters:
///   - dataSize: Number of bytes in the searched buffer.
///   - segmentSize: Length of each searched slice. The final slice is shorter
///     when `dataSize` is not a multiple of `segmentSize`.
func testSearch(dataSize: Int = 10_000_000, segmentSize: Int = 1000) {
    let query = Data([1, 1, 1])
    let data = Data(repeating: 0, count: dataSize)
    let segments = stride(from: 0, to: dataSize, by: segmentSize)

    // Clamp the slice length to the bytes that remain, so the last segment
    // never reaches past the end of `data` when the sizes do not divide evenly.
    func segment(startingAt start: Int) -> Data {
        let length = min(segmentSize, dataSize - start)
        return data[start..<start + length]
    }

    // Search for `query` in `data` using `Data.firstRange(of:)`
    var date = Date.now
    let result1 = segments.compactMap({ i in
        segment(startingAt: i).firstRange(of: query)
    })
    print(-date.timeIntervalSinceNow) // 1 sec.

    // Search for `query` in `data` using `Collection.firstRange(of:)`
    date = .now
    let result2 = segments.compactMap({ i in
        segment(startingAt: i).search(for: query)
    })
    print(-date.timeIntervalSinceNow)  // 4 secs.
    assert(result1 == result2)
}

extension Collection {
    /// Finds the first occurrence of `query` within this collection.
    ///
    /// Forwards to `firstRange(of:)` as it resolves inside this generic
    /// `Collection` extension.
    ///
    /// - Parameter query: The run of elements to look for.
    /// - Returns: The range of the first match, or `nil` when there is none.
    func search<T: Collection>(for query: T) -> Range<Index>?
    where T.Element == Element, Element: Equatable {
        let firstMatch = self.firstRange(of: query)
        return firstMatch
    }
}

To work around this, I’m explicitly casting both the collection and the query to Data:

extension Collection {
    /// Finds the first occurrence of `query` within this collection.
    ///
    /// When both the receiver and `query` are `Data` at runtime, the search is
    /// made on the concrete `Data` values; otherwise it forwards to
    /// `firstRange(of:)` as it resolves inside this generic extension.
    ///
    /// - Parameter query: The run of elements to look for.
    /// - Returns: The range of the first match, or `nil` when there is none.
    func search<T: Collection>(for query: T) -> Range<Index>?
    where T.Element == Element, Element: Equatable {
        // Anything that is not a `Data`-in-`Data` search takes the generic route.
        guard let haystack = self as? Data, let needle = query as? Data else {
            return firstRange(of: query)
        }
        // The result is a `Range<Data.Index>`; cast it back to this
        // collection's own index type to satisfy the generic signature.
        let match = haystack.firstRange(of: needle)
        return match as? Range<Index>
    }
}
  • This fixes the performance problem, but it doesn’t scale well. Given that the method signatures are the same, how can I write the search function so that it picks up the one in DataProtocol?
  • In my testing, both return the same search results. Besides performance, is there any other difference between them?