I notice that this operation is expensive for the code (2^set.size). I'm looking the best practices and implementations more likely a re-work this code goes to a infinite bucle and freezes the app until it's done
func generateSubsets(set: Set<String>) -> Set<Set<String>> {
if set.count == 1 { return [set] }
let first = set.map { item -> Set<String> in
var clone = set
clone.remove(item)
return clone
}
let second = first.map(generateSubsets)
var subSets = second.reduce(Set<Set<String>>()) { accumulator, elements in
var clone = accumulator
for element in elements {
clone.insert(element)
}
return clone
}
subSets.insert(set)
return subSets
}
Example
For input ["qual1", "qual2", 'qual3"]
the combinations can be: ["qual1#qual2#qual3", "qual1#qual2", "qual1#qual3", "qual2#qual3", "qual1", "qual2", "qual3"]
@JessyC , he wants combinations, not permutations.
I note that the page you linked to does also have a combinations(ofCount:)
function. But we want all combinations, not just combinaions of a particular size. Is there an all combinations function in there that I can't see?
Anyway, here is a trivial recursive implementation:
func getCombinations(s: Set<String>) -> Set<Set<String>>
{
var result: Set<Set<String>> = [];
if (s.isEmpty) {
result.insert([]);
return result;
}
var ss = s;
let first = ss.removeFirst();
let subsets = getCombinations(s: ss);
for sub in subsets {
result.insert(sub);
result.insert(sub.union([first]));
}
return result;
}
Note that includes the empty set, so the caller would need to remove that.
That takes 15 seconds to compute the 8 million combinations of 23 inputs:
var s : Set = ["a", "b", "c", "d", "e", "f", "g", "h", "i", "j",
"k", "l", "m", "n", "o", "p", "q", "r", "s", "t",
"u", "v", "w" //, "x", "y", "z"
];
//print("Input:", s);
print("Input size = ", s.count);
let combinations = getCombinations(s: s);
// print("Output:", combinations);
print("Output size = ", combinations.count);
Here's a version that uses a binary counter. The idea is that you count from 1 to 2^n-1, and each bit pattern indicates a subset of the input elements:
func getCombination(s: Array<String>, i: Int) -> Set<String>
{
var result: Set<String> = [];
for j in 0 ... (s.count - 1) {
if ((i >> j) & 1 == 1) {
result.insert(s[j]);
}
}
return result;
}
func getCombinations(s: Array<String>) -> Set<Set<String>>
{
assert(s.count < 32);
var result: Set<Set<String>> = [];
for i in 1 ... ((1 << s.count) - 1) {
result.insert(getCombination(s: s, i: i));
}
return result;
}
func getCombinations(s: Set<String>) -> Set<Set<String>>
{
let a = Array(s);
return getCombinations(s: a);
}
Annoyingly it is fractionally slower than the recursive version.
We can make it much faster by using more Arrays and fewer Sets. In particular, appending to an array is much quicker than inserting into a set (and we know the elements we're adding are unique). So:
func getCombination(s: Array<String>, i: Int) -> Array<String>
{
var result: Array<String> = [];
for j in 0 ... (s.count - 1) {
if ((i >> j) & 1 == 1) {
result.append(s[j]);
}
}
return result;
}
func getCombinations(s: Array<String>) -> Array<Array<String>>
{
assert(s.count < 32);
var result: Array<Array<String>> = [];
for i in 1 ... ((1 << s.count) - 1) {
result.append(getCombination(s: s, i: i));
}
return result;
}
func getCombinations(s: Set<String>) -> Array<Array<String>>
{
let a = Array(s);
return getCombinations(s: a);
}
I don't know how InvaDerSim wants to use this, but I suggest not trying to have all the combinations in memory at the same time if there are a lot of them. Instead have a callback and pass the combinations one at a time:
func getCombination(s: Array<String>, i: Int) -> Array<String>
{
var result: Array<String> = [];
for j in 0 ... (s.count - 1) {
if ((i >> j) & 1 == 1) {
result.append(s[j]);
}
}
return result;
}
func getCombinations(s: Array<String>, cbk: (Array<String>)->Void)
{
assert(s.count < 32);
for i in 1 ... ((1 << s.count) - 1) {
cbk(getCombination(s: s, i: i));
}
}
func getCombinations(s: Set<String>, cbk: (Array<String>)->Void)
{
let a = Array(s);
getCombinations(s: a, cbk: cbk);
}
... except it actually isn't any faster! (But it does work for 26 input elements, which the previous version doesn't.)
Finally, here's a C++ version:
#include <bit>
#include <cassert>
#include <iostream>
#include <set>
#include <string>
#include <vector>
static auto getCombination(const std::vector<std::string>& s, int i)
{
std::vector<std::string> result;
result.reserve(std::popcount(i)); // <-- edited to add this
for (int j = 0; j < s.size(); ++j) {
if (((i >> j) & 1) == 1) {
result.insert(result.end(), s[j]);
}
}
return result;
}
static void getCombinations(const std::vector<std::string>& s, auto cbk)
{
assert(s.size() < 32);
for (auto i = 1; i < (1 << s.size()); ++i) {
cbk(getCombination(s, i));
}
}
static void getCombinations(const std::set<std::string>& s, auto cbk)
{
std::vector<std::string> a {s.begin(),s.end()};
getCombinations(a, cbk);
}
int main()
{
std::set<std::string> s = { "a", "b", "c", "d", "e", "f", "g", "h", "i", "j",
"k", "l", "m", "n", "o", "p", "q", "r", "s", "t",
"u", "v", "w", "x", "y", "z"
};
std::cout << "Input size = " << s.size() << "\n";
auto n = 0;
getCombinations(s, [&] (auto combination) {
// std::cout << combination << "\n";
n += 1;
});
std::cout << "Output size = " << n << "\n";
return 0;
}
Its runtime is about 60% 33% of the swift version. Beware when benchmarking that it has the potential to optimise away too much to give a useful answer. (Edit: runtime approximately halved by adding reserve(std::popcount(i))
.)
Second edit to add this:
We can make it much faster by using boost::container::static_vector
instead of std::vector:
#include <boost/container/static_vector.hpp>
...
static auto getCombination(const std::vector<std::string>& s, unsigned int i)
{
boost::container::static_vector<std::string, 32> result;
// or boost::container::small_vector<std::string, 32> result;
...
The point is that the static_vector is stack-allocated, not heap-allocated. That gives a runtime of less than 10% of the swift code.
I hope this is useful. I've done it really to improve my Swift! Feedback regarding that would be appreciated, i.e. that's a challenge to get the swift code as fast as my C++ version.
One interesting language point is that the expression (i >> j) & 1 == 1
requires extra parenthesis in C++ because the relative priority of & and == differ, but in C++ I can omit the ==1 entirely as it will implicitly convert to bool.