Generates all possible subsets of a given set, excluding the empty set. (non sorted)

Question

Created Mar ’24

Replies 4

Boosts 0

Participants 4

I notice that this operation is expensive for the code (2^set.size). I'm looking the best practices and implementations more likely a re-work this code goes to a infinite bucle and freezes the app until it's done

func generateSubsets(set: Set<String>) -> Set<Set<String>> {
        if set.count == 1 { return [set] }
        let first = set.map { item -> Set<String> in
            var clone = set
            clone.remove(item)
            return clone
        }
        let second = first.map(generateSubsets)
        var subSets = second.reduce(Set<Set<String>>()) { accumulator, elements in
            var clone = accumulator
            for element in elements {
                clone.insert(element)
            }
            return clone
        }
        subSets.insert(set)
        return subSets
    }

Example

For input ["qual1", "qual2", 'qual3"]

the combinations can be: ["qual1#qual2#qual3", "qual1#qual2", "qual1#qual3", "qual2#qual3", "qual1", "qual2", "qual3"]

Answered by endecotp in 781983022

@JessyC , he wants combinations, not permutations.

I note that the page you linked to does also have a combinations(ofCount:) function. But we want all combinations, not just combinaions of a particular size. Is there an all combinations function in there that I can't see?

Anyway, here is a trivial recursive implementation:

func getCombinations(s: Set<String>) -> Set<Set<String>>
{
  var result: Set<Set<String>> = [];

  if (s.isEmpty) {
    result.insert([]);
    return result;
  }

  var ss = s;

  let first = ss.removeFirst();

  let subsets = getCombinations(s: ss);

  for sub in subsets {
    result.insert(sub);
    result.insert(sub.union([first]));
  }

  return result;
}

Note that includes the empty set, so the caller would need to remove that.

That takes 15 seconds to compute the 8 million combinations of 23 inputs:

var s : Set = ["a", "b", "c", "d", "e", "f", "g", "h", "i", "j",
               "k", "l", "m", "n", "o", "p", "q", "r", "s", "t",
               "u", "v", "w" //, "x", "y", "z"
              ];

//print("Input:", s);
print("Input size = ", s.count);

let combinations = getCombinations(s: s);

// print("Output:", combinations);
print("Output size = ", combinations.count);

Here's a version that uses a binary counter. The idea is that you count from 1 to 2^n-1, and each bit pattern indicates a subset of the input elements:

func getCombination(s: Array<String>, i: Int) -> Set<String>
{
  var result: Set<String> = [];

  for j in 0 ... (s.count - 1) {
    if ((i >> j) & 1 == 1) {
      result.insert(s[j]);
    }
  }

  return result;
}


func getCombinations(s: Array<String>) -> Set<Set<String>>
{
  assert(s.count < 32);

  var result: Set<Set<String>> = [];

  for i in 1 ... ((1 << s.count) - 1) {
    result.insert(getCombination(s: s, i: i));
  }

  return result;
}


func getCombinations(s: Set<String>) -> Set<Set<String>>
{
  let a = Array(s);
  return getCombinations(s: a);
}

Annoyingly it is fractionally slower than the recursive version.

We can make it much faster by using more Arrays and fewer Sets. In particular, appending to an array is much quicker than inserting into a set (and we know the elements we're adding are unique). So:

func getCombination(s: Array<String>, i: Int) -> Array<String>
{
  var result: Array<String> = [];

  for j in 0 ... (s.count - 1) {
    if ((i >> j) & 1 == 1) {
      result.append(s[j]);
    }
  }

  return result;
}


func getCombinations(s: Array<String>) -> Array<Array<String>>
{
  assert(s.count < 32);

  var result: Array<Array<String>> = [];

  for i in 1 ... ((1 << s.count) - 1) {
    result.append(getCombination(s: s, i: i));
  }

  return result;
}


func getCombinations(s: Set<String>) -> Array<Array<String>>
{
  let a = Array(s);
  return getCombinations(s: a);
}

I don't know how InvaDerSim wants to use this, but I suggest not trying to have all the combinations in memory at the same time if there are a lot of them. Instead have a callback and pass the combinations one at a time:

func getCombination(s: Array<String>, i: Int) -> Array<String>
{
  var result: Array<String> = [];

  for j in 0 ... (s.count - 1) {
    if ((i >> j) & 1 == 1) {
      result.append(s[j]);
    }
  }

  return result;
}


func getCombinations(s: Array<String>, cbk: (Array<String>)->Void)
{
  assert(s.count < 32);

  for i in 1 ... ((1 << s.count) - 1) {
    cbk(getCombination(s: s, i: i));
  }
}


func getCombinations(s: Set<String>, cbk: (Array<String>)->Void)
{
  let a = Array(s);
  getCombinations(s: a, cbk: cbk);
}

... except it actually isn't any faster! (But it does work for 26 input elements, which the previous version doesn't.)

Finally, here's a C++ version:

#include <bit>
#include <cassert>
#include <iostream>
#include <set>
#include <string>
#include <vector>


static auto getCombination(const std::vector<std::string>& s, int i)
{
  std::vector<std::string> result;
  result.reserve(std::popcount(i));  // <-- edited to add this

  for (int j = 0; j < s.size(); ++j) {
    if (((i >> j) & 1) == 1) {
      result.insert(result.end(), s[j]);
    }
  }

  return result;
}


static void getCombinations(const std::vector<std::string>& s, auto cbk)
{
  assert(s.size() < 32);

  for (auto i = 1; i < (1 << s.size()); ++i) {
    cbk(getCombination(s, i));
  }
}


static void getCombinations(const std::set<std::string>& s, auto cbk)
{
  std::vector<std::string> a {s.begin(),s.end()};
  getCombinations(a, cbk);
}


int main()
{
  std::set<std::string> s = { "a", "b", "c", "d", "e", "f", "g", "h", "i", "j",
                              "k", "l", "m", "n", "o", "p", "q", "r", "s", "t",
                              "u", "v", "w", "x", "y", "z"
                            };

  std::cout << "Input size = " << s.size() << "\n";

  auto n = 0;

  getCombinations(s, [&] (auto combination) {
//  std::cout << combination << "\n";
    n += 1;
  });

  std::cout << "Output size = " << n << "\n";

  return 0;
}

Its runtime is about ~~60%~~ 33% of the swift version. Beware when benchmarking that it has the potential to optimise away too much to give a useful answer. (Edit: runtime approximately halved by adding reserve(std::popcount(i)).)

Second edit to add this:

We can make it much faster by using boost::container::static_vector instead of std::vector:

#include <boost/container/static_vector.hpp>

...

static auto getCombination(const std::vector<std::string>& s, unsigned int i)
{
  boost::container::static_vector<std::string, 32> result;
  // or boost::container::small_vector<std::string, 32> result;

 ...

The point is that the static_vector is stack-allocated, not heap-allocated. That gives a runtime of less than 10% of the swift code.

I hope this is useful. I've done it really to improve my Swift! Feedback regarding that would be appreciated, i.e. that's a challenge to get the swift code as fast as my C++ version.

One interesting language point is that the expression (i >> j) & 1 == 1 requires extra parenthesis in C++ because the relative priority of & and == differ, but in C++ I can omit the ==1 entirely as it will implicitly convert to bool.

Boost

Answer 1

JessyC OP

Mar ’24

https://swiftpackageindex.com/apple/swift-algorithms/1.2.0/documentation/algorithms/swift/collection/uniquepermutations(ofcount:)-2extq

1

Answer 2

Claude31 OP

Mar ’24

I would do it with recursive calls. Of course, sets are unordered, so the result is unordered.

 func getSubSets(initialSet: Set<String>) -> Set<String> {
  
    var item: String?
    var partialSet = initialSet
    var returnedSet: Set<String> = initialSet
    
    for s in initialSet {
        item = s    // "a"
        break
    }
    
    if let item {
        partialSet.remove(item)     // We get an element out: partialSet = ["b", "c"]
        returnedSet.insert(item)    // ["a"]

        // We add the subsets built with "a" + all subsets of partialSet
        let subSets = getSubSets(initialSet: partialSet)
        for aString in subSets {  // "b", "c", "b#c"]
            returnedSet.insert(item + "#" + aString)
        }
        returnedSet = returnedSet.union(subSets)
    }

    return returnedSet
}

var initialSet : Set = ["a", "b", "c"]

print("Original Set:", initialSet)

let allSubs = getSubSets(initialSet: initialSet)

print("Modified Set:", allSubs)

This gives:

Original Set: ["b", "a", "c"]
Modified Set: ["b", "b#a", "b#a#c", "a#c", "c", "a", "b#c"]

1

Answer 3

endecotp OP

Mar ’24

Accepted Answer

@JessyC , he wants combinations, not permutations.

I note that the page you linked to does also have a combinations(ofCount:) function. But we want all combinations, not just combinaions of a particular size. Is there an all combinations function in there that I can't see?

Anyway, here is a trivial recursive implementation:

func getCombinations(s: Set<String>) -> Set<Set<String>>
{
  var result: Set<Set<String>> = [];

  if (s.isEmpty) {
    result.insert([]);
    return result;
  }

  var ss = s;

  let first = ss.removeFirst();

  let subsets = getCombinations(s: ss);

  for sub in subsets {
    result.insert(sub);
    result.insert(sub.union([first]));
  }

  return result;
}

Note that includes the empty set, so the caller would need to remove that.

That takes 15 seconds to compute the 8 million combinations of 23 inputs:

var s : Set = ["a", "b", "c", "d", "e", "f", "g", "h", "i", "j",
               "k", "l", "m", "n", "o", "p", "q", "r", "s", "t",
               "u", "v", "w" //, "x", "y", "z"
              ];

//print("Input:", s);
print("Input size = ", s.count);

let combinations = getCombinations(s: s);

// print("Output:", combinations);
print("Output size = ", combinations.count);

Here's a version that uses a binary counter. The idea is that you count from 1 to 2^n-1, and each bit pattern indicates a subset of the input elements:

func getCombination(s: Array<String>, i: Int) -> Set<String>
{
  var result: Set<String> = [];

  for j in 0 ... (s.count - 1) {
    if ((i >> j) & 1 == 1) {
      result.insert(s[j]);
    }
  }

  return result;
}


func getCombinations(s: Array<String>) -> Set<Set<String>>
{
  assert(s.count < 32);

  var result: Set<Set<String>> = [];

  for i in 1 ... ((1 << s.count) - 1) {
    result.insert(getCombination(s: s, i: i));
  }

  return result;
}


func getCombinations(s: Set<String>) -> Set<Set<String>>
{
  let a = Array(s);
  return getCombinations(s: a);
}

Annoyingly it is fractionally slower than the recursive version.

We can make it much faster by using more Arrays and fewer Sets. In particular, appending to an array is much quicker than inserting into a set (and we know the elements we're adding are unique). So:

func getCombination(s: Array<String>, i: Int) -> Array<String>
{
  var result: Array<String> = [];

  for j in 0 ... (s.count - 1) {
    if ((i >> j) & 1 == 1) {
      result.append(s[j]);
    }
  }

  return result;
}


func getCombinations(s: Array<String>) -> Array<Array<String>>
{
  assert(s.count < 32);

  var result: Array<Array<String>> = [];

  for i in 1 ... ((1 << s.count) - 1) {
    result.append(getCombination(s: s, i: i));
  }

  return result;
}


func getCombinations(s: Set<String>) -> Array<Array<String>>
{
  let a = Array(s);
  return getCombinations(s: a);
}

I don't know how InvaDerSim wants to use this, but I suggest not trying to have all the combinations in memory at the same time if there are a lot of them. Instead have a callback and pass the combinations one at a time:

func getCombination(s: Array<String>, i: Int) -> Array<String>
{
  var result: Array<String> = [];

  for j in 0 ... (s.count - 1) {
    if ((i >> j) & 1 == 1) {
      result.append(s[j]);
    }
  }

  return result;
}


func getCombinations(s: Array<String>, cbk: (Array<String>)->Void)
{
  assert(s.count < 32);

  for i in 1 ... ((1 << s.count) - 1) {
    cbk(getCombination(s: s, i: i));
  }
}


func getCombinations(s: Set<String>, cbk: (Array<String>)->Void)
{
  let a = Array(s);
  getCombinations(s: a, cbk: cbk);
}

... except it actually isn't any faster! (But it does work for 26 input elements, which the previous version doesn't.)

Finally, here's a C++ version:

#include <bit>
#include <cassert>
#include <iostream>
#include <set>
#include <string>
#include <vector>


static auto getCombination(const std::vector<std::string>& s, int i)
{
  std::vector<std::string> result;
  result.reserve(std::popcount(i));  // <-- edited to add this

  for (int j = 0; j < s.size(); ++j) {
    if (((i >> j) & 1) == 1) {
      result.insert(result.end(), s[j]);
    }
  }

  return result;
}


static void getCombinations(const std::vector<std::string>& s, auto cbk)
{
  assert(s.size() < 32);

  for (auto i = 1; i < (1 << s.size()); ++i) {
    cbk(getCombination(s, i));
  }
}


static void getCombinations(const std::set<std::string>& s, auto cbk)
{
  std::vector<std::string> a {s.begin(),s.end()};
  getCombinations(a, cbk);
}


int main()
{
  std::set<std::string> s = { "a", "b", "c", "d", "e", "f", "g", "h", "i", "j",
                              "k", "l", "m", "n", "o", "p", "q", "r", "s", "t",
                              "u", "v", "w", "x", "y", "z"
                            };

  std::cout << "Input size = " << s.size() << "\n";

  auto n = 0;

  getCombinations(s, [&] (auto combination) {
//  std::cout << combination << "\n";
    n += 1;
  });

  std::cout << "Output size = " << n << "\n";

  return 0;
}

Its runtime is about ~~60%~~ 33% of the swift version. Beware when benchmarking that it has the potential to optimise away too much to give a useful answer. (Edit: runtime approximately halved by adding reserve(std::popcount(i)).)

Second edit to add this:

We can make it much faster by using boost::container::static_vector instead of std::vector:

#include <boost/container/static_vector.hpp>

...

static auto getCombination(const std::vector<std::string>& s, unsigned int i)
{
  boost::container::static_vector<std::string, 32> result;
  // or boost::container::small_vector<std::string, 32> result;

 ...

The point is that the static_vector is stack-allocated, not heap-allocated. That gives a runtime of less than 10% of the swift code.

I hope this is useful. I've done it really to improve my Swift! Feedback regarding that would be appreciated, i.e. that's a challenge to get the swift code as fast as my C++ version.

One interesting language point is that the expression (i >> j) & 1 == 1 requires extra parenthesis in C++ because the relative priority of & and == differ, but in C++ I can omit the ==1 entirely as it will implicitly convert to bool.

1

Answer 4

Claude31 OP

Mar ’24

@ InvaDerZim

In your example, you showed a result

["qual1#qual2#qual3", "qual1#qual2", "qual1#qual3", "qual2#qual3", "qual1", "qual2", "qual3"]

which is a Set<String> not a Set<Set<String>>

What is the expected result as Set<Set<String>> ?

@endecotp

Just to give a reference, I tested with 23 letters (8 million combinations):

Original Set: ["v", "g", "r", "p", "d", "c", "e", "s", "h", "m", "j", "a", "i", "l", "q", "o", "b", "f", "k", "n", "t", "w", "u"]
Elapsed time: 12.271471977233887 seconds
Modified Set: 8 388 607

Beyond, app crashes (memory limit because strings are explicitly built ?)

1