CPC Sketch C++ Example

#include <iostream>
#include <fstream>

#include <cpc_sketch.hpp>
#include <cpc_union.hpp>

//simplified file operations and no error handling for clarity
// Round-trips two CPC sketches through files and prints their union.
//
// Two sketches of 100000 distinct integer keys each are built; their key
// ranges share 50000 keys (50000..99999), so the union covers 150000 distinct
// keys. Each sketch is serialized to a file in the current working directory,
// read back, merged with a cpc_union, and the resulting estimate with its
// bounds is written to standard output.
//
// argc/argv are unused. Always returns 0; as noted above, stream failures are
// deliberately not checked to keep the example short.
int main(int argc, char **argv) {
  const int lg_k = 10;

  // this section generates two sketches with some overlap and serializes them into files
  // (the enclosing scope destroys the output streams, flushing and closing the
  // files before they are read back below)
  {
    // 100000 distinct keys
    datasketches::cpc_sketch sketch1(lg_k);
    for (int key = 0; key < 100000; key++) sketch1.update(key);
    // binary mode: the serialized image is raw bytes, not text
    std::ofstream os1("cpc_sketch1.bin", std::ios::binary);
    sketch1.serialize(os1);

    // 100000 distinct keys
    datasketches::cpc_sketch sketch2(lg_k);
    for (int key = 50000; key < 150000; key++) sketch2.update(key);
    std::ofstream os2("cpc_sketch2.bin", std::ios::binary);
    sketch2.serialize(os2);
  }

  // this section deserializes the sketches, produces union and prints the result
  {
    std::ifstream is1("cpc_sketch1.bin", std::ios::binary);
    auto sketch1 = datasketches::cpc_sketch::deserialize(is1);

    std::ifstream is2("cpc_sketch2.bin", std::ios::binary);
    auto sketch2 = datasketches::cpc_sketch::deserialize(is2);

    // feed both sketches into the union before asking for the result
    datasketches::cpc_union u(lg_k);
    u.update(sketch1);
    u.update(sketch2);
    auto sketch = u.get_result();

    // debug summary of the union result sketch
    std::cout << sketch.to_string();

    std::cout << "Distinct count estimate: " << sketch.get_estimate() << std::endl;
    std::cout << "Distinct count lower bound 95% confidence: " << sketch.get_lower_bound(2) << std::endl;
    std::cout << "Distinct count upper bound 95% confidence: " << sketch.get_upper_bound(2) << std::endl;
  }

  return 0;
}

### CPC sketch summary:
   lg_k           : 10
   seed hash      : 93cc
   C              : 7706
   flavor         : 4
   merged         : true
   intresting col : 4
   table entries  : 27
   window         : allocated
   window offset  : 5
### End sketch summary
Distinct count estimate: 149797
Distinct count lower bound 95% confidence: 143416
Distinct count upper bound 95% confidence: 156397