#include <iostream>
#include <fstream>
#include <frequent_items_sketch.hpp>
//simplified file operations and no error handling for clarity
// Example: build two frequent-items sketches over strings, serialize each to
// a file, then read them back, merge them into a third sketch and print the
// frequent items of the union.
//
// File handling is deliberately minimal: no stream state is checked and no
// exceptions are caught, to keep the example short.
int main(int argc, char **argv) {
  using frequent_strings_sketch = datasketches::frequent_items_sketch<std::string>;

  // this section generates two sketches and serializes them into files
  // (the enclosing scope destroys the output streams, which closes the files
  // before they are read back below)
  {
    frequent_strings_sketch sketch1(64);
    sketch1.update("a");
    sketch1.update("a");
    sketch1.update("b");
    sketch1.update("c");
    sketch1.update("a");
    sketch1.update("d");
    sketch1.update("a");

    // The serialized form is binary, so the stream must be opened in binary
    // mode; text mode may translate newline bytes on some platforms and
    // corrupt the image.
    std::ofstream os1("freq_str_sketch1.bin", std::ios::binary);
    sketch1.serialize(os1);

    frequent_strings_sketch sketch2(64);
    sketch2.update("e");
    sketch2.update("a");
    sketch2.update("f");
    sketch2.update("f");
    sketch2.update("f");
    sketch2.update("g");
    sketch2.update("a");
    sketch2.update("f");

    std::ofstream os2("freq_str_sketch2.bin", std::ios::binary);
    sketch2.serialize(os2);
  }

  // this section deserializes the sketches, produces a union and prints the result
  {
    // binary mode again, to read back exactly the bytes that were written
    std::ifstream is1("freq_str_sketch1.bin", std::ios::binary);
    frequent_strings_sketch sketch1 = frequent_strings_sketch::deserialize(is1);

    std::ifstream is2("freq_str_sketch2.bin", std::ios::binary);
    frequent_strings_sketch sketch2 = frequent_strings_sketch::deserialize(is2);

    // we could merge sketch2 into sketch1 or the other way around
    // this is an example of using a new sketch as a union and keeping the original sketches intact
    frequent_strings_sketch u(64);
    u.merge(sketch1);
    u.merge(sketch2);

    auto items = u.get_frequent_items(datasketches::NO_FALSE_POSITIVES);
    std::cout << "Frequent strings: " << items.size() << '\n';
    std::cout << "Str\tEst\tLB\tUB" << '\n';
    // iterate by const reference to avoid copying each row (and its string)
    for (const auto& row : items) {
      std::cout << row.get_item() << "\t" << row.get_estimate() << "\t"
                << row.get_lower_bound() << "\t" << row.get_upper_bound() << '\n';
    }
  }

  return 0;
}
// Output:
// Frequent strings: 7
// Str Est LB UB
// a 6 6 6
// f 4 4 4
// c 1 1 1
// d 1 1 1
// e 1 1 1
// b 1 1 1
// g 1 1 1