// simplified file operations and no error handling for clarity
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.Arrays;
import java.util.Random;
import org.apache.datasketches.memory.Memory;
import org.apache.datasketches.quantiles.DoublesSketch;
import org.apache.datasketches.quantiles.DoublesUnion;
import org.apache.datasketches.quantiles.UpdateDoublesSketch;
// this section generates two sketches from random data and serializes them into files
{
  // A single random source supplies the values for both sketches.
  Random rand = new Random();

  // First sketch: 10000 standard-normal samples.
  UpdateDoublesSketch sketch1 = DoublesSketch.builder().build(); // default k=128
  for (int i = 0; i < 10000; i++) {
    sketch1.update(rand.nextGaussian()); // mean=0, stddev=1
  }
  // try-with-resources closes the stream even if toByteArray() or write() throws.
  try (FileOutputStream out1 = new FileOutputStream("QuantilesDoublesSketch1.bin")) {
    out1.write(sketch1.toByteArray());
  }

  // Second sketch: 10000 normal samples shifted by +1.
  UpdateDoublesSketch sketch2 = DoublesSketch.builder().build(); // default k=128
  for (int i = 0; i < 10000; i++) {
    sketch2.update(rand.nextGaussian() + 1); // shift the mean for the second sketch
  }
  try (FileOutputStream out2 = new FileOutputStream("QuantilesDoublesSketch2.bin")) {
    out2.write(sketch2.toByteArray());
  }
}
// this section deserializes the sketches, produces a union and prints some results
{
  // Load each serialized sketch in full. Files.readAllBytes reads until end of file
  // and closes the file itself; sizing a buffer with InputStream.available() and
  // issuing a single read() is not guaranteed to return the whole file.
  byte[] bytes1 = Files.readAllBytes(Paths.get("QuantilesDoublesSketch1.bin"));
  DoublesSketch sketch1 = DoublesSketch.wrap(Memory.wrap(bytes1));

  byte[] bytes2 = Files.readAllBytes(Paths.get("QuantilesDoublesSketch2.bin"));
  DoublesSketch sketch2 = DoublesSketch.wrap(Memory.wrap(bytes2));

  // Merge both sketches into one result sketch.
  DoublesUnion union = DoublesUnion.builder().build(); // default k=128
  union.update(sketch1);
  union.update(sketch2);
  DoublesSketch result = union.getResult();

  // Debug output from the sketch
  System.out.println(result.toString());

  System.out.println("Min, Median, Max values");
  System.out.println(Arrays.toString(result.getQuantiles(new double[] {0, 0.5, 1})));

  // Three split points define the four bins named in the messages below.
  double[] splitPoints = {-2, 0, 2};

  System.out.println("Probability Histogram: estimated probability mass in 4 bins: (-inf, -2), [-2, 0), [0, 2), [2, +inf)");
  double[] histogram = result.getPMF(splitPoints);
  System.out.println(Arrays.toString(histogram));

  System.out.println("Frequency Histogram: estimated number of original values in the same bins");
  // The fractions have already been printed, so the array can be scaled in place.
  long totalCount = result.getN();
  for (int i = 0; i < histogram.length; i++) {
    histogram[i] *= totalCount; // scale the fractions by the total count of values
  }
  System.out.println(Arrays.toString(histogram));
}
Output:
### Quantiles HeapUpdateDoublesSketch SUMMARY:
Empty : false
Direct, Capacity bytes : false,
Estimation Mode : true
K : 128
N : 20,000
Levels (Needed, Total, Valid): 7, 7, 4
Level Bit Pattern : 1001110
BaseBufferCount : 32
Combined Buffer Capacity : 1,152
Retained Items : 544
Compact Storage Bytes : 4,384
Updatable Storage Bytes : 9,248
Normalized Rank Error : 1.725%
Min Value : -4.113
Max Value : 4.363
### END SKETCH SUMMARY
Min, Median, Max values
[-4.113097775288085, 0.49496152841809893, 4.362712872544037]
Probability Histogram: estimated probability mass in 4 bins: (-inf, -2), [-2, 0), [0, 2), [2, +inf)
[0.01445, 0.3071, 0.58545, 0.093]
Frequency Histogram: estimated number of original values in the same bins
[289.0, 6142.0, 11709.0, 1860.0]