Skip to content

Instantly share code, notes, and snippets.

@mfuerstenau
Created September 1, 2021 22:56
Show Gist options
  • Save mfuerstenau/bcc995bc08d681339c6664c181be3bf3 to your computer and use it in GitHub Desktop.
Save mfuerstenau/bcc995bc08d681339c6664c181be3bf3 to your computer and use it in GitHub Desktop.
Heap based disjunction of multiple DocIdSetIterator instances
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.util.BitSetIterator;
import org.apache.lucene.util.FixedBitSet;
import org.apache.lucene.util.RoaringDocIdSet;
import java.io.IOException;
public class SimpleDisjunctionDISI extends DocIdSetIterator
{
private final DISIWrapper[] heap;
private final long cost;
private int size;
public SimpleDisjunctionDISI (DocIdSetIterator... iterators)
{
heap = new DISIWrapper[iterators.length];
size = 0;
for (DocIdSetIterator it : iterators)
{
add (new DISIWrapper (it));
}
long cost = 0;
for (DISIWrapper wrapper : heap)
{
cost += wrapper.cost;
}
this.cost = cost;
}
private static int leftNode (int node)
{
return ((node + 1) << 1) - 1;
}
private static int rightNode (int leftNode)
{
return leftNode + 1;
}
private static int parentNode (int node)
{
return ((node + 1) >>> 1) - 1;
}
private DISIWrapper top ()
{
return heap[0];
}
public int size ()
{
return size;
}
private DISIWrapper add (DISIWrapper entry)
{
final DISIWrapper[] heap = this.heap;
final int size = this.size;
heap[size] = entry;
upHeap (size);
this.size = size + 1;
return heap[0];
}
private void upHeap (int i)
{
final DISIWrapper node = heap[i];
final int nodeDoc = node.doc;
int j = parentNode (i);
while (j >= 0 && nodeDoc < heap[j].doc)
{
heap[i] = heap[j];
i = j;
j = parentNode (j);
}
heap[i] = node;
}
private DISIWrapper updateTop ()
{
downHeap (size);
return heap[0];
}
private void downHeap (int size)
{
int i = 0;
final DISIWrapper node = heap[0];
int j = leftNode (i);
if (j < size)
{
int k = rightNode (j);
if (k < size && heap[k].doc < heap[j].doc)
{
j = k;
}
if (heap[j].doc < node.doc)
{
do
{
heap[i] = heap[j];
i = j;
j = leftNode (i);
k = rightNode (j);
if (k < size && heap[k].doc < heap[j].doc)
{
j = k;
}
}
while (j < size && heap[j].doc < node.doc);
heap[i] = node;
}
}
}
@Override
public int docID ()
{
return top ().doc;
}
@Override
public int nextDoc () throws IOException
{
DISIWrapper top = top ();
final int doc = top.doc;
do
{
top.doc = top.iterator.nextDoc ();
top = updateTop ();
}
while (top.doc == doc);
return top.doc;
}
@Override
public int advance (int target) throws IOException
{
DISIWrapper top = top ();
do
{
top.doc = top.iterator.advance (target);
top = updateTop ();
}
while (top.doc < target);
return top.doc;
}
@Override
public long cost ()
{
return cost;
}
private static class DISIWrapper
{
private final DocIdSetIterator iterator;
private final long cost;
private int doc = -1;
private DISIWrapper (DocIdSetIterator iterator)
{
this.iterator = iterator;
this.cost = iterator.cost ();
}
}
public static void main (String[] args) throws IOException
{
FixedBitSet bs = new FixedBitSet (10);
bs.set (8);
bs.set (3);
bs.set (10);
RoaringDocIdSet.Builder b = new RoaringDocIdSet.Builder (100);
b.add (0);
b.add (2);
b.add (10);
b.add (20);
RoaringDocIdSet bs2 = b.build ();
SimpleDisjunctionDISI it = new SimpleDisjunctionDISI (bs2.iterator (), new BitSetIterator (bs, bs.cardinality ()));
int docId;
while ((docId = it.nextDoc ()) != DocIdSetIterator.NO_MORE_DOCS)
{
System.out.println (docId);
}
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment