forked from fast-pack/CSharpFastPFOR
-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathNewPFD.cs
More file actions
196 lines (182 loc) · 7.19 KB
/
NewPFD.cs
File metadata and controls
196 lines (182 loc) · 7.19 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
/**
* This code is released under the
* Apache License Version 2.0 http://www.apache.org/licenses/.
*
* (c) Daniel Lemire, http://lemire.me/en/
*/
/**
* NewPFD/NewPFOR: fast patching scheme by Yan et al.
* <p>
* Follows:
* </p><p>
* H. Yan, S. Ding, T. Suel, Inverted index compression and query processing
* with optimized document ordering, in: WWW 09, 2009, pp. 401-410.
* </p>
* using Simple16 as the secondary coder.
*
* It encodes integers in blocks of 128 integers. For arrays containing
* an arbitrary number of integers, you should use it in conjunction
* with another CODEC:
*
* <pre>IntegerCODEC ic =
* new Composition(new NewPDF(), new VariableByte()).</pre>
*
* Note that this does not use differential coding: if you are working on sorted
* lists, you must compute the deltas separately. (Yes, this is true even though
* the "D" at the end of the name probably stands for delta.)
*
* For multi-threaded applications, each thread should use its own NewPFD
* object.
*
* @author Daniel Lemire
*/
namespace Genbox.CSharpFastPFOR
{
public class NewPFD : IntegerCODEC, SkippableIntegerCODEC
{
private const int BLOCK_SIZE = 128;
private int[] exceptbuffer = new int[2 * BLOCK_SIZE];
/**
* Constructor for the NewPFD CODEC.
*/
public NewPFD()
{
}
public void headlessCompress(int[] @in, IntWrapper inpos, int inlength, int[] @out, IntWrapper outpos)
{
inlength = Util.greatestMultiple(inlength, BLOCK_SIZE);
if (inlength == 0)
return;
encodePage(@in, inpos, inlength, @out, outpos);
}
protected static int[] bits = { 0, 1, 2, 3, 4, 5,
6, 7, 8, 9, 10, 11,
12, 13, 16, 20, 32 };
protected static int[] invbits = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
10, 11, 12, 13, 14, 14, 14, 15,
15, 15, 15, 16, 16, 16, 16, 16,
16, 16, 16, 16, 16, 16, 16 };
protected static void getBestBFromData(int[] @in, int pos, IntWrapper bestb, IntWrapper bestexcept)
{
int mb = Util.maxbits(@in, pos, BLOCK_SIZE);
int mini = 0;
if (mini + 28 < bits[invbits[mb]])
mini = bits[invbits[mb]] - 28; // 28 is the max for
// exceptions
int besti = bits.Length - 1;
int exceptcounter = 0;
for (int i = mini; i < bits.Length - 1; ++i)
{
int tmpcounter = 0;
for (int k = pos; k < BLOCK_SIZE + pos; ++k)
if ((int)((uint)@in[k] >> bits[i]) != 0)
++tmpcounter;
if (tmpcounter * 10 <= BLOCK_SIZE)
{
besti = i;
exceptcounter = tmpcounter;
break;
}
}
bestb.set(besti);
bestexcept.set(exceptcounter);
}
private void encodePage(int[] @in, IntWrapper inpos, int thissize, int[] @out, IntWrapper outpos)
{
int tmpoutpos = outpos.get();
int tmpinpos = inpos.get();
IntWrapper bestb = new IntWrapper();
IntWrapper bestexcept = new IntWrapper();
for (int finalinpos = tmpinpos + thissize; tmpinpos
+ BLOCK_SIZE <= finalinpos; tmpinpos += BLOCK_SIZE)
{
getBestBFromData(@in, tmpinpos, bestb, bestexcept);
int tmpbestb = bestb.get();
int nbrexcept = bestexcept.get();
int exceptsize = 0;
int remember = tmpoutpos;
tmpoutpos++;
if (nbrexcept > 0)
{
for (int i = 0, c = 0; i < BLOCK_SIZE; ++i)
{
if ((int)((uint)@in[tmpinpos + i] >> bits[tmpbestb]) != 0)
{
exceptbuffer[c + nbrexcept] = i;
exceptbuffer[c] = (int)((uint)@in[tmpinpos + i] >> bits[tmpbestb]);
++c;
}
}
exceptsize = S16.compress(exceptbuffer, 0,
2 * nbrexcept, @out, tmpoutpos);
tmpoutpos += exceptsize;
}
@out[remember] = tmpbestb | (nbrexcept << 8)
| (exceptsize << 16);
for (int k = 0; k < BLOCK_SIZE; k += 32)
{
BitPacking.fastpack(@in, tmpinpos + k, @out,
tmpoutpos, bits[tmpbestb]);
tmpoutpos += bits[tmpbestb];
}
}
inpos.set(tmpinpos);
outpos.set(tmpoutpos);
}
public void headlessUncompress(int[] @in, IntWrapper inpos, int inlength, int[] @out, IntWrapper outpos, int mynvalue)
{
if (inlength == 0)
return;
mynvalue = Util.greatestMultiple(mynvalue, BLOCK_SIZE);
decodePage(@in, inpos, @out, outpos, mynvalue);
}
private void decodePage(int[] @in, IntWrapper inpos, int[] @out, IntWrapper outpos, int thissize)
{
int tmpoutpos = outpos.get();
int tmpinpos = inpos.get();
for (int run = 0; run < thissize / BLOCK_SIZE; ++run, tmpoutpos += BLOCK_SIZE)
{
int b = @in[tmpinpos] & 0xFF;
int cexcept = (int)((uint)@in[tmpinpos] >> 8) & 0xFF;
int exceptsize = (int)((uint)@in[tmpinpos] >> 16);
++tmpinpos;
S16.uncompress(@in, tmpinpos, exceptsize, exceptbuffer,
0, 2 * cexcept);
tmpinpos += exceptsize;
for (int k = 0; k < BLOCK_SIZE; k += 32)
{
BitPacking.fastunpack(@in, tmpinpos, @out,
tmpoutpos + k, bits[b]);
tmpinpos += bits[b];
}
for (int k = 0; k < cexcept; ++k)
{
@out[tmpoutpos + exceptbuffer[k + cexcept]] |= (exceptbuffer[k] << bits[b]);
}
}
outpos.set(tmpoutpos);
inpos.set(tmpinpos);
}
public void compress(int[] @in, IntWrapper inpos, int inlength, int[] @out, IntWrapper outpos)
{
inlength = Util.greatestMultiple(inlength, BLOCK_SIZE);
if (inlength == 0)
return;
@out[outpos.get()] = inlength;
outpos.increment();
headlessCompress(@in, inpos, inlength, @out, outpos);
}
public void uncompress(int[] @in, IntWrapper inpos, int inlength, int[] @out, IntWrapper outpos)
{
if (inlength == 0)
return;
int outlength = @in[inpos.get()];
inpos.increment();
headlessUncompress(@in, inpos, inlength, @out, outpos, outlength);
}
public override string ToString()
{
return nameof(NewPFD);
}
}
}