// Package sdc implements simple dense coding (SDC), which compresses an
// array of integers while still allowing random access in constant time.
//
// For more details, see the paper "Simple Random Access Compression":
// http://www.cs.uku.fi/~fredriks/pub/papers/fi09.pdf
package sdc
import (
"bytes"
"encoding/gob"
"fmt"
"github.com/robskie/bit"
"github.com/robskie/ranksel"
)
// Rank and select sampling block sizes used by ranksel.BitVector.
// Array.init hands them to ranksel.NewBitVector through ranksel.Options.
const (
// sr is passed as Options.Sr (presumably the rank sampling block size).
sr = 1024
// ss is passed as Options.Ss (presumably the select sampling block size).
ss = 1792
)
// Array represents an array of integers stored in compressed form.
// Values are appended with Add and read back by index with Get.
//
// A zero-value Array is set up lazily on the first call to Add;
// NewArray returns one that is already set up.
type Array struct {
// bits holds the variable-length codes of the stored values, appended
// one after another in insertion order.
bits *bit.Array
// selector holds marker bits: one seeded by init plus one per added
// value. Get runs Select1 on it to find the offset of a value's code
// in bits.
selector *ranksel.BitVector
// length is the number of values added so far.
length int
// initialized is set by init once bits and selector have been created.
initialized bool
}
// init creates empty backing storage for the array and flags it as
// initialized. The selector is seeded with a single set bit so that a
// marker exists before any value has been added.
func (a *Array) init() {
	a.bits = bit.NewArray(0)

	sel := ranksel.NewBitVector(&ranksel.Options{Sr: sr, Ss: ss})
	sel.Add(1, 1)
	a.selector = sel

	a.initialized = true
}
// NewArray returns an empty, ready-to-use array.
func NewArray() *Array {
	a := new(Array)
	a.init()
	return a
}
// encode converts value into its code and the number of bits that code
// occupies. The value is offset by 2, the code length is bit.MSBIndex of
// the offset value, and the code is the offset value minus 1<<length.
//
// NOTE(review): value is converted with uint64(value), so -1 and -2 make
// the offset value wrap around to 1 and 0; presumably that produces a code
// length below 1 — confirm how callers treat such values.
func encode(value int) (uint64, int) {
	shifted := uint64(value) + 2
	nbits := bit.MSBIndex(shifted)
	return shifted - (1 << uint(nbits)), nbits
}
// Add appends the integer v to the array. A zero-value Array is
// initialized on first use.
//
// Add panics if v cannot be encoded, i.e. when encode reports a code
// length below one bit. With such a length the marker expression
// 1<<uint(length-1) shifts by an out-of-range count and evaluates to 0, so
// no boundary marker would be recorded for the value and every later index
// lookup would be off. The check runs before any state is modified, so a
// rejected value leaves the array untouched.
func (a *Array) Add(v int) {
	if !a.initialized {
		a.init()
	}

	code, length := encode(v)
	if length < 1 {
		panic(fmt.Sprintf("sdc: value %d cannot be encoded", v))
	}

	// Store the code itself, then append length marker bits of which only
	// the highest one is set.
	a.bits.Add(code, length)
	a.selector.Add(1<<uint(length-1), length)
	a.length++
}
// decode reverses encode: it rebuilds the original integer from a code and
// the number of bits the code occupies by adding 1<<length to the code and
// removing the offset of 2. The arithmetic is done in uint64 and converted
// to int at the end.
func decode(value uint64, length int) int {
	base := uint64(1) << uint(length)
	return int(base + value - 2)
}
// Get returns the value at index i.
//
// Get panics if i is outside the range [0, Len()). Without this check an
// out-of-range index would query the selector for a marker that was never
// stored (and a zero-value Array would dereference a nil selector), giving
// an unrelated panic or a meaningless result.
func (a *Array) Get(i int) int {
	if i < 0 || i >= a.length {
		panic(fmt.Sprintf("sdc: index out of range [%d] with length %d", i, a.length))
	}

	// The (i+1)-th marker gives the offset at which the code of value i
	// starts; the same offset is used for both selector and bits.
	start := a.selector.Select1(i + 1)

	// Read at most 64 marker bits beginning at start. The result of
	// bit.Select(window, 2) is used as the length of the code.
	window := a.selector.Get(start, min(64, a.selector.Len()-start))
	length := bit.Select(window, 2)

	return decode(a.bits.Get(start, length), length)
}
// Len returns the number of values stored in the array. The count is
// incremented by Add and restored by GobDecode.
func (a *Array) Len() int {
return a.length
}
// Size returns the array size in bytes, computed as the size reported by
// the code storage plus the size reported by the selector.
//
// NOTE(review): both fields are nil on a zero-value Array that has not
// gone through NewArray, Add or GobDecode — confirm Size is never called
// in that state.
func (a *Array) Size() int {
	return a.bits.Size() + a.selector.Size()
}
// GobEncode encodes this array into a gob stream. The stream contains, in
// order: the code storage, the selector, the number of values and the
// initialized flag.
//
// A zero-value Array (one that has never been initialized) is encoded as
// an empty array. Its bits and selector fields are nil, and passing a nil
// pointer to gob's Encoder.Encode panics, so a freshly initialized empty
// array is encoded in its place; the receiver itself is not modified.
//
// On failure the returned byte slice is nil and the error is prefixed with
// "sdc: encode failed".
func (a *Array) GobEncode() ([]byte, error) {
	src := a
	if !a.initialized {
		src = NewArray()
	}

	buf := &bytes.Buffer{}
	enc := gob.NewEncoder(buf)
	err := checkErr(
		enc.Encode(src.bits),
		enc.Encode(src.selector),
		enc.Encode(src.length),
		enc.Encode(src.initialized),
	)
	if err != nil {
		return nil, fmt.Errorf("sdc: encode failed (%v)", err)
	}

	return buf.Bytes(), nil
}
// GobDecode populates this array from a gob stream produced by GobEncode.
// It reads, in order: the code storage, the selector, the number of values
// and the initialized flag.
//
// Everything is decoded into temporaries first and copied into the
// receiver only when all four items decoded successfully. If decoding
// fails, the receiver keeps its previous contents instead of ending up
// with fresh, empty storage next to a stale length. The returned error is
// prefixed with "sdc: decode failed".
func (a *Array) GobDecode(data []byte) error {
	dec := gob.NewDecoder(bytes.NewReader(data))

	codes := bit.NewArray(0)
	selector := ranksel.NewBitVector(nil)
	var (
		length      int
		initialized bool
	)

	err := checkErr(
		dec.Decode(codes),
		dec.Decode(selector),
		dec.Decode(&length),
		dec.Decode(&initialized),
	)
	if err != nil {
		return fmt.Errorf("sdc: decode failed (%v)", err)
	}

	// Commit only after the whole stream has been read successfully.
	a.bits = codes
	a.selector = selector
	a.length = length
	a.initialized = initialized
	return nil
}
// min returns the smaller of a and b.
func min(a, b int) int {
	if b <= a {
		return b
	}
	return a
}
// checkErr returns the first non-nil error among its arguments, in
// argument order, or nil when every argument is nil or none are given.
func checkErr(err ...error) error {
	for i := range err {
		if err[i] == nil {
			continue
		}
		return err[i]
	}
	return nil
}