Browse code
Add base32 encoding/decoding
Ed Langley authored on 06/06/2017 22:06:25
Showing 1 changed files
Showing 1 changed files
... | ... |
@@ -1,23 +1,9 @@ |
1 | 1 |
#include <iostream> |
2 | 2 |
#include <iomanip> |
3 | 3 |
#include <vector> |
4 |
+#include <unordered_map> |
|
4 | 5 |
|
5 |
-std::string encodeBase32(const std::vector<uint8_t> data) { |
|
6 |
- std::vector<char> alphabet = { |
|
7 |
- 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', |
|
8 |
- 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', |
|
9 |
- 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', |
|
10 |
- 'Y', 'Z', '2', '3', '4', '5', '6', '7' |
|
11 |
- }; |
|
12 |
- |
|
13 |
- uint8_t tmp = 0; |
|
14 |
- auto data_size = data.size(); |
|
15 |
- |
|
16 |
- std::string result; |
|
17 |
- auto begin = data.begin(); |
|
18 |
- auto end = data.end(); |
|
19 |
- |
|
20 |
- auto extra_bytes = (data_size % 5); |
|
6 |
+uint64_t calculate_padding_bytes(uint64_t extra_bytes) { |
|
21 | 7 |
auto padding_chars = 0; |
22 | 8 |
switch (extra_bytes) { |
23 | 9 |
case 1: |
... | ... |
@@ -34,6 +20,38 @@ std::string encodeBase32(const std::vector<uint8_t> data) { |
34 | 20 |
break; |
35 | 21 |
} |
36 | 22 |
|
23 |
+ return padding_chars; |
|
24 |
+} |
|
25 |
+ |
|
26 |
+std::string &pad_string(uint64_t extra_bytes, std::string &input) { |
|
27 |
+ auto padding_chars = calculate_padding_bytes(extra_bytes); |
|
28 |
+ if (padding_chars > 0) { |
|
29 |
+ auto replace_end = input.end(); |
|
30 |
+ auto replace_start = replace_end - padding_chars; |
|
31 |
+ auto replace_count = replace_end - replace_start; |
|
32 |
+ |
|
33 |
+ input.replace(replace_start, replace_end, replace_count, '='); |
|
34 |
+ } |
|
35 |
+ return input; |
|
36 |
+} |
|
37 |
+ |
|
38 |
// Base32 symbol table: element i (0..31) is the character that encodes the
// 5-bit value i -- 'A'..'Z' for 0..25 followed by '2'..'7' for 26..31.
static const std::vector<char> alphabet = [] {
    const char symbols[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZ234567";
    // sizeof counts the terminating NUL, which is not part of the table.
    return std::vector<char>(symbols, symbols + sizeof(symbols) - 1);
}();
|
44 |
+ |
|
45 |
+std::string encodeBase32(const std::vector<uint8_t> data) { |
|
46 |
+ std::string result; |
|
47 |
+ |
|
48 |
+ uint8_t tmp = 0; |
|
49 |
+ |
|
50 |
+ auto data_size = data.size(); |
|
51 |
+ auto extra_bytes = data_size % 5; |
|
52 |
+ |
|
53 |
+ auto begin = data.begin(); |
|
54 |
+ auto end = data.end(); |
|
37 | 55 |
auto leftover = (5 - extra_bytes) % 5; |
38 | 56 |
for (auto cur = begin; cur+5 < (end + leftover + 1); cur+=5) { |
39 | 57 |
std::vector<uint8_t> batch; |
... | ... |
@@ -47,40 +65,137 @@ std::string encodeBase32(const std::vector<uint8_t> data) { |
47 | 65 |
} |
48 | 66 |
|
49 | 67 |
std::bitset<40> everything; |
50 |
- everything |= (static_cast<uint64_t>(batch[0]) << 32); |
|
51 |
- everything |= (static_cast<uint64_t>(batch[1]) << 24); |
|
52 |
- everything |= (static_cast<uint64_t>(batch[2]) << 16); |
|
53 |
- everything |= (static_cast<uint64_t>(batch[3]) << 8); |
|
54 |
- everything |= batch[4]; |
|
55 |
- |
|
56 |
- uint64_t mask = 31; |
|
57 |
- uint64_t offset = 35; |
|
68 |
+ for (int x = 0, y = 32; x < 5; x+=1, y=32-x*8) { |
|
69 |
+ uint64_t item = batch[x]; |
|
70 |
+ everything |= item << y; |
|
71 |
+ } |
|
72 |
+ |
|
73 |
+ std::bitset<40> mask = 31; |
|
74 |
+ int offset = 35; |
|
58 | 75 |
mask <<= offset; |
59 | 76 |
|
60 |
- int counter = 0; |
|
61 |
- while (offset <= 35) { |
|
62 |
- auto idx = everything; |
|
63 |
- idx &= mask; |
|
64 |
- idx >>= offset; |
|
77 |
+ for (/* see above */; offset >= 0; mask >>= 5, offset -= 5) { |
|
78 |
+ auto idx = ((everything & mask) >> offset).to_ullong(); |
|
79 |
+ result.push_back(alphabet[idx]); |
|
80 |
+ } |
|
81 |
+ } |
|
82 |
+ |
|
83 |
+ result = pad_string(extra_bytes, result); |
|
84 |
+ return result; |
|
85 |
+} |
|
86 |
+ |
|
87 |
+std::unordered_map <char, unsigned char> construct_lookup_table() { |
|
88 |
+ std::unordered_map<char, unsigned char> lookup_table; |
|
89 |
+ for (int i = 0; i < alphabet.size(); i += 1) { |
|
90 |
+ lookup_table[alphabet[i]] = i; |
|
91 |
+ } |
|
92 |
+ return lookup_table; |
|
93 |
+} |
|
65 | 94 |
|
66 |
- mask >>= 5; |
|
67 |
- offset -= 5; |
|
95 |
// Prints the low `width` bits of `input` to stdout, most significant bit
// first, followed by a '|' separator after position (width - 1) / 2
// (i.e. "0110|1000" for the default width of 8).
//
// input: byte to display.
// width: number of bit positions to print; positions above bit 7 do not
//        exist in a byte and are printed as 0. Nothing is printed when
//        width <= 0.
void print_bits(uint8_t input, int width = 8) {
    for (int pos = width - 1; pos >= 0; --pos) {
        // Read the bit by index instead of shifting `input` destructively;
        // this stays well-defined for any width.
        const unsigned int bit = pos < 8 ? (input >> pos) & 1u : 0u;

        // %u matches the unsigned argument (the old "%d" did not).
        printf("%u", bit);

        const int printed_index = width - 1 - pos;
        if (printed_index == (width - 1) / 2) {
            printf("|");
        }
    }
}
|
72 | 107 |
|
73 |
- if (padding_chars > 0) { |
|
74 |
- auto replace_end = result.end(); |
|
75 |
- auto replace_start = replace_end - padding_chars; |
|
76 |
- auto replace_count = replace_end - replace_start; |
|
108 |
// Extracts `window_size` consecutive bits from `input`, starting
// `bit_offset` bits (counted from the most significant bit) into the byte
// at `byte_offset`. The window may run over into the following byte.
//
// input:       raw bytes to read from.
// byte_offset: index of the first byte of the window; std::string::substr
//              throws std::out_of_range if it is greater than input.size().
// bit_offset:  0 = most significant bit of that byte.
// window_size: number of bits to extract (default 5, one base32 symbol).
// Returns the extracted bits right-aligned (truncated to 8 bits). Bytes past
// the end of `input` read as 0; a window that does not fit in the two bytes
// starting at `byte_offset` yields 0.
uint8_t extract_bits_from_string(std::string input, std::string::size_type byte_offset, uint8_t bit_offset, uint8_t window_size = 5) {
    // substr takes (position, count): at most two bytes are ever needed.
    const std::string relevant_characters = input.substr(byte_offset, 2);

    const unsigned int bit_end_position = static_cast<unsigned int>(bit_offset) + window_size;
    if (window_size == 0 || bit_end_position > 16) {
        return 0;
    }

    // Go through unsigned char so bytes >= 0x80 are not sign-extended into
    // the high half of the 16-bit window where plain char is signed.
    const auto byte_at = [&relevant_characters](std::string::size_type i) -> unsigned int {
        return i < relevant_characters.size()
                   ? static_cast<unsigned char>(relevant_characters[i])
                   : 0u;
    };

    unsigned int window = byte_at(0) << 8;
    if (bit_end_position > 8) {
        window |= byte_at(1);
    }

    const unsigned int shift_offset = 16 - bit_end_position;
    const unsigned int mask = (1u << window_size) - 1u;
    return static_cast<uint8_t>((window >> shift_offset) & mask);
}
|
120 |
+ |
|
121 |
// Splits the low `width` bits of `item` into two pieces at bit position
// `pos`, counted from the most significant of those `width` bits.
//
// Returns { item shifted right by (width - pos), the low (width - pos) bits
// of item }. When pos >= width nothing is split off and the result is
// { item, 0 }.
std::pair<uint8_t, uint8_t> split_value(uint8_t item, uint8_t pos, uint8_t width) {
    // Guard both ends of the shift count: pos > width used to produce a
    // negative shift (undefined behaviour), and anything >= 8 already moves
    // every bit of a byte into the second piece.
    unsigned int shift = pos < width ? static_cast<unsigned int>(width - pos) : 0u;
    if (shift > 8) {
        shift = 8;
    }

    const unsigned int low_mask = (1u << shift) - 1u;
    const uint8_t low_part = static_cast<uint8_t>(item & low_mask);
    const uint8_t high_part = static_cast<uint8_t>(static_cast<unsigned int>(item) >> shift);
    return { high_part, low_part };
}
|
128 |
+ |
|
129 |
// ORs the low `width` bits of `item` into `data`, starting
// `bit_offset_from_start` bits (0 = most significant bit) into the byte at
// `byte_offset`. A value that does not fit in the current byte spills into
// the following one.
//
// data:                  destination buffer; existing bits are preserved.
// item:                  value to store; callers pass a value that fits in
//                        `width` bits.
// byte_offset:           index of the byte that receives the first bit.
// bit_offset_from_start: 0..7.
// width:                 number of bits to store; offset + width must not
//                        exceed 16.
//
// Bytes that would fall outside `data` are skipped instead of written, and
// arguments outside the ranges above leave `data` untouched.
void set_vector_at_bit(std::vector<uint8_t> &data, uint8_t item, std::vector<uint8_t>::size_type byte_offset, uint8_t bit_offset_from_start, uint8_t width) {
    const unsigned int end_bit = static_cast<unsigned int>(bit_offset_from_start) + width;
    if (bit_offset_from_start > 7 || end_bit > 16) {
        return;
    }

    // Position the value inside a 16-bit window covering this byte and the
    // next one; the two halves are then exactly what each byte receives.
    const unsigned int window = static_cast<unsigned int>(item) << (16 - end_bit);
    const uint8_t first_byte_bits = static_cast<uint8_t>((window >> 8) & 0xFFu);
    const uint8_t second_byte_bits = static_cast<uint8_t>(window & 0xFFu);

    if (byte_offset >= data.size()) {
        return;
    }
    data[byte_offset] |= first_byte_bits;

    const bool need_to_split = end_bit > 8;
    if (need_to_split && data.size() - byte_offset > 1) {
        data[byte_offset + 1] |= second_byte_bits;
    }
}
|
146 |
+ |
|
147 |
+ |
|
148 |
// Returns how many whole bytes the base32 text `input` decodes to: every
// character before the first '=' (or the whole string when there is none)
// contributes 5 bits, and the bit total is divided by 8, rounding down.
std::string::size_type calculate_decoded_size(std::string input) {
    const std::string::size_type padding_start = input.find('=');
    const std::string::size_type symbol_count =
        (padding_start == std::string::npos) ? input.size() : padding_start;
    return (symbol_count * 5) / 8;
}
|
156 |
+ |
|
157 |
+std::vector<unsigned char> decodeBase32(std::string input) { |
|
158 |
+ auto lookup_table = construct_lookup_table(); |
|
159 |
+ auto input_size = calculate_decoded_size(input); |
|
160 |
+ |
|
161 |
+ std::vector<unsigned char> result(input_size, 0); |
|
162 |
+ |
|
163 |
+ unsigned long long bits_written = 0; |
|
164 |
+ for (std::string::size_type idx = 0; idx < input.size(); idx++) { |
|
165 |
+ uint8_t val = lookup_table[input[idx]]; |
|
166 |
+ uint8_t start_bit = bits_written % 8; |
|
167 |
+ std::vector<unsigned char>::size_type current_byte = bits_written/8; |
|
80 | 168 |
|
169 |
+ set_vector_at_bit(result, val, current_byte, start_bit, 5); |
|
170 |
+ |
|
171 |
+ bits_written += 5; |
|
172 |
+ } |
|
81 | 173 |
return result; |
82 | 174 |
} |
83 | 175 |
|
176 |
+int main(int argc, char** argv) { |
|
177 |
+ std::string data = "hi"; |
|
178 |
+ while (std::getline(std::cin,data)) { |
|
179 |
+ std::cout << std::endl; |
|
180 |
+ std::vector<unsigned char> input(data.begin(), data.end()); |
|
181 |
+ // std::vector<unsigned char> input = { 0b10001100, 0b01100011, 0b00011000, 0b11000110, 0b10101010, 0b10101010 }; |
|
182 |
+ |
|
183 |
+ std::cout << "data:\t"; |
|
184 |
+ for (auto v: input) {print_bits(v); std::cout << ":";} |
|
185 |
+ std::cout << std::endl; |
|
186 |
+ |
|
187 |
+ std::string encoded = encodeBase32(input); |
|
188 |
+ std::vector<unsigned char> decoded = decodeBase32(encoded); |
|
189 |
+ |
|
190 |
+ std::cout << "output:\t"; |
|
191 |
+ for (auto v: decoded) {print_bits(v); std::cout << ":";} |
|
192 |
+ std::cout << std::endl; |
|
193 |
+ } |
|
194 |
+ |
|
195 |
+} |
|
196 |
+ |
|
197 |
+// To test the encoder at the command line: |
|
198 |
+/* |
|
84 | 199 |
int main(int argc, char** argv) { |
85 | 200 |
std::string data; |
86 | 201 |
while (std::getline(std::cin,data)) { |
... | ... |
@@ -90,3 +205,43 @@ int main(int argc, char** argv) { |
90 | 205 |
std::cout << "{\"ORIGINAL\": \"" << data << "\", \"BASE32\": \"" << res << "\"}" << std::endl; |
91 | 206 |
} |
92 | 207 |
} |
208 |
+ |
|
209 |
+export tmp=`mktemp` |
|
210 |
+(while true; do |
|
211 |
+ (paste <(./a.out < /usr/share/dict/words | tee /tmp/my_implementation | jq -r .BASE32) <(tr '[a-z]' '[A-Z]' < /tmp/oracle) /usr/share/dict/words | awk '$1 == $2 { printf("^[[32m") } $1 != $2 {printf("^[[31m") } {printf("%s\t%s\ttest: %s\toracle: %s", $1==$2, $3, $1, $2); print "^[[0m"} ' | sample.awk -v n=65 | sort > $tmp ) ; |
|
212 |
+ sleep 5; |
|
213 |
+ clear; |
|
214 |
+ printf '\e]50;ClearScrollback\a'; |
|
215 |
+ column -s $'\t' -t $tmp; |
|
216 |
+done) |
|
217 |
+*/ |
|
218 |
+ |
|
219 |
+// int main(int argc, char** argv) { |
|
220 |
+// std::vector<unsigned char> input; |
|
221 |
+// input = { 0, 0 ,0 ,0 ,0 ,0 ,0}; |
|
222 |
+// std::cout << "orig vector: "; for (auto v: input) {print_bits(v);}; std::cout << std::endl; |
|
223 |
+// set_vector_at_bit(input, 0b11111, 0, 7, 5); |
|
224 |
+// std::cout << "rslt vector: "; for (auto v: input) {print_bits(v);}; std::cout << std::endl; |
|
225 |
+ |
|
226 |
+// input = { 0, 0 ,0 ,0 ,0 ,0 ,0}; |
|
227 |
+// std::cout << "orig vector: "; for (auto v: input) {print_bits(v);}; std::cout << std::endl; |
|
228 |
+// set_vector_at_bit(input, 0b11111, 0, 6, 5); |
|
229 |
+// std::cout << "rslt vector: "; for (auto v: input) {print_bits(v);}; std::cout << std::endl; |
|
230 |
+ |
|
231 |
+// input = { 0, 0 ,0 ,0 ,0 ,0 ,0}; |
|
232 |
+// std::cout << "orig vector: "; for (auto v: input) {print_bits(v);}; std::cout << std::endl; |
|
233 |
+// set_vector_at_bit(input, 0b11111, 0, 5, 5); |
|
234 |
+// std::cout << "rslt vector: "; for (auto v: input) {print_bits(v);}; std::cout << std::endl; |
|
235 |
+ |
|
236 |
+// input = { 0, 0 ,0 ,0 ,0 ,0 ,0}; |
|
237 |
+// std::cout << "orig vector: "; for (auto v: input) {print_bits(v);}; std::cout << std::endl; |
|
238 |
+// set_vector_at_bit(input, 0b11111, 0, 4, 5); |
|
239 |
+// std::cout << "rslt vector: "; for (auto v: input) {print_bits(v);}; std::cout << std::endl; |
|
240 |
+ |
|
241 |
+// input = { 0, 0 ,0 ,0 ,0 ,0 ,0}; |
|
242 |
+// std::cout << "orig vector: "; for (auto v: input) {print_bits(v);}; std::cout << std::endl; |
|
243 |
+// set_vector_at_bit(input, 0b11111, 0, 3, 5); |
|
244 |
+// std::cout << "rslt vector: "; for (auto v: input) {print_bits(v);}; std::cout << std::endl; |
|
245 |
+// } |
|
246 |
+ |
|
247 |
+template class std::vector<unsigned char>; |