git.fiddlerwoaroof.com
Browse code

Add base32 encoding/decoding

Ed Langley authored on 06/06/2017 22:06:25
Showing 1 changed files
... ...
@@ -1,23 +1,9 @@
1 1
 #include <iostream>
2 2
 #include <iomanip>
3 3
 #include <vector>
4
+#include <unordered_map>
4 5
 
5
-std::string encodeBase32(const std::vector<uint8_t> data) {
6
-    std::vector<char> alphabet = {
7
-        'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H',
8
-        'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P',
9
-        'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X',
10
-        'Y', 'Z', '2', '3', '4', '5', '6', '7'
11
-    };
12
-
13
-    uint8_t tmp = 0;
14
-    auto data_size = data.size();
15
-
16
-    std::string result;
17
-    auto begin = data.begin();
18
-    auto end = data.end();
19
-
20
-    auto extra_bytes = (data_size % 5);
6
+uint64_t calculate_padding_bytes(uint64_t extra_bytes) {
21 7
     auto padding_chars = 0;
22 8
     switch (extra_bytes) {
23 9
     case 1:
... ...
@@ -34,6 +20,38 @@ std::string encodeBase32(const std::vector<uint8_t> data) {
34 20
         break;
35 21
     }
36 22
 
23
+    return padding_chars;
24
+}
25
+
26
+std::string &pad_string(uint64_t extra_bytes, std::string &input) {
27
+    auto padding_chars = calculate_padding_bytes(extra_bytes);
28
+    if (padding_chars > 0) {
29
+        auto replace_end   = input.end();
30
+        auto replace_start = replace_end - padding_chars;
31
+        auto replace_count = replace_end - replace_start;
32
+
33
+        input.replace(replace_start, replace_end, replace_count, '=');
34
+    }
35
+    return input;
36
+}
37
+
38
// Base32 symbol table: the character stored at index i represents the 5-bit
// value i ('A'-'Z' for 0-25, '2'-'7' for 26-31).
static const std::vector<char> alphabet{
    // values 0-15
    'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P',
    // values 16-31
    'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', '2', '3', '4', '5', '6', '7',
};
44
+
45
+std::string encodeBase32(const std::vector<uint8_t> data) {
46
+    std::string result;
47
+
48
+    uint8_t tmp = 0;
49
+
50
+    auto data_size = data.size();
51
+    auto extra_bytes = data_size % 5;
52
+
53
+    auto begin = data.begin();
54
+    auto end = data.end();
37 55
     auto leftover = (5 - extra_bytes) % 5;
38 56
     for (auto cur = begin; cur+5 < (end + leftover + 1); cur+=5) {
39 57
         std::vector<uint8_t> batch;
... ...
@@ -47,40 +65,137 @@ std::string encodeBase32(const std::vector<uint8_t> data) {
47 65
         }
48 66
 
49 67
         std::bitset<40> everything;
50
-        everything |= (static_cast<uint64_t>(batch[0]) << 32);
51
-        everything |= (static_cast<uint64_t>(batch[1]) << 24);
52
-        everything |= (static_cast<uint64_t>(batch[2]) << 16);
53
-        everything |= (static_cast<uint64_t>(batch[3]) <<  8);
54
-        everything |=                        batch[4];
55
-
56
-        uint64_t mask = 31;
57
-        uint64_t offset = 35;
68
+        for (int x = 0, y = 32; x < 5; x+=1, y=32-x*8) {
69
+            uint64_t item = batch[x];
70
+            everything |= item << y;
71
+        }
72
+
73
+        std::bitset<40> mask = 31;
74
+        int offset = 35;
58 75
         mask <<= offset;
59 76
 
60
-        int counter = 0;
61
-        while (offset <= 35) {
62
-            auto idx = everything;
63
-            idx &=  mask;
64
-            idx >>= offset;
77
+        for (/* see above */; offset >= 0; mask >>= 5, offset -= 5) {
78
+            auto idx = ((everything & mask) >> offset).to_ullong();
79
+            result.push_back(alphabet[idx]);
80
+        }
81
+    }
82
+
83
+    result = pad_string(extra_bytes, result);
84
+    return result;
85
+}
86
+
87
+std::unordered_map <char, unsigned char> construct_lookup_table() {
88
+    std::unordered_map<char, unsigned char> lookup_table;
89
+    for (int i = 0; i < alphabet.size(); i += 1) {
90
+        lookup_table[alphabet[i]] = i;
91
+    }
92
+    return lookup_table;
93
+}
65 94
 
66
-            mask >>= 5;
67
-            offset -= 5;
95
// Prints the low `width` bits of `input` to stdout, most significant first,
// as '0'/'1' characters. A '|' separator is emitted after the bit at index
// (width - 1) / 2, i.e. between the two nibbles for the default width of 8.
//
// input: value to print (taken by value; shifted locally while printing).
// width: number of bits to print; intended range is 1..8 since `input` only
//        holds 8 bits. Nothing is printed for width <= 0.
void print_bits(uint8_t input, int width = 8) {
    const int top_bit = width - 1;
    for (int counter = 0; counter < width; counter++) {
        // Read the current top bit, then shift the next bit into its place.
        const unsigned int bit = (input >> top_bit) & 1u;
        input <<= 1;

        // %u matches the unsigned argument.
        printf("%u", bit);
        if (counter == top_bit / 2) {
            printf("|");
        }
    }
}
72 107
 
73
-    if (padding_chars > 0) {
74
-        auto replace_end   = result.end();
75
-        auto replace_start = replace_end - padding_chars;
76
-        auto replace_count = replace_end - replace_start;
108
// Extracts a window of bits from a string viewed as a big-endian bit stream.
//
// input:       the bytes to read from.
// byte_offset: index of the byte in which the window starts. As with
//              std::string::substr, an offset greater than input.size()
//              throws std::out_of_range.
// bit_offset:  position of the window's first bit inside that byte, counted
//              from the most significant bit (0..7).
// window_size: number of bits to extract (default 5).
//
// Precondition: bit_offset + window_size <= 16 (the window may straddle at
// most two bytes). Bytes past the end of the string read as zero.
// Returns the window right-aligned in a uint8_t.
uint8_t extract_bits_from_string(const std::string &input, std::string::size_type byte_offset, uint8_t bit_offset, uint8_t window_size = 5) {
    // substr takes (pos, count): at most two bytes are ever needed.
    const std::string relevant_characters = input.substr(byte_offset, 2);

    // Go through unsigned char so bytes >= 0x80 are not sign-extended, and
    // treat positions beyond the available bytes as zero.
    const auto byte_at = [&relevant_characters](std::string::size_type i) -> uint16_t {
        return i < relevant_characters.size()
                   ? static_cast<unsigned char>(relevant_characters[i])
                   : 0;
    };

    const uint16_t bit_end_position = bit_offset + window_size;
    const uint16_t mask = (1 << window_size) - 1;
    const uint16_t shift_offset = 16 - bit_end_position;

    uint16_t result = byte_at(0) << 8;
    if (bit_end_position > 8) {
        // The window crosses into the following byte.
        result |= byte_at(1);
    }

    return static_cast<uint8_t>((result >> shift_offset) & mask);
}
120
+
121
// Splits the low `width` bits of `item` into two pieces at bit position `pos`
// (counted from the most significant of those `width` bits).
//
// Returns {high, low}: `high` is `item` shifted right by (width - pos), and
// `low` is the remaining (width - pos) least significant bits of `item`.
std::pair<uint8_t, uint8_t> split_value(uint8_t item, uint8_t pos, uint8_t width) {
    const int low_bit_count = width - pos;
    const uint8_t low_mask = static_cast<uint8_t>((1 << low_bit_count) - 1);

    const uint8_t high_part = static_cast<uint8_t>(item >> low_bit_count);
    const uint8_t low_part = static_cast<uint8_t>(item & low_mask);
    return std::make_pair(high_part, low_part);
}
128
+
129
+void set_vector_at_bit(std::vector<uint8_t> &data, uint8_t item, std::vector<uint8_t>::size_type byte_offset, uint8_t bit_offset_from_start, uint8_t width) {
130
+    bool need_to_split = bit_offset_from_start + width > 8;
131
+    uint8_t bit_offset_from_end = 7 - bit_offset_from_start;
77 132
 
78
-        result.replace(replace_start, replace_end, replace_count, '=');
133
+    if (need_to_split) {
134
+        uint8_t split_pos = bit_offset_from_end + 1;
135
+        auto pieces = split_value(item, split_pos, width);
136
+        data[byte_offset] |= pieces.first;
137
+
138
+        uint8_t leftover_bits = width - split_pos;
139
+        uint8_t second_part_shift = 8 - leftover_bits;
140
+        data[byte_offset+1] |= pieces.second << second_part_shift;
141
+    } else {
142
+        uint8_t last_bit = (bit_offset_from_end - width) + 1;
143
+        data[byte_offset] |= item << last_bit;
144
+    }
145
+}
146
+
147
+
148
// Returns how many whole bytes the Base32 text `input` decodes to.
//
// Only the characters before the first '=' are counted; each contributes
// 5 bits, and the bit total is divided by 8, rounding down.
std::string::size_type calculate_decoded_size(std::string input) {
    const std::string::size_type padding_start = input.find('=');
    const std::string::size_type symbol_count =
        (padding_start == std::string::npos) ? input.size() : padding_start;
    return (symbol_count * 5) / 8;
}
156
+
157
+std::vector<unsigned char> decodeBase32(std::string input) {
158
+    auto lookup_table = construct_lookup_table();
159
+    auto input_size = calculate_decoded_size(input);
160
+
161
+    std::vector<unsigned char> result(input_size, 0);
162
+
163
+    unsigned long long bits_written = 0;
164
+    for (std::string::size_type idx = 0; idx < input.size(); idx++) {
165
+        uint8_t val = lookup_table[input[idx]];
166
+        uint8_t start_bit = bits_written % 8;
167
+        std::vector<unsigned char>::size_type current_byte = bits_written/8;
80 168
 
169
+        set_vector_at_bit(result, val, current_byte, start_bit, 5);
170
+
171
+        bits_written += 5;
172
+    }
81 173
     return result;
82 174
 }
83 175
 
176
+int main(int argc, char** argv) {
177
+    std::string data = "hi";
178
+    while (std::getline(std::cin,data)) {
179
+        std::cout << std::endl;
180
+        std::vector<unsigned char> input(data.begin(), data.end());
181
+        // std::vector<unsigned char> input = { 0b10001100, 0b01100011, 0b00011000, 0b11000110, 0b10101010, 0b10101010 };
182
+
183
+        std::cout << "data:\t";
184
+        for (auto v: input) {print_bits(v); std::cout << ":";}
185
+        std::cout << std::endl;
186
+
187
+        std::string encoded = encodeBase32(input);
188
+        std::vector<unsigned char> decoded = decodeBase32(encoded);
189
+
190
+        std::cout << "output:\t";
191
+        for (auto v: decoded) {print_bits(v); std::cout << ":";}
192
+        std::cout << std::endl;
193
+    }
194
+
195
+}
196
+
197
+// To test the encoder at the command line:
198
+/*
84 199
 int main(int argc, char** argv) {
85 200
     std::string data;
86 201
     while (std::getline(std::cin,data)) {
... ...
@@ -90,3 +205,43 @@ int main(int argc, char** argv) {
90 205
         std::cout << "{\"ORIGINAL\": \"" << data << "\", \"BASE32\": \"" << res << "\"}" << std::endl;
91 206
     }
92 207
 }
208
+
209
+export tmp=`mktemp`
210
+(while true; do
211
+  (paste <(./a.out < /usr/share/dict/words | tee /tmp/my_implementation | jq -r .BASE32) <(tr '[a-z]' '[A-Z]' < /tmp/oracle) /usr/share/dict/words | awk '$1 == $2 { printf("^[[32m") } $1 != $2 {printf("^[[31m") } {printf("%s\t%s\ttest: %s\toracle: %s", $1==$2, $3, $1, $2); print "^[[0m"} ' | sample.awk -v n=65 | sort > $tmp ) ;
212
+  sleep 5;
213
+  clear;
214
+  printf '\e]50;ClearScrollback\a';
215
+  column -s $'\t' -t $tmp;
216
+done)
217
+*/
218
+
219
+// int main(int argc, char** argv) {
220
+//     std::vector<unsigned char> input;
221
+//     input = { 0, 0 ,0 ,0 ,0 ,0 ,0};
222
+//     std::cout << "orig vector: "; for (auto v: input) {print_bits(v);}; std::cout << std::endl;
223
+//     set_vector_at_bit(input, 0b11111, 0, 7, 5);
224
+//     std::cout << "rslt vector: "; for (auto v: input) {print_bits(v);}; std::cout << std::endl;
225
+
226
+//     input = { 0, 0 ,0 ,0 ,0 ,0 ,0};
227
+//     std::cout << "orig vector: "; for (auto v: input) {print_bits(v);}; std::cout << std::endl;
228
+//     set_vector_at_bit(input, 0b11111, 0, 6, 5);
229
+//     std::cout << "rslt vector: "; for (auto v: input) {print_bits(v);}; std::cout << std::endl;
230
+
231
+//     input = { 0, 0 ,0 ,0 ,0 ,0 ,0};
232
+//     std::cout << "orig vector: "; for (auto v: input) {print_bits(v);}; std::cout << std::endl;
233
+//     set_vector_at_bit(input, 0b11111, 0, 5, 5);
234
+//     std::cout << "rslt vector: "; for (auto v: input) {print_bits(v);}; std::cout << std::endl;
235
+
236
+//     input = { 0, 0 ,0 ,0 ,0 ,0 ,0};
237
+//     std::cout << "orig vector: "; for (auto v: input) {print_bits(v);}; std::cout << std::endl;
238
+//     set_vector_at_bit(input, 0b11111, 0, 4, 5);
239
+//     std::cout << "rslt vector: "; for (auto v: input) {print_bits(v);}; std::cout << std::endl;
240
+
241
+//     input = { 0, 0 ,0 ,0 ,0 ,0 ,0};
242
+//     std::cout << "orig vector: "; for (auto v: input) {print_bits(v);}; std::cout << std::endl;
243
+//     set_vector_at_bit(input, 0b11111, 0, 3, 5);
244
+//     std::cout << "rslt vector: "; for (auto v: input) {print_bits(v);}; std::cout << std::endl;
245
+// }
246
+
247
// Explicit instantiation definition of std::vector<unsigned char>.
// NOTE(review): the reason is not visible in this file (possibly to force all
// member functions to be emitted, e.g. for use from a debugger) — confirm.
// NOTE(review): explicitly instantiating a standard-library template for a
// type that is not user-defined appears to fall outside what [namespace.std]
// permits — verify before keeping this line.
+template class std::vector<unsigned char>;