git.fiddlerwoaroof.com
Browse code

Beginning to convert the base32 codec into a class

Ed Langley authored on 06/06/2017 23:41:09
Showing 3 changed files
... ...
@@ -1,39 +1,20 @@
1
+/* Copyright (C) CJ Affiliate
2
+ *
3
+ * You may use, distribute and modify this code under  the
4
+ * terms of the  GNU General Public License  version 2  or
5
+ * later.
6
+ *
7
+ * You should have received a copy of the license with this
8
+ * file. If not, you will find a copy in the "LICENSE" file
9
+ * at https://github.com/cjdev/dual-control.
10
+ */
11
+
1 12
 #include <iostream>
2 13
 #include <iomanip>
3 14
 #include <vector>
4 15
 #include <unordered_map>
5 16
 
6
-uint64_t calculate_padding_bytes(uint64_t extra_bytes) {
7
-    auto padding_chars = 0;
8
-    switch (extra_bytes) {
9
-    case 1:
10
-        padding_chars = 6;
11
-        break;
12
-    case 2:
13
-        padding_chars = 4;
14
-        break;
15
-    case 3:
16
-        padding_chars = 3;
17
-        break;
18
-    case 4:
19
-        padding_chars = 1;
20
-        break;
21
-    }
22
-
23
-    return padding_chars;
24
-}
25
-
26
-std::string &pad_string(uint64_t extra_bytes, std::string &input) {
27
-    auto padding_chars = calculate_padding_bytes(extra_bytes);
28
-    if (padding_chars > 0) {
29
-        auto replace_end   = input.end();
30
-        auto replace_start = replace_end - padding_chars;
31
-        auto replace_count = replace_end - replace_start;
32
-
33
-        input.replace(replace_start, replace_end, replace_count, '=');
34
-    }
35
-    return input;
36
-}
17
+#include "base32.h"
37 18
 
38 19
 static const std::vector<char> alphabet = {
39 20
     'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H',
... ...
@@ -42,206 +23,184 @@ static const std::vector<char> alphabet = {
42 23
     'Y', 'Z', '2', '3', '4', '5', '6', '7'
43 24
 };
44 25
 
45
-std::string encodeBase32(const std::vector<uint8_t> data) {
46
-    std::string result;
26
+namespace
27
+{
28
+class base32_impl : public base32_ifc
29
+{
30
+private:
31
+    uint64_t calculate_padding_bytes (uint64_t extra_bytes)
32
+    {
33
+        auto padding_chars = 0;
34
+
35
+        switch (extra_bytes) {
36
+        case 1:
37
+            padding_chars = 6;
38
+            break;
39
+
40
+        case 2:
41
+            padding_chars = 4;
42
+            break;
43
+
44
+        case 3:
45
+            padding_chars = 3;
46
+            break;
47
+
48
+        case 4:
49
+            padding_chars = 1;
50
+            break;
51
+        }
47 52
 
48
-    uint8_t tmp = 0;
53
+        return padding_chars;
54
+    }
49 55
 
50
-    auto data_size = data.size();
51
-    auto extra_bytes = data_size % 5;
56
+    std::string &pad_string (uint64_t extra_bytes, std::string &input)
57
+    {
58
+        auto padding_chars = calculate_padding_bytes (extra_bytes);
52 59
 
53
-    auto begin = data.begin();
54
-    auto end = data.end();
55
-    auto leftover = (5 - extra_bytes) % 5;
56
-    for (auto cur = begin; cur+5 < (end + leftover + 1); cur+=5) {
57
-        std::vector<uint8_t> batch;
58
-        if (cur+5 < end) {
59
-             batch = std::vector<uint8_t> (cur, cur+5);
60
-        } else {
61
-             batch = std::vector<uint8_t> (cur, end);
62
-             for (int x = 0; x < leftover; x++) {
63
-                 batch.push_back(0);
64
-             }
65
-        }
60
+        if (padding_chars > 0) {
61
+            auto replace_end   = input.end();
62
+            auto replace_start = replace_end - padding_chars;
63
+            auto replace_count = replace_end - replace_start;
66 64
 
67
-        std::bitset<40> everything;
68
-        for (int x = 0, y = 32; x < 5; x+=1, y=32-x*8) {
69
-            uint64_t item = batch[x];
70
-            everything |= item << y;
65
+            input.replace (replace_start, replace_end, replace_count, '=');
71 66
         }
72 67
 
73
-        std::bitset<40> mask = 31;
74
-        int offset = 35;
75
-        mask <<= offset;
76
-
77
-        for (/* see above */; offset >= 0; mask >>= 5, offset -= 5) {
78
-            auto idx = ((everything & mask) >> offset).to_ullong();
79
-            result.push_back(alphabet[idx]);
80
-        }
68
+        return input;
81 69
     }
82 70
 
83
-    result = pad_string(extra_bytes, result);
84
-    return result;
85
-}
71
+public:
72
+    std::string encode (const std::vector<uint8_t> data) override
73
+    {
74
+        std::string result;
86 75
 
87
-std::unordered_map <char, unsigned char> construct_lookup_table() {
88
-    std::unordered_map<char, unsigned char> lookup_table;
89
-    for (int i = 0; i < alphabet.size(); i += 1) {
90
-        lookup_table[alphabet[i]] = i;
91
-    }
92
-    return lookup_table;
93
-}
76
+        uint8_t tmp = 0;
77
+
78
+        auto data_size = data.size();
79
+        auto extra_bytes = data_size % 5;
80
+
81
+        auto begin = data.begin();
82
+        auto end = data.end();
83
+        auto leftover = (5 - extra_bytes) % 5;
84
+
85
+        for (auto cur = begin; cur+5 < (end + leftover + 1); cur+=5) {
86
+            std::vector<uint8_t> batch;
87
+
88
+            if (cur+5 < end) {
89
+                batch = std::vector<uint8_t> (cur, cur+5);
90
+            } else {
91
+                batch = std::vector<uint8_t> (cur, end);
92
+
93
+                for (int x = 0; x < leftover; x++) {
94
+                    batch.push_back (0);
95
+                }
96
+            }
97
+
98
+            std::bitset<40> everything;
94 99
 
95
-void print_bits(uint8_t input, int width = 8) {
96
-    for (int counter = 0; counter < width; counter++) {
97
-        unsigned int bit = input & (1 << (width - 1));
98
-        input <<= 1;
99
-        bit >>= (width - 1);
100
+            for (int x = 0, y = 32; x < 5; x+=1, y=32-x*8) {
101
+                uint64_t item = batch[x];
102
+                everything |= item << y;
103
+            }
100 104
 
101
-        printf("%d", bit);
102
-        if (counter == (width-1)/2) {
103
-            printf("|");
105
+            std::bitset<40> mask = 31;
106
+            int offset = 35;
107
+            mask <<= offset;
108
+
109
+            for (/* see above */; offset >= 0; mask >>= 5, offset -= 5) {
110
+                auto idx = ((everything & mask) >> offset).to_ullong();
111
+                result.push_back (alphabet[idx]);
112
+            }
104 113
         }
105
-    }
106
-}
107 114
 
108
-uint8_t extract_bits_from_string(std::string input, std::string::size_type byte_offset, uint8_t bit_offset, uint8_t window_size = 5) {
109
-    auto relevant_characters = input.substr(byte_offset, byte_offset + 2);
110
-    uint16_t bit_end_position = bit_offset + window_size;
111
-    uint16_t result = relevant_characters[0] << 8;
112
-    uint16_t mask = (1 << window_size) - 1;
113
-    uint16_t shift_offset = 16 - bit_end_position;
114
-    if (bit_end_position > 8) {
115
-        result |= relevant_characters[1];
115
+        result = pad_string (extra_bytes, result);
116
+        return result;
116 117
     }
117 118
 
118
-    return (result & (mask << shift_offset)) >> shift_offset;
119
-}
119
+private:
120 120
 
121
-std::pair<uint8_t, uint8_t> split_value(uint8_t item, uint8_t pos, uint8_t width) {
122
-    auto shift = width-pos;
123
-    uint8_t part2_mask = (1 << (shift)) - 1;
124
-    uint8_t part2 = item & part2_mask;
125
-    item >>= shift;
126
-    return { item, part2 };
127
-}
121
+    std::unordered_map <char, unsigned char> construct_lookup_table()
122
+    {
123
+        std::unordered_map<char, unsigned char> lookup_table;
128 124
 
129
-void set_vector_at_bit(std::vector<uint8_t> &data, uint8_t item, std::vector<uint8_t>::size_type byte_offset, uint8_t bit_offset_from_start, uint8_t width) {
130
-    bool need_to_split = bit_offset_from_start + width > 8;
131
-    uint8_t bit_offset_from_end = 7 - bit_offset_from_start;
132
-
133
-    if (need_to_split) {
134
-        uint8_t split_pos = bit_offset_from_end + 1;
135
-        auto pieces = split_value(item, split_pos, width);
136
-        data[byte_offset] |= pieces.first;
137
-
138
-        uint8_t leftover_bits = width - split_pos;
139
-        uint8_t second_part_shift = 8 - leftover_bits;
140
-        data[byte_offset+1] |= pieces.second << second_part_shift;
141
-    } else {
142
-        uint8_t last_bit = (bit_offset_from_end - width) + 1;
143
-        data[byte_offset] |= item << last_bit;
125
+        for (int i = 0; i < alphabet.size(); i += 1) {
126
+            lookup_table[alphabet[i]] = i;
127
+        }
128
+
129
+        return lookup_table;
144 130
     }
145
-}
146 131
 
132
+    std::pair<uint8_t, uint8_t> split_value (uint8_t item, uint8_t pos,
133
+            uint8_t width)
134
+    {
135
+        auto shift = width-pos;
136
+        uint8_t part2_mask = (1 << (shift)) - 1;
137
+        uint8_t part2 = item & part2_mask;
138
+        item >>= shift;
139
+        return { item, part2 };
140
+    }
147 141
 
148
-std::string::size_type calculate_decoded_size(std::string input) {
149
-    std::string::size_type input_size = input.size();
150
-    std::string::size_type first_equals = input.find_first_of('=');
151
-    if (first_equals != std::string::npos) {
152
-        input_size = first_equals;
142
+    void set_vector_at_bit (std::vector<uint8_t> &data, uint8_t item,
143
+                            std::vector<uint8_t>::size_type byte_offset, uint8_t bit_offset_from_start,
144
+                            uint8_t width)
145
+    {
146
+        bool need_to_split = bit_offset_from_start + width > 8;
147
+        uint8_t bit_offset_from_end = 7 - bit_offset_from_start;
148
+
149
+        if (need_to_split) {
150
+            uint8_t split_pos = bit_offset_from_end + 1;
151
+            auto pieces = split_value (item, split_pos, width);
152
+            data[byte_offset] |= pieces.first;
153
+
154
+            uint8_t leftover_bits = width - split_pos;
155
+            uint8_t second_part_shift = 8 - leftover_bits;
156
+            data[byte_offset+1] |= pieces.second << second_part_shift;
157
+        } else {
158
+            uint8_t last_bit = (bit_offset_from_end - width) + 1;
159
+            data[byte_offset] |= item << last_bit;
160
+        }
153 161
     }
154
-    return (input_size * 5) / 8;
155
-}
156 162
 
157
-std::vector<unsigned char> decodeBase32(std::string input) {
158
-    auto lookup_table = construct_lookup_table();
159
-    auto input_size = calculate_decoded_size(input);
163
+    std::string::size_type calculate_decoded_size (std::string input)
164
+    {
165
+        std::string::size_type input_size = input.size();
166
+        std::string::size_type first_equals = input.find_first_of ('=');
160 167
 
161
-    std::vector<unsigned char> result(input_size, 0);
168
+        if (first_equals != std::string::npos) {
169
+            input_size = first_equals;
170
+        }
162 171
 
163
-    unsigned long long bits_written = 0;
164
-    for (std::string::size_type idx = 0; idx < input.size(); idx++) {
165
-        uint8_t val = lookup_table[input[idx]];
166
-        uint8_t start_bit = bits_written % 8;
167
-        std::vector<unsigned char>::size_type current_byte = bits_written/8;
172
+        return (input_size * 5) / 8;
173
+    }
168 174
 
169
-        set_vector_at_bit(result, val, current_byte, start_bit, 5);
175
+public:
170 176
 
171
-        bits_written += 5;
172
-    }
173
-    return result;
174
-}
177
+    std::vector<uint8_t> decode (std::string input) override
178
+    {
179
+        auto lookup_table = construct_lookup_table();
180
+        auto input_size = calculate_decoded_size (input);
175 181
 
176
-int main(int argc, char** argv) {
177
-    std::string data = "hi";
178
-    while (std::getline(std::cin,data)) {
179
-        std::cout << std::endl;
180
-        std::vector<unsigned char> input(data.begin(), data.end());
181
-        // std::vector<unsigned char> input = { 0b10001100, 0b01100011, 0b00011000, 0b11000110, 0b10101010, 0b10101010 };
182
+        std::vector<uint8_t> result (input_size, 0);
182 183
 
183
-        std::cout << "data:\t";
184
-        for (auto v: input) {print_bits(v); std::cout << ":";}
185
-        std::cout << std::endl;
184
+        unsigned long long bits_written = 0;
186 185
 
187
-        std::string encoded = encodeBase32(input);
188
-        std::vector<unsigned char> decoded = decodeBase32(encoded);
186
+        for (std::string::size_type idx = 0; idx < input.size(); idx++) {
187
+            uint8_t val = lookup_table[input[idx]];
189 188
 
190
-        std::cout << "output:\t";
191
-        for (auto v: decoded) {print_bits(v); std::cout << ":";}
192
-        std::cout << std::endl;
193
-    }
189
+            uint8_t start_bit = bits_written % 8;
190
+            std::vector<unsigned char>::size_type current_byte = bits_written/8;
194 191
 
195
-}
192
+            set_vector_at_bit (result, val, current_byte, start_bit, 5);
196 193
 
197
-// To test the encoder at the command line:
198
-/*
199
-int main(int argc, char** argv) {
200
-    std::string data;
201
-    while (std::getline(std::cin,data)) {
202
-        std::vector<unsigned char> v(data.begin(), data.end());
194
+            bits_written += 5;
195
+        }
203 196
 
204
-        std::string res = encodeBase32(v);
205
-        std::cout << "{\"ORIGINAL\": \"" << data << "\", \"BASE32\": \"" << res << "\"}" << std::endl;
197
+        return result;
206 198
     }
199
+};
207 200
 }
208 201
 
209
-export tmp=`mktemp`
210
-(while true; do
211
-  (paste <(./a.out < /usr/share/dict/words | tee /tmp/my_implementation | jq -r .BASE32) <(tr '[a-z]' '[A-Z]' < /tmp/oracle) /usr/share/dict/words | awk '$1 == $2 { printf("^[[32m") } $1 != $2 {printf("^[[31m") } {printf("%s\t%s\ttest: %s\toracle: %s", $1==$2, $3, $1, $2); print "^[[0m"} ' | sample.awk -v n=65 | sort > $tmp ) ;
212
-  sleep 5;
213
-  clear;
214
-  printf '\e]50;ClearScrollback\a';
215
-  column -s $'\t' -t $tmp;
216
-done)
217
-*/
218
-
219
-// int main(int argc, char** argv) {
220
-//     std::vector<unsigned char> input;
221
-//     input = { 0, 0 ,0 ,0 ,0 ,0 ,0};
222
-//     std::cout << "orig vector: "; for (auto v: input) {print_bits(v);}; std::cout << std::endl;
223
-//     set_vector_at_bit(input, 0b11111, 0, 7, 5);
224
-//     std::cout << "rslt vector: "; for (auto v: input) {print_bits(v);}; std::cout << std::endl;
225
-
226
-//     input = { 0, 0 ,0 ,0 ,0 ,0 ,0};
227
-//     std::cout << "orig vector: "; for (auto v: input) {print_bits(v);}; std::cout << std::endl;
228
-//     set_vector_at_bit(input, 0b11111, 0, 6, 5);
229
-//     std::cout << "rslt vector: "; for (auto v: input) {print_bits(v);}; std::cout << std::endl;
230
-
231
-//     input = { 0, 0 ,0 ,0 ,0 ,0 ,0};
232
-//     std::cout << "orig vector: "; for (auto v: input) {print_bits(v);}; std::cout << std::endl;
233
-//     set_vector_at_bit(input, 0b11111, 0, 5, 5);
234
-//     std::cout << "rslt vector: "; for (auto v: input) {print_bits(v);}; std::cout << std::endl;
235
-
236
-//     input = { 0, 0 ,0 ,0 ,0 ,0 ,0};
237
-//     std::cout << "orig vector: "; for (auto v: input) {print_bits(v);}; std::cout << std::endl;
238
-//     set_vector_at_bit(input, 0b11111, 0, 4, 5);
239
-//     std::cout << "rslt vector: "; for (auto v: input) {print_bits(v);}; std::cout << std::endl;
240
-
241
-//     input = { 0, 0 ,0 ,0 ,0 ,0 ,0};
242
-//     std::cout << "orig vector: "; for (auto v: input) {print_bits(v);}; std::cout << std::endl;
243
-//     set_vector_at_bit(input, 0b11111, 0, 3, 5);
244
-//     std::cout << "rslt vector: "; for (auto v: input) {print_bits(v);}; std::cout << std::endl;
245
-// }
246
-
247 202
 template class std::vector<unsigned char>;
203
+
204
+base32::base32 (
205
+    delegate_ (std::make_shared<base32_impl> ())
206
+{}
248 207
new file mode 100644
... ...
@@ -0,0 +1,44 @@
1
+/* Copyright (C) CJ Affiliate
2
+ *
3
+ * You may use, distribute and modify this code under  the
4
+ * terms of the  GNU General Public License  version 2  or
5
+ * later.
6
+ *
7
+ * You should have received a copy of the license with this
8
+ * file. If not, you will find a copy in the "LICENSE" file
9
+ * at https://github.com/cjdev/dual-control.
10
+ */
11
+
12
+#ifndef BASE32_H
13
+#define BASE32_H
14
+
15
+#include <memory>
16
+#include <vector>
17
+#include <string>
18
+
19
+class base32_ifc
20
+{
21
+public:
22
+    virtual std::string encode (std::vector<uint8_t> input) = 0;
23
+    virtual std::vector<uint8_t> decode (std::vector<uint8_t> input) = 0;
24
+};
25
+
26
+class base32
27
+{
28
+public:
29
+    using delegate = std::shared_ptr<base32_ifc>;
30
+private:
31
+    delegate delegate_;
32
+public:
33
+    base32 ();
34
+
35
+    std::string encode (std::vector<uint8_t> input) {
36
+        return delegate_->encode(input);
37
+    };
38
+
39
+    std::vector<uint8_t> decode (std::vector<uint8_t> input) {
40
+        return delegate_->decode(input);
41
+    };
42
+};
43
+
44
+#endif
0 45
new file mode 100644
... ...
@@ -0,0 +1,65 @@
1
+/* Copyright (C) CJ Affiliate
2
+ *
3
+ * You may use, distribute and modify this code under  the
4
+ * terms of the  GNU General Public License  version 2  or
5
+ * later.
6
+ *
7
+ * You should have received a copy of the license with this
8
+ * file. If not, you will find a copy in the "LICENSE" file
9
+ * at https://github.com/cjdev/dual-control.
10
+ */
11
+#include <memory>
12
+#include <algorithm>
13
+#include <initializer_list>
14
+#include <vector>
15
+#include <climits>
16
+#include <ctime>
17
+#include <iostream>
18
+#include <map>
19
+
20
+#include "base32.h"
21
+#include "test_util.h"
22
+
23
+// base32 test
24
+int int_precomputed()
25
+{
26
+    // given
27
+    std::map<std::string, std::string> precomputed = {
28
+        {"consimilate", "mnxw443jnvuwyylumu======"},
29
+        {"defacing", "mrswmyldnfxgo==="},
30
+        {"downcome", "mrxxo3tdn5wwk==="},
31
+        {"filchery", "mzuwyy3imvzhs==="},
32
+        {"Galatic", "i5qwyylunfrq===="},
33
+        {"hearthrug", "nbswc4tunbzhkzy="},
34
+        {"heterotypic", "nbsxizlsn52hs4djmm======"},
35
+        {"kinase", "nnuw4yltmu======"},
36
+        {"Lycopodiales", "jr4wg33qn5sgsylmmvzq===="},
37
+        {"mosker", "nvxxg23foi======"},
38
+        {"ornithosaurian", "n5zg42lunbxxgylvojuwc3q="},
39
+        {"quilkin", "of2ws3dlnfxa===="},
40
+        {"swartly", "on3wc4tunr4q===="},
41
+        {"teleost", "orswyzlpon2a===="},
42
+        {"thinglet", "orugs3thnrsxi==="},
43
+        {"unpregnant", "ovxha4tfm5xgc3tu"},
44
+        {"unreachably", "ovxhezlbmnugcytmpe======"},
45
+        {"unusableness", "ovxhk43bmjwgk3tfonzq===="},
46
+        {"wickawee", "o5uwg23bo5swk==="},
47
+        {"yareta", "pfqxezlume======"},
48
+    };
49
+
50
+    // The token for key 76I6WTYEUTNCJUREMGKVM45PMA and time '2017/01/01 00:00:00' is 258675
51
+    base32 codec;
52
+
53
+
54
+}
55
+
56
+int run_tests()
57
+{
58
+    test (int_precomputed);
59
+    succeed();
60
+}
61
+
62
+int main (int argc, char *argv[])
63
+{
64
+    return !run_tests();
65
+}