GitList

Beginning to classify base32 codec

Ed Langley authored on 06/06/2017 23:41:09
Showing 3 changed files

base32.cc index eea2a5a..1b1d0e8 100644
base32.h index 0000000..fc89f8f
base32_test.cc index 0000000..4dbbc5e

@@ -1,39 +1,20 @@
                 +/* Copyright (C) CJ Affiliate
                 + *
                 + * You may use, distribute and modify this code under  the
                 + * terms of the  GNU General Public License  version 2  or
                 + * later.
                 + *
                 + * You should have received a copy of the license with this
                 + * file. If not, you will find a copy in the "LICENSE" file
                 + * at https://github.com/cjdev/dual-control.
                 + */
+                +
                  #include <iostream>
                  #include <iomanip>
                  #include <vector>
                  #include <unordered_map>
                 -uint64_t calculate_padding_bytes(uint64_t extra_bytes) {
                 -    auto padding_chars = 0;
                 -    switch (extra_bytes) {
                 -    case 1:
                 -        padding_chars = 6;
                 -        break;
                 -    case 2:
                 -        padding_chars = 4;
                 -        break;
                 -    case 3:
                 -        padding_chars = 3;
                 -        break;
                 -    case 4:
                 -        padding_chars = 1;
                 -        break;
                 -    }
+                -
                 -    return padding_chars;
                 -}
+                -
                 -std::string &pad_string(uint64_t extra_bytes, std::string &input) {
                 -    auto padding_chars = calculate_padding_bytes(extra_bytes);
                 -    if (padding_chars > 0) {
                 -        auto replace_end   = input.end();
                 -        auto replace_start = replace_end - padding_chars;
                 -        auto replace_count = replace_end - replace_start;
+                -
                 -        input.replace(replace_start, replace_end, replace_count, '=');
                 -    }
                 -    return input;
                 -}
                 +#include "base32.h"
                  static const std::vector<char> alphabet = {
                      'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H',
@@ -42,206 +23,184 @@ static const std::vector<char> alphabet = {
                      'Y', 'Z', '2', '3', '4', '5', '6', '7'
                  };
                 -std::string encodeBase32(const std::vector<uint8_t> data) {
                 -    std::string result;
                 +namespace
                 +{
                 +class base32_impl : public base32_ifc
                 +{
                 +private:
                 +    uint64_t calculate_padding_bytes (uint64_t extra_bytes)
                 +    {
                 +        auto padding_chars = 0;
+                +
                 +        switch (extra_bytes) {
                 +        case 1:
                 +            padding_chars = 6;
                 +            break;
+                +
                 +        case 2:
                 +            padding_chars = 4;
                 +            break;
+                +
                 +        case 3:
                 +            padding_chars = 3;
                 +            break;
+                +
                 +        case 4:
                 +            padding_chars = 1;
                 +            break;
                 +        }
                 -    uint8_t tmp = 0;
                 +        return padding_chars;
                 +    }
                 -    auto data_size = data.size();
                 -    auto extra_bytes = data_size % 5;
                 +    std::string &pad_string (uint64_t extra_bytes, std::string &input)
                 +    {
                 +        auto padding_chars = calculate_padding_bytes (extra_bytes);
                 -    auto begin = data.begin();
                 -    auto end = data.end();
                 -    auto leftover = (5 - extra_bytes) % 5;
                 -    for (auto cur = begin; cur+5 < (end + leftover + 1); cur+=5) {
                 -        std::vector<uint8_t> batch;
                 -        if (cur+5 < end) {
                 -             batch = std::vector<uint8_t> (cur, cur+5);
                 -        } else {
                 -             batch = std::vector<uint8_t> (cur, end);
                 -             for (int x = 0; x < leftover; x++) {
                 -                 batch.push_back(0);
                 -             }
                 -        }
                 +        if (padding_chars > 0) {
                 +            auto replace_end   = input.end();
                 +            auto replace_start = replace_end - padding_chars;
                 +            auto replace_count = replace_end - replace_start;
                 -        std::bitset<40> everything;
                 -        for (int x = 0, y = 32; x < 5; x+=1, y=32-x*8) {
                 -            uint64_t item = batch[x];
                 -            everything |= item << y;
                 +            input.replace (replace_start, replace_end, replace_count, '=');
+                         }
                 -        std::bitset<40> mask = 31;
                 -        int offset = 35;
                 -        mask <<= offset;
+                -
                 -        for (/* see above */; offset >= 0; mask >>= 5, offset -= 5) {
                 -            auto idx = ((everything & mask) >> offset).to_ullong();
                 -            result.push_back(alphabet[idx]);
                 -        }
                 +        return input;
+                     }
                 -    result = pad_string(extra_bytes, result);
                 -    return result;
                 -}
                 +public:
                 +    std::string encode (const std::vector<uint8_t> data) override
                 +    {
                 +        std::string result;
                 -std::unordered_map <char, unsigned char> construct_lookup_table() {
                 -    std::unordered_map<char, unsigned char> lookup_table;
                 -    for (int i = 0; i < alphabet.size(); i += 1) {
                 -        lookup_table[alphabet[i]] = i;
                 -    }
                 -    return lookup_table;
                 -}
                 +        uint8_t tmp = 0;
+                +
                 +        auto data_size = data.size();
                 +        auto extra_bytes = data_size % 5;
+                +
                 +        auto begin = data.begin();
                 +        auto end = data.end();
                 +        auto leftover = (5 - extra_bytes) % 5;
+                +
                 +        for (auto cur = begin; cur+5 < (end + leftover + 1); cur+=5) {
                 +            std::vector<uint8_t> batch;
+                +
                 +            if (cur+5 < end) {
                 +                batch = std::vector<uint8_t> (cur, cur+5);
                 +            } else {
                 +                batch = std::vector<uint8_t> (cur, end);
+                +
                 +                for (int x = 0; x < leftover; x++) {
                 +                    batch.push_back (0);
                 +                }
                 +            }
+                +
                 +            std::bitset<40> everything;
                 -void print_bits(uint8_t input, int width = 8) {
                 -    for (int counter = 0; counter < width; counter++) {
                 -        unsigned int bit = input & (1 << (width - 1));
                 -        input <<= 1;
                 -        bit >>= (width - 1);
                 +            for (int x = 0, y = 32; x < 5; x+=1, y=32-x*8) {
                 +                uint64_t item = batch[x];
                 +                everything |= item << y;
                 +            }
                 -        printf("%d", bit);
                 -        if (counter == (width-1)/2) {
                 -            printf("|");
                 +            std::bitset<40> mask = 31;
                 +            int offset = 35;
                 +            mask <<= offset;
+                +
                 +            for (/* see above */; offset >= 0; mask >>= 5, offset -= 5) {
                 +                auto idx = ((everything & mask) >> offset).to_ullong();
                 +                result.push_back (alphabet[idx]);
                 +            }
+                         }
                 -    }
                 -}
                 -uint8_t extract_bits_from_string(std::string input, std::string::size_type byte_offset, uint8_t bit_offset, uint8_t window_size = 5) {
                 -    auto relevant_characters = input.substr(byte_offset, byte_offset + 2);
                 -    uint16_t bit_end_position = bit_offset + window_size;
                 -    uint16_t result = relevant_characters[0] << 8;
                 -    uint16_t mask = (1 << window_size) - 1;
                 -    uint16_t shift_offset = 16 - bit_end_position;
                 -    if (bit_end_position > 8) {
                 -        result |= relevant_characters[1];
                 +        result = pad_string (extra_bytes, result);
                 +        return result;
+                     }
                 -    return (result & (mask << shift_offset)) >> shift_offset;
                 -}
                 +private:
                 -std::pair<uint8_t, uint8_t> split_value(uint8_t item, uint8_t pos, uint8_t width) {
                 -    auto shift = width-pos;
                 -    uint8_t part2_mask = (1 << (shift)) - 1;
                 -    uint8_t part2 = item & part2_mask;
                 -    item >>= shift;
                 -    return { item, part2 };
                 -}
                 +    std::unordered_map <char, unsigned char> construct_lookup_table()
                 +    {
                 +        std::unordered_map<char, unsigned char> lookup_table;
                 -void set_vector_at_bit(std::vector<uint8_t> &data, uint8_t item, std::vector<uint8_t>::size_type byte_offset, uint8_t bit_offset_from_start, uint8_t width) {
                 -    bool need_to_split = bit_offset_from_start + width > 8;
                 -    uint8_t bit_offset_from_end = 7 - bit_offset_from_start;
+                -
                 -    if (need_to_split) {
                 -        uint8_t split_pos = bit_offset_from_end + 1;
                 -        auto pieces = split_value(item, split_pos, width);
                 -        data[byte_offset] |= pieces.first;
+                -
                 -        uint8_t leftover_bits = width - split_pos;
                 -        uint8_t second_part_shift = 8 - leftover_bits;
                 -        data[byte_offset+1] |= pieces.second << second_part_shift;
                 -    } else {
                 -        uint8_t last_bit = (bit_offset_from_end - width) + 1;
                 -        data[byte_offset] |= item << last_bit;
                 +        for (int i = 0; i < alphabet.size(); i += 1) {
                 +            lookup_table[alphabet[i]] = i;
                 +        }
+                +
                 +        return lookup_table;
+                     }
                 -}
                 +    std::pair<uint8_t, uint8_t> split_value (uint8_t item, uint8_t pos,
                 +            uint8_t width)
                 +    {
                 +        auto shift = width-pos;
                 +        uint8_t part2_mask = (1 << (shift)) - 1;
                 +        uint8_t part2 = item & part2_mask;
                 +        item >>= shift;
                 +        return { item, part2 };
                 +    }
                 -std::string::size_type calculate_decoded_size(std::string input) {
                 -    std::string::size_type input_size = input.size();
                 -    std::string::size_type first_equals = input.find_first_of('=');
                 -    if (first_equals != std::string::npos) {
                 -        input_size = first_equals;
                 +    void set_vector_at_bit (std::vector<uint8_t> &data, uint8_t item,
                 +                            std::vector<uint8_t>::size_type byte_offset, uint8_t bit_offset_from_start,
                 +                            uint8_t width)
                 +    {
                 +        bool need_to_split = bit_offset_from_start + width > 8;
                 +        uint8_t bit_offset_from_end = 7 - bit_offset_from_start;
+                +
                 +        if (need_to_split) {
                 +            uint8_t split_pos = bit_offset_from_end + 1;
                 +            auto pieces = split_value (item, split_pos, width);
                 +            data[byte_offset] |= pieces.first;
+                +
                 +            uint8_t leftover_bits = width - split_pos;
                 +            uint8_t second_part_shift = 8 - leftover_bits;
                 +            data[byte_offset+1] |= pieces.second << second_part_shift;
                 +        } else {
                 +            uint8_t last_bit = (bit_offset_from_end - width) + 1;
                 +            data[byte_offset] |= item << last_bit;
                 +        }
+                     }
                 -    return (input_size * 5) / 8;
                 -}
                 -std::vector<unsigned char> decodeBase32(std::string input) {
                 -    auto lookup_table = construct_lookup_table();
                 -    auto input_size = calculate_decoded_size(input);
                 +    std::string::size_type calculate_decoded_size (std::string input)
                 +    {
                 +        std::string::size_type input_size = input.size();
                 +        std::string::size_type first_equals = input.find_first_of ('=');
                 -    std::vector<unsigned char> result(input_size, 0);
                 +        if (first_equals != std::string::npos) {
                 +            input_size = first_equals;
                 +        }
                 -    unsigned long long bits_written = 0;
                 -    for (std::string::size_type idx = 0; idx < input.size(); idx++) {
                 -        uint8_t val = lookup_table[input[idx]];
                 -        uint8_t start_bit = bits_written % 8;
                 -        std::vector<unsigned char>::size_type current_byte = bits_written/8;
                 +        return (input_size * 5) / 8;
                 +    }
                 -        set_vector_at_bit(result, val, current_byte, start_bit, 5);
                 +public:
                 -        bits_written += 5;
                 -    }
                 -    return result;
                 -}
                 +    std::vector<uint8_t> decode (std::string input) override
                 +    {
                 +        auto lookup_table = construct_lookup_table();
                 +        auto input_size = calculate_decoded_size (input);
                 -int main(int argc, char** argv) {
                 -    std::string data = "hi";
                 -    while (std::getline(std::cin,data)) {
                 -        std::cout << std::endl;
                 -        std::vector<unsigned char> input(data.begin(), data.end());
                 -        // std::vector<unsigned char> input = { 0b10001100, 0b01100011, 0b00011000, 0b11000110, 0b10101010, 0b10101010 };
                 +        std::vector<uint8_t> result (input_size, 0);
                 -        std::cout << "data:\t";
                 -        for (auto v: input) {print_bits(v); std::cout << ":";}
                 -        std::cout << std::endl;
                 +        unsigned long long bits_written = 0;
                 -        std::string encoded = encodeBase32(input);
                 -        std::vector<unsigned char> decoded = decodeBase32(encoded);
                 +        for (std::string::size_type idx = 0; idx < input.size(); idx++) {
                 +            uint8_t val = lookup_table[input[idx]];
                 -        std::cout << "output:\t";
                 -        for (auto v: decoded) {print_bits(v); std::cout << ":";}
                 -        std::cout << std::endl;
                 -    }
                 +            uint8_t start_bit = bits_written % 8;
                 +            std::vector<unsigned char>::size_type current_byte = bits_written/8;
                 -}
                 +            set_vector_at_bit (result, val, current_byte, start_bit, 5);
                 -// To test the encoder at the command line:
                 -/*
                 -int main(int argc, char** argv) {
                 -    std::string data;
                 -    while (std::getline(std::cin,data)) {
                 -        std::vector<unsigned char> v(data.begin(), data.end());
                 +            bits_written += 5;
                 +        }
                 -        std::string res = encodeBase32(v);
                 -        std::cout << "{\"ORIGINAL\": \"" << data << "\", \"BASE32\": \"" << res << "\"}" << std::endl;
                 +        return result;
+                     }
                 +};
+                 }
                 -export tmp=`mktemp`
                 -(while true; do
                 -  (paste <(./a.out < /usr/share/dict/words | tee /tmp/my_implementation | jq -r .BASE32) <(tr '[a-z]' '[A-Z]' < /tmp/oracle) /usr/share/dict/words | awk '$1 == $2 { printf("^[[32m") } $1 != $2 {printf("^[[31m") } {printf("%s\t%s\ttest: %s\toracle: %s", $1==$2, $3, $1, $2); print "^[[0m"} ' | sample.awk -v n=65 | sort > $tmp ) ;
                 -  sleep 5;
                 -  clear;
                 -  printf '\e]50;ClearScrollback\a';
                 -  column -s $'\t' -t $tmp;
                 -done)
                 -*/
+                -
                 -// int main(int argc, char** argv) {
                 -//     std::vector<unsigned char> input;
                 -//     input = { 0, 0 ,0 ,0 ,0 ,0 ,0};
                 -//     std::cout << "orig vector: "; for (auto v: input) {print_bits(v);}; std::cout << std::endl;
                 -//     set_vector_at_bit(input, 0b11111, 0, 7, 5);
                 -//     std::cout << "rslt vector: "; for (auto v: input) {print_bits(v);}; std::cout << std::endl;
+                -
                 -//     input = { 0, 0 ,0 ,0 ,0 ,0 ,0};
                 -//     std::cout << "orig vector: "; for (auto v: input) {print_bits(v);}; std::cout << std::endl;
                 -//     set_vector_at_bit(input, 0b11111, 0, 6, 5);
                 -//     std::cout << "rslt vector: "; for (auto v: input) {print_bits(v);}; std::cout << std::endl;
+                -
                 -//     input = { 0, 0 ,0 ,0 ,0 ,0 ,0};
                 -//     std::cout << "orig vector: "; for (auto v: input) {print_bits(v);}; std::cout << std::endl;
                 -//     set_vector_at_bit(input, 0b11111, 0, 5, 5);
                 -//     std::cout << "rslt vector: "; for (auto v: input) {print_bits(v);}; std::cout << std::endl;
+                -
                 -//     input = { 0, 0 ,0 ,0 ,0 ,0 ,0};
                 -//     std::cout << "orig vector: "; for (auto v: input) {print_bits(v);}; std::cout << std::endl;
                 -//     set_vector_at_bit(input, 0b11111, 0, 4, 5);
                 -//     std::cout << "rslt vector: "; for (auto v: input) {print_bits(v);}; std::cout << std::endl;
+                -
                 -//     input = { 0, 0 ,0 ,0 ,0 ,0 ,0};
                 -//     std::cout << "orig vector: "; for (auto v: input) {print_bits(v);}; std::cout << std::endl;
                 -//     set_vector_at_bit(input, 0b11111, 0, 3, 5);
                 -//     std::cout << "rslt vector: "; for (auto v: input) {print_bits(v);}; std::cout << std::endl;
                 -// }
+                -
                  template class std::vector<unsigned char>;
+                +
                 +// Default constructor: backs the public base32 facade with the file-local
                 +// base32_impl, created via make_shared and held in delegate_.
                 +base32::base32 ()
                 +    : delegate_ (std::make_shared<base32_impl> ())
                 +{}

base32.h

History View file @ d8ae572

                 new file mode 100644
@@ -0,0 +1,44 @@
                 +/* Copyright (C) CJ Affiliate
                 + *
                 + * You may use, distribute and modify this code under  the
                 + * terms of the  GNU General Public License  version 2  or
                 + * later.
                 + *
                 + * You should have received a copy of the license with this
                 + * file. If not, you will find a copy in the "LICENSE" file
                 + * at https://github.com/cjdev/dual-control.
                 + */
+                +
                 +#ifndef BASE32_H
                 +#define BASE32_H
+                +
                 +#include <memory>
                 +#include <vector>
                 +#include <string>
+                +
                 +// Abstract base32 codec. Concrete codecs derive from this and are reached
                 +// through the base32 facade below.
                 +class base32_ifc
                 +{
                 +public:
                 +    // Virtual so a codec can be destroyed safely through a base pointer.
                 +    virtual ~base32_ifc () = default;
                 +
                 +    // Encodes raw bytes as base32 text.
                 +    virtual std::string encode (std::vector<uint8_t> input) = 0;
                 +
                 +    // Decodes base32 text back into raw bytes. The parameter is the
                 +    // encoded string, matching the decode override in base32.cc.
                 +    virtual std::vector<uint8_t> decode (std::string input) = 0;
                 +};
+                +
                 +// Value-type facade: forwards every call to a shared base32_ifc delegate.
                 +class base32
                 +{
                 +public:
                 +    using delegate = std::shared_ptr<base32_ifc>;
                 +private:
                 +    delegate delegate_;
                 +public:
                 +    // Defined in base32.cc.
                 +    base32 ();
+                +
                 +    // Returns the delegate's base32 encoding of `input`.
                 +    std::string encode (std::vector<uint8_t> input) {
                 +        return delegate_->encode(input);
                 +    }
+                +
                 +    // Returns the bytes the delegate decodes from the base32 text `input`.
                 +    std::vector<uint8_t> decode (std::string input) {
                 +        return delegate_->decode(input);
                 +    }
                 +};
+                +
                 +#endif

base32_test.cc

History View file @ d8ae572

                 new file mode 100644
@@ -0,0 +1,65 @@
                 +/* Copyright (C) CJ Affiliate
                 + *
                 + * You may use, distribute and modify this code under  the
                 + * terms of the  GNU General Public License  version 2  or
                 + * later.
                 + *
                 + * You should have received a copy of the license with this
                 + * file. If not, you will find a copy in the "LICENSE" file
                 + * at https://github.com/cjdev/dual-control.
                 + */
                 +#include <memory>
                 +#include <algorithm>
                 +#include <initializer_list>
                 +#include <vector>
                 +#include <climits>
                 +#include <ctime>
                 +#include <iostream>
                 +#include <map>
+                +
                 +#include "base32.h"
                 +#include "test_util.h"
+                +
                 +// base32 test: table of plain words and their expected base32 encodings
                 +// (lowercase, '=' padded). The table is set up but not asserted on yet.
                 +int int_precomputed()
                 +{
                 +    // given
                 +    std::map<std::string, std::string> precomputed = {
                 +        {"consimilate", "mnxw443jnvuwyylumu======"},
                 +        {"defacing", "mrswmyldnfxgo==="},
                 +        {"downcome", "mrxxo3tdn5wwk==="},
                 +        {"filchery", "mzuwyy3imvzhs==="},
                 +        {"Galatic", "i5qwyylunfrq===="},
                 +        {"hearthrug", "nbswc4tunbzhkzy="},
                 +        {"heterotypic", "nbsxizlsn52hs4djmm======"},
                 +        {"kinase", "nnuw4yltmu======"},
                 +        {"Lycopodiales", "jr4wg33qn5sgsylmmvzq===="},
                 +        {"mosker", "nvxxg23foi======"},
                 +        {"ornithosaurian", "n5zg42lunbxxgylvojuwc3q="},
                 +        {"quilkin", "of2ws3dlnfxa===="},
                 +        {"swartly", "on3wc4tunr4q===="},
                 +        {"teleost", "orswyzlpon2a===="},
                 +        {"thinglet", "orugs3thnrsxi==="},
                 +        {"unpregnant", "ovxha4tfm5xgc3tu"},
                 +        {"unreachably", "ovxhezlbmnugcytmpe======"},
                 +        {"unusableness", "ovxhk43bmjwgk3tfonzq===="},
                 +        {"wickawee", "o5uwg23bo5swk==="},
                 +        {"yareta", "pfqxezlume======"},
                 +    };
+                +
                 +    // NOTE(review): the next remark looks carried over from the TOTP test — confirm and drop.
                 +    // The token for key 76I6WTYEUTNCJUREMGKVM45PMA and time '2017/01/01 00:00:00' is 258675
                 +    base32 codec;
+                +
                 +    // TODO: encode each key with `codec` and compare against `precomputed`.
                 +    // Until then, report success explicitly: flowing off the end of a
                 +    // non-void function is undefined behaviour.
                 +    succeed();
                 +}
+                +
                 +int run_tests()  // Runs every test case in this file; main() negates the result for the exit status.
                 +{
                 +    test (int_precomputed);  // test() comes from test_util.h; presumably bails out early on failure — confirm
                 +    succeed();  // presumably yields the "all passed" return value — confirm against test_util.h
                 +}
+                +
                 +// Program entry point: turns the result of run_tests() into an exit status.
                 +int main (int argc, char *argv[])
                 +{
                 +    // A non-zero result from run_tests() maps to exit status 0, zero maps to 1.
                 +    const int outcome = run_tests();
                 +    return outcome ? 0 : 1;
                 +}