C++ Serialize Function

Convert an unsigned integer to a vector of bytes (unsigned char)

Because the function is a template that relies on argument deduction, you don’t need to spell out the template argument explicitly: just call it with a value of the desired unsigned integer type and the compiler will deduce the width of the type.

The function uses sizeof() to calculate the bitwise shift values needed to shift each byte from the unsigned integer input.

Assigning a value that is too wide for the chosen type will cause a compiler warning.

Calling the function with a type wider than the value assigned to it produces zero padding at the start of the vector (in the default big-endian order). By default (trim = true) these padding bytes are removed before the function returns.

An additional function for serializing hex strings held in std::string objects is provided as an overload with the same name as the function template; it is selected only when the first argument is a std::string.

#include <algorithm>
#include <cstdint>
#include <cstdlib>
#include <iomanip>
#include <iostream>
#include <string>
#include <vector>

// Byte buffer type used for all serialized output.
using u8_vector = std::vector<unsigned char>;

// Byte order of the serialized output:
//   BIG    - most significant byte first
//   LITTLE - least significant byte first
enum class Endian { BIG, LITTLE };

// pretty print helper function
void printv(u8_vector &in)
{
    for(const uint8_t &b: in) 
        std::cout << std::hex 
                  << std::setfill('0') 
                  << std::setw(2) 
                  << static_cast<int>(b) 
                  << " ";

    std::cout << std::endl;
}

// serialize unsigned ints
template <class UINT>
void serialize(const UINT in, 
               u8_vector &out, 
               Endian endian = Endian::BIG, 
               bool trim = true)
{
    out.clear();
    if (in) 
    { 
        if (endian == Endian::BIG)
        {
            for(int idx = 1; idx < sizeof(in) + 1; idx++)
            {
                int inv_idx = sizeof(in) - idx;
                int divisor = 8 * inv_idx;
                out.push_back(in >> divisor);
            }
        }
        else
        {
            for(int idx = 0; idx < sizeof(in); idx++)
            {
                int divisor = 8 * idx;
                out.push_back(in >> divisor);
            }        
        }
        // remove any leading null bytes
        if (trim) { while(!out.front()) { out.erase(out.begin()); } }
    }
    else 
    {
        out.push_back(0);
    }
    std::cout  << "Serializing " 
               << sizeof(UINT) 
               << "-byte word " 
               << ((endian == Endian::BIG) ? "(BigEndian): ":"(LittleEndian): ") 
               << std::dec 
               << long(in) 
               << " --> ";

    printv(out);
}

// serialize hex strings
void serialize(const std::string in, 
               u8_vector &out, 
               Endian endian = Endian::BIG)
{
    out.clear();
    if (endian == Endian::BIG)
    {
        for (unsigned int i = 0; i < in.length(); i += 2) {
            std::string byteString = in.substr(i, 2);
            char byte = (char) strtol(byteString.c_str(), NULL, 16);
            out.push_back(byte);
        }
    }
    else
    {
        for (int i = in.length() - 2; i > -1; i -= 2) {
            std::string byteString = in.substr(i, 2);
            char byte = (char) strtol(byteString.c_str(), NULL, 16);
            out.push_back(byte);
        }        
    }

    std::cout  << "Serializing std::string " << ((endian == Endian::BIG) ? "(BigEndian): ":"(LittleEndian): ") << in << " --> ";
    printv(out);
}

int main()
{
    u8_vector result;

    // NOTE: static casts are for brevity/accuracy, left-value expressions
    // using width-specific types will provide similiar accuracy
    serialize(static_cast<uint8_t>(0), result);
    serialize(static_cast<uint8_t>(208), result);

    serialize(static_cast<uint16_t>(44428), result, Endian::BIG);
    serialize(static_cast<uint16_t>(44428), result, Endian::LITTLE);

    serialize(static_cast<uint32_t>(4293844428), result, Endian::BIG);
    serialize(static_cast<uint32_t>(4293844428), result, Endian::LITTLE);
    serialize(static_cast<uint32_t>(1), result, Endian::BIG, false);
    serialize(static_cast<uint32_t>(1), result, Endian::LITTLE, false);

    serialize(static_cast<uint64_t>(3345453544293844428), result, Endian::BIG);
    serialize(static_cast<uint64_t>(3345453544293844428), result, Endian::LITTLE);
    serialize(static_cast<uint64_t>(1), result, Endian::BIG, false);
    serialize(static_cast<uint64_t>(1), result, Endian::LITTLE, false);

    serialize(std::string{"00AABBCCDD"}, result, Endian::BIG);
    serialize(std::string{"00AABBCCDD"}, result, Endian::LITTLE);

    return 0;
}

Output:

Serializing 1-byte word (BigEndian): 0 --> 00 
Serializing 1-byte word (BigEndian): 208 --> d0 

Serializing 2-byte word (BigEndian): 44428 --> ad 8c 
Serializing 2-byte word (LittleEndian): 44428 --> 8c ad 

Serializing 4-byte word (BigEndian): 4293844428 --> ff ee dd cc 
Serializing 4-byte word (LittleEndian): 4293844428 --> cc dd ee ff 
Serializing 4-byte word (BigEndian): 1 --> 00 00 00 01 
Serializing 4-byte word (LittleEndian): 1 --> 01 00 00 00 

Serializing 8-byte word (BigEndian): 3345453544293844428 --> 2e 6d 70 46 08 34 c5 cc 
Serializing 8-byte word (LittleEndian): 3345453544293844428 --> cc c5 34 08 46 70 6d 2e 
Serializing 8-byte word (BigEndian): 1 --> 00 00 00 00 00 00 00 01 
Serializing 8-byte word (LittleEndian): 1 --> 01 00 00 00 00 00 00 00 

Serializing std::string (BigEndian): 00AABBCCDD --> 00 aa bb cc dd 
Serializing std::string (LittleEndian): 00AABBCCDD --> dd cc bb aa 00 

https://godbolt.org/z/MPv7vx8Wv

Categories: C++

Leave a Reply

Your email address will not be published. Required fields are marked *