关于位域和结构体的内存布局

AMD64 ABI、Intel386 ABI、Windows x64 ABI 规范对结构体的内存布局作出如下规定:

结构体的对齐值取其成员的最大对齐值,其大小必须是其对齐值的整数倍(其它所有对象同样如此。这么做是为了确保对象被组织为数组时,每个元素仍然能够满足对齐要求)。

结构体每个成员需在确保内存对齐的情况下被分配至最小偏移量的位置上(Windows 未明文要求最小偏移量),为此成员之间可以添加必要的内部填充(internal padding)。同时为了满足大小要求,结构体可以添加必要的尾部填充(tail padding)。填充的内容是未定义的。


AMD64 ABI、Intel386 ABI 规范对位域的内存布局作出如下规定(Windows 对位域没有过多描述):

位域与其它类型成员遵循相同的大小和对齐约束。

未显式声明为 signed 或 unsigned 的位域(即使其声明类型为 char、short、int、long 等可表示负数的类型),都按对应的无符号类型处理,其值不会为负数。

位域成员从低地址开始向高地址分配,其必须完整位于所声明类型的存储单元之中,也就是说位域从不跨过它的单元边界(Windows 则是说将超出的部分截断)。

位域可能与结构体的其它类型成员(包括非位域)共享存储单元。当然,它们使用存储单元的不同部分,且不同成员之间仍然需要对齐。

未命名的位域不会影响结构体的对齐值。


为了加深理解,我进行了一些测试,测试环境为:

Windows - Intel64 - MSVC2019_64

Windows - Intel64 - MinGW810_64

Deepin (Linux 6.6.93) - Intel64 - g++12.3.0 (Deepin 12.3.0-17deepin15)


测试结果大部分与 ABI 规范相符,除了以下几点:

  • 无论 Windows 还是 Linux 平台,有符号类型的位域都存在负数值,其最高位被作为符号位。
  • Windows 下位域仅与相邻的同类型位域成员共享存储单元。
  • Windows 下未命名的位域也会影响结构体的对齐值。
  • Windows 下单独存在的零宽度位域会被忽略,而位于其它位域成员之后的零宽度位域将被处理。其影响结构体的对齐值,且促使下一成员至少对齐至其声明类型的边界上。
  • 无论 Windows 还是 Linux 平台,我想其对零宽度位域的处理方式就像是促使下一成员至少对齐至其声明类型的边界上。
  • 对于 Windows 将位域超出部分截断的场景,没有找到测试手段。

以下为详细测试内容和结果(一些例子来自 ABI 文档,一些是自定义。MinGW 测试结果与 MSVC 相同因此不做罗列):

关于位域和结构体的内存布局

可以看到,Linux 下几乎完全符合 AMD64 ABI、Intel386 ABI 规范的定义,而 Windows 在位域方面则是有自己的行为。ABI 的差异导致了内存布局的差异,开发人员平时应尽量避免假设内存布局或谨慎处理,尤其是在做跨平台开发的时候。


参考资料:


完整测试代码:

点击查看代码
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <iostream>
#include <iterator>
#include <string>
#include <vector>

// Size of T expressed in bits, computed as sizeof(T) times 8 (8-bit bytes are assumed).
template<typename T>
struct sizeof_bits {
    static constexpr decltype(sizeof(T)) value = 8 * sizeof(T);
};

// Alignment of T expressed in bits, computed as alignof(T) times 8 (8-bit bytes are assumed).
template<typename T>
struct alignof_bits {
    static constexpr decltype(alignof(T)) value = 8 * alignof(T);
};

// Expands to two comma-separated values for `type`: its size in bits and its alignment in bits.
// Used as the size/alignment arguments of a StructMember table row.
#define TYPE_SIZEINFO(type) sizeof_bits<type>::value, alignof_bits<type>::value
// Size/alignment pair "0, 1": a size of 0 tells printMemoryLayout that the bit-field does not
// open a new storage unit, and an alignment of 1 bit never forces padding.
#define BITFIELD_SIZEINFO 0, 1

// One row of a layout table consumed by printMemoryLayout.
struct StructMember {
    std::string name;   // label drawn inside the diagram cell
    int sizeInBits;     // member size in bits; for a bit-field, the size of the storage unit it
                        // opens (0 = continue in the storage unit that is already open)
    int alignment;      // required alignment in bits
    int bitfield;       // -1 = not a bit-field, 0 = zero-width bit-field, otherwise width in bits

    // Only the name is mandatory; the defaults describe a zero-sized, non-bit-field member.
    StructMember(const std::string& n, int s = 0, int a = 0, int b = -1)
        : name(n), sizeInBits(s), alignment(a), bitfield(b) {}
};

// A single cell of one diagram row: a label plus the cell width in bits.
struct MemoryUnit {
    std::string name;
    int sizeInBits;

    MemoryUnit(const std::string& n, int s = 0)
        : name(n)
        , sizeInBits(s)
    {}
};


// Figure 3-3: No Padding
// Test subject for the "no padding" case; the member order and types must stay exactly as
// written because the layout of this struct is what the program measures.
struct Figure3_3 {
    char c;
    char d;
    short s;
    long n;
};
// Prints the byte offset of each Figure3_3 member. A line is prefixed with "ERROR:" when the
// measured offset differs from the expected one: c at 0, d at 1, s at 2, n at sizeof(long).
void printOffset_Figure3_3() {
    const auto offsetC = offsetof(Figure3_3, c);
    const auto offsetD = offsetof(Figure3_3, d);
    const auto offsetS = offsetof(Figure3_3, s);
    const auto offsetN = offsetof(Figure3_3, n);

    std::cout << (offsetC != 0 ? "ERROR:" : "") << "  c at offset " << offsetC << std::endl;
    std::cout << (offsetD != 1 ? "ERROR:" : "") << "  d at offset " << offsetD << std::endl;
    std::cout << (offsetS != 2 ? "ERROR:" : "") << "  s at offset " << offsetS << std::endl;
    std::cout << (offsetN != sizeof(long) ? "ERROR:" : "") << "  n at offset " << offsetN << std::endl;
}

// Alignment of Figure3_3 in bits; passed to printMemoryLayout as the diagram row width.
constexpr auto Figure3_3_align = 8 * alignof(Figure3_3);
// Layout table for Figure3_3, one row per member in declaration order.
const std::vector<StructMember> Figure3_3_members = {
    {"c", TYPE_SIZEINFO(char)},
    {"d", TYPE_SIZEINFO(char)},
    {"s", TYPE_SIZEINFO(short)},
    {"n", TYPE_SIZEINFO(long)},
};

// Figure 3-4: Internal Padding
// Test subject: a char followed by a short, so padding is expected between the two members.
struct Figure3_4 {
    char c;
    short s;
};
// Prints the byte offsets of Figure3_4's members, prefixing a line with "ERROR:" when the
// offset is not the expected one (c at 0, s at 2).
void printOffset_Figure3_4() {
    const auto offsetC = offsetof(Figure3_4, c);
    const auto offsetS = offsetof(Figure3_4, s);

    std::cout << (offsetC != 0 ? "ERROR:" : "") << "  c at offset " << offsetC << std::endl;
    std::cout << (offsetS != 2 ? "ERROR:" : "") << "  s at offset " << offsetS << std::endl;
}
// Alignment of Figure3_4 in bits; passed to printMemoryLayout as the diagram row width.
constexpr auto Figure3_4_align = 8 * alignof(Figure3_4);
// Layout table for Figure3_4, one row per member in declaration order.
const std::vector<StructMember> Figure3_4_members = {
    {"c", TYPE_SIZEINFO(char)},
    {"s", TYPE_SIZEINFO(short)},
};

// Figure 3-5: Internal and Tail Padding
// Test subject: char, double, short - expected to need padding both between members and at
// the end of the struct.
struct Figure3_5 {
    char c;
    double d;
    short s;
};
// Prints the byte offsets of Figure3_5's members, prefixing a line with "ERROR:" when the
// offset is not the expected one (c at 0, d at 8, s at 16).
void printOffset_Figure3_5() {
    const auto offsetC = offsetof(Figure3_5, c);
    const auto offsetD = offsetof(Figure3_5, d);
    const auto offsetS = offsetof(Figure3_5, s);

    std::cout << (offsetC != 0 ? "ERROR:" : "") << "  c at offset " << offsetC << std::endl;
    std::cout << (offsetD != 8 ? "ERROR:" : "") << "  d at offset " << offsetD << std::endl;
    std::cout << (offsetS != 16 ? "ERROR:" : "") << "  s at offset " << offsetS << std::endl;
}
// Alignment of Figure3_5 in bits; passed to printMemoryLayout as the diagram row width.
constexpr auto Figure3_5_align = 8 * alignof(Figure3_5);
// Layout table for Figure3_5, one row per member in declaration order.
const std::vector<StructMember> Figure3_5_members = {
    {"c", TYPE_SIZEINFO(char)},
    {"d", TYPE_SIZEINFO(double)},
    {"s", TYPE_SIZEINFO(short)},
};

// Figure 3-9: Right-to-Left Allocation
// Test subject: three int bit-fields of 5, 6 and 7 bits declared back to back.
struct Figure3_9 {
    int j: 5;
    int k: 6;
    int m: 7;
};
void printOffset_Figure3_9() {
    Figure3_9 obj = {1, 2, 3};
    int* storageUnit = reinterpret_cast<int*>(&obj);
    int mask_j = 0b11111;
    const auto offset_k = 5;
    int mask_k = 0b111111 << offset_k;
    const auto offset_m = 11;
    int mask_m = 0b1111111 << offset_m;

    if ((*storageUnit & mask_j) == obj.j)
        std::cout << "  j at offset 0 bits" << std::endl;
    else
        std::cout << "ERROR: Offset of j is incorrect" << std::endl;

    if ((*storageUnit & mask_k) >> offset_k == obj.k)
        std::cout << "  k at offset " << offset_k << " bits" << std::endl;
    else
        std::cout << "ERROR: Offset of k is incorrect" << std::endl;

    if ((*storageUnit & mask_m) >> offset_m == obj.m)
        std::cout << "  m at offset " << offset_m << "bits" << std::endl;
    else
        std::cout << "ERROR: Offset of m is incorrect" << std::endl;
};
// Alignment of Figure3_9 in bits; passed to printMemoryLayout as the diagram row width.
constexpr auto Figure3_9_align = 8 * alignof(Figure3_9);
// Layout table for Figure3_9: j opens an int-sized storage unit, k and m continue in it.
const std::vector<StructMember> Figure3_9_members = {
    {"j", TYPE_SIZEINFO(int), 5},
    {"k", BITFIELD_SIZEINFO, 6},
    {"m", BITFIELD_SIZEINFO, 7},
};

// Figure 3-10: Boundary Alignment
// Test subject: 9-bit bit-fields of different declared types interleaved with plain chars.
struct Figure3_10 {
    short s: 9;
    int j: 9;
    char c;
    short t: 9;
    short u: 9;
    char d;
};
#if defined(_WIN32)
// Windows variant: checks that the Figure3_10 members sit at byte offsets
// s=0, j=4, c=8, t=10, u=12, d=14 by comparing the raw bytes with the member values.
void printOffset_Figure3_10() {
    Figure3_10 obj = {1, 2, 3, 4, 5, 6};
    char* bytes = reinterpret_cast<char*>(&obj);
    const int nineBits = 0b111111111;   // every bit-field in this struct is 9 bits wide

    const int unitS = *reinterpret_cast<int*>(bytes);
    if ((unitS & nineBits) != obj.s)
        std::cout << "ERROR: Offset of s is incorrect" << std::endl;
    else
        std::cout << "  s at offset 0" << std::endl;

    const int unitJ = *reinterpret_cast<int*>(bytes + 4);
    if ((unitJ & nineBits) != obj.j)
        std::cout << "ERROR: Offset of j is incorrect" << std::endl;
    else
        std::cout << "  j at offset 4" << std::endl;

    std::cout << (bytes[8] != obj.c ? "ERROR:" : "")
              << "  c at offset " << offsetof(Figure3_10, c) << std::endl;

    const short unitT = *reinterpret_cast<short*>(bytes + 10);
    if ((unitT & nineBits) != obj.t)
        std::cout << "ERROR: Offset of t is incorrect" << std::endl;
    else
        std::cout << "  t at offset 10" << std::endl;

    const short unitU = *reinterpret_cast<short*>(bytes + 12);
    if ((unitU & nineBits) != obj.u)
        std::cout << "ERROR: Offset of u is incorrect" << std::endl;
    else
        std::cout << "  u at offset 12" << std::endl;

    std::cout << (bytes[14] != obj.d ? "ERROR:" : "")
              << "  d at offset " << offsetof(Figure3_10, d) << std::endl;
}
// Alignment of Figure3_10 in bits; passed to printMemoryLayout as the diagram row width.
constexpr auto Figure3_10_align = 8 * alignof(Figure3_10);
// Windows layout table for Figure3_10: every bit-field opens its own storage unit.
const std::vector<StructMember> Figure3_10_members = {
    {"s", TYPE_SIZEINFO(short), 9},
    {"j", TYPE_SIZEINFO(int), 9},       // on Windows, j and c do not share a storage unit with s
    {"c", TYPE_SIZEINFO(char)},
    {"t", TYPE_SIZEINFO(short), 9},
    {"u", TYPE_SIZEINFO(short), 9},
    {"d", TYPE_SIZEINFO(char)},
};
#elif defined(__linux__)
// Linux variant: checks that s and j share the first int (j starting at bit 9), that c is at
// byte 3, t at byte 4, u at byte 6 and d at byte 8, by comparing raw bytes with member values.
void printOffset_Figure3_10() {
    Figure3_10 obj = {1, 2, 3, 4, 5, 6};
    char* bytes = reinterpret_cast<char*>(&obj);
    const int nineBits = 0b111111111;   // every bit-field in this struct is 9 bits wide
    const auto shiftJ = 9;

    const int firstUnit = *reinterpret_cast<int*>(bytes);
    if ((firstUnit & nineBits) != obj.s)
        std::cout << "ERROR: Offset of s is incorrect" << std::endl;
    else
        std::cout << "  s at offset 0" << std::endl;

    if ((firstUnit & (nineBits << shiftJ)) >> shiftJ != obj.j)
        std::cout << "ERROR: Offset of j is incorrect" << std::endl;
    else
        std::cout << "  j at offset " << shiftJ << " bits" << std::endl;

    std::cout << (bytes[3] != obj.c ? "ERROR:" : "")
              << "  c at offset " << offsetof(Figure3_10, c) << std::endl;

    const short unitT = *reinterpret_cast<short*>(bytes + 4);
    if ((unitT & nineBits) != obj.t)
        std::cout << "ERROR: Offset of t is incorrect" << std::endl;
    else
        std::cout << "  t at offset 4" << std::endl;

    const short unitU = *reinterpret_cast<short*>(bytes + 6);
    if ((unitU & nineBits) != obj.u)
        std::cout << "ERROR: Offset of u is incorrect" << std::endl;
    else
        std::cout << "  u at offset 6" << std::endl;

    std::cout << (bytes[8] != obj.d ? "ERROR:" : "")
              << "  d at offset " << offsetof(Figure3_10, d) << std::endl;
}
// Alignment of Figure3_10 in bits; passed to printMemoryLayout as the diagram row width.
constexpr auto Figure3_10_align = 8 * alignof(Figure3_10);
// Linux layout table for Figure3_10: s opens an int-sized storage unit that j and c also use.
const std::vector<StructMember> Figure3_10_members = {
    {"s", TYPE_SIZEINFO(int), 9},
    {"j", BITFIELD_SIZEINFO, 9},    // j and c share the storage unit of s
    {"c", 0, 8, 8},                 // c is modelled as an 8-bit, byte-aligned field in that unit
    {"t", TYPE_SIZEINFO(short), 9},
    {"u", TYPE_SIZEINFO(short), 9},
    {"d", TYPE_SIZEINFO(char)},
};
#endif

// Figure 3-11: Storage Unit Sharing
// Test subject: a plain char followed by an 8-bit bit-field declared as short.
struct Figure3_11 {
    char c;
    short s: 8;
};
#if defined(_WIN32)
// Windows variant: c is expected at byte 0 and the bit-field s in the short starting at byte 2.
void printOffset_Figure3_11() {
    Figure3_11 obj = {1, 2};
    char* bytes = reinterpret_cast<char*>(&obj);
    const int eightBits = 0b11111111;

    std::cout << (bytes[0] != obj.c ? "ERROR:" : "")
              << "  c at offset " << offsetof(Figure3_11, c) << std::endl;

    const short unitS = *reinterpret_cast<short*>(bytes + 2);
    if ((unitS & eightBits) != obj.s)
        std::cout << "ERROR: Offset of s is incorrect" << std::endl;
    else
        std::cout << "  s at offset 2" << std::endl;
}
// Alignment of Figure3_11 in bits; passed to printMemoryLayout as the diagram row width.
constexpr auto Figure3_11_align = 8 * alignof(Figure3_11);
// Windows layout table for Figure3_11.
const std::vector<StructMember> Figure3_11_members = {
    {"c", TYPE_SIZEINFO(char)},
    {"s", TYPE_SIZEINFO(short), 8},   // on Windows s does not share a storage unit with c
};
#elif defined(__linux__)
// Linux variant: c is expected at byte 0 and the 8-bit bit-field s directly after it at byte 1.
void printOffset_Figure3_11() {
    Figure3_11 obj = {1, 2};
    char* bytes = reinterpret_cast<char*>(&obj);

    std::cout << (bytes[0] != obj.c ? "ERROR:" : "")
              << "  c at offset " << offsetof(Figure3_11, c) << std::endl;

    if (bytes[1] != obj.s)
        std::cout << "ERROR: Offset of s is incorrect" << std::endl;
    else
        std::cout << "  s at offset 1" << std::endl;
}
// Alignment of Figure3_11 in bits; passed to printMemoryLayout as the diagram row width.
constexpr auto Figure3_11_align = 8 * alignof(Figure3_11);
// Linux layout table for Figure3_11: c is modelled as an 8-bit field that opens a short-sized
// storage unit so that s can be drawn inside the same unit.
const std::vector<StructMember> Figure3_11_members = {
    {"c", TYPE_SIZEINFO(short), 8},
    {"s", BITFIELD_SIZEINFO, 8},   // shares the storage unit with c
};
#endif

// Figure 3-13: Unnamed Bit-Fields
// Test subject: plain chars separated by unnamed bit-fields (two zero-width, one 9 bits wide).
struct Figure3_13 {
    char c;
    int : 0;      // zero-width unnamed bit-field declared as int
    char d;
    short : 9;    // 9-bit unnamed bit-field declared as short
    char e;
    char : 0;     // zero-width unnamed bit-field declared as char
};
#if defined(_WIN32)
// Windows variant: fills the object with the byte pattern 0b00000101, assigns c/d/e and checks
// that c is at byte 0, d at byte 1, the unnamed 9-bit field in the short at byte 2 (it must
// still hold the fill pattern) and e at byte 4.
void printOffset_Figure3_13() {
    Figure3_13 obj;
    std::memset(&obj, 0b00000101, sizeof(Figure3_13));
    obj.c = 1;
    obj.d = 2;
    obj.e = 3;
    char* bytes = reinterpret_cast<char*>(&obj);
    const int nineBits = 0b111111111;

    std::cout << (bytes[0] != obj.c ? "ERROR:" : "")
              << "  c at offset " << offsetof(Figure3_13, c) << std::endl;

    std::cout << (bytes[1] != obj.d ? "ERROR:" : "")
              << "  d at offset " << offsetof(Figure3_13, d) << std::endl;

    const short unnamedUnit = *reinterpret_cast<short*>(bytes + 2);
    if ((unnamedUnit & nineBits) != 0b100000101)
        std::cout << "ERROR: Offset of unamed short :9 is incorrect" << std::endl;
    else
        std::cout << "  unamed short :9 at offset 2" << std::endl;

    std::cout << (bytes[4] != obj.e ? "ERROR:" : "")
              << "  e at offset " << offsetof(Figure3_13, e) << std::endl;
}
// Alignment of Figure3_13 in bits; passed to printMemoryLayout as the diagram row width.
constexpr auto Figure3_13_align = 8 * alignof(Figure3_13);
// Windows layout table for Figure3_13.
const std::vector<StructMember> Figure3_13_members = {
    {"c", TYPE_SIZEINFO(char)},
    {":0", BITFIELD_SIZEINFO, 0},       // on Windows a stand-alone zero-width bit-field is ignored
    {"d", TYPE_SIZEINFO(char)},
    {":9", TYPE_SIZEINFO(short), 9},    // this unnamed bit-field raises the struct alignment to that of short
    {"e", TYPE_SIZEINFO(char)},
    {":0", BITFIELD_SIZEINFO, 0},       // ignored
};
#elif defined(__linux__)
// Linux variant: fills the object with the byte pattern 0b00000101, assigns c/d/e and checks
// that c is at byte 0, d at byte 4, the unnamed 9-bit field in the short at byte 6 (it must
// still hold the fill pattern) and e at byte 8.
void printOffset_Figure3_13() {
    Figure3_13 obj;
    std::memset(&obj, 0b00000101, sizeof(Figure3_13));
    obj.c = 1;
    obj.d = 2;
    obj.e = 3;
    char* bytes = reinterpret_cast<char*>(&obj);
    const int nineBits = 0b111111111;

    std::cout << (bytes[0] != obj.c ? "ERROR:" : "")
              << "  c at offset " << offsetof(Figure3_13, c) << std::endl;

    std::cout << (bytes[4] != obj.d ? "ERROR:" : "")
              << "  d at offset " << offsetof(Figure3_13, d) << std::endl;

    const short unnamedUnit = *reinterpret_cast<short*>(bytes + 6);
    if ((unnamedUnit & nineBits) != 0b100000101)
        std::cout << "ERROR: Offset of unamed short :9 is incorrect" << std::endl;
    else
        std::cout << "  unamed short :9 at offset 6" << std::endl;

    std::cout << (bytes[8] != obj.e ? "ERROR:" : "")
              << "  e at offset " << offsetof(Figure3_13, e) << std::endl;
}
// Diagram row width in bits. The struct is actually 1-byte aligned; 4 bytes is used here only
// to make the printed diagram easier to read.
constexpr auto Figure3_13_align = 8 * 4;
// Linux layout table for Figure3_13.
const std::vector<StructMember> Figure3_13_members = {
    {"c", TYPE_SIZEINFO(int), 8},
    {":0", BITFIELD_SIZEINFO, 0},       // forces d onto an int boundary
    {"d", TYPE_SIZEINFO(char)},
    {":9", TYPE_SIZEINFO(short), 9},    // does not affect the struct alignment (still 1), but this
                                        // storage unit itself is aligned as a short
    {"e", TYPE_SIZEINFO(char)},
    {":0", BITFIELD_SIZEINFO, 0},       // if this were declared as short, tail padding would be added
};
#endif

// Figure 3-11-2: Storage Unit Sharing 2
// Test subject: 6-bit bit-fields declared as int, short and char, with a plain char in between.
struct Figure3_11_2 {
    int a: 6;
    char b;
    short c: 6;
    char d: 6;
};
#if defined(_WIN32)
// Windows variant: checks that a is at byte 0, b at byte 4, c in the short at byte 6 and
// d at byte 8 by comparing raw bytes with the member values.
void printOffset_Figure3_11_2() {
    Figure3_11_2 obj = {1, 2, 3, 4};
    char* bytes = reinterpret_cast<char*>(&obj);
    const int sixBits = 0b111111;   // all three bit-fields are 6 bits wide

    if ((bytes[0] & sixBits) != obj.a)
        std::cout << "ERROR: Offset of a is incorrect" << std::endl;
    else
        std::cout << "  a at offset 0" << std::endl;

    std::cout << (bytes[4] != obj.b ? "ERROR:" : "")
              << "  b at offset " << offsetof(Figure3_11_2, b) << std::endl;

    const short unitC = *reinterpret_cast<short*>(bytes + 6);
    if ((unitC & sixBits) != obj.c)
        std::cout << "ERROR: Offset of c is incorrect" << std::endl;
    else
        std::cout << "  c at offset 6" << std::endl;

    if ((bytes[8] & sixBits) != obj.d)
        std::cout << "ERROR: Offset of d is incorrect" << std::endl;
    else
        std::cout << "  d at offset 8" << std::endl;
}
// Alignment of Figure3_11_2 in bits; passed to printMemoryLayout as the diagram row width.
constexpr auto Figure3_11_2_align = 8 * alignof(Figure3_11_2);
// Windows layout table for Figure3_11_2: every bit-field opens a storage unit of its own type.
const std::vector<StructMember> Figure3_11_2_members = {
    {"a", TYPE_SIZEINFO(int), 6},
    {"b", TYPE_SIZEINFO(char)},
    {"c", TYPE_SIZEINFO(short), 6},
    {"d", TYPE_SIZEINFO(char), 6},
};
#elif defined(__linux__)
// Linux variant: checks that a is at byte 0, b at byte 1, c in the short at byte 2 and
// d at byte 3 by comparing raw bytes with the member values.
void printOffset_Figure3_11_2() {
    Figure3_11_2 obj = {1, 2, 3, 4};
    char* bytes = reinterpret_cast<char*>(&obj);
    const int sixBits = 0b111111;   // all three bit-fields are 6 bits wide

    if ((bytes[0] & sixBits) != obj.a)
        std::cout << "ERROR: Offset of a is incorrect" << std::endl;
    else
        std::cout << "  a at offset 0" << std::endl;

    std::cout << (bytes[1] != obj.b ? "ERROR:" : "")
              << "  b at offset " << offsetof(Figure3_11_2, b) << std::endl;

    const short unitC = *reinterpret_cast<short*>(bytes + 2);
    if ((unitC & sixBits) != obj.c)
        std::cout << "ERROR: Offset of c is incorrect" << std::endl;
    else
        std::cout << "  c at offset 2" << std::endl;

    if ((bytes[3] & sixBits) != obj.d)
        std::cout << "ERROR: Offset of d is incorrect" << std::endl;
    else
        std::cout << "  d at offset 3" << std::endl;
}
// Alignment of Figure3_11_2 in bits; passed to printMemoryLayout as the diagram row width.
constexpr auto Figure3_11_2_align = 8 * alignof(Figure3_11_2);
// Linux layout table for Figure3_11_2: a opens an int-sized storage unit and the remaining
// members are modelled as fields placed inside it at their own alignment.
const std::vector<StructMember> Figure3_11_2_members = {
    {"a", TYPE_SIZEINFO(int), 6},
    {"b", 0, alignof_bits<char>::value, sizeof_bits<char>::value},
    {"c", 0, alignof_bits<short>::value, 6},
    {"d", 0, alignof_bits<char>::value, 6},
};
#endif

// Custom 1: Zero Bitfield effect
// Test subject: a single char surrounded by zero-width unnamed bit-fields.
struct Custom_1 {
    int : 0;      // zero-width unnamed bit-field before the only named member
    char c;
    short : 0;    // zero-width unnamed bit-field after it
};
#if defined(_WIN32)
// Windows variant: fills the object with the byte pattern 0b00000101, sets c and checks that
// c is stored in the first byte.
void printOffset_Custom_1() {
    Custom_1 obj;
    std::memset(&obj, 0b00000101, sizeof(Custom_1));
    obj.c = 1;
    const char* bytes = reinterpret_cast<char*>(&obj);

    std::cout << (bytes[0] != obj.c ? "ERROR:" : "")
              << "  c at offset " << offsetof(Custom_1, c) << std::endl;
}
// Alignment of Custom_1 in bits; passed to printMemoryLayout as the diagram row width.
constexpr auto Custom_1_align = 8 * alignof(Custom_1);
// Windows layout table for Custom_1.
const std::vector<StructMember> Custom_1_members = {
    {":0", BITFIELD_SIZEINFO, 0},       // ignored
    {"c", TYPE_SIZEINFO(char)},
    {":0", BITFIELD_SIZEINFO, 0},       // ignored
};
#elif defined(__linux__)
// Linux variant: fills the object with the byte pattern 0b00000101, sets c and checks that
// c is stored in the first byte.
void printOffset_Custom_1() {
    Custom_1 obj;
    std::memset(&obj, 0b00000101, sizeof(Custom_1));
    obj.c = 1;
    const char* bytes = reinterpret_cast<char*>(&obj);

    std::cout << (bytes[0] != obj.c ? "ERROR:" : "")
              << "  c at offset " << offsetof(Custom_1, c) << std::endl;
}
// Diagram row width in bits (2 bytes), hard-coded for the Linux diagram.
constexpr auto Custom_1_align = 8 * 2;
// Linux layout table for Custom_1.
const std::vector<StructMember> Custom_1_members = {
    {":0", TYPE_SIZEINFO(int), 0},      // ignored, or rather c is already on a suitable boundary
    {"c", TYPE_SIZEINFO(short), 8},
    {":0", BITFIELD_SIZEINFO, 0},       // causes tail padding up to the short boundary
};
#endif

// Custom 2: Zero Bitfield effect 2
// Test subject: four chars, a 7-bit char bit-field, a zero-width int bit-field, then a char.
struct Custom_2 {
    char a;
    char b;
    char c;
    char d;
    char e: 7;
    int : 0;      // zero-width unnamed bit-field placed directly after a named bit-field
    char f;
};
#if defined(_WIN32)
// Windows variant: prints the offsets of a-d, then checks that the 7-bit field e is stored in
// byte 4 and that f is stored in byte 8.
void printOffset_Custom_2() {
    Custom_2 obj;
    // Fill the whole Custom_2 object (the original used sizeof(Custom_1), which left most of
    // the object, including f, uninitialized before it was read below).
    std::memset(&obj, 0b00000101, sizeof(Custom_2));
    obj.e = 6;
    char* storageUnit = reinterpret_cast<char*>(&obj);
    const int mask_e = 0b1111111;

    std::cout << "  a at offset " << offsetof(Custom_2, a) << std::endl;
    std::cout << "  b at offset " << offsetof(Custom_2, b) << std::endl;
    std::cout << "  c at offset " << offsetof(Custom_2, c) << std::endl;
    std::cout << "  d at offset " << offsetof(Custom_2, d) << std::endl;

    if ((*(storageUnit + 4) & mask_e) == obj.e)
        std::cout << "  e at offset 4" << std::endl;
    else
        std::cout << "ERROR: Offset of e is incorrect" << std::endl;

    if (*(storageUnit + 8) != obj.f)
        std::cout << "ERROR:";
    std::cout << "  f at offset " << offsetof(Custom_2, f) << std::endl;
}
// Alignment of Custom_2 in bits; passed to printMemoryLayout as the diagram row width.
constexpr auto Custom_2_align = 8 * alignof(Custom_2);
// Windows layout table for Custom_2. e is modelled with an int-sized storage unit so that the
// zero-width entry after it can consume the rest of that unit.
const std::vector<StructMember> Custom_2_members = {
    {"a", TYPE_SIZEINFO(char)},
    {"b", TYPE_SIZEINFO(char)},
    {"c", TYPE_SIZEINFO(char)},
    {"d", TYPE_SIZEINFO(char)},
    {"e", TYPE_SIZEINFO(int), 7},
    {":0", BITFIELD_SIZEINFO, 0},       // a zero-width bit-field that follows another bit-field is honoured:
                                        // it affects the struct alignment and pushes the next member at
                                        // least to a boundary of its declared type
    {"f", TYPE_SIZEINFO(char)},
};
#elif defined(__linux__)
// Linux variant: prints the offsets of a-d, then checks that the 7-bit field e is stored in
// byte 4 and that f is stored in byte 8.
void printOffset_Custom_2() {
    Custom_2 obj;
    // Fill the whole Custom_2 object (the original used sizeof(Custom_1), which left most of
    // the object, including f, uninitialized before it was read below).
    std::memset(&obj, 0b00000101, sizeof(Custom_2));
    obj.e = 5;
    char* storageUnit = reinterpret_cast<char*>(&obj);
    const int mask_e = 0b1111111;

    std::cout << "  a at offset " << offsetof(Custom_2, a) << std::endl;
    std::cout << "  b at offset " << offsetof(Custom_2, b) << std::endl;
    std::cout << "  c at offset " << offsetof(Custom_2, c) << std::endl;
    std::cout << "  d at offset " << offsetof(Custom_2, d) << std::endl;

    if ((*(storageUnit + 4) & mask_e) == obj.e)
        std::cout << "  e at offset 4" << std::endl;
    else
        std::cout << "ERROR: Offset of e is incorrect" << std::endl;

    if (*(storageUnit + 8) != obj.f)
        std::cout << "ERROR:";
    std::cout << "  f at offset " << offsetof(Custom_2, f) << std::endl;
}
// Diagram row width in bits (4 bytes), hard-coded for the Linux diagram.
constexpr auto Custom_2_align = 8 * 4;
// Linux layout table for Custom_2. e is modelled with an int-sized storage unit so that the
// zero-width entry after it can consume the rest of that unit.
const std::vector<StructMember> Custom_2_members = {
    {"a", TYPE_SIZEINFO(char)},
    {"b", TYPE_SIZEINFO(char)},
    {"c", TYPE_SIZEINFO(char)},
    {"d", TYPE_SIZEINFO(char)},
    {"e", TYPE_SIZEINFO(int), 7},
    {":0", BITFIELD_SIZEINFO, 0},       // pushes the next member at least to a boundary of its declared type
    {"f", TYPE_SIZEINFO(char)},
};
#endif

// Custom 3: Zero Bitfield effect 3
// Test subject: three chars, a 3-bit char bit-field, a zero-width int bit-field, then a char.
struct Custom_3 {
    char a;
    char b;
    char c;
    char d: 3;
    int : 0;      // zero-width unnamed bit-field placed directly after a named bit-field
    char f;
};
#if defined(_WIN32)
// Windows variant: fills the object with the byte pattern 0b00000101, assigns d and f, prints
// the offsets of a-c and checks that d is stored in byte 3 and f in byte 4.
void printOffset_Custom_3() {
    Custom_3 obj;
    std::memset(&obj, 0b00000101, sizeof(Custom_3));
    obj.d = 1;  // values above 3 cannot be stored: the top bit is treated as the sign bit
    obj.f = 2;
    char* bytes = reinterpret_cast<char*>(&obj);
    const int threeBits = 0b111;

    std::cout << "  a at offset " << offsetof(Custom_3, a) << std::endl;
    std::cout << "  b at offset " << offsetof(Custom_3, b) << std::endl;
    std::cout << "  c at offset " << offsetof(Custom_3, c) << std::endl;

    if ((bytes[3] & threeBits) != obj.d)
        std::cout << "ERROR: Offset of d is incorrect" << std::endl;
    else
        std::cout << "  d at offset 3" << std::endl;

    std::cout << (bytes[4] != obj.f ? "ERROR:" : "")
              << "  f at offset " << offsetof(Custom_3, f) << std::endl;
}
// Alignment of Custom_3 in bits; passed to printMemoryLayout as the diagram row width.
constexpr auto Custom_3_align = 8 * alignof(Custom_3);
// Windows layout table for Custom_3.
const std::vector<StructMember> Custom_3_members = {
    {"a", TYPE_SIZEINFO(char)},
    {"b", TYPE_SIZEINFO(char)},
    {"c", TYPE_SIZEINFO(char)},
    {"d", TYPE_SIZEINFO(char), 3},
    {":0", BITFIELD_SIZEINFO, 0},       // no padding is added here because f already sits on an int boundary
    {"f", TYPE_SIZEINFO(char)},
};
#elif defined(__linux__)
// Linux variant: fills the object with the byte pattern 0b00000101, assigns d and f, prints
// the offsets of a-c and checks that d is stored in byte 3 and f in byte 4.
void printOffset_Custom_3() {
    Custom_3 obj;
    std::memset(&obj, 0b00000101, sizeof(Custom_3));
    obj.d = 1;  // here too values above 3 cannot be stored: the top bit is treated as the sign
                // bit, which does not match the ABI text
    obj.f = 2;
    char* bytes = reinterpret_cast<char*>(&obj);
    const int threeBits = 0b111;

    std::cout << "  a at offset " << offsetof(Custom_3, a) << std::endl;
    std::cout << "  b at offset " << offsetof(Custom_3, b) << std::endl;
    std::cout << "  c at offset " << offsetof(Custom_3, c) << std::endl;

    if ((bytes[3] & threeBits) != obj.d)
        std::cout << "ERROR: Offset of d is incorrect" << std::endl;
    else
        std::cout << "  d at offset 3" << std::endl;

    std::cout << (bytes[4] != obj.f ? "ERROR:" : "")
              << "  f at offset " << offsetof(Custom_3, f) << std::endl;
}
// Diagram row width in bits (4 bytes), hard-coded for the Linux diagram.
constexpr auto Custom_3_align = 8 * 4;
// Linux layout table for Custom_3.
const std::vector<StructMember> Custom_3_members = {
    {"a", TYPE_SIZEINFO(char)},
    {"b", TYPE_SIZEINFO(char)},
    {"c", TYPE_SIZEINFO(char)},
    {"d", TYPE_SIZEINFO(char), 3},
    {":0", BITFIELD_SIZEINFO, 0},       // no padding is added here because f already sits on an int boundary
    {"f", TYPE_SIZEINFO(char)},
};
#endif

// Custom 4: Nested
// Inner struct embedded in Custom_4: three shorts.
struct Temp1 {
    short a;
    short b;
    short c;
};
// Test subject: a short, the nested Temp1 and a trailing char.
struct Custom_4 {
    short a;
    Temp1 tmp;
    char b;
};
#if defined(_WIN32)
// Windows variant: prints the offsets of a, tmp and b; the tmp and b lines are prefixed with
// "ERROR:" when the offsets are not 2 and 8 respectively.
void printOffset_Custom_4() {
    Custom_4 obj;
    std::memset(&obj, 0b00000101, sizeof(Custom_4));

    const auto offsetTmp = offsetof(Custom_4, tmp);
    const auto offsetB = offsetof(Custom_4, b);

    std::cout << "  a at offset " << offsetof(Custom_4, a) << std::endl;
    std::cout << (offsetTmp != 2 ? "ERROR:" : "") << "  tmp at offset " << offsetTmp << std::endl;
    std::cout << (offsetB != 8 ? "ERROR:" : "") << "  b at offset " << offsetB << std::endl;
}
constexpr auto Custom_4_align = alignof(Custom_4) * 8;
const std::vector<StructMember> Custom_4_members = {
    {"a", TYPE_SIZEINFO(short)},
    {"tmp", TYPE_SIZEINFO(Temp1)},
    {"b", TYPE_SIZEINFO(short)},
    };
#elif defined(__linux__)
// Linux variant: prints the offsets of a, tmp and b; the tmp and b lines are prefixed with
// "ERROR:" when the offsets are not 2 and 8 respectively.
void printOffset_Custom_4() {
    Custom_4 obj;
    std::memset(&obj, 0b00000101, sizeof(Custom_4));

    const auto offsetTmp = offsetof(Custom_4, tmp);
    const auto offsetB = offsetof(Custom_4, b);

    std::cout << "  a at offset " << offsetof(Custom_4, a) << std::endl;
    std::cout << (offsetTmp != 2 ? "ERROR:" : "") << "  tmp at offset " << offsetTmp << std::endl;
    std::cout << (offsetB != 8 ? "ERROR:" : "") << "  b at offset " << offsetB << std::endl;
}
constexpr auto Custom_4_align = alignof(Custom_4) * 8;
const std::vector<StructMember> Custom_4_members = {
    {"a", TYPE_SIZEINFO(short)},
    {"tmp", TYPE_SIZEINFO(Temp1)},
    {"b", TYPE_SIZEINFO(short)},
    };
#endif

// Custom 5: Bitfield storage
// Test subject: three short bit-fields whose widths (5 + 5 + 7 = 17 bits) exceed one short.
struct Custom_5 {
    short a: 5;
    short b: 5;
    short c: 7;
};
#if defined(_WIN32)
// Windows variant: checks that a occupies the low 5 bits of byte 0, that b follows at bit 5 of
// the first short, and that c lives in byte 2 holding the bit pattern 0b1111101, which reads
// back through the signed 7-bit field as -3.
void printOffset_Custom_5() {
    Custom_5 obj;
    obj.a = 1;
    obj.b = 2;
    obj.c = 0b1111101;
    char* bytes = reinterpret_cast<char*>(&obj);
    const auto shiftB = 5;
    const int maskA = 0b11111;
    const int maskB = 0b11111 << shiftB;
    const int maskC = 0b1111111;

    if ((bytes[0] & maskA) != obj.a)
        std::cout << "ERROR: Offset of a is incorrect" << std::endl;
    else
        std::cout << "  a at offset 0" << std::endl;

    const short firstUnit = *reinterpret_cast<short*>(bytes);
    if ((firstUnit & maskB) >> shiftB != obj.b)
        std::cout << "ERROR: Offset of b is incorrect" << std::endl;
    else
        std::cout << "  b at offset " << shiftB << " bits" << std::endl;

    const bool patternAtByte2 = (bytes[2] & maskC) == 0b1111101;
    if (patternAtByte2 && obj.c == -3)
        std::cout << "  c at offset 2, value=" << obj.c << std::endl;
    else
        std::cout << "ERROR: Offset of c is incorrect" << std::endl;
}
// Alignment of Custom_5 in bits; passed to printMemoryLayout as the diagram row width.
constexpr auto Custom_5_align = 8 * alignof(Custom_5);
// Windows layout table for Custom_5: a opens a short-sized storage unit, b continues in it and
// c opens a second one.
const std::vector<StructMember> Custom_5_members = {
    {"a", TYPE_SIZEINFO(short), 5},
    {"b", BITFIELD_SIZEINFO, 5},
    {"c", TYPE_SIZEINFO(short), 7},
};
#elif defined(__linux__)
// Linux variant: checks that a occupies the low 5 bits of byte 0, that b follows at bit 5 of
// the first short, and that c lives in byte 2 holding the bit pattern 0b1111101, which reads
// back through the signed 7-bit field as -3.
void printOffset_Custom_5() {
    Custom_5 obj;
    obj.a = 1;
    obj.b = 2;
    obj.c = 0b1111101;
    char* bytes = reinterpret_cast<char*>(&obj);
    const auto shiftB = 5;
    const int maskA = 0b11111;
    const int maskB = 0b11111 << shiftB;
    const int maskC = 0b1111111;

    if ((bytes[0] & maskA) != obj.a)
        std::cout << "ERROR: Offset of a is incorrect" << std::endl;
    else
        std::cout << "  a at offset 0" << std::endl;

    const short firstUnit = *reinterpret_cast<short*>(bytes);
    if ((firstUnit & maskB) >> shiftB != obj.b)
        std::cout << "ERROR: Offset of b is incorrect" << std::endl;
    else
        std::cout << "  b at offset " << shiftB << " bits" << std::endl;

    const bool patternAtByte2 = (bytes[2] & maskC) == 0b1111101;
    if (patternAtByte2 && obj.c == -3)
        std::cout << "  c at offset 2, value=" << obj.c << std::endl;
    else
        std::cout << "ERROR: Offset of c is incorrect" << std::endl;
}
// Alignment of Custom_5 in bits; passed to printMemoryLayout as the diagram row width.
constexpr auto Custom_5_align = 8 * alignof(Custom_5);
// Linux layout table for Custom_5: a opens a short-sized storage unit, b continues in it and
// c opens a second one.
const std::vector<StructMember> Custom_5_members = {
    {"a", TYPE_SIZEINFO(short), 5},
    {"b", BITFIELD_SIZEINFO, 5},
    {"c", TYPE_SIZEINFO(short), 7},
};
#endif

// Prints a short report for type T under the heading `name`: alignof(T), sizeof(T), the
// address of a local T object, and whether that address is a multiple of the alignment.
//
// name: heading printed before the report (followed by ':').
template<typename T>
void printBaseInfo(const char* name) {
    T obj;   // only its address is inspected; the contents are never read
    const std::uintptr_t address = reinterpret_cast<std::uintptr_t>(&obj);
    const std::size_t alignment = alignof(T);

    std::cout << std::endl;
    std::cout << name << ":" << std::endl;
    std::cout << "  alignment: " << alignment << " bytes" << std::endl;
    std::cout << "  size: " << sizeof(T) << " bytes" << std::endl;
    std::cout << "  address: " << reinterpret_cast<void*>(address) << std::endl;
    std::cout << "  correct alignment: " << ((address % alignment) == 0 ? "yes" : "no") << std::endl;
}

void printMemoryLayout(int alignment, const std::vector<StructMember>& members)
{
    int alignmentUnitSize = 0;
    int addressOffset = 0;
    int leftoverSpace = 0;
    std::vector<MemoryUnit> units;

    auto printBorder = [&alignment]() {
        for (int i = 0; i < alignment; i++)
            std::cout << "-";
        std::cout << std::endl;
    };

    auto printUnits = [&units, &printBorder]() {
        for (auto rit = units.crbegin(); rit != units.crend(); ++rit) {
            size_t space = rit->sizeInBits;
            std::string str(rit->sizeInBits, ' ');
            str[0] = '|';
            --space;
            if (std::next(rit) == units.crend()) {
                str[str.length() - 1] = '|';
                --space;
            }
            if (space > 0) {
                size_t len = space >= rit->name.length() ? rit->name.length() : space;
                size_t pos = (space - len) / 2;
                ++pos;
                str.replace(pos, len, rit->name, 0, len);
            }
            std::cout << str;
        }
        std::cout << "  >>";
        for (auto rit = units.crbegin(); rit != units.crend(); ++rit) {
            std::string str("  ");
            str.append(rit->name);
            str.append("(");
            if (rit->sizeInBits % 8 == 0) {
                str.append(std::to_string(rit->sizeInBits / 8));
                str.append(" bytes");
            } else {
                str.append(std::to_string(rit->sizeInBits));
                str.append(" bits");
            }
            str.append(")");
            std::cout << str;
        }
        std::cout << std::endl;
        printBorder();

        units.clear();
    };

    auto addUnit = [&addressOffset, &alignmentUnitSize, &units, &alignment, &printUnits](const std::string& name, int size) {
        if (size == 0)
            return;

        addressOffset += size;
        alignmentUnitSize += size;
        if (alignmentUnitSize > alignment) {
            /* 当前对齐单元不足以完全容纳当前成员 */

            // 先处理当前单元能够容纳的部分
            units.push_back({name, size + alignment - alignmentUnitSize});
            printUnits();

            /* 处理余下部分 */
            alignmentUnitSize = alignmentUnitSize - alignment;
            int multiple = alignmentUnitSize / alignment;
            for (int i = 0; i < multiple; ++i) {
                /* 成员跨多个单元 */
                units.push_back({name, alignment});
                printUnits();
            }
            alignmentUnitSize = alignmentUnitSize % alignment;
            if (alignmentUnitSize > 0)
                units.push_back({name, alignmentUnitSize});
        } else if (alignmentUnitSize == alignment) {
            /* 刚好容纳下当前成员 */
            units.push_back({name, size});
            printUnits();
            alignmentUnitSize = 0;
        } else {
            // 单元仍有空间, 尝试继续布局下一成员
            units.push_back({name, size});
        }
    };

    auto alignToBoundary = [&addressOffset, &addUnit](int memberAlignment) -> int {
        int mod = addressOffset % memberAlignment;
        if (mod != 0) {
            // 添加填充以对齐
            addUnit("pad", memberAlignment - mod);
            return memberAlignment - mod;
        }
        return 0;
    };

    printBorder();
    for (size_t i = 0; i < members.size(); ++i) {
        const auto& m = members.at(i);
        if (m.bitfield == -1) {
            if (leftoverSpace != 0) {
                addUnit("pad", leftoverSpace);  // 将前一位域存储单元的余量空间填上
                leftoverSpace = 0;
            }

            alignToBoundary(m.alignment);
            addUnit(m.name, m.sizeInBits);
        } else {
            /* 位域成员 */
            if (m.bitfield != 0) {
                if (m.sizeInBits != 0) {
                    /* 新的位域存储单元 */
                    if (leftoverSpace != 0)
                        addUnit("pad", leftoverSpace);
                    leftoverSpace = m.sizeInBits;
                    alignToBoundary(m.alignment);
                } else {
                    leftoverSpace -= alignToBoundary(m.alignment);
                }

                leftoverSpace -= m.bitfield;
                if (leftoverSpace >= 0) {
                    addUnit(m.name, m.bitfield);
                } else {
                    /* 位域超出存储单元 */
                    assert(false);
                    addUnit(m.name, m.bitfield + leftoverSpace);  // 截断至余量空间
                    leftoverSpace = 0;
                }
            } else {
                // 零宽度位域直接吃掉剩余的位域空间
                if (leftoverSpace != 0) {
                    addUnit(m.name, leftoverSpace);
                    leftoverSpace = 0;
                }
            }
        }
    }
    if (alignmentUnitSize != 0)
        addUnit("pad", alignment - alignmentUnitSize);
}

int main()
{
    printBaseInfo<Figure3_3>("Figure3_3");
    printOffset_Figure3_3();
    printMemoryLayout(Figure3_3_align, Figure3_3_members);

    printBaseInfo<Figure3_4>("Figure3_4");
    printOffset_Figure3_4();
    printMemoryLayout(Figure3_4_align, Figure3_4_members);

    printBaseInfo<Figure3_5>("Figure3_5");
    printOffset_Figure3_5();
    printMemoryLayout(Figure3_5_align, Figure3_5_members);

    printBaseInfo<Figure3_9>("Figure3_9");
    printOffset_Figure3_9();
    printMemoryLayout(Figure3_9_align, Figure3_9_members);

    printBaseInfo<Figure3_10>("Figure3_10");
    printOffset_Figure3_10();
    printMemoryLayout(Figure3_10_align, Figure3_10_members);

    printBaseInfo<Figure3_11>("Figure3_11");
    printOffset_Figure3_11();
    printMemoryLayout(Figure3_11_align, Figure3_11_members);

    printBaseInfo<Figure3_13>("Figure3_13");
    printOffset_Figure3_13();
    printMemoryLayout(Figure3_13_align, Figure3_13_members);

    printBaseInfo<Figure3_11_2>("Figure3_11_2");
    printOffset_Figure3_11_2();
    printMemoryLayout(Figure3_11_2_align, Figure3_11_2_members);

    printBaseInfo<Custom_1>("Custom_1");
    printOffset_Custom_1();
    printMemoryLayout(Custom_1_align, Custom_1_members);

    printBaseInfo<Custom_2>("Custom_2");
    printOffset_Custom_2();
    printMemoryLayout(Custom_2_align, Custom_2_members);

    printBaseInfo<Custom_3>("Custom_3");
    printOffset_Custom_3();
    printMemoryLayout(Custom_3_align, Custom_3_members);

    printBaseInfo<Custom_4>("Custom_4");
    printOffset_Custom_4();
    printMemoryLayout(Custom_4_align, Custom_4_members);

    printBaseInfo<Custom_5>("Custom_5");
    printOffset_Custom_5();
    printMemoryLayout(Custom_5_align, Custom_5_members);
}
posted @ 2025-12-18 00:06  邓加领  阅读(4)  评论(0)    收藏  举报