#define offsetof(st, m) ((size_t) ( (char *)&((st *)(0))->m - (char *)0 ))
The idea is very simple. We cast address 0 to a pointer to the target type, then we take the address of the member through that pointer. Because the structure's start address is 0, the numeric value of the member's address is exactly its offset. Everything is clear and simple. But wait: look at how we obtain the address of the member. It appears to be dereferencing a pointer to address 0. Why doesn't it give us a segmentation fault?
To understand this, let's look at the C code below, which retrieves the offset of member c in struct foo.
1 #include "stdio.h"
2
3 struct foo
4 {
5 int a;
6 char b;
7 int c;
8 };
9
10 int main ( int argc, char *argv[] )
11 {
12 struct foo* fp = (struct foo*)0;
13 unsigned int offset = (unsigned int)&fp->c;
14 printf("offset of c is %u\n", offset);
15 return 0;
16 } // ---------- end of function main ----------
Then we compile it with Microsoft's C++ compiler and dump the relevant assembly code generated by the compiler. We get this:
3 struct foo
4 {
5 int a;
6 char b;
7 int c;
8 };
9
10 int main ( int argc, char *argv[] )
11 {
12 struct foo* fp = (struct foo*)0;
13 unsigned int offset = (unsigned int)&fp->c;
14 printf("offset of c is %u\n", offset);
15 return 0;
16 } // ---------- end of function main ----------
1 _main:
2 00401010: 55 push ebp
3 00401011: 8B EC mov ebp,esp
4 00401013: 83 EC 08 sub esp,8
5 00401016: C7 45 FC 00 00 00 mov dword ptr [ebp-4],0
6 00
7 0040101D: 8B 45 FC mov eax,dword ptr [ebp-4]
8 00401020: 83 C0 08 add eax,8
9 00401023: 89 45 F8 mov dword ptr [ebp-8],eax
10 00401026: 8B 4D F8 mov ecx,dword ptr [ebp-8]
11 00401029: 51 push ecx
12 0040102A: 68 5C DC 41 00 push 41DC5Ch
13 0040102F: E8 14 00 00 00 call _printf
14 00401034: 83 C4 08 add esp,8
15 00401037: 33 C0 xor eax,eax
16 00401039: 8B E5 mov esp,ebp
17 0040103B: 5D pop ebp
18 0040103C: C3 ret
2 00401010: 55 push ebp
3 00401011: 8B EC mov ebp,esp
4 00401013: 83 EC 08 sub esp,8
5 00401016: C7 45 FC 00 00 00 mov dword ptr [ebp-4],0
6 00
7 0040101D: 8B 45 FC mov eax,dword ptr [ebp-4]
8 00401020: 83 C0 08 add eax,8
9 00401023: 89 45 F8 mov dword ptr [ebp-8],eax
10 00401026: 8B 4D F8 mov ecx,dword ptr [ebp-8]
11 00401029: 51 push ecx
12 0040102A: 68 5C DC 41 00 push 41DC5Ch
13 0040102F: E8 14 00 00 00 call _printf
14 00401034: 83 C4 08 add esp,8
15 00401037: 33 C0 xor eax,eax
16 00401039: 8B E5 mov esp,ebp
17 0040103B: 5D pop ebp
18 0040103C: C3 ret
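It's clear that the compiler doesn't blindly follow the null pointer to get its member variable's address: the generated code never reads from the address stored in fp. Instead, because the compiler knows the structure and layout of struct foo, it adds the offset of member c (which is already known to the compiler) to the starting address of struct foo to find the address of c. This is the `add eax,8` instruction in the listing: the offset is 8 because int a occupies 4 bytes and char b occupies 1 byte followed by 3 bytes of padding, so that c is aligned on a 4-byte boundary.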
Computing the offset never accesses the memory pointed to by the null pointer; it is pure address arithmetic. That's why we don't get an invalid memory access error even though we are using a null pointer.