python源码学习笔记(二)
(二) python 继承和多态
这非常类似C++的功能,只不过是在C的基础上实现的。由上一节知,python的所有对象的基础都是PyObject,所以例如创建一个PyIntObject对象,是通过PyObject*变量来维护的,在python内部各个函数之间传递的都是一种泛型指针PyObject*,是不是很像C++里面的基类指针。如果要Print(PyIntObject*),由多态(polymorphism)我们会知道,调用的实际上是PyIntObject对象对应的类型对象中定义的输出操作。
看代码:
/*
 * Hash an arbitrary object by dispatching through its type object.
 *
 * v: the object to hash, passed as the generic PyObject pointer.
 *
 * Returns the value produced by the type's tp_hash slot when one is set.
 * If tp_hash is unset and the type's tp_dict is NULL, PyType_Ready() is
 * called first and the slot is checked again; -1 is returned when
 * PyType_Ready() reports failure (< 0).  When the type has neither
 * tp_compare nor a rich-compare slot, the result of _Py_HashPointer(v)
 * is returned.  Otherwise the result of PyObject_HashNotImplemented(v)
 * is returned.
 */
long PyObject_Hash(PyObject *v)  /* note: the parameter is the generic PyObject */
{
    PyTypeObject *tp = v->ob_type;  /* look up the object's type */
    if (tp->tp_hash != NULL)
        return (*tp->tp_hash)(v);  /* call the hash function of that type */
    /* To keep to the general practice that inheriting
     * solely from object in C code should work without
     * an explicit call to PyType_Ready, we implicitly call
     * PyType_Ready here and then check the tp_hash slot again
     */
    if (tp->tp_dict == NULL) {
        if (PyType_Ready(tp) < 0)
            return -1;
        if (tp->tp_hash != NULL)
            return (*tp->tp_hash)(v);
    }
    if (tp->tp_compare == NULL && RICHCOMPARE(tp) == NULL) {
        return _Py_HashPointer(v); /* Use address as hash value */
    }
    /* If there's a cmp but no hash defined, the object can't be hashed */
    return PyObject_HashNotImplemented(v);
}
以PyIntObject为例,观察其实现过程。
/* [intobject.h] */
/* Integer object layout: the PyObject_HEAD fields followed by a C long. */
typedef struct {
    PyObject_HEAD
    long ob_ival;
} PyIntObject;

/* [intobject.c] */
/*
 * Add two int objects.
 *
 * Author's note: the function is static and returns a PyObject pointer;
 * handing results around as the generic pointer type is what the
 * polymorphic dispatch relies on.
 *
 * The sum is computed in unsigned arithmetic and cast back to long.  If
 * the result has the same sign as either operand, it is returned through
 * PyInt_FromLong(); otherwise the operation is handed to the nb_add slot
 * of PyLong_Type.
 */
static PyObject *
int_add(PyIntObject *v, PyIntObject *w)
{
    register long a, b, x;
    /* CONVERT_TO_LONG is defined outside this listing; presumably it
       loads the C long value of the object into the second argument. */
    CONVERT_TO_LONG(v, a);
    CONVERT_TO_LONG(w, b);
    /* casts in the line below avoid undefined behaviour on overflow */
    x = (long)((unsigned long)a + b);
    /* (x^a) >= 0 holds when x and a have equal sign bits */
    if ((x^a) >= 0 || (x^b) >= 0)
        return PyInt_FromLong(x);
    return PyLong_Type.tp_as_number->nb_add((PyObject *)v, (PyObject *)w);
}

/*
 * Subtract two int objects.
 *
 * Same scheme as int_add: the difference is computed in unsigned
 * arithmetic; it is accepted when its sign equals the sign of a or the
 * sign of ~b, and otherwise the operation is handed to the nb_subtract
 * slot of PyLong_Type.
 */
static PyObject *
int_sub(PyIntObject *v, PyIntObject *w)
{
    register long a, b, x;
    CONVERT_TO_LONG(v, a);
    CONVERT_TO_LONG(w, b);
    /* casts in the line below avoid undefined behaviour on overflow */
    x = (long)((unsigned long)a - b);
    if ((x^a) >= 0 || (x^~b) >= 0)
        return PyInt_FromLong(x);
    return PyLong_Type.tp_as_number->nb_subtract((PyObject *)v,
                                                 (PyObject *)w);
}

/*
 * Multiply two int objects.
 *
 * The product is computed twice, once in (wrapping) long arithmetic and
 * once in double arithmetic.  When both agree, or differ by at most 1/32
 * of the double product, the long product is returned through
 * PyInt_FromLong(); otherwise the operation is handed to the nb_multiply
 * slot of PyLong_Type.
 */
static PyObject *
int_mul(PyObject *v, PyObject *w)
{
    long a, b;
    long longprod;                      /* a*b in native long arithmetic */
    double doubled_longprod;            /* (double)longprod */
    double doubleprod;                  /* (double)a * (double)b */

    CONVERT_TO_LONG(v, a);
    CONVERT_TO_LONG(w, b);
    /* casts in the next line avoid undefined behaviour on overflow */
    longprod = (long)((unsigned long)a * b);
    doubleprod = (double)a * (double)b;
    doubled_longprod = (double)longprod;

    /* Fast path for normal case:  small multiplicands, and no info
       is lost in either method. */
    if (doubled_longprod == doubleprod)
        return PyInt_FromLong(longprod);

    /* Somebody somewhere lost info.  Close enough, or way off?  Note
       that a != 0 and b != 0 (else doubled_longprod == doubleprod == 0).
       The difference either is or isn't significant compared to the
       true value (of which doubleprod is a good approximation).
    */
    {
        const double diff = doubled_longprod - doubleprod;
        const double absdiff = diff >= 0.0 ? diff : -diff;
        const double absprod = doubleprod >= 0.0 ? doubleprod :
                                                   -doubleprod;
        /* absdiff/absprod <= 1/32 iff
           32 * absdiff <= absprod -- 5 good bits is "close enough" */
        if (32.0 * absdiff <= absprod)
            return PyInt_FromLong(longprod);
        else
            return PyLong_Type.tp_as_number->nb_multiply(v, w);
    }
}
由此可知,python的int实际上是用C里面的long实现的,所以加减乘除等运算都是用long来完成的(从上面的代码可以看到,一旦检测到溢出,就转交给PyLong_Type对应的nb_add/nb_subtract/nb_multiply处理)。又由于PyIntObject是一个不可变(immutable)对象,对象的值不可改变,因此最后return的都是新的对象:PyInt_FromLong(num),即由一个long值创建一个int对象。
(三)python整数的实现
在整数里,python分为大整数和小整数,为了加快计算,节省内存的分配时间,因为不论是什么对象,只要在堆上申请空间是非常费时的,所以在涉及到频繁的内存操作时需要做一些优化。python提供了一种比较原始的方法——设个阈值,无语了,有这么来的么,最起码来个动态阈值也好啊...
/* Bounds of the small-integer cache; both can be overridden at build
   time because each definition is guarded by #ifndef. */
#ifndef NSMALLPOSINTS
#define NSMALLPOSINTS 257
#endif
#ifndef NSMALLNEGINTS
#define NSMALLNEGINTS 5
#endif
/* With the default values, integers in [-5, 257) count as small ints. */
#if NSMALLNEGINTS + NSMALLPOSINTS > 0
/* References to small integers are saved in this array so that they
   can be shared.
   The integers that are saved are those in the range
   -NSMALLNEGINTS (inclusive) to NSMALLPOSINTS (not inclusive),
   i.e. [-5, 257) with the default values.
*/
/* One PyIntObject pointer per cached value, reserved for later sharing. */
static PyIntObject *small_ints[NSMALLNEGINTS + NSMALLPOSINTS];
#endif
然后是大整数,大整数采用块内存区间内缓存
/* Sizing of the blocks from which int objects are carved. */
#define BLOCK_SIZE 1000 /* 1K less typical malloc overhead */
#define BHEAD_SIZE 8 /* Enough for a 64-bit pointer */
/* Number of PyIntObject slots that fit in a block once BHEAD_SIZE bytes
   are set aside (integer division, so any remainder is unused). */
#define N_INTOBJECTS ((BLOCK_SIZE - BHEAD_SIZE) / sizeof(PyIntObject))

/* One block: a link to another block plus an array of N_INTOBJECTS
   int objects. */
struct _intblock {
    struct _intblock *next;
    PyIntObject objects[N_INTOBJECTS];
};

typedef struct _intblock PyIntBlock;

/* Both lists start out empty.  In this file fill_free_list() pushes new
   blocks onto block_list, and free_list is the chain of currently unused
   PyIntObject slots. */
static PyIntBlock *block_list = NULL;
static PyIntObject *free_list = NULL;
N_INTOBJECTS 到底是多少呢,算一下PyIntObject的大小,PyIntObject宏展开(without Py_TRACE_REFS)后就是:
Py_ssize_t ob_refcnt;
PyTypeObject *ob_type;
long ob_ival;
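在32位平台上,这三个字段各占4个字节,字节大小为4+4+4 = 12,N_INTOBJECTS = (1000-8)/12 = 82(整数除法,舍去余数);在64位(LP64)平台上三个字段各占8个字节,共24字节,此时N_INTOBJECTS = (1000-8)/24 = 41。下面以32位为例,即一个PyIntBlock维护着82个PyIntObject。乍一看PyIntBlock其实就是单链表的一个节点,而这82个objects相当于可以存放82个整数。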
通过block_list 来维护,看代码:
/*
 * Allocate one more PyIntBlock, put it at the front of block_list, and
 * thread the int objects inside it into a singly linked chain that reuses
 * each object's type field (Py_TYPE) as the link.
 *
 * Returns the address of the last int object in the new block, which is
 * the head of that chain.  When PyMem_MALLOC() yields NULL, the result of
 * PyErr_NoMemory() is returned instead (callers in this file compare the
 * return value against NULL).
 */
static PyIntObject *
fill_free_list(void)
{
    PyIntObject *p, *q;
    /* Python's object allocator isn't appropriate for large blocks. */
    p = (PyIntObject *) PyMem_MALLOC(sizeof(PyIntBlock));
    if (p == NULL)
        return (PyIntObject *) PyErr_NoMemory();
    ((PyIntBlock *)p)->next = block_list;  /* chain the new block onto block_list */
    block_list = (PyIntBlock *)p;
    /* Link the int objects together, from rear to front, then return
       the address of the last int object in the block. */
    p = &((PyIntBlock *)p)->objects[0];  /* address of the first object: the front */
    q = p + N_INTOBJECTS;  /* one past the last object; the --q below steps onto the rear */
    while (--q > p)  /* walk from the rear towards the front */
        Py_TYPE(q) = (struct _typeobject *)(q-1);  /* link each object to the one before it */
    Py_TYPE(q) = NULL;  /* q == p here: the front object ends the chain */
    return p + N_INTOBJECTS - 1;  /* the rear object */
}
由上可知,对外是一个block_list链表,对内每个PyIntBlock里面是一系列的PyIntObject;当需要新开辟一个PyIntBlock时,通过((PyIntBlock *)p)->next = block_list把这些block串成一个链表。
现在我们可以看看PyInt_FromLong的实现了
/* [intobject.c] */
/*
 * Create (or fetch) the int object for a C long.
 *
 * ival: the value the returned object must hold.
 *
 * When the small-int cache is compiled in and ival lies in
 * [-NSMALLNEGINTS, NSMALLPOSINTS), the cached object from small_ints is
 * returned after Py_INCREF.  Otherwise an object is taken from the head
 * of free_list (refilled through fill_free_list() when empty),
 * initialised with PyObject_INIT for PyInt_Type, and given the value.
 * Returns NULL when fill_free_list() returns NULL.
 */
PyObject *
PyInt_FromLong(long ival)
{
    register PyIntObject *v;  /* register hint, intended to speed up access */
#if NSMALLNEGINTS + NSMALLPOSINTS > 0
    if (-NSMALLNEGINTS <= ival && ival < NSMALLPOSINTS) {  /* inside the small-int range: direct hit */
        v = small_ints[ival + NSMALLNEGINTS];
        Py_INCREF(v);  /* one more reference to the shared object */
#ifdef COUNT_ALLOCS  /* optional statistics */
        if (ival >= 0)
            quick_int_allocs++;  /* hits for non-negative values */
        else
            quick_neg_int_allocs++;  /* hits for negative values */
#endif
        return (PyObject *) v;
    }
#endif
    if (free_list == NULL) {  /* not a cached small int and no free object left: refill */
        /* free_list receives the rear object returned by fill_free_list();
           unlike block_list, it points at a PyIntObject, not a PyIntBlock. */
        if ((free_list = fill_free_list()) == NULL)
            return NULL;
    }
    /* Inline PyObject_New */
    v = free_list;
    free_list = (PyIntObject *)Py_TYPE(v);  /* the type field of a free object holds the next free object */
    PyObject_INIT(v, &PyInt_Type);  /* initialise as a Python int */
    v->ob_ival = ival;
    return (PyObject *) v;
}
最后有
/* [intobject.c] */
/* True when op's type is exactly PyInt_Type (pointer comparison). */
#define PyInt_CheckExact(op) ((op)->ob_type == &PyInt_Type)
/* ... */
/*
 * Deallocate an int object.
 *
 * For an exact PyInt_Type object nothing is released: the object is
 * pushed onto free_list by storing the old list head in its type field.
 * For any other type the tp_free slot of that type is called.
 */
static void
int_dealloc(PyIntObject *v)
{
    if (PyInt_CheckExact(v)) {
        /* exact int: v becomes the new head of the free list and its
           type field is overwritten with the previous head */
        Py_TYPE(v) = (struct _typeobject *)free_list;
        free_list = v;
    }
    else
        Py_TYPE(v)->tp_free((PyObject *)v);  /* not an exact int: use the type's own free function */
}

/*
 * Unconditionally push v onto free_list, in the same way as the
 * exact-int branch of int_dealloc.
 */
static void
int_free(PyIntObject *v)
{
    Py_TYPE(v) = (struct _typeobject *)free_list;  /* same push as in int_dealloc */
    free_list = v;
}
不要搞混的是,上述实现不管是针对小整数还是大整数,起的都是缓冲池的作用,不要误解为是在实现大数字(任意精度整数)功能;这里的整数值都是用long来保存的,这一点很容易让人误解。
![]()
再看小整数的缓冲池:

再看小整数的缓冲池:
/* Static array of pointers, one per cached small integer. */
static PyIntObject *small_ints[NSMALLNEGINTS + NSMALLPOSINTS];
/* ... */

/*
 * Fill the small-int cache.
 *
 * For every value in [-NSMALLNEGINTS, NSMALLPOSINTS) an object is taken
 * from free_list (refilled through fill_free_list() when empty),
 * initialised as a PyInt_Type object holding that value, and stored in
 * small_ints at index value + NSMALLNEGINTS.
 *
 * Returns 1 on completion, or 0 as soon as fill_free_list() returns NULL.
 */
int
_PyInt_Init(void)
{
    PyIntObject *v;
    int ival;
#if NSMALLNEGINTS + NSMALLPOSINTS > 0
    for (ival = -NSMALLNEGINTS; ival < NSMALLPOSINTS; ival++) {
        if (!free_list && (free_list = fill_free_list()) == NULL)  /* refill the free list when it is empty */
            return 0;
        /* PyObject_New is inlined */
        v = free_list;
        free_list = (PyIntObject *)Py_TYPE(v);
        PyObject_INIT(v, &PyInt_Type);
        v->ob_ival = ival;  /* set the value, then publish the object in the cache */
        small_ints[ival + NSMALLNEGINTS] = v;  /* one-to-one mapping from value to slot */
    }
#endif
    return 1;
}
综上我们知道,所有的整数对象都是在堆上分配内存的(从_PyInt_Init可以看到,小整数对象同样取自free_list所指向的PyIntBlock);小整数通过small_ints[]数组一一映射来加快查找速度,大整数则通过free_list这个空闲链表来维护和复用内存。