python源码学习笔记(二)

(二) python 继承和多态
 
   这非常类似C++的功能,只不过是在C的基础上实现的。由上一节可知,python所有对象的基础都是PyObject,所以例如创建一个PyIntObject对象,也是通过PyObject*变量来维护的;在python内部各个函数之间传递的都是一种泛型指针PyObject*,是不是很像C++里面的基类指针。如果要Print(PyIntObject*),由多态(polymorphism)可知,实际调用的是PyIntObject对象对应的类型对象中定义的输出操作。
看代码:
 
复制代码
/* Compute the hash of v by dispatching through its type object.
 * Returns the result of the type's tp_hash slot when it is set, -1 if
 * PyType_Ready() fails, _Py_HashPointer(v) when the type has neither
 * tp_compare nor RICHCOMPARE(tp), and otherwise the result of
 * PyObject_HashNotImplemented(v).
 */
long
PyObject_Hash(PyObject *v)                // takes a generic PyObject*, not a concrete object type
{
    PyTypeObject *tp = v->ob_type;        // fetch the object's type object
    if (tp->tp_hash != NULL)
        return (*tp->tp_hash)(v);                // call the hash function supplied by that type
        
    /* To keep to the general practice that inheriting
     * solely from object in C code should work without
     * an explicit call to PyType_Ready, we implicitly call
     * PyType_Ready here and then check the tp_hash slot again
     */
    // In other words: a C type that inherits only from object may never have had
    // PyType_Ready() called explicitly, so call it here and re-check the tp_hash slot.
    if (tp->tp_dict == NULL) {
        if (PyType_Ready(tp) < 0)
            return -1;
        if (tp->tp_hash != NULL)
            return (*tp->tp_hash)(v);
    }
    if (tp->tp_compare == NULL && RICHCOMPARE(tp) == NULL) {
        return _Py_HashPointer(v); /* Use address as hash value */        // the object's address serves as its hash
    }
    /* If there's a cmp but no hash defined, the object can't be hashed */
    // a comparison exists but no hash is defined: report the object as unhashable
    return PyObject_HashNotImplemented(v);
}
复制代码

 

 



 以PyIntObject为例,观察其实现过程。
复制代码
 1 [intobject.h]
 2   typedef struct {
 3     PyObject_HEAD              // common object header macro shared by all Python objects
 4     long ob_ival;              // the integer value, stored as a C long
 5 } PyIntObject;
 6  
 7 [intobject.c]
 8 static PyObject *                                        // static function; the return type is the generic PyObject*, which is how the polymorphic interface is expressed
 9 int_add(PyIntObject *v, PyIntObject *w)    // add two int objects
10 {
11     register long a, b, x;
12     CONVERT_TO_LONG(v, a);    // presumably loads the C long value of v into a -- macro not shown here
13     CONVERT_TO_LONG(w, b);
14     /* casts in the line below avoid undefined behaviour on overflow */
15     x = (long)((unsigned long)a + b);
16     if ((x^a) >= 0 || (x^b) >= 0)    // x has the same sign as a or as b, i.e. the sum did not overflow
17         return PyInt_FromLong(x);
18     return PyLong_Type.tp_as_number->nb_add((PyObject *)v, (PyObject *)w);    // otherwise delegate to PyLong_Type's nb_add
19 }
20  
21 static PyObject *
22 int_sub(PyIntObject *v, PyIntObject *w)      // subtract w from v
23 {
24     register long a, b, x;
25     CONVERT_TO_LONG(v, a);    // presumably loads the C long value of v into a -- macro not shown here
26     CONVERT_TO_LONG(w, b);
27     /* casts in the line below avoid undefined behaviour on overflow */
28     x = (long)((unsigned long)a - b);
29     if ((x^a) >= 0 || (x^~b) >= 0)    // x has the same sign as a or as ~b, i.e. the difference did not overflow
30         return PyInt_FromLong(x);
31     return PyLong_Type.tp_as_number->nb_subtract((PyObject *)v,    // otherwise delegate to PyLong_Type's nb_subtract
32                                                  (PyObject *)w);
33 }
34  
35 static PyObject *
36 int_mul(PyObject *v, PyObject *w)                        // multiply two ints; operands arrive as generic PyObject*
37 {
38     long a, b;
39     long longprod;                      /* a*b in native long arithmetic */
40     double doubled_longprod;            /* (double)longprod */
41     double doubleprod;                  /* (double)a * (double)b */
42  
43     CONVERT_TO_LONG(v, a);              // presumably loads the C long value of v into a -- macro not shown here
44     CONVERT_TO_LONG(w, b);
45     /* casts in the next line avoid undefined behaviour on overflow */
46     longprod = (long)((unsigned long)a * b);
47     doubleprod = (double)a * (double)b;
48     doubled_longprod = (double)longprod;
49  
50     /* Fast path for normal case:  small multiplicands, and no info
51        is lost in either method. */
52     if (doubled_longprod == doubleprod)
53         return PyInt_FromLong(longprod);
54  
55     /* Somebody somewhere lost info.  Close enough, or way off?  Note
56        that a != 0 and b != 0 (else doubled_longprod == doubleprod == 0).
57        The difference either is or isn't significant compared to the
58        true value (of which doubleprod is a good approximation).
59     */
60     {
61         const double diff = doubled_longprod - doubleprod;
62         const double absdiff = diff >= 0.0 ? diff : -diff;
63         const double absprod = doubleprod >= 0.0 ? doubleprod :
64                               -doubleprod;
65         /* absdiff/absprod <= 1/32 iff
66            32 * absdiff <= absprod -- 5 good bits is "close enough" */
67         if (32.0 * absdiff <= absprod)
68             return PyInt_FromLong(longprod);    // the two products agree closely enough: keep the long result
69         else
70             return PyLong_Type.tp_as_number->nb_multiply(v, w);    // way off: delegate to PyLong_Type's nb_multiply
71     }
72 }
73  
复制代码

 

  由此可知,python的int实际上是用C里面的long实现的,所以加减乘都是先用long来运算;一旦检测到结果溢出,就转交给PyLong_Type(长整数)的对应操作去处理。又由于PyIntObject是一个不可变(Immutable)对象,运算不会修改原来的对象,因此最后return的都是PyInt_FromLong(x)得到的对象,即由一个long值得到对应的int对象。

(三)python 整数的实现
   python把整数分为大整数和小整数。这样做是为了加快计算、节省内存分配的时间:不论是什么对象,在堆上申请空间都是非常费时的,所以在涉及频繁内存操作的地方需要做一些优化。python采用了一种比较原始的方法——设个固定的阈值,无语了,有这么来的么,最起码来个动态阈值也好啊...
复制代码
 1 #ifndef NSMALLPOSINTS
 2 #define NSMALLPOSINTS           257
 3 #endif
 4 #ifndef NSMALLNEGINTS
 5 #define NSMALLNEGINTS           5
 6 #endif                                                                                    // with these defaults the small-int range is [-5, 257), i.e. -5 through 256 inclusive
 7 #if NSMALLNEGINTS + NSMALLPOSINTS > 0
 8 /* References to small integers are saved in this array so that they            // kept in an array so they can be shared
 9    can be shared.
10    The integers that are saved are those in the range
11    -NSMALLNEGINTS (inclusive) to NSMALLPOSINTS (not inclusive).        // [-5, 257)
12 */
13 static PyIntObject *small_ints[NSMALLNEGINTS + NSMALLPOSINTS];        // NSMALLNEGINTS + NSMALLPOSINTS PyIntObject* slots, one per shared small integer
14 #endif
15   
复制代码

 




 然后是大整数:大整数对象的内存以块(PyIntBlock)为单位申请,块内的对象通过空闲链表缓存、复用
复制代码
 1 #define BLOCK_SIZE      1000    /* 1K less typical malloc overhead */
 2 #define BHEAD_SIZE      8       /* Enough for a 64-bit pointer */
 3 #define N_INTOBJECTS    ((BLOCK_SIZE - BHEAD_SIZE) / sizeof(PyIntObject))    // how many PyIntObject slots fit in one block after the header
 4  
 5 struct _intblock {
 6     struct _intblock *next;                // link to the next block in the block list
 7     PyIntObject objects[N_INTOBJECTS];     // the int object slots held by this block
 8 };
 9  
10 typedef struct _intblock PyIntBlock;
11  
12 static PyIntBlock *block_list = NULL;      // head of the list of allocated blocks
13 static PyIntObject *free_list = NULL;      // head of the list of unused PyIntObject slots
复制代码

 

 
N_INTOBJECTS    到底是多少呢?算一下PyIntObject的大小,其中的PyObject_HEAD宏展开(without Py_TRACE_REFS)后,PyIntObject就是:
   Py_ssize_t  ob_refcnt;
   PyTypeObject *ob_type;
   long ob_ival;
在32位平台上字节大小为4+4+4 = 12,N_INTOBJECTS  = (1000-8)/12 = 82(整数除法向下取整),即一个PyIntBlock维护着82个PyIntObject;在64位(LP64)平台上则是8+8+8 = 24,N_INTOBJECTS = (1000-8)/24 = 41。乍一看PyIntBlock其实就是个单链表的结点,以32位为例,块内的82个objects相当于可以存放82个整数。
通过block_list 来维护,看代码:
复制代码
 1 static PyIntObject *
 2 fill_free_list(void)
 3 {
 4     PyIntObject *p, *q;
 5     /* Python's object allocator isn't appropriate for large blocks. */
 6     p = (PyIntObject *) PyMem_MALLOC(sizeof(PyIntBlock));
 7     if (p == NULL)
 8         return (PyIntObject *) PyErr_NoMemory();
 9     ((PyIntBlock *)p)->next = block_list;                // push the new block onto the front of block_list
10     block_list = (PyIntBlock *)p;
11     /* Link the int objects together, from rear to front, then return
12        the address of the last int object in the block. */
13     p = &((PyIntBlock *)p)->objects[0];        // address of the first slot (the front); note the '&'
14     q = p + N_INTOBJECTS;                           // one past the last slot; the last slot itself is q - 1
15     while (--q > p)                                          // after the first --q, q is the rear; walk from back to front
16         Py_TYPE(q) = (struct _typeobject *)(q-1);    // chain the slots: each slot's Py_TYPE field points at the slot before it
17     Py_TYPE(q) = NULL;                                 // q == p here (the first slot): terminate the chain with NULL
18     return p + N_INTOBJECTS - 1;                // return the rear, i.e. the last slot in the block
19 }
复制代码

 

由上可知,相当于对外是一个block_list(由PyIntBlock组成的链表),对内是每个block里的一系列PyIntObject;当需要新开辟一个block时,通过((PyIntBlock *)p)->next = block_list把新block接到链表头部,从而把这些block串起来。
现在我们可以看看PyInt_FromLong的实现了
复制代码
 1 [intobject.c]
 2 PyObject *
 3 PyInt_FromLong(long ival)
 4 {
 5     register PyIntObject *v;                                                    // 'register' is only a hint asking the compiler to keep v in a register
 6 #if NSMALLNEGINTS + NSMALLPOSINTS > 0
 7     if (-NSMALLNEGINTS <= ival && ival < NSMALLPOSINTS) {        // ival is in the small-int range: cache hit
 8         v = small_ints[ival + NSMALLNEGINTS];
 9         Py_INCREF(v);                                                                // the shared object gains one more reference
10 #ifdef COUNT_ALLOCS                                                        // optional allocation statistics
11         if (ival >= 0)
12             quick_int_allocs++;                                                // count of non-negative small-int hits
13         else
14             quick_neg_int_allocs++;                                        // count of negative small-int hits
15 #endif
16         return (PyObject *) v;
17     }
18 #endif
19     if (free_list == NULL) {                                                // not a small int and the free list is empty: allocate a new block
20         if ((free_list = fill_free_list()) == NULL)                 // fill_free_list() returns the last slot of the new block, so free_list points at a PyIntObject slot
21                                                                                         // (unlike block_list, which points at PyIntBlock structures)
22             return NULL;
23     }
24     /* Inline PyObject_New */
25     v = free_list;
26     free_list = (PyIntObject *)Py_TYPE(v);                    // pop: the next free slot was stored in v's Py_TYPE field; cast it back to PyIntObject*
27     PyObject_INIT(v, &PyInt_Type);                             // initialise the slot as an object of type PyInt_Type
28     v->ob_ival = ival;
29     return (PyObject *) v;
30 }
31  
复制代码

 

最后有
复制代码
 1 [intobject.c]
 2 #define PyInt_CheckExact(op) ((op)->ob_type == &PyInt_Type)    // true only when op's type is exactly PyInt_Type
 3 ...
 4 static void
 5 int_dealloc(PyIntObject *v)
 6 {
 7     if (PyInt_CheckExact(v)) {
 8         Py_TYPE(v) = (struct _typeobject *)free_list;        // exact int: no memory is released; v's Py_TYPE field is overwritten to point at the current free-list head
 9                                                                                         // and v then becomes the new head, i.e. the slot is pushed back for reuse
10         free_list = v;
11     }
12     else
13         Py_TYPE(v)->tp_free((PyObject *)v);                    // type is not exactly PyInt_Type: call that type's tp_free
14 }
15  
16 static void
17 int_free(PyIntObject *v)
18 {
19     Py_TYPE(v) = (struct _typeobject *)free_list;        // push v onto the free list: link it to the current head...
20     free_list = v;                                        // ...and make it the new head
21 }
复制代码

 

 
不要搞混的是,上述实现不管是小整数还是大整数,都只是起着缓冲池(对象缓存)的作用,不要误解为是实现了大数字(任意精度)运算的功能;这里所有的整数值都是用long保存的,这个很容易误导。


    再看小整数的缓冲池:

复制代码
 1 static PyIntObject *small_ints[NSMALLNEGINTS + NSMALLPOSINTS];   // declared as a static array of pointers
 2 ......... 
 3 
 4 int
 5 _PyInt_Init(void)
 6 {
 7     PyIntObject *v;
 8     int ival;
 9 #if NSMALLNEGINTS + NSMALLPOSINTS > 0
10     for (ival = -NSMALLNEGINTS; ival < NSMALLPOSINTS; ival++) {
11           if (!free_list && (free_list = fill_free_list()) == NULL)    // refill the free list whenever it is empty; give up if that fails
12                     return 0;
13         /* PyObject_New is inlined */
14         v = free_list;
15         free_list = (PyIntObject *)Py_TYPE(v);
16         PyObject_INIT(v, &PyInt_Type);
17         v->ob_ival = ival;                                                        // set the value, then store the object in the small_ints cache
18         small_ints[ival + NSMALLNEGINTS] = v;                    // one-to-one mapping: index = value + NSMALLNEGINTS
19     }
20 #endif
21     return 1;
22 }
复制代码

 

 总结一下:所有的整数对象都在堆上分配内存;小整数通过small_ints[]数组一一映射,加快查找速度;大整数则通过block链表和空闲链表来维护内存。

posted @ 2013-01-11 11:56  happydpc  阅读(290)  评论(0)    收藏  举报
努力加载评论中...
点击右上角即可分享
微信分享提示