drf 源码分析之【Serializer-序列化】

drf中为我们提供了Serializer,它主要有两大功能:

  • 对数据库查询到的对象进行序列化
  • 对请求数据校验(用法与Django的Form和ModelForm类似,但校验逻辑由drf自己实现,并不调用它们)

 

1. 序列化

引入一个例子分析:

# models.py

from django.db import models


class Role(models.Model):
    """Role table."""

    title = models.CharField(verbose_name="名称", max_length=32)

    def __str__(self):
        # Show the role title instead of the default "Role object (pk)".
        return self.title


class Department(models.Model):
    """Department table."""

    title = models.CharField(verbose_name="名称", max_length=32)

    def __str__(self):
        # Show the department title instead of the default "Department object (pk)".
        return self.title


class UserInfo(models.Model):
    """User table."""

    # (stored value, display label) pairs for the membership level.
    level_choices = ((1, "普通会员"), (2, "VIP"), (3, "SVIP"),)
    level = models.IntegerField(verbose_name="级别", choices=level_choices, default=1)

    username = models.CharField(verbose_name="用户名", max_length=32)
    password = models.CharField(verbose_name="密码", max_length=64)
    age = models.IntegerField(verbose_name="年龄", default=0)
    # EmailField is a CharField that additionally validates the address
    # format, which suits this column better than a plain CharField.
    email = models.EmailField(verbose_name="邮箱", max_length=64, null=True, blank=True)
    token = models.CharField(verbose_name="TOKEN", max_length=64, null=True, blank=True)

    # A user may have no department (nullable FK) and any number of roles.
    depart = models.ForeignKey(verbose_name="部门", to="Department", on_delete=models.CASCADE, null=True, blank=True)
    roles = models.ManyToManyField(verbose_name="角色", to="Role")

    def __str__(self):
        # Show the username instead of the default "UserInfo object (pk)".
        return self.username

定义Serializer(此例基于ModelSerializer)和使用:

# views.py

from rest_framework.views import APIView
from rest_framework.response import Response
from rest_framework import serializers
from rest_framework import exceptions
from django.forms.models import model_to_dict
from app01 import models


class UserModelSerializer(serializers.ModelSerializer):
    """Serialize ``UserInfo`` rows with customised display fields."""

    # 1. Choice field: expose the human-readable label of ``level`` by
    #    resolving the model's ``get_level_display`` method.
    level_text = serializers.CharField(source="get_level_display")
    # 2. Cross-table (FK) field: show the department's title.
    #    ``UserInfo.depart`` is nullable, so ``allow_null=True`` makes a user
    #    without a department serialize as ``None`` instead of raising
    #    AttributeError while resolving ``depart.title``.
    depart = serializers.CharField(source="depart.title", allow_null=True)
    # 3. Method fields: the value comes from the ``get_<field_name>`` hooks
    #    defined below.
    roles = serializers.SerializerMethodField()
    extra = serializers.SerializerMethodField()

    class Meta:
        model = models.UserInfo
        fields = ["username", "age", "email", "level_text", "depart", "roles", "extra"]

    def get_roles(self, obj):
        """Hook for ``roles``: return the user's roles as a list of dicts.

        ``obj`` is the model instance currently being serialized.
        Result shape: [{'id': 1, 'title': ...}, {'id': 2, 'title': ...}, ...]
        """
        return [
            model_to_dict(role, fields=["id", "title"])
            for role in obj.roles.all()
        ]

    def get_extra(self, obj):
        """Hook for ``extra``: a constant demo value."""
        return 666


class UserView(APIView):
    """User management."""

    def get(self, request):
        """List all users."""
        # Load each user's department and roles up front, so the serializer
        # does not issue extra queries per row when it reads ``depart.title``
        # and ``roles.all()``.
        queryset = (
            models.UserInfo.objects
            .select_related("depart")
            .prefetch_related("roles")
        )
        ser = UserModelSerializer(instance=queryset, many=True)
        print(ser.data)  # the serialized queryset has the shape [{}, {}, {}]
        return Response({"code": 0, 'data': ser.data})

 

(1) 定义类

class UserModelSerializer(serializers.ModelSerializer):
    """Abridged serializer definition used to follow class creation."""

    # 1. Choice field: expose the human-readable label of ``level``.
    level_text = serializers.CharField(source="get_level_display")
    depart = serializers.CharField(source="depart.title")
    roles = serializers.SerializerMethodField()
    # "extra" is listed in Meta.fields and is not a model field, so it has to
    # be declared explicitly; otherwise ModelSerializer rejects the name.
    extra = serializers.SerializerMethodField()

    class Meta:
        model = models.UserInfo
        fields = ["username", "age", "email", "level_text", "depart", "roles", "extra"]

    def get_roles(self, obj):
        # Body omitted in this abridged version.
        pass

    def get_extra(self, obj):
        # Body omitted in this abridged version.
        pass

# Parent classes in the inheritance chain (bodies omitted).
class ModelSerializer(Serializer):
    pass

# Serializer is the class that names SerializerMetaclass as its metaclass.
class Serializer(BaseSerializer, metaclass=SerializerMetaclass):
    pass

我们自定义的UserModelSerializer继承自ModelSerializer,ModelSerializer又继承自Serializer,而Serializer通过metaclass=SerializerMetaclass指定了元类。因此在创建类的时候(Serializer及其所有子类在被定义时都会如此),会执行元类SerializerMetaclass中的__new__方法。下面我们来看创建类的过程。

关于元类的介绍,参考:《Python Cookbook - 元编程》、《深入理解 Python 中的元类》

 

(2) 创建类

class SerializerMetaclass(type):
    """
    This metaclass sets a dictionary named `_declared_fields` on the class.

    Any instances of `Field` included as attributes on either the class
    or on any of its superclasses will be include in the
    `_declared_fields` dictionary.
    """
    # In short: the class being created gets a dict named `_declared_fields`
    # holding the Field instances declared on it and on its parent classes.
    @classmethod
    def _get_declared_fields(cls, bases, attrs):
        # Step 2: collect the fields declared as class attributes. Every Field
        # instance is popped out of `attrs`, giving pairs such as
        # [("level_text", <CharField instance>), ...].
        fields = [(field_name, attrs.pop(field_name))
                  for field_name, obj in list(attrs.items())
                  if isinstance(obj, Field)]
        # Order the pairs by each field's `_creation_counter`.
        fields.sort(key=lambda x: x[1]._creation_counter)

        # Step 3: collect the fields declared on parent classes (kept in each
        # parent's `_declared_fields`).
        # `known` starts as the names still left in `attrs` (the Fields were
        # popped above) and grows as base fields are visited.
        known = set(attrs)

        def visit(name):
            known.add(name)
            return name
        # Parent classes created through this metaclass already carry a
        # `_declared_fields` attribute. A name already in `known` is skipped,
        # so each name is taken from the first base that provides it.
        base_fields = [
            (visit(name), f)
            for base in bases if hasattr(base, '_declared_fields')
            for name, f in base._declared_fields.items() if name not in known
        ]
        # Merge everything into one ordered dict. `fields` comes last, so a
        # field declared on this class replaces a same-named inherited one.
        return OrderedDict(base_fields + fields)

    def __new__(cls, name, bases, attrs):
        # Step 1 (entry point): store the collected fields dict on the new
        # class as `_declared_fields` before the class object is built.
        attrs['_declared_fields'] = cls._get_declared_fields(bases, attrs)
        return super().__new__(cls, name, bases, attrs)

在我们自定义UserModelSerializer时,会执行上述代码,执行的结果就是在类中定义了一个名为_declared_fields的字典,其中包含了该类及其所有父类中的字段(Field)实例对象,比如本例中的如下:

# Illustration only (not runnable): the contents of `_declared_fields` for
# the UserModelSerializer example.
_declared_fields ={
    "level_text":serializers.CharField(source="get_level_display"),
    "depart": serializers.CharField(source="depart.title"),
    "roles": serializers.SerializerMethodField(),
    "extra": serializers.SerializerMethodField(),
    ... # (plus any fields declared on parent serializer classes)
}

 

(3) 应用(序列化)

本例中的应用:

class UserView(APIView):
    """User management view used to follow the serialization flow."""

    def get(self, request):
        # Fetch the data to serialize. select_related / prefetch_related load
        # each user's department and roles up front, so the serializer does
        # not issue extra queries per row.
        queryset = (
            models.UserInfo.objects
            .select_related("depart")
            .prefetch_related("roles")
        )
        # Create the serializer object.
        ser = UserModelSerializer(instance=queryset, many=True)
        # Read the result; the serialized queryset has the shape [{}, {}, {}].
        print(ser.data)
        return Response({"code": 0, 'data': ser.data})

我们知道,在实例化对象时,会先执行__new__方法而后执行__init__方法,

顺着继承关系(UserModelSerializer --> ModelSerializer --> Serializer --> BaseSerializer )可以在BaseSerializer中找到这两个方法:

class BaseSerializer(Field):
    # Store the constructor arguments on the instance.
    def __init__(self, instance=None, data=empty, **kwargs):
        self.instance = instance
        # `initial_data` is only set when `data` was actually passed.
        if data is not empty:
            self.initial_data = data
        self.partial = kwargs.pop('partial', False)
        self._context = kwargs.pop('context', {})
        # `many` is handled in __new__; discard it so it is not forwarded to
        # the parent __init__.
        kwargs.pop('many', None)
        super().__init__(**kwargs)

    def __new__(cls, *args, **kwargs):
        # With `many=True`, hand over to many_init() and return whatever it
        # builds (a `ListSerializer` instance by default).
        if kwargs.pop('many', False):
            return cls.many_init(*args, **kwargs)
        # Without `many`, create the object of this class directly.
        return super().__new__(cls, *args, **kwargs)

当参数中设置了many=True时,执行many_init方法。顺着继承关系,依然在BaseSerializer中,这个方法的执行结果就是生成一个ListSerializer对象来替代原对象,并且把原对象放在参数中。

class BaseSerializer(Field):

    # Builds the list serializer that stands in for this serializer when
    # `many=True` is passed.
    @classmethod
    def many_init(cls, *args, **kwargs):

        allow_empty = kwargs.pop('allow_empty', None)
        # Create the original object (an instance of the user-defined
        # serializer class); it is stored in `list_kwargs` as 'child'.
        child_serializer = cls(*args, **kwargs)
        list_kwargs = {
            'child': child_serializer,
        }
        # Forward `allow_empty` only when the caller supplied it.
        if allow_empty is not None:
            list_kwargs['allow_empty'] = allow_empty
        # Forward only the keyword arguments named in LIST_SERIALIZER_KWARGS.
        list_kwargs.update({
            key: value for key, value in kwargs.items()
            if key in LIST_SERIALIZER_KWARGS
        })
        # `Meta.list_serializer_class`, when defined, replaces ListSerializer.
        meta = getattr(cls, 'Meta', None)
        list_serializer_class = getattr(meta, 'list_serializer_class', ListSerializer)
        # Return the list serializer instance, initialised with the original
        # object as its `child` argument.
        return list_serializer_class(*args, **list_kwargs)

以上就是在创建Serializer对象时的执行流程,会有两种结果:

  • 传了参数many=True时,生成的是ListSerializer类的实例对象
  • 没有定义many参数时,生成的就是自定义的Serializer类的实例对象

 

再继续看获取数据(序列化)时,也就是获取对象的data属性的执行源码,顺着继承关系,找到两个相关的:

class Serializer(BaseSerializer, metaclass=SerializerMetaclass):
    @property
    def data(self):
        # Delegate to the parent class's `data` property, then wrap the
        # result in a ReturnDict that keeps a reference to this serializer.
        ret = super().data
        return ReturnDict(ret, serializer=self)
    
class BaseSerializer(Field):
    @property
    def data(self):
        # Compute the result once and keep it in `_data` for later accesses.
        if not hasattr(self, '_data'):
            if self.instance is not None and not getattr(self, '_errors', None):
                # Serialization path: an instance was given and there are no
                # errors, so `to_representation` is called on the instance.
                self._data = self.to_representation(self.instance)
            elif hasattr(self, '_validated_data') and not getattr(self, '_errors', None):
                # No instance, but validated data without errors is available.
                self._data = self.to_representation(self.validated_data)
            else:
                # Otherwise fall back to get_initial().
                self._data = self.get_initial()
        return self._data

指向了to_representation方法。注意:虽然BaseSerializer中也有这个方法,但还是得区分调用时的self是谁,才能找到对应的方法。本例中self是ListSerializer类的实例对象,所以源码如下:

class ListSerializer(BaseSerializer):

    def __init__(self, *args, **kwargs):
        # `self.child` is the user-defined serializer instance passed in as
        # the 'child' keyword argument; if it is missing, a deep copy of the
        # existing `self.child` is used instead.
        # (The rest of __init__ is omitted in this excerpt.)
        self.child = kwargs.pop('child', copy.deepcopy(self.child))

    def to_representation(self, data):
        # In this example `data` is a queryset. A Manager is turned into a
        # queryset with .all(); anything else is iterated as-is.
        iterable = data.all() if isinstance(data, models.Manager) else data
        # Serialize each item in turn.
        return [
            # `self.child` is a Serializer subclass instance, so this call
            # resolves to Serializer.to_representation.
            self.child.to_representation(item) for item in iterable
        ]

看到这我们知道了,前面创建Serializer实例时,有两种情况生成不同的对象的作用就是:如果序列化的数据是queryset,就遍历每个模型类对象执行序列化,如果是单条数据,就直接执行序列化了,但殊途同归,最终序列化的执行,还是回到Serializer类中:

class Serializer(BaseSerializer, metaclass=SerializerMetaclass):

    def to_representation(self, instance):  # `instance` is a single object (one model instance)
        """
        Object instance -> Dict of primitive datatypes.
        """
        ret = OrderedDict()
        # 1. Get the generator of field objects (fields listed in Meta plus
        #    fields declared as class attributes).
        fields = self._readable_fields
        # Serialize the fields one by one.
        for field in fields:
            try:
                # 2. Get the value of the field from the instance.
                attribute = field.get_attribute(instance)
            except SkipField:
                # The field asked to be left out of the output.
                continue

            # For a PKOnlyObject test its `pk`; otherwise test the attribute
            # itself. A None value is written out as None without calling
            # the field's to_representation.
            check_for_none = attribute.pk if isinstance(attribute, PKOnlyObject) else attribute
            if check_for_none is None:
                ret[field.field_name] = None
            else:
                # 3. Let the field convert the value it fetched.
                ret[field.field_name] = field.to_representation(attribute)

        return ret

 

  1. 获取字段对象生成器:fields = self._readable_fields

    class Serializer(BaseSerializer, metaclass=SerializerMetaclass):
        @property
        def _readable_fields(self):
            # Generator over the field objects that are not write-only.
            for field in self.fields.values():
                if not field.write_only:
                    yield field  # a field instance

        @cached_property
        def fields(self):
            """
            A dictionary of {field_name: field_instance}.
            """
            # Each assignment goes through BindingDict's item assignment.
            fields = BindingDict(self)
            for key, value in self.get_fields().items():
                fields[key] = value
            return fields

        def get_fields(self):
            """
            Returns a dictionary of {field_name: field_instance}.
            """
            # Work on a deep copy so the class-level `_declared_fields` is
            # not shared with the instance.
            return copy.deepcopy(self._declared_fields)
    

    还记得大明湖畔的_declared_fields吗?在刚开始创建类时执行__new__将读取到的字段都存入字典中并写入类_declared_fields字段中。所以上面的fields = self._readable_fields就是获取了一个字段对象的生成器。

    NOTE:此段代码中还会去读取Meta中自定义的字段,具体不再展开(其实是我也没搞明白...)。

    好吧,强迫症逼迫我搞明白他:

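    经测验,fields(self)方法执行后,return的fields就带了Meta中定义的所有字段。不过这并不是执行fields[key] = value时触发BindingDict实例的__setitem__方法、进而触发bind方法带来的:bind方法只负责给字段对象设置field_name和parent,并据此确定source等属性(见下方源码及其注释)。真正的原因是ModelSerializer重写了get_fields方法:它会读取Meta.model和Meta.fields,对其中没有显式声明的字段(如本例的username、age、email)根据模型字段自动生成对应的序列化字段,再与_declared_fields中显式声明的字段合并后返回。上面展示的Serializer.get_fields只是基类中的默认实现。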

    class BindingDict(MutableMapping):

        def __setitem__(self, key, field):
            # Store the field, then bind it to the owning serializer under
            # the name it was assigned to.
            self.fields[key] = field
            field.bind(field_name=key, parent=self.serializer)
    
    class Field:
        def bind(self, field_name, parent):
            """
            Initializes the field name and parent for the field instance.
            Called when a field is added to the parent serializer instance.
            """
            # Body omitted in this excerpt.
            pass
    

 

  2. 获取字段的值: attribute = field.get_attribute(instance)

    field是定义的各种字段对象,比如CharField、IntegerField等,如用例中的depart = serializers.CharField(source="depart.title"),就是CharField对象调用get_attribute方法,可以在其父类Field中找到:

    class Field:
        def get_attribute(self, instance):
            # Look the value up according to the `source` argument by calling
            # the module-level get_attribute() function with `source_attrs`.
            return get_attribute(instance, self.source_attrs)

        def bind(self, field_name, parent):
            # ... (earlier part of bind omitted in this excerpt)
            # Without an explicit `source`, the field name is the source.
            if self.source is None:
                self.source = field_name
            # '*' yields an empty attribute path.
            if self.source == '*':
                self.source_attrs = []
            else:
                # e.g. source="depart.title" gives source_attrs=['depart', 'title']
                self.source_attrs = self.source.split('.')
    
    def get_attribute(instance, attrs):
        """
        Similar to Python's built in `getattr(instance, attr)`,
        but takes a list of nested attributes, instead of a single attribute.

        Also accepts either attribute lookup on objects or dictionary lookups.
        """

        # Walk the attribute path one step at a time, replacing `instance`
        # with the value found at each step.
        for attr in attrs:
            try:
                if isinstance(instance, Mapping):
                    instance = instance[attr]
                else:
                    instance = getattr(instance, attr)
            except ObjectDoesNotExist:
                # ObjectDoesNotExist during the lookup yields None.
                return None
            # A simple callable is called and replaced by its result; this is
            # the path taken by e.g. `get_level_display`.
            if is_simple_callable(instance):
                instance = instance()
        return instance
    
    

 

  3. 对获取的值再次处理:ret[field.field_name] = field.to_representation(attribute)

    其中field有两种情况:

    • drf中提供的字段类:
    class CharField(Field):

        def to_representation(self, value):
            # The fetched value is output as its string form.
            return str(value)
    
    
    • SerializerMethodField

      如用例中的roles = serializers.SerializerMethodField()

    class SerializerMethodField(Field):

        def __init__(self, method_name=None, **kwargs):
            self.method_name = method_name
            # The source is always '*' and the field is always read-only,
            # whatever the caller passed.
            kwargs['source'] = '*'
            kwargs['read_only'] = True
            super().__init__(**kwargs)

        def bind(self, field_name, parent):
            # The method name defaults to `get_{field_name}`,
            # e.g. the `get_roles` method defined for the `roles` field.
            if self.method_name is None:
                self.method_name = 'get_{field_name}'.format(field_name=field_name)

            super().bind(field_name, parent)

        # Looks up the hook method on the parent serializer and calls it with
        # the value, e.g. the user-defined get_roles(obj), which does the
        # custom processing of the data.
        def to_representation(self, value):
            method = getattr(self.parent, self.method_name)
            return method(value)
    

 

序列化小结

至此,整个序列化的执行流程就结束了。从源码分析来看,简单地概括就是:把从数据库中获取到的数据序列化为Python支持的数据类型(字典或者列表),在此基础上还可以自定义地处理、加工成个性化的内容。源码的执行流程像是一层层地委派任务:比如原始的queryset数据,拆分为一个个模型类对象去处理;每个模型类对象又拆分为一个个字段对象分别执行序列化;每个环节完成一部分工作,最后逐级汇总得到全部序列化后的数据。

posted on 2022-06-24 23:45  吃大飞  阅读(137)  评论(0)    收藏  举报