drf 源码分析之【Serializer-序列化】
drf中为我们提供了Serializer,他主要有两大功能:
- 对数据库查询到的对象进行序列化
- 对请求数据校验(设计和用法类似于Django的Form和ModelForm,但并不是在底层调用它们)
1. 序列化
引入一个例子分析:
# models.py
from django.db import models
class Role(models.Model):
    """Role table."""

    # Display name of the role (at most 32 characters).
    title = models.CharField(verbose_name="名称", max_length=32)
class Department(models.Model):
    """Department table."""

    # Display name of the department (at most 32 characters).
    title = models.CharField(verbose_name="名称", max_length=32)
class UserInfo(models.Model):
    """User table."""

    # (value, label) pairs for the membership level stored in `level`.
    level_choices = ((1, "普通会员"), (2, "VIP"), (3, "SVIP"),)
    level = models.IntegerField(verbose_name="级别", choices=level_choices, default=1)
    username = models.CharField(verbose_name="用户名", max_length=32)
    password = models.CharField(verbose_name="密码", max_length=64)
    age = models.IntegerField(verbose_name="年龄", default=0)
    email = models.CharField(verbose_name="邮箱", max_length=64, null=True, blank=True)  # NOTE(author): CharField is not ideal here; EmailField seems a better fit
    token = models.CharField(verbose_name="TOKEN", max_length=64, null=True, blank=True)
    # Optional link to a department: the column is nullable, so `depart` may be None.
    depart = models.ForeignKey(verbose_name="部门", to="Department", on_delete=models.CASCADE, null=True, blank=True)
    # A user can hold any number of roles.
    roles = models.ManyToManyField(verbose_name="角色", to="Role")
定义Serializer(此例基于ModelSerializer)和使用:
# views.py
from rest_framework.views import APIView
from rest_framework.response import Response
from rest_framework import serializers
from rest_framework import exceptions
from django.forms.models import model_to_dict
from app01 import models
class UserModelSerializer(serializers.ModelSerializer):
    """Serialize ``UserInfo`` rows for output.

    Besides the plain model fields listed in ``Meta.fields`` it exposes four
    customised fields: ``level_text``, ``depart``, ``roles`` and ``extra``.
    """

    # 1. Override how a `choices` field is shown: emit the label of the
    #    stored number via the model's `get_level_display()` method.
    level_text = serializers.CharField(source="get_level_display")
    # 2. Override how a FK (cross-table) field is shown: emit the department's
    #    title. `UserInfo.depart` is nullable, so `allow_null=True` makes a
    #    user without a department serialize as None instead of raising
    #    AttributeError while resolving "depart.title".
    depart = serializers.CharField(source="depart.title", allow_null=True)
    # 3. Method fields: the value comes from the `get_<field_name>` hooks below.
    roles = serializers.SerializerMethodField()
    extra = serializers.SerializerMethodField()

    class Meta:
        model = models.UserInfo
        fields = ["username", "age", "email", "level_text", "depart", "roles", "extra"]

    def get_roles(self, obj):
        """Hook for the ``roles`` field.

        ``obj`` is the ``UserInfo`` row currently being serialized. Returns a
        list such as ``[{'id': 1, 'title': '员工'}, {'id': 2, 'title': '总监'}]``.
        """
        return [
            model_to_dict(role, fields=["id", "title"])
            for role in obj.roles.all()
        ]

    def get_extra(self, obj):
        """Hook for the ``extra`` field; always returns the constant 666."""
        return 666
class UserView(APIView):
    """User management."""

    def get(self, request):
        """List all users."""
        # The serializer reads `depart.title` and `roles.all()` for every row.
        # Loading both relations up front avoids one extra query per user
        # per relation (the N+1 pattern).
        queryset = (
            models.UserInfo.objects
            .select_related("depart")
            .prefetch_related("roles")
        )
        ser = UserModelSerializer(instance=queryset, many=True)
        print(ser.data)  # serialized form of the queryset: [{}, {}, {}]
        return Response({"code": 0, 'data': ser.data})
(1) 定义类
class UserModelSerializer(serializers.ModelSerializer):
    """Abridged listing of the example serializer (hook bodies omitted)."""

    # 1. Override how a `choices` field is shown: emit the label of the
    #    stored number via the model's `get_level_display()` method.
    level_text = serializers.CharField(source="get_level_display")
    # `UserInfo.depart` is a nullable FK; `allow_null=True` lets a user
    # without a department serialize as None instead of raising
    # AttributeError while resolving "depart.title".
    depart = serializers.CharField(source="depart.title", allow_null=True)
    roles = serializers.SerializerMethodField()
    # Declared because Meta.fields lists "extra"; a name in Meta.fields that
    # is neither a model field nor a declared field is rejected by DRF.
    extra = serializers.SerializerMethodField()

    class Meta:
        model = models.UserInfo
        fields = ["username", "age", "email", "level_text", "depart", "roles", "extra"]

    def get_roles(self, obj):
        # Body omitted in this abridged listing.
        pass

    def get_extra(self, obj):
        # Body omitted in this abridged listing.
        pass
# The parent classes it inherits from (bodies omitted):
class ModelSerializer(Serializer):
    pass


# `Serializer` names SerializerMetaclass as its metaclass.
class Serializer(BaseSerializer, metaclass=SerializerMetaclass):
    pass
我们自定义的UserModelSerializer继承自ModelSerializer,进而继承自Serializer类;而Serializer类并不是"继承"自SerializerMetaclass,而是通过metaclass=SerializerMetaclass指定由这个元类来创建。因此我们在创建类的时候,其实是会执行元类SerializerMetaclass中的__new__方法(Serializer的所有子类在创建时都会执行),那么我们来看下创建类的过程
关于元类的介绍,参考:Python Cookbook - 元编程、深入理解 Python 中的元类
(2) 创建类
class SerializerMetaclass(type):
    """
    This metaclass sets a dictionary named `_declared_fields` on the class.
    Any instances of `Field` included as attributes on either the class
    or on any of its superclasses will be include in the
    `_declared_fields` dictionary.
    """
    # In short: the class gets a dict named `_declared_fields` holding the
    # Field instances declared on the class itself and on its base classes.

    @classmethod
    def _get_declared_fields(cls, bases, attrs):
        # Step 2: collect the fields declared as class attributes. Each one is
        # popped out of `attrs`, so it does not remain a plain class attribute.
        fields = [(field_name, attrs.pop(field_name))  # e.g. [("level_text", <CharField object>), ...]
                  for field_name, obj in list(attrs.items())
                  if isinstance(obj, Field)]
        # Order the collected fields by their `_creation_counter`.
        fields.sort(key=lambda x: x[1]._creation_counter)

        # Step 3: collect the fields declared on base classes (stored in
        # their own `_declared_fields`).
        known = set(attrs)

        def visit(name):
            # Record the name so the same base-class field is not added twice.
            known.add(name)
            return name

        # Base classes were created earlier, so they already carry a
        # `_declared_fields` attribute. Names already in `known` are skipped:
        # base-class fields are only added on top of what is already there.
        base_fields = [
            (visit(name), f)
            for base in bases if hasattr(base, '_declared_fields')
            for name, f in base._declared_fields.items() if name not in known
        ]

        # Store everything collected above in one dict: base-class fields
        # first, then the fields declared on this class.
        return OrderedDict(base_fields + fields)

    def __new__(cls, name, bases, attrs):
        # Step 1: execution starts here. The collected fields (already in a
        # dict) are written to the `_declared_fields` attribute of the class.
        attrs['_declared_fields'] = cls._get_declared_fields(bases, attrs)
        return super().__new__(cls, name, bases, attrs)
在我们自定义UserModelSerializer时,会执行上述代码,执行的结果就是在类中定义了一个名为_declared_fields的字典,其中包含了该类及其所有父类中的字段(Field)实例对象,比如本例中的如下:
_declared_fields ={
"level_text":serializers.CharField(source="get_level_display"),
"depart": serializers.CharField(source="depart.title"),
"roles": serializers.SerializerMethodField(),
"extra": serializers.SerializerMethodField(),
... # (父类中的字段)
}
(3) 应用(序列化)
本例中的应用:
class UserView(APIView):
    def get(self, request):
        # Fetch the data to serialize. The serializer reads `depart.title`
        # and `roles.all()` for every row, so both relations are loaded up
        # front to avoid one extra query per user per relation (N+1).
        queryset = (
            models.UserInfo.objects
            .select_related("depart")
            .prefetch_related("roles")
        )
        # Create the serializer object.
        ser = UserModelSerializer(instance=queryset, many=True)
        # Read the result.
        print(ser.data)  # serialized form of the queryset: [{}, {}, {}]
        return Response({"code": 0, 'data': ser.data})
我们知道,在实例化对象时,会先执行__new__方法而后执行__init__方法,
顺着继承关系(UserModelSerializer --> ModelSerializer --> Serializer --> BaseSerializer )可以在BaseSerializer中找到这两个方法:
class BaseSerializer(Field):
    # Initialise the instance from the constructor arguments.
    def __init__(self, instance=None, data=empty, **kwargs):
        self.instance = instance
        # `initial_data` is only set when `data` was actually passed in.
        if data is not empty:
            self.initial_data = data
        self.partial = kwargs.pop('partial', False)
        self._context = kwargs.pop('context', {})
        # Drop `many` so it is not forwarded to the parent initialiser.
        kwargs.pop('many', None)
        super().__init__(**kwargs)

    def __new__(cls, *args, **kwargs):
        # With `many=True`, delegate to `many_init`, which returns an
        # instance of a `ListSerializer` class instead.
        if kwargs.pop('many', False):
            return cls.many_init(*args, **kwargs)
        # Without `many=True`, the object is created directly.
        return super().__new__(cls, *args, **kwargs)
当参数中设置了many=True时,执行many_init方法。顺着继承关系,依然在BaseSerializer中,这个方法的执行结果就是生成一个ListSerializer对象来替代原对象,并且把原对象放在参数中。
class BaseSerializer(Field):
    @classmethod
    def many_init(cls, *args, **kwargs):
        allow_empty = kwargs.pop('allow_empty', None)
        # Build the original object (an instance of the custom serializer);
        # it is stored in the `list_kwargs` dict under 'child'.
        child_serializer = cls(*args, **kwargs)
        list_kwargs = {
            'child': child_serializer,
        }
        # `allow_empty` is forwarded only when the caller supplied it.
        if allow_empty is not None:
            list_kwargs['allow_empty'] = allow_empty
        # Forward only the keyword arguments named in LIST_SERIALIZER_KWARGS.
        list_kwargs.update({
            key: value for key, value in kwargs.items()
            if key in LIST_SERIALIZER_KWARGS
        })
        # `Meta.list_serializer_class` may override the default ListSerializer.
        meta = getattr(cls, 'Meta', None)
        list_serializer_class = getattr(meta, 'list_serializer_class', ListSerializer)
        # Return a list-serializer instance initialised with the original
        # object as its `child` argument.
        return list_serializer_class(*args, **list_kwargs)
以上就是在创建Serializer对象时的执行流程,会有两种结果:
- 传了参数many=True时,生成的是ListSerializer类的实例对象
- 没有定义many参数时,生成的就是自定义的Serializer类的实例对象
再继续看获取数据(序列化)时,也就是获取对象的data属性的执行源码,顺着继承关系,找到两个相关的:
class Serializer(BaseSerializer, metaclass=SerializerMetaclass):
    @property
    def data(self):
        # Delegate to the `data` property of the parent class, then wrap the
        # result in a ReturnDict that keeps a reference to this serializer.
        ret = super().data
        return ReturnDict(ret, serializer=self)
class BaseSerializer(Field):
    @property
    def data(self):
        # The result is computed once and cached on `self._data`.
        if not hasattr(self, '_data'):
            if self.instance is not None and not getattr(self, '_errors', None):
                # Serialization takes this branch: it hands the instance to
                # `to_representation`.
                self._data = self.to_representation(self.instance)
            elif hasattr(self, '_validated_data') and not getattr(self, '_errors', None):
                # No instance, but validated input exists: represent that.
                self._data = self.to_representation(self.validated_data)
            else:
                # Otherwise fall back to the initial value.
                self._data = self.get_initial()
        return self._data
指向了to_representation方法,注意虽然BaseSerializer中有这个方法,但还是得区分调用的self是谁,才能找到对应的方法,本例中self是ListSerializer类的实例对象,所以源码如下:
class ListSerializer(BaseSerializer):
    def __init__(self, *args, **kwargs):
        # `self.child` is the instance of the custom serializer; when no
        # 'child' argument is given, a deep copy of the existing `self.child`
        # is used instead.
        self.child = kwargs.pop('child', copy.deepcopy(self.child))

    def to_representation(self, data):
        # In the walkthrough, `data` is a queryset. A Manager is turned into
        # a queryset with `.all()`; anything else is iterated as given.
        iterable = data.all() if isinstance(data, models.Manager) else data
        # Serialize every model object in turn.
        return [
            # Following the inheritance chain, this call ends up in
            # Serializer.to_representation.
            self.child.to_representation(item) for item in iterable
        ]
看到这我们知道了,前面创建Serializer实例时,有两种情况生成不同的对象的作用就是:如果序列化的数据是queryset,就遍历每个模型类对象执行序列化,如果是单条数据,就直接执行序列化了,但殊途同归,最终序列化的执行,还是回到Serializer类中:
class Serializer(BaseSerializer, metaclass=SerializerMetaclass):
    def to_representation(self, instance):  # `instance` is a single record (one model object)
        """
        Object instance -> Dict of primitive datatypes.
        """
        ret = OrderedDict()
        # 1. Get the readable field objects (fields defined through Meta plus
        #    fields declared as class attributes).
        fields = self._readable_fields

        # Serialize the fields one by one.
        for field in fields:
            try:
                # 2. Read the value of this field from the instance.
                attribute = field.get_attribute(instance)
            except SkipField:
                # The field asked to be left out of the output.
                continue

            # For a PKOnlyObject the None check is made on its `pk`.
            check_for_none = attribute.pk if isinstance(attribute, PKOnlyObject) else attribute
            if check_for_none is None:
                ret[field.field_name] = None
            else:
                # 3. Let the field convert the value it read.
                ret[field.field_name] = field.to_representation(attribute)

        return ret
- 获取字段对象生成器:fields = self._readable_fields

class Serializer(BaseSerializer, metaclass=SerializerMetaclass):
    @property
    def _readable_fields(self):
        for field in self.fields.values():
            if not field.write_only:
                yield field  # field_instance 字段对象

    @cached_property
    def fields(self):
        """
        A dictionary of {field_name: field_instance}.
        """
        fields = BindingDict(self)
        for key, value in self.get_fields().items():
            fields[key] = value
        return fields

    def get_fields(self):
        """
        Returns a dictionary of {field_name: field_instance}.
        """
        return copy.deepcopy(self._declared_fields)

还记得大明湖畔的_declared_fields吗?在刚开始创建类时执行__new__将读取到的字段都存入字典中并写入类_declared_fields字段中。所以上面的fields = self._readable_fields就是获取了一个字段对象的生成器。NOTE:此段代码中还会去读取Meta中自定义的字段,具体不再展开(其实是我也没搞明白...)。
好吧,强迫症逼迫我搞明白它:
经测验,fields(self)方法执行后,return的fields确实带了Meta中自定义的所有字段。不过原因并不在fields[key] = value这一句,而在它前面调用的self.get_fields():上面贴出的是Serializer.get_fields,而本例的类继承自ModelSerializer,ModelSerializer重写了get_fields方法,会根据Meta中的model和fields为模型字段生成对应的字段对象,再和_declared_fields中声明的字段合并后返回。至于fields[key] = value这句,它触发的是BindingDict实例的__setitem__方法,进而触发字段的bind方法。看bind方法的注释可知,它只是在一个字段被添加到serializer实例时,初始化该字段的field_name和parent:

class BindingDict(MutableMapping):
    def __setitem__(self, key, field):
        self.fields[key] = field
        field.bind(field_name=key, parent=self.serializer)

class Field:
    def bind(self, field_name, parent):
        """
        Initializes the field name and parent for the field instance.
        Called when a field is added to the parent serializer instance.
        """
        pass
- 获取字段的值:attribute = field.get_attribute(instance)
field是定义的各种字段对象,比如CharField、IntegerField等,如用例中的depart = serializers.CharField(source="depart.title"),就是CharField对象调用get_attribute方法,可以在其父类Field中找到:

class Field:
    def get_attribute(self, instance):
        # 根据source参数获取字段
        # 调用外面的get_attribute函数
        return get_attribute(instance, self.source_attrs)

    def bind(self, field_name, parent):
        # ...
        if self.source is None:
            self.source = field_name
        if self.source == '*':
            self.source_attrs = []
        else:
            # 如:source="depart.title",则source_attrs=['depart','title']
            self.source_attrs = self.source.split('.')

def get_attribute(instance, attrs):
    """
    Similar to Python's built in `getattr(instance, attr)`,
    but takes a list of nested attributes, instead of a single attribute.
    Also accepts either attribute lookup on objects or dictionary lookups.
    """
    for attr in attrs:
        try:
            if isinstance(instance, Mapping):
                instance = instance[attr]
            else:
                instance = getattr(instance, attr)
        except ObjectDoesNotExist:
            return None
        # 如:get_level_display会走这
        if is_simple_callable(instance):
            instance = instance()
    return instance
- 对获取的值再次处理:ret[field.field_name] = field.to_representation(attribute)
其中field有两种情况:
- drf中提供的字段类:

class CharField(Field):
    def to_representation(self, value):
        return str(value)

- SerializerMethodField,如用例中的roles = serializers.SerializerMethodField()

class SerializerMethodField(Field):
    def __init__(self, method_name=None, **kwargs):
        self.method_name = method_name
        kwargs['source'] = '*'
        kwargs['read_only'] = True
        super().__init__(**kwargs)

    def bind(self, field_name, parent):
        # The method name defaults to `get_{field_name}`.
        # 如:定义的get_roles方法
        if self.method_name is None:
            self.method_name = 'get_{field_name}'.format(field_name=field_name)
        super().bind(field_name, parent)

    # 如:执行自定义的get_roles(obj),在内完成对数据的定制加工
    def to_representation(self, value):
        method = getattr(self.parent, self.method_name)
        return method(value)
序列化小结
至此,整个序列化的执行流程就结束了。从源码分析来看,简单的概括就是,把数据库中获取到的数据序列化为python支持的数据类型(字典或者列表),在这基础上还可以自定义地处理加工成个性化的内容。源码的执行流程像是一层层地委派任务,比如原始的queryset数据,分为一个个的模型类数据去处理,每条模型类数据又可以分为一个个的字段对象分别执行序列化,每个模块完成一部分工作,最后逐级汇总得到全部序列化后的数据。
浙公网安备 33010602011771号