DRF 源码解析 解析器 (六)

111 阅读6分钟

概述:

解析器解析请求者发送过来的数据,最常用的格式比如JSON

数据可能是附在url、请求头或请求体中。url的数据解析后储存在request.query_params中,解析器负责解析请求体的数据

形如name=xxx&age=19格式的请求体,对应的请求头为content-type: "application/x-www-form-urlencoded";而形如{"name": "xxx", "age":19}的JSON数据,对应的请求头为content-type: "application/json"

所以解析流程是:首先读取请求头中的content-type,然后根据它选出匹配的解析器类,最后由该解析器解析请求体的数据。

常见解析器和执行流程

class APIView(View):
    """
    Excerpt of the view base class, limited to the pieces that wire parsers
    and content negotiation into the request object.

    A bare `...` marks code omitted from this excerpt.
    """
    parser_classes = api_settings.DEFAULT_PARSER_CLASSES
    content_negotiation_class = api_settings.DEFAULT_CONTENT_NEGOTIATION_CLASS

    def dispatch(self, request, *args, **kwargs):
        """
        Wrap the incoming request, run `initial()`, call the handler method
        named after the HTTP verb, and return the finalized response.

        Any exception raised by `initial()` or by the handler is converted
        into a response by `handle_exception()`.
        """
        self.args = args
        self.kwargs = kwargs
        # Wrap the Django request in a `Request` object
        request = self.initialize_request(request, *args, **kwargs)
        self.request = request
        self.headers = self.default_response_headers  # deprecate?

        try:
            self.initial(request, *args, **kwargs)

            # Look up the view's get/post/... method by name via getattr;
            # unknown or unimplemented verbs fall back to http_method_not_allowed
            if request.method.lower() in self.http_method_names:
                handler = getattr(self, request.method.lower(),
                                  self.http_method_not_allowed)
            else:
                handler = self.http_method_not_allowed

            response = handler(request, *args, **kwargs)

        except Exception as exc:
            response = self.handle_exception(exc)

        self.response = self.finalize_response(request, response, *args, **kwargs)
        return self.response



    def initialize_request(self, request, *args, **kwargs):
        """Build the `Request` wrapper, handing it the parser-related components."""
        # get_parser_context returns {'view', 'args', 'kwargs'};
        # that dict is what gets passed on as parser_context
        parser_context = self.get_parser_context(request)

        return Request(
            request,                                   # Django's request object
            parsers=self.get_parsers(),                # list of parser instances
            authenticators=self.get_authenticators(),  # authentication component; presumably a list of authenticator instances (method not shown here)
            negotiator=self.get_content_negotiator(),  # instance of content_negotiation_class
            parser_context=parser_context              # {'view', 'args', 'kwargs'}; Request.__init__ adds 'request' and 'encoding'
        )

    def get_parser_context(self, http_request):
        """
        Returns a dict that is passed through to Parser.parse(),
        as the `parser_context` keyword argument.
        """
        # Note: Additionally `request` and `encoding` will also be added
        #       to the context by the Request object.
        return {
            'view': self,  # the current view instance
            # self.args and self.kwargs are set in dispatch();
            # getattr supplies empty defaults when they are missing
            'args': getattr(self, 'args', ()),
            'kwargs': getattr(self, 'kwargs', {})
        }

    # Instantiate every configured parser class and collect them in a list
    def get_parsers(self):
        return [parser() for parser in self.parser_classes]

    # Instantiate content_negotiation_class once and reuse the cached instance
    def get_content_negotiator(self):
        if not getattr(self, '_negotiator', None):
            self._negotiator = self.content_negotiation_class()
        return self._negotiator

    def initial(self, request, *args, **kwargs):
        """Pre-handler setup (excerpt: only the content negotiation step is shown)."""
        ...
        # Renderer-related negotiation; not expanded on in this walkthrough
        neg = self.perform_content_negotiation(request)
        request.accepted_renderer, request.accepted_media_type = neg

        ...


class Request:
    """
    Constructor excerpt of the request wrapper.

    It only stores the parser-related components on the instance; no
    parsing of the request body happens here.
    """

    def __init__(self, request, parsers=None, authenticators=None,
                 negotiator=None, parser_context=None):
        """
        Store the wrapped Django request together with its parsers,
        negotiator and parser context.

        The parser context dict (a new one when none is given) is extended
        in place with the keys 'request' (this object) and 'encoding'.
        """
        assert isinstance(request, HttpRequest), (
            'The `request` argument must be an instance of '
            '`django.http.HttpRequest`, not `{}.{}`.'
            .format(request.__class__.__module__, request.__class__.__name__)
        )

        self._request = request
        self.parsers = parsers if parsers else ()
        self.negotiator = negotiator if negotiator else self._default_negotiator()

        # Fall back to a fresh dict only when no context was supplied at all
        self.parser_context = {} if parser_context is None else parser_context
        # Parsers receive this dict, so give them access to the request
        # itself and to the encoding to decode the body with
        self.parser_context.update(
            request=self,
            encoding=request.encoding or settings.DEFAULT_CHARSET,
        )

目前的流程中,只是在封装request对象的时候,将解析器相关的组件封装进request对象内,并没有执行解析。当调用request.data时,才会触发解析动作。

class Empty:
    pass

def _hasattr(obj, name):
    return not getattr(obj, name) is Empty


class Request:
    """
    Excerpt of the request wrapper showing how the body is parsed lazily.

    Nothing is parsed until `data` is first read. A `...` body marks code
    omitted from this excerpt.
    """

    @property
    def data(self):
        """
        Return the parsed request body, parsing it on first access.

        `_full_data` still holding the `Empty` sentinel means the body has
        not been parsed yet.
        """
        if not _hasattr(self, '_full_data'):
            self._load_data_and_files()
        # Parsed on an earlier access: return the stored result directly
        return self._full_data

    def _load_data_and_files(self):
        """Parse the body once and populate `_data`, `_files` and `_full_data`."""
        # Only parse when `_data` has not been set yet
        if not _hasattr(self, '_data'):
            self._data, self._files = self._parse()
            # `_full_data` holds both the data and the files
            if self._files:
                self._full_data = self._data.copy()
                self._full_data.update(self._files)
            else:
                self._full_data = self._data

            # For form media types, copy the parsed values onto the wrapped
            # Django request as well
            if is_form_media_type(self.content_type):
                self._request._post = self.POST
                self._request._files = self.FILES

    def _parse(self):
        """
        Parse the request content and return a `(data, files)` tuple.

        Raises `exceptions.UnsupportedMediaType` when no parser matches the
        request's content type.
        """
        # Content type announced by the client
        media_type = self.content_type
        try:
            # The raw request body as sent by the client
            stream = self.stream
        except RawPostDataException:
            ...

        # Nothing to parse: empty data is returned (code omitted)
        if stream is None or media_type is None:
            ...

        # Ask the content negotiator to pick a parser; it receives this
        # request object and the list of parser instances
        parser = self.negotiator.select_parser(self, self.parsers)

        # No parser matches the Content-Type header
        if not parser:
            raise exceptions.UnsupportedMediaType(media_type)

        # Let the selected parser parse the request body
        try:
            parsed = parser.parse(stream, media_type, self.parser_context)
        except Exception:
            ...

        # Return the parsed data and files; a result without `.data` /
        # `.files` attributes takes the AttributeError branch (code omitted)
        try:
            return (parsed.data, parsed.files)
        except AttributeError:
            ...

    @property
    def content_type(self):
        """Return the request's Content-Type header, or '' when absent."""
        # self._request is the wrapped Django request; its META mapping
        # carries the request headers
        meta = self._request.META
        return meta.get('CONTENT_TYPE', meta.get('HTTP_CONTENT_TYPE', ''))
        
        
        
class DefaultContentNegotiation(BaseContentNegotiation):
    """
    Excerpt of the default content negotiator; only parser selection is
    shown in full. A `...` marks code omitted from this excerpt.
    """
    settings = api_settings

    def select_parser(self, request, parsers):
        """
        Return the first parser in `parsers` whose `media_type` matches the
        request's content type, or None when none matches.
        """
        for parser in parsers:
            # request.content_type is the Content-Type request header;
            # the first parser whose media_type matches it is selected
            if media_type_matches(parser.media_type, request.content_type):
                return parser
        return None

    def select_renderer(self, request, renderers, format_suffix=None):
        ...
    ...
    
class JSONParser(BaseParser):
    """
    Excerpt of the parser selected for 'application/json' request bodies.
    A `...` marks code omitted from this excerpt.
    """
    media_type = 'application/json'
    ...

    # Decode the stream and load the data with json.load
    def parse(self, stream, media_type=None, parser_context=None):
        """
        Decode `stream` and return the value loaded from its JSON content.

        The encoding comes from `parser_context['encoding']`, falling back
        to `settings.DEFAULT_CHARSET`. Raises `ParseError` when loading
        fails with a ValueError.
        """
        # parser_context holds the view, the URL args/kwargs, the DRF request and the encoding
        parser_context = parser_context or {}
        encoding = parser_context.get('encoding', settings.DEFAULT_CHARSET)

        try:
            decoded_stream = codecs.getreader(encoding)(stream)
            parse_constant = json.strict_constant if self.strict else None
            # json.load reads from a file-like object (here the decoding stream reader)
            return json.load(decoded_stream, parse_constant=parse_constant)
        except ValueError as exc:
            raise ParseError('JSON parse error - %s' % str(exc))

当访问Request类的data属性时,首先会去找Request._full_data属性。如果之前解析过,该属性就已经有值,无需再次解析。否则,会调用_load_data_and_files方法,其中会调用self._parse()方法。该方法会将请求头的Content-Type与self.parsers这个解析器列表中每一个解析器的media_type进行匹配,选出一个符合请求头Content-Type内容的解析器。然后调用解析器的parse方法对数据进行解析,并在Request._load_data_and_files方法中将上传的数据和文件分别赋值给self._data和self._files

关于文件的解析

对于文件上传,FileUploadParser的media_type为*/*,即可以匹配任意Content-Type;请求头Content-Disposition应为attachment; filename=<filename>(文件名也可以通过URL中的filename参数传入)

调用request.data时,同样会调用文件上传解析器FileUploadParser的parse方法:

class FileUploadParser(BaseParser):
    """
    Parser that treats the whole request body as one uploaded file and
    feeds it through the request's upload handlers.
    """
    media_type = '*/*'
    # Messages used for the ParseError exceptions raised by parse()
    errors = {
        'unhandled': 'FileUpload parse error - none of upload handlers can handle the stream',
        'no_filename': 'Missing filename. Request should include a Content-Disposition header with a filename parameter.',
    }

    def parse(self, stream, media_type=None, parser_context=None):
        """
        Pass `stream` through the upload handlers and return a
        `DataAndFiles` with empty data and the upload stored under 'file'.

        `parser_context` must contain the key 'request'. Raises `ParseError`
        when no filename can be determined or when no handler produces a
        file object.
        """
        parser_context = parser_context or {}
        request = parser_context['request']
        encoding = parser_context.get('encoding', settings.DEFAULT_CHARSET)
        meta = request.META
        upload_handlers = request.upload_handlers
        filename = self.get_filename(stream, media_type, parser_context)

        if not filename:
            raise ParseError(self.errors['no_filename'])

        # Note that this code is extracted from Django's handling of
        # file uploads in MultiPartParser.
        content_type = meta.get('HTTP_CONTENT_TYPE',
                                meta.get('CONTENT_TYPE', ''))
        try:
            content_length = int(meta.get('HTTP_CONTENT_LENGTH',
                                          meta.get('CONTENT_LENGTH', 0)))
        except (ValueError, TypeError):
            # Header present but not convertible to int
            content_length = None

        # See if the handler will want to take care of the parsing.
        for handler in upload_handlers:
            result = handler.handle_raw_input(stream,
                                              meta,
                                              content_length,
                                              None,
                                              encoding)
            # A non-None result means the handler consumed the raw input;
            # its second element is used as the uploaded file
            if result is not None:
                return DataAndFiles({}, {'file': result[1]})

        # This is the standard case.
        # Use the smallest chunk size any handler declares, capped at 2**31 - 4
        possible_sizes = [x.chunk_size for x in upload_handlers if x.chunk_size]
        chunk_size = min([2 ** 31 - 4] + possible_sizes)
        chunks = ChunkIter(stream, chunk_size)
        # Per-handler running total of bytes passed to receive_data_chunk
        counters = [0] * len(upload_handlers)

        for index, handler in enumerate(upload_handlers):
            try:
                handler.new_file(None, filename, content_type,
                                 content_length, encoding)
            except StopFutureHandlers:
                # Keep the handlers up to and including this one; drop the rest
                upload_handlers = upload_handlers[:index + 1]
                break

        for chunk in chunks:
            for index, handler in enumerate(upload_handlers):
                chunk_length = len(chunk)
                # Each handler receives what the previous handler returned
                chunk = handler.receive_data_chunk(chunk, counters[index])
                counters[index] += chunk_length
                # None stops this chunk from reaching the remaining handlers
                if chunk is None:
                    break

        # The first handler that returns a file object supplies the result
        for index, handler in enumerate(upload_handlers):
            file_obj = handler.file_complete(counters[index])
            if file_obj is not None:
                return DataAndFiles({}, {'file': file_obj})

        raise ParseError(self.errors['unhandled'])

    def get_filename(self, stream, media_type, parser_context):
        """
        Detect the uploaded file name.

        Looks for a 'filename' URL kwarg first, then for the `filename*` /
        `filename` parameter of the Content-Disposition header. Returns
        None implicitly when neither lookup succeeds.
        """
        # Detects the uploaded file name. First searches a 'filename' url kwarg.
        # Then tries to parse Content-Disposition header.
        with contextlib.suppress(KeyError):
            return parser_context['kwargs']['filename']

        with contextlib.suppress(AttributeError, KeyError, ValueError):
            meta = parser_context['request'].META
            disposition, params = parse_header_parameters(meta['HTTP_CONTENT_DISPOSITION'])
            # `filename*` takes precedence over `filename` when both are present
            if 'filename*' in params:
                return params['filename*']
            return params['filename']

在视图类中使用FileUploadParser:

class HomeView(APIView):
    """Example view that saves a file uploaded through `FileUploadParser`."""
    parser_classes = (FileUploadParser, )

    def post(self, request, *args, **kwargs):
        """
        Write the uploaded file to disk under its uploaded name.

        Returns a 400 response when the request carries no file and an
        empty 200 response once the file has been written.
        """
        # The uploaded file object is stored under 'file' in request.data
        file_obj = request.data.get('file')
        if file_obj is None:
            # No upload present: answer with a client error instead of
            # failing on `None.name` below
            return Response({'detail': 'No file was uploaded.'}, status=400)

        # NOTE(review): the target path is the client-supplied file name,
        # relative to the working directory - confirm this is acceptable
        # before using the pattern outside a demo.
        # Open in binary write mode; the `with` block closes the file, so no
        # explicit close() is needed
        with open(file_obj.name, 'wb') as file:
            for chunk in file_obj.chunks():
                file.write(chunk)
        return Response(status=200)

文件解析器的全局配置

如果不在视图类中显式设置parser_classes,实际上默认的全局解析器类列表是[JSONParser, FormParser, MultiPartParser]