代码改变世界

Cowboy 源码分析(二十一)

2012-06-25 00:39  rhinovirus  阅读(1766)  评论(0)  编辑  收藏  举报

  大家好,今天继续跟大家分享Cowboy的源码分析,这一篇,我们看下cowboy_http_protocol:onrequest/2函数,完整代码如下:

%% Call the global onrequest callback. The callback can send a reply,
%% in which case we consider the request handled and move on to the next
%% one. Note that since we haven't dispatched yet, we don't know the
%% handler, host_info, path_info or bindings yet.
%%
%% Clause 1: no callback is configured (onrequest is undefined), so the
%% request goes straight to dispatch/2.
%% Clause 2: the callback is applied to the request and its result decides
%% whether dispatching still has to happen.
-spec onrequest(#http_req{}, #state{}) -> ok.
onrequest(Req, State=#state{onrequest=undefined}) ->
    dispatch(Req, State);
onrequest(Req, State=#state{onrequest=OnRequest}) ->
    %% The callback receives the request and returns the request to use
    %% from here on.
    Req2 = OnRequest(Req),
    case Req2#http_req.resp_state of
        %% resp_state is still `waiting`: per the header comment the callback
        %% did not reply, so dispatch the request normally.
        waiting -> dispatch(Req2, State);
        %% Any other resp_state: the request is considered handled; skip
        %% dispatching and continue with next_request/3.
        _ -> next_request(Req2, State, ok)
    end.

  这里我们从Debugger可以看到,onrequest=undefined,所以这里我们从第一个分支来看,另一个分支,等以后具体用到的时候再看。我们从代码中可以看到,第一个分支只是简单的调用了cowboy_http_protocol:dispatch/2 函数,代码如下:

%% Match the request's host and path against the dispatch rules held in
%% the state, then hand the request over to the matching handler.
%%
%% On a match, host_info, path_info and bindings are stored in the request,
%% {Handler, Opts} is stored in the state, and handler_init/2 takes over.
%% When no host rule matches, error_terminate/2 is called with 400; when a
%% host rule matched but none of its path rules did, it is called with 404.
-spec dispatch(#http_req{}, #state{}) -> ok.
dispatch(Req=#http_req{host=Host, path=Path},
        State=#state{dispatch=Dispatch}) ->
    case cowboy_dispatcher:match(Host, Path, Dispatch) of
        {ok, Handler, Opts, Binds, HostInfo, PathInfo} ->
            handler_init(Req#http_req{host_info=HostInfo, path_info=PathInfo,
                bindings=Binds}, State#state{handler={Handler, Opts}});
        {error, notfound, host} ->
            error_terminate(400, State);
        {error, notfound, path} ->
            error_terminate(404, State)
    end.

  调用函数时,参数如下:
  < Host = [<<"localhost">>]
  < Path = []
  < Dispatch = [{'_',[{[<<"websocket">>],websocket_handler,[]},
                    {[<<"eventsource">>],eventsource_handler,[]},
                    {[<<"eventsource">>,<<"live">>],eventsource_emitter,[]},
                    {'_',default_handler,[]}]}]

  前面2个参数,之前都见过了,最后一个,不知道大家还记得吗?这个参数,是在cowboy_examples:start/2传递进来的,如下:

%% Application start callback of the example: builds the dispatch rules,
%% starts one listener using cowboy_tcp_transport and one using
%% cowboy_ssl_transport, both with the same rules, and finally starts the
%% example supervisor. The result of cowboy_examples_sup:start_link/0 is
%% the return value; the results of both start_listener calls are discarded.
start(_Type, _Args) ->
    %% A single host rule '_' with four path rules. The path rules are
    %% listed from specific to general, ending with the '_' rule that maps
    %% to default_handler.
    Dispatch = [
        {'_', [
            {[<<"websocket">>], websocket_handler, []},
            {[<<"eventsource">>], eventsource_handler, []},
            {[<<"eventsource">>, <<"live">>], eventsource_emitter, []},
            {'_', default_handler, []}
        ]}
    ],
    %% Listener on port 8080 over cowboy_tcp_transport.
    %% NOTE(review): the meaning of the literal 100 is not visible here;
    %% presumably the number of acceptors -- confirm against
    %% cowboy:start_listener.
    cowboy:start_listener(my_http_listener, 100,
        cowboy_tcp_transport, [{port, 8080}],
        cowboy_http_protocol, [{dispatch, Dispatch}]
    ),
    %% Listener on port 8443 over cowboy_ssl_transport, configured with the
    %% certificate, key file and key password given below.
    cowboy:start_listener(my_https_listener, 100,
        cowboy_ssl_transport, [
            {port, 8443}, {certfile, "priv/ssl/cert.pem"},
            {keyfile, "priv/ssl/key.pem"}, {password, "cowboy"}],
        cowboy_http_protocol, [{dispatch, Dispatch}]
    ),
    cowboy_examples_sup:start_link().

  好,弄清楚了参数,我们来看具体逻辑:

    case cowboy_dispatcher:match(Host, Path, Dispatch) of

  这里调用 cowboy_dispatcher:match/3 函数,代码如下:

%% @doc Match hostname tokens and path tokens against dispatch rules.
%%
%% It is typically used for matching tokens for the hostname and path of
%% the request against a global dispatch rule for your listener.
%%
%% Dispatch rules are a list of <em>{Hostname, PathRules}</em> tuples, with
%% <em>PathRules</em> being a list of <em>{Path, HandlerMod, HandlerOpts}</em>.
%%
%% <em>Hostname</em> and <em>Path</em> are match rules and can be either the
%% atom <em>'_'</em>, which matches everything for a single token, the atom
%% <em>'*'</em>, which matches everything for the rest of the tokens, or a
%% list of tokens. Each token can be either a binary, the atom <em>'_'</em>,
%% the atom '...' or a named atom. A binary token must match exactly,
%% <em>'_'</em> matches everything for a single token, <em>'...'</em> matches
%% everything for the rest of the tokens and a named atom will bind the
%% corresponding token value and return it.
%%
%% The list of hostname tokens is reversed before matching. For example, if
%% we were to match "www.ninenines.eu", we would first match "eu", then
%% "ninenines", then "www". This means that in the context of hostnames,
%% the <em>'...'</em> atom matches properly the lower levels of the domain
%% as would be expected.
%%
%% When a result is found, this function will return the handler module and
%% options found in the dispatch list, a key-value list of bindings and
%% the tokens that were matched by the <em>'...'</em> atom for both the
%% hostname and path.
%%
%% Note on the clauses below: a <em>'_'</em> used as the whole
%% <em>Hostname</em> rule accepts any host without inspecting its tokens and
%% produces no bindings and no host info. Host rules are tried in list
%% order and the first one that matches decides the result; later host
%% rules are not tried even if none of its path rules match.
-spec match(Host::tokens(), Path::tokens(), dispatch_rules())
    -> {ok, module(), any(), bindings(),
        HostInfo::undefined | tokens(),
        PathInfo::undefined | tokens()}
    | {error, notfound, host} | {error, notfound, path}.
%% No host rule left to try: the host is unknown.
match(_Host, _Path, []) ->
    {error, notfound, host};
%% Wildcard host rule: any host is accepted, with empty bindings and
%% undefined host info; only the path remains to be matched.
match(_Host, Path, [{'_', PathMatchs}|_Tail]) ->
    match_path(Path, PathMatchs, [], undefined);
%% Token-list host rule: compare the host tokens against the rule.
match(Host, Path, [{HostMatch, PathMatchs}|Tail]) ->
    case try_match(host, Host, HostMatch) of
        %% This host rule does not apply; try the next one.
        false ->
            match(Host, Path, Tail);
        %% Host matched and no tokens were captured by '...'.
        {true, HostBinds, undefined} ->
            match_path(Path, PathMatchs, HostBinds, undefined);
        %% Host matched and '...' captured tokens. They come back in the
        %% reversed order used for host matching, so reverse them again
        %% before passing them on.
        {true, HostBinds, HostInfo} ->
            match_path(Path, PathMatchs, HostBinds, lists:reverse(HostInfo))
    end.

  函数的注释很清楚,本来想尝试翻译,无奈英文不好,就不给大家献丑了,我们看具体逻辑吧。如果有能坚持看到这里的朋友,而且你的英文不错,能翻译,那我在这麻烦你帮忙翻译下,也能帮助后来看这篇文字的朋友,不胜感激。回到代码:

  我们根据上下文,能够知道代码将调用第二个分支,也就是执行:

match(_Host, Path, [{'_', PathMatchs}|_Tail]) ->
    match_path(Path, PathMatchs, [], undefined);

  这里,参数的值分别为:

  < _Host = [<<"localhost">>]
  < Path = []
  < PathMatchs = [{[<<"websocket">>],websocket_handler,[]},
                {[<<"eventsource">>],eventsource_handler,[]},
                {[<<"eventsource">>,<<"live">>],eventsource_emitter,[]},
                {'_',default_handler,[]}]
  < _Tail = []

  接着调用 cowboy_dispatcher:match_path/4 函数:

%% Match the path tokens against the path rules of an already matched host
%% rule. Rules are tried in list order and the first matching rule wins.
%%
%% HostBinds and HostInfo come from the host match and are passed through
%% unchanged into the result. On success the result is
%% {ok, Handler, Opts, Bindings, HostInfo, PathInfo}; when the rule list is
%% exhausted it is {error, notfound, path}.
-spec match_path(tokens(), dispatch_path(), bindings(),
    HostInfo::undefined | tokens())
    -> {ok, module(), any(), bindings(),
        HostInfo::undefined | tokens(),
        PathInfo::undefined | tokens()}
    | {error, notfound, path}.
%% No path rule left to try.
match_path(_Path, [], _HostBinds, _HostInfo) ->
    {error, notfound, path};
%% Wildcard path rule: accepts any path, adds no bindings and leaves the
%% path info undefined.
match_path(_Path, [{'_', Handler, Opts}|_Tail], HostBinds, HostInfo) ->
    {ok, Handler, Opts, HostBinds, HostInfo, undefined};
%% The path '*' is matched by a '*' rule.
match_path('*', [{'*', Handler, Opts}|_Tail], HostBinds, HostInfo) ->
    {ok, Handler, Opts, HostBinds, HostInfo, undefined};
%% Any other rule: compare the path against the rule via try_match/3.
match_path(Path, [{PathMatch, Handler, Opts}|Tail], HostBinds, HostInfo) ->
    case try_match(path, Path, PathMatch) of
        %% This rule does not apply; move on to the next path rule.
        false ->
            match_path(Path, Tail, HostBinds, HostInfo);
        %% Matched: host bindings come first, followed by path bindings.
        {true, PathBinds, PathInfo} ->
            {ok, Handler, Opts, HostBinds ++ PathBinds, HostInfo, PathInfo}
    end.

  同样,这里根据参数的值,我们也能知道,首先匹配的是最后一个分支,那么我们看下最后一个分支接收到的参数的值:

  < Path = []
  < PathMatch = [<<"websocket">>]
  < Handler = websocket_handler
  < Opts = []
  < Tail = [{[<<"eventsource">>],eventsource_handler,[]},
          {[<<"eventsource">>,<<"live">>],eventsource_emitter,[]},
          {'_',default_handler,[]}]
  < HostBinds = []
  < HostInfo = undefined

  而逻辑也比较简单,首先调用cowboy_dispatcher:try_match/3 函数,代码如下:

%% Compare a list of tokens against a match rule using list_match/3,
%% starting with an empty list of bindings.
%%
%% For `host`, both the tokens and the rule are reversed first, so the
%% comparison starts from the last host token. For `path`, tokens and rule
%% are compared in their given order.
%%
%% Returns whatever list_match/3 returns: `false`, or
%% {true, Bindings, Rest}.
-spec try_match(host | path, tokens(), match_rule())
    -> {true, bindings(), undefined | tokens()} | false.
try_match(host, List, Match) ->
    list_match(lists:reverse(List), lists:reverse(Match), []);
try_match(path, List, Match) ->
    list_match(List, Match, []).

  由于 case try_match(path, Path, PathMatch) of 第一个参数传递的是path,所以匹配的是第二个分支(try_match(path, List, Match)),接收到的参数值为:

  < List = []
  < Match = [<<"websocket">>]

  逻辑也很简单,就调用了 cowboy_dispatcher:list_match/3 函数,代码如下:

%% Walk the tokens and the match rule in lockstep, one element at a time.
%%
%% Returns {true, Binds, Rest} on success, where Rest is the list of tokens
%% captured by a trailing '...' or `undefined` when the rule had none, and
%% `false` as soon as tokens and rule disagree.
%%
%% Binds is built by prepending, so the most recently bound name comes
%% first in the returned list.
%%
%% The clause order matters: '...' and '_' are atoms too, so their clauses
%% must come before the is_atom/1 clause that binds named atoms. The '...'
%% clause only applies when '...' is the last element of the rule.
-spec list_match(tokens(), match_rule(), bindings())
    -> {true, bindings(), undefined | tokens()} | false.
%% Atom '...' matches any trailing path, stop right now.
list_match(List, ['...'], Binds) ->
    {true, Binds, List};
%% Atom '_' matches anything, continue.
list_match([_E|Tail], ['_'|TailMatch], Binds) ->
    list_match(Tail, TailMatch, Binds);
%% Both values match, continue.
list_match([E|Tail], [E|TailMatch], Binds) ->
    list_match(Tail, TailMatch, Binds);
%% Bind E to the variable name V and continue.
list_match([E|Tail], [V|TailMatch], Binds) when is_atom(V) ->
    list_match(Tail, TailMatch, [{V, E}|Binds]);
%% Match complete.
list_match([], [], Binds) ->
    {true, Binds, undefined};
%% Values don't match, stop. This also covers tokens and rule of
%% different lengths, e.g. an empty token list against a non-empty rule.
list_match(_List, _Match, _Binds) ->
    false.

  跟之前的分析方法一样,都是根据参数的值去匹配对应的分支,很简单,这里匹配到最后一个分支,返回 false。

  接着回到 cowboy_dispatcher:match_path/4 函数:

%% Final clause of match_path/4, repeated here: compare the path against
%% one token-list rule via try_match/3.
match_path(Path, [{PathMatch, Handler, Opts}|Tail], HostBinds, HostInfo) ->
    case try_match(path, Path, PathMatch) of
        %% This rule does not apply; recurse on the remaining path rules.
        false ->
            match_path(Path, Tail, HostBinds, HostInfo);
        %% Matched: host bindings come first, followed by path bindings.
        {true, PathBinds, PathInfo} ->
            {ok, Handler, Opts, HostBinds ++ PathBinds, HostInfo, PathInfo}
    end.

  我们看下这里返回 false,递归调用cowboy_dispatcher:match_path/4,其实就是依次遍历处理:

  < PathMatchs = [{[<<"websocket">>],websocket_handler,[]},
                {[<<"eventsource">>],eventsource_handler,[]},
                {[<<"eventsource">>,<<"live">>],eventsource_emitter,[]},
                {'_',default_handler,[]}]

  我详细把每次调用cowboy_dispatcher:match_path/4时的参数都列出来,如下:

  -----------------------------

  < Path = []
  < PathMatch = [<<"websocket">>]
  < Handler = websocket_handler
  < Opts = []
  < Tail = [{[<<"eventsource">>],eventsource_handler,[]},
          {[<<"eventsource">>,<<"live">>],eventsource_emitter,[]},
          {'_',default_handler,[]}]
  < HostBinds = []
  < HostInfo = undefined

  -----------------------------

  < Path = []
  < PathMatch = [<<"eventsource">>]
  < Handler = eventsource_handler
  < Opts = []
  < Tail = [{[<<"eventsource">>,<<"live">>],eventsource_emitter,[]},
          {'_',default_handler,[]}]
  < HostBinds = []
  < HostInfo = undefined  

  -----------------------------

  < Path = []
  < PathMatch = [<<"eventsource">>,<<"live">>]
  < Handler = eventsource_emitter
  < Opts = []
  < Tail = [{'_',default_handler,[]}]
  < HostBinds = []
  < HostInfo = undefined

  -----------------------------

  < _Path = []
  < Handler = default_handler
  < Opts = []
  < _Tail = []
  < HostBinds = []
  < HostInfo = undefined

  -----------------------------

  不知道大家能不能明白,当然,最后一次,很明显不会匹配cowboy_dispatcher:match_path/4 函数最后一个分支,而是匹配第二个分支,代码如下:

match_path(_Path, [{'_', Handler, Opts}|_Tail], HostBinds, HostInfo) ->
    {ok, Handler, Opts, HostBinds, HostInfo, undefined};

  这里就一行代码,返回由这几个参数组合成的元组。

  接着回到cowboy_http_protocol:dispatch/2 函数实现:

    case cowboy_dispatcher:match(Host, Path, Dispatch) of
        {ok, Handler, Opts, Binds, HostInfo, PathInfo} ->
            handler_init(Req#http_req{host_info=HostInfo, path_info=PathInfo,
                bindings=Binds}, State#state{handler={Handler, Opts}});
        {error, notfound, host} ->
            error_terminate(400, State);
        {error, notfound, path} ->
            error_terminate(404, State)
    end.

  我们已经知道,case cowboy_dispatcher:match(Host, Path, Dispatch) of 在匹配成功的情况下会返回 {ok, Handler, Opts, Binds, HostInfo, PathInfo},那么函数接下来将调用cowboy_http_protocol:handler_init/2函数。

  好了,今天就到这里,下一篇,我们将从cowboy_http_protocol:handler_init/2函数继续往下看。

  最后,谢谢大家支持。北京今天下了一天雨,现在还在继续,好反常,不过总算凉快了,大家好梦吧。