APISIX Source Code Analysis (version 2.7)


Startup

Checking whether APISIX is already running
    local pid_path = env.apisix_home .. "/logs/nginx.pid"
    local pid = util.read_file(pid_path)
    pid = tonumber(pid)
    if pid then
        local lsof_cmd = "lsof -p " .. pid
        local res, err = util.execute_cmd(lsof_cmd)
        if not (res and res == "") then
            if not res then
                print(err)
            else
                print("APISIX is running...")
            end

            return
        end

        print("nginx.pid exists but there's no corresponding process with pid ", pid,
              ", the file will be overwritten")
    end

It first checks whether Nginx is already running: nginx.pid is read to get the process ID; if a PID is present, lsof -p is run against it. If a matching process exists, it prints that APISIX is running and returns; otherwise the startup flow continues and nginx.pid is overwritten.

    init(env)
    init_etcd(env, args)
    util.execute_cmd(env.openresty_args)

Next come three steps: initializing APISIX (which generates the Nginx configuration file), initializing etcd, and executing the OpenResty start command. Let's take them in turn:

Initializing APISIX
local function init(env)
    if env.is_root_path then
        print('Warning! Running apisix under /root is only suitable for '
              .. 'development environments and it is dangerous to do so. '
              .. 'It is recommended to run APISIX in a directory '
              .. 'other than /root.')
    end

    -- read_yaml_conf
    local yaml_conf, err = file.read_yaml_conf(env.apisix_home)
    if not yaml_conf then
        util.die("failed to read local yaml config of apisix: ", err, "\n")
    end
         ...
end     

It first warns if APISIX is installed under the root path, then calls read_yaml_conf(env.apisix_home), which joins the installation path into the config file path, reads the config file, and hands it to tinyyaml to parse into a Lua table.
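
As a rough sketch of what read_yaml_conf does (simplified: the real function also merges conf/config-default.yaml into conf/config.yaml), assuming lua-tinyyaml's parse API:

local yaml = require("tinyyaml")

local function read_yaml_conf(apisix_home)
    -- build the config path from the installation directory
    local path = apisix_home .. "/conf/config.yaml"
    local f, err = io.open(path, "r")
    if not f then
        return nil, "failed to open config file: " .. err
    end

    local content = f:read("*a")
    f:close()

    -- tinyyaml parses the YAML text into a plain Lua table
    local conf = yaml.parse(content)
    if type(conf) ~= "table" then
        return nil, "invalid yaml file: " .. path
    end

    return conf
end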

    local use_openresty_1_17 = false
    if not version_greater_equal(or_ver, "1.19.3") then
        use_openresty_1_17 = true
    end

    local or_info = util.execute_cmd("openresty -V 2>&1")
    local with_module_status = true
    if or_info and not or_info:find("http_stub_status_module", 1, true) then
        stderr:write("'http_stub_status_module' module is missing in ",
                     "your openresty, please check it out. Without this ",
                     "module, there will be fewer monitoring indicators.\n")
        with_module_status = false
    end

    local use_apisix_openresty = true
    if or_info and not or_info:find("apisix-nginx-module", 1, true) then
        use_apisix_openresty = false
    end

    local enabled_plugins = {}
    for i, name in ipairs(yaml_conf.plugins) do
        enabled_plugins[name] = true
    end
 ....
 
   local sys_conf = {
        use_openresty_1_17 = use_openresty_1_17,
        lua_path = env.pkg_path_org,
        lua_cpath = env.pkg_cpath_org,
        os_name = util.trim(util.execute_cmd("uname")),
        apisix_lua_home = env.apisix_home,
        with_module_status = with_module_status,
        use_apisix_openresty = use_apisix_openresty,
        error_log = {level = "warn"},
        enabled_plugins = enabled_plugins,
        dubbo_upstream_multiplex_count = dubbo_upstream_multiplex_count,
        tcp_enable_ssl = tcp_enable_ssl,
    }
    
  ....  

What follows is preparing the system context: a series of parameter checks and conversions that gather information about the runtime environment, assign it to sys_conf, and validate it.

    local conf_render = template.compile(ngx_tpl)
    local ngxconf = conf_render(sys_conf)

    local ok, err = util.write_file(env.apisix_home .. "/conf/nginx.conf",
                                    ngxconf)

Finally, ngx_tpl (the Nginx config template) is rendered against sys_conf with lua-resty-template: the values in sys_conf are substituted into the template to produce ngxconf, which is written out as nginx.conf.
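
The compile/render cycle of lua-resty-template can be seen in isolation with a toy template (the two directives below are made up; the real ngx_tpl is hundreds of lines):

local template = require("resty.template")

-- {* expr *} outputs the expression unescaped, the style ngx_tpl uses
local tpl = [[
error_log logs/error.log {* error_log.level *};
worker_processes auto;
]]

local render = template.compile(tpl)
local ngxconf = render({ error_log = { level = "warn" } })
-- ngxconf now reads: error_log logs/error.log warn; ...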

Initializing etcd
function _M.init(env, args)
   ...
   
    local host_count = #(yaml_conf.etcd.host)
    local scheme
    for i = 1, host_count do
        local host = yaml_conf.etcd.host[i]
        local fields = util.split(host, "://")
        if not fields then
            util.die("malformed etcd endpoint: ", host, "\n")
        end

        if not scheme then
            scheme = fields[1]
        elseif scheme ~= fields[1] then
            print([[WARNING: mixed protocols among etcd endpoints]])
        end
    end

    -- check the etcd cluster version
    for index, host in ipairs(yaml_conf.etcd.host) do
        local version_url = host .. "/version"
        local errmsg

        local res, err
        local retry_time = 0
        while retry_time < 2 do
            res, err = request(version_url, yaml_conf)
            -- In case of failure, request returns nil followed by an error message.
            -- Else the first return value is the response body
            -- and followed by the response status code.
            if res then
                break
            end
            retry_time = retry_time + 1
            print(str_format("Warning! Request etcd endpoint \'%s\' error, %s, retry time=%s",
                             version_url, err, retry_time))
        end

        if not res then
            errmsg = str_format("request etcd endpoint \'%s\' error, %s\n", version_url, err)
            util.die(errmsg)
        end

        local body, _, err = dkjson.decode(res)
        if err or (body and not body["etcdcluster"]) then
            errmsg = str_format("got malformed version message: \"%s\" from etcd \"%s\"\n", res,
                                version_url)
            util.die(errmsg)
        end

        local cluster_version = body["etcdcluster"]
        if compare_semantic_version(cluster_version, env.min_etcd_version) then
            util.die("etcd cluster version ", cluster_version,
                     " is less than the required version ",
                     env.min_etcd_version,
                     ", please upgrade your etcd cluster\n")
        end
    end

etcd initialization first reads and validates the config file and parses out the set of etcd cluster endpoints. Each endpoint's cluster version is then fetched (with up to 2 retries) and checked against the minimum required version via compare_semantic_version.
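
A minimal sketch of such a version comparison (not APISIX's exact implementation): compare the dot-separated numeric parts left to right and return true when the first version is lower:

local function compare_semantic_version(a, b)
    local pa, pb = {}, {}
    for n in a:gmatch("%d+") do pa[#pa + 1] = tonumber(n) end
    for n in b:gmatch("%d+") do pb[#pb + 1] = tonumber(n) end

    for i = 1, math.max(#pa, #pb) do
        local x, y = pa[i] or 0, pb[i] or 0
        if x ~= y then
            return x < y -- the first differing part decides
        end
    end

    return false -- equal versions
end

print(compare_semantic_version("3.3.0", "3.4.0")) -- true: cluster too old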

    for index, host in ipairs(yaml_conf.etcd.host) do
        local is_success = true

        local errmsg
        local auth_token
        local user = yaml_conf.etcd.user
        local password = yaml_conf.etcd.password
        if user and password then
            local auth_url = host .. "/v3/auth/authenticate"
            local json_auth = {
                name =  etcd_conf.user,
                password = etcd_conf.password
            }

            local post_json_auth = dkjson.encode(json_auth)
            local response_body = {}

            local res, err
            local retry_time = 0
            while retry_time < 2 do
                res, err = request({
                    url = auth_url,
                    method = "POST",
                    source = ltn12.source.string(post_json_auth),
                    sink = ltn12.sink.table(response_body),
                    headers = {
                        ["Content-Length"] = #post_json_auth
                    }
                }, yaml_conf)
                -- In case of failure, request returns nil followed by an error message.
                -- Else the first return value is just the number 1
                -- and followed by the response status code.
                if res then
                    break
                end
                retry_time = retry_time + 1
                print(str_format("Warning! Request etcd endpoint \'%s\' error, %s, retry time=%s",
                                 auth_url, err, retry_time))
            end

            if not res then
                errmsg = str_format("request etcd endpoint \"%s\" error, %s\n", auth_url, err)
                util.die(errmsg)
            end

            local res_auth = table_concat(response_body)
            local body_auth, _, err_auth = dkjson.decode(res_auth)
            if err_auth or (body_auth and not body_auth["token"]) then
                errmsg = str_format("got malformed auth message: \"%s\" from etcd \"%s\"\n",
                                    res_auth, auth_url)
                util.die(errmsg)
            end

            auth_token = body_auth.token
        end


Next, /v3/auth/authenticate is called to obtain an etcd token (when user and password are configured); all subsequent requests must carry it in the Authorization header for authentication.

        local dirs = {}
        for name in pairs(constants.HTTP_ETCD_DIRECTORY) do
            dirs[name] = true
        end
        for name in pairs(constants.STREAM_ETCD_DIRECTORY) do
            dirs[name] = true
        end

        for dir_name in pairs(dirs) do
            local key =  (etcd_conf.prefix or "") .. dir_name .. "/"

            local put_url = host .. "/v3/kv/put"
            local post_json = '{"value":"' .. base64_encode("init_dir")
                              .. '", "key":"' .. base64_encode(key) .. '"}'
            local response_body = {}
            local headers = {["Content-Length"] = #post_json}
            if auth_token then
                headers["Authorization"] = auth_token
            end

            local res, err
            local retry_time = 0
            while retry_time < 2 do
                res, err = request({
                    url = put_url,
                    method = "POST",
                    source = ltn12.source.string(post_json),
                    sink = ltn12.sink.table(response_body),
                    headers = headers
                }, yaml_conf)
                retry_time = retry_time + 1
                if res then
                    break
                end
                print(str_format("Warning! Request etcd endpoint \'%s\' error, %s, retry time=%s",
                                 put_url, err, retry_time))
            end

            if not res then
                errmsg = str_format("request etcd endpoint \"%s\" error, %s\n", put_url, err)
                util.die(errmsg)
            end

            local res_put = table_concat(response_body)
            if res_put:find("404 page not found", 1, true) then
                errmsg = str_format("gRPC gateway is not enabled in etcd cluster \"%s\",",
                                    "which is required by Apache APISIX\n")
                util.die(errmsg)
            end

            if res_put:find("error", 1, true) then
                is_success = false
                if (index == host_count) then
                    errmsg = str_format("got malformed key-put message: \"%s\" from etcd \"%s\"\n",
                                        res_put, put_url)
                    util.die(errmsg)
                end

                break
            end

            if args and args["verbose"] then
                print(res_put)
            end
        end

        if is_success then
            etcd_ok = true
            break
        end
    end

    if not etcd_ok then
        util.die("none of the configured etcd works well")
    end

Finally, the directories are created in etcd: local post_json = '{"value":"' .. base64_encode("init_dir") .. '", "key":"' .. base64_encode(key) .. '"}' (etcd's v3 JSON gateway requires base64-encoded keys and values). After creation, the directory structure in etcd looks like this:

@11d2d7374c12:/$ etcdctl get --prefix ""
/apisix/consumers/
init_dir
/apisix/data_plane/server_info/039a6657-8d21-4b3b-b364-32c1ad92a6a6
{"id":"039a6657-8d21-4b3b-b364-32c1ad92a6a6","etcd_version":"3.4.0","version":"2.5","hostname":"4364cdfb7e75","up_time":13941,"boot_time":1625041981,"last_report_time":1625055922}
/apisix/global_rules/
init_dir
/apisix/plugin_configs/
init_dir
/apisix/plugin_metadata/
init_dir
/apisix/plugins
[{"name":"api-breaker"},{"name":"authz-keycloak"},{"name":"basic-auth"},{"name":"batch-requests"},{"name":"consumer-restriction"},{"name":"cors"},{"name":"echo"},{"name":"fault-injection"},{"name":"grpc-transcode"},{"name":"hmac-auth"},{"name":"http-logger"},{"name":"ip-restriction"},{"name":"jwt-auth"},{"name":"kafka-logger"},{"name":"key-auth"},{"name":"limit-conn"},{"name":"limit-count"},{"name":"limit-req"},{"name":"openid-connect"},{"name":"prometheus"},{"name":"proxy-cache"},{"name":"proxy-mirror"},{"name":"proxy-rewrite"},{"name":"redirect"},{"name":"referer-restriction"},{"name":"request-id"},{"name":"request-validation"},{"name":"response-rewrite"},{"name":"serverless-post-function"},{"name":"serverless-pre-function"},{"name":"sls-logger"},{"name":"syslog"},{"name":"tcp-logger"},{"name":"udp-logger"},{"name":"uri-blocker"},{"name":"wolf-rbac"},{"name":"zipkin"},{"name":"server-info"},{"name":"traffic-split"},{"name":"mqtt-proxy","stream":true}]
/apisix/plugins/
init_dir
/apisix/proto/
init_dir
/apisix/routes/
init_dir
/apisix/services/
init_dir
/apisix/ssl/
init_dir
/apisix/stream_routes/
init_dir
/apisix/upstreams/
init_dir
Starting OpenResty

APISIX is an application built on top of OpenResty, so the last step is to start OpenResty: util.execute_cmd(env.openresty_args). The command executed is: local openresty_args = [[openresty -p ]] .. apisix_home .. [[ -c ]] .. apisix_home .. [[/conf/nginx.conf]]

Master process initialization

Looking at the Nginx configuration file generated earlier:

init_by_lua_block {
    require "resty.core"
    apisix = require("apisix")

    local dns_resolver = { "127.0.0.11", }
    local args = {
        dns_resolver = dns_resolver,
    }
    apisix.http_init(args)
}

we can see that master process initialization happens in apisix.http_init:

function _M.http_init(args)
    require("resty.core")

    if require("ffi").os == "Linux" then
        require("ngx.re").opt("jit_stack_size", 200 * 1024)
    end

    require("jit.opt").start("minstitch=2", "maxtrace=4000",
                             "maxrecord=8000", "sizemcode=64",
                             "maxmcode=4000", "maxirconst=1000")

    core.resolver.init_resolver(args)
    core.id.init()

    local process = require("ngx.process")
    local ok, err = process.enable_privileged_agent()
    if not ok then
        core.log.error("failed to enable privileged_agent: ", err)
    end

    if core.config.init then
        local ok, err = core.config.init()
        if not ok then
            core.log.error("failed to load the configuration: ", err)
        end
    end
end

This function, in order: tunes and starts the LuaJIT parameters, generates the APISIX node uid, enables the privileged agent process, and calls core.config.init().

We'll focus on core.config.init().

First, the definition of config in core.lua:

local config_center = local_conf.apisix and local_conf.apisix.config_center
                      or "etcd"
log.info("use config_center: ", config_center)
local config = require("apisix.core.config_" .. config_center)
config.type = config_center

The config_center from the local config is used; if none is configured it defaults to etcd, i.e. config = config_etcd.lua.

Next, its init operation:

function _M.init()
    local local_conf, err = config_local.local_conf()
    if not local_conf then
        return nil, err
    end

    if table.try_read_attr(local_conf, "apisix", "disable_sync_configuration_during_start") then
        return true
    end

    local etcd_cli, err = get_etcd()
    if not etcd_cli then
        return nil, "failed to start a etcd instance: " .. err
    end

    local etcd_conf = local_conf.etcd
    local prefix = etcd_conf.prefix
    local res, err = readdir(etcd_cli, prefix, create_formatter(prefix))
    if not res then
        return nil, err
    end

    return true
end

The key step is: local res, err = readdir(etcd_cli, prefix, create_formatter(prefix))

create_formatter(prefix) builds a formatter from the etcd prefix in the local config (default "/apisix").

readdir reads the directory contents from etcd and runs them through the formatter.

local function readdir(etcd_cli, key, formatter)
    if not etcd_cli then
        return nil, "not inited"
    end

    local res, err = etcd_cli:readdir(key)
    if not res then
        -- log.error("failed to get key from etcd: ", err)
        return nil, err
    end

    if type(res.body) ~= "table" then
        return nil, "failed to read etcd dir"
    end

    res, err = etcd_apisix.get_format(res, key .. '/', true, formatter)
    if not res then
        return nil, err
    end

    return res
end

It first calls the etcd client's readdir to pull everything under the given key from etcd (the key here is /apisix, the root directory), then etcd_apisix.get_format(res, key .. '/', true, formatter) post-processes the data with the formatter that was passed in:

function _M.get_format(res, real_key, is_dir, formatter)
...

    if formatter then
        return formatter(res)
    end

...
end

The formatter's main job is to walk the data read from etcd under the given directory and put it into loaded_configuration, defined in config_etcd, for later use.
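
A simplified, hedged sketch of that idea (the real formatter also distinguishes directories from single items and does more bookkeeping with the response headers):

local loaded_configuration = {}

local function create_formatter(prefix)
    return function(res)
        for _, item in ipairs(res.body.kvs) do
            -- strip the prefix: "/apisix/routes/1" -> "/routes/1"
            local key = item.key:sub(#prefix + 1)
            loaded_configuration[key] = {
                body = item,           -- the raw kv entry
                headers = res.headers, -- carries the etcd revision info
            }
        end

        return res
    end
end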

Worker process initialization

Worker process initialization is driven by apisix.http_init_worker(), which calls each component's init_worker() function. We'll focus on the steps excerpted below:

function _M.http_init_worker()
...
    local discovery = require("apisix.discovery.init").discovery
    if discovery and discovery.init_worker then
        discovery.init_worker()
    end
...    
    plugin.init_worker()
    router.http_init_worker()
    require("apisix.http.service").init_worker()
    plugin_config.init_worker()
    require("apisix.consumer").init_worker()

    if core.config == require("apisix.core.config_yaml") then
        core.config.init_worker()
    end
...
end
Service discovery initialization

First comes service discovery initialization (if any is configured):

if discovery_type then
    for discovery_name, _ in pairs(discovery_type) do
        log.info("use discovery: ", discovery_name)
        discovery[discovery_name] = require("apisix.discovery." .. discovery_name)
    end
end

function discovery.init_worker()
    if discovery_type then
        for discovery_name, _ in pairs(discovery_type) do
            discovery[discovery_name].init_worker()
        end
    end
end

In discovery/init.lua, every service discovery component in the local config is iterated and its init_worker() is called in turn. Take eureka as an example:

function _M.init_worker()
    if not local_conf.discovery.eureka or
        not local_conf.discovery.eureka.host or #local_conf.discovery.eureka.host == 0 then
        error("do not set eureka.host")
        return
    end

    local ok, err = core.schema.check(schema, local_conf.discovery.eureka)
    if not ok then
        error("invalid eureka configuration: " .. err)
        return
    end
    default_weight = local_conf.discovery.eureka.weight or 100
    log.info("default_weight:", default_weight, ".")
    local fetch_interval = local_conf.discovery.eureka.fetch_interval or 30
    log.info("fetch_interval:", fetch_interval, ".")
    ngx_timer_at(0, fetch_full_registry)
    ngx_timer_every(fetch_interval, fetch_full_registry)
end

It first validates the config against the schema defined in eureka.lua. ngx_timer_at(0, fetch_full_registry) immediately pulls the available service nodes from eureka, then ngx_timer_every(fetch_interval, fetch_full_registry) starts a timer that re-pulls node data every fetch_interval seconds (default 30).

Plugin initialization

Next comes plugin initialization:

    local_conf = core.config.local_conf(true)
    http_plugin_names = local_conf.plugins
    stream_plugin_names = local_conf.stream_plugins

local function load(plugin_names)
    local processed = {}
    for _, name in ipairs(plugin_names) do
        if processed[name] == nil then
            processed[name] = true
        end
    end

    core.log.warn("new plugins: ", core.json.delay_encode(processed))

    for name in pairs(local_plugins_hash) do
        unload_plugin(name)
    end

    core.table.clear(local_plugins)
    core.table.clear(local_plugins_hash)

    for name in pairs(processed) do
        load_plugin(name, local_plugins)
    end

   ...
end

Plugin initialization first reads the local plugin configuration, unloads the old plugins one by one, then loads each configured plugin:

local function load_plugin(name, plugins_list, is_stream_plugin)
    local pkg_name = "apisix.plugins." .. name
    if is_stream_plugin then
        pkg_name = "apisix.stream.plugins." .. name
    end

    local ok, plugin = pcall(require, pkg_name)
...
    plugin.name = name
    plugin.attr = plugin_attr(name)
    core.table.insert(plugins_list, plugin)

    if plugin.init then
        plugin.init()
    end

    return
end

"apisix.plugins."name拼接出相对路径并进行引用,接着调用plugin_attr(name) 进行本地配置的插件属性的注入并放入plugin.lua定义的local_plugins中,最终调用插件各自的初始化函数。

 -- sort by plugin's priority
    if #local_plugins > 1 then
        sort_tab(local_plugins, sort_plugin)
    end

    for i, plugin in ipairs(local_plugins) do
        local_plugins_hash[plugin.name] = plugin
        if local_conf and local_conf.apisix
           and local_conf.apisix.enable_debug then
            core.log.warn("loaded plugin and sort by priority:",
                          " ", plugin.priority,
                          " name: ", plugin.name)
        end
    end

    _M.load_times = _M.load_times + 1
    core.log.info("load plugin times: ", _M.load_times)
    return true

Once all local plugins are loaded, the list is sorted by priority and indexed into local_plugins_hash, a hash keyed by plugin name with the plugin as value.
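
The comparator is presumably just a priority comparison along these lines, so that higher-priority plugins sort first:

local function sort_plugin(l, r)
    return l.priority > r.priority
end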

Route initialization
function _M.http_init_worker()
    local conf = core.config.local_conf()
    local router_http_name = "radixtree_uri"
    local router_ssl_name = "radixtree_sni"

    if conf and conf.apisix and conf.apisix.router then
        router_http_name = conf.apisix.router.http or router_http_name
        router_ssl_name = conf.apisix.router.ssl or router_ssl_name
    end

    local router_http = require("apisix.http.router." .. router_http_name)
    attach_http_router_common_methods(router_http)
    router_http.init_worker(filter)
    _M.router_http = router_http

    local router_ssl = require("apisix.ssl.router." .. router_ssl_name)
    router_ssl.init_worker()
    _M.router_ssl = router_ssl

APISIX offers three route-matching implementations: radixtree_host_uri, radixtree_uri, and radixtree_uri_with_parameter, with radixtree_uri (a compressed prefix tree) as the default. attach_http_router_common_methods(router_http) binds two default methods to radixtree_uri.lua (unless the router provides its own implementations):

init_worker(): calls the new function in apisix.core.config_etcd.lua (the default) to pull the route information stored under the /routes directory in etcd, and assigns it to user_routes

routes(): returns all the route information pulled by init_worker() above (the route data plus its version)

Once these methods are bound, init_worker() is called to pull the routes, and _M.router_http = router_http stores the initialized radixtree_uri in router.lua so that incoming requests can be matched via router.router_http.match(api_ctx).

Data updates

We mentioned the new function in apisix.core.config_etcd.lua above; it fetches the data under a given directory from etcd:

function _M.new(key, opts)
...
    local automatic = opts and opts.automatic
    local item_schema = opts and opts.item_schema
    local filter_fun = opts and opts.filter
    local timeout = opts and opts.timeout
    local single_item = opts and opts.single_item
    local checker = opts and opts.checker

    local obj = setmetatable({
        etcd_cli = nil,
        key = key and prefix .. key,
        automatic = automatic,
        item_schema = item_schema,
        checker = checker,
        sync_times = 0,
        running = true,
        conf_version = 0,
        values = nil,
        need_reload = true,
        routes_hash = nil,
        prev_index = 0,
        last_err = nil,
        last_err_time = nil,
        resync_delay = resync_delay,
        timeout = timeout,
        single_item = single_item,
        filter = filter_fun,
    }, mt)

   ...

obj is the value eventually returned (Lua has no classes as such; a table plus a metatable plays that role). It predefines a set of fields with their initial values, and setmetatable() attaches the metatable mt to it:

local _M = {
    version = 0.3,
    local_conf = config_local.local_conf,
    clear_local_cache = config_local.clear_cache,
}

local mt = {
    __index = _M,
    __tostring = function(self)
        return " etcd key: " .. self.key
    end
}

A quick aside for those new to Lua: mt defines the __index key. When you access a table by a key that has no value, Lua looks up __index in that table's metatable; if __index is itself a table, Lua looks the key up there.
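
A tiny standalone example of that fallback:

local base = { greet = function() return "hello" end }
local t = setmetatable({}, { __index = base })

-- `greet` is missing from t itself, so Lua consults the metatable's
-- __index table and finds it in `base`
print(t.greet()) -- hello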

Continuing with the new function:

    if automatic then
        if not key then
            return nil, "missing `key` argument"
        end

        if loaded_configuration[key] then
            local res = loaded_configuration[key]
            loaded_configuration[key] = nil -- tried to load

            log.notice("use loaded configuration ", key)

            local dir_res, headers = res.body, res.headers
            load_full_data(obj, dir_res, headers)
        end

        ngx_timer_at(0, _automatic_fetch, obj)

    else
        local etcd_cli, err = get_etcd()
        if not etcd_cli then
            return nil, "failed to start a etcd instance: " .. err
        end
        obj.etcd_cli = etcd_cli
    end

    if key then
        created_obj[key] = obj
    end

    return obj

It first checks automatic, i.e. whether auto-updating is on (decided by the caller of new). If so, it checks whether loaded_configuration already holds the data for this key (the master process already loaded everything under /apisix once during init); if it does, load_full_data fills obj directly and sets need_reload to false, skipping the next readdir request.

ngx_timer_at(0, _automatic_fetch, obj)

ngx_timer_at creates an Nginx timer. During the event loop, Nginx finds expired timers and runs the corresponding Lua callback in an independent coroutine; here that callback is _automatic_fetch, the auto-update logic.

Now for the auto-update logic, which is the heart of how APISIX achieves hot updates and hot plugin reloads on top of etcd:

local function _automatic_fetch(premature, self)
    if premature then
        return
    end

    local i = 0
    while not exiting() and self.running and i <= 32 do
        i = i + 1

        local ok, err = xpcall(function()
            if not self.etcd_cli then
                local etcd_cli, err = get_etcd()
                if not etcd_cli then
                    error("failed to create etcd instance for key ["
                          .. self.key .. "]: " .. (err or "unknown"))
                end
                self.etcd_cli = etcd_cli
            end

            local ok, err = sync_data(self)
            
          ...
    end

    if not exiting() and self.running then
        ngx_timer_at(0, _automatic_fetch, self)
    end
end

The bare 32 in while not exiting() and self.running and i <= 32 do is not explained anywhere; presumably it caps the number of sync iterations per timer callback, so the loop periodically returns and reschedules itself through the trailing ngx_timer_at, giving the exiting() check a timely chance to stop the loop on worker shutdown or reload.

Inside, local etcd_cli, err = get_etcd() obtains an etcd client and assigns it to obj, and local ok, err = sync_data(self) performs the actual data update:

local function sync_data(self)
    if not self.key then
        return nil, "missing 'key' arguments"
    end

    if self.need_reload then
        local res, err = readdir(self.etcd_cli, self.key)
       ...
        -- clean up the old data
        if self.values then
            for i, val in ipairs(self.values) do
                if val and val.clean_handlers then
                    for _, clean_handler in ipairs(val.clean_handlers) do
                        clean_handler(val)
                    end
                    val.clean_handlers = nil
                end
            end

            self.values = nil
            self.values_hash = nil
        end
        
        -- load the new data
        load_full_data(self, dir_res, headers)

        return true
    end

    -- the directory has already been loaded, so watch it for changes

    local dir_res, err = waitdir(self.etcd_cli, self.key, self.prev_index + 1, self.timeout)
    log.info("waitdir key: ", self.key, " prev_index: ", self.prev_index + 1)
    log.info("res: ", json.delay_encode(dir_res, true))

...

    local res_copy = res
    -- waitdir will return [res] even for self.single_item = true
    for _, res in ipairs(res_copy) do
...

        self:upgrade_version(res.modifiedIndex)
...
        local pre_index = self.values_hash[key]
        if pre_index then
            local pre_val = self.values[pre_index]
            if pre_val and pre_val.clean_handlers then
                for _, clean_handler in ipairs(pre_val.clean_handlers) do
                    clean_handler(pre_val)
                end
                pre_val.clean_handlers = nil
            end

            if res.value then
                if not self.single_item then
                    res.value.id = key
                end

                self.values[pre_index] = res
                res.clean_handlers = {}
                log.info("update data by key: ", key)

            else
                self.sync_times = self.sync_times + 1
                self.values[pre_index] = false
                self.values_hash[key] = nil
                log.info("delete data by key: ", key)
            end

        elseif res.value then
            res.clean_handlers = {}
            insert_tab(self.values, res)
            self.values_hash[key] = #self.values
            if not self.single_item then
                res.value.id = key
            end

            log.info("insert data by key: ", key)
        end
        
...
        if self.filter then
            self.filter(res)
        end

        self.conf_version = self.conf_version + 1
    end

    return self.values
end

As you can see, for need_reload: the /apisix directory was already loaded once during master process init, so it is false there; all other directories get their first load here via local res, err = readdir(self.etcd_cli, self.key). Once a directory has been loaded, local dir_res, err = waitdir(self.etcd_cli, self.key, self.prev_index + 1, self.timeout) is called instead. Following its implementation, local res_func, func_err, http_cli = etcd_cli:watchdir(key, opts) calls the watchdir function of iresty/lua-resty-etcd, i.e. etcd's watch mechanism is used to watch the given key for changes.
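
A hedged sketch of the lua-resty-etcd watch loop that waitdir wraps (runs inside OpenResty; the endpoint and revision below are made up):

local etcd = require("resty.etcd")

local cli = assert(etcd.new({
    protocol = "v3",
    http_host = "http://127.0.0.1:2379",
}))

-- watchdir returns an iterator-style function: each call blocks until
-- etcd pushes an event chunk or the watch times out
local res_func, err = cli:watchdir("/apisix/routes", {
    start_revision = 101, -- sync_data passes self.prev_index + 1
    timeout = 30,
})

while true do
    local chunk, werr = res_func()
    if not chunk then
        break -- timeout or error: sync_data returns and gets re-invoked
    end
    -- chunk.result.events holds the created/updated/deleted kvs
end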

insert_tab(self.values, res)
self.values_hash[key] = #self.values
self.conf_version = self.conf_version + 1

When updated data arrives, obj is reassigned and its version bumped.

In summary, new returns a composite object holding the etcd client, the directory data (values), the filter, and so on, and depending on automatic it watches the given etcd directory and keeps itself up to date.

Request handling

Now let's trace what happens when a request arrives:

 access_by_lua_block {
    apisix.http_access_phase()
}
ngx_ctx preprocessing
function _M.http_access_phase()
    local ngx_ctx = ngx.ctx

    if ngx_ctx.api_ctx and ngx_ctx.api_ctx.ssl_client_verified then
        local res = ngx_var.ssl_client_verify
        if res ~= "SUCCESS" then
            if res == "NONE" then
                core.log.error("client certificate was not present")
            else
                core.log.error("client certificate verification is not passed: ", res)
            end
            return core.response.exit(400)
        end
    end

    -- always fetch table from the table pool, we don't need a reused api_ctx
    local api_ctx = core.tablepool.fetch("api_ctx", 0, 32)
    ngx_ctx.api_ctx = api_ctx
    core.ctx.set_vars_meta(api_ctx)
...

ngx.ctx is the request context; its lifetime matches a single request, and it shares data across the request's phases. core.ctx.set_vars_meta(api_ctx) internally sets a metatable on var. When a variable is needed during the request, code accesses ngx_ctx.api_ctx.var.*; per the __index semantics covered earlier, a key missing from the var table falls through to the metatable's __index function, which defines a set of ways to fetch the corresponding nginx variable via the LuaJIT FFI, faster than going through ngx.var.*. It also caches results:

            local cached = t._cache[key]
            if cached ~= nil then
                return cached
            end
            
            ....   
            
            if val ~= nil then
                t._cache[key] = val
            end
Route matching
...
    if router.api.has_route_not_under_apisix() or
        core.string.has_prefix(uri, "/apisix/")
    then
        local skip = local_conf and local_conf.apisix.global_rule_skip_internal_api
        local matched = router.api.match(api_ctx, skip)
        if matched then
            return
        end
    end

    router.router_http.match(api_ctx)

    -- run global rule
    plugin.run_global_rules(api_ctx, router.global_rules, nil)
...

has_route_not_under_apisix() defaults to true. Following its references, fetch_api_router() is what assigns has_route_not_under_apisix: it iterates the local plugin list, and if any plugin exposes an API (for example prometheus's export_uri) whose uri does not start with /apisix/, it becomes true;

core.string.has_prefix(uri, "/apisix/") checks whether the current request's uri starts with /apisix/.

If either condition holds, router.api.match runs, and a successful match returns early. The takeaway: business routes should avoid the /apisix prefix; generally only APIs exposed by APISIX itself or by its plugins live under /apisix.

Next, the real (business-level) route matching:

From the route initialization analysis we know router.router_http defaults to radixtree_uri; here is its match logic:

function _M.match(api_ctx)
    local user_routes = _M.user_routes
    if not cached_version or cached_version ~= user_routes.conf_version then
        uri_router = base_router.create_radixtree_uri_router(user_routes.values,
                                                             uri_routes, false)
        cached_version = user_routes.conf_version
    end

    if not uri_router then
        core.log.error("failed to fetch valid `uri` router: ")
        return true
    end

    return base_router.match_uri(uri_router, match_opts, api_ctx)
end

It first checks the cached version number. cached_version starts as nil, and user_routes, per the earlier analysis of new, changes dynamically. When the versions differ, the router is rebuilt: uri_router is created from the latest route directory fetched from etcd and the cached version is updated. The router is then handed to base_router.match_uri, i.e. the match_uri function in apisix.http.route:

function _M.match_uri(uri_router, match_opts, api_ctx)
    core.table.clear(match_opts)
    match_opts.method = api_ctx.var.request_method
    match_opts.host = api_ctx.var.host
    match_opts.remote_addr = api_ctx.var.remote_addr
    match_opts.vars = api_ctx.var
    match_opts.matched = core.tablepool.fetch("matched_route_record", 0, 4)

    local ok = uri_router:dispatch(api_ctx.var.uri, match_opts, api_ctx, match_opts)
    return ok
end

Ultimately dispatch performs the route match.
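
dispatch comes from lua-resty-radixtree; a minimal usage sketch (runs inside OpenResty; the path and handler below are made up):

local radix = require("resty.radixtree")

local rx = radix.new({
    {
        paths = {"/hello/*"},
        methods = {"GET"},
        handler = function(ctx)
            ctx.matched_route = "hello"
        end,
    },
})

local ctx = {}
-- returns true and invokes the matched route's handler
local ok = rx:dispatch("/hello/world", { method = "GET" }, ctx)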

plugin.run_global_rules(api_ctx, router.global_rules, nil) is the global rule matching logic:

        local plugins = core.tablepool.fetch("plugins", 32, 0)
        local values = global_rules.values
        for _, global_rule in config_util.iterate_values(values) do
            api_ctx.conf_type = "global_rule"
            api_ctx.conf_version = global_rule.modifiedIndex
            api_ctx.conf_id = global_rule.value.id

            core.table.clear(plugins)
            plugins = _M.filter(global_rule, plugins)
            if phase_name == nil then
                _M.run_plugin("rewrite", plugins, api_ctx)
                _M.run_plugin("access", plugins, api_ctx)
            else
                _M.run_plugin(phase_name, plugins, api_ctx)
            end
        end

Each global rule is iterated here. The _M.filter(global_rule, plugins) step returns the plugins configured on that rule, and _M.run_plugin executes them, calling each plugin's function for the phase passed in.

Plugin filtering

Next, plugin filtering:

local plugins = plugin.filter(route)
api_ctx.plugins = plugins
plugin.run_plugin("rewrite", plugins, api_ctx)

function _M.filter(user_route, plugins)
    local user_plugin_conf = user_route.value.plugins
    if user_plugin_conf == nil or
       core.table.nkeys(user_plugin_conf) == 0 then
        trace_plugins_info_for_debug(nil)
        -- when 'plugins' is given, always return 'plugins' itself instead
        -- of another one
        return plugins or core.empty_tab
    end

    plugins = plugins or core.tablepool.fetch("plugins", 32, 0)
    for _, plugin_obj in ipairs(local_plugins) do
        local name = plugin_obj.name
        local plugin_conf = user_plugin_conf[name]

        if type(plugin_conf) == "table" and not plugin_conf.disable then
            core.table.insert(plugins, plugin_obj)
            core.table.insert(plugins, plugin_conf)
        end
    end

    trace_plugins_info_for_debug(plugins)

    return plugins
end

Per the plugin initialization analysis, local_plugins is the locally defined plugin list, initialized and priority-sorted during worker init, while user_plugin_conf is the route's latest enabled plugin list from etcd (after merging via plugin_config.merge and merge_service_route). Intersecting the two yields the runnable plugins, which plugin.run_plugin then executes.
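
A self-contained toy version of that intersection (the plugin names and configs are made up). Note the result interleaves plugin object and per-route config, which run_plugin later walks in pairs:

local local_plugins = { -- already priority-sorted at init time
    { name = "limit-count", priority = 2503 },
    { name = "prometheus",  priority = 500 },
}

local user_plugin_conf = { -- what etcd says this route enables
    ["limit-count"] = { count = 2, time_window = 60 },
}

local plugins = {}
for _, plugin_obj in ipairs(local_plugins) do
    local conf = user_plugin_conf[plugin_obj.name]
    if type(conf) == "table" and not conf.disable then
        plugins[#plugins + 1] = plugin_obj -- the plugin module
        plugins[#plugins + 1] = conf       -- its route-level config
    end
end
-- plugins == { <limit-count module>, { count = 2, time_window = 60 } }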

Load balancing

Finally, load balancing:

    if up_id then
        local upstream = get_upstream_by_id(up_id)
        api_ctx.matched_upstream = upstream

...
    local code, err = set_upstream(route, api_ctx)
    if code then
        core.log.error("failed to set upstream: ", err)
        core.response.exit(code)
    end

    local server, err = load_balancer.pick_server(route, api_ctx)
    if not server then
        core.log.error("failed to pick server: ", err)
        return core.response.exit(502)
    end

    api_ctx.picked_server = server

    set_upstream_headers(api_ctx, server)

Upstream nodes can be obtained in two ways. One is to configure them on the route, hard-coding each upstream node's ip, port, and so on;

the other is to pull available nodes from a service registry. The official docs recommend the latter:

When traffic changes, upstream services need to scale out or in, and servers get replaced after hardware failures. If the gateway maintains upstream information through static configuration, the maintenance cost under a microservice architecture is easy to imagine; stale information also affects the business, and human error cannot be ignored either. So it is essential for a gateway to fetch the latest service instances dynamically from a service registry.

As covered in the service discovery initialization section, official implementations such as eureka and nacos internally use a timer to periodically pull available nodes from the registry, ngx_timer_every(fetch_interval, fetch_full_registry), which is what the docs call "near real-time".

With the node list resolved, one node must be picked to handle the request: load_balancer.pick_server(route, api_ctx), which internally filters out unhealthy nodes (when health checks are configured).
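
For the default roundrobin policy the picker builds on lua-resty-balancer; a minimal standalone sketch (addresses and weights are made up):

local roundrobin = require("resty.roundrobin")

local rr = roundrobin:new({
    ["127.0.0.1:1980"] = 1, -- address = weight
    ["127.0.0.1:1981"] = 2,
})

local server = rr:find() -- next node by weighted roundrobin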