Closed jeremyjpj0916 closed 5 months ago
I tested my change out via this code change:
warmup.lua Note the diffs: local DEBUG = ngx.DEBUG log(DEBUG, "warmup acl groups cache for consumer id: ", entity.consumer.id , "...")
local utils = require "kong.tools.utils"
local constants = require "kong.constants"
local buffer = require "string.buffer"
local acl_groups
local load_module_if_exists = require "kong.tools.module".load_module_if_exists
if load_module_if_exists("kong.plugins.acl.groups") then
acl_groups = require "kong.plugins.acl.groups"
end
local cache_warmup = {}
local encode = buffer.encode
local tostring = tostring
local ipairs = ipairs
local math = math
local max = math.max
local floor = math.floor
local kong = kong
local type = type
local ngx = ngx
local now = ngx.now
local log = ngx.log
local NOTICE = ngx.NOTICE
local DEBUG = ngx.DEBUG
local NO_TTL_FLAG = require("kong.resty.mlcache").NO_TTL_FLAG
local GLOBAL_QUERY_OPTS = { workspace = ngx.null, show_ws_id = true }
function cache_warmup._mock_kong(mock_kong)
kong = mock_kong
end
local function warmup_dns(premature, hosts, count)
if premature then
return
end
log(NOTICE, "warming up DNS entries ...")
local start = now()
local upstreams_dao = kong.db["upstreams"]
local upstreams_names = {}
if upstreams_dao then
local page_size
if upstreams_dao.pagination then
page_size = upstreams_dao.pagination.max_page_size
end
for upstream, err in upstreams_dao:each(page_size, GLOBAL_QUERY_OPTS) do
if err then
log(NOTICE, "failed to iterate over upstreams: ", err)
break
end
upstreams_names[upstream.name] = true
end
end
for i = 1, count do
local host = hosts[i]
local must_warm_up = upstreams_names[host] == nil
-- warmup DNS entry only if host is not an upstream name
if must_warm_up then
kong.dns.toip(host)
end
end
local elapsed = floor((now() - start) * 1000)
log(NOTICE, "finished warming up DNS entries",
"' into the cache (in ", tostring(elapsed), "ms)")
end
function cache_warmup.single_entity(dao, entity)
local entity_name = dao.schema.name
local cache_store = constants.ENTITY_CACHE_STORE[entity_name]
local cache_key = dao:cache_key(entity)
local cache = kong[cache_store]
local ok, err
if cache then
ok, err = cache:safe_set(cache_key, entity)
else
cache_key = "kong_core_db_cache" .. cache_key
local ttl = max(kong.configuration.db_cache_ttl or 3600, 0)
local value = encode(entity)
ok, err = ngx.shared.kong_core_db_cache:safe_set(cache_key, value, ttl, ttl == 0 and NO_TTL_FLAG or 0)
end
if not ok then
return nil, err
end
return true
end
function cache_warmup.single_dao(dao)
local entity_name = dao.schema.name
local cache_store = constants.ENTITY_CACHE_STORE[entity_name]
log(NOTICE, "Preloading '", entity_name, "' into the ", cache_store, "...")
local start = now()
local hosts_array, hosts_set, host_count
if entity_name == "services" then
hosts_array = {}
hosts_set = {}
host_count = 0
end
local page_size
if dao.pagination then
page_size = dao.pagination.max_page_size
end
for entity, err in dao:each(page_size, GLOBAL_QUERY_OPTS) do
if err then
return nil, err
end
if entity_name == "services" then
if utils.hostname_type(entity.host) == "name"
and hosts_set[entity.host] == nil then
host_count = host_count + 1
hosts_array[host_count] = entity.host
hosts_set[entity.host] = true
end
end
local ok, err = cache_warmup.single_entity(dao, entity)
if not ok then
return nil, err
end
if entity_name == "acls" and acl_groups ~= nil then
log(DEBUG, "warmup acl groups cache for consumer id: ", entity.consumer.id , "...")
local _, err = acl_groups.warmup_groups_cache(entity.consumer.id)
if err then
log(NOTICE, "warmup acl groups cache for consumer id: ", entity.consumer.id , " err: ", err)
end
end
end
if entity_name == "services" and host_count > 0 then
ngx.timer.at(0, warmup_dns, hosts_array, host_count)
end
local elapsed = floor((now() - start) * 1000)
log(NOTICE, "finished preloading '", entity_name,
"' into the ", cache_store, " (in ", tostring(elapsed), "ms)")
return true
end
-- Loads entities from the database into the cache, for rapid subsequent
-- access. This function is intended to be used during worker initialization.
function cache_warmup.execute(entities)
if not kong.cache or not kong.core_cache then
return true
end
for _, entity_name in ipairs(entities) do
if entity_name == "routes" then
-- do not spend shm memory by caching individual Routes entries
-- because the routes are kept in-memory by building the router object
kong.log.notice("the 'routes' entity is ignored in the list of ",
"'db_cache_warmup_entities' because Kong ",
"caches routes in memory separately")
goto continue
end
if entity_name == "plugins" then
-- to speed up the init, the plugins are warmed up upon initial
-- plugin iterator build
kong.log.notice("the 'plugins' entity is ignored in the list of ",
"'db_cache_warmup_entities' because Kong ",
"pre-warms plugins automatically")
goto continue
end
local dao = kong.db[entity_name]
if not (type(dao) == "table" and dao.schema) then
kong.log.warn(entity_name, " is not a valid entity name, please check ",
"the value of 'db_cache_warmup_entities'")
goto continue
end
local ok, err = cache_warmup.single_dao(dao)
if not ok then
if err == "no memory" then
kong.log.warn("cache warmup has been stopped because cache ",
"memory is exhausted, please consider increasing ",
"the value of 'mem_cache_size' (currently at ",
kong.configuration.mem_cache_size, ")")
return true
end
return nil, err
end
::continue::
end
return true
end
return cache_warmup
And output now looks uniform with no extra acl loggings compared to other resources in notice mode. Gives just enough insight for notice mode I think, and I need to remove plugins since that already happens :P :
2024-06-13T13:27:09.278103341-05:00 2024/06/13 18:27:09 [notice] 48#0: *1 [lua] warmup.lua:114: single_dao(): Preloading 'services' into the core_cache..., context: init_worker_by_lua*
2024-06-13T13:27:09.306282332-05:00 2024/06/13 18:27:09 [notice] 48#0: *1 [lua] warmup.lua:163: single_dao(): finished preloading 'services' into the core_cache (in 29ms), context: init_worker_by_lua*
2024-06-13T13:27:09.306306978-05:00 2024/06/13 18:27:09 [notice] 48#0: *1 [kong] warmup.lua:189 the 'plugins' entity is ignored in the list of 'db_cache_warmup_entities' because Kong pre-warms plugins automatically, context: init_worker_by_lua*
2024-06-13T13:27:09.306311035-05:00 2024/06/13 18:27:09 [notice] 48#0: *1 [lua] warmup.lua:114: single_dao(): Preloading 'consumers' into the cache..., context: init_worker_by_lua*
2024-06-13T13:27:09.338330930-05:00 2024/06/13 18:27:09 [notice] 48#0: *1 [lua] warmup.lua:163: single_dao(): finished preloading 'consumers' into the cache (in 0ms), context: init_worker_by_lua*
2024-06-13T13:27:09.338361006-05:00 2024/06/13 18:27:09 [notice] 48#0: *1 [lua] warmup.lua:114: single_dao(): Preloading 'acls' into the cache..., context: init_worker_by_lua*
2024-06-13T13:27:09.560178812-05:00 2024/06/13 18:27:09 [notice] 48#0: *1 [lua] warmup.lua:163: single_dao(): finished preloading 'acls' into the cache (in 0ms), context: init_worker_by_lua*
2024-06-13T13:27:09.560194852-05:00 2024/06/13 18:27:09 [notice] 48#0: *1 [lua] warmup.lua:114: single_dao(): Preloading 'certificates' into the core_cache..., context: init_worker_by_lua*
2024-06-13T13:27:09.573904618-05:00 2024/06/13 18:27:09 [notice] 48#0: *1 [lua] warmup.lua:163: single_dao(): finished preloading 'certificates' into the core_cache (in 0ms), context: init_worker_by_lua*
2024-06-13T13:27:09.573930266-05:00 2024/06/13 18:27:09 [notice] 48#0: *1 [lua] warmup.lua:114: single_dao(): Preloading 'jwt_secrets' into the cache..., context: init_worker_by_lua*
2024-06-13T13:27:09.589073119-05:00 2024/06/13 18:27:09 [notice] 48#0: *1 [lua] warmup.lua:163: single_dao(): finished preloading 'jwt_secrets' into the cache (in 0ms), context: init_worker_by_lua*
2024-06-13T13:27:09.589116109-05:00 2024/06/13 18:27:09 [notice] 48#0: *1 [lua] warmup.lua:114: single_dao(): Preloading 'oauth2_credentials' into the cache..., context: init_worker_by_lua*
2024-06-13T13:27:09.605808576-05:00 2024/06/13 18:27:09 [notice] 48#0: *1 [lua] warmup.lua:163: single_dao(): finished preloading 'oauth2_credentials' into the cache (in 0ms), context: init_worker_by_lua*
2024-06-13T13:27:09.626576355-05:00 2024/06/13 18:27:09 [notice] 48#0: *1319 [lua] warmup.lua:47: warming up DNS entries ..., context: ngx.timer
2024-06-13T13:27:14.245183292-05:00 2024/06/13 18:27:14 [notice] 48#0: *1319 [lua] warmup.lua:81: finished warming up DNS entries' into the cache (in 4619ms), context: ngx.timer
Side note... I have noticed my Kong runtimes spending a lot of time doing DNS warmups, especially stage and prod, taking as much as 30-60 seconds just to warm up DNS for 1000s of endpoint records. This is a dev environment with like less than 40 proxies on it in this example... why would DNS warmups take so long 4.6+ seconds? Maybe lookups that hang or not found? Any way to speed up DNS cache warmups? Maybe make kong dns lookups fail faster if it was a not found or if the reply from dns server is taking too long(better tuning timeout options maybe that let it time out at 20ms or something?). I know, unrelated to original post but was on my brain re-reading my logs here haha.
Some of my own DNS settings:
- name: KONG_DNS_ORDER
value: LAST,A,SRV,CNAME
- name: KONG_DNS_HOSTSFILE
value: /etc/hosts
- name: KONG_DNS_STALE_TTL
value: "4"
- name: KONG_DNS_NOT_FOUND_TTL
value: "10"
- name: KONG_DNS_RESOLVER
value: 10.xx.xx.xxx,10.xx.xx.xxx
- name: KONG_DNS_ERROR_TTL
value: "1"
- name: KONG_DNS_NO_SYNC
value: "off"
Maybe a way to tune those or better recommended settings?
Side note... I have noticed my Kong runtimes spending a lot of time doing DNS warmups, especially stage and prod, taking as much as 30-60 seconds just to warm up DNS for 1000s of endpoint records. This is a dev environment with like less than 40 proxies on it in this example... why would DNS warmups take so long 4.6+ seconds? Maybe lookups that hang or not found? Any way to speed up DNS cache warmups? Maybe make kong dns lookups fail faster if it was a not found or if the reply from dns server is taking too long(better tuning timeout options maybe that let it time out at 20ms or something?). I know, unrelated to original post but was on my brain re-reading my logs here haha.
Some of my own DNS settings:
- name: KONG_DNS_ORDER value: LAST,A,SRV,CNAME - name: KONG_DNS_HOSTSFILE value: /etc/hosts - name: KONG_DNS_STALE_TTL value: "4" - name: KONG_DNS_NOT_FOUND_TTL value: "10" - name: KONG_DNS_RESOLVER value: 10.xx.xx.xxx,10.xx.xx.xxx - name: KONG_DNS_ERROR_TTL value: "1" - name: KONG_DNS_NO_SYNC value: "off"
Maybe a way to tune those or better recommended settings?
@chobits FYI
It's introduced from #11414. Any special considerations for this log? @oowl
Maybe make kong dns lookups fail faster if it was a not found or if the reply from dns server is taking too long(better tuning timeout options maybe that let it time out at 20ms or something?)
you could modify the timeout
and attempts
in /etc/resolv.conf to control the underlying network timeout.
the total number of times of retransmitting the DNS request when receiving a DNS response times out according to the timeout setting. When trying to retransmit the query, the next nameserver according to the round-robin algorithm will be picked up. It will retry
That means if there is only one nameserver,and attempts equals 5, it will retry the query to this one nameserver 5 times.
Defaults to 5 times.
the time in second for waiting for the response for a single attempt of request transmission. note that this is ''not'' the maximal total waiting time before giving up, the maximal total waiting time can be calculated by the expression timeout x retrans.
Defaults to 2 seconds.
@chobits let me give some of those suggested tweaks a try and see how it goes.
I have: resolv.conf
ubuntu@kong-7cf8b4b7d5-6wqdw:/etc$ cat resolv.conf
search gateway-dev-networkzone-dc.svc.cluster.local svc.cluster.local cluster.local company.com
nameserver 10.xx.0.xx
options ndots:5
But I also override some values with the ENV variables too:
- name: LOCALDOMAIN
value: .
- name: RES_OPTIONS
value: ndots:1
- name: KONG_DNS_ORDER
value: LAST,A,SRV,CNAME
- name: KONG_DNS_HOSTSFILE
value: /etc/hosts
- name: KONG_DNS_STALE_TTL
value: "4"
- name: KONG_DNS_NOT_FOUND_TTL
value: "10"
- name: KONG_DNS_RESOLVER
value: 10.xx.xx.xxx,10.xx.xx.xxx
- name: KONG_DNS_ERROR_TTL
value: "1"
- name: KONG_DNS_NO_SYNC
value: "off"
By using the DNS_RESOLVER kong var override I think it ignores some of the resolv.conf and such.
@chobits I am going to start a separate bug report issue on DNS behavior noticed in Kong on debug mode because I am seeing some discrepancies that I think will be easily reproducible as a bug for not configuring proper overrides in the runtime. https://github.com/Kong/kong/issues/13301
Is there an existing issue for this?
Kong version (
$ kong version
)3.7.0
Current Behavior
When
It seems specifically the acls bit does a lot of extra logging per every single consumer resource in Kong's runtime. This did not happen in notice level on Kong 2.8.x. For Kong instances with 1000s of consumers this print block takes up a lot of stdout early on in the runtime printing all these. Feels to me like the each consumer warmup acl group should be in [debug] mode, not [notice] mode which should be giving more important information.
Expected Behavior
Not log every single consumer uuid built into Kong on cache warmup of acls in notice mode.
See line here: https://github.com/Kong/kong/blob/3.7.0/kong/cache/warmup.lua#L148
I propose Kong makes the success log of it DEBUG level, and let if an acl cache consumer warmup fails like the line below with the err statement can keep that as NOTICE as it is or even ERROR level log since unanticipated behavior.
Steps To Reproduce
Read the current/expected behavior block has all deets
Anything else?
No response