-- wsysdig_summary.lua
--[[ Copyright (C) 2013-2018 Draios Inc dba Sysdig.
This file is part of sysdig. Licensed under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with the
License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
License for the specific language governing permissions and limitations
under the License. --]]

-- Chisel description
description = "internal chisel, creates the json for the wsysdig summary page."
short_description = "wsysdig summary generator"
category = "NA"
hidden = true

-- Imports and globals
require "common"

-- Chisel argument list
args = {
	{
		name = "composite_args",
		description = "The number of events in the file. If this argument is not specified, the chisel will just scan the file, compute the number of events and then relaunch itself with the number as argument.",
		argtype = "int",
		optional = true
	}
}

-- Change this if you are working on this script and don't want to be
-- bothered by indexing.
local disable_index = true
-- Target number of samples in each generated metric timeline.
local n_samples = 400
-- Interval between periodic samples, in ns; computed in on_init() from the
-- capture duration.
local sampling_period = 0
-- Number of timeline samples requested by the caller (first composite arg);
-- defaults to the full resolution.
local arg_n_timeline_samples = n_samples
local json = require ("dkjson")
local gsummary = {} -- The global summary
local ssummary = {} -- Last sample's summary
local nintervals = 0
local file_cache_exists = false
-- Capture duration in ns (second composite arg); nil means "first pass",
-- where the chisel only counts events.
local arg_file_duration = nil
local evtcnt = 0
-- Increase this if the content or the format of the output changes.
-- An increase in this number will cause existing indexes to be discharged.
local index_format_version = 1
-- Argument notification callback.
-- 'composite_args' is a comma-separated pair:
--   <n_timeline_samples>,<file_duration_ns>
function on_set_arg(name, val)
	if name == "composite_args" then
		-- NOTE(review): 'vals' is an accidental global (no 'local') — verify
		-- nothing else relies on it before making it local.
		vals = split(val, ",")
		local val1n = tonumber(vals[1])
		-- Never upsample beyond the native resolution.
		if val1n ~= 0 and val1n < n_samples then
			arg_n_timeline_samples = val1n
		end
		if vals[2] ~= nil then
			arg_file_duration = vals[2]
		end
		return true
	end

	return false
end

-------------------------------------------------------------------------------
-- Summary handling helpers
-------------------------------------------------------------------------------
-- Well-known server port -> protocol/application name.
local services = {
	[80] = 'HTTP',
	[8080] = 'HTTP',
	[443] = 'HTTPs',
	[22] = 'SSH',
	[53] = 'DNS',
	[6666] = 'Sysdig Agent',
	[6667] = 'Sysdig Agent',
	[6443] = 'Sysdig Agent',
	[2379] = 'etcd',
	[22379] = 'etcd',
	[3306] = 'mysql',
	[5432] = 'postgres',
	[6379] = 'redis',
	[5984] = 'couchdb',
	[9880] = 'fluentd',
	[8125] = 'statsd',
	[4730] = 'gearman',
	[50070] = 'hadoop',
	[8020] = 'hadoop',
	[9000] = 'hadoop',
	[60000] = 'hbase',
	[60010] = 'hbase',
	[60020] = 'hbase',
	[60030] = 'hbase',
	[2181] = 'kafka',
	[1978] = 'Kyoto Tycoon',
	[11211] = 'memcached',
	[27017] = 'mongodb',
	[27018] = 'mongodb',
	[27019] = 'mongodb',
	[28017] = 'mongodb',
	[5672] = 'rabbitmq',
	[8087] = 'riak',
	[8098] = 'riak',
	[8983] = 'solr',
	[5555] = 'voltdb'
}

--
-- Populate the protocol table for a crappy application that use a ton of ports
--
for j=9200,9400,1 do
	services[j] = 'elasticsearch'
end

--
-- Create the protocols table by inverting the services table
-- (protocol name -> list of ports).
--
local protocols = {}
for i, v in pairs(services) do
	if protocols[v] == nil then
		protocols[v] = {i}
	else
		protocols[v][#protocols[v] + 1] = i
	end
end

-- Create an empty metric category.
-- excludable: category is omitted from the output when its total is 0.
-- noteworthy: hint for the UI that the category deserves attention.
-- aggregation: 'sum' (default) or 'avg', used when subsampling timelines.
function create_category_basic(excludable, noteworthy, aggregation)
	aggregation = aggregation or 'sum'

	return {
		tot=0,
		max=0,
		timeLine={},
		excludable=excludable,
		noteworthy=noteworthy,
		aggregation=aggregation
	}
end

-- Same as create_category_basic(), but with an extra 'table' field used to
-- count distinct keys (files, ports, processes...).
function create_category_table(excludable, noteworthy, aggregation)
	aggregation = aggregation or 'sum'

	return {
		tot=0,
		max=0,
		timeLine={},
		table={},
		excludable=excludable,
		noteworthy=noteworthy,
		aggregation=aggregation
	}
end

-- (Re)initialize every metric category in the given summary table.
-- Called once at startup for both summaries and after every sample for
-- ssummary.
function reset_summary(s)
	s.procCount = create_category_table(false, false, 'avg')
	s.containerCount = create_category_table(false, false, 'avg')
	s.executedCommands = create_category_basic(false, true)
	s.executedInteractiveCommands = create_category_basic(true, true)
	s.syscallCount = create_category_basic(false, false)
	s.fileCount = create_category_table(true, false)
	s.fileBytes = create_category_basic(false, false)
	s.fileBytesR = create_category_basic(false, false)
	s.fileBytesW = create_category_basic(false, false)
	s.fileCountW = create_category_table(true, false)
	s.sysFileCountW = create_category_table(true, true)
	s.connectionCount = create_category_table(true, false)
	s.netBytes = create_category_basic(false, false)
	s.netBytesR = create_category_basic(false, false)
	s.netBytesW = create_category_basic(false, false)
	s.notifications = create_category_basic(true, true)
	-- listeningPortCount survives resets: it is maintained incrementally
	-- (bind adds, close removes), so only create it the first time.
	if s.listeningPortCount == nil then
		s.listeningPortCount = create_category_table(true, false, 'avg')
	end
	s.newConnectionsO = create_category_basic(true, false)
	s.newConnectionsI = create_category_basic(true, false)
	s.newConnectionsSsh = create_category_basic(true, true)
	s.newListeningPorts = create_category_basic(true, true)
	s.fileDeletionsCount = create_category_basic(true, true)
	s.newSymLinksCount = create_category_basic(true, true)
	s.forkCount = create_category_basic(true, false)
	s.openErrorCount = create_category_basic(true, false)
	s.connectErrorCount = create_category_basic(true, true)
	s.sudoInvocations = create_category_basic(true, true)
	s.setnsInvocations = create_category_basic(true, true)
	s.signalCount = create_category_basic(true, false)
	s.segfaultCount = create_category_basic(true, true)
	s.over1msFileIoCount = create_category_basic(true, false)
	s.over10msFileIoCount = create_category_basic(true, false)
	s.over100msFileIoCount = create_category_basic(true, true)
	s.appLogCount = create_category_basic(true, false)
	s.appLogCountW = create_category_basic(true, false)
	s.appLogCountE = create_category_basic(true, true)
	s.sysLogCount = create_category_basic(true, false)
	s.sysLogCountW = create_category_basic(true, false)
	s.sysLogCountE = create_category_basic(true, true)
	s.dockerEvtsCount = create_category_basic(true, true)

	-- reset dynamic dockerEvtsCount* categories
	for ccat in pairs(s) do
		-- NOTE(review): 'prefix' is an accidental global here and in
		-- add_summaries() — confirm before making it local.
		prefix = 'dockerEvtsCount'
		if starts_with(ccat, prefix) and ccat ~= prefix then
			s[ccat] = create_category_basic(true, true)
		end
	end

	s.sysReqCountHttp = create_category_basic(true, true)
	s.sysErrCountHttp = create_category_basic(true, true)

	-- creating the protocol categories involves two passes of the services table
	for i, v in pairs(protocols) do
		local ccat = 'protoBytes_' .. i
		s[ccat] = create_category_basic(true, false)
	end
end

-- Merge one interval's summary (src) into the global summary (dst),
-- appending a timeline point stamped with (ts_s, ts_ns) for every category.
function add_summaries(ts_s, ts_ns, dst, src)
	local time = sysdig.make_ts(ts_s, ts_ns)

	for k, v in pairs(src) do
		if dst[k] == nil then
			-- add missing category dynamically
			-- dynamic categories are dockerEvtsCount*
			prefix = 'dockerEvtsCount'
			if starts_with(k, prefix) and k ~= prefix then
				dst[k] = create_category_basic(true, true)
			end
		end
		dst[k].tot = dst[k].tot + v.tot
		-- 'max' is the largest per-interval total seen so far.
		if v.tot > dst[k].max then
			dst[k].max = v.tot
		end
		local tl = dst[k].timeLine
		tl[#tl+1] = {t=time, v=v.tot}
		-- Union the per-interval key tables (e.g. seen files/ports).
		if v.table ~= nil then
			local dt = dst[k].table
			for tk, tv in pairs(v.table) do
				dt[tk] = tv
			end
		end
	end
end

-- Reduce a timeline to at most 'nsamples' points by merging consecutive
-- points in groups of ceil(#src / nsamples). Each merged point is either the
-- sum or ('avg') the average of its group. Returns the new timeline plus its
-- recomputed tot and max.
function generate_subsampled_timeline(src, nsamples, op)
	local res = {}
	local ratio = math.ceil(#src / nsamples)
	local k = 0
	local accumulator = 0
	local etime = src[1].t
	local max = 0
	local tot = 0

	for j = 1,#src,1 do
		k = k + 1
		accumulator = accumulator + src[j].v
		if k >= ratio then
			if op == 'avg' then
				accumulator = accumulator / k
			end
			res[#res+1] = {t=etime, v=accumulator}
			tot = tot + accumulator
			if accumulator > max then
				max = accumulator
			end
			k = 0
			accumulator = 0
			-- Stamp the next merged point with the first timestamp of its
			-- group.
			if src[j + 1] ~= nil then
				etime = src[j + 1].t
			end
		end
	end

	return{timeLine=res, tot=tot, max=max}
end

-- Subsample every metric timeline in jtable.metrics down to
-- arg_n_timeline_samples points, updating timeLine and max in place.
-- NOTE(review): 'tot' is intentionally left untouched (only max/timeLine are
-- replaced) — confirm this is the desired behavior.
function subsample_timelines(jtable)
	if arg_n_timeline_samples ~= 0 and
arg_n_timeline_samples ~= n_samples then
		for k, v in pairs(jtable.metrics) do
			local data = v.data
			-- NOTE(review): 'st' is an accidental global — confirm before
			-- making it local.
			st = generate_subsampled_timeline(data.timeLine,
				arg_n_timeline_samples,
				data.aggregation)
			v.data.timeLine = st.timeLine
			v.data.max = st.max
		end
	end
end

-------------------------------------------------------------------------------
-- Helpers to dig into the data coming from sysdig
-------------------------------------------------------------------------------
-- Plain prefix test (no Lua pattern matching).
function string.starts(big_str, small_str)
	return string.sub(big_str, 1, string.len(small_str)) == small_str
end

-- True if the path lies under one of the classic system directories.
function is_system_dir(filename)
	if string.starts(filename, '/bin/') or
		string.starts(filename, '/sbin/') or
		string.starts(filename, '/boot/') or
		string.starts(filename, '/etc/') or
		string.starts(filename, '/lib') or
		string.starts(filename, '/usr/bin/') or
		string.starts(filename, '/usr/sbin/') or
		string.starts(filename, '/usr/share/') or
		string.starts(filename, '/usr/lib')
	then
		return true
	end

	return false
end

-- Heuristic: looks like a log file (".log", "_log" or under /var/log) and is
-- not a compressed archive.
function is_log_file(filename)
	if(string.find(filename, '%.log') or
		string.find(filename, '_log') or
		string.find(filename, '/var/log')) and
		not (string.find(filename, '%.gz') or string.find(filename, '%.tgz'))
	then
		return true
	end

	return false
end

-- Count 'fdname' once in the given table-backed category (distinct-key
-- counter: tot is only bumped the first time a key is seen).
function generate_io_stats(fdname, cnt_cat)
	if fdname == nil then
		return
	end

	if cnt_cat.table[fdname] == nil then
		cnt_cat.table[fdname] = 1
		cnt_cat.tot = cnt_cat.tot + 1
	end
end

-- Accumulate I/O bytes into the per-protocol category for a known server
-- port, using the 'services' port map.
function generate_proto_stats(sport, buflen)
	local proto = services[sport]
	if proto ~= nil then
		local catname = 'protoBytes_' .. proto
		ssummary[catname].tot = ssummary[catname].tot + buflen
	end
end

-- Snapshot the set of listening server ports from the full thread/FD table.
-- Used once at capture start to seed listeningPortCount.
function parse_thread_table_startup()
	local data = {}
	local cnt = 0

	local ttable = sysdig.get_thread_table_barebone(sysdig.get_filter())

	for k, v in pairs(ttable) do
		for kf, vf in pairs(v.fdtable) do
			if vf.is_server then
				data[vf.sport] = 1
			end
		end
	end

	ssummary.listeningPortCount.tot = 0
	for k, v in pairs(data) do
		ssummary.listeningPortCount.tot = ssummary.listeningPortCount.tot + 1
	end
	--print(ssummary.listeningPortCount.tot)

	ssummary.listeningPortCount.table = data
end

-- Count live processes (tid == pid, i.e. thread group leaders) for the
-- current interval.
function parse_thread_table_interval()
	local data = {}
	local cnt = 0

	local ttable = sysdig.get_thread_table_barebone_nofds(sysdig.get_filter())

	for k, v in pairs(ttable) do
		if v.tid == v.pid then
			data[v.pid] = 1
			cnt = cnt + 1
		end
	end

	ssummary.procCount.tot = cnt
	ssummary.procCount.table = data
end

-- Count running containers (id -> name) for the current interval.
function parse_container_table()
	local data = {}
	local cnt = 0

	local ctable = sysdig.get_container_table()

	for k, v in pairs(ctable) do
		data[v.id] = v.name
		cnt = cnt + 1
	end

	ssummary.containerCount.tot = cnt
	ssummary.containerCount.table = data
end

-- Bump the dynamic per-docker-event-type category, creating it on first use.
function update_docker_cats(evt_type)
	local cat = 'dockerEvtsCount' .. evt_type
	if (ssummary[cat] == nil) then
		ssummary[cat] = create_category_basic(true, true)
	end
	ssummary[cat].tot = ssummary[cat].tot + 1
end

-------------------------------------------------------------------------------
-- Initialization callbacks
-------------------------------------------------------------------------------
-- Chisel init. On the first pass (no composite args) we only count events.
-- On the second pass we configure the sampling interval, reset the
-- summaries, request the event fields and open the JSON output.
function on_init()
	if arg_file_duration == nil then
		return true
	end

	-- Indexing is only valid for an unfiltered capture.
	if(sysdig.get_filter() ~= nil and sysdig.get_filter() ~= '') then
		disable_index = true
	end

	sampling_period = arg_file_duration / (n_samples - 1)
	chisel.set_precise_interval_ns(sampling_period)

	-- Emit a progress record roughly every 3% of the samples.
	-- NOTE(review): percent_update_sample_period and the f* field handles
	-- below are globals shared with the other callbacks.
	percent_update_sample_period = math.floor(n_samples / 100 * 3)
	if percent_update_sample_period < 2 then
		percent_update_sample_period = 1
	end

	reset_summary(gsummary)
	reset_summary(ssummary)

	-- set the following fields on_event()
	fetype = chisel.request_field("evt.type")
	fdir = chisel.request_field("evt.dir")
	frawres = chisel.request_field("evt.rawres")
	ffdcontname = chisel.request_field("fd.containername")
	ffdname = chisel.request_field("fd.name")
	ffdtype = chisel.request_field("fd.type")
	fiswrite = chisel.request_field("evt.is_io_write")
	fisread = chisel.request_field("evt.is_io_read")
	fbuffer = chisel.request_field("evt.buffer")
	fbuflen = chisel.request_field("evt.buflen")
	fsport = chisel.request_field("fd.sport")
	flport = chisel.request_field("fd.lport")
	ftypechar = chisel.request_field("fd.typechar")
	fexe = chisel.request_field("evt.arg.exe")
	fsignal = chisel.request_field("evt.arg.sig")
	flatency = chisel.request_field("evt.latency")
	fsyslogsev = chisel.request_field("syslog.severity")
	finfrasource = chisel.request_field("evt.arg.source")
	finfraname = chisel.request_field("evt.arg.name")
	fpname = chisel.request_field("proc.pname")

	-- kick off GC
	collectgarbage()

	print('{"slices": [')

	return true
end

-- Capture-start callback. If an index with a matching format version exists
-- on disk (and indexing is enabled), stop the capture early and reuse it.
-- Also seeds the listening-port snapshot.
function on_capture_start()
	if arg_file_duration == nil then
		return true
	end

	if not disable_index then
		local dirname = sysdig.get_evtsource_name() .. '_wd_index'
		local f = io.open(dirname .. '/VERSION', "r")
		if f ~= nil then
			local version = tonumber(f:read "*all")
			f:close()
			if version == index_format_version then
				file_cache_exists = true
				sysdig.end_capture()
			end
		end
	end

	parse_thread_table_startup()

	return true
end

-------------------------------------------------------------------------------
-- Event callback
-------------------------------------------------------------------------------
-- Per-event dispatcher: classifies each event and updates the matching
-- categories in ssummary. On the first pass it only counts events.
function on_event()
	if arg_file_duration == nil then
		evtcnt = evtcnt + 1
		return true
	end

	ssummary.syscallCount.tot = ssummary.syscallCount.tot + 1

	local dir = evt.field(fdir)

	if dir ~= nil then
		if dir == '<' then
			-- Exit events.
			local rawres = evt.field(frawres)
			local etype = evt.field(fetype)

			if rawres ~= nil and rawres >= 0 then
				-- Successful calls.
				local fdcontname = evt.field(ffdcontname)
				local fdname = evt.field(ffdname)
				local fdtype = evt.field(ffdtype)
				local iswrite = evt.field(fiswrite)
				local isread = evt.field(fisread)

				if iswrite or isread then
					if fdtype == 'file' then
						local buflen = evt.field(fbuflen)
						if buflen == nil then
							buflen = 0
						end

						generate_io_stats(fdcontname, ssummary.fileCount)

						if iswrite then
							generate_io_stats(fdcontname, ssummary.fileCountW)
							ssummary.fileBytes.tot = ssummary.fileBytes.tot + buflen
							ssummary.fileBytesW.tot = ssummary.fileBytesW.tot + buflen

							if is_system_dir(fdname) then
								generate_io_stats(fdname, ssummary.sysFileCountW)
							end

							-- log metrics support
							local syslogsev = evt.field(fsyslogsev)
							if syslogsev ~= nil then
								-- syslog severity: 4 is warning, <4 is
								-- error or worse.
								ssummary.sysLogCount.tot = ssummary.sysLogCount.tot + 1
								if syslogsev == 4 then
									ssummary.sysLogCountW.tot = ssummary.sysLogCountW.tot + 1
								elseif syslogsev < 4 then
									ssummary.sysLogCountE.tot = ssummary.sysLogCountE.tot + 1
								end
							elseif is_log_file(fdname) then
								-- Count each non-empty line written to an
								-- app log file, keyword-matching severity.
								local buf = evt.field(fbuffer)
								local msgs = split(buf, "\n")
								for i, msg in ipairs(msgs) do
									if #msg ~= 0 then
										ssummary.appLogCount.tot = ssummary.appLogCount.tot + 1
										local ls = string.lower(msg)
										if string.find(ls, "warn") then
											ssummary.appLogCountW.tot =
ssummary.appLogCountW.tot + 1
										elseif string.find(ls, "error") or
											string.find(ls, "critic") or
											string.find(ls, "emergency") or
											string.find(ls, "alert") then
											ssummary.appLogCountE.tot = ssummary.appLogCountE.tot + 1
										end
									end
								end
							end
						elseif isread then
							ssummary.fileBytes.tot = ssummary.fileBytes.tot + buflen
							ssummary.fileBytesR.tot = ssummary.fileBytesR.tot + buflen
						end

						-- Slow file I/O buckets; device files are excluded.
						local latency = evt.field(flatency)
						if latency ~= nil and not string.starts(fdname, '/dev/') then
							if latency > 100000000 then
								ssummary.over100msFileIoCount.tot = ssummary.over100msFileIoCount.tot + 1
							end
							if latency > 10000000 then
								ssummary.over10msFileIoCount.tot = ssummary.over10msFileIoCount.tot + 1
							end
							if latency > 1000000 then
								ssummary.over1msFileIoCount.tot = ssummary.over1msFileIoCount.tot + 1
							end
						end
					elseif fdtype == 'ipv4' or fdtype == 'ipv6' then
						local buflen = evt.field(fbuflen)
						if buflen == nil then
							buflen = 0
						end

						generate_io_stats(fdcontname, ssummary.connectionCount)

						if iswrite then
							ssummary.netBytes.tot = ssummary.netBytes.tot + buflen
							ssummary.netBytesW.tot = ssummary.netBytesW.tot + buflen
						elseif isread then
							ssummary.netBytes.tot = ssummary.netBytes.tot + buflen
							ssummary.netBytesR.tot = ssummary.netBytesR.tot + buflen
						end

						local sport = evt.field(fsport)
						if sport ~= nil then
							generate_proto_stats(sport, buflen)
						end

						-- HTTP response detection on the I/O buffer.
						local buf = evt.field(fbuffer)
						if string.starts(buf, 'HTTP/') then
							ssummary.sysReqCountHttp.tot = ssummary.sysReqCountHttp.tot + 1
							-- The second token of a status line is the code.
							local parts = split(buf, ' ')
							if tonumber(parts[2]) ~= 200 then
								ssummary.sysErrCountHttp.tot = ssummary.sysErrCountHttp.tot + 1
							end
						end
					elseif fdtype == 'unix' then
						if iswrite then
							-- apps can write to syslog using unix pipes
							local syslogsev = evt.field(fsyslogsev)
							if syslogsev ~= nil then
								ssummary.sysLogCount.tot = ssummary.sysLogCount.tot + 1
								if syslogsev == 4 then
									ssummary.sysLogCountW.tot = ssummary.sysLogCountW.tot + 1
								elseif syslogsev < 4 then
									ssummary.sysLogCountE.tot = ssummary.sysLogCountE.tot + 1
								end
							end
						end
					end
				elseif etype == 'execve' then
					ssummary.executedCommands.tot = ssummary.executedCommands.tot + 1

					-- A bash parent process marks the command as interactive.
					local pname = evt.field(fpname)
					if pname ~= nil then
						if string.find(pname, 'bash') then
							ssummary.executedInteractiveCommands.tot = ssummary.executedInteractiveCommands.tot + 1
						end
					end

					local exe = evt.field(fexe)
					if exe == 'sudo' then
						ssummary.sudoInvocations.tot = ssummary.sudoInvocations.tot + 1
					end
				elseif etype == 'bind' then
					local sport = evt.field(fsport)
					if sport ~= nil then
						generate_io_stats(sport, ssummary.listeningPortCount)
						ssummary.newListeningPorts.tot = ssummary.newListeningPorts.tot + 1
					end
				elseif etype == 'connect' then
					local sport = evt.field(fsport)
					if sport ~= nil then
						ssummary.newConnectionsO.tot = ssummary.newConnectionsO.tot + 1
						if sport == 22 then
							ssummary.newConnectionsSsh.tot = ssummary.newConnectionsSsh.tot + 1
						end
					end
				elseif etype == 'accept' then
					local sport = evt.field(fsport)
					if sport ~= nil then
						ssummary.newConnectionsI.tot = ssummary.newConnectionsI.tot + 1
						if sport == 22 then
							ssummary.newConnectionsSsh.tot = ssummary.newConnectionsSsh.tot + 1
						end
					end
				elseif etype == 'unlink' or etype == 'unlinkat' then
					ssummary.fileDeletionsCount.tot = ssummary.fileDeletionsCount.tot + 1
				elseif etype == 'symlink' or etype == 'symlinkat' then
					ssummary.newSymLinksCount.tot = ssummary.newSymLinksCount.tot + 1
				elseif etype == 'clone' or etype == 'fork' then
					-- Only count in the parent (child returns 0).
					if rawres > 0 then
						ssummary.forkCount.tot = ssummary.forkCount.tot + 1
					end
				elseif etype == 'setns' then
					ssummary.setnsInvocations.tot = ssummary.setnsInvocations.tot + 1
				end
			elseif etype == 'connect' then
				-- Failed calls (rawres < 0 or nil).
				local sport = evt.field(fsport)
				if sport ~= nil then
					ssummary.newConnectionsO.tot = ssummary.newConnectionsO.tot + 1
					if sport == 22 then
						ssummary.newConnectionsSsh.tot = ssummary.newConnectionsSsh.tot + 1
					end
				end

				-- -115 is presumably -EINPROGRESS (non-blocking connect in
				-- flight, not a real failure) — consistent with the
				-- 'evt.res!=EINPROGRESS' filter used in the output section.
				if rawres ~= -115 then
					local fdtype = evt.field(ffdtype)
					if fdtype == 'ipv4' or fdtype == 'ipv6' then
						ssummary.connectErrorCount.tot = ssummary.connectErrorCount.tot + 1
					end
				end
			elseif etype == 'accept' then
				local sport = evt.field(fsport)
				if sport ~= nil then
					ssummary.newConnectionsI.tot = ssummary.newConnectionsI.tot + 1
					if sport == 22 then
						ssummary.newConnectionsSsh.tot = ssummary.newConnectionsSsh.tot + 1
					end
				end
			elseif etype == 'open' then
				ssummary.openErrorCount.tot = ssummary.openErrorCount.tot + 1
			end
		else
			-- Enter events and events with no direction semantics.
			local etype = evt.field(fetype)

			if etype == 'close' then
				local sport = evt.field(fsport)
				if sport ~= nil then
					local typechar = evt.field(ftypechar)
					-- typechar '2' marks an ipv4 socket fd; closing a
					-- tracked listening port removes it from the set.
					if typechar == '2' then
						if ssummary.listeningPortCount.table[sport] ~= nil then
							ssummary.listeningPortCount.table[sport] = nil
							ssummary.listeningPortCount.tot = ssummary.listeningPortCount.tot - 1
						end
					end
				end
			elseif etype == 'signaldeliver' then
				ssummary.signalCount.tot = ssummary.signalCount.tot + 1

				local signal = evt.field(fsignal)
				if signal == 'SIGSEGV' then
					ssummary.segfaultCount.tot = ssummary.segfaultCount.tot + 1
				end
			elseif etype == 'notification' then
				ssummary.notifications.tot = ssummary.notifications.tot + 1
			elseif etype == 'infra' then
				local infrasource = evt.field(finfrasource)
				if infrasource == 'docker' then
					ssummary.dockerEvtsCount.tot = ssummary.dockerEvtsCount.tot + 1
					local infraname = evt.field(finfraname)
					update_docker_cats(infraname)
				end
			end
		end
	end

	return true
end

-------------------------------------------------------------------------------
-- Periodic timeout callback
-------------------------------------------------------------------------------
-- Fired once per sampling period: snapshots process/container counts, folds
-- the interval summary into the global one, resets the interval summary and
-- periodically emits a JSON progress record.
function on_interval(ts_s, ts_ns, delta)
	parse_thread_table_interval()
	parse_container_table()

	if nintervals == 0 then
		-- clean up events
		collectgarbage()
	end
	--print(json.encode(ssummary.connectionCount, { indent = true }))

	add_summaries(ts_s, ts_ns, gsummary, ssummary)
	reset_summary(ssummary)

	if nintervals % percent_update_sample_period == 0 then
		local progress = sysdig.get_read_progress()
		-- Never report 100 from here; completion is signaled elsewhere.
		if progress == 100 then
			progress = 99.99
		end
		print('{"progress": ' .. progress ..
' },') io.flush(stdout) end nintervals = nintervals + 1 return true end ------------------------------------------------------------------------------- -- End of capture output generation ------------------------------------------------------------------------------- function update_table_count(cat) if cat.table ~= nil then local cnt = 0 for tk, tv in pairs(cat.table) do cnt = cnt + 1 end cat.tot = cnt cat.table = nil end end function update_table_counts() for k, v in pairs(gsummary) do update_table_count(v) end end function should_include(category) if category.excludable then if category.tot ~= 0 then return true else return false end else return true end end function get_category_table(include_network_apps, include_security, include_performance, include_logs, include_infrastructure) local res = { {id='general', name='General'}, {id='file', name='File'}, {id='network', name='Network'}, } if include_network_apps then res[#res+1] = {id='napps', name='Network Apps'} end if include_security then res[#res+1] = {id='security', name='Security'} end if include_performance then res[#res+1] = {id='performance', name='Performance'} end if include_logs then res[#res+1] = {id='logs', name='Logs'} end if include_infrastructure then res[#res+1] = {id='infrastructure', name='Infrastructure'} end return res end function build_output(captureDuration) local ctable = copytable(gsummary.containerCount.table) local res = {} local has_cat_logs = false; local has_cat_infrastructure = false; local has_cat_netapps = false; local jtable = { info={ IndexFormatVersion=index_format_version, containers=ctable, durationNs=captureDuration, startTs = sysdig.get_firstevent_ts(), endTs = sysdig.get_lastevent_ts() }, metrics=res} local filter = sysdig.get_filter() update_table_counts() if should_include(gsummary.notifications) then res[#res+1] = { name = 'Sysdig Secure Notifications', desc = 'Sysdig Secure notifications. 
Sysdig secure inserts a "notification" event in the capture stream each time a policy triggers. This metric counts the notifications. Chart it over time to compare the other metrics with the point in time where policies were triggered.', category = 'general', targetView = 'notifications', drillDownKey = 'NONE', data = gsummary.notifications } end if should_include(gsummary.procCount) then res[#res+1] = { name = 'Running Processes', desc = 'Total number of processes that were running during the capture', category = 'general', targetView = 'procs', drillDownKey = '', data = gsummary.procCount } end if(not string.find(filter, 'container') and should_include(gsummary.containerCount)) then res[#res+1] = { name = 'Running Containers', desc = 'Total number of containers that were running during the capture', category = 'general', targetView = 'containers', drillDownKey = '', data = gsummary.containerCount } end if(should_include(gsummary.syscallCount)) then res[#res+1] = { name = 'System Calls', desc = 'Number of system calls performed by any process/container in the system', category = 'general', targetView = 'syscalls', drillDownKey = '', data = gsummary.syscallCount } end if should_include(gsummary.fileBytes) then res[#res+1] = { name = 'File Bytes In+Out', desc = 'Amount of bytes read from or written to the file system', category = 'file', targetView = 'files', drillDownKey = 'fd.directory', targetViewSortingCol = 2, data = gsummary.fileBytes } end if should_include(gsummary.fileBytesR) then res[#res+1] = { name = 'File Bytes In', desc = 'Amount of bytes read from the file system', category = 'file', targetView = 'files', drillDownKey = 'fd.directory', targetViewSortingCol = 0, data = gsummary.fileBytesR } end if should_include(gsummary.fileBytesW) then res[#res+1] = { name = 'File Bytes Out', desc = 'Amount of bytes written to the file system', category = 'file', targetView = 'files', drillDownKey = 'fd.directory', targetViewSortingCol = 1, data = gsummary.fileBytesW 
} end if should_include(gsummary.fileCount) then res[#res+1] = { name = 'Accessed Files', desc = 'Number of files that have been accessed during the capture', category = 'file', targetView = 'files', targetViewFilter = 'evt.is_io_read=true', drillDownKey = 'fd.directory', targetViewSortingCol = 2, data = gsummary.fileCount } end if should_include(gsummary.fileCountW) then res[#res+1] = { name = 'Modified Files', desc = 'Number of files that have been received writes during the capture', category = 'file', targetView = 'files', drillDownKey = 'fd.directory', targetViewSortingCol = 1, targetViewFilter = 'evt.is_io_write=true', data = gsummary.fileCountW } end if should_include(gsummary.netBytes) then res[#res+1] = { name = 'Net Bytes In+Out', desc = 'Amount of bytes read from or written to the network', category = 'network', targetView = 'sports', drillDownKey = 'fd.directory', targetViewSortingCol = 4, data = gsummary.netBytes } end if should_include(gsummary.netBytesR) then res[#res+1] = { name = 'Net Bytes In', desc = 'Amount of bytes read from the network', category = 'network', targetView = 'sports', drillDownKey = 'fd.sport', targetViewSortingCol = 2, data = gsummary.netBytesR } end if should_include(gsummary.netBytesW) then res[#res+1] = { name = 'Net Bytes Out', desc = 'Amount of bytes written to the network', category = 'network', targetView = 'sports', drillDownKey = 'fd.sport', targetViewSortingCol = 3, data = gsummary.netBytesW } end if should_include(gsummary.connectionCount) then res[#res+1] = { name = 'Active Network Connections', desc = 'Number of network connections that have been accessed during the capture', category = 'network', targetView = 'connections', targetViewFilter = 'evt.is_io=true', drillDownKey = 'fd.sport', targetViewSortingCol = 8, data = gsummary.connectionCount } end if should_include(gsummary.listeningPortCount) then res[#res+1] = { name = 'Listening Ports', desc = 'Number of open ports on this system', category = 'network', 
targetView = 'port_bindings', drillDownKey = 'fd.sport', data = gsummary.listeningPortCount } end if should_include(gsummary.newListeningPorts) then res[#res+1] = { name = 'New Listening Ports', desc = 'Number of open ports that have been added during the observation interval', category = 'network', targetView = 'port_bindings', drillDownKey = 'fd.sport', data = gsummary.newListeningPorts } end if should_include(gsummary.newConnectionsO) then res[#res+1] = { name = 'New Outbound Connections', desc = 'New client network connections', category = 'network', targetView = 'dig', targetViewTitle = 'Connect events', targetViewFilter = 'evt.type=connect and evt.dir=< and fd.sport exists', drillDownKey = 'NONE', data = gsummary.newConnectionsO } end if should_include(gsummary.newConnectionsI) then res[#res+1] = { name = 'New Inbound Connections', desc = 'New server network connections', category = 'network', targetView = 'dig', targetViewTitle = 'Connect events', targetViewFilter = 'evt.type=accept and evt.dir=< and fd.sport exists', drillDownKey = '', data = gsummary.newConnectionsI } end if should_include(gsummary.executedCommands) then res[#res+1] = { name = 'Executed Commands', desc = 'Number of new programs that have been executed during the observed interval', category = 'security', targetView = 'spy_users_wsysdig', drillDownKey = '', data = gsummary.executedCommands } end if should_include(gsummary.executedInteractiveCommands) then res[#res+1] = { name = 'Executed Interactive Commands', desc = 'Number of new programs that have been executed from a shell during the observed interval', category = 'security', targetView = 'spy_users_wsysdig', targetViewFilter = 'proc.pname=bash', drillDownKey = 'NONE', data = gsummary.executedInteractiveCommands } end if should_include(gsummary.newSymLinksCount) then res[#res+1] = { name = 'New Symlinks', desc = 'Number of new symbolic links that were created', category = 'security', targetView = 'dig', targetViewTitle = 'Symlink 
creations', targetViewFilter = '(evt.type=symlink or evt.type=symlinkat) and evt.dir=< and evt.failed = false', drillDownKey = 'NONE', data = gsummary.newSymLinksCount } end if should_include(gsummary.sysFileCountW) then res[#res+1] = { name = 'Modified System Files', desc = 'Number of files that have been accessed during the capture', category = 'security', targetViewSortingCol = 1, targetView = 'files', targetViewFilter = 'evt.is_io_write=true', drillDownKey = 'NONE', data = gsummary.sysFileCountW } end if should_include(gsummary.sudoInvocations) then res[#res+1] = { name = 'Sudo Invocations', desc = 'Number of times the sudo program has been called', category = 'security', targetView = 'dig', targetViewTitle = 'Sudo executions', targetViewFilter = 'evt.type=execve and evt.arg.exe=sudo', drillDownKey = 'NONE', data = gsummary.sudoInvocations } end if should_include(gsummary.setnsInvocations) then res[#res+1] = { name = 'Setns Invocations', desc = 'Number of times the setns system call has been called. 
Setns is typically used to "enter" in another container', category = 'security', targetView = 'dig', targetViewTitle = 'Setns executions', targetViewFilter = 'evt.type=setns', drillDownKey = 'NONE', data = gsummary.setnsInvocations } end if should_include(gsummary.newConnectionsSsh) then res[#res+1] = { name = 'New SSH Connections', desc = 'Client or server connections', category = 'security', targetView = 'dig', targetViewTitle = 'Connect events', targetViewFilter = '(evt.type=accept or evt.type=connect) and evt.dir=< and fd.sport=22', drillDownKey = '', data = gsummary.newConnectionsSsh } end if should_include(gsummary.fileDeletionsCount) then res[#res+1] = { name = 'Deleted Files', desc = 'Number of files that were deleted', category = 'security', targetView = 'dig', targetViewTitle = 'File deletions', targetViewFilter = 'evt.type=unlink or evt.type=unlinkat', drillDownKey = 'NONE', data = gsummary.fileDeletionsCount } end if should_include(gsummary.sysReqCountHttp) then res[#res+1] = { name = 'HTTP Requests', desc = 'Number of HTTP requests', category = 'performance', targetView = 'echo', targetViewTitle = 'HTTP responses', targetViewFilter = '(fd.type=ipv4 or fd.type=ipv6) and evt.buffer contains "HTTP/"', drillDownKey = 'fd.directory', data = gsummary.sysReqCountHttp } end if should_include(gsummary.sysErrCountHttp) then res[#res+1] = { name = 'HTTP Errors', desc = 'Number of HTTP responses with code different from 400', category = 'performance', targetView = 'echo', targetViewTitle = 'HTTP responses', targetViewFilter = '(fd.type=ipv4 or fd.type=ipv6) and evt.arg.data startswith "HTTP/" and not evt.arg.data contains "200"', drillDownKey = 'fd.directory', data = gsummary.sysErrCountHttp } end if should_include(gsummary.openErrorCount) then res[#res+1] = { name = 'File Open Errors', desc = 'Count of failed file opens', category = 'performance', targetView = 'dig', targetViewTitle = 'Failed open() calls', targetViewFilter = 'evt.type=open and evt.rawres<0', 
drillDownKey = 'fd.directory', data = gsummary.openErrorCount } end

-- Performance-related counters: process creation, failed connects, signals,
-- segfaults and slow file I/O at increasing latency thresholds. Each entry
-- becomes a clickable row in the wsysdig summary page; targetView/
-- targetViewFilter tell the UI which view to open on drill-down.
if should_include(gsummary.forkCount) then
	res[#res+1] = {
		name = 'Fork Count',
		desc = 'Count of processes and threads that have been created',
		category = 'performance',
		targetView = 'dig',
		targetViewTitle = 'Clone executions',
		targetViewFilter = 'evt.type=clone and evt.rawres=0',
		drillDownKey = 'NONE',
		data = gsummary.forkCount
	}
end

if should_include(gsummary.connectErrorCount) then
	res[#res+1] = {
		name = 'Failed Connection Attempts',
		desc = 'Count of failed network connect calls',
		category = 'performance',
		targetView = 'dig',
		targetViewTitle = 'Failed connect() calls',
		targetViewFilter = 'evt.type=connect and (fd.type=ipv4 or fd.type=ipv6) and evt.rawres<0 and evt.res!=EINPROGRESS',
		drillDownKey = 'NONE',
		data = gsummary.connectErrorCount
	}
end

if should_include(gsummary.signalCount) then
	res[#res+1] = {
		name = 'Received Signals',
		desc = 'Number of unix signals that have been received by the processes on the system',
		category = 'performance',
		targetView = 'dig',
		targetViewTitle = 'Received signals',
		targetViewFilter = 'evt.type=signaldeliver',
		drillDownKey = 'NONE',
		data = gsummary.signalCount
	}
end

if should_include(gsummary.segfaultCount) then
	res[#res+1] = {
		name = 'Segmentation Faults',
		desc = 'Number of process segfaults',
		category = 'performance',
		targetView = 'dig',
		targetViewTitle = 'List of segfault events',
		targetViewFilter = 'evt.type=signaldeliver and evt.arg.sig=SIGSEGV',
		drillDownKey = 'NONE',
		data = gsummary.segfaultCount
	}
end

if should_include(gsummary.over1msFileIoCount) then
	res[#res+1] = {
		name = 'Slow File I/O calls (1ms+)',
		desc = 'Number of file read or write calls that took more than 1ms to return',
		category = 'performance',
		targetView = 'slow_io',
		targetViewSortingCol = 1,
		drillDownKey = 'NONE',
		data = gsummary.over1msFileIoCount
	}
end

if should_include(gsummary.over10msFileIoCount) then
	res[#res+1] = {
		name = 'Slow File I/O calls (10ms+)',
		desc = 'Number of file read or write calls that took more than 10ms to return',
		category = 'performance',
		targetView = 'slow_io',
		targetViewSortingCol = 1,
		drillDownKey = 'NONE',
		data = gsummary.over10msFileIoCount
	}
end

if should_include(gsummary.over100msFileIoCount) then
	res[#res+1] = {
		name = 'Slow File I/O calls (100ms+)',
		desc = 'Number of file read or write calls that took more than 100ms to return',
		category = 'performance',
		targetView = 'slow_io',
		targetViewSortingCol = 1,
		drillDownKey = 'NONE',
		data = gsummary.over100msFileIoCount
	}
end

-- Log-related counters. Each one that fires also flags the 'logs' category as
-- present (has_cat_logs) so it is included in the category table.
if should_include(gsummary.appLogCount) then
	res[#res+1] = {
		name = 'App Log Messages',
		desc = 'Number of writes to application log files',
		category = 'logs',
		targetView = 'echo',
		targetViewTitle = 'Application Log Messages',
		targetViewFilter = '((fd.name contains .log or fd.name contains _log or fd.name contains /var/log) and not (fd.name contains .gz or fd.name contains .tgz)) and evt.is_io_write=true',
		drillDownKey = 'NONE',
		data = gsummary.appLogCount
	}
	has_cat_logs = true
end

if should_include(gsummary.appLogCountW) then
	res[#res+1] = {
		name = 'App Log Warning Messages',
		desc = 'Number of writes to application log files containing the word "warning"',
		category = 'logs',
		targetView = 'echo',
		targetViewTitle = 'Warning Application Log Messages',
		-- NOTE(review): this filter says 'fd.name icontains .tgz' while the
		-- sibling entries use 'fd.name contains .tgz' -- looks like an
		-- accidental inconsistency; confirm and align.
		targetViewFilter = '((fd.name contains .log or fd.name contains _log or fd.name contains /var/log) and not (fd.name contains .gz or fd.name icontains .tgz)) and evt.is_io_write=true and evt.arg.data icontains warn',
		drillDownKey = 'NONE',
		data = gsummary.appLogCountW
	}
	has_cat_logs = true
end

if should_include(gsummary.appLogCountE) then
	res[#res+1] = {
		name = 'App Log Error Messages',
		desc = 'Number of writes to application log files containing the word "error"',
		category = 'logs',
		targetView = 'echo',
		targetViewTitle = 'Error Application Log Messages',
		targetViewFilter = '((fd.name contains .log or fd.name contains _log or fd.name contains /var/log) and not (fd.name contains .gz or fd.name contains .tgz)) and evt.is_io_write=true and (evt.arg.data icontains error or evt.arg.data icontains critic or evt.arg.data icontains emerg or evt.arg.data icontains alert)',
		drillDownKey = 'NONE',
		data = gsummary.appLogCountE
	}
	has_cat_logs = true
end

if should_include(gsummary.sysLogCount) then
	res[#res+1] = {
		name = 'Syslog Messages',
		desc = 'Number of entries written to syslog',
		category = 'logs',
		targetView = 'spy_syslog',
		targetViewTitle = 'Syslog Messages',
		drillDownKey = 'NONE',
		data = gsummary.sysLogCount
	}
	has_cat_logs = true
end

if should_include(gsummary.sysLogCountW) then
	res[#res+1] = {
		name = 'Syslog Warning Messages',
		desc = 'Number of entries with severity WARNING written to syslog',
		category = 'logs',
		targetView = 'spy_syslog',
		targetViewTitle = 'Syslog Messages',
		targetViewFilter = 'syslog.severity=4',
		drillDownKey = 'NONE',
		data = gsummary.sysLogCountW
	}
	has_cat_logs = true
end

if should_include(gsummary.sysLogCountE) then
	res[#res+1] = {
		name = 'Syslog Error Messages',
		desc = 'Number of entries with severity ERROR or lower written to syslog',
		category = 'logs',
		targetView = 'spy_syslog',
		targetViewTitle = 'Syslog Messages',
		targetViewFilter = 'syslog.severity<4',
		drillDownKey = 'NONE',
		data = gsummary.sysLogCountE
	}
	has_cat_logs = true
end

-- Infrastructure counters (docker activity). Flags the 'infrastructure'
-- category as present when emitted.
if should_include(gsummary.dockerEvtsCount) then
	res[#res+1] = {
		name = 'Docker Events',
		desc = 'Total number of events generated by docker activity',
		category = 'infrastructure',
		targetView = 'docker_events',
		drillDownKey = 'NONE',
		data = gsummary.dockerEvtsCount
	}
	has_cat_infrastructure = true
end

-- evaluate dynamic dockerEvtsCount* categories
-- NOTE(review): 'prefix', 'dockerEvtsCountEvents' and 'ccat_name' below are
-- accidental globals -- they should be declared 'local'. Confirm nothing else
-- in the file relies on them before tightening the scope.
prefix = 'dockerEvtsCount'
dockerEvtsCountEvents = {}
for ccat in pairs(gsummary) do
	-- Match only the per-type counters, not the aggregate 'dockerEvtsCount'.
	if starts_with(ccat, prefix) and ccat ~= prefix then
		if should_include(gsummary[ccat]) then
			-- Strip the prefix to obtain the docker event type name.
			ccat_name = ccat:sub(#prefix + 1)
			dockerEvtsCountEvents[ccat] = {
				name = ccat_name .. ' Events',
				desc = 'Total number of docker events of type ' ..
ccat_name,
				category = 'infrastructure',
				targetView = 'docker_events',
				targetViewFilter = 'evt.arg.name="' .. ccat_name .. '"',
				drillDownKey = 'NONE',
				data = gsummary[ccat]
			}
			has_cat_infrastructure = true
		end
	end
end

-- Emit the dynamic docker entries in deterministic (alphabetical) order so
-- the final list is "stable" across runs.
-- FIX: the original code called table.sort() directly on
-- dockerEvtsCountEvents and then iterated it with pairs(). Since that table
-- is keyed by the gsummary string key it has no array part, so table.sort()
-- (which only orders the array part) was a no-op, pairs() visited entries in
-- unspecified order, and the comparator ('a.name - b.name', arithmetic on
-- strings) would have raised an error had it ever been invoked. Sorting an
-- explicit key list restores the stated intent; ordering by key is equivalent
-- to ordering by name because name = key minus a fixed prefix.
local sortedEvtKeys = {}
for k in pairs(dockerEvtsCountEvents) do
	sortedEvtKeys[#sortedEvtKeys + 1] = k
end
table.sort(sortedEvtKeys)
for _, k in ipairs(sortedEvtKeys) do
	res[#res+1] = dockerEvtsCountEvents[k]
end

-- One entry per known network protocol that produced traffic.
for i, v in pairs(protocols) do
	local ccat = 'protoBytes_' .. i
	if should_include(gsummary[ccat]) then
		-- Build an OR filter matching any of the protocol's server ports.
		local flt = ''
		for ii, vv in pairs(v) do
			flt = flt .. ('fd.sport=' .. vv .. ' or ')
		end
		flt = string.sub(flt, 1, #flt - 4) -- drop the trailing ' or '

		res[#res+1] = {
			name = i .. ' Bytes',
			desc = 'Total number of network bytes generated by the ' .. i .. ' protocol',
			category = 'napps',
			targetView = 'connections',
			targetViewFilter = flt,
			drillDownKey = 'NONE',
			data = gsummary[ccat]
		}
		has_cat_netapps = true
	end
end

jtable.info.categories = get_category_table(has_cat_netapps, true, true, has_cat_logs, has_cat_infrastructure)

return jtable
end -- closes the summary-building function that starts earlier in the file

-- Read a previously saved summary index from disk.
-- @param dirname directory holding the index files.
-- @return the raw JSON string, or nil if summary.json can't be opened.
function load_index(dirname)
	local f = io.open(dirname .. '/summary.json', "r")
	if f == nil then
		return nil
	end

	local res = f:read("*all")
	f:close()
	return res
end

-- Callback called by the engine at the end of the capture.
-- First pass (composite_args not given, so arg_file_duration == nil):
-- relaunch this same chisel with the sample count and capture duration as
-- arguments. Second pass: emit the JSON summary, reusing the on-disk index
-- when it exists and indexing is enabled, and (re)writing the index
-- otherwise. Returns true on success, false if the index can't be read or
-- written.
function on_capture_end(ts_s, ts_ns, delta)
	if arg_file_duration == nil then
		-- First pass: re-run ourselves with the computed parameters.
		sysdig.run_app('-r "' .. sysdig.get_evtsource_name() ..
			'" -c wsysdig_summary ' .. arg_n_timeline_samples ..
			',' .. delta .. ' ' .. sysdig.get_filter())
		return true
	end

	local sstr = ''
	local dirname = sysdig.get_evtsource_name() .. '_wd_index'
	local jtable -- declared local: the summary doesn't need to outlive this call

	if file_cache_exists and not disable_index then
		-- Fast path: load the index saved by a previous run.
		sstr = load_index(dirname)
		if sstr == nil then
			print('{"progress": 100, "error": "can\'t read the trace file index" }')
			print(']}')
			return false
		end

		jtable = json.decode(sstr)
		subsample_timelines(jtable)
		sstr = json.encode(jtable, { indent = true })
	else
		-- Slow path: fold in the last sample and build the summary.
		add_summaries(ts_s, ts_ns, gsummary, ssummary)
		jtable = build_output(delta)
		sstr = json.encode(jtable, { indent = true })

		if not disable_index then
			-- Recreate the index directory. Both the unix and the windows
			-- variant of each command are issued; whichever doesn't apply
			-- fails silently thanks to the stderr redirection.
			os.execute('rm -fr ' .. dirname .. " 2> /dev/null")
			os.execute('rmdir ' .. dirname .. " 2> nul")
			os.execute('mkdir ' .. dirname .. " 2> /dev/null")
			os.execute('md ' .. dirname .. " 2> nul")

			-- Save the data (the full, non-subsampled summary).
			local f = io.open(dirname .. '/summary.json', "w")
			if f == nil then
				print('{"progress": 100, "error": "can\'t create the trace file index" }')
				print(']}')
				return false
			end
			f:write(sstr)
			f:close()

			-- Save the index version so stale indexes can be detected.
			local fv = io.open(dirname .. '/VERSION', "w")
			if fv == nil then
				print('{"progress": 100, "error": "can\'t create the trace file index" }')
				print(']}')
				return false
			end
			fv:write(index_format_version)
			fv:close()
		end

		-- The emitted payload is always subsampled to the requested resolution.
		subsample_timelines(jtable)
		sstr = json.encode(jtable, { indent = true })
	end

	print('{"progress": 100, "data": '.. sstr ..'}')
	print(']}')
	return true
end