Closed yazun closed 2 years ago
nb. __AUDIT__
is 0.
Audit is an enterprise-level security feature of TBase, which can audit logs. When GUC enable_audit is set to off, there will be many ".trace" files with a size of 0 in the pg_log/maintain directory:
Please confirm whether we have manually deleted the pg_log/maintain directory.
The directory is there, but the file names seem not to match the log ones: i.e.
| <163>Aug 23 09:48:35 gaiadb01 dn[1779]: [1009849695] 2021-08-23 09:48:35 CEST [1779,coord(0.0)]:xid[0-] [1-1] user=,db=,client=,query=,
FATAL: 58P01: could not open audit log file "pg_log/maintain/maintain-Monday-09.trace": No such file or directory#0122021-08-23 09:48:35 CEST [1779,coord(0.0)]:xid[0-] [2-1] user=,db=,client=,query=,
LOCATION: audit_open_log_file, auditlogger.c:1502
while on this machine we have:
total 0
-rw------- 1 pgxzDR3 pgxzDR3 0 Aug 9 02:04 maintain-Monday-00.trace
-rw------- 1 pgxzDR3 pgxzDR3 0 Aug 8 01:04 maintain-Sunday-00.trace
-rw------- 1 pgxzDR3 pgxzDR3 0 Aug 7 00:04 maintain-Saturday-00.trace
-rw------- 1 pgxzDR3 pgxzDR3 0 Aug 6 01:50 maintain-Friday-00.trace
-rw------- 1 pgxzDR3 pgxzDR3 0 Aug 5 00:50 maintain-Thursday-00.trace
-rw------- 1 pgxzDR3 pgxzDR3 0 Aug 4 02:37 maintain-Wednesday-00.trace
-rw------- 1 pgxzDR3 pgxzDR3 0 Aug 3 12:44 maintain-Tuesday-12.trace
-rw------- 1 pgxzDR3 pgxzDR3 0 Aug 3 00:38 maintain-Tuesday-00.trace
-rw------- 1 pgxzDR3 pgxzDR3 0 Aug 2 19:22 maintain-Monday-19.trace
so somehow filenames are off..
also the strace of the quite busy audit process (30% CPU!)
strace: Process 3177 attached
clone(child_stack=NULL, flags=CLONE_CHILD_CLEARTID|CLONE_CHILD_SETTID|SIGCHLD, child_tidptr=0x7fcfa4e18b10) = 2711
sendto(4, "<166>Aug 23 18:22:41 dn[3177]: [1027839515] 2021-08-23 18:22:41 CEST [3177,coord(0.0)]:xid[0-] [2055679031-1] user=,db=,client=,query=,LOG: 00000: audit logger process (PID 2708) exited with exit code 1\n2021-08-23 18:22:41wait4(-1, 0x7fff7142e28c, WNOHANG, NULL) = 0
rt_sigprocmask(SIG_SETMASK, [], NULL, 8) = 0
rt_sigreturn({mask=[]}) = -1 EINTR (Interrupted system call)
rt_sigprocmask(SIG_SETMASK, ~[ILL TRAP ABRT BUS FPE SEGV CONT SYS RTMIN RT_1], NULL, 8) = 0
rt_sigprocmask(SIG_SETMASK, [], NULL, 8) = 0
select(7, [3 5 6], NULL, NULL, {tv_sec=60, tv_usec=0}) = ? ERESTARTNOHAND (To be restarted if no handler)
--- SIGCHLD {si_signo=SIGCHLD, si_code=CLD_EXITED, si_pid=2711, si_uid=10256, si_status=1, si_utime=0, si_stime=0} ---
rt_sigprocmask(SIG_SETMASK, ~[ILL TRAP ABRT BUS FPE SEGV CONT SYS RTMIN RT_1], NULL, 8) = 0
wait4(-1, [{WIFEXITED(s) && WEXITSTATUS(s) == 1}], WNOHANG, NULL) = 2711
clone(child_stack=NULL, flags=CLONE_CHILD_CLEARTID|CLONE_CHILD_SETTID|SIGCHLD, child_tidptr=0x7fcfa4e18b10) = 2712
sendto(4, "<166>Aug 23 18:22:41 dn[3177]: [1027839516] 2021-08-23 18:22:41 CEST [3177,coord(0.0)]:xid[0-] [2055679033-1] user=,db=,client=,query=,LOG: 00000: audit logger process (PID 2711) exited with exit code 1\n2021-08-23 18:22:41wait4(-1, 0x7fff7142e28c, WNOHANG, NULL) = 0
rt_sigprocmask(SIG_SETMASK, [], NULL, 8) = 0
rt_sigreturn({mask=[]}) = -1 EINTR (Interrupted system call)
rt_sigprocmask(SIG_SETMASK, ~[ILL TRAP ABRT BUS FPE SEGV CONT SYS RTMIN RT_1], NULL, 8) = 0
rt_sigprocmask(SIG_SETMASK, [], NULL, 8) = 0
select(7, [3 5 6], NULL, NULL, {tv_sec=60, tv_usec=0}) = ? ERESTARTNOHAND (To be restarted if no handler)
--- SIGCHLD {si_signo=SIGCHLD, si_code=CLD_EXITED, si_pid=2712, si_uid=10256, si_status=1, si_utime=0, si_stime=0} ---
rt_sigprocmask(SIG_SETMASK, ~[ILL TRAP ABRT BUS FPE SEGV CONT SYS RTMIN RT_1], NULL, 8) = 0
wait4(-1, [{WIFEXITED(s) && WEXITSTATUS(s) == 1}], WNOHANG, NULL) = 2712
clone(child_stack=NULL, flags=CLONE_CHILD_CLEARTID|CLONE_CHILD_SETTID|SIGCHLD, child_tidptr=0x7fcfa4e18b10) = 2713
sendto(4, "<166>Aug 23 18:22:41 dn[3177]: [1027839517] 2021-08-23 18:22:41 CEST [3177,coord(0.0)]:xid[0-] [2055679035-1] user=,db=,client=,query=,LOG: 00000: audit logger process (PID 2712) exited with exit code 1\n2021-08-23 18:22:41wait4(-1, 0x7fff7142e28c, WNOHANG, NULL) = 0
rt_sigprocmask(SIG_SETMASK, [], NULL, 8) = 0
rt_sigreturn({mask=[]}) = -1 EINTR (Interrupted system call)
rt_sigprocmask(SIG_SETMASK, ~[ILL TRAP ABRT BUS FPE SEGV CONT SYS RTMIN RT_1], NULL, 8) = 0
rt_sigprocmask(SIG_SETMASK, [], NULL, 8) = 0
select(7, [3 5 6], NULL, NULL, {tv_sec=60, tv_usec=0}) = ? ERESTARTNOHAND (To be restarted if no handler)
--- SIGCHLD {si_signo=SIGCHLD, si_code=CLD_EXITED, si_pid=2713, si_uid=10256, si_status=1, si_utime=0, si_stime=0} ---
rt_sigprocmask(SIG_SETMASK, ~[ILL TRAP ABRT BUS FPE SEGV CONT SYS RTMIN RT_1], NULL, 8) = 0
wait4(-1, [{WIFEXITED(s) && WEXITSTATUS(s) == 1}], WNOHANG, NULL) = 2713
clone(child_stack=NULL, flags=CLONE_CHILD_CLEARTID|CLONE_CHILD_SETTID|SIGCHLD, child_tidptr=0x7fcfa4e18b10) = 2714
sendto(4, "<166>Aug 23 18:22:41 dn[3177]: [1027839518] 2021-08-23 18:22:41 CEST [3177,coord(0.0)]:xid[0-] [2055679037-1] user=,db=,client=,query=,LOG: 00000: audit logger process (PID 2713) exited with exit code 1\n2021-08-23 18:22:41wait4(-1, 0x7fff7142e28c, WNOHANG, NULL) = 0
rt_sigprocmask(SIG_SETMASK, [], NULL, 8) = 0
rt_sigreturn({mask=[]}) = -1 EINTR (Interrupted system call)
rt_sigprocmask(SIG_SETMASK, ~[ILL TRAP ABRT BUS FPE SEGV CONT SYS RTMIN RT_1], NULL, 8) = 0
rt_sigprocmask(SIG_SETMASK, [], NULL, 8) = 0
select(7, [3 5 6], NULL, NULL, {tv_sec=60, tv_usec=0}) = ? ERESTARTNOHAND (To be restarted if no handler)
--- SIGCHLD {si_signo=SIGCHLD, si_code=CLD_EXITED, si_pid=2714, si_uid=10256, si_status=1, si_utime=0, si_stime=0} ---
rt_sigprocmask(SIG_SETMASK, ~[ILL TRAP ABRT BUS FPE SEGV CONT SYS RTMIN RT_1], NULL, 8) = 0
wait4(-1, [{WIFEXITED(s) && WEXITSTATUS(s) == 1}], WNOHANG, NULL) = 2714
clone(child_stack=NULL, flags=CLONE_CHILD_CLEARTID|CLONE_CHILD_SETTID|SIGCHLD, child_tidptr=0x7fcfa4e18b10) = 2715
sendto(4, "<166>Aug 23 18:22:41 dn[3177]: [1027839519] 2021-08-23 18:22:41 CEST [3177,coord(0.0)]:xid[0-] [2055679039-1] user=,db=,client=,query=,LOG: 00000: audit logger process (PID 2714) exited with exit code 1\n2021-08-23 18:22:41wait4(-1, 0x7fff7142e28c, WNOHANG, NULL) = 0
rt_sigprocmask(SIG_SETMASK, [], NULL, 8) = 0
rt_sigreturn({mask=[]}) = -1 EINTR (Interrupted system call)
rt_sigprocmask(SIG_SETMASK, ~[ILL TRAP ABRT BUS FPE SEGV CONT SYS RTMIN RT_1], NULL, 8) = 0
rt_sigprocmask(SIG_SETMASK, [], NULL, 8) = 0
select(7, [3 5 6], NULL, NULL, {tv_sec=60, tv_usec=0}) = ? ERESTARTNOHAND (To be restarted if no handler)
--- SIGCHLD {si_signo=SIGCHLD, si_code=CLD_EXITED, si_pid=2715, si_uid=10256, si_status=1, si_utime=0, si_stime=0} ---
rt_sigprocmask(SIG_SETMASK, ~[ILL TRAP ABRT BUS FPE SEGV CONT SYS RTMIN RT_1], NULL, 8) = 0
wait4(-1, [{WIFEXITED(s) && WEXITSTATUS(s) == 1}], WNOHANG, NULL) = 2715
clone(child_stack=NULL, flags=CLONE_CHILD_CLEARTID|CLONE_CHILD_SETTID|SIGCHLD, child_tidptr=0x7fcfa4e18b10) = 2716
sendto(4, "<166>Aug 23 18:22:41 dn[3177]: [1027839520] 2021-08-23 18:22:41 CEST [3177,coord(0.0)]:xid[0-] [2055679041-1] user=,db=,client=,query=,LOG: 00000: audit logger process (PID 2715) exited with exit code 1\n2021-08-23 18:22:41wait4(-1, 0x7fff7142e28c, WNOHANG, NULL) = 0
rt_sigprocmask(SIG_SETMASK, [], NULL, 8) = 0
@JennyJennyChen any chance for a hint what could be done to stop this logging to happen?
Ok, somehow the directory disappeared. recreation worked. Will keep the issue open for few days to see if the recreation fixed it for good. (and sorry if it was a false alarm).
@JennyJennyChen any chance for a hint what could be done to stop this logging to happen? Sorry, there is no way to stop this logging at the moment. We used benchmarksql to run for 1 week and did not find this problem. If re-creation cannot be fixed it for good, you can provide us with more information, thank you!
recreation of the directory has solved the problem. So closing it for now. Still a mystery what happened.
We noticed excessive number (like 24M per day) log entries related to missing audit files, that are neither needed nor initialized on both coord and dn.
Is it a bug or misconfig?
Audit GUCs are off: