[20260525]跟踪library cache lock library cache pin使用systemtap(21c)2.txt
[20260525]跟踪library cache lock library cache pin使用systemtap(21c)2.txt
--//链接:[20260525]跟踪library cache lock library cache pin使用systemtap(21c)1.txt 列出几种方案。
--//先尝试方案1,输出乱码,当时也没有仔细分析。方案2失败,再尝试方案3,先升级SystemTap到4.0版本。
--//继续后续的测试:
1.环境:
SCOTT@book01p> @ ver2
==============================
PORT_STRING : x86_64/Linux 2.4.xx
VERSION : 21.0.0.0.0
BANNER : Oracle Database 21c Enterprise Edition Release 21.0.0.0.0 - Production
BANNER_FULL : Oracle Database 21c Enterprise Edition Release 21.0.0.0.0 - Production
Version 21.3.0.0.0
BANNER_LEGACY : Oracle Database 21c Enterprise Edition Release 21.0.0.0.0 - Production
CON_ID : 0
PL/SQL procedure successfully completed.
--//session 1:
SCOTT@book01p> @ spid
==============================
SID : 123
SERIAL# : 7956
PROCESS : 3721
SERVER : DEDICATED
SPID : 3723
PID : 85
P_SERIAL# : 1
KILL_COMMAND : alter system kill session '123,7956' immediate;
PL/SQL procedure successfully completed.
--//记下进程号 3723.
3.尝试方案3:
--//在完成升级SystemTap到4.0版本后,测试问题依旧,还是报如下类似错误。
$ cat stap/lkpn21c.stp
global lk=0, pn=0
#globak off_set=0
#probe begin {
# printf("Begin.\n")
#
#}
probe process("/u01/app/oracle/product/21.0.0/dbhome_1/bin/oracle").function("kgllkal")
{
printf("spid=%d :%s lk_count %d -- handle address: %x , mode = %d , kglnaobj : %s\n", pid(), ppfunc(),++lk, long_arg(2),long_arg(3),user_string(long_arg(2)+0x1c8));
}
probe process("/u01/app/oracle/product/21.0.0/dbhome_1/bin/oracle").function("kglpnal")
{
printf("spid=%d :%s pn_count %d -- handle address: %x , mode = %d , kglnaobj : %s\n", pid(), ppfunc(),++pn, long_arg(2),long_arg(3),user_string(long_arg(2)+0x1c8));
# printf("%d\t%s: %s %x %x %d\n", pid(), execname(), ppfunc(), int_arg(1),long_arg(2),long_arg(3));
}
# stap -v /home/oracle/sqllaji/stap/lkpn21c.stp -x 3723 --tmpdir=/tmp
Pass 1: parsed user script and 497 library scripts using 274136virt/84392res/3332shr/81676data kb, in 780usr/50sys/838real ms.
Pass 2: analyzed script: 3 probes, 6 functions, 4 embeds, 6 globals using 477852virt/289144res/4492shr/285392data kb, in 1380usr/270sys/1658real ms.
Pass 3: using cached /root/.systemtap/cache/18/stap_18663a97ced696cafbe78ab2f01f695a_13818.c
Pass 4: using cached /root/.systemtap/cache/18/stap_18663a97ced696cafbe78ab2f01f695a_13818.ko
Pass 5: starting run.
WARNING: probe process("/u01/app/oracle/product/21.0.0/dbhome_1/bin/oracle").function("kglpnal") inode-offset 0000000014f6c020 registration error (rc -524)
WARNING: probe process("/u01/app/oracle/product/21.0.0/dbhome_1/bin/oracle").function("kgllkal") inode-offset 0000000014f67e90 registration error (rc -524)
WARNING: task_finder inode-uprobes callback for task 3723 failed: -524
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
^CPass 5: run completed in 0usr/30sys/3402real ms.
--//变成rc -524错误(-524对应Linux内核内部错误码ENOTSUPP,即操作不被支持).
--//前面没升级前报错误如下:
WARNING: probe process("/u01/app/oracle/product/21.0.0/dbhome_1/bin/oracle").function("kglpnal") inode-offset 0000000014f6c020 registration error (rc 0)
WARNING: probe process("/u01/app/oracle/product/21.0.0/dbhome_1/bin/oracle").function("kgllkal") inode-offset 0000000014f67e90 registration error (rc 0)
--//实际上还是类似。这次多了一个提示:WARNING: task_finder inode-uprobes callback for task 3723 failed: -524
--//回过头,再次尝试方案1,方案1不报错,但是我的测试kglnaobj显示的信息是乱码。
4.尝试方案1.
--//当时尝试+2 +4 +8 偏移,问题依旧,记录执行过程:
$ ls -il $(which oracle)
17445396 -rwsr-s--x. 1 oracle oinstall 498943224 2024-08-08 17:52:00 /u01/app/oracle/product/21.0.0/dbhome_1/bin/oracle
--//记下oracle执行文件的i-node number=17445396,后面执行脚本需要。
--//按照kimi的建议,建立执行文本fix_offset.stp,内容如下:
# cat /home/oracle/sqllaji/stap/fix_offset.stp
#! /usr/bin/env stap -g
// 参数:$1 = oracle inode, $2 = 目标函数文件偏移, $3 = 修正量(通常 2 或 4)
probe kernel.function("uprobe_register"), kernel.function("uprobe_unregister")
{
if ($inode->i_ino == $1) {
if ($offset == $2) {
printf("Patching Oracle probe offset %x -> %x\n", $offset, $offset + $3);
$offset = $offset + $3;
}
}
}
# cat ooo.sh
#! /bin/bash
stap -g -v /home/oracle/sqllaji/stap/fix_offset.stp 17445396 0x0000000014f6c020 $1 &
stap -g -v /home/oracle/sqllaji/stap/fix_offset.stp 17445396 0x0000000014f67e90 $1 &
--//注:放在后台执行,输入$1参数表示偏移。17445396对应 oracle执行文件的i-node,0x0000000014f6c020 0x0000000014f67e90 对应目标函数文件偏移。
--//window 1:
$ gdb -f -p 3723
...
(gdb) disassemble kgllkal
Dump of assembler code for function kgllkal:
0x0000000015367e90 <+0>: xchg %ax,%ax
0x0000000015367e92 <+2>: push %rbp
0x0000000015367e93 <+3>: mov %rsp,%rbp
0x0000000015367e96 <+6>: push %r12
0x0000000015367e98 <+8>: push %r13
...
(gdb) disassemble kglpnal
Dump of assembler code for function kglpnal:
0x000000001536c020 <+0>: xchg %ax,%ax
0x000000001536c022 <+2>: push %rbp
0x000000001536c023 <+3>: mov %rsp,%rbp
0x000000001536c026 <+6>: push %r12
0x000000001536c028 <+8>: push %r13
...
--//偏移选择+4是错误的(从上面的反汇编看,+4落在+3处mov指令的中间,不是指令边界)。接着测试+2,+3,+6,+8,输出乱码。
--//当时想是否因为选择偏移,导致取出显示的参数 handle address, mode 不对.
--//先使用gdb跟踪调用kgllkal函数,寄存器的值.
--//window 1:
$ gdb -f -p 3723
...
(gdb) b kgllkal
Breakpoint 1 at 0x15367e90
--//session 1:
--//首先在session 1,在启动gdb监测前多次执行select * from dept where deptno=21; 避免后面再执行类似语句时出现大量递归sql语句。
SCOTT@book01p> select * from dept where deptno=22;
--//挂起。
--//window 1:
(gdb) c
Continuing.
Breakpoint 1, 0x0000000015367e90 in kgllkal ()
(gdb) info registers
rax 0x3 3
rbx 0x1 1
rcx 0x1 1
rdx 0x644a9208 1682608648
rsi 0x7ffd692eaf60 140726368120672
rdi 0x7fe9f4920060 140642807316576
rbp 0x7ffd692eafc0 0x7ffd692eafc0
rsp 0x7ffd692eaa68 0x7ffd692eaa68
r8 0x7e82d6a0 2122503840
r9 0x7ffd692eb4d0 140726368122064
r10 0x0 0
r11 0x0 0
r12 0x0 0
r13 0x7ffd692ead88 140726368120200
r14 0x0 0
r15 0x1 1
rip 0x15367e90 0x15367e90 <kgllkal>
eflags 0x202 [ IF ]
cs 0x33 51
ss 0x2b 43
ds 0x0 0
es 0x0 0
fs 0x0 0
gs 0x0 0
--//rdx 对应 handle address,rcx 对应 mode。
(gdb) ni 1
0x0000000015367e92 in kgllkal ()
(gdb) info registers
rax 0x3 3
rbx 0x1 1
rcx 0x1 1
rdx 0x644a9208 1682608648
rsi 0x7ffd692eaf60 140726368120672
rdi 0x7fe9f4920060 140642807316576
rbp 0x7ffd692eafc0 0x7ffd692eafc0
rsp 0x7ffd692eaa68 0x7ffd692eaa68
r8 0x7e82d6a0 2122503840
r9 0x7ffd692eb4d0 140726368122064
r10 0x0 0
r11 0x0 0
r12 0x0 0
r13 0x7ffd692ead88 140726368120200
r14 0x0 0
r15 0x1 1
rip 0x15367e92 0x15367e92 <kgllkal+2>
eflags 0x202 [ IF ]
cs 0x33 51
ss 0x2b 43
ds 0x0 0
es 0x0 0
fs 0x0 0
gs 0x0 0
--//执行一条指令后,rdx,rcx并没有变化。
--//这样是否可以修改stp脚本,取寄存器rdx ,rcx值,这样就没有问题。
--//修改如下:
$ cat stap/lkpn21cx.stp
global lk=0, pn=0
#globak off_set=0
#probe begin {
# printf("Begin.\n")
#
#}
probe process("/u01/app/oracle/product/21.0.0/dbhome_1/bin/oracle").function("kgllkal")
{
# printf("spid=%d :%s lk_count %d -- handle address: %x , mode = %d , kglnaobj : %s\n", pid(), ppfunc(),++lk, long_arg(2),long_arg(3),user_string(long_arg(2)+0x1c8));
printf("spid=%d :%s lk_count %d -- handle address: %x , mode = %d , kglnaobj : %s\n", pid(), ppfunc(),++lk, register("rdx"),register("rcx"),user_string(register("rdx")+0x1c8));
}
probe process("/u01/app/oracle/product/21.0.0/dbhome_1/bin/oracle").function("kglpnal")
{
# printf("spid=%d :%s pn_count %d -- handle address: %x , mode = %d , kglnaobj : %s\n", pid(), ppfunc(),++pn, long_arg(2),long_arg(3),user_string(long_arg(2)+0x1c8));
printf("spid=%d :%s pn_count %d -- handle address: %x , mode = %d , kglnaobj : %s\n", pid(), ppfunc(),++pn, register("rdx"),register("rcx"),user_string(register("rdx")+0x1c8));
# printf("%d\t%s: %s %x %x %d\n", pid(), execname(), ppfunc(), int_arg(1),long_arg(2),long_arg(3));
}
# source ooo.sh 2
Pass 1: parsed user script and 497 library scripts using 274132virt/84400res/3340shr/81672data kb, in 810usr/40sys/847real ms.
Pass 1: parsed user script and 497 library scripts using 274132virt/84400res/3340shr/81672data kb, in 830usr/40sys/872real ms.
Pass 2: analyzed script: 2 probes, 3 functions, 0 embeds, 0 globals using 312764virt/124052res/4320shr/120304data kb, in 600usr/100sys/725real ms.
Pass 2: analyzed script: 2 probes, 3 functions, 0 embeds, 0 globals using 312764virt/124052res/4316shr/120304data kb, in 620usr/90sys/710real ms.
Pass 3: using cached /root/.systemtap/cache/77/stap_77e75aae44ea58a2ffd3f3849cbbffeb_2697.c
Pass 4: using cached /root/.systemtap/cache/77/stap_77e75aae44ea58a2ffd3f3849cbbffeb_2697.ko
Pass 5: starting run.
Pass 3: using cached /root/.systemtap/cache/72/stap_72f36f8539c93b8faf45b87458ab7e8d_2697.c
Pass 4: using cached /root/.systemtap/cache/72/stap_72f36f8539c93b8faf45b87458ab7e8d_2697.ko
Pass 5: starting run.
# stap -v /home/oracle/sqllaji/stap/lkpn21cx.stp -x 3723 --tmpdir=/tmp
Pass 1: parsed user script and 497 library scripts using 274136virt/84396res/3332shr/81676data kb, in 790usr/50sys/837real ms.
Pass 2: analyzed script: 3 probes, 13 functions, 4 embeds, 6 globals using 477852virt/289148res/4496shr/285392data kb, in 1420usr/270sys/1701real ms.
Pass 3: using cached /root/.systemtap/cache/c7/stap_c78e8b2ad9d2c07d4c365e81e8b7768c_13027.c
Pass 4: using cached /root/.systemtap/cache/c7/stap_c78e8b2ad9d2c07d4c365e81e8b7768c_13027.ko
Pass 5: starting run.
Patching Oracle probe offset 14f67e90 -> 14f67e92
Patching Oracle probe offset 14f6c020 -> 14f6c022
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
spid=3723 :kgllkal lk_count 1 -- handle address: 644a9208 , mode = 1 , kglnaobj : select * from dept where deptno=22
....
--//显示正确。
--//注意下划线内容,修正地址的偏移。
5.继续尝试执行lkpn21c.stp:
--//对比gdb与stap的跟踪:
Breakpoint 1 at 0x15367e90
(gdb) c
Continuing.
Breakpoint 1, 0x0000000015367e90 in kgllkal ()
(gdb) info regi
rax 0x3 3
rbx 0x1 1
rcx 0x1 1
rdx 0x6719a280 1729733248
rsi 0x7ffd692eaf60 140726368120672
rdi 0x7fe9f4920060 140642807316576
rbp 0x7ffd692eafc0 0x7ffd692eafc0
rsp 0x7ffd692eaa68 0x7ffd692eaa68
r8 0x7e82d6a0 2122503840
r9 0x7ffd692eb4d0 140726368122064
r10 0x0 0
r11 0x0 0
r12 0x0 0
r13 0x7ffd692ead88 140726368120200
r14 0x0 0
r15 0x1 1
rip 0x15367e90 0x15367e90 <kgllkal>
eflags 0x202 [ IF ]
cs 0x33 51
ss 0x2b 43
ds 0x0 0
es 0x0 0
fs 0x0 0
gs 0x0 0
# stap -v /home/oracle/sqllaji/stap/lkpn21c.stp -x 3723 --tmpdir=/tmp
Pass 1: parsed user script and 497 library scripts using 274132virt/84396res/3332shr/81672data kb, in 780usr/50sys/829real ms.
Pass 2: analyzed script: 3 probes, 6 functions, 4 embeds, 6 globals using 477848virt/289148res/4492shr/285388data kb, in 1430usr/280sys/1712real ms.
Pass 3: using cached /root/.systemtap/cache/bc/stap_bce25348d61e89ec998958392424e805_13818.c
Pass 4: using cached /root/.systemtap/cache/bc/stap_bce25348d61e89ec998958392424e805_13818.ko
Pass 5: starting run.
Patching Oracle probe offset 14f6c020 -> 14f6c022
Patching Oracle probe offset 14f67e90 -> 14f67e92
spid=3723 :kgllkal lk_count 1 -- handle address: 7ffd692eaf60 , mode = 1729889216 , kglnaobj : GD9
spid=3723 :kglpnal pn_count 1 -- handle address: 7ffd692eb148 , mode = 1729889216 , kglnaobj : 累旚?
spid=3723 :kgllkal lk_count 2 -- handle address: 7ffd692e8b80 , mode = 1920743280 , kglnaobj :
...
--//上下对比可以发现handle address 取的是寄存器rsi,mode 取的是寄存器rdx。也就是21c定义kgllkal函数,参数数量发生改变。
--//handle address 对应取参数3(long_arg(3),即寄存器rdx),而mode 对应取参数4(long_arg(4),即寄存器rcx).修改如下:
$ cat stap/lkpn21c.stp
global lk=0, pn=0
#globak off_set=0
#probe begin {
# printf("Begin.\n")
#
#}
probe process("/u01/app/oracle/product/21.0.0/dbhome_1/bin/oracle").function("kgllkal")
{
printf("spid=%d :%s lk_count %d -- handle address: %x , mode = %d , kglnaobj : %s\n", pid(), ppfunc(),++lk, long_arg(3),long_arg(4),user_string(long_arg(3)+0x1c8));
}
probe process("/u01/app/oracle/product/21.0.0/dbhome_1/bin/oracle").function("kglpnal")
{
printf("spid=%d :%s pn_count %d -- handle address: %x , mode = %d , kglnaobj : %s\n", pid(), ppfunc(),++pn, long_arg(3),long_arg(4),user_string(long_arg(3)+0x1c8));
# printf("%d\t%s: %s %x %x %d\n", pid(), execname(), ppfunc(), int_arg(1),long_arg(2),long_arg(3));
}
# stap -v /home/oracle/sqllaji/stap/lkpn21c.stp -x 3723 --tmpdir=/tmp
Pass 1: parsed user script and 497 library scripts using 274136virt/84396res/3332shr/81676data kb, in 820usr/40sys/850real ms.
Pass 2: analyzed script: 3 probes, 6 functions, 4 embeds, 6 globals using 477852virt/289148res/4492shr/285392data kb, in 1390usr/260sys/1674real ms.
Pass 3: using cached /root/.systemtap/cache/18/stap_18663a97ced696cafbe78ab2f01f695a_13818.c
Pass 4: using cached /root/.systemtap/cache/18/stap_18663a97ced696cafbe78ab2f01f695a_13818.ko
Pass 5: starting run.
Patching Oracle probe offset 14f6c020 -> 14f6c022
Patching Oracle probe offset 14f67e90 -> 14f67e92
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
--//依旧需要执行fix_offset.stp脚本,修正偏移。
--//第1次执行
spid=3723 :kgllkal lk_count 1 -- handle address: 6723e650 , mode = 1 , kglnaobj : select * from dept where deptno=29
spid=3723 :kglpnal pn_count 1 -- handle address: 6723e650 , mode = 2 , kglnaobj : select * from dept where deptno=29
spid=3723 :kgllkal lk_count 2 -- handle address: 727c3770 , mode = 2 , kglnaobj : bookSYSCDB$ROOT
spid=3723 :kgllkal lk_count 3 -- handle address: 6dcc5dd8 , mode = 2 , kglnaobj : 1073777561SYSCDB$ROOT
spid=3723 :kgllkal lk_count 4 -- handle address: 6724d530 , mode = 2 , kglnaobj : e16f7e54bccd0a24f108891ab16a0359$BUILD$BOOK01P
spid=3723 :kgllkal lk_count 5 -- handle address: 672511c0 , mode = 1 , kglnaobj :
spid=3723 :kglpnal pn_count 2 -- handle address: 672511c0 , mode = 3 , kglnaobj :
spid=3723 :kgllkal lk_count 6 -- handle address: 727c3770 , mode = 2 , kglnaobj : bookSYSCDB$ROOT
spid=3723 :kgllkal lk_count 7 -- handle address: 6dcc5dd8 , mode = 2 , kglnaobj : 1073777561SYSCDB$ROOT
spid=3723 :kgllkal lk_count 8 -- handle address: 6729acf0 , mode = 1 , kglnaobj : e16f7e54bccd0a24f108891ab16a0359Child:0BOOK01P
spid=3723 :kglpnal pn_count 3 -- handle address: 6729acf0 , mode = 3 , kglnaobj : e16f7e54bccd0a24f108891ab16a0359Child:0BOOK01P
spid=3723 :kgllkal lk_count 9 -- handle address: 67d60ce8 , mode = 1 , kglnaobj : SCOTTBOOK01P
spid=3723 :kgllkal lk_count 10 -- handle address: 727c3770 , mode = 2 , kglnaobj : bookSYSCDB$ROOT
spid=3723 :kgllkal lk_count 11 -- handle address: 6dcc5dd8 , mode = 2 , kglnaobj : 1073777561SYSCDB$ROOT
spid=3723 :kgllkal lk_count 12 -- handle address: 644d51c8 , mode = 2 , kglnaobj : DEPTSCOTTBOOK01襣d
spid=3723 :kglpnal pn_count 4 -- handle address: 644d51c8 , mode = 2 , kglnaobj : DEPTSCOTTBOOK01襣d
spid=3723 :kgllkal lk_count 13 -- handle address: 644ccd18 , mode = 2 , kglnaobj : 5358706841214419813BOOK01P
spid=3723 :kglpnal pn_count 5 -- handle address: 644ccd18 , mode = 2 , kglnaobj : 5358706841214419813BOOK01P
spid=3723 :kgllkal lk_count 14 -- handle address: 644c6f38 , mode = 2 , kglnaobj : 1256087081022357994BOOK01P
spid=3723 :kglpnal pn_count 6 -- handle address: 644c6f38 , mode = 2 , kglnaobj : 1256087081022357994BOOK01P
spid=3723 :kgllkal lk_count 15 -- handle address: 644c22a8 , mode = 2 , kglnaobj : 13547376130454050250BOOK01P
spid=3723 :kglpnal pn_count 7 -- handle address: 644c22a8 , mode = 2 , kglnaobj : 13547376130454050250BOOK01P
spid=3723 :kgllkal lk_count 16 -- handle address: 644c0e78 , mode = 2 , kglnaobj : 4448762010415191240BOOK01P
spid=3723 :kglpnal pn_count 8 -- handle address: 644c0e78 , mode = 2 , kglnaobj : 4448762010415191240BOOK01P
spid=3723 :kgllkal lk_count 17 -- handle address: 644c6f38 , mode = 2 , kglnaobj : 1256087081022357994BOOK01P
spid=3723 :kglpnal pn_count 9 -- handle address: 644c6f38 , mode = 2 , kglnaobj : 1256087081022357994BOOK01P
--//第2次执行
spid=3723 :kgllkal lk_count 18 -- handle address: 6723e650 , mode = 1 , kglnaobj : select * from dept where deptno=29
spid=3723 :kgllkal lk_count 19 -- handle address: 672511c0 , mode = 1 , kglnaobj :
spid=3723 :kgllkal lk_count 20 -- handle address: 644d51c8 , mode = 2 , kglnaobj : DEPTSCOTTBOOK01襣d
spid=3723 :kglpnal pn_count 10 -- handle address: 644d51c8 , mode = 2 , kglnaobj : DEPTSCOTTBOOK01襣d
--//第3次执行
spid=3723 :kgllkal lk_count 21 -- handle address: 6723e650 , mode = 1 , kglnaobj : select * from dept where deptno=29
spid=3723 :kgllkal lk_count 22 -- handle address: 672511c0 , mode = 1 , kglnaobj :
--//第4次执行
--//没有输出.
6.收尾:
--//1.按ctrl+c退出stap的跟踪.
--//2.将后台执行fix_offset.stp停止。
# fg
stap -g -v /home/oracle/sqllaji/stap/fix_offset.stp 17445396 0x0000000014f67e90 $1
^C
Pass 5: run completed in 50usr/300sys/1050715real ms.
# fg
stap -g -v /home/oracle/sqllaji/stap/fix_offset.stp 17445396 0x0000000014f6c020 $1
^C
Pass 5: run completed in 70usr/340sys/1052782real ms.
--//3.清理 /tmp 目录,反复调试在tmp目录留下许多垃圾文件,全部删除,过程略。
--//链接:[20260525]跟踪library cache lock library cache pin使用systemtap(21c)1.txt 列出几种方案。
--//先尝试方案1,输出乱码,当时也没有仔细分析。方案2失败,再尝试方案3,先升级SystemTap到4.0版本。
--//继续后续的测试:
1.环境:
SCOTT@book01p> @ ver2
==============================
PORT_STRING : x86_64/Linux 2.4.xx
VERSION : 21.0.0.0.0
BANNER : Oracle Database 21c Enterprise Edition Release 21.0.0.0.0 - Production
BANNER_FULL : Oracle Database 21c Enterprise Edition Release 21.0.0.0.0 - Production
Version 21.3.0.0.0
BANNER_LEGACY : Oracle Database 21c Enterprise Edition Release 21.0.0.0.0 - Production
CON_ID : 0
PL/SQL procedure successfully completed.
--//session 1:
SCOTT@book01p> @ spid
==============================
SID : 123
SERIAL# : 7956
PROCESS : 3721
SERVER : DEDICATED
SPID : 3723
PID : 85
P_SERIAL# : 1
KILL_COMMAND : alter system kill session '123,7956' immediate;
PL/SQL procedure successfully completed.
--//记下进程号 3723.
3.尝试方案3:
--//在完成升级SystemTap到4.0版本后,测试问题依旧,还是报如下类似错误。
$ cat stap/lkpn21c.stp
global lk=0, pn=0
#globak off_set=0
#probe begin {
# printf("Begin.\n")
#
#}
probe process("/u01/app/oracle/product/21.0.0/dbhome_1/bin/oracle").function("kgllkal")
{
printf("spid=%d :%s lk_count %d -- handle address: %x , mode = %d , kglnaobj : %s\n", pid(), ppfunc(),++lk, long_arg(2),long_arg(3),user_string(long_arg(2)+0x1c8));
}
probe process("/u01/app/oracle/product/21.0.0/dbhome_1/bin/oracle").function("kglpnal")
{
printf("spid=%d :%s pn_count %d -- handle address: %x , mode = %d , kglnaobj : %s\n", pid(), ppfunc(),++pn, long_arg(2),long_arg(3),user_string(long_arg(2)+0x1c8));
# printf("%d\t%s: %s %x %x %d\n", pid(), execname(), ppfunc(), int_arg(1),long_arg(2),long_arg(3));
}
# stap -v /home/oracle/sqllaji/stap/lkpn21c.stp -x 3723 --tmpdir=/tmp
Pass 1: parsed user script and 497 library scripts using 274136virt/84392res/3332shr/81676data kb, in 780usr/50sys/838real ms.
Pass 2: analyzed script: 3 probes, 6 functions, 4 embeds, 6 globals using 477852virt/289144res/4492shr/285392data kb, in 1380usr/270sys/1658real ms.
Pass 3: using cached /root/.systemtap/cache/18/stap_18663a97ced696cafbe78ab2f01f695a_13818.c
Pass 4: using cached /root/.systemtap/cache/18/stap_18663a97ced696cafbe78ab2f01f695a_13818.ko
Pass 5: starting run.
WARNING: probe process("/u01/app/oracle/product/21.0.0/dbhome_1/bin/oracle").function("kglpnal") inode-offset 0000000014f6c020 registration error (rc -524)
WARNING: probe process("/u01/app/oracle/product/21.0.0/dbhome_1/bin/oracle").function("kgllkal") inode-offset 0000000014f67e90 registration error (rc -524)
WARNING: task_finder inode-uprobes callback for task 3723 failed: -524
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
^CPass 5: run completed in 0usr/30sys/3402real ms.
--//变成rc -524错误(-524对应Linux内核内部错误码ENOTSUPP,即操作不被支持).
--//前面没升级前报错误如下:
WARNING: probe process("/u01/app/oracle/product/21.0.0/dbhome_1/bin/oracle").function("kglpnal") inode-offset 0000000014f6c020 registration error (rc 0)
WARNING: probe process("/u01/app/oracle/product/21.0.0/dbhome_1/bin/oracle").function("kgllkal") inode-offset 0000000014f67e90 registration error (rc 0)
--//实际上还是类似。这次多了一个提示:WARNING: task_finder inode-uprobes callback for task 3723 failed: -524
--//回过头,再次尝试方案1,方案1不报错,但是我的测试kglnaobj显示的信息是乱码。
4.尝试方案1.
--//当时尝试+2 +4 +8 偏移,问题依旧,记录执行过程:
$ ls -il $(which oracle)
17445396 -rwsr-s--x. 1 oracle oinstall 498943224 2024-08-08 17:52:00 /u01/app/oracle/product/21.0.0/dbhome_1/bin/oracle
--//记下oracle执行文件的i-node number=17445396,后面执行脚本需要。
--//按照kimi的建议,建立执行文本fix_offset.stp,内容如下:
# cat /home/oracle/sqllaji/stap/fix_offset.stp
#! /usr/bin/env stap -g
// 参数:$1 = oracle inode, $2 = 目标函数文件偏移, $3 = 修正量(通常 2 或 4)
probe kernel.function("uprobe_register"), kernel.function("uprobe_unregister")
{
if ($inode->i_ino == $1) {
if ($offset == $2) {
printf("Patching Oracle probe offset %x -> %x\n", $offset, $offset + $3);
$offset = $offset + $3;
}
}
}
# cat ooo.sh
#! /bin/bash
stap -g -v /home/oracle/sqllaji/stap/fix_offset.stp 17445396 0x0000000014f6c020 $1 &
stap -g -v /home/oracle/sqllaji/stap/fix_offset.stp 17445396 0x0000000014f67e90 $1 &
--//注:放在后台执行,输入$1参数表示偏移。17445396对应 oracle执行文件的i-node,0x0000000014f6c020 0x0000000014f67e90 对应目标函数文件偏移。
--//window 1:
$ gdb -f -p 3723
...
(gdb) disassemble kgllkal
Dump of assembler code for function kgllkal:
0x0000000015367e90 <+0>: xchg %ax,%ax
0x0000000015367e92 <+2>: push %rbp
0x0000000015367e93 <+3>: mov %rsp,%rbp
0x0000000015367e96 <+6>: push %r12
0x0000000015367e98 <+8>: push %r13
...
(gdb) disassemble kglpnal
Dump of assembler code for function kglpnal:
0x000000001536c020 <+0>: xchg %ax,%ax
0x000000001536c022 <+2>: push %rbp
0x000000001536c023 <+3>: mov %rsp,%rbp
0x000000001536c026 <+6>: push %r12
0x000000001536c028 <+8>: push %r13
...
--//偏移选择+4是错误的(从上面的反汇编看,+4落在+3处mov指令的中间,不是指令边界)。接着测试+2,+3,+6,+8,输出乱码。
--//当时想是否因为选择偏移,导致取出显示的参数 handle address, mode 不对.
--//先使用gdb跟踪调用kgllkal函数,寄存器的值.
--//window 1:
$ gdb -f -p 3723
...
(gdb) b kgllkal
Breakpoint 1 at 0x15367e90
--//session 1:
--//首先在session 1,在启动gdb监测前多次执行select * from dept where deptno=21; 避免后面再执行类似语句时出现大量递归sql语句。
SCOTT@book01p> select * from dept where deptno=22;
--//挂起。
--//window 1:
(gdb) c
Continuing.
Breakpoint 1, 0x0000000015367e90 in kgllkal ()
(gdb) info registers
rax 0x3 3
rbx 0x1 1
rcx 0x1 1
rdx 0x644a9208 1682608648
rsi 0x7ffd692eaf60 140726368120672
rdi 0x7fe9f4920060 140642807316576
rbp 0x7ffd692eafc0 0x7ffd692eafc0
rsp 0x7ffd692eaa68 0x7ffd692eaa68
r8 0x7e82d6a0 2122503840
r9 0x7ffd692eb4d0 140726368122064
r10 0x0 0
r11 0x0 0
r12 0x0 0
r13 0x7ffd692ead88 140726368120200
r14 0x0 0
r15 0x1 1
rip 0x15367e90 0x15367e90 <kgllkal>
eflags 0x202 [ IF ]
cs 0x33 51
ss 0x2b 43
ds 0x0 0
es 0x0 0
fs 0x0 0
gs 0x0 0
--//rdx 对应 handle address,rcx 对应 mode。
(gdb) ni 1
0x0000000015367e92 in kgllkal ()
(gdb) info registers
rax 0x3 3
rbx 0x1 1
rcx 0x1 1
rdx 0x644a9208 1682608648
rsi 0x7ffd692eaf60 140726368120672
rdi 0x7fe9f4920060 140642807316576
rbp 0x7ffd692eafc0 0x7ffd692eafc0
rsp 0x7ffd692eaa68 0x7ffd692eaa68
r8 0x7e82d6a0 2122503840
r9 0x7ffd692eb4d0 140726368122064
r10 0x0 0
r11 0x0 0
r12 0x0 0
r13 0x7ffd692ead88 140726368120200
r14 0x0 0
r15 0x1 1
rip 0x15367e92 0x15367e92 <kgllkal+2>
eflags 0x202 [ IF ]
cs 0x33 51
ss 0x2b 43
ds 0x0 0
es 0x0 0
fs 0x0 0
gs 0x0 0
--//执行一条指令后,rdx,rcx并没有变化。
--//这样是否可以修改stp脚本,取寄存器rdx ,rcx值,这样就没有问题。
--//修改如下:
$ cat stap/lkpn21cx.stp
global lk=0, pn=0
#globak off_set=0
#probe begin {
# printf("Begin.\n")
#
#}
probe process("/u01/app/oracle/product/21.0.0/dbhome_1/bin/oracle").function("kgllkal")
{
# printf("spid=%d :%s lk_count %d -- handle address: %x , mode = %d , kglnaobj : %s\n", pid(), ppfunc(),++lk, long_arg(2),long_arg(3),user_string(long_arg(2)+0x1c8));
printf("spid=%d :%s lk_count %d -- handle address: %x , mode = %d , kglnaobj : %s\n", pid(), ppfunc(),++lk, register("rdx"),register("rcx"),user_string(register("rdx")+0x1c8));
}
probe process("/u01/app/oracle/product/21.0.0/dbhome_1/bin/oracle").function("kglpnal")
{
# printf("spid=%d :%s pn_count %d -- handle address: %x , mode = %d , kglnaobj : %s\n", pid(), ppfunc(),++pn, long_arg(2),long_arg(3),user_string(long_arg(2)+0x1c8));
printf("spid=%d :%s pn_count %d -- handle address: %x , mode = %d , kglnaobj : %s\n", pid(), ppfunc(),++pn, register("rdx"),register("rcx"),user_string(register("rdx")+0x1c8));
# printf("%d\t%s: %s %x %x %d\n", pid(), execname(), ppfunc(), int_arg(1),long_arg(2),long_arg(3));
}
# source ooo.sh 2
Pass 1: parsed user script and 497 library scripts using 274132virt/84400res/3340shr/81672data kb, in 810usr/40sys/847real ms.
Pass 1: parsed user script and 497 library scripts using 274132virt/84400res/3340shr/81672data kb, in 830usr/40sys/872real ms.
Pass 2: analyzed script: 2 probes, 3 functions, 0 embeds, 0 globals using 312764virt/124052res/4320shr/120304data kb, in 600usr/100sys/725real ms.
Pass 2: analyzed script: 2 probes, 3 functions, 0 embeds, 0 globals using 312764virt/124052res/4316shr/120304data kb, in 620usr/90sys/710real ms.
Pass 3: using cached /root/.systemtap/cache/77/stap_77e75aae44ea58a2ffd3f3849cbbffeb_2697.c
Pass 4: using cached /root/.systemtap/cache/77/stap_77e75aae44ea58a2ffd3f3849cbbffeb_2697.ko
Pass 5: starting run.
Pass 3: using cached /root/.systemtap/cache/72/stap_72f36f8539c93b8faf45b87458ab7e8d_2697.c
Pass 4: using cached /root/.systemtap/cache/72/stap_72f36f8539c93b8faf45b87458ab7e8d_2697.ko
Pass 5: starting run.
# stap -v /home/oracle/sqllaji/stap/lkpn21cx.stp -x 3723 --tmpdir=/tmp
Pass 1: parsed user script and 497 library scripts using 274136virt/84396res/3332shr/81676data kb, in 790usr/50sys/837real ms.
Pass 2: analyzed script: 3 probes, 13 functions, 4 embeds, 6 globals using 477852virt/289148res/4496shr/285392data kb, in 1420usr/270sys/1701real ms.
Pass 3: using cached /root/.systemtap/cache/c7/stap_c78e8b2ad9d2c07d4c365e81e8b7768c_13027.c
Pass 4: using cached /root/.systemtap/cache/c7/stap_c78e8b2ad9d2c07d4c365e81e8b7768c_13027.ko
Pass 5: starting run.
Patching Oracle probe offset 14f67e90 -> 14f67e92
Patching Oracle probe offset 14f6c020 -> 14f6c022
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
spid=3723 :kgllkal lk_count 1 -- handle address: 644a9208 , mode = 1 , kglnaobj : select * from dept where deptno=22
....
--//显示正确。
--//注意下划线内容,修正地址的偏移。
5.继续尝试执行lkpn21c.stp:
--//对比gdb与stap的跟踪:
Breakpoint 1 at 0x15367e90
(gdb) c
Continuing.
Breakpoint 1, 0x0000000015367e90 in kgllkal ()
(gdb) info regi
rax 0x3 3
rbx 0x1 1
rcx 0x1 1
rdx 0x6719a280 1729733248
rsi 0x7ffd692eaf60 140726368120672
rdi 0x7fe9f4920060 140642807316576
rbp 0x7ffd692eafc0 0x7ffd692eafc0
rsp 0x7ffd692eaa68 0x7ffd692eaa68
r8 0x7e82d6a0 2122503840
r9 0x7ffd692eb4d0 140726368122064
r10 0x0 0
r11 0x0 0
r12 0x0 0
r13 0x7ffd692ead88 140726368120200
r14 0x0 0
r15 0x1 1
rip 0x15367e90 0x15367e90 <kgllkal>
eflags 0x202 [ IF ]
cs 0x33 51
ss 0x2b 43
ds 0x0 0
es 0x0 0
fs 0x0 0
gs 0x0 0
# stap -v /home/oracle/sqllaji/stap/lkpn21c.stp -x 3723 --tmpdir=/tmp
Pass 1: parsed user script and 497 library scripts using 274132virt/84396res/3332shr/81672data kb, in 780usr/50sys/829real ms.
Pass 2: analyzed script: 3 probes, 6 functions, 4 embeds, 6 globals using 477848virt/289148res/4492shr/285388data kb, in 1430usr/280sys/1712real ms.
Pass 3: using cached /root/.systemtap/cache/bc/stap_bce25348d61e89ec998958392424e805_13818.c
Pass 4: using cached /root/.systemtap/cache/bc/stap_bce25348d61e89ec998958392424e805_13818.ko
Pass 5: starting run.
Patching Oracle probe offset 14f6c020 -> 14f6c022
Patching Oracle probe offset 14f67e90 -> 14f67e92
spid=3723 :kgllkal lk_count 1 -- handle address: 7ffd692eaf60 , mode = 1729889216 , kglnaobj : GD9
spid=3723 :kglpnal pn_count 1 -- handle address: 7ffd692eb148 , mode = 1729889216 , kglnaobj : 累旚?
spid=3723 :kgllkal lk_count 2 -- handle address: 7ffd692e8b80 , mode = 1920743280 , kglnaobj :
...
--//上下对比可以发现handle address 取的是寄存器rsi,mode 取的是寄存器rdx。也就是21c定义kgllkal函数,参数数量发生改变。
--//handle address 对应取参数3(long_arg(3),即寄存器rdx),而mode 对应取参数4(long_arg(4),即寄存器rcx).修改如下:
$ cat stap/lkpn21c.stp
global lk=0, pn=0
#globak off_set=0
#probe begin {
# printf("Begin.\n")
#
#}
probe process("/u01/app/oracle/product/21.0.0/dbhome_1/bin/oracle").function("kgllkal")
{
printf("spid=%d :%s lk_count %d -- handle address: %x , mode = %d , kglnaobj : %s\n", pid(), ppfunc(),++lk, long_arg(3),long_arg(4),user_string(long_arg(3)+0x1c8));
}
probe process("/u01/app/oracle/product/21.0.0/dbhome_1/bin/oracle").function("kglpnal")
{
printf("spid=%d :%s pn_count %d -- handle address: %x , mode = %d , kglnaobj : %s\n", pid(), ppfunc(),++pn, long_arg(3),long_arg(4),user_string(long_arg(3)+0x1c8));
# printf("%d\t%s: %s %x %x %d\n", pid(), execname(), ppfunc(), int_arg(1),long_arg(2),long_arg(3));
}
# stap -v /home/oracle/sqllaji/stap/lkpn21c.stp -x 3723 --tmpdir=/tmp
Pass 1: parsed user script and 497 library scripts using 274136virt/84396res/3332shr/81676data kb, in 820usr/40sys/850real ms.
Pass 2: analyzed script: 3 probes, 6 functions, 4 embeds, 6 globals using 477852virt/289148res/4492shr/285392data kb, in 1390usr/260sys/1674real ms.
Pass 3: using cached /root/.systemtap/cache/18/stap_18663a97ced696cafbe78ab2f01f695a_13818.c
Pass 4: using cached /root/.systemtap/cache/18/stap_18663a97ced696cafbe78ab2f01f695a_13818.ko
Pass 5: starting run.
Patching Oracle probe offset 14f6c020 -> 14f6c022
Patching Oracle probe offset 14f67e90 -> 14f67e92
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
--//依旧需要执行fix_offset.stp脚本,修正偏移。
--//第1次执行
spid=3723 :kgllkal lk_count 1 -- handle address: 6723e650 , mode = 1 , kglnaobj : select * from dept where deptno=29
spid=3723 :kglpnal pn_count 1 -- handle address: 6723e650 , mode = 2 , kglnaobj : select * from dept where deptno=29
spid=3723 :kgllkal lk_count 2 -- handle address: 727c3770 , mode = 2 , kglnaobj : bookSYSCDB$ROOT
spid=3723 :kgllkal lk_count 3 -- handle address: 6dcc5dd8 , mode = 2 , kglnaobj : 1073777561SYSCDB$ROOT
spid=3723 :kgllkal lk_count 4 -- handle address: 6724d530 , mode = 2 , kglnaobj : e16f7e54bccd0a24f108891ab16a0359$BUILD$BOOK01P
spid=3723 :kgllkal lk_count 5 -- handle address: 672511c0 , mode = 1 , kglnaobj :
spid=3723 :kglpnal pn_count 2 -- handle address: 672511c0 , mode = 3 , kglnaobj :
spid=3723 :kgllkal lk_count 6 -- handle address: 727c3770 , mode = 2 , kglnaobj : bookSYSCDB$ROOT
spid=3723 :kgllkal lk_count 7 -- handle address: 6dcc5dd8 , mode = 2 , kglnaobj : 1073777561SYSCDB$ROOT
spid=3723 :kgllkal lk_count 8 -- handle address: 6729acf0 , mode = 1 , kglnaobj : e16f7e54bccd0a24f108891ab16a0359Child:0BOOK01P
spid=3723 :kglpnal pn_count 3 -- handle address: 6729acf0 , mode = 3 , kglnaobj : e16f7e54bccd0a24f108891ab16a0359Child:0BOOK01P
spid=3723 :kgllkal lk_count 9 -- handle address: 67d60ce8 , mode = 1 , kglnaobj : SCOTTBOOK01P
spid=3723 :kgllkal lk_count 10 -- handle address: 727c3770 , mode = 2 , kglnaobj : bookSYSCDB$ROOT
spid=3723 :kgllkal lk_count 11 -- handle address: 6dcc5dd8 , mode = 2 , kglnaobj : 1073777561SYSCDB$ROOT
spid=3723 :kgllkal lk_count 12 -- handle address: 644d51c8 , mode = 2 , kglnaobj : DEPTSCOTTBOOK01襣d
spid=3723 :kglpnal pn_count 4 -- handle address: 644d51c8 , mode = 2 , kglnaobj : DEPTSCOTTBOOK01襣d
spid=3723 :kgllkal lk_count 13 -- handle address: 644ccd18 , mode = 2 , kglnaobj : 5358706841214419813BOOK01P
spid=3723 :kglpnal pn_count 5 -- handle address: 644ccd18 , mode = 2 , kglnaobj : 5358706841214419813BOOK01P
spid=3723 :kgllkal lk_count 14 -- handle address: 644c6f38 , mode = 2 , kglnaobj : 1256087081022357994BOOK01P
spid=3723 :kglpnal pn_count 6 -- handle address: 644c6f38 , mode = 2 , kglnaobj : 1256087081022357994BOOK01P
spid=3723 :kgllkal lk_count 15 -- handle address: 644c22a8 , mode = 2 , kglnaobj : 13547376130454050250BOOK01P
spid=3723 :kglpnal pn_count 7 -- handle address: 644c22a8 , mode = 2 , kglnaobj : 13547376130454050250BOOK01P
spid=3723 :kgllkal lk_count 16 -- handle address: 644c0e78 , mode = 2 , kglnaobj : 4448762010415191240BOOK01P
spid=3723 :kglpnal pn_count 8 -- handle address: 644c0e78 , mode = 2 , kglnaobj : 4448762010415191240BOOK01P
spid=3723 :kgllkal lk_count 17 -- handle address: 644c6f38 , mode = 2 , kglnaobj : 1256087081022357994BOOK01P
spid=3723 :kglpnal pn_count 9 -- handle address: 644c6f38 , mode = 2 , kglnaobj : 1256087081022357994BOOK01P
--//第2次执行
spid=3723 :kgllkal lk_count 18 -- handle address: 6723e650 , mode = 1 , kglnaobj : select * from dept where deptno=29
spid=3723 :kgllkal lk_count 19 -- handle address: 672511c0 , mode = 1 , kglnaobj :
spid=3723 :kgllkal lk_count 20 -- handle address: 644d51c8 , mode = 2 , kglnaobj : DEPTSCOTTBOOK01襣d
spid=3723 :kglpnal pn_count 10 -- handle address: 644d51c8 , mode = 2 , kglnaobj : DEPTSCOTTBOOK01襣d
--//第3次执行
spid=3723 :kgllkal lk_count 21 -- handle address: 6723e650 , mode = 1 , kglnaobj : select * from dept where deptno=29
spid=3723 :kgllkal lk_count 22 -- handle address: 672511c0 , mode = 1 , kglnaobj :
--//第4次执行
--//没有输出.
6.收尾:
--//1.按ctrl+c退出stap的跟踪.
--//2.将后台执行fix_offset.stp停止。
# fg
stap -g -v /home/oracle/sqllaji/stap/fix_offset.stp 17445396 0x0000000014f67e90 $1
^C
Pass 5: run completed in 50usr/300sys/1050715real ms.
# fg
stap -g -v /home/oracle/sqllaji/stap/fix_offset.stp 17445396 0x0000000014f6c020 $1
^C
Pass 5: run completed in 70usr/340sys/1052782real ms.
--//3.清理 /tmp 目录,反复调试在tmp目录留下许多垃圾文件,全部删除,过程略。
浙公网安备 33010602011771号