[20260525]跟踪library cache lock library cache pin使用systemtap(21c)2.txt

[20260525]跟踪library cache lock library cache pin使用systemtap(21c)2.txt

--//链接:[20260525]跟踪library cache lock library cache pin使用systemtap(21c)1.txt 列出几种方案。
--//先尝试方案1,输出乱码,当时也没有仔细分析。方案2失败,再尝试方案3,先升级SystemTap到4.0版本。
--//继续后续的测试:

1.环境:
SCOTT@book01p> @ ver2
==============================
PORT_STRING                   : x86_64/Linux 2.4.xx
VERSION                       : 21.0.0.0.0
BANNER                        : Oracle Database 21c Enterprise Edition Release 21.0.0.0.0 - Production
BANNER_FULL                   : Oracle Database 21c Enterprise Edition Release 21.0.0.0.0 - Production
Version 21.3.0.0.0
BANNER_LEGACY                 : Oracle Database 21c Enterprise Edition Release 21.0.0.0.0 - Production
CON_ID                        : 0
PL/SQL procedure successfully completed.

--//session 1:
SCOTT@book01p> @ spid
==============================
SID                           : 123
SERIAL#                       : 7956
PROCESS                       : 3721
SERVER                        : DEDICATED
SPID                          : 3723
PID                           : 85
P_SERIAL#                     : 1
KILL_COMMAND                  : alter system kill session '123,7956' immediate;
PL/SQL procedure successfully completed.
--//记下进程号 3723.

3.尝试方案3:
--//在完成升级SystemTap到4.0版本后,测试问题依旧,还是报如下类似错误。
$ cat  stap/lkpn21c.stp
# lkpn21c.stp -- trace calls to kgllkal and kglpnal in the oracle executable.
# Each hit prints the process id, the probed function name, a running call
# counter, and two integer arguments labelled "handle address" and "mode",
# followed by the string read from user memory at (handle address + 0x1c8).
# NOTE(review): this version reads arguments 2 and 3; the gdb/stap comparison
# later in this write-up indicates that on 21c the values sit in arguments 3
# and 4, so the output of this version is not meaningful on 21c.

# Running counters: lk counts kgllkal hits, pn counts kglpnal hits.
global lk=0, pn=0
#globak off_set=0

#probe begin {
#    printf("Begin.\n")
#
#}

# Fires on entry to kgllkal.
probe process("/u01/app/oracle/product/21.0.0/dbhome_1/bin/oracle").function("kgllkal")
{
    # 0x1c8 is the offset from the handle at which the name string (kglnaobj) is read
    # -- presumably version specific; TODO confirm for other releases.
    printf("spid=%d :%s lk_count %d -- handle address: %x , mode = %d , kglnaobj : %s\n", pid(), ppfunc(),++lk, long_arg(2),long_arg(3),user_string(long_arg(2)+0x1c8));
}

# Fires on entry to kglpnal; same output layout as the kgllkal probe.
probe process("/u01/app/oracle/product/21.0.0/dbhome_1/bin/oracle").function("kglpnal")
{
    printf("spid=%d :%s pn_count %d -- handle address: %x , mode = %d , kglnaobj : %s\n", pid(), ppfunc(),++pn, long_arg(2),long_arg(3),user_string(long_arg(2)+0x1c8));
#   printf("%d\t%s: %s %x %x %d\n", pid(), execname(), ppfunc(), int_arg(1),long_arg(2),long_arg(3));
}

# stap  -v /home/oracle/sqllaji/stap/lkpn21c.stp -x 3723  --tmpdir=/tmp
Pass 1: parsed user script and 497 library scripts using 274136virt/84392res/3332shr/81676data kb, in 780usr/50sys/838real ms.
Pass 2: analyzed script: 3 probes, 6 functions, 4 embeds, 6 globals using 477852virt/289144res/4492shr/285392data kb, in 1380usr/270sys/1658real ms.
Pass 3: using cached /root/.systemtap/cache/18/stap_18663a97ced696cafbe78ab2f01f695a_13818.c
Pass 4: using cached /root/.systemtap/cache/18/stap_18663a97ced696cafbe78ab2f01f695a_13818.ko
Pass 5: starting run.
WARNING: probe process("/u01/app/oracle/product/21.0.0/dbhome_1/bin/oracle").function("kglpnal") inode-offset 0000000014f6c020 registration error (rc -524)
WARNING: probe process("/u01/app/oracle/product/21.0.0/dbhome_1/bin/oracle").function("kgllkal") inode-offset 0000000014f67e90 registration error (rc -524)
WARNING: task_finder inode-uprobes callback for task 3723 failed: -524
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
^CPass 5: run completed in 0usr/30sys/3402real ms.
--//变成rc -524错误.

--//前面没升级前报错误如下:
WARNING: probe process("/u01/app/oracle/product/21.0.0/dbhome_1/bin/oracle").function("kglpnal") inode-offset 0000000014f6c020 registration error (rc 0)
WARNING: probe process("/u01/app/oracle/product/21.0.0/dbhome_1/bin/oracle").function("kgllkal") inode-offset 0000000014f67e90 registration error (rc 0)

--//实际上还是类似。这次多了一个提示:WARNING: task_finder inode-uprobes callback for task 3723 failed: -524
--//回过头,再次尝试方案1。方案1不报错,但是我的测试中kglnaobj显示的信息是乱码。

4.尝试方案1.
--//当时尝试+2 +4 +8 偏移,问题依旧,记录执行过程:

$ ls -il $(which oracle)
17445396 -rwsr-s--x. 1 oracle oinstall 498943224 2024-08-08 17:52:00 /u01/app/oracle/product/21.0.0/dbhome_1/bin/oracle
--//记下oracle执行文件的i-node number=17445396,后面执行脚本需要。

--//按照kimi的建议,建立执行文本fix_offset.stp,内容如下:
# cat /home/oracle/sqllaji/stap/fix_offset.stp
#! /usr/bin/env stap -g

// Guru-mode helper: intercepts the kernel's uprobe_register/uprobe_unregister
// calls and, when a call targets the given inode at the given file offset,
// rewrites the offset by adding a correction before the kernel function runs.
//
// Arguments: $1 = inode number of the oracle executable,
//            $2 = file offset of the target function,
//            $3 = correction to add (usually 2 or 4)
probe kernel.function("uprobe_register"), kernel.function("uprobe_unregister")
{
    // Only act on calls made against the requested inode ...
    if ($inode->i_ino == $1) {
        // ... and only for the exact offset that needs correcting.
        if ($offset == $2) {
            printf("Patching Oracle probe offset %x -> %x\n", $offset, $offset + $3);
            // Assigning to a target variable is why this script is run with -g (guru mode).
            $offset = $offset + $3;
        }
    }
}

# cat ooo.sh
#! /bin/bash
# Start two background instances of fix_offset.stp, one per target function offset.
# Arguments passed to fix_offset.stp: <inode of the oracle executable> <function file offset> <correction>.
# $1 (the correction) is the first argument given to this script, e.g. "source ooo.sh 2".
stap -g -v /home/oracle/sqllaji/stap/fix_offset.stp 17445396 0x0000000014f6c020 $1 &
stap -g -v /home/oracle/sqllaji/stap/fix_offset.stp 17445396 0x0000000014f67e90 $1 &
--//注:放在后台执行,输入$1参数表示偏移。17445396对应 oracle执行文件的i-node,0x0000000014f6c020 0x0000000014f67e90 对应目标函数文件偏移。

--//window 1:
$ gdb -f -p 3723
...
(gdb) disassemble kgllkal
Dump of assembler code for function kgllkal:
   0x0000000015367e90 <+0>:     xchg   %ax,%ax
   0x0000000015367e92 <+2>:     push   %rbp
   0x0000000015367e93 <+3>:     mov    %rsp,%rbp
   0x0000000015367e96 <+6>:     push   %r12
   0x0000000015367e98 <+8>:     push   %r13
...

(gdb) disassemble kglpnal
Dump of assembler code for function kglpnal:
   0x000000001536c020 <+0>:     xchg   %ax,%ax
   0x000000001536c022 <+2>:     push   %rbp
   0x000000001536c023 <+3>:     mov    %rsp,%rbp
   0x000000001536c026 <+6>:     push   %r12
   0x000000001536c028 <+8>:     push   %r13
...

--//偏移选择+4是错误的:从上面的反汇编看,+4落在mov %rsp,%rbp指令(+3~+5)的中间,不是指令边界。接着测试+2,+3,+6,+8,输出乱码。
--//当时想是否因为选择偏移,导致取出显示的参数 handle address, mode 不对.
--//先使用gdb跟踪调用kgllkal函数,寄存器的值.

--//window 1:
$ gdb -f -p 3723
...
(gdb) b kgllkal
Breakpoint 1 at 0x15367e90

--//session 1:
--//首先在session 1中,在启动gdb监测前多次执行select * from dept where deptno=21;,避免后面执行类似语句时出现大量递归sql语句。
SCOTT@book01p> select * from dept where deptno=22;
--//挂起。

--//window 1:
(gdb) c
Continuing.

Breakpoint 1, 0x0000000015367e90 in kgllkal ()
(gdb) info registers
rax            0x3      3
rbx            0x1      1
rcx            0x1      1
rdx            0x644a9208       1682608648
rsi            0x7ffd692eaf60   140726368120672
rdi            0x7fe9f4920060   140642807316576
rbp            0x7ffd692eafc0   0x7ffd692eafc0
rsp            0x7ffd692eaa68   0x7ffd692eaa68
r8             0x7e82d6a0       2122503840
r9             0x7ffd692eb4d0   140726368122064
r10            0x0      0
r11            0x0      0
r12            0x0      0
r13            0x7ffd692ead88   140726368120200
r14            0x0      0
r15            0x1      1
rip            0x15367e90       0x15367e90 <kgllkal>
eflags         0x202    [ IF ]
cs             0x33     51
ss             0x2b     43
ds             0x0      0
es             0x0      0
fs             0x0      0
gs             0x0      0
--//rdx 对应 handle address,rcx 对应 mode。

(gdb) ni 1
0x0000000015367e92 in kgllkal ()
(gdb) info registers
rax            0x3      3
rbx            0x1      1
rcx            0x1      1
rdx            0x644a9208       1682608648
rsi            0x7ffd692eaf60   140726368120672
rdi            0x7fe9f4920060   140642807316576
rbp            0x7ffd692eafc0   0x7ffd692eafc0
rsp            0x7ffd692eaa68   0x7ffd692eaa68
r8             0x7e82d6a0       2122503840
r9             0x7ffd692eb4d0   140726368122064
r10            0x0      0
r11            0x0      0
r12            0x0      0
r13            0x7ffd692ead88   140726368120200
r14            0x0      0
r15            0x1      1
rip            0x15367e92       0x15367e92 <kgllkal+2>
eflags         0x202    [ IF ]
cs             0x33     51
ss             0x2b     43
ds             0x0      0
es             0x0      0
fs             0x0      0
gs             0x0      0
--//执行一条指令后,rdx,rcx并没有变化。
--//这样是否可以修改stp脚本,取寄存器rdx ,rcx值,这样就没有问题。
--//修改如下:
$ cat stap/lkpn21cx.stp
# lkpn21cx.stp -- variant of lkpn21c.stp that reads the CPU registers rdx and
# rcx directly instead of using long_arg(). rdx is printed as "handle address",
# rcx as "mode", and the string at (rdx + 0x1c8) as kglnaobj.
# The gdb session in this write-up shows rdx/rcx unchanged after the first
# instruction of kgllkal, which is why reading them at a +2 probe offset works.

# Running counters: lk counts kgllkal hits, pn counts kglpnal hits.
global lk=0, pn=0
#globak off_set=0

#probe begin {
#    printf("Begin.\n")
#
#}

# Fires on entry to kgllkal.
probe process("/u01/app/oracle/product/21.0.0/dbhome_1/bin/oracle").function("kgllkal")
{
# Previous long_arg()-based version, kept for comparison:
#   printf("spid=%d :%s lk_count %d -- handle address: %x , mode = %d , kglnaobj : %s\n", pid(), ppfunc(),++lk, long_arg(2),long_arg(3),user_string(long_arg(2)+0x1c8));
    printf("spid=%d :%s lk_count %d -- handle address: %x , mode = %d , kglnaobj : %s\n", pid(), ppfunc(),++lk, register("rdx"),register("rcx"),user_string(register("rdx")+0x1c8));
}

# Fires on entry to kglpnal; same output layout as the kgllkal probe.
probe process("/u01/app/oracle/product/21.0.0/dbhome_1/bin/oracle").function("kglpnal")
{
# Previous long_arg()-based version, kept for comparison:
#   printf("spid=%d :%s pn_count %d -- handle address: %x , mode = %d , kglnaobj : %s\n", pid(), ppfunc(),++pn, long_arg(2),long_arg(3),user_string(long_arg(2)+0x1c8));
    printf("spid=%d :%s pn_count %d -- handle address: %x , mode = %d , kglnaobj : %s\n", pid(), ppfunc(),++pn, register("rdx"),register("rcx"),user_string(register("rdx")+0x1c8));
#   printf("%d\t%s: %s %x %x %d\n", pid(), execname(), ppfunc(), int_arg(1),long_arg(2),long_arg(3));
}

# source ooo.sh  2
Pass 1: parsed user script and 497 library scripts using 274132virt/84400res/3340shr/81672data kb, in 810usr/40sys/847real ms.
Pass 1: parsed user script and 497 library scripts using 274132virt/84400res/3340shr/81672data kb, in 830usr/40sys/872real ms.
Pass 2: analyzed script: 2 probes, 3 functions, 0 embeds, 0 globals using 312764virt/124052res/4320shr/120304data kb, in 600usr/100sys/725real ms.
Pass 2: analyzed script: 2 probes, 3 functions, 0 embeds, 0 globals using 312764virt/124052res/4316shr/120304data kb, in 620usr/90sys/710real ms.
Pass 3: using cached /root/.systemtap/cache/77/stap_77e75aae44ea58a2ffd3f3849cbbffeb_2697.c
Pass 4: using cached /root/.systemtap/cache/77/stap_77e75aae44ea58a2ffd3f3849cbbffeb_2697.ko
Pass 5: starting run.
Pass 3: using cached /root/.systemtap/cache/72/stap_72f36f8539c93b8faf45b87458ab7e8d_2697.c
Pass 4: using cached /root/.systemtap/cache/72/stap_72f36f8539c93b8faf45b87458ab7e8d_2697.ko
Pass 5: starting run.

# stap  -v /home/oracle/sqllaji/stap/lkpn21cx.stp -x 3723  --tmpdir=/tmp
Pass 1: parsed user script and 497 library scripts using 274136virt/84396res/3332shr/81676data kb, in 790usr/50sys/837real ms.
Pass 2: analyzed script: 3 probes, 13 functions, 4 embeds, 6 globals using 477852virt/289148res/4496shr/285392data kb, in 1420usr/270sys/1701real ms.
Pass 3: using cached /root/.systemtap/cache/c7/stap_c78e8b2ad9d2c07d4c365e81e8b7768c_13027.c
Pass 4: using cached /root/.systemtap/cache/c7/stap_c78e8b2ad9d2c07d4c365e81e8b7768c_13027.ko
Pass 5: starting run.
Patching Oracle probe offset 14f67e90 -> 14f67e92
Patching Oracle probe offset 14f6c020 -> 14f6c022
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
spid=3723 :kgllkal lk_count 1 -- handle address: 644a9208 , mode = 1 , kglnaobj : select * from dept where deptno=22
....
--//显示正确。
--//注意上面用~标出的两行内容:这是后台运行的fix_offset.stp的输出,表示探测地址的偏移已被修正(+2)。

5.继续尝试执行lkpn21c.stp:
--//对比gdb与stap的跟踪:
Breakpoint 1 at 0x15367e90
(gdb) c
Continuing.

Breakpoint 1, 0x0000000015367e90 in kgllkal ()
(gdb) info regi
rax            0x3      3
rbx            0x1      1
rcx            0x1      1
rdx            0x6719a280       1729733248
rsi            0x7ffd692eaf60   140726368120672
rdi            0x7fe9f4920060   140642807316576
rbp            0x7ffd692eafc0   0x7ffd692eafc0
rsp            0x7ffd692eaa68   0x7ffd692eaa68
r8             0x7e82d6a0       2122503840
r9             0x7ffd692eb4d0   140726368122064
r10            0x0      0
r11            0x0      0
r12            0x0      0
r13            0x7ffd692ead88   140726368120200
r14            0x0      0
r15            0x1      1
rip            0x15367e90       0x15367e90 <kgllkal>
eflags         0x202    [ IF ]
cs             0x33     51
ss             0x2b     43
ds             0x0      0
es             0x0      0
fs             0x0      0
gs             0x0      0

# stap  -v /home/oracle/sqllaji/stap/lkpn21c.stp -x 3723  --tmpdir=/tmp
Pass 1: parsed user script and 497 library scripts using 274132virt/84396res/3332shr/81672data kb, in 780usr/50sys/829real ms.
Pass 2: analyzed script: 3 probes, 6 functions, 4 embeds, 6 globals using 477848virt/289148res/4492shr/285388data kb, in 1430usr/280sys/1712real ms.
Pass 3: using cached /root/.systemtap/cache/bc/stap_bce25348d61e89ec998958392424e805_13818.c
Pass 4: using cached /root/.systemtap/cache/bc/stap_bce25348d61e89ec998958392424e805_13818.ko
Pass 5: starting run.
Patching Oracle probe offset 14f6c020 -> 14f6c022
Patching Oracle probe offset 14f67e90 -> 14f67e92
spid=3723 :kgllkal lk_count 1 -- handle address: 7ffd692eaf60 , mode = 1729889216 , kglnaobj : GD9
spid=3723 :kglpnal pn_count 1 -- handle address: 7ffd692eb148 , mode = 1729889216 , kglnaobj : 累旚?
spid=3723 :kgllkal lk_count 2 -- handle address: 7ffd692e8b80 , mode = 1920743280 , kglnaobj :
...
--//上下对比可以发现handle address 取的是寄存器rsi,mode 取的是寄存器rdx。也就是21c定义kgllkal函数,参数数量发生改变。
--//handle address 对应取参数3,而mode 对应取参数4.修改如下:

$ cat  stap/lkpn21c.stp
# lkpn21c.stp (revised for 21c) -- trace calls to kgllkal and kglpnal in the
# oracle executable. Each hit prints the process id, the probed function name,
# a running call counter, argument 3 as "handle address", argument 4 as "mode",
# and the string read from user memory at (handle address + 0x1c8).
# Arguments 3 and 4 are used because the gdb comparison in this write-up shows
# the handle in rdx and the mode in rcx on 21c.

# Running counters: lk counts kgllkal hits, pn counts kglpnal hits.
global lk=0, pn=0
#globak off_set=0

#probe begin {
#    printf("Begin.\n")
#
#}

# Fires on entry to kgllkal.
probe process("/u01/app/oracle/product/21.0.0/dbhome_1/bin/oracle").function("kgllkal")
{
    # 0x1c8 is the offset from the handle at which the name string (kglnaobj) is read
    # -- presumably version specific; TODO confirm for other releases.
    printf("spid=%d :%s lk_count %d -- handle address: %x , mode = %d , kglnaobj : %s\n", pid(), ppfunc(),++lk, long_arg(3),long_arg(4),user_string(long_arg(3)+0x1c8));
}

# Fires on entry to kglpnal; same output layout as the kgllkal probe.
probe process("/u01/app/oracle/product/21.0.0/dbhome_1/bin/oracle").function("kglpnal")
{
    printf("spid=%d :%s pn_count %d -- handle address: %x , mode = %d , kglnaobj : %s\n", pid(), ppfunc(),++pn, long_arg(3),long_arg(4),user_string(long_arg(3)+0x1c8));
#   printf("%d\t%s: %s %x %x %d\n", pid(), execname(), ppfunc(), int_arg(1),long_arg(2),long_arg(3));
}

# stap  -v /home/oracle/sqllaji/stap/lkpn21c.stp -x 3723  --tmpdir=/tmp
Pass 1: parsed user script and 497 library scripts using 274136virt/84396res/3332shr/81676data kb, in 820usr/40sys/850real ms.
Pass 2: analyzed script: 3 probes, 6 functions, 4 embeds, 6 globals using 477852virt/289148res/4492shr/285392data kb, in 1390usr/260sys/1674real ms.
Pass 3: using cached /root/.systemtap/cache/18/stap_18663a97ced696cafbe78ab2f01f695a_13818.c
Pass 4: using cached /root/.systemtap/cache/18/stap_18663a97ced696cafbe78ab2f01f695a_13818.ko
Pass 5: starting run.
Patching Oracle probe offset 14f6c020 -> 14f6c022
Patching Oracle probe offset 14f67e90 -> 14f67e92
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
--//依旧需要执行fix_offset.stp脚本,修正偏移。
--//第1次执行
spid=3723 :kgllkal lk_count 1 -- handle address: 6723e650 , mode = 1 , kglnaobj : select * from dept where deptno=29
spid=3723 :kglpnal pn_count 1 -- handle address: 6723e650 , mode = 2 , kglnaobj : select * from dept where deptno=29
spid=3723 :kgllkal lk_count 2 -- handle address: 727c3770 , mode = 2 , kglnaobj : bookSYSCDB$ROOT
spid=3723 :kgllkal lk_count 3 -- handle address: 6dcc5dd8 , mode = 2 , kglnaobj : 1073777561SYSCDB$ROOT
spid=3723 :kgllkal lk_count 4 -- handle address: 6724d530 , mode = 2 , kglnaobj : e16f7e54bccd0a24f108891ab16a0359$BUILD$BOOK01P
spid=3723 :kgllkal lk_count 5 -- handle address: 672511c0 , mode = 1 , kglnaobj :
spid=3723 :kglpnal pn_count 2 -- handle address: 672511c0 , mode = 3 , kglnaobj :
spid=3723 :kgllkal lk_count 6 -- handle address: 727c3770 , mode = 2 , kglnaobj : bookSYSCDB$ROOT
spid=3723 :kgllkal lk_count 7 -- handle address: 6dcc5dd8 , mode = 2 , kglnaobj : 1073777561SYSCDB$ROOT
spid=3723 :kgllkal lk_count 8 -- handle address: 6729acf0 , mode = 1 , kglnaobj : e16f7e54bccd0a24f108891ab16a0359Child:0BOOK01P
spid=3723 :kglpnal pn_count 3 -- handle address: 6729acf0 , mode = 3 , kglnaobj : e16f7e54bccd0a24f108891ab16a0359Child:0BOOK01P
spid=3723 :kgllkal lk_count 9 -- handle address: 67d60ce8 , mode = 1 , kglnaobj : SCOTTBOOK01P
spid=3723 :kgllkal lk_count 10 -- handle address: 727c3770 , mode = 2 , kglnaobj : bookSYSCDB$ROOT
spid=3723 :kgllkal lk_count 11 -- handle address: 6dcc5dd8 , mode = 2 , kglnaobj : 1073777561SYSCDB$ROOT
spid=3723 :kgllkal lk_count 12 -- handle address: 644d51c8 , mode = 2 , kglnaobj : DEPTSCOTTBOOK01襣d
spid=3723 :kglpnal pn_count 4 -- handle address: 644d51c8 , mode = 2 , kglnaobj : DEPTSCOTTBOOK01襣d
spid=3723 :kgllkal lk_count 13 -- handle address: 644ccd18 , mode = 2 , kglnaobj : 5358706841214419813BOOK01P
spid=3723 :kglpnal pn_count 5 -- handle address: 644ccd18 , mode = 2 , kglnaobj : 5358706841214419813BOOK01P
spid=3723 :kgllkal lk_count 14 -- handle address: 644c6f38 , mode = 2 , kglnaobj : 1256087081022357994BOOK01P
spid=3723 :kglpnal pn_count 6 -- handle address: 644c6f38 , mode = 2 , kglnaobj : 1256087081022357994BOOK01P
spid=3723 :kgllkal lk_count 15 -- handle address: 644c22a8 , mode = 2 , kglnaobj : 13547376130454050250BOOK01P
spid=3723 :kglpnal pn_count 7 -- handle address: 644c22a8 , mode = 2 , kglnaobj : 13547376130454050250BOOK01P
spid=3723 :kgllkal lk_count 16 -- handle address: 644c0e78 , mode = 2 , kglnaobj : 4448762010415191240BOOK01P
spid=3723 :kglpnal pn_count 8 -- handle address: 644c0e78 , mode = 2 , kglnaobj : 4448762010415191240BOOK01P
spid=3723 :kgllkal lk_count 17 -- handle address: 644c6f38 , mode = 2 , kglnaobj : 1256087081022357994BOOK01P
spid=3723 :kglpnal pn_count 9 -- handle address: 644c6f38 , mode = 2 , kglnaobj : 1256087081022357994BOOK01P
--//第2次执行
spid=3723 :kgllkal lk_count 18 -- handle address: 6723e650 , mode = 1 , kglnaobj : select * from dept where deptno=29
spid=3723 :kgllkal lk_count 19 -- handle address: 672511c0 , mode = 1 , kglnaobj :
spid=3723 :kgllkal lk_count 20 -- handle address: 644d51c8 , mode = 2 , kglnaobj : DEPTSCOTTBOOK01襣d
spid=3723 :kglpnal pn_count 10 -- handle address: 644d51c8 , mode = 2 , kglnaobj : DEPTSCOTTBOOK01襣d
--//第3次执行
spid=3723 :kgllkal lk_count 21 -- handle address: 6723e650 , mode = 1 , kglnaobj : select * from dept where deptno=29
spid=3723 :kgllkal lk_count 22 -- handle address: 672511c0 , mode = 1 , kglnaobj :
--//第4次执行
--//没有输出.

6.收尾:

--//1.按ctrl+c退出stap的跟踪.
--//2.将后台执行fix_offset.stp停止。
# fg
stap -g -v /home/oracle/sqllaji/stap/fix_offset.stp 17445396 0x0000000014f67e90 $1
^C
Pass 5: run completed in 50usr/300sys/1050715real ms.

# fg
stap -g -v /home/oracle/sqllaji/stap/fix_offset.stp 17445396 0x0000000014f6c020 $1
^C
Pass 5: run completed in 70usr/340sys/1052782real ms.

--//3.清理 /tmp 目录,反复调试在tmp目录留下许多垃圾文件,全部删除,过程略。

posted @ 2026-05-28 21:19  lfree  阅读(3)  评论(0)    收藏  举报