第25篇-虚拟机对象操作指令之getfield

getfield指令表示获取指定类的实例域，并将其值压入栈顶。其格式如下：

getstatic indexbyte1 indexbyte2

无符号数indexbyte1和indexbyte2构建为(indexbyte1<<8)|indexbyte2，这个值指明了一个当前类的运行时常量池索引值，指向的运行时常量池项为一个字段的符号引用。

getfield字节码指令的生成函数为TemplateTable::getfield()，这些生成函数如下：

void TemplateTable::getfield(int byte_no) {
  getfield_or_static(byte_no, false); // getfield的byte_no值为1
}

最终会调用getfield_or_static()函数生成机器指令片段。此函数生成的机器指令片段对应的汇编代码如下：

0x00007fffe10202d0: movzwl 0x1(%r13),%edx
0x00007fffe10202d5: mov    -0x28(%rbp),%rcx
0x00007fffe10202d9: shl    $0x2,%edx
0x00007fffe10202dc: mov    0x10(%rcx,%rdx,8),%ebx
0x00007fffe10202e0: shr    $0x10,%ebx
0x00007fffe10202e3: and    $0xff,%ebx
// 0xb4是getfield指令的Opcode，如果相等，则说明已经连接，直接跳转到resolved
0x00007fffe10202e9: cmp    $0xb4,%ebx
0x00007fffe10202ef: je     0x00007fffe102038e

0x00007fffe10202f5: mov    $0xb4,%ebx
// 省略通过调用MacroAssembler::call_VM()函数来执行 
// InterpreterRuntime::resolve_get_put()函数的汇编代码 
// ...

调用MacroAssembler::call_VM()函数生成如下代码，通过这些代码来执行InterpreterRuntime::resolve_get_put()函数。MacroAssembler::call_VM()函数的汇编在之前已经详细介绍过，这里不再介绍，直接给出汇编代码，如下：

0x00007fffe10202fa: callq  0x00007fffe1020304
0x00007fffe10202ff: jmpq   0x00007fffe1020382

0x00007fffe1020304: mov    %rbx,%rsi
0x00007fffe1020307: lea    0x8(%rsp),%rax
0x00007fffe102030c: mov    %r13,-0x38(%rbp)
0x00007fffe1020310: mov    %r15,%rdi
0x00007fffe1020313: mov    %rbp,0x200(%r15)
0x00007fffe102031a: mov    %rax,0x1f0(%r15)
0x00007fffe1020321: test   $0xf,%esp
0x00007fffe1020327: je     0x00007fffe102033f
0x00007fffe102032d: sub    $0x8,%rsp
0x00007fffe1020331: callq  0x00007ffff66b567c
0x00007fffe1020336: add    $0x8,%rsp
0x00007fffe102033a: jmpq   0x00007fffe1020344
0x00007fffe102033f: callq  0x00007ffff66b567c
0x00007fffe1020344: movabs $0x0,%r10
0x00007fffe102034e: mov    %r10,0x1f0(%r15)
0x00007fffe1020355: movabs $0x0,%r10
0x00007fffe102035f: mov    %r10,0x200(%r15)
0x00007fffe1020366: cmpq   $0x0,0x8(%r15)
0x00007fffe102036e: je     0x00007fffe1020379
0x00007fffe1020374: jmpq   0x00007fffe1000420
0x00007fffe1020379: mov    -0x38(%rbp),%r13
0x00007fffe102037d: mov    -0x30(%rbp),%r14
0x00007fffe1020381: retq

如上代码完成的事情很简单，就是调用C++函数编写的InterpreterRuntime::resolve_get_put()函数，此函数会填充常量池缓存中ConstantPoolCacheEntry信息，关于ConstantPoolCache以及ConstantPoolCacheEntry，还有ConstantPoolCacheEntry中各个字段的含义在《深入剖析Java虚拟机：源码剖析与实例详解（基础卷）》中已经详细介绍过，这里不再介绍。

0x00007fffe1020382: movzwl 0x1(%r13),%edx
0x00007fffe1020387: mov    -0x28(%rbp),%rcx
0x00007fffe102038b: shl    $0x2,%edx

---- resolved ---- 

// 获取[_indices,_f1,_f2,_flags]中的_f2，由于ConstantPoolCache占用16字节，而_indices
// 和_f2各占用8字节，所以_f2的偏移为32字节，也就是0x32
// _f2中保存的是字段在oop实例中的字节偏移，通过此偏移就可获取此字段存储在
// oop中的值
0x00007fffe102038e: mov    0x20(%rcx,%rdx,8),%rbx

// 获取[_indices,_f1,_f2,_flags]中的_flags 
0x00007fffe1020393: mov    0x28(%rcx,%rdx,8),%eax

// 将栈中的objectref对象弹出到%rcx中
0x00007fffe1020397: pop    %rcx

// provoke（激起; 引起; 引发） OS NULL exception if reg = NULL by
// accessing M[reg] w/o changing any (non-CC) registers
// NOTE: cmpl is plenty（足够） here to provoke a segv
0x00007fffe1020398: cmp    (%rcx),%rax

// 将_flags向右移动28位，剩下TosState
0x00007fffe102039b: shr    $0x1c,%eax
0x00007fffe102039e: and    $0xf,%eax
// 如果不相等，说明TosState的值不为0，则跳转到notByte
0x00007fffe10203a1: jne    0x00007fffe10203ba

// btos

// btos的编号为0,代码执行到这里时，可能栈顶缓存要求是btos
// %rcx中存储的是objectref，%rbx中存储的是_f2，获取字段对应的值存储到%rax中
0x00007fffe10203a7: movsbl (%rcx,%rbx,1),%eax
0x00007fffe10203ab: push   %rax

// 对字节码指令进行重写，将Bytecodes::_fast_bgetfield的Opcode存储到%ecx中
0x00007fffe10203ac: mov    $0xcc,%ecx
// 将Bytecodes::_fast_bgetfield的Opcode更新到字节码指令的操作码
0x00007fffe10203b1: mov    %cl,0x0(%r13)
// 跳转到---- Done ----
0x00007fffe10203b5: jmpq   0x00007fffe102050f
---- notByte ----
0x00007fffe10203ba: cmp    $0x7,%eax
0x00007fffe10203bd: jne    0x00007fffe102045d  // 跳转到notObj


// atos

// 调用MacroAssembler::load_heap_oop()函数生成如下代码
0x00007fffe10203c3: mov    (%rcx,%rbx,1),%eax
// ... 省略部分代码
// 结束MacroAssembler::load_heap_oop()函数的调用
0x00007fffe102044e: push   %rax
// 重写字节码指令为Bytecodes::_fast_agetfield
0x00007fffe102044f: mov    $0xcb,%ecx
0x00007fffe1020454: mov    %cl,0x0(%r13)
0x00007fffe1020458: jmpq   0x00007fffe102050f
// -- notObj --
0x00007fffe102045d: cmp    $0x3,%eax
0x00007fffe1020460: jne    0x00007fffe1020478 // 跳转到notInt

// itos

0x00007fffe1020466: mov    (%rcx,%rbx,1),%eax
0x00007fffe1020469: push   %rax
// 重写字节码指令o Bytecodes::_fast_igetfield
0x00007fffe102046a: mov    $0xd0,%ecx
0x00007fffe102046f: mov    %cl,0x0(%r13)
0x00007fffe1020473: jmpq   0x00007fffe102050f
// --- notInt ----
0x00007fffe1020478: cmp    $0x1,%eax
0x00007fffe102047b: jne    0x00007fffe1020494 // 跳转到notChar


// ctos

0x00007fffe1020481: movzwl (%rcx,%rbx,1),%eax
0x00007fffe1020485: push   %rax
// 重写字节码指令为Bytecodes::_fast_cgetfield
0x00007fffe1020486: mov    $0xcd,%ecx
0x00007fffe102048b: mov    %cl,0x0(%r13)
0x00007fffe102048f: jmpq   0x00007fffe102050f
// ---- notChar ----
0x00007fffe1020494: cmp    $0x2,%eax
0x00007fffe1020497: jne    0x00007fffe10204b0 // 跳转到notShort

// stos

0x00007fffe102049d: movswl (%rcx,%rbx,1),%eax
0x00007fffe10204a1: push   %rax
// 重写字节码指令为Bytecodes::_fast_sgetfield
0x00007fffe10204a2: mov    $0xd2,%ecx
0x00007fffe10204a7: mov    %cl,0x0(%r13)
0x00007fffe10204ab: jmpq   0x00007fffe102050f
// ---- notShort ----
0x00007fffe10204b0: cmp    $0x4,%eax
0x00007fffe10204b3: jne    0x00007fffe10204d3 // 跳转到notLong

// ltos

0x00007fffe10204b9: mov    (%rcx,%rbx,1),%rax
0x00007fffe10204bd: sub    $0x10,%rsp
0x00007fffe10204c1: mov    %rax,(%rsp)
// 重写字节码指令为Bytecodes::_fast_lgetfield,
0x00007fffe10204c5: mov    $0xd1,%ecx
0x00007fffe10204ca: mov    %cl,0x0(%r13)
0x00007fffe10204ce: jmpq   0x00007fffe102050f
// ---- notLong ----
0x00007fffe10204d3: cmp    $0x5,%eax
0x00007fffe10204d6: jne    0x00007fffe10204f8 // 跳转到notFloat


// ftos
0x00007fffe10204dc: vmovss (%rcx,%rbx,1),%xmm0
0x00007fffe10204e1: sub    $0x8,%rsp
0x00007fffe10204e5: vmovss %xmm0,(%rsp)
// 重写字节码指令为Bytecodes::_fast_fgetfield
0x00007fffe10204ea: mov    $0xcf,%ecx
0x00007fffe10204ef: mov    %cl,0x0(%r13)
0x00007fffe10204f3: jmpq   0x00007fffe102050f
// ---- notFloat ----
0x00007fffe10204f8: vmovsd (%rcx,%rbx,1),%xmm0
0x00007fffe10204fd: sub    $0x10,%rsp
0x00007fffe1020501: vmovsd %xmm0,(%rsp)
0x00007fffe1020506: mov    $0xce,%ecx
0x00007fffe102050b: mov    %cl,0x0(%r13)

// -- Done --

我们需要介绍一下虚拟机内部的一些自定义指令，这些自定义指令的模板如下：

// JVM bytecodes
def(Bytecodes::_fast_agetfield      , ubcp|____|____|____, atos, atos, fast_accessfield    ,  atos        );
def(Bytecodes::_fast_bgetfield      , ubcp|____|____|____, atos, itos, fast_accessfield    ,  itos        );
def(Bytecodes::_fast_cgetfield      , ubcp|____|____|____, atos, itos, fast_accessfield    ,  itos        );
def(Bytecodes::_fast_dgetfield      , ubcp|____|____|____, atos, dtos, fast_accessfield    ,  dtos        );
def(Bytecodes::_fast_fgetfield      , ubcp|____|____|____, atos, ftos, fast_accessfield    ,  ftos        );
def(Bytecodes::_fast_igetfield      , ubcp|____|____|____, atos, itos, fast_accessfield    ,  itos        );
def(Bytecodes::_fast_lgetfield      , ubcp|____|____|____, atos, ltos, fast_accessfield    ,  ltos        );
def(Bytecodes::_fast_sgetfield      , ubcp|____|____|____, atos, itos, fast_accessfield    ,  itos        );

以_fast_agetfield内部定义的字节码指令为例为来，生成函数为TemplateTable::fast_accessfield()函数，汇编代码如下：

0x00007fffe101e4e1: movzwl 0x1(%r13),%ebx
0x00007fffe101e4e6: mov    -0x28(%rbp),%rcx
0x00007fffe101e4ea: shl    $0x2,%ebx
// 计算%rcx+%rdx*8+0x20，获取ConstantPoolCacheEntry[_indices,_f1,_f2,_flags]中的_f2
// 因为ConstantPoolCache的大小为0x16字节，%rcx+0x20定位到第一个ConstantPoolCacheEntry的开始位置
// %rdx*8算出来的是相对于第一个ConstantPoolCacheEntry的字节偏移
0x00007fffe101e4ed: mov    0x20(%rcx,%rbx,8),%rbx

// 检查空异常
0x00007fffe101e4f2: cmp    (%rax),%rax
// %rax中存储的是objectref，也就是要从这个实例中获取字段的值，通过偏移%rbx后就
// 能获取到偏移的值，然后加载到%eax
0x00007fffe101e4f5: mov    (%rax,%rbx,1),%eax

其它的字节码指令类似，这里不再过多介绍。从这里可以看出，我们不需要再执行getfield对应的那些汇编指令，只执行_fast开头的指令即可，这些指令比起getfield指令来说简化了很多，大大提高了解释执行的速度。　　

专注虚拟机与编译器研究

第25篇-虚拟机对象操作指令之getfield

导航

公告