pixilang语言研究(反编译器和语言原理)
https://warmplace.ru/soft/pixilang/
//Pixilang 虚拟机操作码:
// 小写字母 - 数字编码在操作码本体中;
// V - variable (type = PIX_OPCODE);
// N - short number (type = PIX_OPCODE);
// I - PIX_INT (type = one or several PIX_OPCODEs);
// F - PIX_FLOAT (type = one or several PIX_OPCODEs).
//Pixilang 程序格式:
// 地址 | 指令
// 0 | HALT
// 1 | start of the main user function
// ... | ...
// ... | RET_i - return from the main function
//Pixilang 栈数据结构:
// [ top of the stack ]
// ...
// function parameter X;
// ...
// function parameter 1;
// number of parameters; <============= current FP
// previous FP (frame pointer);
// previous PC (program counter);
// local variables;
// ... <============= current SP
// ...
// [ bottom of the stack - offset=0 ]
//Pixilang 变换矩阵:
// | 0 4 8 12 |
// | 1 5 9 13 |
// | 2 6 10 14 |
// | 3 7 11 15 |
语法树节点
/* Syntax tree node. */
struct snode
{
    snode_type  type;
    uint8_t     flags;
    size_t      code_ptr;   /* Start of node */
    size_t      code_ptr2;  /* End of node (start of the next node) */
    snode_val   val;
    snode**     n;          /* Children */
    uint        nn;         /* Children count */
};
1. 编译器架构
- 前端: Yacc语法分析器 + 词法分析器(yylex函数)
- 中间表示: 抽象语法树(AST,snode结构)
- 后端: AST遍历生成字节码(compile_tree函数)
2. 字节码特征
从代码中可以看到Pixilang的字节码:
- 基于栈的虚拟机
- 操作码存储在PIX_OPCODE中
- 支持立即数编码在操作码中(通过移位PIX_OPCODE_BITS)
- 有跳转指令(JMP_i、JMP_IF_FALSE_i 等)
需要理解的部分
在编程语言中,本地符号表是什么东西?
什么是yacc,yacc的作用
加载或者编译pixi源码的关键函数
//Load *.pixicode file or compile *.pixi source file int pix_load( const char* name, pix_vm* vm ) { int rv = 0; char* src = NULL; char* base_path = NULL; size_t fsize = sfs_get_file_size( name ); if( fsize >= 8 ) { sfs_file f = sfs_open( name, "rb" ); if( f ) { char sign[ 9 ]; sign[ 8 ] = 0; sfs_read( &sign, 1, 8, f ); sfs_close( f ); if( smem_strcmp( (const char*)sign, "PIXICODE" ) == 0 ) { //Binary code: base_path = pix_get_base_path( name ); int load_code_err = pix_vm_load_code( name, base_path, vm ); if( load_code_err ) { rv = 5 + load_code_err * 100; } goto pix_compile_end; } } } if( fsize ) { src = SMEM_ALLOC2( char, fsize ); if( !src ) { rv = 1; ERROR( "memory allocation error" ); goto pix_compile_end; } sfs_file f = sfs_open( name, "rb" ); if( f == 0 ) { rv = 2; ERROR( "can't open file %s", name ); goto pix_compile_end; } int start_offset = 0; if( fsize >= 3 ) { sfs_read( src, 1, 3, f ); if( (uint8_t)src[ 0 ] == 0xEF && (uint8_t)src[ 1 ] == 0xBB && (uint8_t)src[ 2 ] == 0xBF ) { //Byte order mark found. Skip it: start_offset += 3; } else { sfs_rewind( f ); } } if( start_offset ) { fsize -= start_offset; sfs_seek( f, start_offset, SFS_SEEK_SET ); } sfs_read( src, 1, fsize, f ); sfs_close( f ); base_path = pix_get_base_path( name ); #ifdef PIX_ENCODED_SOURCE pix_decode_source( src, fsize ); #endif int comp_err = pix_compile( src, fsize, (char*)name, base_path, vm ); if( comp_err ) { rv = 3 + comp_err * 100; goto pix_compile_end; } } else { ERROR( "%s not found (or it's empty)", name ); rv = 4; } pix_compile_end: smem_free( src ); smem_free( base_path ); return rv; }
浙公网安备 33010602011771号