LuaJIT2.1 和 Lua5.4.8 性能对比 - 指南

说明

最近在学习 LuaJIT，想看看把它接入到项目中使用，会提高多大的性能。

今天抽时间，简单地测试了一下 LuaJIT 2.1 和 Lua 5.4.8 的性能。

测试平台：

系统：Windows 10 WSL
CPU：Intel® Core™ i7-8700 CPU @ 3.20GHz 3.19 GHz
内存：48.0 GB

下面测试结果只是我简单测试的结果，仅供参考。
相关代码在最后面。

综合性能对比分析

第一组测试（详细性能对比）

测试项目	Lua 5.4	LuaJIT	性能提升
Fibonacci(30) 递归	0.0515秒	0.0095秒	5.4倍
数学操作(10万次)	0.0125秒	0.0022秒	5.7倍
字符串操作	0.0033秒	0.0043秒	0.8倍
表操作(10万)	0.0797秒	0.0322秒	2.5倍

第二组测试（深度分析）

测试规模/类型	Lua 5.4	LuaJIT	性能提升
100万次循环	0.0041秒	0.0010秒	4.1倍
500万次循环	0.0204秒	0.0051秒	4.0倍
1000万次循环	0.0407秒	0.0102秒	4.0倍
浮点运算(100万)	0.0298秒	0.0056秒	5.3倍
整数操作	0.0062秒	0.0010秒	6.2倍
浮点操作	0.0069秒	0.0010秒	6.9倍
顺序访问	0.0020秒	0.0006秒	3.3倍
随机访问	0.0034秒	0.0010秒	3.4倍

关键说明

1. 稳定的性能提升

LuaJIT在数值计算任务上都展现了4-7倍的性能提升，这个倍数很稳定，说明JIT优化效果是可预测的。（注意：表操作的提升约为2.5倍，而字符串拼接测试中LuaJIT反而略慢，约0.8倍。）

2. 规模无关的优化效果

从100万到1000万次循环，性能提升倍数保持在4倍左右，说明LuaJIT的优化效果不受问题规模影响。

3. 内存使用效率

Lua 5.4: 1048.76 KB
LuaJIT: 563.17 KB

LuaJIT的内存占用比Lua 5.4少约**46%**，这可能因为：

更高效的对象表示
不同的垃圾回收策略
JIT编译后的代码更紧凑

4. 类型统一优化

在LuaJIT中，整数和浮点操作的性能几乎相同（都是0.0010秒），这说明JIT编译器成功地进行了类型特化优化。

5. 内存访问模式优化

LuaJIT对顺序访问和随机访问都有显著优化，两者的提升幅度相近（约3.3倍和3.4倍）。

6. JIT预热效果

有趣的是，这次测试中JIT预热效果不明显（1.01倍），这可能因为：

测试代码相对简单，很快就被优化了
测试规模足够大，预热时间相对较短

相关代码

测试代码1：

-- detailed_comparison.lua
-- Banner: report which Lua implementation is executing the benchmarks.
-- The global `jit` table is only present under LuaJIT; otherwise fall back
-- to a generic label.
local engine_name = jit and jit.version
if not engine_name then
  engine_name = "Standard Lua Interpreter"
end

print("=== Detailed Performance Comparison ===")
print("Lua Version:", _VERSION)
print("Engine:", engine_name)
print()
-- Time one invocation of `func` and print the measurement.
--   name : label printed in front of the timing
--   func : function to measure
--   ...  : arguments forwarded to `func`
-- Returns the elapsed time in seconds as reported by os.clock().
local function benchmark(name, func, ...)
  -- Force a full collection first so earlier garbage is not collected
  -- inside the timed region.
  collectgarbage("collect")

  local t0 = os.clock()
  func(...) -- return values are intentionally discarded
  local t1 = os.clock()

  local elapsed = t1 - t0
  print(("%-30s: %8.4f seconds"):format(name, elapsed))
  return elapsed
end
-- Iterative Fibonacci that bails out before the value can overflow.
--   n : index in the sequence; values <= 1 are returned unchanged
-- Returns F(n), or the first Fibonacci number above 1e15 that is reached
-- on the way to F(n).
local function safe_fibonacci(n)
  if n <= 1 then
    return n
  end

  local ceiling = 1e15
  local prev, curr = 0, 1
  for _ = 2, n do
    local following = prev + curr
    prev = curr
    curr = following
    if curr > ceiling then
      -- Return early instead of letting the value keep growing.
      return curr
    end
  end
  return curr
end
-- Doubly-recursive Fibonacci, used as the recursion workload.
--   n : index in the sequence; values <= 1 are returned unchanged
local function fib_recursive(n)
  if not (n <= 1) then
    return fib_recursive(n - 1) + fib_recursive(n - 2)
  end
  return n
end
-- Math-heavy workload: accumulates sin(k) * cos(k) + sqrt(k) for k = 1..n.
--   n : number of terms
-- Returns the accumulated sum (0 when n < 1).
local function math_intensive(n)
  local total = 0
  for k = 1, n do
    local trig = math.sin(k) * math.cos(k)
    total = total + trig + math.sqrt(k)
  end
  return total
end
-- String workload: appends the decimal form of 1..n to a growing string.
--   n : largest number to append
-- Returns the length of the resulting string.
local function string_operations(n)
  local acc = ""
  for k = 1, n do
    acc = acc .. tostring(k)
    -- Stop once the string passes 100000 bytes to avoid memory problems.
    if #acc > 100000 then
      break
    end
  end
  return #acc
end
-- Table workload: builds n small record tables, then sums their x and y
-- fields.
--   n : number of records to create
-- Returns the sum of x + y over all records.
local function table_intensive(n)
  local records = {}
  for k = 1, n do
    records[k] = { x = k, y = k * 2, data = "item" .. k }
  end

  local total = 0
  for k = 1, n do
    total = total + records[k].x + records[k].y
  end
  return total
end
print("Running benchmarks...")

-- Moderate workload sizes.
benchmark("Fibonacci(30) recursive", fib_recursive, 30)
benchmark("Safe Fibonacci(100000)", safe_fibonacci, 100000)
benchmark("Math operations (100K)", math_intensive, 100000)
benchmark("String operations", string_operations, 5000)
benchmark("Table operations (100K)", table_intensive, 100000)

-- Report memory usage after a full collection.
collectgarbage("collect")
local heap_kb = collectgarbage("count")
print(("\nMemory usage: %.2f KB"):format(heap_kb))
-- JIT-specific information; skipped entirely when the global `jit` table is
-- absent (i.e. when not running under LuaJIT).
if jit then
  print("\nJIT Information:")
  print("Status:", jit.status())
  print("Architecture:", jit.arch)

  -- Count the compiled traces.
  -- jit.util is not a field of the `jit` table: it is an on-demand module
  -- that must be loaded with require(). pcall keeps this report working on
  -- builds where the module is unavailable; a pre-existing jit.util field is
  -- still honoured as a fallback.
  local ok, loaded = pcall(require, "jit.util")
  local jutil = (ok and type(loaded) == "table") and loaded or jit.util

  if jutil and jutil.traceinfo then
    local traces = 0
    for i = 1, 1000 do -- probe trace numbers 1..1000
      if jutil.traceinfo(i) then
        traces = traces + 1
      end
    end
    if traces > 0 then
      print("Compiled traces:", traces)
    end
  end
end

测试代码2：

-- deep_analysis.lua
-- Banner: name the engine running the analysis. Under LuaJIT the global
-- `jit` table supplies the version string; otherwise _VERSION is used.
local engine_name = jit and jit.version
if not engine_name then
  engine_name = "Standard " .. _VERSION
end

print("=== Deep Performance Analysis ===")
print("Engine:", engine_name)
print()
-- Run `func` several times and print average / minimum / maximum timings.
--   name       : label printed in front of the statistics
--   func       : function to measure
--   iterations : number of timed runs; must be >= 1
--   ...        : arguments forwarded to `func` on every run
-- Returns the average time per run in seconds (measured with os.clock()).
-- Raises an error when `iterations` is not a number >= 1.
local function benchmark_with_analysis(name, func, iterations, ...)
  -- Validate before doing any work: with fewer than one timed run there is
  -- no minimum/maximum to report and the average would divide by zero.
  local runs = tonumber(iterations)
  if not runs or not (runs >= 1) then
    error("benchmark_with_analysis: iterations must be a number >= 1", 2)
  end

  -- Warm-up run (not timed).
  func(...)

  -- Timed runs; statistics are accumulated on the fly.
  local total = 0
  local min_time, max_time = math.huge, -math.huge
  for _ = 1, runs do
    collectgarbage("collect")
    local start = os.clock()
    func(...) -- return values are intentionally discarded
    local elapsed = os.clock() - start

    total = total + elapsed
    if elapsed < min_time then
      min_time = elapsed
    end
    if elapsed > max_time then
      max_time = elapsed
    end
  end

  local avg_time = total / runs
  print(string.format("%-25s: avg=%.4fs, min=%.4fs, max=%.4fs",
    name, avg_time, min_time, max_time))
  return avg_time
end
-- Loop workload: adds up the integers 1..n.
--   n : upper bound of the loop
-- Returns the sum (0 when n < 1).
local function loop_test(n)
  local acc = 0
  for k = 1, n do
    acc = acc + k
  end
  return acc
end
-- Floating-point workload: repeatedly multiplies by 1.000001 and takes the
-- square root.
--   n : number of iterations
-- Returns the final value (1.0 when n < 1).
local function float_intensive(n)
  local value = 1.0
  for _ = 1, n do
    value = math.sqrt(value * 1.000001)
  end
  return value
end
-- Integer vs. floating-point comparison, integer half:
-- accumulates k * 2 for k = 1..n using integer literals.
--   n : number of iterations
local function integer_ops(n)
  local total = 0
  for k = 1, n do
    local doubled = k * 2 -- integer arithmetic
    total = total + doubled
  end
  return total
end
-- Floating-point counterpart of the integer workload:
-- accumulates k * 2.0 for k = 1..n, starting from 0.0.
--   n : number of iterations
local function float_ops(n)
  local total = 0.0
  for k = 1, n do
    local doubled = k * 2.0 -- floating-point arithmetic
    total = total + doubled
  end
  return total
end
-- Table access pattern test, sequential variant: fills an array with 1..n
-- and then reads it back in index order.
--   n : number of elements
-- Returns the sum of the elements read.
local function sequential_access(n)
  local values = {}
  for k = 1, n do
    values[k] = k
  end

  local total = 0
  for k = 1, n do
    total = total + values[k]
  end
  return total
end
-- Table access pattern test, scattered variant: fills an array with 1..n and
-- then reads n elements at indices computed as (k * 17 + 31) % n + 1.
--   n : number of elements
-- Returns the sum of the elements read.
local function random_access(n)
  local values = {}
  for k = 1, n do
    values[k] = k
  end

  local total = 0
  for k = 1, n do
    -- Pseudo-random index, always within 1..n.
    local slot = (k * 17 + 31) % n + 1
    total = total + values[slot]
  end
  return total
end
print("Multiple runs for statistical accuracy:")
print()

-- Loop workloads at several sizes.
local sizes = { 1000000, 5000000, 10000000 }
for idx = 1, #sizes do
  local size = sizes[idx]
  print(("=== Scale: %d operations ==="):format(size))
  benchmark_with_analysis("Loop " .. size, loop_test, 3, size)
  -- The float workload only runs at the smallest size; larger sizes would
  -- be too slow.
  if size <= 1000000 then
    benchmark_with_analysis("Float " .. size, float_intensive, 3, size)
  end
  print()
end

print("=== Data Type Comparison ===")
benchmark_with_analysis("Integer operations", integer_ops, 5, 1000000)
benchmark_with_analysis("Float operations", float_ops, 5, 1000000)
print()

print("=== Memory Access Patterns ===")
benchmark_with_analysis("Sequential access", sequential_access, 3, 100000)
benchmark_with_analysis("Random access", random_access, 3, 100000)
print()
-- JIT-specific analysis: time the first call of a fresh function against an
-- immediate second call. Skipped when the global `jit` table is absent.
if jit then
  print("=== JIT Warmup Analysis ===")

  -- Workload: accumulates sin(k) * cos(k) for k = 1..n.
  local function warmup_test(n)
    local acc = 0
    for k = 1, n do
      acc = acc + math.sin(k) * math.cos(k)
    end
    return acc
  end

  -- Cold start: the very first call of warmup_test.
  local cold_begin = os.clock()
  warmup_test(100000)
  local cold_time = os.clock() - cold_begin

  -- After warm-up: the same call repeated.
  local warm_begin = os.clock()
  warmup_test(100000)
  local warm_time = os.clock() - warm_begin

  print(("Cold start: %.4fs"):format(cold_time))
  print(("After warmup: %.4fs"):format(warm_time))
  print(("Warmup speedup: %.2fx"):format(cold_time / warm_time))
end

.
.
上述测试结果只是我简单测试的结果，仅供参考（图片AI生成）
.
.

posted @ 2025-07-20 19:11 yjbjingcha 阅读(207) 评论(0) 收藏举报

刷新页面返回顶部