zookeeper Unreasonable length错误导致无法启动
https://www.codeleading.com/article/49815499853/
https://blog.csdn.net/scl323/article/details/888021
现象: 启动报错
Exception in thread "main" java.io.IOException: Unreasonable length = 1860320 at org.apache.jute.BinaryInputArchive.checkLength(BinaryInputArchive.java:127) at org.apache.jute.BinaryInputArchive.readBuffer(BinaryInputArchive.java:92) at com.sf.boot.code.Zk4lTest.testLog(Zk4lTest.java:241) at com.sf.boot.code.Zk4lTest.main(Zk4lTest.java:392)
原因:
- zookeeper启动的时候会加载datalog(快照日志)进行内存数据恢复,zookeeper的快照日志是按照行记录的,在读取的时候会判断record的大小是否大于jute.maxbuffer(默认1M).
- 在写入数据的时候也是会判断节点数据是否大于jute.maxbuffer,那么这样的数据是如何写入快照日志的?答案是closeSession的时候
- 当一个客户端建立了很多的临时节点时,在进行closeSession的时候会把所有的临时节点的path集中在一起写到快照日志的record(因为closeSession时是需要删除所有临时节点的)。
这时候如果path很长很多,那么就有可能超过jute.maxbuffer。例如:dubbo的场景,dubbo是把所有注册数据都当成path,这就导致path很长
解决方案:
- zookeeper启动的时候设置 jvm参数大小 -Djute.maxbuffer
- 删除zookeeper有问题的快照日志文件,不过这样做会导致还没有持久化的数据丢失。zookeeper是写快照日志,当快照日志达到一定条数才会落盘持久化
解析快照日志文件
可以使用org.apache.zookeeper.server.LogFormatter 解析快照日志。
下面的方法是copy LogFormatter的代码修改了一下
解析事务日志方式:
/** * 测试快照日志是否正常 * @param filePath * @param errorFilePath * @param byteLength * @throws Exception */ public static void testLog(String filePath,String errorFilePath,long byteLength) throws Exception { FileInputStream fis = new FileInputStream(filePath); BinaryInputArchive logStream = BinaryInputArchive.getArchive(fis); FileHeader fhdr = new FileHeader(); fhdr.deserialize(logStream, "fileheader"); int count = 0; int length = 0; while (true) { long crcValue = 0; byte[] bytes; try { crcValue = logStream.readLong("crcvalue"); bytes = logStream.readBuffer("txnEntry"); length += bytes.length; } catch (Exception e) { System.out.println("11EOF reached after " + count + " txns. crcValue:"+crcValue); throw e; } if (bytes.length > byteLength){ File errorFile = new File(errorFilePath+"."+count); FileUtils.write(errorFile,new String(bytes)); } // System.out.println(crcValue); if (bytes.length == 0) { // Since we preallocate, we define EOF to be an // empty transaction System.out.println("22EOF reached after " + count + " txns."); System.out.println(length); return; } Checksum crc = new Adler32(); crc.update(bytes, 0, bytes.length); if (crcValue != crc.getValue()) { throw new IOException("CRC doesn't match " + crcValue + " vs " + crc.getValue()); } TxnHeader hdr = new TxnHeader(); Record txn = SerializeUtils.deserializeTxn(bytes, hdr); if (hdr.getType() == -11){ System.out.println(); System.out.println("count:"+count+"byte:"+bytes.length); System.out.println("原始数据:"+new String(bytes)); System.out.println(DateFormat.getDateTimeInstance(DateFormat.SHORT, DateFormat.LONG).format(new Date(hdr.getTime())) + " session 0x" + Long.toHexString(hdr.getClientId()) + " cxid 0x" + Long.toHexString(hdr.getCxid()) + " zxid 0x" + Long.toHexString(hdr.getZxid()) + " type:" + hdr.getType() + " " + JSON.toJSONString(txn)); } if (logStream.readByte("EOR") != 'B') { // LOG.error("Last transaction was partial."); throw new EOFException("Last transaction was partial."); } count++; } }
解析事务日志需要用到类 org.apache.zookeeper.server.LogFormatter
java -classpath zookeeper-3.4.13.jar:zookeeper-jute-3.4.13.jar:slf4j-api-1.7.6.jar org.apache.zookeeper.server.LogFormatter /datalog/version-2/log.1
解析快照需要用到类 org.apache.zookeeper.server.SnapshotFormatter
java -classpath zookeeper-3.4.13.jar:zookeeper-jute-3.4.13.jar:slf4j-api-1.7.6.jar org.apache.zookeeper.server.SnapshotFormatter /datalog/version-2/snapshot.110005fdc7 > zk.log
// 输出异常log数据
public class ZkTest { /** * 测试快照日志是否正常 * @param filePath * @param errorFilePath * @param byteLength 输出节点数据的最小长度 * @throws Exception */ public static void parseLog(String filePath,String errorFilePath,long byteLength) throws Exception { FileInputStream fis = new FileInputStream(filePath); BinaryInputArchive logStream = BinaryInputArchive.getArchive(fis); FileHeader fhdr = new FileHeader(); fhdr.deserialize(logStream, "fileheader"); int count = 0; int length = 0; while (true) { long crcValue = 0; byte[] bytes; try { crcValue = logStream.readLong("crcvalue"); bytes = logStream.readBuffer("txnEntry"); length += bytes.length; } catch (Exception e) { System.out.println("11EOF reached after " + count + " txns. crcValue:"+crcValue); throw e; } if (bytes.length > byteLength){ File errorFile = new File(errorFilePath+"."+count); FileUtils.write(errorFile,new String(bytes)); } // System.out.println(crcValue); if (bytes.length == 0) { // Since we preallocate, we define EOF to be an // empty transaction System.out.println("22EOF reached after " + count + " txns."); System.out.println(length); return; } Checksum crc = new Adler32(); crc.update(bytes, 0, bytes.length); if (crcValue != crc.getValue()) { throw new IOException("CRC doesn't match " + crcValue + " vs " + crc.getValue()); } TxnHeader hdr = new TxnHeader(); Record txn = SerializeUtils.deserializeTxn(bytes, hdr); if (hdr.getType() == -11 && bytes.length > byteLength){ System.out.println(); System.out.println("count:"+count+"byte:"+bytes.length); System.out.println("原始数据:"+new String(bytes)); System.out.println(DateFormat.getDateTimeInstance(DateFormat.SHORT, DateFormat.LONG).format(new Date(hdr.getTime())) + " session 0x" + Long.toHexString(hdr.getClientId()) + " cxid 0x" + Long.toHexString(hdr.getCxid()) + " zxid 0x" + Long.toHexString(hdr.getZxid()) + " type:" + hdr.getType() + " " + JSON.toJSONString(txn)); } if (logStream.readByte("EOR") != 'B') { // LOG.error("Last transaction was partial."); throw new EOFException("Last transaction was partial."); } count++; } } public static void main(String[] args) { System.setProperty("jute.maxbuffer","1775360000"); System.setProperty("jute.maxbuffer.extrasize","1775360000"); String inFilePath = "/Users/wind_mac/Documents/zk-test/src4/log.3f7800000001"; String outFilePath = "/Users/wind_mac/Documents/zk-test/src4/log/log"; try { parseLog(inFilePath,outFilePath,17753600); } catch (Exception e) { e.printStackTrace(); } } }