关于QQwry格式
关于QQwry格式
作者 cnss 2004-8-18
版权所有 转载请注明出处
http://blog.csdn.net/cnss
刚才通过RSS看到一篇关于QQwry格式的blog: http://blog.csdn.net/taft/archive/2004/08/18/77559.aspx
想不到QQwry还在用,这是俺两年前设计的,这个格式该被淘汰了.为什么这么说呢,因为它采用的是索引+二分查找来减小内存占用和提高查找速度的.
由于采用二分查找,所以IP数据要被分为最小的片,假设有A,B两条数据,B数据完全覆盖A数据,那么转换为QQwry后两条数据就变成了三条.如果原始数据非常有条理,就可以避免这个现象,不过这是不可能的,几万条数据会越来越乱,所以QQwry的尺寸会迅速增加,之所以增长的不是特别快,是因为格式对重复数据有一定压缩.
QQwry.dat:"咦?我没吃那么多,怎么胖的那么快!?"
有几点改进一下,就可以满足日后需要了:
1.搜索改成可分层搜索,并且转换文件时可以选择侧重文件大小还是搜索速度.
2.现在索引是24bit的,因此数据区不能超过16M.
3.加入收集人的签名.
那篇文章是作者猜测的格式,我再把原来整理的发一遍吧.0x2 0x0 0x0 0x0不是错误,可能是给御风而行放版权信息的地方.如要qqwry格式源代码可联系我.
-----------------------------------------------
新格式说明
主要分为数据区和索引区
★数据区元素:
存放IP信息中的:结束IP(4字节),国家(不定长),地区(不定长)
排列顺序:无要求
★索引区元素:
存放IP信息中的:起始IP(4字节),索引值(3字节)
排列顺序:起始IP按升序排列
★IP为4字节,如"255.0.0.0"表示为0xFF000000,存在文件中则为00 00 00 FF(字节序原因)
★索引值为该IP信息的<结束IP、国家、地区>在文件中的位置,指向<结束IP>。
★ 如果结束IP后的字节为0x01,则说明该IP信息的<国家、地区>与前面的IP信息重复,这时0x01后面的3个字节为国家、地区字符串的偏移量。可以根据这三个字节去前面找国家、地区。
★如果国家的第一个字节为0x02,说明该国家串与前面的国家或地区串重复,0x02后面的三个字节为该串的偏移量,可以根据该偏移量找到前面的串。
★ 如果地区的第一个字节为0x02,说明该地区串与前面的国家或地区串重复,0x02后面的三个字节为该串的偏移量,可以根据该偏移量找到前面的串。
★ 有可能在出现0x01的情况下出现0x02,这时需要跳转两次查找国家、地区字符串。
★ 正常的字符串以NULL做结尾。
★ IP信息不允许有重复、覆盖
★ 使用索引是为了保证能用二分法快速搜索(起始IP按升序排列,查找次数随记录数呈对数增长)
★ 新格式不允许存在未知数据的IP信息,原格式中的未知数据已经都去掉了。如果保留未知数据的IP信息,将大大增加文件长度。
文件的头4个字节是索引区第一个元素的偏移量,第二个4字节是索引区最后一个元素的偏移量。通过这两个偏移量,可以用二分法快速查找IP信息。例如:有如下一条IP信息,要查询的IP为150:
起始 结束 国家 地区
100 200 中国 北京
首先在索引区找到起始IP小于等于150的最后一个元素,通过索引,找到结束IP,如果150大于结束IP,说明是未知数据;如果150小于等于结束IP,则找到国家、地区。
//////////
using System;
using System.Net;
using System.IO;
using System.Text.RegularExpressions;
using System.Collections;
/************************************************************************/
/* QQWry.dat ip database reader and finder
* by vmlinux@smth
* 2004/10/29 */
/************************************************************************/
namespace vmlinux.QQWry
{
#region Sample
/********************************************************
QQWry.IndexRecords recs=new QQWry.IndexRecords(ref fs);
QQWry.DataRecord data=null;
//Find IP Address
if(recs.Find("192.168.0.1"))
{
//found
data=recs.CurrentData;
//start ip:recs.Current.ToString()
//end ip :data.EndIP
//location:data.Location
//network :data.Network
//jobs here
}
else
{
//not found
}
//Find IP Address Pattern
if(recs.Find("61.232.202.*"))
{
do
{
data=recs.CurrentData;
//jobs here
}while(recs.PatternSearch && recs.MoveNext());
}
else
{
//not found
}
//Enumerate Index Field
foreach(IndexRecord x in recs)
{
data=x.Data;
//jobs here
}
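//Enumerate Data Field
//(recs yields IndexRecord objects, so data records need their own DataRecords enumerator;
// indexStart is presumably the start offset of the index field, i.e. the first 4 bytes of the file)
foreach(DataRecord x in new QQWry.DataRecords(ref fs,indexStart))
{
//jobs here
}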
*******************************************************/
#endregion
#region QQWry Data Field
/// <summary>
/// Forward-only enumerator over the records stored in the data field.
/// It can be used directly in a foreach statement because it exposes
/// GetEnumerator(), which simply hands back this object.
/// </summary>
public class DataRecords: System.Collections.IEnumerator
{
    private static readonly int DataStartPosition=8; //the data field starts at file offset 8
    private DataRecord drRecord=null; //single record object, reloaded on every step
    private FileStream fsFile=null; //QQWry file
    private int iPosition; //file offset of the record the next MoveNext() will load
    private int iEndPosition; //enumeration stops once iPosition reaches this offset

    /// <summary>
    /// Creates an enumerator positioned at the start of the data field.
    /// </summary>
    /// <param name="fs">data file</param>
    /// <param name="endpos">file position to end enumeration</param>
    public DataRecords(ref FileStream fs,int endpos)
    {
        fsFile=fs;
        iEndPosition=endpos;
        iPosition=DataStartPosition;
        drRecord=new DataRecord(ref fs);
    }

    /// <summary>
    /// The record loaded by the most recent MoveNext().
    /// The same DataRecord instance is returned every time; only its content changes.
    /// </summary>
    public object Current
    {
        get
        {
            return drRecord;
        }
    }

    /// <summary>
    /// Loads the record at the current position and advances past it.
    /// </summary>
    /// <returns>false once the end position has been reached, true otherwise</returns>
    public bool MoveNext()
    {
        bool hasMore=iPosition<iEndPosition;
        if(hasMore)
        {
            drRecord.ReadFrom(iPosition);
            //step over exactly the bytes this record occupies in the file
            iPosition+=drRecord.DataLength;
        }
        return hasMore;
    }

    /// <summary>
    /// Rewinds to the first record of the data field.
    /// </summary>
    public void Reset()
    {
        iPosition=DataStartPosition;
    }

    /// <summary>
    /// Returns this object so that it can be the source of a foreach statement.
    /// </summary>
    public System.Collections.IEnumerator GetEnumerator()
    {
        return this;
    }
}
/// <summary>
/// One record of the data field. Its structure in the file is
/// ====================================
/// IP Address (4 bytes)
/// Location Data Node (variable by tag)
/// Network Data Node (variable by tag)
/// ====================================
/// An instance is a reusable view: ReadFrom and CheckRange overwrite
/// whatever was loaded before.
/// </summary>
public class DataRecord
{
    #region internal tools and buffer
    /// <summary>
    /// Scratch buffer shared by every reader (static, so it is shared across instances).
    /// </summary>
    internal static byte[] buffer=new byte[256];

    /// <summary>
    /// Reads a NUL-terminated string. The caller has already consumed the first
    /// byte of the string (while testing it for a tag), so reading starts one
    /// byte before the current stream position. On return the stream is
    /// positioned just behind the terminator.
    /// </summary>
    /// <param name="fs">data file, positioned one byte into the string</param>
    /// <param name="len">receives the number of bytes the string occupies in the file, terminator included</param>
    /// <returns>the decoded string; at most buffer.Length bytes are decoded</returns>
    internal static string ReadString(ref FileStream fs,ref int len)
    {
        long start=fs.Seek(-1,SeekOrigin.Current);
        //Read may deliver fewer bytes than requested (e.g. close to the end of
        //the file), so only the bytes actually delivered are scanned; anything
        //behind them is left over from an earlier call.
        int read=fs.Read(buffer,0,buffer.Length);
        for(len=0;len<read;++len)
        {
            if(0==buffer[len])
                break;
        }
        //reposition absolutely instead of relative to an assumed full read
        fs.Seek(start+len+1,SeekOrigin.Begin);
        //NOTE(review): the text is presumably GBK/GB2312 encoded; Encoding.Default
        //only decodes it correctly when the system default code page matches - confirm.
        return System.Text.Encoding.Default.GetString(buffer,0,len++);
    }
    #endregion

    private FileStream fs=null; //data file
    private byte[] btIPBytes=null; //ip bytes, "1.255.255.255" looks like FF FF FF 01
    private DataNode ndLocation=null; //location data node
    private DataNode ndNetwork=null; //network data node
    private int iLen=0; //data length, not the length of value which is a string

    /// <summary>
    /// raw ip bytes as stored in the file (least significant octet first)
    /// </summary>
    internal byte[] IPBytes
    {
        get
        {
            return btIPBytes;
        }
    }

    /// <summary>
    /// the string format ip which presents as end ip of ip range by design
    /// </summary>
    public string EndIP
    {
        get
        {
            return string.Format("{0}.{1}.{2}.{3}",btIPBytes[3],btIPBytes[2],btIPBytes[1],btIPBytes[0]);
        }
    }

    /// <summary>
    /// location value
    /// </summary>
    public string Location
    {
        get
        {
            return ndLocation.Value;
        }
    }

    /// <summary>
    /// network value
    /// </summary>
    public string Network
    {
        get
        {
            return ndNetwork.Value;
        }
    }

    /// <summary>
    /// number of bytes the record occupies at its own position in the file
    /// (linked nodes stored elsewhere are not counted)
    /// </summary>
    public int DataLength
    {
        get
        {
            return iLen;
        }
    }

    /// <summary>
    /// return debug info (the node types of location and network)
    /// </summary>
    public string DebugInfo
    {
        get
        {
            return string.Format("Loc:{0} Net:{1}",ndLocation.Type,ndNetwork.Type);
        }
    }

    /// <summary>
    /// Creates an empty record bound to a file; call ReadFrom to load data.
    /// </summary>
    /// <param name="fsFile">data file</param>
    public DataRecord(ref FileStream fsFile)
    {
        fs=fsFile;
        btIPBytes=new byte[4];
        ndLocation=new DataNode(ref fs);
        ndNetwork=new DataNode(ref fs);
    }

    /// <summary>
    /// Creates a record and immediately loads it from the given offset.
    /// </summary>
    /// <param name="offset">file position of the record</param>
    /// <param name="fsFile">data file</param>
    public DataRecord(int offset,ref FileStream fsFile)
    {
        fs=fsFile;
        btIPBytes=new byte[4];
        ndLocation=new DataNode(ref fs);
        ndNetwork=new DataNode(ref fs);
        ReadFrom(offset);
    }

    /// <summary>
    /// load data at offset
    /// </summary>
    /// <param name="offset">file position of the record (points at its 4 ip bytes)</param>
    public void ReadFrom(int offset)
    {
        fs.Seek(offset,SeekOrigin.Begin);
        fs.Read(btIPBytes,0,4);
        iLen=4;
        ndLocation.ReadFrom((int)fs.Position);
        iLen+=ndLocation.DataLength;
        if(ndLocation.Type==DataNodeType.Type1)
        {
            //a type1 location redirects the whole <location, network> pair:
            //the network node follows the linked location node, not this record,
            //so it does not contribute to this record's length
            ndLocation.GotoLink();
            ndNetwork.ReadFrom((int)fs.Position);
        }
        else
        {
            //network node is stored right behind the location node
            ndNetwork.ReadFrom((int)fs.Position);
            iLen+=ndNetwork.DataLength;
        }
        //follow any remaining links until both nodes hold a string value
        ndLocation.GotoRoot();
        ndNetwork.GotoRoot();
    }

    /// <summary>
    /// check whether ip in range, here less than or equal to node's ip data.
    /// Only the 4 ip bytes at offset are loaded; the data nodes are not touched.
    /// </summary>
    /// <param name="offset">file position of the record</param>
    /// <param name="ip">target ip, least significant octet first</param>
    /// <returns>true if target ip less than or equal to current ip</returns>
    public bool CheckRange(int offset,byte[] ip)
    {
        fs.Seek(offset,SeekOrigin.Begin);
        fs.Read(btIPBytes,0,4);
        //compare from the most significant octet (index 3) downwards
        for(int i=3;i>0;--i)
        {
            if(btIPBytes[i]!=ip[i])
                return btIPBytes[i]>ip[i];
        }
        return btIPBytes[0]>=ip[0];
    }
}
#region Data Node
/// <summary>
/// Kind of a data node, decided by the first byte read at the node's position.
/// </summary>
public enum DataNodeType
{
    /// <summary>the node holds a string value</summary>
    Normal,
    /// <summary>tag byte 1 followed by a 3-byte link offset</summary>
    Type1,
    /// <summary>tag byte 2 followed by a 3-byte link offset</summary>
    Type2,
    /// <summary>nothing has been read yet</summary>
    Unknown
}
/// <summary>
/// data node of a data record: either a NUL-terminated string (normal node)
/// or a tag byte (1 or 2) followed by a 3-byte offset of another node.
/// </summary>
public class DataNode
{
    private static readonly int HeaderSize=8; //the file starts with two 4-byte index offsets, no node can live there
    private DataNodeType ndType=DataNodeType.Unknown; //default node type is unknown
    private FileStream fs=null; //data file
    private string sValue=null; //value of this node which is a string if the node is of normal type
    private byte[] btDNOffset=null; //linked node offset position bytes (3 bytes used, 4th stays 0)
    private int iLen=0; //data length of this node

    /// <summary>
    /// type of node
    /// </summary>
    public DataNodeType Type
    {
        get
        {
            return ndType;
        }
    }

    /// <summary>
    /// string value of node, only available when node is normal
    /// </summary>
    public string Value
    {
        get
        {
            return sValue;
        }
    }

    /// <summary>
    /// data length in bytes: 4 for a link node, string length plus terminator for a normal node
    /// </summary>
    public int DataLength
    {
        get
        {
            return iLen;
        }
    }

    /// <summary>
    /// linked node file position
    /// </summary>
    public int LinkNodeOffset
    {
        get
        {
            //decoded explicitly as little-endian so the result does not depend
            //on the byte order of the machine (BitConverter follows the host)
            return btDNOffset[0]|(btDNOffset[1]<<8)|(btDNOffset[2]<<16)|(btDNOffset[3]<<24);
        }
    }

    /// <summary>
    /// Creates an empty node bound to a file; call ReadFrom to load it.
    /// </summary>
    /// <param name="fsFile">data file</param>
    public DataNode(ref FileStream fsFile)
    {
        fs=fsFile;
        btDNOffset=new byte[4];
    }

    /// <summary>
    /// Creates a node and immediately loads it from the given offset.
    /// </summary>
    /// <param name="offset">file position of the node</param>
    /// <param name="fsFile">data file</param>
    public DataNode(int offset,ref FileStream fsFile)
    {
        fs=fsFile;
        btDNOffset=new byte[4];
        ReadFrom(offset);
    }

    /// <summary>
    /// read data from offset; afterwards the stream is positioned right behind the node
    /// </summary>
    /// <param name="offset">file position of the node</param>
    public void ReadFrom(int offset)
    {
        fs.Seek(offset,SeekOrigin.Begin);
        //read tag byte
        fs.Read(btDNOffset,0,1);
        byte tag=btDNOffset[0];
        if(tag==1 || tag==2)
        {
            //a link node: the tag is followed by the 3-byte offset of the target
            ndType=(tag==1)?DataNodeType.Type1:DataNodeType.Type2;
            fs.Read(btDNOffset,0,3);
            iLen=4;
        }
        else
        {
            //a normal node: the byte just read is the first byte of the string
            ndType=DataNodeType.Normal;
            sValue=DataRecord.ReadString(ref fs,ref iLen);
        }
    }

    /// <summary>
    /// goto final node which contains value.
    /// A link that points into the file header (for example offset 0) cannot
    /// reference a string, so it is resolved to an empty value instead of
    /// decoding header bytes as text.
    /// </summary>
    /// <exception cref="InvalidDataException">more than 10 links had to be followed</exception>
    public void GotoRoot()
    {
        int hops=0;
        while(this.Type!=DataNodeType.Normal)
        {
            int target=this.LinkNodeOffset;
            if(target<HeaderSize)
            {
                ndType=DataNodeType.Normal;
                sValue=string.Empty;
                return;
            }
            this.ReadFrom(target);
            //guards against link cycles in a damaged file
            if(++hops>10)
                throw new InvalidDataException("nested too much!");
        }
    }

    /// <summary>
    /// goto linked node (one step only); does nothing for a normal node
    /// </summary>
    public void GotoLink()
    {
        if(this.Type!=DataNodeType.Normal)
            this.ReadFrom(this.LinkNodeOffset);
    }
}
#endregion
#endregion
#region QQWry Index Field
/// <summary>
/// index records enumerator and ip finder.
/// The index field is a sequence of 7-byte records sorted by start ip;
/// lookups use a binary search over it.
/// </summary>
public class IndexRecords: System.Collections.IEnumerator
{
    internal static readonly int IndexCacheLevel=3; //cache level 3 means 2**3=8 items
    //NOTE(review): the cache is static and keyed by file offset only, so it is
    //shared by all instances, including instances opened on different files.
    internal static Hashtable IndexCache=null; //cache items
    private static readonly int IndexRecordSize=7; //index record size is 7
    private FileStream fsFile=null; //data file
    private int IndexStart=0; //file position of the first index record
    private int IndexEnd=0; //file position of the last index record (inclusive)
    private IndexRecord irData=null; //current index record
    private int iPosition=0; //file position of the record the next MoveNext() will load
    private Regex regIPPattern=null; //filter applied to start ips during a pattern search
    private bool bHavePattern=false; //true while a pattern search is active
    private int PatternEnd=0; //file position at which a pattern search stops (exclusive)

    /// <summary>
    /// get index record count.
    /// IndexEnd is the position of the last record, not the position behind it,
    /// hence the "+1".
    /// </summary>
    public int Count
    {
        get
        {
            return (IndexEnd-IndexStart)/IndexRecordSize+1;
        }
    }

    /// <summary>
    /// true while a pattern search started by Find(string) is still running
    /// </summary>
    public bool PatternSearch
    {
        get
        {
            return bHavePattern;
        }
    }

    /// <summary>
    /// creates the shared index cache if it does not exist yet
    /// </summary>
    public void InitCache()
    {
        if(IndexRecords.IndexCache==null)
        {
            //size is set to 8 according to index cache level (3)
            IndexRecords.IndexCache=new Hashtable(8);
        }
    }

    /// <summary>
    /// find first ip range includes supplied ip
    /// ip in string format; an octet may be "*" to start a pattern search,
    /// e.g. "61.232.202.*". While PatternSearch is true, MoveNext() walks the
    /// following ranges whose start ip matches the pattern.
    /// TODO: Add domain support
    /// </summary>
    /// <param name="ipstr">dotted ip, optionally with "*" octets</param>
    /// <returns>true if a range containing the (lowest matching) ip was found</returns>
    /// <exception cref="ArgumentNullException">ipstr is null</exception>
    /// <exception cref="FormatException">ipstr is not a valid ip or ip pattern</exception>
    public bool Find(string ipstr)
    {
        if(ipstr==null)
            throw new ArgumentNullException("ipstr");
        string[] flds=ipstr.Split(new char[]{'.',' ','\r','\n','\t'},5);
        if(flds.Length<4)
            throw new FormatException("Invalid IP format."+ipstr);
        byte[] ip=new byte[4]; //lowest address covered by the pattern
        byte[] endip=new byte[4]; //highest address covered by the pattern
        string[] parts=new string[4]; //regex fragment per octet
        bool wildcard=false;
        for(int i=0;i<4;++i)
        {
            if(flds[i]=="*")
            {
                ip[3-i]=0;
                endip[3-i]=255;
                parts[i]="[0-9]{1,3}";
                wildcard=true;
            }
            else
            {
                byte octet=ParseOctet(flds[i],ipstr);
                ip[3-i]=octet;
                endip[3-i]=octet;
                parts[i]=octet.ToString(System.Globalization.CultureInfo.InvariantCulture);
            }
        }
        bHavePattern=wildcard;
        if(wildcard)
        {
            //pattern search: dots are literal and the expression is anchored,
            //so "1.2.3.*" cannot match "11.2.3.4"
            regIPPattern=new Regex("^"+string.Join("\\.",parts)+"$");
            //if no pattern end, cancel pattern enumerate
            if(!Find(endip))
                bHavePattern=false;
            else
                PatternEnd=iPosition;
        }
        return Find(ip);
    }

    /// <summary>
    /// find first ip range includes supplied ip
    /// </summary>
    /// <param name="ip">an IPv4 address</param>
    /// <returns>true if found</returns>
    /// <exception cref="ArgumentNullException">ip is null</exception>
    /// <exception cref="ArgumentException">ip is not a 4-byte (IPv4) address</exception>
    public bool Find(IPAddress ip)
    {
        if(ip==null)
            throw new ArgumentNullException("ip");
        byte[] ipbyte=ip.GetAddressBytes();
        if(ipbyte.Length!=4)
            throw new ArgumentException("Only IPv4 addresses are supported.","ip");
        bHavePattern=false;
        //GetAddressBytes is most significant octet first, the file format is the opposite
        Array.Reverse(ipbyte);
        return Find(ipbyte);
    }

    /// <summary>
    /// binary search to find first ip range
    /// find ip in byte format 1.255.255.255 -------- FF FF FF 01
    /// On success the found record is loaded into the current record and the
    /// position is moved to the record behind it.
    /// </summary>
    /// <param name="ip">target ip, least significant octet first</param>
    /// <returns>true if found</returns>
    protected bool Find(byte[] ip)
    {
        int l=0;
        int r=this.Count-1; //index of the last record
        if(l>r)
            return false;
        int m=(l+r)/2;
        int level=1; //search depth, the first levels are served from the cache
        bool bIsGreater=false;
        while(!irData.CheckRange(IndexStart+m*IndexRecordSize,ref bIsGreater,ip,level))
        {
            if(bIsGreater)
                r=m-1;
            else
                l=m+1;
            //nothing found
            if(l>r)
                return false;
            m=(l+r)/2;
            level++;
        }
        //redirect to the node behind the current one
        iPosition=IndexStart+(m+1)*IndexRecordSize;
        return true;
    }

    /// <summary>
    /// Opens the index field described by the 8-byte file header.
    /// </summary>
    /// <param name="fs">data file</param>
    /// <exception cref="EndOfStreamException">the file is shorter than its header</exception>
    public IndexRecords(ref FileStream fs)
    {
        irData=new IndexRecord(ref fs);
        fsFile=fs;
        byte[] header=new byte[8];
        fs.Seek(0,SeekOrigin.Begin);
        int got=0;
        while(got<header.Length)
        {
            int n=fs.Read(header,got,header.Length-got);
            if(n<=0)
                throw new EndOfStreamException("QQWry header is truncated.");
            got+=n;
        }
        //first 4 bytes records start position of index field
        IndexStart=ToInt32LE(header,0);
        //second 4 bytes records end position of index field
        IndexEnd=ToInt32LE(header,4);
        iPosition=IndexStart;
        InitCache();
    }

    /// <summary>
    /// Opens an index field with explicitly given bounds.
    /// </summary>
    /// <param name="indexstart">file position of the first index record</param>
    /// <param name="indexend">file position of the last index record
    /// (assumed to follow the same convention as the file header - TODO confirm with callers)</param>
    /// <param name="fs">data file</param>
    public IndexRecords(int indexstart,int indexend,ref FileStream fs)
    {
        irData=new IndexRecord(ref fs);
        fsFile=fs;
        IndexStart=indexstart;
        IndexEnd=indexend;
        iPosition=IndexStart;
        InitCache();
    }

    /// <summary>
    /// data record of the current index record (loaded from the file on every access)
    /// </summary>
    public DataRecord CurrentData
    {
        get
        {
            return irData.Data;
        }
    }

    /// <summary>
    /// current index node; the same IndexRecord instance is reused for every step
    /// </summary>
    public object Current
    {
        get
        {
            return irData;
        }
    }

    /// <summary>
    /// reset position and cancel any pattern search
    /// </summary>
    public void Reset()
    {
        bHavePattern=false;
        iPosition=IndexStart;
    }

    /// <summary>
    /// move to next record; during a pattern search, records whose start ip
    /// does not match the pattern are skipped
    /// </summary>
    /// <returns>true if have more nodes</returns>
    public bool MoveNext()
    {
        //IndexEnd is the position of the last record, so it is still a valid position
        while(iPosition<=IndexEnd && !(bHavePattern && iPosition>=PatternEnd))
        {
            irData.ReadFrom(iPosition);
            iPosition+=IndexRecordSize;
            if(!bHavePattern || regIPPattern.IsMatch(irData.StartIP))
                return true;
        }
        //end pattern search
        bHavePattern=false;
        return false;
    }

    /// <summary>
    /// Returns this object so that it can be the source of a foreach statement.
    /// </summary>
    public System.Collections.IEnumerator GetEnumerator()
    {
        return this;
    }

    /// <summary>
    /// Parses one ip octet; any parse failure is reported as an invalid ip.
    /// </summary>
    private static byte ParseOctet(string field,string ipstr)
    {
        try
        {
            return byte.Parse(field,System.Globalization.CultureInfo.InvariantCulture);
        }
        catch(FormatException ex)
        {
            throw new FormatException("Invalid IP format."+ipstr,ex);
        }
        catch(OverflowException ex)
        {
            throw new FormatException("Invalid IP format."+ipstr,ex);
        }
    }

    /// <summary>
    /// Decodes a 4-byte little-endian integer independent of the host byte order.
    /// </summary>
    private static int ToInt32LE(byte[] data,int start)
    {
        return data[start]|(data[start+1]<<8)|(data[start+2]<<16)|(data[start+3]<<24);
    }
}
/// <summary>
/// index record: 4 bytes start ip followed by a 3-byte file position of the
/// data record that holds the end ip and the texts of the range.
/// An instance is a reusable view; ReadFrom and CheckRange overwrite its content.
/// </summary>
public class IndexRecord
{
    private static readonly int CacheEntrySize=11; //4 bytes start ip + 3 bytes data offset + 4 bytes end ip
    private byte[] btIPBytes=null; //ip bytes
    private byte[] btDNOffset=null; //data record position (3 bytes used, 4th stays 0)
    private DataRecord drData=null; //linked data record
    private FileStream fs=null; //data file

    /// <summary>
    /// ip address, it is the start ip of ip range
    /// </summary>
    public string StartIP
    {
        get
        {
            return string.Format("{0}.{1}.{2}.{3}",btIPBytes[3],btIPBytes[2],btIPBytes[1],btIPBytes[0]);
        }
    }

    /// <summary>
    /// return data node offset of current index
    /// </summary>
    private int DataRecordOffset
    {
        get
        {
            //decoded explicitly as little-endian so the result does not depend
            //on the byte order of the machine (BitConverter follows the host)
            return btDNOffset[0]|(btDNOffset[1]<<8)|(btDNOffset[2]<<16)|(btDNOffset[3]<<24);
        }
    }

    /// <summary>
    /// get current data record (re-read from the file on every access)
    /// </summary>
    public DataRecord Data
    {
        get
        {
            drData.ReadFrom(this.DataRecordOffset);
            return drData;
        }
    }

    /// <summary>
    /// Creates an empty index record bound to a file; call ReadFrom to load it.
    /// </summary>
    /// <param name="fsFile">data file</param>
    public IndexRecord(ref FileStream fsFile)
    {
        fs=fsFile;
        btIPBytes=new byte[4];
        btDNOffset=new byte[4];
        drData=new DataRecord(ref fs);
    }

    /// <summary>
    /// Creates an index record and immediately loads it from the given offset.
    /// </summary>
    /// <param name="offset">file position of the index record</param>
    /// <param name="fsFile">data file</param>
    public IndexRecord(int offset,ref FileStream fsFile)
    {
        fs=fsFile;
        btIPBytes=new byte[4];
        btDNOffset=new byte[4];
        drData=new DataRecord(ref fs);
        ReadFrom(offset);
    }

    /// <summary>
    /// load start ip and data record position from offset
    /// </summary>
    /// <param name="offset">file position of the index record</param>
    public void ReadFrom(int offset)
    {
        fs.Seek(offset,SeekOrigin.Begin);
        fs.Read(btIPBytes,0,4);
        fs.Read(btDNOffset,0,3);
    }

    /// <summary>
    /// check whether ip in current range.
    /// For the first search levels the start ip, data offset and end ip of the
    /// record are kept in IndexRecords.IndexCache, so repeated searches do not
    /// have to read them from the file again. After the call this object
    /// describes the record at offset, whether it came from the cache or not.
    /// </summary>
    /// <param name="offset">file position of the index record to test</param>
    /// <param name="g">set to true if the start ip of the record is greater than the target ip</param>
    /// <param name="ip">target ip, least significant octet first</param>
    /// <param name="lvl">search level</param>
    /// <returns>true if in range</returns>
    public bool CheckRange(int offset,ref bool g,byte[] ip,int lvl)
    {
        Hashtable cache=IndexRecords.IndexCache;
        //without a cache (no IndexRecords created yet) fall back to plain reads
        if(cache!=null && lvl<=IndexRecords.IndexCacheLevel)
        {
            byte[] buf=(byte[])cache[offset];
            if(buf==null)
            {
                //add new cache item
                buf=new byte[CacheEntrySize];
                ReadFrom(offset);
                //loads the end ip of the range into drData.IPBytes
                drData.CheckRange(this.DataRecordOffset,ip);
                Array.Copy(btIPBytes,0,buf,0,4);
                Array.Copy(btDNOffset,0,buf,4,3);
                Array.Copy(drData.IPBytes,0,buf,7,4);
                cache[offset]=buf;
            }
            else
            {
                //cache hit: make this object describe the cached record so that
                //StartIP and Data refer to it if it turns out to be the match
                Array.Copy(buf,0,btIPBytes,0,4);
                Array.Copy(buf,4,btDNOffset,0,3);
            }
            g=CompareIP(buf,0,ip)>0;
            if(g)
                return false;
            //in range when the cached end ip is not below the target
            return CompareIP(buf,7,ip)>=0;
        }
        ReadFrom(offset);
        //compare ip bytes
        g=CompareIP(btIPBytes,0,ip)>0;
        if(g)
            return false; //target_ip start_ip<--- ip range --->end_ip , so target_ip is out of range
        return drData.CheckRange(this.DataRecordOffset,ip);
    }

    /// <summary>
    /// returns the start ip in dotted notation
    /// </summary>
    public override string ToString()
    {
        return this.StartIP;
    }

    /// <summary>
    /// Compares the 4-byte ip stored at data[start..start+3] with ip, both
    /// least significant octet first.
    /// </summary>
    /// <returns>positive if the stored ip is greater, negative if smaller, 0 if equal</returns>
    private static int CompareIP(byte[] data,int start,byte[] ip)
    {
        for(int i=3;i>=0;--i)
        {
            if(data[start+i]!=ip[i])
                return data[start+i]>ip[i]?1:-1;
        }
        return 0;
    }
}
#endregion
}
浙公网安备 33010602011771号