Doyle

Doyle's Memories off

导航

关于QQwry格式

 

关于QQwry格式

作者  cnss 2004-8-18
版权所有 转载请注明出处
http://blog.csdn.net/cnss

刚才通过RSS看到一篇关于QQwry格式的blog: http://blog.csdn.net/taft/archive/2004/08/18/77559.aspx

想不到QQwry还在用,这是俺两年前设计的,这个格式该被淘汰了.为什么这么说呢,因为它采用的是索引+二分查找来减小内存占用和提高查找速度的.

由于采用二分查找,所以IP数据要被分为最小的片,假设有A,B两条数据,B数据完全覆盖A数据,那么转换为QQwry后两条数据就变成了三条.如果原始数据非常有条理,就可以避免这个现象,不过这是不可能的,几万条数据会越来越乱,所以QQwry的尺寸会迅速增加,之所以增长的不是特别快,是因为格式对重复数据有一定压缩.

QQwry.dat:"咦?我没吃那么多,怎么胖的那么快!?"

有几点改进一下,就可以满足日后需要了:
1.搜索改成可分层搜索,并且转换文件时可以选择侧重文件大小还是搜索速度.
2.现在索引是24bit的,因此数据区不能超过16M.
3.加入收集人的签名.

那篇文章是作者猜测的格式,我再把原来整理的发一遍吧.0x2 0x0 0x0 0x0不是错误,可能是给御风而行放版权信息的地方.如要qqwry格式源代码可联系我.

-----------------------------------------------
新格式说明

主要分为数据区和索引区
★数据区元素:
存放IP信息中的:结束IP(4字节),国家(不定长),地区(不定长)
排列顺序:无要求
★索引区元素:
存放IP信息中的:起始IP(4字节),索引值(3字节)
排列顺序:起始IP按升序排列
★IP为4字节,如"255.0.0.0"表示为0xFF000000,存在文件中则为00 00 00 FF(字节序原因)
★索引值为该IP信息的<结束IP、国家、地区>在文件中的位置(偏移量),指向<结束IP>。
★ 如果结束IP后的字节为0x01,则说明该IP信息的<国家、地区>与前面的IP信息重复,这时0x01后面的3个字节为国家、地区字符串的偏移量。可以根据这三个字节去前面找国家、地区。
★如果国家的第一个字节为0x02,说明该国家串与前面的国家或地区串重复,0x02后面的三个字节为该串的偏移量,可以根据该偏移量找到前面的串。
★ 如果地区的第一个字节为0x02,说明该地区串与前面的国家或地区串重复,0x02后面的三个字节为该串的偏移量,可以根据该偏移量找到前面的串。
★ 有可能在出现0x01的情况下出现0x02,这时需要跳转两次查找国家、地区字符串。
★ 正常的字符串以NULL做结尾。
★ IP信息不允许有重复、覆盖
★ 使用索引(起始IP按升序排列)是为了能用二分法查找,保证搜索速度(查找次数为对数级)
★ 新格式不允许存放未知数据的IP信息,原格式中的未知数据已经都去掉了。如果保留未知数据的IP信息,将大大增加文件长度。

  文件的头4个字节是索引区第一个元素的偏移量,第二个4字节是索引区最后一个元素的偏移量。通过这两个偏移量,可以用二分法快速查找IP信息。例如:有如下一条IP信息,要查询的IP为150:
起始    结束    国家    地区
100      200    中国    北京
首先在索引区找到起始IP小于等于150的最后一个元素,通过索引,找到结束IP,如果150大于结束IP,说明是未知数据;如果150小于等于结束IP,则找到国家、地区。


//////////

using System;
using System.Net;
using System.IO;
using System.Text.RegularExpressions;
using System.Collections;

/************************************************************************/
/* QQWry.dat ip database reader and finder
 * by vmlinux@smth
 * 2004/10/29                              */
/************************************************************************/
namespace vmlinux.QQWry
{
	#region Sample
	/********************************************************
	QQWry.IndexRecords recs=new QQWry.IndexRecords(ref fs);
	QQWry.DataRecord data=null;
	//Find IP Address
	if(recs.Find("192.168.0.1"))
	{
		//found
		data=recs.CurrentData;
		//start ip:recs.Current.ToString()
		//end ip  :data.EndIP
		//location:data.Location
		//network :data.Network
		//jobs here
	}
	else
	{
		//not found
	}
	//Find IP Address Pattern
	if(recs.Find("61.232.202.*"))
	{
		do
		{
			data=recs.CurrentData;
			//jobs here
		}while(recs.PatternSearch && recs.MoveNext());
	}
	else
	{
		//not found
	}
	//Enumerate Index Field
	foreach(IndexRecord x in recs)
	{
		data=x.Data;
		//jobs here
	}
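	//Enumerate Data Field
	//(recs yields IndexRecord objects, so a DataRecords enumerator is needed here;
	// indexStart is the index offset stored in the first 4 bytes of the file)
	foreach(DataRecord x in new QQWry.DataRecords(ref fs,indexStart))
	{
		//jobs here
	}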
	*******************************************************/
	#endregion
	
	#region QQWry Data Field
	/// <summary>
	/// Forward-only enumerator over the records of the data field.
	/// Records are read back to back, starting at file offset 8 (right after the
	/// two 4-byte header values) and stopping once the current position reaches
	/// the end position handed to the constructor.
	/// The same <see cref="DataRecord"/> instance is reloaded for every element,
	/// so a reference obtained from <see cref="Current"/> changes on the next MoveNext.
	/// </summary>
	public class DataRecords: System.Collections.IEnumerator, System.Collections.IEnumerable
	{
		private static readonly int DataStartPosition=8;	//start file position of data field
		private DataRecord drRecord=null;					//record object reused for every element
		private int iPosition;								//file position of the next record to load
		private int iEndPosition;							//position to end enumeration (exclusive)

		/// <summary>
		/// the record loaded by the last successful MoveNext
		/// (an unloaded record object before the first call)
		/// </summary>
		public object Current
		{
			get
			{
				return drRecord;
			}
		}
		/// <summary>
		/// load the record at the current position and advance past it
		/// </summary>
		/// <returns>false once the end position has been reached</returns>
		public bool MoveNext()
		{
			if(iPosition>=iEndPosition)
				return false;

			drRecord.ReadFrom(iPosition);
			//DataLength is the number of bytes the record occupies in the file,
			//so this lands on the first byte of the following record
			iPosition+=drRecord.DataLength;
			return true;
		}
		/// <summary>
		/// rewind to the first record of the data field
		/// </summary>
		public void Reset()
		{
			iPosition=DataStartPosition;
		}
		/// <summary>
		/// constructor
		/// </summary>
		/// <param name="fs">data file</param>
		/// <param name="endpos">file position to end enumeration</param>
		public DataRecords(ref FileStream fs,int endpos)
		{
			drRecord=new DataRecord(ref fs);
			iPosition=DataStartPosition;
			iEndPosition=endpos;
		}
		/// <summary>
		/// return this object as the enumerator used by foreach.
		/// The position is rewound first; without that a second foreach over the
		/// same object would start at the end and yield nothing.
		/// </summary>
		public System.Collections.IEnumerator GetEnumerator()
		{
			Reset();
			return this;
		}
	}

	/// <summary>
	/// this class represent a record in data field
	/// its structure is as following
	/// ====================================
	/// IP Address (4 bytes, least significant byte first)
	/// Location Data Node (variable by tag)
	/// Network Data Node (variable by tag)
	/// ====================================
	/// One instance is intended to be reloaded many times through ReadFrom.
	/// </summary>
	public class DataRecord
	{
		#region internal tools and buffer
		//scratch buffer shared by all readers (also used by other classes of this file).
		//NOTE(review): static and unsynchronised, so concurrent readers would
		//overwrite each other's data.
		internal static byte[] buffer=new byte[256];
		/// <summary>
		/// Read a NUL-terminated string. The caller has already consumed the first
		/// byte of the string (as a tag byte), so the stream is stepped back by one
		/// before reading. On return the stream is positioned just past the terminator.
		/// At most buffer.Length bytes are examined; a longer string is cut off there.
		/// </summary>
		/// <param name="fs">data file, positioned one byte after the start of the string</param>
		/// <param name="len">receives the number of bytes occupied in the file, terminator included</param>
		/// <returns>the decoded string without the terminator</returns>
		internal static string ReadString(ref FileStream fs,ref int len)
		{
			fs.Seek(-1,SeekOrigin.Current);
			long start=fs.Position;
			//Read may deliver fewer bytes than requested (e.g. close to the end of
			//the file); only the bytes actually delivered are scanned so stale
			//buffer content is never taken for string data
			int read=fs.Read(buffer,0,buffer.Length);
			int end=0;
			while(end<read && buffer[end]!=0)
				++end;
			//NOTE(review): decoded with the system default encoding, so the result
			//depends on the machine's code page - confirm against the database's encoding
			string text=System.Text.Encoding.Default.GetString(buffer,0,end);
			//absolute seek: independent of how many bytes the read returned
			fs.Seek(start+end+1,SeekOrigin.Begin);
			len=end+1;
			return text;
		}
		#endregion

		private FileStream fs=null;				//data file
		private byte[] btIPBytes=null;			//ip bytes, "1.255.255.255" looks like FF FF FF 01
		private DataNode ndLocation=null;		//location data node
		private DataNode ndNetwork=null;		//network data node
		private int iLen=0;						//bytes this record occupies in the file, not the length of its strings

		/// <summary>
		/// raw ip bytes as last read from the file (least significant byte first)
		/// </summary>
		internal byte[] IPBytes
		{
			get
			{
				return btIPBytes;
			}
		}
		/// <summary>
		/// the string format ip which presents as end ip of ip range by design
		/// </summary>
		public string EndIP
		{
			get
			{
				return string.Format("{0}.{1}.{2}.{3}",btIPBytes[3],btIPBytes[2],btIPBytes[1],btIPBytes[0]);
			}
		}
		/// <summary>
		/// location value
		/// </summary>
		public string Location
		{
			get
			{
				return ndLocation.Value;
			}
		}
		/// <summary>
		/// network value
		/// </summary>
		public string Network
		{
			get
			{
				return ndNetwork.Value;
			}
		}
		/// <summary>
		/// data length in file
		/// </summary>
		public int DataLength
		{
			get
			{
				return iLen;
			}
		}
		/// <summary>
		/// return debug info (the node types of location and network)
		/// </summary>
		public string DebugInfo
		{
			get
			{
				return string.Format("Loc:{0} Net:{1}",ndLocation.Type,ndNetwork.Type);
			}
		}
		/// <summary>
		/// create an empty record bound to a data file; call ReadFrom to load it
		/// </summary>
		/// <param name="fsFile">data file</param>
		public DataRecord(ref FileStream fsFile)
		{
			fs=fsFile;
			btIPBytes=new byte[4];
			ndLocation=new DataNode(ref fs);
			ndNetwork=new DataNode(ref fs);
		}
		/// <summary>
		/// create a record bound to a data file and load it from offset
		/// </summary>
		/// <param name="offset">file position of the record</param>
		/// <param name="fsFile">data file</param>
		public DataRecord(int offset,ref FileStream fsFile)
		{
			fs=fsFile;
			btIPBytes=new byte[4];
			ndLocation=new DataNode(ref fs);
			ndNetwork=new DataNode(ref fs);

			ReadFrom(offset);
		}

		/// <summary>
		/// load data at offset: the 4 ip bytes, then the location node, then the
		/// network node; both nodes are finally resolved to the node holding the string
		/// </summary>
		/// <param name="offset">file position of the record</param>
		public void ReadFrom(int offset)
		{
			fs.Seek(offset,SeekOrigin.Begin);
			fs.Read(btIPBytes,0,4);
			iLen=4;
			ndLocation.ReadFrom((int)fs.Position);
			iLen+=ndLocation.DataLength;

			if(ndLocation.Type==DataNodeType.Type1)
			{
				//a type1 tag redirects location AND network together: the network
				//node sits right behind the redirected location node, and the
				//record itself takes no further bytes here
				ndLocation.GotoLink();
				ndNetwork.ReadFrom((int)fs.Position);
			}
			else
			{
				//network node follows the location node inside this record
				ndNetwork.ReadFrom((int)fs.Position);
				iLen+=ndNetwork.DataLength;
			}

			//goto final data node
			ndLocation.GotoRoot();
			ndNetwork.GotoRoot();
		}

		/// <summary>
		/// check whether ip in range, here less than or equal to node's ip data.
		/// Only the 4 ip bytes at offset are read; the nodes are left untouched.
		/// </summary>
		/// <param name="offset">file position of the record</param>
		/// <param name="ip">target ip, least significant byte first</param>
		/// <returns>true if target ip less than or equal to current ip</returns>
		public bool CheckRange(int offset,byte[] ip)
		{
			fs.Seek(offset,SeekOrigin.Begin);
			fs.Read(btIPBytes,0,4);
			//compare from the most significant byte (index 3) downwards
			for(int i=3;i>0;--i)
			{
				if(btIPBytes[i]!=ip[i])
					return btIPBytes[i]>ip[i];
			}
			return btIPBytes[0]>=ip[0];
		}
	}

	#region Data Node
	/// <summary>
	/// kind of a data node, decided by the first byte read for the node
	/// </summary>
	public enum DataNodeType
	{
		/// <summary>the node holds its string inline (any tag byte other than 1 or 2)</summary>
		Normal,
		/// <summary>tag byte 1: followed by a 3-byte offset of the linked node</summary>
		Type1,
		/// <summary>tag byte 2: followed by a 3-byte offset of the linked node</summary>
		Type2,
		/// <summary>nothing has been read into the node yet</summary>
		Unknown
	}

	/// <summary>
	/// data node of a data record: either a NUL-terminated string stored inline
	/// (normal node) or a tag byte 1 / 2 followed by a 3-byte file offset of
	/// another node (linked node). One instance is reloaded through ReadFrom.
	/// </summary>
	public class DataNode
	{
		private DataNodeType ndType=DataNodeType.Unknown;	//default node type is unknown
		private FileStream fs=null;							//data file
		private string sValue=null;							//value of this node which is a string if the node is of normal type
		private byte[] btDNOffset=null;						//linked node offset position bytes; byte 3 is never written and stays 0
		private int iLen=0;									//data length of this node

		/// <summary>
		/// type of node
		/// </summary>
		public DataNodeType Type
		{
			get
			{
				return ndType;
			}
		}
		/// <summary>
		/// string value of node, only available when node is normal
		/// (for a linked node it still holds whatever an earlier read left behind)
		/// </summary>
		public string Value
		{
			get
			{
				return sValue;
			}
		}
		/// <summary>
		/// data length in bytes: 4 for a linked node, string length plus
		/// terminator for a normal node
		/// </summary>
		public int DataLength
		{
			get
			{
				return iLen;
			}
		}
		/// <summary>
		/// linked node file position; only meaningful for a linked node
		/// </summary>
		public int LinkNodeOffset
		{
			get
			{
				return  BitConverter.ToInt32(btDNOffset,0);
			}
		}

		/// <summary>
		/// create an empty node bound to a data file
		/// </summary>
		/// <param name="fsFile">data file</param>
		public DataNode(ref FileStream fsFile)
		{
			fs=fsFile;
			btDNOffset=new byte[4];
		}
		/// <summary>
		/// create a node bound to a data file and load it from offset
		/// </summary>
		/// <param name="offset">file position of the node</param>
		/// <param name="fsFile">data file</param>
		public DataNode(int offset,ref FileStream fsFile)
		{
			fs=fsFile;
			btDNOffset=new byte[4];

			ReadFrom(offset);
		}

		/// <summary>
		/// read data from offset. Links are not followed here; afterwards the
		/// stream is positioned right behind the node.
		/// </summary>
		/// <param name="offset">file position of the node</param>
		public void ReadFrom(int offset)
		{
			fs.Seek(offset,SeekOrigin.Begin);
			//read tag byte
			fs.Read(btDNOffset,0,1);
			byte tag=btDNOffset[0];
			if(tag==1 || tag==2)
			{
				//linked node: the tag is followed by a 3-byte offset which
				//overwrites the tag byte in the offset buffer
				ndType=(tag==1)?DataNodeType.Type1:DataNodeType.Type2;
				fs.Read(btDNOffset,0,3);
				iLen=4;
			}
			else
			{
				//a normal node: the byte just read is the first byte of the string
				ndType=DataNodeType.Normal;
				sValue=DataRecord.ReadString(ref fs,ref iLen);
			}
		}

		/// <summary>
		/// goto final node which contains value, following links for at most
		/// ten hops.
		/// A link offset of 0 points at the file header and can never address a
		/// string, so it is taken as "no value" and the node becomes a normal
		/// node with an empty string instead of decoding header bytes as text.
		/// </summary>
		/// <exception cref="InvalidDataException">more than ten links were followed</exception>
		public void GotoRoot()
		{
			int hops=0;
			while(ndType!=DataNodeType.Normal)
			{
				int target=this.LinkNodeOffset;
				if(target==0)
				{
					ndType=DataNodeType.Normal;
					sValue=string.Empty;
					return;
				}
				this.ReadFrom(target);
				//guards against circular links in a damaged file
				if(++hops>10)
					throw new InvalidDataException("nested too much!");
			}
		}
		/// <summary>
		/// goto linked node (one hop only); does nothing for a normal node
		/// </summary>
		public void GotoLink()
		{
			if(ndType==DataNodeType.Normal)
				return;
			this.ReadFrom(this.LinkNodeOffset);
		}
	}
	#endregion
	#endregion

	#region QQWry Index Field
	/// <summary>
	/// index records enumerator and ip lookup.
	/// The index field is a sequence of 7-byte records sorted by start ip.
	/// IndexStart is the file position of the first record and IndexEnd the file
	/// position of the LAST record (not one past it), as stored in the two 4-byte
	/// values at the beginning of the file.
	/// Enumeration reuses one IndexRecord object and continues from the current
	/// position, i.e. right behind the record located by the last Find; call
	/// Reset to enumerate from the first record.
	/// </summary>
	public class IndexRecords: System.Collections.IEnumerator
	{
		internal static readonly int IndexCacheLevel=3;		//cache level 3 means 2**3=8 items
		internal static Hashtable IndexCache=null;			//cache items, shared by all instances
		private static readonly int IndexRecordSize=7;		//index record size is 7
		private FileStream fsFile=null;						//data file
		private int IndexStart=0;							//file position of the first index record
		private int IndexEnd=0;								//file position of the last index record
		private IndexRecord irData=null;					//current index record
		private int iPosition=0;							//file position of the next record to load
		private Regex regIPPattern=null;					//matches start ips during a pattern search
		private bool bHavePattern=false;					//true while a pattern search is being enumerated
		private int PatternEnd=0;							//position at which a pattern search stops (exclusive)

		/// <summary>
		/// get index record count, the record at IndexEnd included
		/// </summary>
		public int Count
		{
			get
			{
				if(IndexEnd<IndexStart)
					return 0;
				//IndexEnd addresses the last record itself, hence the +1
				return (IndexEnd-IndexStart)/IndexRecordSize+1;
			}
		}
		/// <summary>
		/// true while the enumeration is restricted by a wildcard pattern
		/// </summary>
		public bool PatternSearch
		{
			get
			{
				return bHavePattern;
			}
		}
		/// <summary>
		/// create the shared lookup cache if it does not exist yet
		/// </summary>
		public void InitCache()
		{
			if(IndexRecords.IndexCache==null)
			{
				//size is set to 8 according to index cache level (3)
				IndexRecords.IndexCache=new Hashtable(8);
			}
		}
		/// <summary>
		/// find first ip range includes supplied ip
		/// ip in string format; any of the four fields may be "*".
		/// With a wildcard the lookup is done for the lowest address of the
		/// pattern (every "*" taken as 0) and, when the highest address (every
		/// "*" taken as 255) is found too, PatternSearch becomes true so that
		/// MoveNext walks the records in between whose start ip matches.
		/// TODO: Add domain support
		/// </summary>
		/// <param name="ipstr">dotted ip such as "192.168.0.1" or "61.232.202.*"</param>
		/// <returns>true if a range containing the (lowest) ip was found</returns>
		/// <exception cref="ArgumentNullException">ipstr is null</exception>
		/// <exception cref="FormatException">ipstr is not a valid ip or pattern</exception>
		public bool Find(string ipstr)
		{
			if(ipstr==null)
				throw new ArgumentNullException("ipstr");

			string[] flds=ipstr.Split(new char[]{'.',' ','\r','\n','\t'},5);
			if(flds.Length<4)
				throw new FormatException("Invalid IP format."+ipstr);

			byte[] ip=new byte[4];				//lowest address of the pattern
			byte[] endip=new byte[4];			//highest address of the pattern
			string[] parts=new string[4];		//regex fragment per field
			bool wildcard=false;
			bHavePattern=false;
			//only the parsing is guarded: errors raised by the file lookup below
			//are not format errors and are passed on unchanged
			try
			{
				for(int i=0;i<4;++i)
				{
					if(flds[i]=="*")
					{
						ip[3-i]=0;
						endip[3-i]=255;
						parts[i]="[0-9]{1,3}";
						wildcard=true;
					}
					else
					{
						byte b=byte.Parse(flds[i]);
						ip[3-i]=b;
						endip[3-i]=b;
						parts[i]=b.ToString();
					}
				}
			}
			catch(FormatException ex)
			{
				throw new FormatException("Invalid IP format."+ipstr,ex);
			}
			catch(OverflowException ex)
			{
				throw new FormatException("Invalid IP format."+ipstr,ex);
			}

			if(wildcard)
			{
				//the dots are escaped and the pattern is anchored; otherwise "."
				//matches any character and e.g. "1.5.111.1" would match "1.*.1.1"
				regIPPattern=new Regex("^"+string.Join("\\.",parts)+"$");
				//if no pattern end, cancel pattern enumerate
				if(Find(endip))
				{
					PatternEnd=iPosition;
					bHavePattern=true;
				}
			}
			return Find(ip);
		}

		/// <summary>
		/// find the ip range that includes the supplied IPv4 address
		/// </summary>
		/// <param name="ip">IPv4 address</param>
		/// <returns>true if found</returns>
		/// <exception cref="ArgumentNullException">ip is null</exception>
		/// <exception cref="ArgumentException">ip is not a 4-byte (IPv4) address</exception>
		public bool Find(IPAddress ip)
		{
			if(ip==null)
				throw new ArgumentNullException("ip");
			bHavePattern=false;
			byte[] ipbyte=ip.GetAddressBytes();
			if(ipbyte.Length!=4)
				throw new ArgumentException("Only IPv4 addresses are supported.","ip");
			//GetAddressBytes is most significant byte first, the index wants the reverse
			Array.Reverse(ipbyte);
			return Find(ipbyte);
		}
		/// <summary>
		/// binary search to find first ip range
		/// find ip in byte format 1.255.255.255 -------- FF FF FF 01
		/// On success the current record is the one found and the position is
		/// set right behind it.
		/// </summary>
		/// <param name="ip">target ip, least significant byte first</param>
		/// <returns>true if found</returns>
		protected bool Find(byte[] ip)
		{
			int l=0;
			int r=this.Count-1;		//index of the last record
			int level=1;			//search depth, lets the record use its cache for the first probes
			bool bIsGreater=false;
			while(l<=r)
			{
				int m=l+(r-l)/2;
				if(irData.CheckRange(IndexStart+m*IndexRecordSize,ref bIsGreater,ip,level))
				{
					//redirect to current node
					iPosition=IndexStart+(m+1)*IndexRecordSize;
					return true;
				}
				if(bIsGreater)
					r=m-1;
				else
					l=m+1;
				level++;
			}
			//nothing found
			return false;
		}
		/// <summary>
		/// bind to a data file and take the index boundaries from its 8-byte header
		/// </summary>
		/// <param name="fs">data file</param>
		/// <exception cref="EndOfStreamException">the file is shorter than the header</exception>
		public IndexRecords(ref FileStream fs)
		{
			irData=new IndexRecord(ref fs);
			fsFile=fs;
			byte[] header=new byte[8];
			fs.Seek(0,SeekOrigin.Begin);
			//Read may return fewer bytes than asked for, so read until complete
			int got=0;
			while(got<header.Length)
			{
				int n=fs.Read(header,got,header.Length-got);
				if(n<=0)
					throw new EndOfStreamException("File is too short to contain the index header.");
				got+=n;
			}
			//first 4 bytes records start position of index field
			IndexStart=BitConverter.ToInt32(header,0);
			//second 4 bytes records position of the last record of index field
			IndexEnd=BitConverter.ToInt32(header,4);
			iPosition=IndexStart;
			InitCache();
		}
		/// <summary>
		/// bind to a data file with explicitly given index boundaries
		/// </summary>
		/// <param name="indexstart">file position of the first index record</param>
		/// <param name="indexend">file position of the last index record</param>
		/// <param name="fs">data file</param>
		public IndexRecords(int indexstart,int indexend,ref FileStream fs)
		{
			irData=new IndexRecord(ref fs);
			fsFile=fs;

			IndexStart=indexstart;
			IndexEnd=indexend;
			iPosition=IndexStart;
			InitCache();
		}
		/// <summary>
		/// data record the current index record points to (read from the file on every access)
		/// </summary>
		public DataRecord CurrentData
		{
			get
			{
				return irData.Data;
			}
		}
		/// <summary>
		/// current index node
		/// </summary>
		public object Current
		{
			get
			{
				return irData;
			}
		}
		/// <summary>
		/// reset position to the first record and leave pattern search
		/// </summary>
		public void Reset()
		{
			bHavePattern=false;
			iPosition=IndexStart;
		}
		/// <summary>
		/// move to next record; during a pattern search records whose start ip
		/// does not match the pattern are skipped and the search ends at the
		/// pattern end position
		/// </summary>
		/// <returns>true if have more nodes</returns>
		public bool MoveNext()
		{
			//a loop instead of recursion: a wide pattern may skip a very large
			//number of records in a row.
			//the record AT IndexEnd is the last valid one, so it is still read
			while(iPosition<=IndexEnd && !(bHavePattern && iPosition>=PatternEnd))
			{
				irData.ReadFrom(iPosition);
				iPosition+=IndexRecordSize;
				if(!bHavePattern || regIPPattern.IsMatch(irData.StartIP))
					return true;
			}
			//end pattern search
			bHavePattern=false;
			return false;
		}
		/// <summary>
		/// return this object as the enumerator used by foreach; the position is
		/// not rewound
		/// </summary>
		public System.Collections.IEnumerator GetEnumerator()
		{
			return this;
		}
	}
	/// <summary>
	/// index record: 4 bytes start ip (least significant byte first) followed by
	/// a 3-byte file offset of the data record that holds the end ip and the
	/// strings. One instance is reloaded through ReadFrom / CheckRange.
	/// </summary>
	public class IndexRecord
	{
		private byte[] btIPBytes=null;		//ip bytes
		private byte[] btDNOffset=null;		//data record position; byte 3 is never read from file and stays 0
		private DataRecord drData=null;		//linked data record
		private FileStream fs=null;			//data file

		/// <summary>
		/// ip address, it is the start ip of ip range
		/// </summary>
		public string StartIP
		{
			get
			{
				return string.Format("{0}.{1}.{2}.{3}",btIPBytes[3],btIPBytes[2],btIPBytes[1],btIPBytes[0]);
			}
		}
		/// <summary>
		/// return data node offset of current index
		/// </summary>
		private int DataRecordOffset
		{
			get
			{
				return BitConverter.ToInt32(btDNOffset,0);
			}
		}
		/// <summary>
		/// get current data record (loaded from the file on every access)
		/// </summary>
		public DataRecord Data
		{
			get
			{
				drData.ReadFrom(this.DataRecordOffset);
				return drData;
			}
		}

		/// <summary>
		/// create an empty record bound to a data file
		/// </summary>
		/// <param name="fsFile">data file</param>
		public IndexRecord(ref FileStream fsFile)
		{
			fs=fsFile;
			btIPBytes=new byte[4];
			btDNOffset=new byte[4];
			drData=new DataRecord(ref fs);
		}
		/// <summary>
		/// create a record bound to a data file and load it from offset
		/// </summary>
		/// <param name="offset">file position of the index record</param>
		/// <param name="fsFile">data file</param>
		public IndexRecord(int offset,ref FileStream fsFile)
		{
			fs=fsFile;
			btIPBytes=new byte[4];
			btDNOffset=new byte[4];
			drData=new DataRecord(ref fs);

			ReadFrom(offset);
		}

		/// <summary>
		/// load start ip and data record offset from the 7 bytes at offset
		/// </summary>
		/// <param name="offset">file position of the index record</param>
		public void ReadFrom(int offset)
		{
			fs.Seek(offset,SeekOrigin.Begin);
			fs.Read(btIPBytes,0,4);
			fs.Read(btDNOffset,0,3);
		}

		/// <summary>
		/// compare a stored ip with the target ip, most significant byte (index 3) first
		/// </summary>
		/// <returns>positive if stored is greater, negative if smaller, 0 if equal</returns>
		private static int CompareIP(byte[] stored,int start,byte[] ip)
		{
			for(int i=3;i>=0;--i)
			{
				if(stored[start+i]!=ip[i])
					return stored[start+i]>ip[i]?1:-1;
			}
			return 0;
		}

		/// <summary>
		/// check whether ip in current range, i.e. start ip not greater than ip
		/// and ip not greater than end ip. Afterwards this object holds the
		/// record at offset.
		/// For the first search levels the record (start ip, data offset, end ip)
		/// is kept in the cache shared through IndexRecords so that repeated
		/// lookups need no file access for these probes.
		/// NOTE(review): the cache is static and keyed by file offset only, so it
		/// would hand out wrong entries if two different files were used in one
		/// process - confirm this never happens.
		/// </summary>
		/// <param name="offset">file position of the index record to test</param>
		/// <param name="g">set to true if the start ip of the record is greater than the target ip</param>
		/// <param name="ip">target ip, least significant byte first</param>
		/// <param name="lvl">search level</param>
		/// <returns>true if in range</returns>
		public bool CheckRange(int offset,ref bool g,byte[] ip,int lvl)
		{
			if(lvl<=IndexRecords.IndexCacheLevel && IndexRecords.IndexCache!=null)
			{
				//cache entry layout: [0..3] start ip, [4..6] data offset, [7..10] end ip
				byte[] buf=(byte[])IndexRecords.IndexCache[offset];
				if(buf==null)
				{
					//add new cache item
					ReadFrom(offset);
					//reads the end ip of the linked data record into drData.IPBytes
					drData.CheckRange(this.DataRecordOffset,ip);
					buf=new byte[11];
					Array.Copy(btIPBytes,0,buf,0,4);
					Array.Copy(btDNOffset,0,buf,4,3);
					Array.Copy(drData.IPBytes,0,buf,7,4);
					IndexRecords.IndexCache[offset]=buf;
				}
				else
				{
					//cache hit: take over the record so that StartIP and Data
					//describe the record at offset, exactly as after ReadFrom
					Array.Copy(buf,0,btIPBytes,0,4);
					Array.Copy(buf,4,btDNOffset,0,3);
					btDNOffset[3]=0;
				}
				g=CompareIP(buf,0,ip)>0;
				if(g)
					return false;
				//the end ip is part of the entry, so a hit is decided from the
				//cache as well (it used to report "not in range" unconditionally)
				return CompareIP(buf,7,ip)>=0;
			}
			else
			{
				ReadFrom(offset);
				//compare ip bytes
				g=CompareIP(btIPBytes,0,ip)>0;
				if(g)
					return false;	//target_ip start_ip<--- ip range --->end_ip	, so target_ip is out of range
				return drData.CheckRange(this.DataRecordOffset,ip);
			}
		}
		/// <summary>
		/// the start ip in dotted notation
		/// </summary>
		public override string ToString()
		{
			return this.StartIP;
		}
	}
	#endregion

}

posted on 2004-10-29 12:03  Doyle  阅读(802)  评论(0)    收藏  举报