Byte数组操作方法集(Java&.Net)

在优化 Velocity 时,需要把 String 转换为 byte[],因此会用到一些针对 byte 数组的操作,例如在一个数组中查找子数组、数组替换、数组扩展等。下面这个类提供了这样一组方法,性能也不错。

 

package com.taobao.sketch.util;

 

import java.io.UnsupportedEncodingException;

import java.nio.ByteBuffer;

import java.nio.CharBuffer;

import java.nio.charset.Charset;

 

/**

* ArrayUtil,一些基于byte数组的操作方法集

* <p/>

* Author By: junshan

* Created Date: 2010-12-27 16:17:23

*/

public class ArrayUtil {

 

/**

* 查找并替换指定byte数组

*

* @param org of type byte[] 原数组

* @param search of type byte[] 要查找的数组

* @param replace of type byte[] 要替换的数组

* @param startIndex of type int 开始搜索索引

* @return byte[] 返回新的数组

* @throws UnsupportedEncodingException when

*/

public static byte[] arrayReplace(byte[] org, byte[] search, byte[] replace, int startIndex) throws UnsupportedEncodingException {

int index = indexOf(org, search, startIndex);

if (index != -1) {

int newLength = org.length + replace.length – search.length;

byte[] newByte = new byte[newLength];

System.arraycopy(org, 0, newByte, 0, index);

System.arraycopy(replace, 0, newByte, index, replace.length);

System.arraycopy(org, index + search.length, newByte, index + replace.length, org.length – index – search.length);

int newStart = index + replace.length;

//String newstr = new String(newByte, “GBK”);

//System.out.println(newstr);

if ((newByte.length – newStart) > replace.length) {

return arrayReplace(newByte, search, replace, newStart);

}

return newByte;

} else {

return org;

}

}

 

/**
 * Concatenates two byte arrays into a newly allocated array.
 *
 * @param org the bytes that come first
 * @param to  the bytes placed directly after {@code org}
 * @return a new array of length {@code org.length + to.length} holding
 *         {@code org} followed by {@code to}
 */
public static byte[] append(byte[] org, byte[] to) {
    final int headLength = org.length;
    final int tailLength = to.length;
    final byte[] joined = new byte[headLength + tailLength];
    System.arraycopy(org, 0, joined, 0, headLength);
    System.arraycopy(to, 0, joined, headLength, tailLength);
    return joined;
}

 

/**
 * Returns a copy of {@code org} that is one element longer, with {@code to}
 * stored in the extra last slot.
 *
 * @param org the bytes that come first
 * @param to  the single byte to place after them
 * @return a new array of length {@code org.length + 1}
 */
public static byte[] append(byte[] org, byte to) {
    final int lastSlot = org.length;
    final byte[] grown = new byte[lastSlot + 1];
    System.arraycopy(org, 0, grown, 0, lastSlot);
    grown[lastSlot] = to;
    return grown;
}

 

/**
 * Overwrites part of {@code org} in place with the contents of {@code append}.
 *
 * @param org    the destination array, modified by this call
 * @param from   index in {@code org} where the first copied byte is written
 * @param append the bytes to copy; all of them are written
 */
public static void append(byte[] org, int from, byte[] append) {
    final int count = append.length;
    System.arraycopy(append, 0, org, from, count);
}

 

/**

* 从指定数组的copy一个子数组并返回

*

* @param original of type byte[] 原数组

* @param from 起始点

* @param to 结束点

* @return 返回copy的数组

*/

public static byte[] copyOfRange(byte[] original, int from, int to) {

int newLength = to – from;

if (newLength < 0)

throw new IllegalArgumentException(from + ” > ” + to);

byte[] copy = new byte[newLength];

System.arraycopy(original, from, copy, 0,

Math.min(original.length – from, newLength));

return copy;

}

 

 

public static byte[] char2byte(String encode, char… chars) {

Charset cs = Charset.forName(encode);

CharBuffer cb = CharBuffer.allocate(chars.length);

cb.put(chars);

cb.flip();

ByteBuffer bb = cs.encode(cb);

return bb.array();

}

 

/**
 * Finds {@code search} inside {@code org}, scanning from the first element.
 * Shorthand for {@link #indexOf(byte[], byte[], int)} with a start index of 0.
 *
 * @param org    the array to search in
 * @param search the byte sequence to look for
 * @return the index reported by the three-argument overload
 */
public static int indexOf(byte[] org, byte[] search) {
    final int fromBeginning = 0;
    return indexOf(org, search, fromBeginning);
}

 

/**
 * Finds {@code search} inside {@code org}, scanning forward from
 * {@code startIndex}. The search is delegated to a fresh {@code KMPMatcher}
 * prepared for {@code search}.
 *
 * @param org        the array to search in
 * @param search     the byte sequence to look for
 * @param startIndex index in {@code org} at which scanning begins
 * @return the value returned by {@code KMPMatcher.indexOf}: the start index of
 *         the match, or -1 when there is none
 */
public static int indexOf(byte[] org, byte[] search, int startIndex) {
    final KMPMatcher matcher = new KMPMatcher();
    matcher.computeFailure4Byte(search);
    final int position = matcher.indexOf(org, startIndex);
    return position;
}

 

/**
 * Finds the last occurrence of {@code search} inside {@code org}.
 * Shorthand for {@link #lastIndexOf(byte[], byte[], int)} with a start index of 0.
 *
 * @param org    the array to search in
 * @param search the byte sequence to look for
 * @return the index reported by the three-argument overload
 */
public static int lastIndexOf(byte[] org, byte[] search) {
    final int fromBeginning = 0;
    return lastIndexOf(org, search, fromBeginning);
}

 

/**
 * Looks for the last occurrence of {@code search} in {@code org}. The search is
 * delegated to a fresh {@code KMPMatcher} prepared for {@code search}, which
 * scans forward beginning at {@code fromIndex}.
 *
 * @param org       the array to search in
 * @param search    the byte sequence to look for
 * @param fromIndex index in {@code org} at which scanning begins
 * @return the value returned by {@code KMPMatcher.lastIndexOf}: a match start
 *         index, or -1 when nothing was found
 */
public static int lastIndexOf(byte[] org, byte[] search, int fromIndex) {
    final KMPMatcher matcher = new KMPMatcher();
    matcher.computeFailure4Byte(search);
    final int position = matcher.lastIndexOf(org, fromIndex);
    return position;
}

 

/**

* KMP算法类

* <p/>

* Created on 2011-1-3

*/

static class KMPMatcher {

private int[] failure;

private int matchPoint;

private byte[] bytePattern;

 

/**

* Method indexOf …

*

* @param text of type byte[]

* @param startIndex of type int

* @return int

*/

public int indexOf(byte[] text, int startIndex) {

int j = 0;

if (text.length == 0 || startIndex > text.length) return -1;

 

for (int i = startIndex; i < text.length; i++) {

while (j > 0 && bytePattern[j] != text[i]) {

j = failure[j - 1];

}

if (bytePattern[j] == text[i]) {

j++;

}

if (j == bytePattern.length) {

matchPoint = i – bytePattern.length + 1;

return matchPoint;

}

}

return -1;

}

 

/**

* 找到末尾后重头开始找

*

* @param text of type byte[]

* @param startIndex of type int

* @return int

*/

public int lastIndexOf(byte[] text, int startIndex) {

matchPoint = -1;

int j = 0;

if (text.length == 0 || startIndex > text.length) return -1;

int end = text.length;

for (int i = startIndex; i < end; i++) {

while (j > 0 && bytePattern[j] != text[i]) {

j = failure[j - 1];

}

if (bytePattern[j] == text[i]) {

j++;

}

if (j == bytePattern.length) {

matchPoint = i – bytePattern.length + 1;

if ((text.length – i) > bytePattern.length) {

j = 0;

continue;

}

return matchPoint;

}

//如果从中间某个位置找,找到末尾没找到后,再重头开始找

if (startIndex != 0 && i + 1 == end) {

end = startIndex;

i = -1;

startIndex = 0;

}

}

return matchPoint;

}

 

 

/**

* 找到末尾后不会重头开始找

*

* @param text of type byte[]

* @param startIndex of type int

* @return int

*/

public int lastIndexOfWithNoLoop(byte[] text, int startIndex) {

matchPoint = -1;

int j = 0;

if (text.length == 0 || startIndex > text.length) return -1;

 

for (int i = startIndex; i < text.length; i++) {

while (j > 0 && bytePattern[j] != text[i]) {

j = failure[j - 1];

}

if (bytePattern[j] == text[i]) {

j++;

}

if (j == bytePattern.length) {

matchPoint = i – bytePattern.length + 1;

if ((text.length – i) > bytePattern.length) {

j = 0;

continue;

}

return matchPoint;

}

}

return matchPoint;

}

 

/**

* Method computeFailure4Byte …

*

* @param patternStr of type byte[]

*/

public void computeFailure4Byte(byte[] patternStr) {

bytePattern = patternStr;

int j = 0;

int len = bytePattern.length;

failure = new int[len];

for (int i = 1; i < len; i++) {

while (j > 0 && bytePattern[j] != bytePattern[i]) {

j = failure[j - 1];

}

if (bytePattern[j] == bytePattern[i]) {

j++;

}

failure[i] = j;

}

}

}

 

public static void main(String[] args) {

try {

byte[] org = “kadeadedcfdededghkk”.getBytes(“GBK”);

byte[] search = “kk”.getBytes(“GBK”);

 

int last = lastIndexOf(org, search, 19);

long t1 = 0;

long t2 = 0;

int f1 = 0;

int f2 = 0;

for (int i = 0; i < 10000; i++) {

long s1 = System.nanoTime();

f1 = indexOf(org, search, 0);

long s2 = System.nanoTime();

f2 = com.alibaba.common.lang.ArrayUtil.indexOf(org, search);

long s3 = System.nanoTime();

t1 = t1 + (s2 – s1);

t2 = t2 + (s3 – s2);

}

System.out.println(“kmp=” + t1 / 10000 + “,ali=” + t2 / 10000);

System.out.printf(“f1=” + f1 + “,f2=” + f2);

} catch (UnsupportedEncodingException e) {

e.printStackTrace();

}

}

}

 

.net

在 stream流 和 byte[] 中查找(搜索)指定字符串

这里重点看的是两个 Search 扩展方法:一个是对 Stream 类型的扩展,另一个是对 byte[] 类型的扩展。

如果大家有更好的“算法”,请给回复,我们一起优化!

 

-- 常用扩展代码,需要这部分代码的支持!

using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.IO;
using System.Drawing;

namespace Ims.Bll
{
  /// <summary>
  /// stream 、 string 、byte[] 间的转换扩展方法类
  /// </summary>
  public static class StreamExtend
  {
    #region Stream 扩展
    /// <summary>
    /// Stream Stream 转换为 byte 数组
    /// </summary>
    /// <returns></returns>
    public static byte[] ToByteArray(this Stream stream)
    {
      byte[] bytes = new byte[stream.Length];
      stream.Read(bytes, 0, bytes.Length);
      // 设置当前流的位置为流的开始
      stream.Seek(0, SeekOrigin.Begin);
      return bytes;
    }
    /// <summary>
    /// Stream 转换为 image 图片
    /// </summary>
    /// <returns></returns>
    public static Image ToImage(this Stream stream)
    {
      Image img = new Bitmap(stream);
      return img;
    }
    /// <summary>
    /// Stream 转换为 string ,使用 Encoding.Default 编码
    /// </summary>
    /// <returns></returns>
    public static string ToStr(this Stream stream)
    {
      return System.Text.Encoding.Default.GetString(stream.ToByteArray());
    }
    /// <summary>
    /// 在当前流中搜索指定的 byte[]
    /// </summary>
    /// <param name="arr"></param>
    /// <param name="key">搜索关键字</param>
    /// <param name="beginPosition">搜索开始位置</param>
    /// <returns>如果存在则返回byte[]在流中首次出现的位置,否则返回 -1</returns>
    public static long Search(this Stream stream, long beginPosition, byte[] key)
    {
      if (stream == null || stream.Length <= beginPosition)
        return -1;

      if (key == null || stream.Length < key.Length)
        return -1;

      long i=-1;
      long j = -1;
      int currentByte = int.MinValue;
      for(i=beginPosition;i<stream.Length;i++)
      {
        if (stream.Length < key.Length + i)
          break;

        stream.Seek(i, SeekOrigin.Begin);
        for (j = 0; j < key.Length; j++)
        {
          currentByte = stream.ReadByte();
          if (currentByte != key[j])
            break;
        }
        if (j == key.Length)
          return i;

        if(currentByte == -1)
          break;
      }
      return -1;
    }
    #endregion

    #region byte[] 扩展
    /// <summary>
    /// byte[] 转换为 stream 流
    /// </summary>
    /// <returns></returns>
    public static Stream ToStream(this byte[] arr)
    {
      Stream stream = new MemoryStream(arr);
      // 设置当前流的位置为流的开始 www.2cto.com
      stream.Seek(0, SeekOrigin.Begin);
      return stream;
    }
    /// <summary>
    /// byte[] 转换为 Image
    /// </summary>
    /// <returns></returns>
    public static Image ToImage(this byte[] arr)
    {
      return Image.FromStream(arr.ToStream());
    }
    /// <summary>
    /// 转换为 string,使用 Encoding.Default 编码
    /// </summary>
    /// <returns></returns>
    public static string ToStr(this byte[] arr)
    {
      return System.Text.Encoding.Default.GetString(arr);
    }
    /// <summary>
    /// 搜索
    /// </summary>
    /// <param name="arr"></param>
    /// <param name="key">搜索关键字</param>
    /// <param name="beginPos">搜索开始位置</param>
    /// <returns></returns>
    public static int Search(this byte[] arr, int beginPosition, byte[] key)
    {
      if (arr == null || arr.Length <= beginPosition)
        return -1;

      if (key == null || arr.Length < key.Length)
        return -1;

      int i = -1;
      int j = -1;
      for (i = beginPosition; i < arr.Length; i++)
      {
        if (arr.Length < key.Length + i)
          break;

        for (j = 0; j < key.Length; j++)
        {
          if (arr[i+j] != key[j])
            break;
        }
        if (j == key.Length)
          return i;
      }
      return -1;
    }
    #endregion

    #region string 扩展
    /// <summary>
    /// string 转换为 byte[]
    /// </summary>
    /// <returns></returns>
    public static byte[] ToByteArray(this string str)
    {
      return System.Text.Encoding.Default.GetBytes(str);
    }
    /// <summary>
    /// string 转换为 Stream
    /// </summary>
    /// <returns></returns>
    public static Stream ToStream(this string str)
    {
      Stream stream = new MemoryStream(str.ToByteArray());
      // 设置当前流的位置为流的开始
      stream.Seek(0, SeekOrigin.Begin);
      return stream;
    }
    #endregion
  }
}

 

------------------------

-- 测试脚本

      // Sample data: the digits 0-9 followed by "111" (13 bytes in total).
      byte[] arr = "0123456789111".ToByteArray();
      byte[] key1 = "123".ToByteArray();
      byte[] key2 = "678".ToByteArray();
      byte[] key3 = "911".ToByteArray();
      byte[] key4 = "111".ToByteArray();
      // Search inside a stream.
      Stream sm = arr.ToStream();
      long index1 = sm.Search(0, key1); // expected 1
      long index2 = sm.Search(0, key2); // expected 6
      long index3 = sm.Search(0, key3); // expected 9
      long index4 = sm.Search(0, key4); // expected 10
      // Search inside a byte[]; the results should equal the stream results above.
      long index10 = arr.Search(0, key1); // expected 1
      long index20 = arr.Search(0, key2); // expected 6
      long index30 = arr.Search(0, key3); // expected 9
      long index40 = arr.Search(0, key4); // expected 10

posted @ 2012-05-24 14:49  铁芒阁  阅读(16472)  评论(0编辑  收藏  举报
分享到: