构建高性能java程序-使用mapped file创建超大的矩阵
今天在一个博客中看到一个程序,使用mapped file机制,创建超大的矩阵,主要是为了节省内存,避免内存溢出异常。主要代码如下:
/**
*
*/
package high.performace.java;
import java.io.Closeable;
import java.io.IOException;
import java.io.RandomAccessFile;
import java.lang.reflect.Field;
import java.lang.reflect.Method;
import java.nio.ByteBuffer;
import java.nio.MappedByteBuffer;
import java.nio.channels.FileChannel;
import java.util.ArrayList;
import java.util.List;
import sun.misc.Cleaner;
import sun.nio.ch.DirectBuffer;
/**
 * A {@code width x height} matrix of {@code double} values stored in a file that is
 * memory-mapped in regions of at most {@link #MAPPING_SIZE} bytes.
 *
 * <p>Cells are laid out row by row ({@code y * width + x}), 8 bytes per cell. The matrix
 * must be {@linkplain #close() closed} when no longer needed; after that, {@link #get} and
 * {@link #set} fail with an {@link IndexOutOfBoundsException} because the mapped regions
 * have been discarded.
 */
public class LargeDoubleMatrix implements Closeable {
    /** Size in bytes of one mapped region. It is a multiple of 8, so no cell straddles two regions. */
    private static final int MAPPING_SIZE = 1 << 30;
    /** Number of bytes occupied by one matrix cell. */
    private static final int BYTES_PER_DOUBLE = 8;

    private final RandomAccessFile raf;
    private final int width;
    private final int height;
    private final List<MappedByteBuffer> mappings = new ArrayList<MappedByteBuffer>();

    /**
     * Opens (creating if necessary) {@code filename} in read/write mode and maps enough of it
     * to hold {@code width * height} doubles.
     *
     * @param filename path of the backing file
     * @param width number of columns; must not be negative
     * @param height number of rows; must not be negative
     * @throws IllegalArgumentException if a dimension is negative or the byte size of the
     *     matrix does not fit in a {@code long}
     * @throws IOException if the file cannot be opened or mapped
     */
    public LargeDoubleMatrix(String filename, int width, int height) throws IOException {
        if (width < 0 || height < 0) {
            throw new IllegalArgumentException(
                    "Matrix dimensions must be non-negative: " + width + " x " + height);
        }
        // 8 * width * height can exceed Long.MAX_VALUE for very large int dimensions.
        if (height != 0 && width > Long.MAX_VALUE / BYTES_PER_DOUBLE / height) {
            throw new IllegalArgumentException(
                    "Matrix is too large to address: " + width + " x " + height);
        }
        this.width = width;
        this.height = height;
        long size = (long) BYTES_PER_DOUBLE * width * height;

        this.raf = new RandomAccessFile(filename, "rw");
        boolean mapped = false;
        try {
            FileChannel channel = raf.getChannel();
            for (long offset = 0; offset < size; offset += MAPPING_SIZE) {
                long length = Math.min(size - offset, MAPPING_SIZE);
                mappings.add(channel.map(FileChannel.MapMode.READ_WRITE, offset, length));
            }
            mapped = true;
        } finally {
            // Release everything on ANY failure (not only IOException), so neither the file
            // handle nor already-mapped regions are leaked.
            if (!mapped) {
                unmapAll();
                try {
                    raf.close();
                } catch (IOException ignored) {
                    // The failure that brought us here is the one worth reporting.
                }
            }
        }
    }

    /**
     * Returns the index of cell {@code (x, y)} counted in cells (not bytes) from the start
     * of the matrix.
     */
    protected long position(int x, int y) {
        return (long) y * width + x;
    }

    /** Returns the number of columns. */
    public int width() {
        return width;
    }

    /** Returns the number of rows. */
    public int height() {
        return height;
    }

    /**
     * Reads the cell at column {@code x}, row {@code y}.
     *
     * <p>Bounds are checked with {@code assert} only, i.e. when the JVM runs with {@code -ea}.
     */
    public double get(int x, int y) {
        assert x >= 0 && x < width;
        assert y >= 0 && y < height;
        long p = position(x, y) * BYTES_PER_DOUBLE;
        return mappings.get(mappingIndex(p)).getDouble(mappingOffset(p));
    }

    /**
     * Writes {@code d} to the cell at column {@code x}, row {@code y}.
     *
     * <p>Bounds are checked with {@code assert} only, i.e. when the JVM runs with {@code -ea}.
     */
    public void set(int x, int y, double d) {
        assert x >= 0 && x < width;
        assert y >= 0 && y < height;
        long p = position(x, y) * BYTES_PER_DOUBLE;
        mappings.get(mappingIndex(p)).putDouble(mappingOffset(p), d);
    }

    /**
     * Unmaps all regions and closes the backing file. Calling it again has no further effect
     * on the mappings.
     */
    @Override
    public void close() throws IOException {
        unmapAll();
        raf.close();
    }

    /** Index into {@link #mappings} of the region containing byte position {@code p}. */
    private static int mappingIndex(long p) {
        return (int) (p / MAPPING_SIZE);
    }

    /** Offset of byte position {@code p} within its region. */
    private static int mappingOffset(long p) {
        return (int) (p % MAPPING_SIZE);
    }

    /**
     * Unmaps every region and forgets it, so a later access fails with an exception instead
     * of touching memory that is no longer mapped.
     */
    private void unmapAll() {
        for (MappedByteBuffer mapping : mappings) {
            clean(mapping);
        }
        mappings.clear();
    }

    /**
     * Best-effort eager unmapping of {@code mapping}.
     *
     * <p>Uses reflection so the class does not depend at compile time on JDK-internal types:
     * first {@code sun.misc.Unsafe.invokeCleaner(ByteBuffer)} (JDK 9+), then the buffer's own
     * {@code cleaner().clean()} (JDK 8). If neither is accessible the buffer is simply left
     * for the garbage collector to release.
     */
    private static void clean(MappedByteBuffer mapping) {
        if (mapping == null) {
            return;
        }
        try {
            Class<?> unsafeClass = Class.forName("sun.misc.Unsafe");
            Field theUnsafe = unsafeClass.getDeclaredField("theUnsafe");
            theUnsafe.setAccessible(true);
            Object unsafe = theUnsafe.get(null);
            Method invokeCleaner = unsafeClass.getMethod("invokeCleaner", ByteBuffer.class);
            invokeCleaner.invoke(unsafe, mapping);
            return;
        } catch (Exception ignored) {
            // invokeCleaner is not available (JDK 8) or not permitted; try the older route.
        }
        try {
            Method cleanerMethod = mapping.getClass().getMethod("cleaner");
            cleanerMethod.setAccessible(true);
            Object cleaner = cleanerMethod.invoke(mapping);
            if (cleaner != null) {
                Method cleanMethod = cleaner.getClass().getMethod("clean");
                cleanMethod.setAccessible(true);
                cleanMethod.invoke(cleaner);
            }
        } catch (Exception ignored) {
            // Eager unmapping is an optimisation only; the GC releases the mapping eventually.
        }
    }
}
package high.performace.java;
import java.io.IOException;
/**
 * Small driver that fills and verifies the diagonal of a {@link LargeDoubleMatrix} and
 * reports the elapsed time and the change in used heap.
 */
public class LargeDoubleMatrixTest {
    /**
     * Creates an {@code x*x} by {@code y*y} matrix backed by the file {@code ldm.test},
     * writes {@code i} to every diagonal cell {@code (i, i)}, reads the values back and
     * prints timing and heap usage. The read-back check uses {@code assert}, so it is only
     * effective when the JVM runs with {@code -ea}.
     *
     * @param x square root of the matrix width
     * @param y square root of the matrix height
     * @throws IllegalArgumentException if {@code x*x} or {@code y*y} does not fit in an {@code int}
     * @throws IOException if the backing file cannot be opened, mapped or closed
     */
    public void getSetMatrix(int x, int y) throws IOException {
        long start = System.nanoTime();
        final long used0 = usedMemory();
        LargeDoubleMatrix matrix = new LargeDoubleMatrix("ldm.test", square(x), square(y));
        try {
            // The diagonal only exists up to the smaller dimension.
            final int diagonal = Math.min(matrix.width(), matrix.height());
            for (int i = 0; i < diagonal; i++) {
                matrix.set(i, i, i);
            }
            for (int i = 0; i < diagonal; i++) {
                assert matrix.get(i, i) == i;
            }
            long time = System.nanoTime() - start;
            final long used = usedMemory() - used0;
            if (used == 0) {
                System.err.println("You need to use -XX:-UseTLAB to see small changes in memory usage.");
            }
            System.out.printf(
                    "Setting the diagonal took %,d ms, Heap used is %,d KB%n",
                    time / 1000 / 1000, used / 1024);
        } finally {
            // Always release the mappings and the file, even if a check above fails.
            matrix.close();
        }
    }

    /** Returns {@code v * v}, rejecting results that would silently overflow an {@code int}. */
    private static int square(int v) {
        long squared = (long) v * v;
        if (squared > Integer.MAX_VALUE) {
            throw new IllegalArgumentException("Dimension " + v + "^2 does not fit in an int");
        }
        return (int) squared;
    }

    /** Returns the heap currently in use as reported by {@link Runtime}. */
    private long usedMemory() {
        return Runtime.getRuntime().totalMemory() - Runtime.getRuntime().freeMemory();
    }

    /**
     * Runs the diagonal test.
     *
     * @param args {@code <x> [y]}; when {@code y} is omitted it defaults to {@code x}
     */
    public static void main(String[] args) throws IOException {
        if (args.length < 1) {
            System.err.println("Usage: LargeDoubleMatrixTest <x> [y]");
            System.exit(1);
        }
        int x = Integer.parseInt(args[0]);
        int y = args.length > 1 ? Integer.parseInt(args[1]) : x;
        new LargeDoubleMatrixTest().getSetMatrix(x, y);
    }
}
主要的核心思想,就是使用mapped file存储矩阵。由于mapped file不是存储在heap中的,极大地减小了heap的使用,避免了内存溢出的异常,而且能够减小内存的整体使用。这个思路,在做大数据存储计算的时候,很值得借鉴。例如java实现的cassandra在服务器端就采用了mapped的方式来存储数据。
[引用]
http://www.javacodegeeks.com/2012/01/using-memory-mapped-file-for-huge.html
浙公网安备 33010602011771号