今天在一个博客中看到一个程序,使用mapped file机制,创建超大的矩阵,主要是为了节省内存,避免内存溢出异常。主要代码如下:
package high.performace.java;

import java.io.Closeable;
import java.io.IOException;
import java.io.RandomAccessFile;
import java.lang.reflect.Field;
import java.lang.reflect.Method;
import java.nio.ByteBuffer;
import java.nio.MappedByteBuffer;
import java.nio.channels.FileChannel;
import java.util.ArrayList;
import java.util.List;

/**
 * A {@code width x height} matrix of doubles backed by a memory-mapped file
 * instead of a heap array.
 *
 * <p>The file is mapped read/write in chunks of at most {@link #MAPPING_SIZE}
 * bytes, because a single {@link FileChannel#map} call cannot cover more than
 * {@code Integer.MAX_VALUE} bytes. Cells are stored row by row, 8 bytes each
 * (see {@link #position(int, int)}).
 *
 * <p>Instances must be closed to release the mappings and the file handle.
 * No synchronization is performed by this class.
 */
public class LargeDoubleMatrix implements Closeable {
    /** Size of one mapped chunk: 1 GiB, a multiple of 8 so no double straddles two chunks. */
    private static final int MAPPING_SIZE = 1 << 30;
    /** Number of bytes occupied by one matrix cell. */
    private static final int BYTES_PER_DOUBLE = 8;

    private final RandomAccessFile raf;
    private final int width;
    private final int height;
    private final List<MappedByteBuffer> mappings = new ArrayList<MappedByteBuffer>();
    /** Set by {@link #close()}; guards against touching unmapped memory afterwards. */
    private boolean closed;

    /**
     * Opens (creating if necessary) {@code filename} and maps enough of it to
     * hold {@code width * height} doubles.
     *
     * @param filename path of the backing file, opened in {@code "rw"} mode
     * @param width    number of columns, must not be negative
     * @param height   number of rows, must not be negative
     * @throws IllegalArgumentException if {@code width} or {@code height} is negative
     * @throws IOException              if the file cannot be opened or mapped
     */
    public LargeDoubleMatrix(String filename, int width, int height) throws IOException {
        if (width < 0 || height < 0) {
            throw new IllegalArgumentException(
                    "Matrix dimensions must be non-negative: " + width + " x " + height);
        }
        this.width = width;
        this.height = height;
        this.raf = new RandomAccessFile(filename, "rw");
        try {
            FileChannel channel = raf.getChannel();
            // Widen to long before multiplying so large matrices do not overflow int.
            long size = (long) BYTES_PER_DOUBLE * width * height;
            for (long offset = 0; offset < size; offset += MAPPING_SIZE) {
                long chunk = Math.min(size - offset, MAPPING_SIZE);
                mappings.add(channel.map(FileChannel.MapMode.READ_WRITE, offset, chunk));
            }
        } catch (IOException | RuntimeException | Error e) {
            // Do not leak the file handle or the chunks mapped so far.
            releaseMappings();
            try {
                raf.close();
            } catch (IOException closeFailure) {
                e.addSuppressed(closeFailure);
            }
            throw e;
        }
    }

    /**
     * Returns the linear cell index of {@code (x, y)} in row-major order.
     *
     * @param x column index
     * @param y row index
     * @return {@code y * width + x}, computed as a {@code long}
     */
    protected long position(int x, int y) {
        return (long) y * width + x;
    }

    /** @return the number of columns */
    public int width() {
        return width;
    }

    /** @return the number of rows */
    public int height() {
        return height;
    }

    /**
     * Reads the cell at column {@code x}, row {@code y}.
     *
     * @throws IndexOutOfBoundsException if the coordinates are outside the matrix
     * @throws IllegalStateException     if the matrix has been closed
     */
    public double get(int x, int y) {
        long p = byteOffset(x, y);
        return mappings.get((int) (p / MAPPING_SIZE)).getDouble((int) (p % MAPPING_SIZE));
    }

    /**
     * Writes {@code d} to the cell at column {@code x}, row {@code y}.
     *
     * @throws IndexOutOfBoundsException if the coordinates are outside the matrix
     * @throws IllegalStateException     if the matrix has been closed
     */
    public void set(int x, int y, double d) {
        long p = byteOffset(x, y);
        mappings.get((int) (p / MAPPING_SIZE)).putDouble((int) (p % MAPPING_SIZE), d);
    }

    /**
     * Releases all mappings and closes the backing file. Calling this more
     * than once has no further effect.
     *
     * @throws IOException if closing the file fails
     */
    @Override
    public void close() throws IOException {
        if (closed) {
            return;
        }
        closed = true;
        try {
            releaseMappings();
        } finally {
            raf.close();
        }
    }

    /** Validates the coordinates and returns the byte offset of the cell within the file. */
    private long byteOffset(int x, int y) {
        if (closed) {
            throw new IllegalStateException("LargeDoubleMatrix is closed");
        }
        // Explicit checks (not asserts): an out-of-range x would otherwise
        // silently address a cell in a neighbouring row.
        if (x < 0 || x >= width || y < 0 || y >= height) {
            throw new IndexOutOfBoundsException(
                    "(" + x + ", " + y + ") is outside " + width + " x " + height);
        }
        return position(x, y) * BYTES_PER_DOUBLE;
    }

    /** Unmaps every chunk and forgets it so it can never be dereferenced again. */
    private void releaseMappings() {
        for (MappedByteBuffer mapping : mappings) {
            unmap(mapping);
        }
        mappings.clear();
    }

    /**
     * Best-effort eager unmapping of {@code mapping}.
     *
     * <p>There is no public API for this, so JDK-internal hooks are reached
     * reflectively: {@code sun.misc.Unsafe.invokeCleaner} (present on Java 9+),
     * then the buffer's own {@code cleaner().clean()} (Java 8 and earlier). If
     * neither is usable the mapping is left for the garbage collector.
     */
    private static void unmap(MappedByteBuffer mapping) {
        if (mapping == null) {
            return;
        }
        try {
            Class<?> unsafeClass = Class.forName("sun.misc.Unsafe");
            Field singleton = unsafeClass.getDeclaredField("theUnsafe");
            singleton.setAccessible(true);
            Method invokeCleaner = unsafeClass.getMethod("invokeCleaner", ByteBuffer.class);
            invokeCleaner.invoke(singleton.get(null), mapping);
            return;
        } catch (ReflectiveOperationException | RuntimeException ignored) {
            // invokeCleaner is unavailable on this runtime; try the legacy hook below.
        }
        try {
            Method cleanerAccessor = mapping.getClass().getMethod("cleaner");
            cleanerAccessor.setAccessible(true);
            Object cleaner = cleanerAccessor.invoke(mapping);
            if (cleaner != null) {
                Method clean = cleaner.getClass().getMethod("clean");
                clean.setAccessible(true);
                clean.invoke(cleaner);
            }
        } catch (ReflectiveOperationException | RuntimeException ignored) {
            // Eager unmapping is only an optimisation; the GC releases the mapping eventually.
        }
    }
}
package high.performace.java;

import java.io.IOException;

/**
 * Command-line smoke test for {@link LargeDoubleMatrix}: writes and re-reads
 * the diagonal of a file-backed matrix and reports elapsed time and the
 * change in used heap.
 */
public class LargeDoubleMatrixTest {

    /**
     * Creates an {@code (x * x) x (y * y)} matrix backed by the file
     * {@code ldm.test}, sets each diagonal cell {@code (i, i)} to {@code i},
     * verifies the values with {@code assert} (run with {@code -ea}), and
     * prints timing and heap usage.
     *
     * @param x square root of the matrix width
     * @param y square root of the matrix height
     * @throws ArithmeticException if {@code x * x} or {@code y * y} overflows an {@code int}
     * @throws IOException         if the backing file cannot be opened, mapped or closed
     */
    public void getSetMatrix(int x, int y) throws IOException {
        long start = System.nanoTime();
        final long used0 = usedMemory();
        // multiplyExact: fail loudly instead of creating a matrix with a wrapped-around size.
        int width = Math.multiplyExact(x, x);
        int height = Math.multiplyExact(y, y);
        // try-with-resources so the mappings are released even if set/get throws.
        try (LargeDoubleMatrix matrix = new LargeDoubleMatrix("ldm.test", width, height)) {
            // The diagonal only exists up to the smaller dimension.
            int diagonal = Math.min(matrix.width(), matrix.height());
            for (int i = 0; i < diagonal; i++) {
                matrix.set(i, i, i);
            }
            for (int i = 0; i < diagonal; i++) {
                assert matrix.get(i, i) == i;
            }
            long time = System.nanoTime() - start;
            final long used = usedMemory() - used0;
            if (used == 0) {
                System.err.println("You need to use -XX:-UseTLAB to see small changes in memory usage.");
            }
            System.out.printf(
                    "Setting the diagonal took %,d ms, Heap used is %,d KB%n",
                    time / 1000 / 1000, used / 1024);
        }
    }

    /** @return heap bytes currently in use, as total memory minus free memory */
    private long usedMemory() {
        Runtime runtime = Runtime.getRuntime();
        return runtime.totalMemory() - runtime.freeMemory();
    }

    /**
     * Entry point.
     *
     * @param args {@code <x> [y]}; when {@code y} is omitted it defaults to {@code x}
     * @throws IllegalArgumentException if no argument is supplied
     * @throws IOException              if the matrix file cannot be used
     */
    public static void main(String[] args) throws IOException {
        if (args.length < 1) {
            throw new IllegalArgumentException("usage: LargeDoubleMatrixTest <x> [y]");
        }
        int x = Integer.parseInt(args[0]);
        int y = args.length > 1 ? Integer.parseInt(args[1]) : x;
        new LargeDoubleMatrixTest().getSetMatrix(x, y);
    }
}
核心思想就是使用mapped file存储矩阵。由于mapped file不是存储在heap中的,极大地减小了heap的使用,避免了内存溢出的异常,而且能够减小内存的整体使用。这个思路在做大数据存储计算的时候很值得借鉴,例如Java实现的Cassandra在服务器端就采用了mapped的方式来存储数据。 [引用]