<rt id="bn8ez"></rt>
<label id="bn8ez"></label>

  • <span id="bn8ez"></span>

    <label id="bn8ez"><meter id="bn8ez"></meter></label>

    Chan Chen Coding...

    External Sorting

    Imagine you have the numbers 1 - 9

    9  7  2  6  3  4  8  5  1 

    And let's suppose that only 3 fit in memory at a time.

    So you'd break them into chunks of 3 and sort each, storing each result in a separate file:

    279 346 158 

    Now you'd open each of the three files as streams and read the first value from each:

    2 3 1 

    Output the lowest value 1, and get the next value from that stream, now you have:

    2 3 5 

    Output the next lowest value 2, and continue onwards until you've outputted the entire sorted list

    package sort.externalsort;

    import java.util.*;
    import java.io.*;
    import java.nio.charset.Charset;

    /**
     * Goal: offer a generic external-memory sorting program in Java.
     * 
     * It must be : - hackable (easy to adapt) - scalable to large files - sensibly
     * efficient.
     * 
     * This software is in the public domain.
     * 
     * Usage: java com/google/code/externalsorting/ExternalSort somefile.txt out.txt
     * 
     * You can change the default maximal number of temporary files with the -t
     * flag: java com/google/code/externalsorting/ExternalSort somefile.txt out.txt
     * -t 3
     * 
     * For very large files, you might want to use an appropriate flag to allocate
     * more memory to the Java VM: java -Xms2G
     * com/google/code/externalsorting/ExternalSort somefile.txt out.txt
     * 
     * By (in alphabetical order) Philippe Beaudoin, Jon Elsas, Christan Grant,
     * Daniel Haran, Daniel Lemire, April 2010 originally posted at
     * 
    http://www.daniel
     * -lemire.com/blog/archives/2010/04/01/external-memory-sorting-in-java/
     
    */
    public class ExternalSort {

        static int DEFAULTMAXTEMPFILES = 1024;

        // we divide the file into small blocks. If the blocks
        
    // are too small, we shall create too many temporary files.
        
    // If they are too big, we shall be using too much memory.
        public static long estimateBestSizeOfBlocks(File filetobesorted, int maxtmpfiles) {
            long sizeoffile = filetobesorted.length() * 2;
            /**
             * We multiply by two because later on someone insisted on counting the
             * memory usage as 2 bytes per character. By this model, loading a file with
             * 1 character will use 2 bytes.
             
    */
            // we don't want to open up much more than maxtmpfiles temporary files,
            
    // better run
            
    // out of memory first.
            long blocksize = sizeoffile / maxtmpfiles + (sizeoffile % maxtmpfiles == 0 ? 0 : 1);

            // on the other hand, we don't want to create many temporary files
            
    // for naught. If blocksize is smaller than half the free memory, grow it.
            long freemem = Runtime.getRuntime().freeMemory();
            if (blocksize < freemem / 2) {
                blocksize = freemem / 2;
            }
            return blocksize;
        }

        /**
         * This will simply load the file by blocks of x rows, then sort them
         * in-memory, and write the result to temporary files that have to be merged
         * later.
         * 
         * 
    @param file
         *          some flat file
         * 
    @param cmp
         *          string comparator
         * 
    @return a list of temporary flat files
         
    */
        public static List<File> sortInBatch(File file, Comparator<String> cmp) throws IOException {
            return sortInBatch(file, cmp, DEFAULTMAXTEMPFILES, Charset.defaultCharset(), null);
        }

        /**
         * This will simply load the file by blocks of x rows, then sort them
         * in-memory, and write the result to temporary files that have to be merged
         * later. You can specify a bound on the number of temporary files that will
         * be created.
         * 
         * 
    @param file
         *          some flat file
         * 
    @param cmp
         *          string comparator
         * 
    @param maxtmpfiles
         *          maximal number of temporary files
         * 
    @param Charset
         *          character set to use (can use Charset.defaultCharset())
         * 
    @param tmpdirectory
         *          location of the temporary files (set to null for default location)
         * 
    @return a list of temporary flat files
         
    */
        public static List<File> sortInBatch(File file, Comparator<String> cmp, int maxtmpfiles, Charset cs,
                File tmpdirectory) throws IOException {
            List<File> files = new ArrayList<File>();
            BufferedReader fbr = new BufferedReader(new InputStreamReader(new FileInputStream(file), cs));
            long blocksize = estimateBestSizeOfBlocks(file, maxtmpfiles);// in bytes

            try {
                List<String> tmplist = new ArrayList<String>();
                String line = "";
                try {
                    while (line != null) {
                        long currentblocksize = 0;// in bytes
                        while ((currentblocksize < blocksize) && ((line = fbr.readLine()) != null)) { // as
                                                                                                                                                                                    
    // long
                                                                                                                                                                                    
    // as
                                                                                                                                                                                    
    // you
                                                                                                                                                                                    
    // have
                                                                                                                                                                                    
    // enough
                                                                                                                                                                                    
    // memory
                            tmplist.add(line);
                            currentblocksize += line.length() * 2; // java uses 16 bits per
                                                                                                            
    // character?
                        }
                        files.add(sortAndSave(tmplist, cmp, cs, tmpdirectory));
                        tmplist.clear();
                    }
                } catch (EOFException oef) {
                    if (tmplist.size() > 0) {
                        files.add(sortAndSave(tmplist, cmp, cs, tmpdirectory));
                        tmplist.clear();
                    }
                }
            } finally {
                fbr.close();
            }
            return files;
        }

        /**
         * Sort a list and save it to a temporary file
         * 
         * 
    @return the file containing the sorted data
         * 
    @param tmplist
         *          data to be sorted
         * 
    @param cmp
         *          string comparator
         * 
    @param cs
         *          charset to use for output (can use Charset.defaultCharset())
         * 
    @param tmpdirectory
         *          location of the temporary files (set to null for default location)
         
    */
        public static File sortAndSave(List<String> tmplist, Comparator<String> cmp, Charset cs, File tmpdirectory)
                throws IOException {
            Collections.sort(tmplist, cmp);
            File newtmpfile = File.createTempFile("sortInBatch", "flatfile", tmpdirectory);
            newtmpfile.deleteOnExit();
            BufferedWriter fbw = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(newtmpfile), cs));
            try {
                for (String r : tmplist) {
                    fbw.write(r);
                    fbw.newLine();
                }
            } finally {
                fbw.close();
            }
            return newtmpfile;
        }

        /**
         * This merges a bunch of temporary flat files
         * 
         * 
    @param files
         * 
    @param output
         *          file
         * 
    @return The number of lines sorted. (P. Beaudoin)
         
    */
        public static int mergeSortedFiles(List<File> files, File outputfile, final Comparator<String> cmp)
                throws IOException {
            return mergeSortedFiles(files, outputfile, cmp, Charset.defaultCharset());
        }

        /**
         * This merges a bunch of temporary flat files
         * 
         * 
    @param files
         * 
    @param output
         *          file
         * 
    @param Charset
         *          character set to use to load the strings
         * 
    @return The number of lines sorted. (P. Beaudoin)
         
    */
        public static int mergeSortedFiles(List<File> files, File outputfile, final Comparator<String> cmp,
                Charset cs) throws IOException {
            PriorityQueue<BinaryFileBuffer> pq = new PriorityQueue<BinaryFileBuffer>(11,
                    new Comparator<BinaryFileBuffer>() {
                        public int compare(BinaryFileBuffer i, BinaryFileBuffer j) {
                            return cmp.compare(i.peek(), j.peek());
                        }
                    });
            for (File f : files) {
                BinaryFileBuffer bfb = new BinaryFileBuffer(f, cs);
                pq.add(bfb);
            }
            BufferedWriter fbw = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(outputfile), cs));
            int rowcounter = 0;
            try {
                while (pq.size() > 0) {
                    BinaryFileBuffer bfb = pq.poll();
                    String r = bfb.pop();
                    fbw.write(r);
                    fbw.newLine();
                    ++rowcounter;
                    if (bfb.empty()) {
                        bfb.fbr.close();
                        bfb.originalfile.delete();// we don't need you anymore
                    } else {
                        pq.add(bfb); // add it back
                    }
                }
            } finally {
                fbw.close();
                for (BinaryFileBuffer bfb : pq)
                    bfb.close();
            }
            return rowcounter;
        }

        public static void main(String[] args) throws IOException {

            boolean verbose = false;
            int maxtmpfiles = DEFAULTMAXTEMPFILES;
            Charset cs = Charset.defaultCharset();
            String inputfile = null, outputfile = null;
            for (int param = 0; param < args.length; ++param) {
                if (args[param].equals("-v") || args[param].equals("--verbose"))
                    verbose = true;
                else if ((args[param].equals("-t") || args[param].equals("--maxtmpfiles")) && args.length > param + 1) {
                    param++;
                    maxtmpfiles = Integer.parseInt(args[param]);
                } else if ((args[param].equals("-c") || args[param].equals("--charset")) && args.length > param + 1) {
                    param++;
                    cs = Charset.forName(args[param]);
                } else {
                    if (inputfile == null)
                        inputfile = args[param];
                    else if (outputfile == null)
                        outputfile = args[param];
                    else
                        System.out.println("Unparsed: " + args[param]);
                }
            }
            if (outputfile == null) {
                System.out.println("please provide input and output file names");
                return;
            }
            Comparator<String> comparator = new Comparator<String>() {
                public int compare(String r1, String r2) {
                    return r1.compareTo(r2);
                }
            };
            List<File> l = sortInBatch(new File(inputfile), comparator, maxtmpfiles, cs, null);
            if (verbose)
                System.out.println("created " + l.size() + " tmp files");
            mergeSortedFiles(l, new File(outputfile), comparator, cs);
        }
    }

    class BinaryFileBuffer {
        public static int BUFFERSIZE = 2048;
        public BufferedReader fbr;
        public File originalfile;
        private String cache;
        private boolean empty;

        public BinaryFileBuffer(File f, Charset cs) throws IOException {
            originalfile = f;
            fbr = new BufferedReader(new InputStreamReader(new FileInputStream(f), cs), BUFFERSIZE);
            reload();
        }

        public boolean empty() {
            return empty;
        }

        private void reload() throws IOException {
            try {
                if ((this.cache = fbr.readLine()) == null) {
                    empty = true;
                    cache = null;
                } else {
                    empty = false;
                }
            } catch (EOFException oef) {
                empty = true;
                cache = null;
            }
        }

        public void close() throws IOException {
            fbr.close();
        }

        public String peek() {
            if (empty())
                return null;
            return cache.toString();
        }

        public String pop() throws IOException {
            String answer = peek();
            reload();
            return answer;
        }

    }

     



    -----------------------------------------------------
    Silence, the way to avoid many problems;
    Smile, the way to solve many problems;

    posted on 2012-07-04 02:27 Chan Chen 閱讀(464) 評論(0)  編輯  收藏 所屬分類: Algorithm

    主站蜘蛛池模板: 91在线亚洲综合在线| 久久精品国产亚洲AV高清热| 亚洲国产精品成人午夜在线观看 | 国产精品久久久久久久久久免费| 亚洲美女中文字幕| 182tv免费观看在线视频| 亚洲精品永久www忘忧草| 免费专区丝袜脚调教视频| 亚洲国语在线视频手机在线| 19禁啪啪无遮挡免费网站| 亚洲国产亚洲综合在线尤物| 又黄又爽又成人免费视频| 亚洲综合无码无在线观看| 日日夜夜精品免费视频| 特黄特色的大片观看免费视频| 亚洲爽爽一区二区三区| 你是我的城池营垒免费看| 亚洲国产精品久久久久| 日韩中文字幕精品免费一区| 亚洲色欲色欱wwW在线| 又粗又大又长又爽免费视频| 巨胸狂喷奶水视频www网站免费| 亚洲人JIZZ日本人| 青青青国产在线观看免费| 亚洲AV无码专区在线观看成人| heyzo亚洲精品日韩| a级午夜毛片免费一区二区| 久久综合亚洲色一区二区三区| 免费看AV毛片一区二区三区| 四虎影视永久在线精品免费| 久久亚洲国产欧洲精品一| 美女被cao免费看在线看网站| 亚洲欧洲免费无码| 亚洲一区二区三区偷拍女厕| 日本免费xxxx| 九九九精品视频免费| 亚洲欧洲视频在线观看| 亚洲视频在线一区二区| 亚洲免费福利在线视频| 一区二区免费电影| 亚洲va在线va天堂va手机|