hunk ./demo.hs 4 --- fast +import System.IO +import System.Environment (getArgs) hunk ./demo.hs 7 --- this will take a while but should eventurally return the right answer -main = do - (res :: Int) <- return . last =<< externalSort bigList - putStrLn . show $ res +-- to do: compare speed against unix sort util on a 10 million line file. hunk ./demo.hs 9 - --- pure, in-memory prelude sort will crash your computer -mainBadIdea = putStrLn . show $ last $ sort bigList +-- pure in-memory prelude sort will crash your computer when the list gets over a million elements or so +-- externalsort caches the sublists used in the sort algorithm on your hard drive, so you can sort a much larger list. +{- +*Main> :! time ./demo preludesort +demo: out of memory (requested 1048576 bytes) +Command exited with non-zero status 1 +4.88user 0.68system 0:21.11elapsed 26%CPU (0avgtext+0avgdata 0maxresident)k +0inputs+0outputs (0major+64817minor)pagefaults 0swaps +*Main> :! time ./demo externalsort +10000000 +73.87user 1.96system 1:24.25elapsed 90%CPU (0avgtext+0avgdata 0maxresident)k +792inputs+156280outputs (6major+16739minor)pagefaults 0swaps +*Main>:! ulimit -v +262144 +*Main> +-} +main = do + [s] <- getArgs + case s of + "preludesort" -> run $ return . sort + "externalsort" -> run externalSort + _ -> let msg = "usage: ./demo preludesort or ./demo externalsort" + in fail msg hunk ./demo.hs 33 +run s = putStrLn =<< return . show . last =<< s bigList hunk ./demo.hs 35 +-- even last bigList gives out of memory error hunk ./demo.hs 37 -bigList = reverse [1..10^8] + +bigList = {-reverse-} [1..10^7] hunk ./external-sort.cabal 7 -Author: Ben Midfield +Author: Ben "Midfield" Lee hunk ./external-sort.cabal 11 -Stability: ExperimentalCategory: Other +Stability: Experimental +Category: Other hunk ./src/Algorithms/ExternalSort.lhs 88 +> externalSort :: (Ord a, Bin.Binary a) => [a] -> IO [a] hunk ./demo.hs 6 +import System.Time +import HSH + hunk ./demo.hs 15 -*Main> :! time ./demo preludesort + +The behavior below was on a demo executable, compiled. (In ghci, even last on a 10 million element list +caused an out of memory error.) The test computer had 256M physical ram and was ulimited to 256M cache. + +*Main>:! ulimit -v +262144 + +For 10 million element list: +*Main> :! time ./demo preludesort 7 hunk ./demo.hs 28 -*Main> :! time ./demo externalsort +*Main> :! time ./demo externalsort 7 hunk ./demo.hs 32 -*Main>:! ulimit -v -262144 -*Main> +./demo unixsort 7 +wrote bigfile, time: Mon Oct 20 15:25:26 CEST 2008 +demo: out of memory (requested 1048576 bytes) + +For 100 million element list: + hunk ./demo.hs 40 - [s] <- getArgs + [s,e] <- getArgs + let exp = read e hunk ./demo.hs 43 - "preludesort" -> run $ return . sort - "externalsort" -> run externalSort - _ -> let msg = "usage: ./demo preludesort or ./demo externalsort" + "preludesort" -> sortwith exp $ return . sort + "externalsort" -> sortwith exp externalSort + "unixsort" -> unixsort exp + _ -> let msg = "usage: ./demo preludesort 7 or ./demo externalsort 7 or ./demo unixsort 7 \ + \(sort 10 million element list)" hunk ./demo.hs 50 -run s = putStrLn =<< return . show . last =<< s bigList - --- even last bigList gives out of memory error -bigList :: [Int] +sortwith exp s = putStrLn =<< return . show . last =<< s ([1..10^exp ::Int]) hunk ./demo.hs 52 -bigList = {-reverse-} [1..10^7] +unixsort exp = do + let fn = "bigfile" + withFile fn AppendMode (\h -> (mapM_ (hPutStrLn h . show) ([1..10^exp::Int]) ) ) + putStrLn . ( ("wrote " ++ fn ++ ", time: ") ++ ) . show =<< getClockTime + run $ "time tail -n1 | sort " ++fn :: IO String + return () hunk ./demo.hs 36 -For 100 million element list: +For 100 million element list, external sort failed. +time ./demo externalsort 8 +demo: out of memory (requested 1048576 bytes) hunk ./demo.hs 40 +real 10m14.061s +user 8m26.712s +sys 0m11.793s +thartman@thartman-laptop:~/external-sort>ls -lh ExternalSort.bin +-rw-r--r-- 1 thartman thartman 764M Oct 20 15:50 ExternalSort.bin hunk ./demo.hs 45 + +The problem is not fitting a 10^8 element list in memory, the following works fine + (when compiled, though not in ghci): +t = putStrLn . show . last $ [1..10^8::Int] + +Maybe think about this more later. + hunk ./demo.hs 53 + hunk ./demo.hs 74 +t = putStrLn . show . last $ [1..10^8::Int] +