File: Fold.hs

package info (click to toggle)
ghc 9.6.6-4
  • links: PTS, VCS
  • area: main
  • in suites: forky, trixie
  • size: 158,216 kB
  • sloc: haskell: 648,228; ansic: 81,656; cpp: 11,808; javascript: 8,444; sh: 5,831; fortran: 3,527; python: 3,277; asm: 2,523; makefile: 2,298; yacc: 1,570; lisp: 532; xml: 196; perl: 145; csh: 2
file content (67 lines) | stat: -rw-r--r-- 1,901 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
-- | Benchmark which formats paragraphs, like the @fold@ unix utility.
--
-- Tested in this benchmark:
--
-- * Reading the file
--
-- * Splitting into paragraphs
--
-- * Reformatting the paragraphs to a certain line width
--
-- * Concatenating the results using the text builder
--
-- * Writing back to a handle
--
{-# LANGUAGE CPP, OverloadedStrings #-}
module Benchmarks.Programs.Fold
    ( benchmark
    ) where

import Data.List (foldl')
import Data.List (intersperse)
import System.IO (Handle)
import Test.Tasty.Bench (Benchmark, bench, whnfIO)
import qualified Data.Text as T
import qualified Data.Text.IO as T
import qualified Data.Text.Lazy.Builder as TLB
import qualified Data.Text.Lazy as TL
import qualified Data.Text.Lazy.IO as TL

-- | Benchmark labelled @Fold@: read the file at path @i@ as strict text,
-- re-wrap it with 'fold' at a line width of 80, and write the resulting
-- lazy text to the handle @o@.
benchmark :: FilePath -> Handle -> Benchmark
benchmark i o = bench "Fold" (whnfIO run)
  where
    -- One complete read / fold / write cycle.
    run = do
        contents <- T.readFile i
        TL.hPutStr o (fold 80 contents)

-- | A paragraph is represented as the list of its words, each word being
-- a strict 'T.Text'.
--
type Paragraph = [T.Text]

-- | Re-wrap a whole text.
--
-- The input is split into paragraphs with 'paragraphs', every paragraph is
-- laid out by 'foldParagraph' using @maxWidth@ as the line width, and the
-- rendered paragraphs are joined with two newlines between them. All pieces
-- are combined in one builder, which is then converted to lazy text.
--
fold :: Int -> T.Text -> TL.Text
fold maxWidth input = TLB.toLazyText (mconcat separated)
  where
    rendered  = map (foldParagraph maxWidth) (paragraphs input)
    separated = intersperse "\n\n" rendered

-- | Lay out the words of one paragraph, greedily filling each line.
--
-- The first word always opens the first line. Every following word is
-- appended to the current line, preceded by a single space, when the
-- current line width plus the word length plus one does not exceed
-- @max'@; otherwise the word starts a new line. Widths are measured with
-- 'T.length'. An empty paragraph renders as the empty builder, and no
-- newline is added after the last line.
--
foldParagraph :: Int -> Paragraph -> TLB.Builder
foldParagraph _ [] = mempty
foldParagraph max' (w : ws) =
    fst (foldl' step (TLB.fromText w, T.length w) ws)
  where
    -- The accumulator pairs the output built so far with the width of the
    -- line currently being filled.
    step (acc, col) next =
        let size   = T.length next
            joined = col + size + 1
            piece  = TLB.fromText next
        in if joined <= max'
            then (mconcat [acc, " ", piece], joined)
            else (mconcat [acc, "\n", piece], size)

-- | Divide a text into paragraphs.
--
-- Every line is split into words with 'T.words'; a line that yields no
-- words acts as a paragraph separator. Consecutive non-blank lines are
-- merged into a single paragraph. Blank lines at the start or end of the
-- input, and runs of several blank lines, never produce an empty
-- paragraph.
--
paragraphs :: T.Text -> [Paragraph]
paragraphs = splitParagraphs . dropWhile null . map T.words . T.lines
  where
    -- Invariant: the argument is either empty or starts with a non-blank
    -- line, so each paragraph taken by 'break' holds at least one word.
    -- Leading blank lines are dropped before the first call, and the
    -- separators after each paragraph are dropped before recursing.
    splitParagraphs [] = []
    splitParagraphs ls = concat para : splitParagraphs (dropWhile null rest)
      where
        (para, rest) = break null ls