File: Diff.hs

package info (click to toggle)
haskell-hashed-storage 0.5.9-2
  • links: PTS, VCS
  • area: main
  • in suites: wheezy
  • size: 288 kB
  • sloc: haskell: 2,157; ansic: 799; makefile: 3
file content (136 lines) | stat: -rw-r--r-- 5,523 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
module Storage.Hashed.Diff where

import Prelude hiding ( lookup, filter )
import qualified Data.ByteString.Lazy.Char8 as BL
import Storage.Hashed.Tree
import Storage.Hashed.AnchoredPath
import Data.List.LCS
import Data.List ( groupBy )

unidiff :: Tree IO -> Tree IO -> IO BL.ByteString
unidiff l r =
    do (from, to) <- diffTrees l r
       diffs <- sequence $ zipCommonFiles diff from to
       return $ BL.concat diffs
    where diff p a b = do x <- readBlob a
                          y <- readBlob b
                          return $ diff' p x y
          diff' p x y =
              case unifiedDiff x y of
                x' | BL.null x' -> BL.empty
                   | otherwise ->
                       (BL.pack $ "--- " ++ anchorPath "old" p ++ "\n" ++
                              "+++ " ++ anchorPath "new" p ++ "\n")
                       `BL.append` x'

type Line = BL.ByteString
data WeaveLine = Common Line
               | Remove Line
               | Add Line
               | Replace Line Line
               | Skip Int deriving Show

-- | A weave -- two files woven together, with common and differing regions
-- marked up. Cf. 'WeaveLine'.
type Weave = [WeaveLine]

-- | Sort of a sub-weave.
type Hunk = [WeaveLine]

-- | Produce unified diff (in a string form, ie. formatted) from a pair of
-- bytestrings.
unifiedDiff :: BL.ByteString -> BL.ByteString -> BL.ByteString
unifiedDiff a b = printUnified $ concat unifiedHunks
    where unifiedHunks = reduceContext 3 $ map unifyHunk $ hunks $ weave a b

-- | Weave two bytestrings. Intermediate data structure for the actual unidiff
-- implementation. No skips are produced.
weave :: BL.ByteString -> BL.ByteString -> Weave
weave a' b' = weave' left common right
    where left = init' (BL.split '\n' a') -- drop trailing newline
          right = init' (BL.split '\n' b') -- drop trailing newline
          init' [] = []
          init' x = init x
          common = lcs left right
          weave' []     []     [] = []
          weave' []     c      [] = error $ "oops: Left & Right empty, Common: " ++ show c
          weave' []     []     (b:bs) = Add b : weave' [] [] bs
          weave' (a:as) []     [] = Remove a : weave' as [] []
          weave' (a:as) []     (b:bs) = Replace a b : weave' as [] bs
          weave' (a:as) (c:cs) (b:bs)
                 | a == c && b == c = Common a : weave' as cs bs
                 | a == c && b /= c = Add b : weave' (a:as) (c:cs) bs
                 | a /= c && b == c = Remove a : weave' as (c:cs) (b:bs)
                 | a /= c && b /= c = Replace a b : weave' as (c:cs) bs
                 | otherwise = error "oops!"
          weave' a c b = error $ "oops: \nLeft: " ++ show a ++ "\nCommon: " ++ show c ++ "\nRight: " ++ show b

-- | Break up a 'Weave' into 'Hunk's.
hunks :: Weave -> [Hunk]
hunks = groupBy grp
    where grp (Common _) (Common _) = True
          grp (Common _) _ = False
          grp _ (Common _) = False
          grp _ _ = True

-- | Reformat a 'Hunk' into a format suitable for unified diff. Replaces are
-- turned into add/remove pairs, all removals in a hunk go before all
-- adds. 'Hunk's of 'Common' lines are left intact. Produces input suitable for
-- 'reduceContext'.
unifyHunk :: Hunk -> Hunk
unifyHunk h = case h of
                (Common _:_) -> h
                _ -> reorder $ concatMap breakup h
    where reorder h' = [ Remove a | Remove a <- h' ] ++ [ Add a | Add a <- h' ]
          breakup (Replace f t) = [Remove f, Add t]
          breakup x = [x]

-- | Break up a 'Weave' into unified 'Hunk's, leaving @n@ lines of context around
-- every hunk. Consecutive 'Common' lines not used as context are replaced with
-- 'Skip's.
reduceContext :: Int -> [Hunk] -> [Hunk]
reduceContext n hs =
    case hs of
      [] -> []
      [Common _:_] -> []
      [x] -> [x]
      [h,t] -> [reduce 0 n h, reduce n 0 t]
      (h:rest) -> reduce 0 n h :
                    map (reduce n n) (init rest) ++
                    [reduce n 0 $ last rest]
    where
      reduce s e h@(Common _:_)
          | length h <= s + e = h
          | otherwise = take s h ++
                        [Skip $ length h - e - s ] ++
                        drop (length h - e) h
      reduce _ _ h = h

-- | Format a 'Weave' for printing.
deweave :: Weave -> BL.ByteString
deweave = BL.unlines . map disp
    where disp (Common l) = BL.cons ' ' l
          disp (Remove l) = BL.cons '-' l
          disp (Add l) = BL.cons '+' l
          disp (Replace _ t) = BL.cons '!' t
          disp (Skip n) = BL.pack $ "-- skip " ++ show n ++ " lines --"

-- | Print a \"hunked\" weave in form of an unified diff. 'Hunk' boundaries are
-- marked up as 'Skip' lines. Cf. 'reduceContext'.
printUnified :: Weave -> BL.ByteString
printUnified hunked = printHunks 1 1 $ groupBy splits hunked
    where splits (Skip _) _ = False
          splits _ (Skip _) = False
          splits _ _ = True
          printHunks _ _ [] = BL.empty
          printHunks l r ([Skip n]:rest) =
              printHunks (n+l) (n+r) rest
          printHunks l r (h:rest) =
              (BL.pack $ "@@ -" ++ show l ++ "," ++
                show (removals h) ++ " +" ++ show r ++
                "," ++ show (adds h) ++ " @@\n")
              `BL.append` deweave h `BL.append`
               printHunks (l + removals h) (r + adds h) rest
          commons h = length [ () | (Common _) <- h ]
          adds h = commons h + length [ () | (Add _) <- h ]
          removals h = commons h + length [ () | (Remove _) <- h ]