File: Manipulate.hs

package info (click to toggle)
haskell-text-manipulate 0.3.1.0-3
  • links: PTS
  • area: main
  • in suites: forky, sid, trixie
  • size: 112 kB
  • sloc: haskell: 711; makefile: 6
file content (257 lines) | stat: -rw-r--r-- 7,177 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE ViewPatterns #-}

-- Module      : Data.Text.Manipulate
-- Copyright   : (c) 2014-2020 Brendan Hay <brendan.g.hay@gmail.com>
-- License     : This Source Code Form is subject to the terms of
--               the Mozilla Public License, v. 2.0.
--               A copy of the MPL can be found in the LICENSE file or
--               you can obtain it at http://mozilla.org/MPL/2.0/.
-- Maintainer  : Brendan Hay <brendan.g.hay@gmail.com>
-- Stability   : experimental
-- Portability : non-portable (GHC extensions)

-- | Manipulate identifiers and structurally non-complex pieces
-- of text by delimiting word boundaries via a combination of whitespace,
-- control-characters, and case-sensitivity.
--
-- Assumptions have been made about word boundary characteristics inherint
-- in predominantely English text, please see individual function documentation
-- for further details and behaviour.
module Data.Text.Manipulate
  ( -- * Strict vs lazy types
    -- $strict

    -- * Unicode
    -- $unicode

    -- * Fusion
    -- $fusion

    -- * Subwords

    -- ** Removing words
    takeWord,
    dropWord,
    stripWord,

    -- ** Breaking on words
    breakWord,
    splitWords,

    -- * Character manipulation
    lowerHead,
    upperHead,
    mapHead,

    -- * Line manipulation
    indentLines,
    prependLines,

    -- * Ellipsis
    toEllipsis,
    toEllipsisWith,

    -- * Acronyms
    toAcronym,

    -- * Ordinals
    toOrdinal,

    -- * Casing
    toTitle,
    toCamel,
    toPascal,
    toSnake,
    toSpinal,
    toTrain,

    -- * Boundary predicates
    isBoundary,
    isWordBoundary,
  )
where

import qualified Data.Char as Char
import Data.List (intersperse)
import Data.Text (Text)
import qualified Data.Text as Text
import qualified Data.Text.Lazy as LText
import qualified Data.Text.Lazy.Manipulate as LMan
import Data.Text.Manipulate.Internal.Fusion (strict)
import qualified Data.Text.Manipulate.Internal.Fusion as Fusion
import Data.Text.Manipulate.Internal.Types

-- $strict
-- This library provides functions for manipulating both strict and lazy Text types.
-- The strict functions are provided by the "Data.Text.Manipulate" module, while the lazy
-- functions are provided by the "Data.Text.Lazy.Manipulate" module.

-- $unicode
-- While this library supports Unicode in a similar fashion to the
-- underlying <http://hackage.haskell.org/package/text text> library,
-- more explicit Unicode handling of word boundaries can be found in the
-- <http://hackage.haskell.org/package/text-icu text-icu> library.

-- $fusion
-- Many functions in this module are subject to fusion, meaning that
-- a pipeline of such functions will usually allocate at most one Text value.
--
-- Functions that can be fused by the compiler are documented with the
-- phrase /Subject to fusion/.

-- DEBUG:
-- import Data.Text.Internal.Fusion        (stream)
-- import Data.Text.Internal.Fusion.Common (unstreamList)
-- tokens = unstreamList . Fusion.tokenise . stream

-- FIXME:
-- dropWord "ALong" == ""

-- | Lowercase the first character of a piece of text.
--
-- >>> lowerHead "Title Cased"
-- "title Cased"
lowerHead :: Text -> Text
lowerHead = mapHead Char.toLower

-- | Uppercase the first character of a piece of text.
--
-- >>> upperHead "snake_cased"
-- "Snake_cased"
upperHead :: Text -> Text
upperHead = mapHead Char.toUpper

-- | Apply a function to the first character of a piece of text.
mapHead :: (Char -> Char) -> Text -> Text
mapHead f x =
  case Text.uncons x of
    Just (c, cs) -> Text.singleton (f c) <> cs
    Nothing -> x

-- | Indent newlines by the given number of spaces.
--
-- /See:/ 'prependLines'
indentLines :: Int -> Text -> Text
indentLines n = prependLines (Text.replicate n " ")

-- | Prepend newlines with the given separator
prependLines :: Text -> Text -> Text
prependLines sep = mappend sep . Text.unlines . intersperse sep . Text.lines

-- | O(n) Truncate text to a specific length.
-- If the text was truncated the ellipsis sign "..." will be appended.
--
-- /See:/ 'toEllipsisWith'
toEllipsis :: Int -> Text -> Text
toEllipsis n = toEllipsisWith n "..."

-- | O(n) Truncate text to a specific length.
-- If the text was truncated the given ellipsis sign will be appended.
toEllipsisWith ::
  -- | Length.
  Int ->
  -- | Ellipsis.
  Text ->
  Text ->
  Text
toEllipsisWith n suf x
  | Text.length x > n = Text.take n x <> suf
  | otherwise = x

-- | O(n) Returns the first word, or the original text if no word
-- boundary is encountered. /Subject to fusion./
takeWord :: Text -> Text
takeWord = strict Fusion.takeWord

-- | O(n) Return the suffix after dropping the first word. If no word
-- boundary is encountered, the result will be empty. /Subject to fusion./
dropWord :: Text -> Text
dropWord = strict Fusion.dropWord

-- | Break a piece of text after the first word boundary is encountered.
--
-- >>> breakWord "PascalCasedVariable"
-- ("Pacal", "CasedVariable")
--
-- >>> breakWord "spinal-cased-variable"
-- ("spinal", "cased-variable")
breakWord :: Text -> (Text, Text)
breakWord x = (takeWord x, dropWord x)

-- | O(n) Return the suffix after removing the first word, or 'Nothing'
-- if no word boundary is encountered.
--
-- >>> stripWord "HTML5Spaghetti"
-- Just "Spaghetti"
--
-- >>> stripWord "noboundaries"
-- Nothing
stripWord :: Text -> Maybe Text
stripWord x
  | Text.length y < Text.length x = Just y
  | otherwise = Nothing
  where
    y = dropWord x

-- | O(n) Split into a list of words delimited by boundaries.
--
-- >>> splitWords "SupercaliFrag_ilistic"
-- ["Supercali","Frag","ilistic"]
splitWords :: Text -> [Text]
splitWords = go
  where
    go x = case breakWord x of
      (h, t)
        | Text.null h -> go t
        | Text.null t -> [h]
        | otherwise -> h : go t

-- | O(n) Create an adhoc acronym from a piece of cased text.
--
-- >>> toAcronym "AmazonWebServices"
-- Just "AWS"
--
-- >>> toAcronym "Learn-You Some_Haskell"
-- Just "LYSH"
--
-- >>> toAcronym "this_is_all_lowercase"
-- Nothing
toAcronym :: Text -> Maybe Text
toAcronym (Text.filter Char.isUpper -> x)
  | Text.length x > 1 = Just x
  | otherwise = Nothing

-- | Render an ordinal used to denote the position in an ordered sequence.
--
-- >>> toOrdinal (101 :: Int)
-- "101st"
--
-- >>> toOrdinal (12 :: Int)
-- "12th"
toOrdinal :: Integral a => a -> Text
toOrdinal = LText.toStrict . LMan.toOrdinal

-- | O(n) Convert casing to @Title Cased Phrase@. /Subject to fusion./
toTitle :: Text -> Text
toTitle = strict Fusion.toTitle

-- | O(n) Convert casing to @camelCasedPhrase@. /Subject to fusion./
toCamel :: Text -> Text
toCamel = strict Fusion.toCamel

-- | O(n) Convert casing to @PascalCasePhrase@. /Subject to fusion./
toPascal :: Text -> Text
toPascal = strict Fusion.toPascal

-- | O(n) Convert casing to @snake_cased_phrase@. /Subject to fusion./
toSnake :: Text -> Text
toSnake = strict Fusion.toSnake

-- | O(n) Convert casing to @spinal-cased-phrase@. /Subject to fusion./
toSpinal :: Text -> Text
toSpinal = strict Fusion.toSpinal

-- | O(n) Convert casing to @Train-Cased-Phrase@. /Subject to fusion./
toTrain :: Text -> Text
toTrain = strict Fusion.toTrain