1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226
|
// Author(s): Wieger Wesselink
// Copyright: see the accompanying file COPYING or copy at
// https://svn.win.tue.nl/trac/MCRL2/browser/trunk/COPYING
//
// Distributed under the Boost Software License, Version 1.0.
// (See accompanying file LICENSE_1_0.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt)
//
/// \file mcrl2/utilities/text_utility.h
/// \brief String manipulation functions.
#include <fstream>
#include <iostream>
#include <algorithm>
#include <iterator>
#include <string>
#include <vector>
#include <boost/algorithm/string.hpp>
#include <boost/xpressive/xpressive.hpp>
#include "mcrl2/utilities/text_utility.h"
#include "mcrl2/utilities/exception.h"
#include "mcrl2/utilities/logger.h"
namespace mcrl2
{
namespace utilities
{
/// \brief Split a string into paragraphs.
/// \param text A string
/// \return The paragraphs of <tt>text</tt>
std::vector<std::string> split_paragraphs(const std::string& text)
{
std::vector<std::string> result;
// find multiple line endings
boost::xpressive::sregex paragraph_split = boost::xpressive::sregex::compile("\\n\\s*\\n");
// the -1 below directs the token iterator to display the parts of
// the string that did NOT match the regular expression.
boost::xpressive::sregex_token_iterator cur(text.begin(), text.end(), paragraph_split, -1);
boost::xpressive::sregex_token_iterator end;
for (; cur != end; ++cur)
{
std::string paragraph = *cur;
boost::trim(paragraph);
if (paragraph.size() > 0)
{
result.push_back(paragraph);
}
}
return result;
}
/// \brief Split the text.
/// \param line A string
/// \param separators A string
/// \return The splitted text
std::vector<std::string> split(const std::string& line, const std::string& separators)
{
std::vector<std::string> result;
boost::algorithm::split(result, line, boost::algorithm::is_any_of(separators));
return result;
}
/// \brief Read the complete contents of a text file.
/// \param filename The name of the file to read
/// \param warn Selects what happens when the file cannot be opened: if true,
///        a warning is logged and an empty string is returned; if false, an
///        exception is thrown
/// \return The contents of the file, or the empty string if the file could
///         not be opened and <tt>warn</tt> is true
/// \exception mcrl2::runtime_error if the file could not be opened and
///            <tt>warn</tt> is false
std::string read_text(const std::string& filename, bool warn)
{
  std::ifstream in(filename.c_str());
  if (!in)
  {
    if (!warn)
    {
      throw mcrl2::runtime_error("Could not open input file: " + filename);
    }
    mCRL2log(log::warning) << "Could not open input file: " << filename << std::endl;
    return "";
  }

  // Read straight from the stream buffer. This keeps every character
  // (including white space) without having to disable skipws, and avoids the
  // per-character formatted extraction that std::istream_iterator performs.
  std::string contents((std::istreambuf_iterator<char>(in)),
                       std::istreambuf_iterator<char>());
  return contents;
}
/// \brief Remove comments from a text (everything from '%' until end of line).
/// The newline that terminates a comment is kept, so the number of lines of
/// the text does not change. A comment on the last line is removed as well,
/// even if that line is not terminated by a newline.
/// \param text A string
/// \return The text without comments
std::string remove_comments(const std::string& text)
{
  std::string result;
  result.reserve(text.size());

  std::string::size_type pos = 0;
  while (pos < text.size())
  {
    const std::string::size_type comment_start = text.find('%', pos);
    if (comment_start == std::string::npos)
    {
      // no further comments: keep the remainder of the text
      result.append(text, pos, std::string::npos);
      break;
    }

    // keep the part in front of the comment
    result.append(text, pos, comment_start - pos);

    const std::string::size_type line_end = text.find('\n', comment_start);
    if (line_end == std::string::npos)
    {
      // the comment runs until the end of the text
      break;
    }

    // continue AT the newline, such that it is copied to the result
    pos = line_end;
  }
  return result;
}
/// \brief Removes whitespace from a string.
/// \param text A string
/// \return The removal result
std::string remove_whitespace(const std::string& text)
{
boost::xpressive::sregex src = boost::xpressive::sregex::compile("\\s");
std::string dest("");
return boost::xpressive::regex_replace(text, src, dest);
}
/// \brief Regular expression replacement in a string.
/// \param src A string
/// \param dest A string
/// \param text A string
/// \return The transformed string
std::string regex_replace(const std::string& src, const std::string& dest, const std::string& text)
{
return boost::xpressive::regex_replace(text, boost::xpressive::sregex::compile(src), dest);
}
/// \brief Split a string using a regular expression separator.
/// \param text A string
/// \param sep A string
/// \return The splitted string
std::vector<std::string> regex_split(const std::string& text, const std::string& sep)
{
std::vector<std::string> result;
// find multiple line endings
boost::xpressive::sregex paragraph_split = boost::xpressive::sregex::compile(sep);
// the -1 below directs the token iterator to display the parts of
// the string that did NOT match the regular expression.
boost::xpressive::sregex_token_iterator cur(text.begin(), text.end(), paragraph_split, -1);
boost::xpressive::sregex_token_iterator end;
for (; cur != end; ++cur)
{
std::string word = *cur;
boost::trim(word);
if (word.size() > 0)
{
result.push_back(word);
}
}
return result;
}
/// \brief Apply word wrapping to a text that doesn't contain newlines.
/// The line is broken at the last space or tab found at an index of at most
/// <tt>max_line_length</tt>; that white space character is dropped. If there
/// is no such character, the line is cut after exactly
/// <tt>max_line_length</tt> characters. Trailing white space is removed from
/// the last piece.
/// \param line A string of text.
/// \param max_line_length The maximum line length. A value of 0 leaves no room
///        for any character, so in that case the line is not wrapped at all.
/// \return The wrapped text, one element per output line (never empty).
static
std::vector<std::string> word_wrap_line(const std::string& line, unsigned int max_line_length)
{
  std::vector<std::string> result;
  std::string text = line;

  // The guard on max_line_length is essential: with a width of 0 neither of
  // the two branches below consumes a character, so the loop would never end.
  while (max_line_length != 0 && text.size() > max_line_length)
  {
    const std::string::size_type i = text.find_last_of(" \t", max_line_length);
    if (i == std::string::npos)
    {
      // no white space to break at: cut the word
      result.push_back(text.substr(0, max_line_length));
      text.erase(0, max_line_length);
    }
    else
    {
      // break at the white space character and drop it
      result.push_back(text.substr(0, i));
      text.erase(0, i + 1);
    }
  }

  // Remove trailing white space from the remainder. If the remainder consists
  // of white space only, find_last_not_of yields npos and npos + 1 == 0, so
  // the whole string is erased.
  text.erase(text.find_last_not_of(" \t\n\v\f\r") + 1);
  result.push_back(text);
  return result;
}
/// \brief Apply word wrapping to a text that may consist of several lines.
/// The text is split at newlines, trailing white space is removed from every
/// line, each line is wrapped separately with word_wrap_line, and the
/// resulting lines are joined with newlines again.
/// \param text A string of text.
/// \param max_line_length The maximum line length.
/// \return The wrapped text.
std::string word_wrap_text(const std::string& text, unsigned int max_line_length)
{
  const std::vector<std::string> lines = split(text, "\n");

  std::vector<std::string> wrapped;
  for (std::vector<std::string>::const_iterator line = lines.begin(); line != lines.end(); ++line)
  {
    // strip trailing white space, then wrap this single line
    const std::vector<std::string> pieces = word_wrap_line(boost::trim_right_copy(*line), max_line_length);
    std::copy(pieces.begin(), pieces.end(), std::back_inserter(wrapped));
  }
  return string_join(wrapped, "\n");
}
/// \brief Test if a string is a number.
/// \param s A string of text.
/// \return True if s is of the form "0 | -? [1-9][0-9]*", false otherwise
bool is_numeric_string(const std::string& s)
{
// The static below prevents the regular expression recognizer to be compiled
// each time a string is matched, which is far too time consuming.
static boost::xpressive::sregex re = boost::xpressive::sregex::compile("0|(-?[1-9][0-9]*)");
return boost::xpressive::regex_match(s, re);
}
} // namespace utilities
} // namespace mcrl2
|