1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320
|
/*
Copyright (©) 2003-2025 Teus Benschop.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
*/
#include <filter/merge.h>
#pragma GCC diagnostic push
#pragma clang diagnostic ignored "-Wsign-conversion"
#pragma GCC diagnostic ignored "-Weffc++"
#pragma GCC diagnostic ignored "-Wold-style-cast"
#pragma GCC diagnostic ignored "-Wswitch-default"
#pragma GCC diagnostic ignored "-Wsuggest-override"
#include <dtl/dtl.hpp>
#pragma GCC diagnostic pop
#include <filter/string.h>
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Weffc++"
#pragma GCC diagnostic ignored "-Wsuggest-override"
#pragma GCC diagnostic ignored "-Wzero-as-null-pointer-constant"
#ifndef HAVE_PUGIXML
#include <pugixml/pugixml.hpp>
#endif
#ifdef HAVE_PUGIXML
#include <pugixml.hpp>
#endif
#pragma GCC diagnostic pop
#include <email/send.h>
#include <filter/usfm.h>
static std::mutex filter_merge_mutex;
// This uses the dtl:: library for merge in C++.
// At times the library failed to merge.
// It was tried whether the Linux "merge" command was able to successfully merge such cases.
// But also this command failed to merge in such cases.
// The conclusion therefore is that the C++ merge library is equivalent in quality.
// merge - three-way merge.
// Merge is useful for combining separate changes to an original.
// The function normally returns the merged text.
// If case of conflicts, it returns an empty container.
std::vector <std::string> filter_merge_merge (const std::vector <std::string>& base, const std::vector <std::string>& user, const std::vector <std::string>& server)
{
// See issue https://github.com/bibledit/cloud/issues/418
// It is unclear at this time whether the code below
// to find the differences between texts, is thread-safe.
// So just to be sure, a mutex is placed around it.
filter_merge_mutex.lock();
std::vector <std::string> user_sequence (user);
std::vector <std::string> base_sequence (base);
std::vector <std::string> server_sequence (server);
dtl::Diff3 <std::string, std::vector <std::string>> diff3 (user_sequence, base_sequence, server_sequence);
diff3.compose ();
bool merged = diff3.merge ();
filter_merge_mutex.unlock();
if (!merged) {
return {};
}
return diff3.getMergedSequence ();
}
std::string filter_merge_lines2words (std::string data)
{
data = filter::strings::replace ("\n", " new__line ", data);
data = filter::strings::replace (" ", "\n", data);
return data;
}
std::string filter_merge_words2lines (std::string data)
{
data = filter::strings::replace ("\n", " ", data);
data = filter::strings::replace (" new__line ", "\n", data);
return data;
}
std::string filter_merge_lines2graphemes (std::string data)
{
data = filter::strings::replace ("\n", " new__line ", data);
std::string data2;
size_t count = filter::strings::unicode_string_length (data);
for (size_t i = 0; i < count; i++) {
std::string grapheme = filter::strings::unicode_string_substr (data, i, 1);
data2.append (grapheme);
data2.append ("\n");
}
return data2;
}
std::string filter_merge_graphemes2lines (std::string data)
{
data = filter::strings::replace ("\n", "", data);
data = filter::strings::replace (" new__line ", "\n", data);
return data;
}
void filter_merge_detect_conflict (std::string base,
std::string change,
std::string prioritized_change,
std::string result,
std::vector <Merge_Conflict> & conflicts)
{
// Clean input.
base = filter::strings::trim (base);
change = filter::strings::trim (change);
prioritized_change = filter::strings::trim (prioritized_change);
result = filter::strings::trim (result);
bool irregularity = false;
std::string subject;
if (!irregularity) {
if (base.empty ()) {
subject = "There was no text to base the merge upon";
irregularity = true;
}
}
if (!irregularity) {
if (change.empty ()) {
subject = "There was no changed text to merge with";
irregularity = true;
}
}
if (!irregularity) {
if (prioritized_change.empty ()) {
subject = "There was no existing text to merge with";
irregularity = true;
}
}
if (!irregularity) {
if (result.empty ()) {
subject = "The merge resulted in empty text";
irregularity = true;
}
}
if (!irregularity) {
if ((change != base) && (prioritized_change != change) && (prioritized_change == result)) {
subject = "Failed to merge your changes";
irregularity = true;
}
}
if (irregularity) {
Merge_Conflict conflict;
conflict.base = base;
conflict.change = change;
conflict.prioritized_change = prioritized_change;
conflict.result = result;
conflict.subject = subject;
conflicts.push_back (conflict);
}
}
// This filter merges files.
// $base: Data for the merge base.
// $change: Data as modified by one user.
// $prioritized_change: Data as modified by a user but prioritized.
// The filter uses a three-way merge algorithm.
// There should be one unchanged segment (either a line or word) between the modifications.
// If necessary it converts the data into a new format with one character per line for more fine-grained merging.
// In case of a conflict, it prioritizes changes from $prioritized_change.
// The filter returns the merged data.
// If $clever, it calls a more clever routine when it fails to merge.
std::string filter_merge_run (std::string base, std::string change, std::string prioritized_change,
bool clever,
std::vector <Merge_Conflict> & conflicts)
{
// Trim the input.
base = filter::strings::trim (base);
change = filter::strings::trim (change);
prioritized_change = filter::strings::trim (prioritized_change);
// Try a standard line-based merge. Should be sufficient for most cases.
std::vector <std::string> baselines = filter::strings::explode (base, '\n');
std::vector <std::string> userlines = filter::strings::explode (change, '\n');
std::vector <std::string> serverlines = filter::strings::explode (prioritized_change, '\n');
std::vector <std::string> results = filter_merge_merge (baselines, userlines, serverlines);
if (!results.empty ()) {
std::string result = filter::strings::implode (results, "\n");
filter_merge_detect_conflict (base, change, prioritized_change, result, conflicts);
return result;
}
// Convert the data to one word per line, and try to merge again.
std::string baseWords = filter_merge_lines2words (base);
std::string userWords = filter_merge_lines2words (change);
std::string serverWords = filter_merge_lines2words (prioritized_change);
baselines = filter::strings::explode (baseWords, '\n');
userlines = filter::strings::explode (userWords, '\n');
serverlines = filter::strings::explode (serverWords, '\n');
results = filter_merge_merge (baselines, userlines, serverlines);
if (!results.empty ()) {
std::string mergedWords = filter::strings::implode (results, "\n");
std::string result = filter_merge_words2lines (mergedWords);
filter_merge_detect_conflict (base, change, prioritized_change, result, conflicts);
return result;
}
// Convert the data so it has one grapheme per line, and try again.
std::string baseGraphemes = filter_merge_lines2graphemes (base);
std::string userGraphemes = filter_merge_lines2graphemes (change);
std::string serverGraphemes = filter_merge_lines2graphemes (prioritized_change);
baselines = filter::strings::explode (baseGraphemes, '\n');
userlines = filter::strings::explode (userGraphemes, '\n');
serverlines = filter::strings::explode (serverGraphemes, '\n');
results = filter_merge_merge (baselines, userlines, serverlines);
if (!results.empty ()) {
std::string mergedGraphemes = filter::strings::implode (results, "\n");
std::string result = filter_merge_graphemes2lines (mergedGraphemes);
filter_merge_detect_conflict (base, change, prioritized_change, result, conflicts);
return result;
}
if (clever) {
// It failed to merge: Call a more clever routine to do the merge.
std::string result = filter_merge_run_clever (base, change, prioritized_change, conflicts);
// Check on merge failure.
filter_merge_detect_conflict (base, change, prioritized_change, result, conflicts);
// Done.
return result;
}
// The data could not be merged no matter how hard it tried.
// Detect it as a conflict.
filter_merge_detect_conflict (base, change, prioritized_change, prioritized_change, conflicts);
// Done;
return prioritized_change;
}
// This filter merges USFM data in a clever way.
// $base: Data for the merge base.
// $change: Data as modified by one user.
// $prioritized_change: Data as modified by a user but prioritized.
// The filter uses a three-way merge algorithm.
std::string filter_merge_run_clever (std::string base, std::string change, std::string prioritized_change,
std::vector <Merge_Conflict> & conflicts)
{
// Get the verse numbers in the changed text.
std::vector <int> verses = filter::usfm::get_verse_numbers (change);
std::vector <std::string> results;
std::string previous_change;
// Go through the verses.
for (auto verse : verses) {
// Gets the texts to merge for this verse.
std::string base_text = filter::usfm::get_verse_text (base, verse);
std::string change_text = filter::usfm::get_verse_text (change, verse);
std::string prioritized_change_text = filter::usfm::get_verse_text (prioritized_change, verse);
// Check for combined verses.
if (change_text == previous_change) continue;
previous_change = change_text;
// Check whether any of the three text fragments can be considered to be a verse without content.
size_t empty_length = 3 + std::to_string(verse).length() + 1; // "\v n "
bool base_empty = base_text.length () <= empty_length;
bool change_empty = change_text.length () <= empty_length;
bool prioritized_change_empty = prioritized_change_text.length () <= empty_length;
// If the prioritized change is empty, and the other two are not,
// update the priotitized change to match the change.
// Without doing this the merge behaves in an unexpected way:
// It would take the prioritized change instead of the the base/change.
if (prioritized_change_empty) {
if (!base_empty && !change_empty) {
prioritized_change_text = change_text;
}
}
// Run the merge, but clear the "clever" flags else it may enter an infinite loop.
std::string result = filter_merge_run (base_text, change_text, prioritized_change_text, false, conflicts);
// Store it.
results.push_back (result);
}
// Done.
return filter::strings::implode (results, "\n");
}
void filter_merge_add_book_chapter (std::vector <Merge_Conflict> & conflicts, int book, int chapter)
{
for (auto & conflict : conflicts) {
conflict.book = book;
conflict.chapter = chapter;
}
}
|