File: exporttxt.cc

package info (click to toggle)
signalbackup-tools 20250313.1-1
  • links: PTS, VCS
  • area: main
  • in suites: sid, trixie
  • size: 3,752 kB
  • sloc: cpp: 47,042; sh: 477; ansic: 399; ruby: 19; makefile: 3
file content (347 lines) | stat: -rw-r--r-- 16,558 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
/*
  Copyright (C) 2023-2025  Selwin van Dijk

  This file is part of signalbackup-tools.

  signalbackup-tools is free software: you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
  the Free Software Foundation, either version 3 of the License, or
  (at your option) any later version.

  signalbackup-tools is distributed in the hope that it will be useful,
  but WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  GNU General Public License for more details.

  You should have received a copy of the GNU General Public License
  along with signalbackup-tools.  If not, see <https://www.gnu.org/licenses/>.
*/

#include "signalbackup.ih"
#include "msgrange.h"

bool SignalBackup::exportTxt(std::string const &directory, std::vector<long long int> const &limittothreads,
                             std::vector<std::string> const &daterangelist, std::string const &selfphone [[maybe_unused]],
                             bool migrate, bool overwrite)
{
  Logger::message("Starting plaintext export to '", directory, "'");

  // v170 and above should work. Anything below will first migrate (I believe anything down to ~23 should more or less work)
  bool databasemigrated = false;
  MemSqliteDB backup_database;
  if (d_databaseversion < 170 || migrate)
  {
    SqliteDB::copyDb(d_database, backup_database);
    if (!migrateDatabase(d_databaseversion, 170)) // migrate == TRUE, but migration fails
    {
      Logger::error("Failed to migrate currently unsupported database version (", d_databaseversion, ")."
                    " Please upgrade your database");
      SqliteDB::copyDb(backup_database, d_database);
      return false;
    }
    databasemigrated = true;
  }

  // // >= 168 will work already? (not sure if 168 and 169 were ever in production, I don't have them at least)
  // if (d_databaseversion == 167)
  // {
  //   SqliteDB::copyDb(d_database, backup_database);
  //   if (!migrateDatabase(167, 170))
  //   {
  //     Logger::error("Failed to migrate currently unsupported database version (", d_databaseversion, ")."
  //                   " Please upgrade your database");
  //     SqliteDB::copyDb(backup_database, d_database);
  //     return false;
  //   }
  //   else
  //     databasemigrated = true;
  // }
  // else if (d_databaseversion < 167)
  // {
  //   if (!migrate)
  //   {
  //     Logger::error("Currently unsupported database version (", d_databaseversion, ").");
  //     Logger::error_indent("Please upgrade your database or append the `--migratedb' option to attempt to");
  //     Logger::error_indent("migrate this database to a supported version.");
  //     return false;
  //   }
  //   SqliteDB::copyDb(d_database, backup_database);
  //   if (!migrateDatabase(d_databaseversion, 170)) // migrate == TRUE, but migration fails
  //   {
  //     Logger::error("Failed to migrate currently unsupported database version (", d_databaseversion, ")."
  //                   " Please upgrade your database");
  //     SqliteDB::copyDb(backup_database, d_database);
  //     return false;
  //   }
  //   else
  //     databasemigrated = true;
  // }

  // check if dir exists, create if not
  if (!prepareOutputDirectory(directory, overwrite))
  {
    if (databasemigrated)
      SqliteDB::copyDb(backup_database, d_database);
    return false;
  }

  // check and warn about selfid & note-to-self thread
  d_selfid = selfphone.empty() ? scanSelf() : d_database.getSingleResultAs<long long int>("SELECT _id FROM recipient WHERE " + d_recipient_e164 + " = ?", selfphone, -1);
  if (d_selfid == -1)
  {
    if (!selfphone.empty())
      Logger::warning("Failed to determine id of 'self'.");
    else
      Logger::warning("Failed to determine id of 'self'. Consider passing `--setselfid \"[phone]\"' to set it manually");
  }
  else
    d_selfuuid = bepaald::toLower(d_database.getSingleResultAs<std::string>("SELECT " + d_recipient_aci + " FROM recipient WHERE _id = ?", d_selfid, std::string()));

  std::vector<long long int> threads = ((limittothreads.empty() || (limittothreads.size() == 1 && limittothreads[0] == -1)) ?
                                        threadIds() : limittothreads);

  std::map<long long int, RecipientInfo> recipient_info;

  // set where-clause for date requested
  std::vector<std::pair<std::string, std::string>> dateranges;
  if (daterangelist.size() % 2 == 0)
    for (unsigned int i = 0; i < daterangelist.size(); i += 2)
      dateranges.push_back({daterangelist[i], daterangelist[i + 1]});
  std::string datewhereclause;
  for (unsigned int i = 0; i < dateranges.size(); ++i)
  {
    bool needrounding = false;
    long long int startrange = dateToMSecsSinceEpoch(dateranges[i].first);
    long long int endrange   = dateToMSecsSinceEpoch(dateranges[i].second, &needrounding);
    if (startrange == -1 || endrange == -1 || endrange < startrange)
    {
      Logger::error("Skipping range: '", dateranges[i].first, " - ", dateranges[i].second, "'. Failed to parse or invalid range.");
      Logger::error_indent(startrange, " ", endrange);
      continue;
    }
    Logger::message("  Using range: ", dateranges[i].first, " - ", dateranges[i].second, " (", startrange, " - ", endrange, ")");

    if (needrounding)// if called with "YYYY-MM-DD HH:MM:SS"
      endrange += 999; // to get everything in the second specified...

    dateranges[i].first = bepaald::toString(startrange);
    dateranges[i].second = bepaald::toString(endrange);

    datewhereclause += (datewhereclause.empty() ? " AND (" : " OR ") + "date_received BETWEEN "s + dateranges[i].first + " AND " + dateranges[i].second;
    if (i == dateranges.size() - 1)
      datewhereclause += ')';
  }
  std::sort(dateranges.begin(), dateranges.end());


  // handle each thread
  for (int t : threads)
  {
    Logger::message("Dealing with thread ", t);

    //bool is_note_to_self = false;//(t == note_to_self_thread_id);

    // get recipient_id for thread;
    SqliteDB::QueryResults recid;
    long long int thread_recipient_id = -1;
    if (!d_database.exec("SELECT _id," + d_thread_recipient_id + " FROM thread WHERE _id = ?", t, &recid) ||
        recid.rows() != 1 || (thread_recipient_id = recid.valueAsInt(0, d_thread_recipient_id)) == -1)
    {
      Logger::error("Failed to find recipient_id for thread (", t, ")... skipping");
      continue;
    }
    long long int thread_id = recid.getValueAs<long long int>(0, "_id");

    bool isgroup = false;
    SqliteDB::QueryResults groupcheck;
    d_database.exec("SELECT group_id FROM recipient WHERE _id = ? AND group_id IS NOT NULL", thread_recipient_id, &groupcheck);
    if (groupcheck.rows())
      isgroup = true;

    // now get all messages
    SqliteDB::QueryResults messages;
    if (!d_database.exec("SELECT "s
                         "_id, " + d_mms_recipient_id + ", "
                         + (d_database.tableContainsColumn(d_mms_table, "to_recipient_id") ? "to_recipient_id" : "-1") +  " AS to_recipient_id, body, "
                         "date_received, " + d_mms_type + ", "
                         "attcount, reactioncount, mentioncount, "
                         "IFNULL(remote_deleted, 0) AS remote_deleted, "
                         "IFNULL(view_once, 0) AS view_once, " +
                         (d_database.tableContainsColumn(d_mms_table, "message_extras") ? "message_extras, " : "") +
                         "expires_in"
                         " FROM " + d_mms_table + " "
                         // get attachment count for message:
                         "LEFT JOIN (SELECT " + d_part_mid + " AS message_id, COUNT(*) AS attcount FROM " + d_part_table + " GROUP BY message_id) AS attmnts ON " + d_mms_table + "._id = attmnts.message_id "
                         // get reaction count for message:
                         "LEFT JOIN (SELECT message_id, COUNT(*) AS reactioncount FROM reaction GROUP BY message_id) AS rctns ON " + d_mms_table + "._id = rctns.message_id "
                         // get mention count for message:
                         "LEFT JOIN (SELECT message_id, COUNT(*) AS mentioncount FROM mention GROUP BY message_id) AS mntns ON " + d_mms_table + "._id = mntns.message_id "
                         "WHERE thread_id = ?"
                         + datewhereclause +
                         + (d_database.tableContainsColumn(d_mms_table, "latest_revision_id") ? " AND latest_revision_id IS NULL" : "") +
                         " ORDER BY date_received ASC", t, &messages))
    {
      Logger::error("Failed to query database for messages");
      if (databasemigrated)
        SqliteDB::copyDb(backup_database, d_database);
      return false;
    }
    if (messages.rows() == 0)
      continue;

    // get all recipients in thread (group member (past and present), quote/reaction authors, mentions)
    std::set<long long int> all_recipients_ids = getAllThreadRecipients(t);

    //try to set any missing info on recipients
    setRecipientInfo(all_recipients_ids, &recipient_info);

    // get conversation name, sanitize it and set outputfilename
    if (recipient_info.find(thread_recipient_id) == recipient_info.end())
    {
      Logger::error("Failed set recipient info for thread (", t, ")... skipping");
      continue;
    }

    std::string filename = /*(is_note_to_self ? "Note to self (_id"s + bepaald::toString(thread_id) + ")"
                             : */sanitizeFilename(recipient_info[thread_recipient_id].display_name + " (_id" + bepaald::toString(thread_id) + ").txt")/*)*/;

    if (bepaald::fileOrDirExists(directory + "/" + filename))
    {
      Logger::error("Refusing to overwrite existing file");
      if (databasemigrated)
        SqliteDB::copyDb(backup_database, d_database);
      return false;
    }

    std::ofstream txtoutput(directory + "/" + filename, std::ios_base::binary);
    if (!txtoutput.is_open())
    {
      Logger::error("Failed to open '", directory, "/", filename, "' for writing.");
      if (databasemigrated)
        SqliteDB::copyDb(backup_database, d_database);
      return false;
    }

    for (unsigned int i = 0; i < messages.rows(); ++i)
    {
      bool is_deleted = messages.getValueAs<long long int>(i, "remote_deleted") == 1;
      bool is_viewonce = messages.getValueAs<long long int>(i, "view_once") == 1;
      if (is_deleted || is_viewonce)
        continue;
      long long int type = messages.getValueAs<long long int>(i, d_mms_type);
      long long int msg_id = messages.getValueAs<long long int>(i, "_id");
      //bool incoming = !Types::isOutgoing(messages.getValueAs<long long int>(i, d_mms_type));
      long long int msg_recipient_id = messages.valueAsInt(i, d_mms_recipient_id);
      if (isgroup && Types::isOutgoing(type))
        msg_recipient_id = d_selfid;
      if (msg_recipient_id == -1) [[unlikely]]
      {
        Logger::warning("Failed to get message recipient id. Skipping.");
        continue;
      }
      std::string body = messages.valueAsString(i, "body");
      std::string readable_date = bepaald::toDateString(messages.getValueAs<long long int>(i, "date_received") / 1000,
                                                          "%b %d, %Y %H:%M:%S");
      SqliteDB::QueryResults attachment_results;
      if (messages.valueAsInt(i, "attcount", 0) > 0)
        d_database.exec("SELECT "
                        "_id, " +
                        (d_database.tableContainsColumn(d_part_table, "unique_id") ? "unique_id"s : "-1 AS unique_id") + ", " +
                        d_part_ct + ", "
                        "file_name, "
                        + d_part_pending + ", " +
                        (d_database.tableContainsColumn(d_part_table, "caption") ? "caption, "s : std::string()) +
                        "sticker_pack_id "
                        "FROM " + d_part_table + " WHERE " + d_part_mid + " IS ? AND quote IS 0", msg_id, &attachment_results);
      // check attachments for long message body -> replace cropped body & remove from attachment results
      setLongMessageBody(&body, &attachment_results);

      SqliteDB::QueryResults mention_results;
      if (messages.valueAsInt(i, "mentioncount", 0) > 0)
        d_database.exec("SELECT recipient_id, range_start, range_length FROM mention WHERE message_id IS ?", msg_id, &mention_results);

      SqliteDB::QueryResults reaction_results;
      if (messages.valueAsInt(i, "reactioncount", 0) > 0)
        d_database.exec("SELECT emoji, author_id, DATETIME(date_sent / 1000, 'unixepoch', 'localtime') AS 'date_sent', "
                        "DATETIME(date_received / 1000, 'unixepoch', 'localtime') AS 'date_received' "
                        "FROM reaction WHERE message_id IS ?", msg_id, &reaction_results);

      if (Types::isStatusMessage(type) || Types::isCallType(type))
      {
        // see note in exporthtml
        long long int target_rid = msg_recipient_id;
        if ((Types::isIdentityVerified(type) || Types::isIdentityDefault(type)) &&
            messages.valueAsInt(i, "to_recipient_id") != -1) [[unlikely]]
          target_rid = messages.valueAsInt(i, "to_recipient_id");

        std::string statusmsg;
        if (!body.empty() ||
            !(d_database.tableContainsColumn(d_mms_table, "message_extras") &&
              messages.valueHasType<std::pair<std::shared_ptr<unsigned char []>, size_t>>(i, "message_extras")))
          statusmsg = decodeStatusMessage(body, messages.getValueAs<long long int>(i, "expires_in"), type,
                                          getRecipientInfoFromMap(&recipient_info, target_rid).display_name);
        else if (d_database.tableContainsColumn(d_mms_table, "message_extras") &&
                 messages.valueHasType<std::pair<std::shared_ptr<unsigned char []>, size_t>>(i, "message_extras"))
          statusmsg = decodeStatusMessage(messages.getValueAs<std::pair<std::shared_ptr<unsigned char []>, size_t>>(i, "message_extras"),
                                          messages.getValueAs<long long int>(i, "expires_in"), type,
                                          getRecipientInfoFromMap(&recipient_info, target_rid).display_name);

        txtoutput << "[" << readable_date << "] " << "***" << " " << statusmsg <<  '\n';
      }
      else
      {
        // get originating username
        std::string user = getRecipientInfoFromMap(&recipient_info, msg_recipient_id).display_name;

        for (unsigned int a = 0; a < attachment_results.rows(); ++a)
        {
          std::string content_type = attachment_results.valueAsString(a, d_part_ct);
          if (content_type == "text/x-signal-plain") [[unlikely]]
            continue;

          std::string attachment_filename;
          if (!attachment_results.isNull(a, "file_name") && !attachment_results(a, "file_name").empty())
            attachment_filename = '"' + attachment_results(a, "file_name") + '"';
          else if (!content_type.empty())
            attachment_filename = "of type " + content_type;

          txtoutput << "[" << readable_date << "] *** <" << user << "> sent file"
                    << (attachment_filename.empty() ? "" : " " + attachment_filename);
          if (body.empty())
            TXTaddReactions(&reaction_results, &txtoutput);
          txtoutput << '\n';
        }
        if (!body.empty())
        {
          // prep body for mentions...
          std::vector<Range> ranges;
          for (unsigned int m = 0; m < mention_results.rows(); ++m)
          {
            std::string displayname = getNameFromRecipientId(mention_results.getValueAs<long long int>(m, "recipient_id"));
            if (displayname.empty())
              continue;
            ranges.emplace_back(Range{mention_results.getValueAs<long long int>(m, "range_start"),
                                      mention_results.getValueAs<long long int>(m, "range_length"),
                                      "",
                                      "@" + displayname,
                                      "",
                                      false});
          }
          applyRanges(&body, &ranges, nullptr);

          txtoutput << "[" << readable_date << "] <" << user << "> " << body;
          TXTaddReactions(&reaction_results, &txtoutput);
          txtoutput << '\n';
        }
      }
    }
  }

  Logger::message("All done!");
  if (databasemigrated)
  {
    Logger::message("restoring migrated database...");
    SqliteDB::copyDb(backup_database, d_database);
  }
  return true;
}