File: fssync

package info (click to toggle)
fssync 1.6-1
  • links: PTS
  • area: main
  • in suites: bullseye, buster, sid, stretch
  • size: 176 kB
  • ctags: 4
  • sloc: python: 1,468; makefile: 27
file content (1281 lines) | stat: -rwxr-xr-x 44,322 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
1210
1211
1212
1213
1214
1215
1216
1217
1218
1219
1220
1221
1222
1223
1224
1225
1226
1227
1228
1229
1230
1231
1232
1233
1234
1235
1236
1237
1238
1239
1240
1241
1242
1243
1244
1245
1246
1247
1248
1249
1250
1251
1252
1253
1254
1255
1256
1257
1258
1259
1260
1261
1262
1263
1264
1265
1266
1267
1268
1269
1270
1271
1272
1273
1274
1275
1276
1277
1278
1279
1280
1281
#!/usr/bin/python3
# -*- coding: utf-8 -*-
#
# Copyright (C) 2011-2017 Julien Muchembled <jm@jmuchemb.eu>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 3 as
# published by the Free Software Foundation.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

import argparse, errno, grp, logging.handlers, os
import pwd, shlex, signal, sqlite3, stat, struct, subprocess, sys, syslog
from collections import deque
from contextlib import contextmanager
from ctypes import CDLL, util as ctypes_util, get_errno, c_long
from hashlib import md5
from pickle import dumps, loads
from urllib.parse import splitport
from posix1e import ACL, ACL_USER, ACL_GROUP, delete_default, Entry, Permset

# Version number of the RPC protocol.
# NOTE(review): not referenced in the visible part of the file; presumably
# exchanged between both sides at startup - confirm.
PROTOCOL_VERSION = 3
# Granularity (in bytes) at which file contents are hashed and compared.
BLOCK_SIZE = 4096
# Amount of file data (in bytes) covered by a single 'check_data' RPC.
PART_SIZE = BLOCK_SIZE * 256

# Module-level logger used by both the local and the remote side.
logger = logging.getLogger(__name__)

def read_rpc(stdin):
  """Read one length-prefixed message from the binary stream `stdin`.

  A message is a 4-byte big-endian unsigned length followed by that many
  bytes. When nothing can be read for the header, the (empty) result of
  that read is returned as is.
  """
  header = stdin.read(4)
  if not header:
    return header
  size, = struct.unpack('!I', header)
  return stdin.read(size)

def write_rpc(stdout, rpc):
  """Write `rpc` (bytes) to `stdout` as one length-prefixed message.

  The 4-byte big-endian length and the payload are emitted with a single
  write() call.
  """
  header = struct.pack('!I', len(rpc))
  stdout.write(header + rpc)

# NOTE: All paths are bytes instead of unicode.
#       This is required in order to handle paths with invalid characters.
#       For example, b'\x89' on a utf-8 filesystem:
#         sqlite3.connect(':memory:').execute('select ?', ('\udc89',))
#       raises UnicodeEncodeError
# decode: bytes path -> str, encode: str path -> bytes, both using the
# filesystem encoding. Both directions use the 'surrogateescape' error
# handler so that bytes which are invalid in that encoding survive a
# decode/encode round trip instead of raising UnicodeEncodeError.
decode, encode = (lambda encoding:
  (lambda path: path.decode(encoding, 'surrogateescape'),
   lambda path: path.encode(encoding, 'surrogateescape'))
  )(sys.getfilesystemencoding())

# Singleton that is never lower than anything: 'INF < x' is always False.
# The class name is rebound to its only instance right after definition.
class INF:
  def __lt__(self, other):
    # Never compares as lower, whatever `other` is.
    return False
INF = INF()

def format_command(*args):
  """Return `args` as one shell-quoted, space-separated command line."""
  quoted = [shlex.quote(arg) for arg in args]
  return ' '.join(quoted)

# Files are opened for reading by passing flags such as os.O_NOATIME alone
# (without or-ing os.O_RDONLY), which relies on O_RDONLY being 0.
assert not os.O_RDONLY

def check_data(f, size, block_size):
  """Yield the MD5 digest of each block of the next `size` bytes of `f`.

  Blocks are `block_size` bytes long, except possibly the last one.
  Iteration stops silently as soon as a read returns fewer bytes than
  requested (the short block is not hashed).
  """
  # XXX: should we cache reads ?
  remaining = size
  while remaining:
    wanted = min(remaining, block_size)
    chunk = f.read(wanted)
    if len(chunk) != wanted:
      return
    yield md5(chunk).digest()
    remaining -= wanted

# Singleton whose divmod() by 10**9 is always (0, (1 << 30) - 2).
# NOTE(review): presumably passed as a nanosecond timestamp so that the
# resulting (sec, nsec) pair tells the kernel to leave that time unchanged
# (UTIME_OMIT) - confirm against the caller.
class UTIME_OMIT:
  def __divmod__(self, other):
    assert other == 10 ** 9
    return 0, 0x3ffffffe
UTIME_OMIT = UTIME_OMIT()

libc = CDLL(ctypes_util.find_library('c'), use_errno=True)
def _fallocate():
  # We only use it to reduce fragmentation so this must not be replaced by
  # posix_fallocate (which emulates fallocate if kernel/fs does not support it).
  try:
    libc_fallocate = libc.fallocate
  except AttributeError:
    def _fallocate(fd, offset, length, keep_size=False):
      pass
  else:
    def _fallocate(fd, offset, length, keep_size=False):
      if libc_fallocate(fd, bool(keep_size), c_long(offset), c_long(length)):
        e = get_errno()
        if e not in (errno.ENOSYS, errno.EOPNOTSUPP):
          raise OSError(e, os.strerror(e))
  return _fallocate
_fallocate = _fallocate()

def dump_acl(**kw):
  """Return the ACL selected by `kw` as a list of (bits, qualifier) pairs.

  `kw` is passed unchanged to posix1e.ACL(). For each entry, `bits` packs
  the tag type (shifted left by 3) with the read/write/execute permissions
  in the 3 low bits. `qualifier` is the entry qualifier for user and group
  entries, None for the other tag types.
  """
  entries = []
  for entry in ACL(**kw):
    tag = entry.tag_type
    perms = entry.permset
    bits = tag << 3 | perms.read << 2 | perms.write << 1 | perms.execute
    if tag == ACL_USER or tag == ACL_GROUP:
      qualifier = entry.qualifier
    else:
      qualifier = None
    entries.append((bits, qualifier))
  return entries

def load_acl(acl):
  """Build a posix1e.ACL from (bits, qualifier) pairs as made by dump_acl.

  The tag type is taken from `bits >> 3` and the permissions from the 3 low
  bits. The qualifier is only set when it is not None.
  """
  result = ACL()
  for packed, qualifier in acl:
    entry = Entry(result)
    entry.tag_type = packed >> 3
    if qualifier is not None:
      entry.qualifier = qualifier
    Permset(entry).add(packed & 7)
  return result

class Stat:
  """Metadata of one filesystem entry, split into a key and a value.

  The key is (dev, ino). The value is the 8-tuple
  (gid, mode, mtime_ns, rdev, size, uid, acl, attr).
  'blocks' and 'blksize' are only set by __init__, and only for
  non-directories.
  """
  # XXX: Consider splitting mode and moving type from value to key,
  #      in order to simplify code.

  NULL_KEY = None, None
  NULL_VALUE = (None,) * 8
  __slots__ = ('dev', 'ino', 'gid', 'mode', 'mtime_ns', 'rdev', 'size', 'uid',
               'acl', 'attr', 'blocks', 'blksize')

  def __init__(self, path):
    """Collect metadata of `path` itself (symlinks are not followed)."""
    st = os.lstat(path)
    # The first 8 slots map directly to st_* fields.
    for name in self.__slots__[:-4]:
      setattr(self, name, getattr(st, 'st_' + name))
    if stat.S_ISDIR(self.mode):
      self.size = None
    else:
      self.blocks = st.st_blocks
      self.blksize = st.st_blksize
    access = default = xattrs = None
    try:
      xattrs = {name: os.getxattr(path, name, follow_symlinks=False)
                for name in os.listxattr(path, follow_symlinks=False)}
    except AttributeError:
      # No xattr support in the os module.
      pass
    except OSError as e:
      # Only "not supported" is tolerated.
      if e.errno != errno.ENOTSUP:
        raise
    else:
      # ACLs are stored in their decoded form instead of raw xattr values.
      access = (xattrs.pop('system.posix_acl_access', None)
                and dump_acl(file=path))
      default = (xattrs.pop('system.posix_acl_default', None)
                 and dump_acl(filedef=path))
      xattrs.pop('trusted.SGI_ACL_FILE', None)
      xattrs.pop('trusted.SGI_ACL_DEFAULT', None)
    self.acl = (access or default) and (access, default)
    self.attr = xattrs or None

  def __eq__(self, other):
    # NOTE(review): every slot is read, so this raises AttributeError when
    # one is unset on either side (e.g. 'blocks' for a directory or for an
    # instance made by load()) - confirm callers never hit that.
    return not any(getattr(self, name) != getattr(other, name)
                   for name in self.__slots__)

  @property
  def key(self):
    """The (dev, ino) pair."""
    return tuple([getattr(self, name) for name in self.__slots__[:2]])

  @property
  def value(self):
    """The 8 metadata fields from 'gid' to 'attr'."""
    return tuple([getattr(self, name) for name in self.__slots__[2:-2]])

  @property
  def null_value(self):
    """A value where only the file type bits of the mode are set."""
    file_type = stat.S_IFMT(self.mode)
    return (None, file_type) + (None,) * 6

  @classmethod
  def load(cls, key, value):
    """Build an instance from a key and a value, without touching the FS."""
    self = cls.__new__(cls)
    for name, field in zip(self.__slots__, key + value):
      setattr(self, name, field)
    return self


class RpcClient:
  """Client side of the RPC protocol, on top of 2 binary streams.

  Any attribute that does not exist is turned into a function that sends
  the request (name, args, kw) to the peer. The function is cached on the
  instance, so __getattr__ runs only once per name. Replies are read
  separately with wait().
  """

  def __init__(self, stdin, stdout, map_users=False):
    self._map_users = map_users
    self.stdin, self.stdout = stdin, stdout

  def wait(self):
    """Read one reply; raise it if it is an exception, else return it."""
    reply = loads(read_rpc(self.stdin))
    if not isinstance(reply, Exception):
      return reply
    raise reply

  def send(self, value):
    """Pickle `value` and write it as one message."""
    payload = dumps(value)
    write_rpc(self.stdout, payload)

  def __getattr__(self, name):
    send = self.send
    quiet = name in ('check', 'print', 'reverse')
    if quiet:
      if name == 'check':
        self.sync_meta # send pwd_grp if not done yet
      # These requests are not logged.
      def rpc(*args, **kw):
        send((name, args, kw))
    else:
      if name == 'sync_meta' and self._map_users:
        # Send our passwd/group database only if remote will need it.
        users = {entry.pw_name: entry.pw_uid for entry in pwd.getpwall()}
        groups = {entry.gr_name: entry.gr_gid for entry in grp.getgrall()}
        send(('pwd_grp', (users, groups), {}))
        logger.debug('pwd_grp(...)')
        self.wait()
      def rpc(*args, **kw):
        send((name, args, kw))
        # Log the 2nd argument only when it is cheap to represent.
        if len(args) > 1 and (args[1] is None or
          isinstance(args[1], (int, bytes))):
          logger.debug('%s(%r, %r)', name, args[0], args[1])
        else:
          logger.debug('%s(%r)', name, args[0])
    setattr(self, name, rpc)
    return rpc


class RpcSshClient(RpcClient):
  """RpcClient talking to `command` run on `host` through 'ssh'.

  `host` may end with ':port', in which case the port is passed to ssh
  with '-p'. Extra arguments are forwarded to RpcClient.
  """
  # NOTE: we use external 'ssh' command instead of 'paramiko' library because
  #       - it's faster
  #       - it seems the easiest way to have '.ssh/config' taken into account

  def __init__(self, host, command, *args, **kw):
    host, port = splitport(host)
    cmd = ['ssh']
    if port is not None:
      cmd.extend(('-p', port))
    cmd.extend((host, command))
    proc = self._p = subprocess.Popen(cmd, bufsize=1,
                                      stdin=subprocess.PIPE,
                                      stdout=subprocess.PIPE)
    # Replies are read from the child's stdout, requests are written to the
    # raw (unbuffered) stream below its stdin.
    super().__init__(proc.stdout, proc.stdin.raw, *args, **kw)


class Local:

  NULL_KEY = dumps(Stat.NULL_KEY, 2)
  NULL_VALUE = dumps(Stat.NULL_VALUE, 2)
  prealloc = False

  def __init__(self, root, db, rpc):
    """Open the source tree `root` (bytes) and the SQLite database `db`.

    `rpc` is the client used to talk to the destination side. The 'fssync'
    table and its index on 'inode' are created if missing.
    """
    assert isinstance(root, bytes), root
    self.root = root
    root_fd = self.root_fd = os.open(root, os.O_NOATIME)
    root_dev = self.root_dev = os.fstat(root_fd).st_dev
    self.rpc = rpc
    # TODO: Add option to map other filesystems ?
    self.dev_map = {root_dev: None}
    self.masked = []
    con = self.con = sqlite3.connect(db, isolation_level=None)
    # I wish I could get fd of sqlite connection and pass it to os.fstat
    db_key = None
    if db != ':memory:':
      try:
        db_key = self.stat(db).key
      except KeyError: # db is on another filesystem
        pass
    self.db_key = db_key
    con.execute("PRAGMA synchronous = OFF")
    con.text_factory = str
    # A row with null metadata means we are renaming/linking/removing a path:
    # 'inode' contains source path and 'path' is destination (null if removing).
    # This special meaning is part of recovering process in case synchronization
    # was interrupted.
    con.execute("""CREATE TABLE IF NOT EXISTS fssync (
      path blob PRIMARY KEY,
      inode blob,
      metadata blob,
      checked integer)
    """)
    con.execute("CREATE INDEX IF NOT EXISTS _fssync_i1 ON fssync(inode)")

  def __del__(self):
    """Release the root directory descriptor and the database connection.

    __del__ is also called when __init__ raised before setting 'root_fd'
    or 'con' (e.g. the root could not be opened), so missing attributes
    are tolerated. The connection is closed even if closing the descriptor
    fails.
    """
    try:
      root_fd = getattr(self, 'root_fd', None)
      if root_fd is not None:
        os.close(root_fd)
    finally:
      con = getattr(self, 'con', None)
      if con is not None:
        con.close()

  def __call__(self, filter=None, path_list=None, check=False, print0=False,
               prealloc=False, force=False):
    """Run a synchronization (or a check) of `path_list` under the root.

    filter    -- source of a Python expression evaluating to the filter
                 function (see set_filter), or None
    path_list -- paths (bytes) to process; the whole tree if empty
    check     -- run check() on each path instead of sync() + clean()
    print0    -- output NUL-terminated paths (filtered paths and, in check
                 mode, the paths yielded by check())
    prealloc, force -- stored on the instance for sync()
    """
    if not print0:
      emit = print0
    elif type(self.rpc) is RpcClient: # XXX
      emit = self.rpc.print
    else:
      emit = sys.stdout.buffer.write
    if filter:
      # NOTE(review): the filter expression is evaluated with eval(); it
      # must only come from a trusted source.
      self.set_filter(eval(filter, globals(), {}), emit)
    self.prealloc = prealloc
    self.force = force
    # Recover from a previously interrupted run before doing anything else.
    self.rollback()
    for top in path_list or (b'',):
      if not check:
        self.sync(top)
        self.clean(top)
        continue
      for found in self.check(top):
        if emit:
          emit(found + b'\0')

  def stat(self, path):
    """Return Stat(path) with its device number translated by dev_map.

    Raises KeyError when `path` is on a device that is not in dev_map.
    """
    result = Stat(path)
    result.dev = self.dev_map[result.dev]
    return result

  def rename(self, path, new_path, null_key=False):
    """Move the rows of `path` and its descendants to `new_path`.

    If `new_path` is false, these rows are deleted instead. With
    `null_key`, the inode of the row of `path` is first replaced by
    NULL_KEY. Moved descendants get checked=0. Everything is done in one
    transaction.
    """
    execute = self.con.execute
    bounds = self._minmax(path)
    execute("begin")
    try:
      if not new_path:
        execute("delete from fssync where path=?", (path,))
        execute("delete from fssync where ?<path and path<?", bounds)
      else:
        if null_key:
          execute("update fssync set inode=? where path=?",
                  (self.NULL_KEY, path))
        execute("delete from fssync where path=?", (new_path,))
        execute("update fssync set path=? where path=?", (new_path, path))
        # Replace the old prefix by the new one in all paths below `path`.
        execute("update fssync set path=cast(?||substr(path,?) as blob),"
                " checked=0 where ?<path and path<?",
                (new_path, 1+len(path)) + bounds)
    except BaseException:
      self.con.rollback()
      raise
    else:
      self.con.commit()

  def backup(self, path, null_key=False):
    """Rename `path` on the destination to a free '#fssync<N>.bak' name.

    The chosen name must exist neither under the local root nor in the
    database. A recovery row (backup name, `path`, null metadata) is
    inserted before the remote rename. If the remote reports that `path`
    is missing, that row is dropped and the rows of `path` are deleted
    instead of being moved.
    """
    index = 0
    while True:
      backup = path + ('#fssync%u.bak' % index).encode()
      index += 1
      if os.path.lexists(os.path.join(self.root, backup)):
        continue
      try:
        self.con.execute("insert into fssync values (?, ?, null, 0)",
                         (backup, path))
      except sqlite3.IntegrityError:
        # Name already used in the database: try the next one.
        continue
      break
    self.rpc.rename(path, backup)
    try:
      self.rpc.wait()
    except FileNotFoundError:
      logger.warning("missing %r on destination side", path)
      self.con.execute("delete from fssync where path=?", (backup,))
      backup = None
    self.rename(path, backup, null_key)

  def remove(self, path):
    """Remove `path` on the destination, then its row in the database.

    The row is first given a null inode and null metadata, so that an
    interruption before the final delete leaves a row that rollback()
    selects.
    """
    execute = self.con.execute
    key = (path,)
    execute("update fssync set inode=null, metadata=null where path=?", key)
    rpc = self.rpc
    rpc.remove(path)
    rpc.wait()
    execute("delete from fssync where path=?", key)

  def filter(self, path, stat):
    """Default filter: nothing is excluded (always returns None).

    set_filter() replaces it on the instance.
    """
    return None

  def set_filter(self, func, print=None):
    """Install `func` as the filter of this instance.

    `func` is called with (decoded root, decoded path, stat). When it
    returns a false value, that value is returned as is. Otherwise the path
    is excluded: if `print` is given, it is called with the NUL-terminated
    path, and the result is `print`'s return value if true, else 1.
    """
    root = decode(self.root)
    def path_filter(path, stat):
      verdict = func(root, decode(path), stat)
      if not verdict:
        return verdict
      return print and print(path + b'\0') or 1
    self.filter = path_filter

  @staticmethod
  def _minmax(path):
    """Return exclusive bounds of the paths located under `path`.

    A descendant p of `path` satisfies: path + sep < p < path + chr(sep+1).
    """
    sep = os.sep
    return path + sep.encode(), path + chr(ord(sep) + 1).encode()

  def non_empty(self, path, mode):
    """Tell whether `path` is a directory with tracked contents.

    Returns False if `mode` is not that of a directory. Otherwise returns
    a row (true) if the database has at least one path below `path`,
    else None.
    """
    if not stat.S_ISDIR(mode):
      return False
    cursor = self.con.execute(
      "select 1 from fssync where ?<path and path<?", self._minmax(path))
    return cursor.fetchone()

  def rollback(self):
    """Recover from an interrupted synchronization.

    Rows with null metadata are reverted on the destination with the
    'rollback' RPC and deleted. For rows whose metadata equals NULL_VALUE,
    the 'null_value' RPC is called: a true reply replaces the metadata,
    otherwise the row is deleted.
    """
    # There should not be more than 1 path to recover.
    execute = self.con.execute
    pending = execute("select * from fssync where"
                      " metadata is null or metadata=?",
                      (self.NULL_VALUE,)).fetchall()
    for path, other, metadata, _ in pending:
      if not metadata:
        self.rpc.rollback(path, other)
        self.rpc.wait()
      else:
        self.rpc.null_value(path)
        restored = self.rpc.wait()
        if restored:
          execute("update fssync set metadata=? where path=?",
                  (restored, path))
          continue
      execute("delete from fssync where path=?", (path,))

  def is_masked(self, path, _lstat=os.lstat, _sep=os.sep.encode()):
    """Find the first component of `path` that is on another device.

    `path` (bytes) is relative to the root and is walked one component at a
    time, each one being resolved relatively to the directory opened for
    the previous one (starting from root_fd).

    Returns the sub-path (bytes) up to and including the first component
    whose device differs from the one of the root. Returns None if there is
    no such component before reaching a non-directory, a missing entry or
    the end of `path`.

    Raises PermissionError if a directory can not be opened although it has
    not changed since it was examined.
    """
    parts = path.split(_sep)
    fd = self.root_fd
    fd_list = []
    try:
      for i, x in enumerate(parts):
        s = _lstat(x, dir_fd=fd)
        while True:
          if s.st_dev != self.root_dev:
            return _sep.join(parts[:i+1])
          if not stat.S_ISDIR(s.st_mode):
            return
          try:
            # dir_fd must be passed by keyword: the 3rd positional argument
            # of os.open is the creation mode.
            fd = os.open(x, os.O_NOATIME, dir_fd=fd)
            break
          except PermissionError:
            # The entry may have been replaced since lstat: examine it
            # again and only give up if nothing changed.
            ss = _lstat(x, dir_fd=fd)
            if ss == s:
              raise
            s = ss
        fd_list.append(fd)
    except FileNotFoundError:
      pass
    finally:
      # root_fd is never in fd_list, so it stays open.
      for fd in fd_list:
        os.close(fd)

  def sync(self, path):
    """Synchronize `path` (bytes, relative to the root) to the destination.

    Directories are processed recursively, children first. The database row
    of `path` is compared with the current local state to decide which RPCs
    (rename, link, remove, symlink, data and metadata synchronization) must
    be sent, and the row is finally stored with checked=1.

    Returns a true value when the parent directory must have its metadata
    synchronized again (see how the result is used for children below).
    Returns None when nothing was done: path filtered, on another
    filesystem, deleted meanwhile, being the database itself, or already
    checked.
    """
    sql = self.con.execute
    p = os.path.join(self.root, path)
    # Loop until we get a consistent view of the inode: it is retried when
    # the type changes between stat and listdir/readlink.
    while True:
      try:
        s = self.stat(p)
      except ( KeyError          # other filesystem
             , FileNotFoundError # oops, it has just been deleted
             ) as e:
        if not path:
          raise
        if isinstance(e, KeyError):
          self.masked.append(path)
        return
      if path and self.filter(path, s):
        return
      key = s.key
      if key == self.db_key:
        return
      fmt = stat.S_IFMT(s.mode)
      if fmt == stat.S_IFDIR:
        try:
          # it may raise if folder has just been deleted or replaced
          children = os.listdir(p)
        except OSError as e:
          if path:
            if e.errno == errno.ENOTDIR:
              continue
            if e.errno == errno.ENOENT:
              return
          raise
      elif fmt == stat.S_IFLNK:
        try:
          # it may raise if symlink has just been deleted or replaced
          target = os.readlink(p)
        except OSError as e:
          if path:
            if e.errno == errno.EINVAL:
              continue
            if e.errno == errno.ENOENT:
              return
          raise
      break
    # When creating a new inode, we must make sure we don't reuse one that
    # already has hardlinks. In below code, this is done in 2 places.
    sync_parent = False
    value = s.value
    inode = dumps(key, 2)
    same_key_rows = sql('select * from fssync where inode=?',
                        (inode,)).fetchall()
    # From here, 'r' describes what is known about `path` on the destination:
    # true (True or a Stat) if the path is tracked, false otherwise.
    for same_key in same_key_rows:
      if same_key[0] == path:
        if same_key[3]: # checked
          return
        r = True
        break
    else:
      r = sql('select * from fssync where path=?', (path,)).fetchall()
      if r:
        (_, i, metadata, checked), = r
        if checked:
          # We got interrupted and FS changed. We'll sync at a next time.
          return
        old_key = loads(i)
        r = Stat.load(old_key, loads(metadata))
        old_fmt = stat.S_IFMT(r.mode)
        if old_key == Stat.NULL_KEY or \
           sql('select path from fssync where inode=? and path!=?',
               (i, path)).fetchone():
          # Remove remote ...
          if self.non_empty(path, r.mode):
            # ... but not directory contents, so delay removal.
            # Even if local inode is a directory, such cleanup is
            # required in case we want to rename from another path.
            self.backup(path, True)
          elif (# ... hardlinks of same type
                fmt == old_fmt or
                # and non-directory so that directory can be created.
                fmt == stat.S_IFDIR):
            # 1st case of non-reusable inode.
            # No need to backup because the old inode is either marked for
            # deletion or reachable via another path.
            self.remove(path)
          elif path:
            # Nothing to do here. It was not a directory and type differs so
            # it'll be deleted first automatically and we can avoid 1 RPC.
            logger.debug("let remote remove %r", path)
        else:
          self.backup(path)
        r = None
      elif self.rpc is None:
        # Assume remote is already up-to-date.
        r = s
        same_key_rows = None
    if same_key_rows:
      assert len({x[2] for x in same_key_rows}) == 1, same_key_rows
      x = Stat.load(key, loads(same_key[2]))
      old_fmt = stat.S_IFMT(x.mode)
      if fmt == stat.S_IFLNK == old_fmt and (value != x.value or self.force):
        # Unfortunately, symlinks can't be edited.
        for same_key in same_key_rows:
          self.rpc.readlink(same_key[0])
          try:
            if self.rpc.wait() != target:
              old_fmt = None
            else:
              target = None
            break
          except FileNotFoundError:
            pass
        else:
          r = old_fmt = None
        if value == x.value and target:
          logger.info("--force: syncing target of %r", path)
      if fmt != old_fmt:
        # Inode of different type but same number. Mark for deletion.
        sql("update fssync set inode=? where inode=?", (self.NULL_KEY, inode))
        if r:
          # Wow, even the path is the same.
          # This the 2nd place of non-reusable inode, so delete it now.
          (self.backup if self.non_empty(path, old_fmt or 0) else
           self.remove)(path)
          r = None
      elif r:
        # Most common case (inode not touched or only edited).
        r = x
      else:
        # Inode is the result of a rename or hardlink.
        sync_parent = True
        for backup, inode, metadata, checked in same_key_rows:
          if not checked:
            sub_root = self.is_masked(backup)
            if sub_root:
              sys.exit(
                "Can not synchronize %r because %r has/had same inode."
                " %r must be unmounted first (if it's not a mount point,"
                " e.g. a subvolume, then move it temporarily outside %r)."
                % tuple(decode(x) for x in (path, backup, sub_root, self.root)))
          # If we get interrupted, recovery phase must revert hardlink/rename.
          sql("insert or replace into fssync values (?, ?, null, 0)",
              (path, backup))
          try:
            if fmt == stat.S_IFDIR:
              self.rpc.rename(backup, path)
              self.rpc.wait()
              self.rename(backup, path)
            else:
              self.rpc.link(backup, path)
              self.rpc.wait()
              sql("update fssync set inode=?, metadata=? where path=?",
                  (inode, metadata, path))
            r = x
            break
          except FileNotFoundError:
            logger.warning("missing %r on destination side", backup)
            self.rename(backup, None)
        else:
            # All hardlinks are missing on remote, so create new.
            logger.warning("... create new inode for %r", path)

    if not r:
      # At this point, it is possible that the parent directory hasn't been
      # touched on destination side and we don't do it because the file has
      # disappeared locally. This happens rarely enough not to be an issue.
      # What's important is that sync_parent is never False when it shouldn't
      # and we prefer to keep the code simple enough.
      sync_parent = True
      # If an error happens before the final SQL "update", and path disappears
      # before next sync, then we want to be sure it'll be removed on remote.
      sql("insert or replace into fssync values (?, ?, ?, 0)",
          (path, self.NULL_KEY, dumps(s.null_value, 2)))
    if fmt == stat.S_IFDIR:
      # Process contents before because this may alter permissions or
      # modification time.
      for name in children:
        if self.sync(os.path.join(path, name)):
          # Force update of metadata because synchronization of children
          # changed the modification time on destination.
          r = None
    same = r and value == r.value
    if not same or self.force and fmt == stat.S_IFREG:
      if fmt == stat.S_IFREG:
        try:
          f = os.fdopen(os.open(p, os.O_NOATIME), 'rb')
        except (FileNotFoundError, IsADirectoryError):
          if path:
            return sync_parent
          raise
        try:
          # Make sure the opened file is still the inode we stat'ed.
          s2 = os.fstat(f.fileno())
          if key != (self.dev_map.get(s2.st_dev, ()), s2.st_ino) \
             or not stat.S_ISREG(s2.st_mode):
            return sync_parent
          # 'checked' is the offset of the next block to compare and
          # 'unchecked' the number of bytes remaining after it.
          checked = 0
          unchecked = s.size
          if r:
            # For the very rare case where:
            # - we're interrupted before syncing metadata
            # - and source file is restored in its previous state
            # we mark this path as dirty to force resync on next run.
            sql("insert or replace into fssync values (?, ?, ?, 0)",
                (path, inode, dumps(s.null_value, 2)))
          if (r and r.size or 0) < unchecked:
            sparse = s.blocks * 512 < unchecked
            if sparse or self.prealloc and BLOCK_SIZE < unchecked:
              self.rpc.truncate(path, unchecked, sparse)
              self.rpc.wait()
          while unchecked > 0:
            part_size = min(PART_SIZE, unchecked)
            # The remote hashes its blocks while we hash ours.
            self.rpc.check_data(path, checked, part_size, BLOCK_SIZE)
            local_hash_list = list(check_data(f, part_size, BLOCK_SIZE))
            remote_hash_list = self.rpc.wait()
            if not local_hash_list:
              break # local file truncated during check
            if not r:
              # This file is new. Make sure we won't have to restart from the
              # beginning if we get interrupted, which can be really annoying
              # for big files.
              sql("insert or replace into fssync values (?, ?, ?, 0)",
                  (path, inode, dumps(s.null_value, 2)))
              r = True
            diff_list = []
            for i, h in enumerate(local_hash_list):
              if remote_hash_list[i] != h:
                f.seek(checked)
                diff_list.append((checked, f.read(BLOCK_SIZE)))
              checked += BLOCK_SIZE
              unchecked -= BLOCK_SIZE
            if diff_list:
              self.rpc.sync_data(path, diff_list)
              if same:
                logger.info("--force: syncing data of %r", path)
                same = False
              self.rpc.wait()
            f.seek(checked)
        except IndexError: # remote file is smaller
          if same:
            logger.info("--force: syncing data of %r", path)
          # Send the differences found so far, then all the remaining data
          # without comparing.
          if diff_list:
            self.rpc.sync_data(path, diff_list)
            self.rpc.wait()
          f.seek(checked)
          while unchecked > 0:
            self.rpc.sync_data(path, ((checked, f.read(PART_SIZE)),))
            checked += PART_SIZE
            unchecked -= PART_SIZE
            self.rpc.wait()
        finally:
          f.close()
      elif fmt == stat.S_IFLNK and target:
        assert sync_parent, path
        self.rpc.symlink(path, target)
        self.rpc.wait()
      self.rpc.sync_meta(path, s) # creates the file/dir
      if self.rpc.wait():         # (and parents) automatically
        sync_parent = True
    sql("begin")
    try:
      # Do not use 'update' because we didn't insert anything
      # if self.rpc is None.
      sql("insert or replace into fssync values (?, ?, null, 1)", (path, inode))
      # All rows sharing this inode (hardlinks) get the new metadata.
      sql("update fssync set metadata=? where inode=?",
          (dumps(value, 2), inode))
    except:
      self.con.rollback()
      raise
    self.con.commit()
    return sync_parent

  def clean(self, path):
    """Remove from the destination what sync() did not visit.

    Rows of `path` and below (all rows if `path` is empty) that still have
    checked=0 are removed with a single 'removemany' RPC, deepest paths
    first, and deleted from the database. Paths recorded in self.masked,
    and what is below them, are left alone. Finally, the 'checked' flag is
    reset for the processed scope.
    """
    execute = self.con.execute
    if path:
      scope = "(path=? or ?<path and path<?) and"
      scope_args = (path,) + self._minmax(path)
    else:
      scope = ''
      scope_args = ()
    condition = " from fssync where %s checked=0" % scope
    params = list(scope_args)
    if self.masked:
      for masked in self.masked:
        params.append(masked)
        params.extend(self._minmax(masked))
      condition += " and not (%s)" % " or ".join(
        "path=? or ?<path and path<?" for _ in self.masked)
    rows = execute("select path" + condition, params).fetchall()
    stale = sorted((row[0] for row in rows), reverse=True)
    if stale:
      self.rpc.removemany(stale)
      self.rpc.wait()
      execute("delete" + condition, params)
    execute("update fssync set checked=0 where %s 1" % scope, scope_args)

  def check(self, path):
    """Compare the database with the destination and fix the database.

    This is a generator. The rows of `path` and below (all rows if `path`
    is empty) are sent to the remote, after a 'check' request, in batches
    of up to 100. For each (path, metadata) pair the remote replies with,
    the row is updated or deleted and the path is yielded. Once all rows
    have been sent, the remote reports the paths it has that were not
    sent: those not rejected by self.filter are logged and inserted with
    checked=0.
    """
    if path:
      args = (path,) + self._minmax(path)
      where = "where path=? or ?<path and path<?"
    else:
      args = ()
      where = ""
    sql = self.con.execute
    fetchone = sql("select path, inode, metadata from fssync "
                   + where + " order by path", args).fetchone
    # Scratch object, only used to compute a null_value from a mode.
    s = Stat.__new__(Stat)
    self.rpc.check(path)
    while True:
      row_list = []
      try:
        for x in range(100):
          path, inode, metadata = fetchone()
          row_list.append((path, loads(inode), loads(metadata)))
      except TypeError:
        # fetchone() returned None (no more rows), which can't be unpacked.
        if not row_list:
          break
      self.rpc.send(row_list)
      for path, metadata in self.rpc.wait():
        if type(metadata) is tuple:
          # NOTE(review): presumably the value found on the destination;
          # owner and ACL are reset to None before storing it - confirm
          # against Remote._check.
          s = Stat.load(Stat.NULL_KEY, metadata)
          s.gid = s.uid = s.acl = None
          sql("update fssync set metadata=? where path=?",
              (dumps(s.value, 2), path))
        elif metadata:
          # Any other true reply is used as a mode: the row is marked with
          # NULL_KEY and a null value of that type.
          s.mode = metadata
          sql("update fssync set inode=?, metadata=? where path=?",
              (self.NULL_KEY, dumps(s.null_value, 2), path))
        else:
          sql("delete from fssync where path=?", (path,))
        yield path
    # None ends the list of batches; the reply lists untracked paths.
    self.rpc.send(None)
    untracked = []
    for s, path_list in self.rpc.wait():
      untracked.extend((p, s) for p in path_list)
    untracked.sort()
    # Prefixes (with trailing separator) of filtered paths: anything below
    # them is skipped.
    ignored = []
    for path, s in untracked:
      if not any(path.startswith(p) for p in ignored):
        if type(s) is tuple:
          s = Stat.load(Stat.NULL_KEY, s)
        if self.filter(path, s):
          ignored.append(os.path.join(path, b''))
        else:
          logger.info("unexpected %r on destination side", path)
          sql("insert into fssync values (?, ?, ?, 0)",
              (path, dumps(s.key, 2), dumps(s.value, 2)))


class Remote:
  """Endpoint that serves RPC requests and applies file operations.

  Instances are callable (see __call__): requests whose first argument is a
  bytes path get that path joined to 'root' before the method is called.
  """

  # (path, write) of the file currently kept open by _open(), or None.
  _open_args = None
  # By default, uid/gid are returned unchanged; pwd_grp() overrides these
  # on the instance.
  _pwd = _grp = staticmethod(lambda id: id)

  def __init__(self, root):
    """Store 'root' (bytes), the directory that request paths are joined to."""
    assert isinstance(root, bytes), root
    self.root = root

  def _open(self, path, write=False):
    if self._open_args != (path, write):
      self._close()
      self._open_file = os.fdopen(os.open(path,
        os.O_RDWR if write else os.O_NOATIME), 'r+b' if write else 'rb')
      self._open_args = path, write
    return self._open_file

  def _close(self):
    if self._open_args:
      self._open_file.close()
      del self._open_args, self._open_file

  def __call__(self, stdin, stdout):
    """Serve RPC requests read from 'stdin' and write results to 'stdout'.

    Each request decodes to a (method, args, kw) triple. The result of the
    call, or the exception it raised, is serialized and written back.
    The loop ends silently at EOF. A 'reverse' request also ends it, after
    which a Local instance is run over the same streams.
    """
    while True:
      try:
        method, args, kw = loads(read_rpc(stdin))
      except EOFError:
        return
      try:
        try:
          m = getattr(self, method)
        except AttributeError:
          # Request names that are not attributes of this object.
          if method == 'reverse':
            # Leave the loop: roles are swapped below.
            break
          if method == 'print':
            # No reply is written for this request.
            sys.stdout.buffer.write(*args)
            continue
          if method == 'check':
            result = self._check(stdin, stdout, *args, **kw)
          else:
            raise
        else:
          # A bytes first argument is a path relative to the root.
          if isinstance(args[0], bytes):
            args = (os.path.join(self.root, args[0]),) + args[1:]
          result = m(*args, **kw)
      except Exception as e:
        # The exception is logged here and sent back as the result.
        logger.exception('%s(%r)', method, args[0])
        result = e
      write_rpc(stdout, dumps(result))
    # Only reached after a 'reverse' request: args[0] and args[1] are passed
    # to Local and RpcClient respectively, the remaining ones to the call.
    Local(self.root, args[0], RpcClient(stdin, stdout, args[1]))(
      *args[2:], **kw)

  def pwd_grp(self, p, g):
    p = dict((p.get(x.pw_name), x.pw_uid) for x in pwd.getpwall())
    g = dict((g.get(x.gr_name), x.gr_gid) for x in grp.getgrall())
    p.pop(None, None); self._pwd = p.__getitem__
    g.pop(None, None); self._grp = g.__getitem__

  def map_acl(self, acl):
    return acl and [(t, None if q is None else
        (self._pwd if t >> 3 == ACL_USER else self._grp)(q))
      for t, q in acl]

  @staticmethod
  @contextmanager
  def _preserve(path_list, parent=False):
    # XXX: This is the only place where fssync should not be terminated,
    #      or directories may end up with wrong permissions/timestamps.
    if parent:
      path_list = frozenset(map(os.path.dirname, path_list))
    parent_list = []
    try:
      for path in path_list:
        try:
          s = os.lstat(path)
        except FileNotFoundError:
          continue
        mode = None if os.access(path, 2) else stat.S_IMODE(s.st_mode)
        parent_list.append((path, s.st_mtime_ns, mode))
        if mode is not None:
          os.chmod(path, 0o700)
      yield
    finally:
      for path, mtime_ns, mode in parent_list:
        if mode is not None:
          os.chmod(path, mode)
        os.utime(path, ns=(UTIME_OMIT, mtime_ns), follow_symlinks=False)

  def removemany(self, path_list):
    with self._preserve(os.path.join(self.root, path)
        for path in set(map(os.path.dirname, path_list)).difference(path_list)):
      for path in path_list:
        self.remove(os.path.join(self.root, path))

  @staticmethod
  def isdir(path):
    try:
      return stat.S_ISDIR(os.lstat(path).st_mode)
    except FileNotFoundError:
      pass

  def remove(self, path):
    try:
      os.remove(path)
    except IsADirectoryError:
      os.rmdir(path)
    except PermissionError:
      os.chmod(os.path.dirname(path), 0o700)
      (os.rmdir if self.isdir(path) else os.remove)(path)
    except FileNotFoundError:
      pass

  # Return the target of a symlink: os.readlink exposed as a method.
  readlink = staticmethod(os.readlink)

  def link(self, path, new_path):
    new_path = os.path.join(self.root, new_path)
    if os.path.lexists(new_path):
      if os.path.samestat(os.lstat(path), os.lstat(new_path)):
        return
      self.remove(new_path)
    else:
      self._makeparents(new_path)
    os.link(path, new_path)

  def rename(self, path, new_path):
    """Move 'path' to 'new_path' (relative to the root).

    An existing destination is removed first; otherwise the destination's
    parent directory is prepared with _makeparents(). Mode and mtime of the
    source's parent directory are kept by _preserve().
    """
    target = os.path.join(self.root, new_path)
    with self._preserve((path,), True):
      if not os.path.lexists(target):
        self._makeparents(target)
      else:
        self.remove(target)
      os.rename(path, target)

  def rollback(self, path, other):
    if other is not None and self.isdir(path):
      other = os.path.join(self.root, other)
      with self._preserve((other,), True):
        # If src & dst are the same inode, this is a no-op.
        os.rename(path, other)
    self.remove(path)

  def null_value(self, path):
    """Return the 'null_value' of the Stat of 'path', or None if missing."""
    try:
      result = Stat(path).null_value
    except FileNotFoundError:
      result = None
    return result

  @staticmethod
  def _makeparents(path):
    d = os.path.dirname(path)
    if os.path.lexists(d):
      if not os.access(d, 2):
        os.chmod(d, 0o700)
    else:
      os.makedirs(d, 0o700)

  def sync_meta(self, path, l):
    """Make the entry at 'path' match the metadata described by 'l'.

    The entry is created if it is missing, and replaced if it has another
    file type. Then size (regular files only), access ACL or permissions,
    owner, modification time, default ACL and extended attributes are
    updated where they differ from the current state.

    path -- path of the entry to update
    l    -- wanted state; the attributes read here are mode, size, rdev,
            acl, uid, gid, mtime_ns and attr

    Return True if the entry had to be created or replaced, else False.
    Raise FileNotFoundError if the entry is missing and is a symlink or a
    non-empty regular file, which are not created here.
    """
    x = stat.S_IFMT(l.mode)
    if x == stat.S_IFREG:
      # Close before using Stat, otherwise s.mtime_ns will be wrong.
      self._close()
    mode = stat.S_IMODE(l.mode)
    size = l.size
    sync_parent = False
    # Loop until 'path' exists with the wanted file type: each iteration
    # that does not break creates the entry and stats it again.
    while True:
      try:
        s = Stat(path)
      except FileNotFoundError:
        if x == stat.S_IFLNK or x == stat.S_IFREG and size:
          raise
      else:
        if x == stat.S_IFMT(s.mode):
          break
        # Wrong file type: replace the entry.
        self.remove(path)
      sync_parent = True
      self._makeparents(path)
      if x == stat.S_IFDIR:
        os.mkdir(path, mode)
      elif x == stat.S_IFREG:
        os.close(os.open(path, os.O_CREAT, mode))
      else:
        os.mknod(path, l.mode, l.rdev)
    if x == stat.S_IFREG:
      if size < s.size or size <= s.blocks * 512 - s.blksize:
        # l.size > s.size implies that local file was truncated during sync:
        # we don't want the backup to become sparse and next run will fix this
        # We also check number of blocks in case we allocated more disk space
        # than necessary.
        try:
          os.truncate(path, size)
        except PermissionError:
          # Add owner write permission and retry; 's.mode' is updated so that
          # the mode comparison below sees the change.
          s.mode |= 0o200
          os.chmod(path, s.mode)
          os.truncate(path, size)
        if s.size <= size:
          # Do it twice if we don't reduce file size,
          # to force FS to free extra space.
          os.truncate(path, size)
        s.mtime_ns = None # force update of modification time
    # Element 0 of the acl pair is handled here together with the mode;
    # element 1 is handled further below with 'delete_default'.
    acl = self.map_acl(l.acl and l.acl[0])
    if acl != (s.acl and s.acl[0]):
        (load_acl(acl) if acl else ACL(mode=mode)).applyto(path)
    elif mode != stat.S_IMODE(s.mode):
      os.chmod(path, mode)
    # From here on, 'x' is reused as a scratch variable.
    x = self._pwd(l.uid), self._grp(l.gid)
    if x != (s.uid, s.gid):
      os.lchown(path, *x)
    x = l.mtime_ns
    if x != s.mtime_ns:
      os.utime(path, ns=(UTIME_OMIT, x), follow_symlinks=False)
    acl = self.map_acl(l.acl and l.acl[1])
    if acl != (s.acl and s.acl[1]):
      (load_acl(acl).applyto if acl else delete_default)(path)
    x = l.attr
    if x != s.attr:
      # Remove the extended attributes that are not wanted anymore, then
      # set all the wanted ones.
      for attr in set(s.attr or ()).difference(x or ()):
        os.removexattr(path, attr, follow_symlinks=False)
      if x:
        for attr, value in x.items():
          os.setxattr(path, attr, value, follow_symlinks=False)
    return sync_parent

  def _makereg(self, path):
    try:
      if stat.S_ISREG(os.lstat(path).st_mode):
        return True
    except FileNotFoundError:
      self._makeparents(path)
    else:
      self.remove(path)
    return False

  def check_data(self, path, start, size, block_size):
    if start or self._makereg(path):
      f = self._open(path)
      try:
        f.seek(start)
        return tuple(check_data(f, size, block_size))
      except:
        self._close()
        raise
    os.close(os.open(path, os.O_CREAT|os.O_EXCL, 0o600))
    return ()

  def sync_data(self, path, diff_list):
    try:
      f = self._open(path, True)
    except PermissionError:
      os.chmod(path, 0o600)
      f = self._open(path, True)
    try:
      for offset, data in diff_list:
        f.seek(offset)
        f.write(data)
    except:
      self._close()
      raise

  def symlink(self, path, target):
    """Create a symlink at 'path' pointing to 'target'.

    The parent directory is prepared with _makeparents() and anything
    already at 'path' is removed first.
    """
    self._makeparents(path)
    self.remove(path)
    os.symlink(target, path)

  def truncate(self, path, size, sparse):
    self._makereg(path)
    try:
      fd = os.open(path, os.O_WRONLY|os.O_CREAT, 0o600)
    except PermissionError:
      os.chmod(path, 0o600)
      fd = os.open(path, os.O_WRONLY)
    try:
      if sparse:
        os.ftruncate(fd, size)
      else:
        _fallocate(fd, 0, size, True)
    finally:
      os.close(fd)

  def _check(self, stdin, stdout, root):
    """Compare rows received from the peer with the file system.

    stdin, stdout -- streams used with read_rpc()/write_rpc()
    root          -- path, relative to self.root, where the walk starts

    Batches of (path, inode, metadata) rows are read from 'stdin' until a
    false value is received. For each batch, a list of (path, metadata)
    pairs is written to 'stdout' for the rows that do not match, where
    metadata is the value found on the file system (tuple), the file type
    if the row matches except for hard links, or None if the path is
    missing.

    Return a list of (stat, path_list) pairs for the entries found on the
    file system without a matching row; 'stat' is a Stat object if the
    inode is known from a received row, else the raw value.
    """
    inode_map = {}  # inode of received rows -> key of the matching inode
    inodes = set()  # keys already associated with a received inode
    untracked = {}  # key -> (value, [paths]) for entries without a row
    def walk():
      # Yield (path, Stat) for 'root' and everything below it, followed by
      # a final (INF, None). Listing a directory is deferred (next_dir)
      # while there are remaining siblings, presumably so that paths are
      # produced in the same order as the rows sent by the peer.
      join = os.path.join
      next_dir = []
      d = [(b'', deque((root,)))]
      while d:
        r, names = d.pop()
        # Also loop when names are exhausted at every level but deferred
        # directories remain: no later name will trigger their listing,
        # and their contents would otherwise never be walked.
        while names or (next_dir and not d):
          if names:
            p = names.popleft()
            path = r + p
            deferred = bool(next_dir) and next_dir[-1] < path
            if deferred:
              names.appendleft(p)
          else:
            deferred = True
          if deferred:
            path = next_dir.pop()
            p = join(self.root, path)
          else:
            p = join(self.root, path)
            s = Stat(p)
            yield path, s
            if not stat.S_ISDIR(s.mode):
              continue
            path = join(path, b'')
            if names:
              next_dir.append(path)
              continue
          try:
            p = os.listdir(p)
          except PermissionError:
            # An unreadable directory is only tolerated if it is itself
            # untracked.
            path = os.path.dirname(path)
            if any(path in p[1] for p in untracked.values()):
              continue
            raise
          p.sort()
          d.append((r, names))
          r = path
          names = deque(p)
      yield INF, None
    walk = walk()
    p, s = next(walk)
    while True:
      item_list = loads(read_rpc(stdin))
      if not item_list:
        break
      path_list = []
      for path, inode, metadata in item_list:
        l = Stat.load(inode, metadata)
        # Everything walked before 'path' has no row.
        while p < path:
          try:
            untracked[s.key][1].append(p)
          except KeyError:
            untracked[s.key] = s.value, [p]
          p, s = next(walk)
        if p == path:
          key = s.key
          # Hard link consistency: rows sharing an inode must be found on
          # the same file system inode, and conversely.
          if inode_map.get(inode) != key if key in inodes \
                                         else inode in inode_map:
            metadata = stat.S_IFMT(s.mode)
          else:
            inode_map[inode] = key
            inodes.add(key)
            metadata = s.value
          p, s = next(walk)
        else:
          metadata = None
        if l.uid is not None:
          l.uid = self._pwd(l.uid)
          l.gid = self._grp(l.gid)
          if l.acl:
            l.acl = self.map_acl(l.acl[0]), self.map_acl(l.acl[1])
        elif type(metadata) is tuple:
          continue # already marked for resync
        if l.value != metadata:
          if type(metadata) is tuple:
            logger.warning("%s: DB %r differs from FS %r",
                           path, l.value, metadata)
          else:
            logger.warning("%s: wrong hardlink" if metadata else "%s: missing",
                           path)
          path_list.append((path, metadata))
      write_rpc(stdout, dumps(path_list))
    # Whatever remains in the walk has no row either.
    while s:
      try:
        untracked[s.key][1].append(p)
      except KeyError:
        untracked[s.key] = s.value, [p]
      p, s = next(walk)
    r = []
    if untracked:
      # Untracked paths that are hard links to an inode known from a row
      # are reported with a Stat carrying that row's inode.
      for inode, key in inode_map.items():
        try:
          metadata, p = untracked.pop(key)
        except KeyError:
          continue
        r.append((Stat.load(inode, metadata), p))
      r += untracked.values()
    return r


class SysLogHandler(logging.handlers.SysLogHandler):
  """Logging handler that emits records through the 'syslog' module.

  Only the priority mapping of the standard handler is reused: construction
  and closing are those of the base logging.Handler.
  """

  __init__ = logging.Handler.__init__
  close = logging.Handler.close

  def emit(self, record):
    """Send each line of the formatted record as a separate syslog message."""
    try:
      priority_name = self.mapPriority(record.levelname)
      priority = self.priority_names[priority_name]
      message = self.format(record)
      for line in message.splitlines():
        syslog.syslog(priority, line)
    except Exception:
      self.handleError(record)


def main():
  """Command-line entry point.

  Parse arguments, set up logging and signal handlers, then either serve
  RPC requests on standard input/output (--remote) or drive a
  synchronization from this host, optionally in reverse direction.
  """
  parser = argparse.ArgumentParser(
    description="File system synchronization tool")
  _ = parser.add_argument
  _('--remote', type=int, metavar="VERSION",
              help="Used on the remote host to process RPC from local host.")
  _('-l', '--logfile',
              help="Output logging messages to specified file. If unset,"
                   " they are logged to stderr (local) or to syslog (remote)."
                   " Passing /dev/null disables logging.")
  _('-r', '--root', required=True, type=encode,
              help="Root path of dirs/files to synchronize.")
  _('-v', '--verbose', action='store_true',
              help="Increase verbosity to DEBUG level. Default level is INFO.")
  # Options that only make sense on the local side: they are rejected
  # with --remote, and the valued ones are mostly required without it.
  local_options = (
    _('--reverse', action='store_true',
              help="Synchronize data from remote(-R, -L, -d, -f)"
                   " to local(-r, -l), instead of doing it"
                   " from local(-r, -l, -d, -f) to remote(-R, -L)."
                   " This option is useful when the SSH connection can be"
                   " established in only 1 way."
                   " It is not an option for 2-way synchronization:"
                   " data must always be synchronized in the same direction."
                   " This options conflits with --print0 and host=-"),
    _('--print0', action='store_true',
              help="Print filtered paths on the standard output, followed by a"
                   " null character. When used with --check, this prints"
                   " paths that don't match database. You can pipe result to"
                   " \"tr -s '\\0' '\\n'\" if you want newline separators."),
    _('-a', '--allocate', action='store_true',
              help="Preallocate disk space on destination file system for"
                   " non-sparse files. This reduces disk fragmentation but"
                   " prevent files from being compressed on Btrfs."),
    _('-c', '--check', action='store_true',
              help="Check that database matches what's on destination side:"
                   " database is fixed if there's any difference. Inodes that"
                   " are only on destination are marked for removal if they're"
                   " not filtered (see --filter option). You can then run"
                   " fssync again without this option to fix errors, optionally"
                   " with --force. -a has no effect with this option."),
    _('-d', '--db',
              help="File path to database that maintains"
                   " the state of files on the remote side."),
    _('-f', '--filter',
              help="Python expression that evaluates to a function"
                   " which takes 3 arguments (root, p, s) and returns"
                   " True if 'root/p' must be ignored. 's' holds some"
                   " stats about 'root/p': see 'Stat' class."),
    _('-F', '--force', action='store_true',
              help="Check data (including symlink targets) on destination even"
                   " if metadata don't differ between source and database."
                   " An INFO log is output if data differ between source and"
                   " destination. --check should be used first if you suspect"
                   " any difference."),
    _('-m', '--map-users', action='store_true',
              help="Map uid/gid, including in ACLs, so that names don't change"
                   " between local and remote hosts. This requires remote to"
                   " define all users/groups that may appear locally."),
    _('-L', '--remote-logfile',
              help="Set '-l' option for remote end."),
    _('-R', '--remote-root',
              help="Set '-r' option for remote end. Default to '-r' value."),
    _('-X', '--remote-executable',
              help="Path to fssync executable on remote side."
                   " If unset, it is guessed from the 0-th argument."),
    _('host', nargs='?',
              help='SSH to connect (syntax: [user@]host[:port]).'
                   " Can be '-' to initialize database, provided you"
                   " synchronized everything by other means (like rsync)."))
  _('path', nargs='*', type=encode,
              help="Synchronize only these entries"
                   " (paths must be relative to --root).")
  args = parser.parse_args()

  if not args.root:
    parser.error('empty -r/--root argument')

  if args.logfile == os.devnull:
    logging.disable(logging.CRITICAL)
  else:
    format = '%(asctime)s %(levelname)s %(message)s'
    if args.logfile:
      handler = logging.FileHandler(args.logfile)
    elif args.remote:
      handler = SysLogHandler()
      format = '%(message)s'
    else:
      handler = logging.StreamHandler()
    handler.setFormatter(logging.Formatter(format))
    root = logging.getLogger()
    root.setLevel(logging.DEBUG if args.verbose else logging.INFO)
    root.addHandler(handler)

  # Turn these signals into SystemExit so that cleanup code runs.
  signal.signal(signal.SIGHUP, lambda *args: sys.exit(-1))
  signal.signal(signal.SIGTERM, lambda *args: sys.exit())

  if args.remote:
    if args.remote != PROTOCOL_VERSION:
      sys.exit('Wrong protocol: make sure you have same version of fssync'
               ' on both sides.')
    for a in local_options:
      v = getattr(args, a.dest)
      # Flags (nargs == 0) conflict when set, other arguments when given.
      if v is a.const if a.nargs == 0 else v is not None:
        # Positional arguments have no option string and no metavar here:
        # fall back to 'dest', which is always a string.
        parser.error('conflicting options --remote and ' +
          ('/'.join(a.option_strings) or a.dest))

    Remote(args.root)(sys.stdin.buffer, sys.stdout.buffer.raw)

  else:
    if args.remote_executable is None:
      args.remote_executable = os.path.realpath(sys.argv[0])
    if args.remote_root is None:
      args.remote_root = decode(args.root)
    for a in local_options:
      # Every valued local argument is mandatory and must be non-empty,
      # except --filter and --remote-logfile.
      if a.nargs != 0 and not (getattr(args, a.dest) or a.dest in (
          'filter', 'remote_logfile')):
        parser.error('empty or missing %s argument' %
          ('/'.join(a.option_strings) or a.dest))
    if args.check and args.force:
      parser.error("conflicting options --check and --force")

    if args.host == '-':
      # No peer: see the help of the 'host' argument.
      rpc = None
    else:
      opt = ['--remote', str(PROTOCOL_VERSION), '-r', args.remote_root]
      if args.remote_logfile:
        opt += '-l', args.remote_logfile
      if args.verbose:
        opt.append('-v')
      rpc = RpcSshClient(args.host,
        format_command(args.remote_executable, *opt),
        args.map_users and not args.reverse)
    action = (args.filter, args.path, args.check,
              args.print0, args.allocate, args.force)
    if args.reverse:
      if rpc is None:
        parser.error("conflicting options --reverse and HOST=-")
      # The peer runs the synchronization and this process serves its
      # requests.
      rpc.reverse(args.db, args.map_users, *action)
      Remote(args.root)(rpc.stdin, rpc.stdout)
    else:
      Local(args.root, args.db, rpc)(*action)

# Run the command-line entry point when executed as a script; the return
# value of main() is passed to sys.exit().
if __name__ == '__main__':
  sys.exit(main())