Description: Support zsys systems
 Zsys is an enhanced and structured dataset layout for ZFS.
 .
 It enables advanced use cases by differentiating system,
 user data and persistent partitions to allow only partial
 permanent or temporary rollback without destroying intermediate
 snapshots.
Author: Jean-Baptiste Lallement <jean.baptiste@ubuntu.com>
        Didier Roche <didrocks@ubuntu.com>
Last-Update: 2019-06-06
Index: zfs-linux-2.1.2/etc/systemd/system-generators/zfs-mount-generator.in
===================================================================
--- zfs-linux-2.1.2.orig/etc/systemd/system-generators/zfs-mount-generator.in
+++ zfs-linux-2.1.2/etc/systemd/system-generators/zfs-mount-generator.in
@@ -30,6 +30,8 @@ FSLIST="@sysconfdir@/zfs/zfs-list.cache"
 [ -d "${FSLIST}" ] || exit 0
 [ "$(echo "${FSLIST}"/*)" = "${FSLIST}/*" ] && exit 0
 
+OLD_IFS=$IFS
+
 do_fail() {
   printf 'zfs-mount-generator: %s\n' "$*" > /dev/kmsg
   exit 1
@@ -138,6 +140,9 @@ process_line() {
     fi
   done
 
+  # Escape the mountpoint per systemd policy.
+  mountfile="$(systemd-escape --path --suffix=mount "${p_mountpoint}")"
+
   if [ -n "${p_systemd_after}" ] && \
       [ "${p_systemd_after}" != "-" ] ; then
     after="${p_systemd_after} ${after}"
@@ -163,6 +168,62 @@ process_line() {
       [ "${p_encroot}" != "-" ] ; then
     keyloadunit="zfs-load-key-$(systemd-escape "${p_encroot}").service"
     if [ "${p_encroot}" = "${dataset}" ] ; then
+
+      # Automount and unmount ZSys USERDATA datasets with keystore
+      zsys_automount=0
+      automount_loadkey_extra_args=""
+      automount_pool=""
+      automount_user=""
+      if echo "${dataset}" | grep -q '/USERDATA/'; then
+        automount_pool=${dataset%%/*}
+        automount_user=${dataset##*/}
+        # Only operate on user dataset mountpoint itself and not its children
+        if ! echo "${automount_user}" | grep -q '/'; then
+          automount_user=${automount_user%%_*}
+          # Ensure we have a keystore
+          if [ -f "/run/keystore/${automount_pool}/${automount_user}.enc" -a -x /usr/sbin/user_keystore ]; then
+            zsys_automount=1
+          fi
+        fi
+      fi
+
+      # Create automount unit and keystore tracker
+      if [ ${zsys_automount} -eq 1 ]; then
+        automountunit="$(systemd-escape --path --suffix=automount "${p_mountpoint}")"
+      echo \
+"# Automatically generated by zfs-mount-generator
+
+[Unit]
+Description=Automount ZFS user home for ${dataset} on demand
+
+[Automount]
+Where=${p_mountpoint}
+TimeoutIdleSec=10
+
+[Install]
+WantedBy=local-fs.target
+"   > "${dest_norm}/${automountunit}"
+        create_dependencies "${automountunit}" "wants" "local-fs.target"
+
+        keystoreunit="zfs-keystore-$(systemd-escape "${p_encroot}").service"
+        automount_loadkey_extra_args="BindsTo=${mountfile}
+BindsTo=${keystoreunit}
+After=${keystoreunit}"
+        echo \
+"# Automatically generated by zfs-mount-generator
+
+[Unit]
+Description=Make available ZFS encryption key for ${dataset} from keystore
+ConditionPathExists=/run/keystore/${automount_pool}/${automount_user}.enc
+BindsTo=${keyloadunit}
+
+[Service]
+Type=oneshot
+RemainAfterExit=yes
+ExecStop=/usr/sbin/user_keystore lock ${automount_pool} ${automount_user}
+"   > "${dest_norm}/${keystoreunit}"
+      fi
+
       keymountdep=""
       if [ "${p_keyloc%%://*}" = "file" ] ; then
         if [ -n "${requiredmounts}" ] ; then
@@ -218,6 +279,7 @@ Wants=${wants}
 After=${after}
 ${requires}
 ${keymountdep}
+${automount_loadkey_extra_args}
 
 [Service]
 Type=oneshot
@@ -270,9 +332,6 @@ ExecStop=${keyunloadcmd}"   > "${dest_no
     do_fail "invalid mountpoint for ${dataset}"
   fi
 
-  # Escape the mountpoint per systemd policy.
-  mountfile="$(systemd-escape --path --suffix=mount "${p_mountpoint}")"
-
   # Parse options
   # see lib/libzfs/libzfs_mount.c:zfs_add_options
   opts=""
@@ -459,6 +518,87 @@ Options=defaults${opts},zfsutil" > "${de
 
 }
 
+ZPOOL_CACHE="@sysconfdir@/zfs/zpool.cache"
+PROPS="name,mountpoint,canmount,atime,relatime,devices,exec\
+,readonly,setuid,nbmand,encroot,keylocation\
+,org.openzfs.systemd:requires,org.openzfs.systemd:requires-mounts-for\
+,org.openzfs.systemd:before,org.openzfs.systemd:after\
+,org.openzfs.systemd:wanted-by,org.openzfs.systemd:required-by\
+,org.openzfs.systemd:nofail,org.openzfs.systemd:ignore"
+zsys_revert_failed=0
+errfile="/tmp/zsys-revert-out.log"
+
+drop_emergency_on_failure() {
+  if [ ${zsys_revert_failed} -eq 0 ]; then
+    return
+  fi
+
+  # Drop to emergency target in case of failure after cleanup fstab mountpoints.
+  # This avoids booting and having a mix of old and new datasets, and creating directories in the wrong
+  # datasets, like /boot/grub in / which would prevent zfs from mounting the /boot dataset later on.
+  rm -f "${dest_norm}"/*.mount
+  ln -s /lib/systemd/system/emergency.target "${dest_norm}"/default.target
+
+  printf 'ERROR: zfs-mount-generator failed and you requested a revert:\n' > /dev/kmsg
+  cat "${errfile}" > /dev/kmsg
+  printf 'You can reboot on current master dataset to fix the issue\n' > /dev/kmsg
+}
+
+# Handle revert so that zsys prepares all datasets as expected.
+initzsys() {
+  if [ ! -x @sbindir@/zsysd ]; then
+    return
+  fi
+
+  # Non ZFS system
+  if ! grep -q "root=ZFS=" /proc/cmdline; then
+    return
+  fi
+
+  # If we boot on the same dataset as last time, assume we don't need to do anything as the cache file will only
+  # import the desired pools.
+  bootds="$(sed -e 's/.*root=ZFS=\([^ ]\+\).*/\1/' /proc/cmdline)"
+  if grep -Eq "${bootds}\s+/\s+on" "${FSLIST}/"*; then
+      return
+  fi
+
+  # If we get here: we are reverting. Let zsys handle it
+  trap drop_emergency_on_failure EXIT INT QUIT ABRT PIPE TERM
+
+  exec 3>&1 1>"${errfile}"
+  exec 4>&2 2>&1
+
+  zsys_revert_failed=1
+  # Import and list previously imported pools for zsys
+  if [ -f "${ZPOOL_CACHE}" ]; then
+    @sbindir@/zpool import -c "${ZPOOL_CACHE}" -aN
+  # As a best effort, import all available pools, hoping there is no conflict.
+  else
+    echo "We had to search for all available pools because ${ZPOOL_CACHE} doesn't exist. To avoid this, create a zpool cache file."
+    @sbindir@/zpool import -aN
+  fi
+
+  @sbindir@/zsysd boot-prepare >"${errfile}"
+
+  # If FSLIST is empty, populate with all imported pools
+  if [ -z "$(ls -A ${FSLIST})" ]; then
+    @sbindir@/zpool list -H -o name | xargs -I{} touch ${FSLIST}/{}
+  fi
+
+  # Refresh zfs list cache
+  for cachefile in "${FSLIST}/"* ; do
+    pool=`basename ${cachefile}`
+    @sbindir@/zfs list -H -t filesystem -o "${PROPS}" -r "${pool}" >"${cachefile}"
+  done
+
+  exec 1>&3 3>&-
+  exec 2>&4 4>&-
+  zsys_revert_failed=0
+  rm "${errfile}"
+}
+
+initzsys
+
 for cachefile in "${FSLIST}/"* ; do
   # Disable glob expansion to protect against special characters when parsing.
   set -f
Index: zfs-linux-2.1.2/contrib/initramfs/scripts/zfs
===================================================================
--- zfs-linux-2.1.2.orig/contrib/initramfs/scripts/zfs
+++ zfs-linux-2.1.2/contrib/initramfs/scripts/zfs
@@ -66,6 +66,20 @@ get_fs_value()
 	"${ZFS}" get -H -ovalue "$value" "$fs" 2> /dev/null
 }
 
+# Get a ZFS filesystem property value with the source stripped from the value
+get_fs_value_without_source()
+{
+	value="$(get_fs_value $@)"
+	echo "${value%%:*}"
+}
+
+# Get a ZFS filesystem property source for a given key
+get_fs_source()
+{
+	value="$(get_fs_value $@)"
+	echo "${value#*:}"
+}
+
 # Find the 'bootfs' property on pool $1.
 # If the property does not contain '/', then ignore this
 # pool by exporting it again.
@@ -483,16 +497,17 @@ clone_snap()
 	snap="$1"
 	destfs="$2"
 	mountpoint="$3"
+	local additional_parameters="$4"
 
 	[ "$quiet" != "y" ] && zfs_log_begin_msg "Cloning '$snap' to '$destfs'"
 
+	if [ -n "${mountpoint}" ]; then
+		additional_parameters="${additional_parameters} -o mountpoint=${mountpoint}"
+	fi
+
 	# Clone the snapshot into a dataset we can boot from
-	# + We don't want this filesystem to be automatically mounted, we
-	#   want control over this here and nowhere else.
-	# + We don't need any mountpoint set for the same reason.
-	# We use the 'org.zol:mountpoint' property to remember the mountpoint.
-	ZFS_CMD="${ZFS} clone -o canmount=noauto -o mountpoint=none"
-	ZFS_CMD="${ZFS_CMD} -o org.zol:mountpoint=${mountpoint}"
+	ZFS_CMD="${ZFS} clone"
+	ZFS_CMD="${ZFS_CMD} -o canmount=noauto ${additional_parameters}"
 	ZFS_CMD="${ZFS_CMD} $snap $destfs"
 	ZFS_STDERR="$(${ZFS_CMD} 2>&1)"
 	ZFS_ERROR="$?"
@@ -611,6 +626,15 @@ setup_snapshot_booting()
 	snapname="${snap##*@}"
 	ZFS_BOOTFS="${rootfs}_${snapname}"
 
+	# Detect if we are on a zsys system, which will generate a unique UUID
+	# and override ZFS_BOOTFS
+	use_zsys=$(get_fs_value_without_source "${rootfs}" com.ubuntu.zsys:bootfs)
+	if [ "$use_zsys" = "yes" ]; then
+		zsys_uid=`uid`
+		ZFS_BOOTFS="${rootfs%_*}_${zsys_uid}" # we strip old uid and add new one
+	fi
+
+	# Rollback won't have any effect on zsys systems
 	if ! grep -qiE '(^|[^\\](\\\\)* )(rollback)=(on|yes|1)( |$)' /proc/cmdline
 	then
 		# If the destination dataset for the clone
@@ -640,10 +664,18 @@ setup_snapshot_booting()
 			#       rpool/ROOT/debian/boot@snap2	=> rpool/ROOT/debian_snap2/boot
 			#       rpool/ROOT/debian/usr@snap2	=> rpool/ROOT/debian_snap2/usr
 			#       rpool/ROOT/debian/var@snap2	=> rpool/ROOT/debian_snap2/var
+			#
+			# For zsys, we have stable root dataset names with uid, so:
+			#       rpool/ROOT/debian_uid1@snap2		=> rpool/ROOT/debian_uid2
+			#       rpool/ROOT/debian_uid1/boot@snap2	=> rpool/ROOT/debian_uid2/boot
+
 			subfs="${s##$rootfs}"
 			subfs="${subfs%%@$snapname}"
 
 			destfs="${rootfs}_${snapname}" # base fs.
+			if [ "${use_zsys}" = "yes" ]; then
+				destfs="${rootfs%_*}_${zsys_uid}" # we strip old uid and add new one
+			fi
 			[ -n "$subfs" ] && destfs="${destfs}$subfs" # + sub fs.
 
 			# Get the mountpoint of the filesystem, to be used
@@ -660,9 +692,38 @@ setup_snapshot_booting()
 				fi
 			fi
 
+			# On non zsys:
+			# + We don't want this filesystem to be automatically mounted, we
+			#   want control over this here and nowhere else.
+			# + We don't need any mountpoint set for the same reason.
+			# + We use the 'org.zol:mountpoint' property to remember the mountpoint.
+			# On zsys:
+			# + We don't want this filesystem to be automatically mounted when cloned,
+			#   so we set canmount=noauto. Zsys early boot will set the current datasets
+			#   to on, alongside other system datasets switch. This enables
+			#   zpool import -a -R /altroot to mount the whole system.
+			#   The initrd script is doing zpool import -N, so we are not impacted by setting
+			#   canmount=on on secondary boot.
+			# + We thus need the real mountpoint set for this reason (as we can't set it
+			#   once the system booted, even if the mountpoint didn't change)
+			# + We set additional parameters to zsys to mark datasets we want mount manually
+			#   at boot.
+			if [ "${use_zsys}" != "yes" ]; then
+				clone_additional_parameters="-o org.zol:mountpoint=${mountpoint}"
+				mountpoint=none
+			else
+				[ "$(get_fs_value_without_source "$s" com.ubuntu.zsys:bootfs)" != "yes" ] && continue
+				clone_additional_parameters="-o com.ubuntu.zsys:bootfs=yes"
+				# Only set mountpoint explicitly if it was locally set.
+				# Keep the possibility of having the mountpoint inherited for manual zfs snapshots without zsys involved, which
+				# will have an empty user property.
+				local mountpoint_source="$(get_fs_source "$s" com.ubuntu.zsys:mountpoint)"
+				[ -n "${mountpoint_source}" -a "${mountpoint_source}" != "local" ] && mountpoint=""
+			fi
+
 			# Clone the snapshot into its own
 			# filesystem
-			clone_snap "$s" "${destfs}" "${mountpoint}" || \
+			clone_snap "$s" "${destfs}" "${mountpoint}" "${clone_additional_parameters}" || \
 			    retval=$((retval + 1))
 		fi
 	done
@@ -887,6 +948,38 @@ mountroot()
 		shell
 	fi
 
+	# ------------
+	# Open and mount luks keystore for any pools using one
+	CRYPTROOT=/scripts/local-top/cryptroot
+	# We already processed original crypttab by calling local-top. Only add zvol related ones now.
+	if [ -x "${CRYPTROOT}" ]; then
+	        TABFILE=/cryptroot/crypttab
+	        :>"${TABFILE}"
+		# Wait for all keystore devices in /dev/zvol to appear with a 5s timeout
+		timeout=50
+		NUMKS=$(zfs list -H -o name | grep '/keystore$' | wc -l)
+		while [ ${NUMKS} -ne $(find /dev/zvol/ -name 'keystore' | wc -l) ]; do
+			if [ $timeout -le 0 ]; then
+				break
+			fi
+			sleep .1
+			timeout=$((timeout - 1))
+		done
+		# Locate, then mount the keystore volumes
+		for ks in $(find /dev/zvol/ -name 'keystore'); do
+			[ ! -e "${ks}" ] && continue
+			pool="$(basename $(dirname ${ks}))"
+			echo "keystore-${pool} ${ks} none luks,discard" >> "${TABFILE}"
+		done
+		${CRYPTROOT}
+		for dev in $(find /dev/mapper -name 'keystore-*'); do
+			# Translate filename to path
+			storepath="/run/$(echo $(basename ${dev})|sed -e 's,-,/,')"
+			mkdir -p "${storepath}"
+			mount "${dev}" "${storepath}"
+		done
+	fi
+
 	# In case the pool was specified as guid, resolve guid to name
 	pool="$("${ZPOOL}" get name,guid -o name,value -H | \
 	    awk -v pool="${ZFS_RPOOL}" '$2 == pool { print $1 }')"
@@ -906,6 +999,8 @@ mountroot()
 		# Booting from a snapshot?
 		# Will overwrite the ZFS_BOOTFS variable like so:
 		#   rpool/ROOT/debian@snap2 => rpool/ROOT/debian_snap2
+		#   or
+		#   rpool/ROOT/debian@snap2 => rpool/ROOT/debian_<uid> if selected system is a zsys one
 		echo "${ZFS_BOOTFS}" | grep -q '@' && \
 		    setup_snapshot_booting "${ZFS_BOOTFS}"
 	fi
@@ -943,13 +1038,23 @@ mountroot()
 	# Go through the complete list (recursively) of all filesystems below
 	# the real root dataset
 	filesystems="$("${ZFS}" list -oname -tfilesystem -H -r "${ZFS_BOOTFS}")"
+
+	# If the root filesystem is a zsys one, we select the datasets to mount
+	# at boot.
+	# Some datasets under ROOT/ can be mounted on top of persistent datasets
+	# that are hosted elsewhere in the pool. Those are thus only mounted at
+	# early boot.
+	use_zsys=$(get_fs_value_without_source "${ZFS_BOOTFS}" com.ubuntu.zsys:bootfs)
+
 	OLD_IFS="$IFS" ; IFS="
 "
 	for fs in $filesystems; do
+		[ "$use_zsys" = "yes" -a "$(get_fs_value_without_source "$fs" com.ubuntu.zsys:bootfs)" != "yes" ] && continue
 		IFS="$OLD_IFS" mount_fs "$fs"
 	done
 	IFS="$OLD_IFS"
 	for fs in $ZFS_INITRD_ADDITIONAL_DATASETS; do
+		[ "$use_zsys" = "yes" -a "$(get_fs_value_without_source "$fs" com.ubuntu.zsys:bootfs)" != "yes" ] && continue
 		mount_fs "$fs"
 	done
 
@@ -995,3 +1100,8 @@ mountroot()
 		fi
 	fi
 }
+
+uid()
+{
+	grep -a -m10 -E "\*" /dev/urandom 2>/dev/null | tr -dc 'a-z0-9' | cut -c-6
+}
