1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280
|
// Program explore is evolved from the code discussed in more depth
// here:
//
// https://github.com/golang/go/issues/3405
//
// The code here demonstrates that while PR_SET_NO_NEW_PRIVS only
// applies to the calling thread, since
// https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=103502a35cfce0710909da874f092cb44823ca03
// the seccomp filter application forces the setting to be mirrored on
// all the threads of a process.
//
// Based on the command line options, we can manipulate the program to
// behave in various ways. Example command lines:
//
// sudo ./explore
// sudo ./explore --kill=false
// sudo ./explore --kill=false --errno=0
//
// Supported Go toolchains are after go1.10. Those prior to go1.16
// are not fully reliable because of a go + glibc/psx incompatibility.
// Details:
//
// https://bugzilla.kernel.org/show_bug.cgi?id=219478
//
// With go1.16+ toolchains this program can also be built with
// CGO_ENABLED=0, demonstrating native nocgo support for seccomp
// features.
package main
import (
"flag"
"fmt"
"log"
"runtime"
"syscall"
"time"
"unsafe"
"kernel.org/pub/linux/libs/security/libcap/psx"
)
// Command line flags that select which variant of the experiment is run.
var (
// withPSX makes prctl issue its system call through psx.Syscall6
// instead of syscall.RawSyscall6, and makes applyPolicy skip its
// thread locking step.
withPSX = flag.Bool("psx", false, "use the psx mechanism to invoke prctl syscall")
// delays inserts a pause at the start of main and after a
// successful prctl call.
delays = flag.Bool("delays", false, "use this to pause the program at various places")
// kill selects the trapOnSyscall rule for the blocked system call;
// when false, main installs a disallowSyscall rule instead.
kill = flag.Bool("kill", true, "kill the process if setuid attempted")
// errno is the error number handed to disallowSyscall when kill
// is false. It defaults to syscall.ENOTSUP.
errno = flag.Int("errno", int(syscall.ENOTSUP), "if kill is false, block syscall and return this errno")
)
// Numeric kernel interface values used to build and install the
// seccomp filter. They are spelled out here rather than imported.
const (
	// prSetNoNewPrivs is the prctl option applyPolicy uses before
	// loading the filter.
	prSetNoNewPrivs = 38

	// sysSeccomp is the x86_64 system call number for seccomp.
	sysSeccomp = 317

	// seccompSetModeFilter is the seccomp operation that installs a
	// user-supplied filter.
	seccompSetModeFilter = 1

	// seccompFilterFlagTsync asks for the filter to be mirrored on
	// all threads.
	seccompFilterFlagTsync = 1 << 0

	// Filter verdicts, returned by a bpfRet instruction.
	seccompRetKillProcess = 0x80000000 // kill the whole process immediately
	seccompRetTrap        = 0x00030000 // disallow and force a SIGSYS
	seccompRetErrno       = 0x00050000 // returns an errno
	seccompRetAllow       = 0x7fff0000 // let the system call proceed

	// seccompRetData masks the data payload (ex. errno) that can be
	// or'd into a verdict.
	seccompRetData = 0x0000ffff

	// Pieces that are combined to form a SockFilter.Code value.
	bpfLd  = 0x00
	bpfJmp = 0x05
	bpfRet = 0x06
	bpfW   = 0x00
	bpfAbs = 0x20
	bpfJeq = 0x10
	bpfK   = 0x00

	// auditArchX86_64 is 3221225534 in decimal. Per linux/audit.h
	// this is AUDIT_ARCH_X86_64: the ELF machine number for x86_64
	// (62, 0x3e) with the 64-bit (0x80000000) and little-endian
	// (0x40000000) marker bits set.
	auditArchX86_64 = 0xc000003e

	// archNr is the architecture value validateArchitecture compares
	// the loaded architecture word against.
	archNr = auditArchX86_64

	// syscallNr is the offset examineSyscall loads from to obtain the
	// system call number.
	syscallNr = 0
)
// SockFilter is one instruction of a filter program. Values of this
// type are handed to the kernel through SockFProg.Filter, so the
// order and width of the fields must not be changed.
type SockFilter struct {
	// Code is the filter code instruction (opcode).
	Code uint16
	// Jt and Jf are the jump targets taken when the instruction's
	// test is true or false, respectively. The rules built in this
	// program use them as a count of following instructions to skip.
	Jt, Jf uint8
	// K is a generic multiuse field: an offset, a comparison value
	// or a return value, depending on Code.
	K uint32
}
// SockFProg describes a whole filter program: how many instructions
// it holds and where the first one lives. SeccompSetModeFilter passes
// the address of a value of this type to the seccomp system call.
type SockFProg struct {
// Len is the number of contiguous SockFilter blocks that can
// be found at *Filter.
Len uint16
// Filter is the address of the first SockFilter block of a
// program sequence.
Filter *SockFilter
}
// SockFilterSlice is a named slice of SockFilter instructions, that
// is, a fragment of a filter program. Nothing in the visible source
// refers to it; the rule helpers return plain []SockFilter values.
type SockFilterSlice []SockFilter
// bpfStmt builds a filter instruction that does not branch: code is
// the opcode, k its operand, and both jump targets are left at zero.
func bpfStmt(code uint16, k uint32) SockFilter {
	return SockFilter{Code: code, K: k}
}
// bpfJump builds a conditional filter instruction: code is the
// opcode, k the value tested, and jt / jf the jump targets used when
// the test is true / false.
func bpfJump(code uint16, k uint32, jt uint8, jf uint8) SockFilter {
	return SockFilter{
		Code: code,
		Jt:   jt,
		Jf:   jf,
		K:    k,
	}
}
// validateArchitecture returns the filter prologue: load the
// architecture word, continue past the next instruction when it
// equals archNr, and otherwise kill the process.
func validateArchitecture() []SockFilter {
	// archOffset is where the architecture word is loaded from; the
	// system call number (see syscallNr) is loaded from offset 0.
	// NOTE(review): this matches the arch field of the kernel's
	// struct seccomp_data - confirm against linux/seccomp.h.
	const archOffset = 4

	var (
		loadArch  = bpfStmt(bpfLd|bpfW|bpfAbs, archOffset)
		skipIfOK  = bpfJump(bpfJmp|bpfJeq|bpfK, archNr, 1, 0)
		wrongArch = bpfStmt(bpfRet|bpfK, seccompRetKillProcess)
	)
	return []SockFilter{loadArch, skipIfOK, wrongArch}
}
// examineSyscall returns the single instruction that loads the system
// call number (the word at offset syscallNr) so that the rules which
// follow can compare against it.
func examineSyscall() []SockFilter {
	loadNr := bpfStmt(bpfLd|bpfW|bpfAbs, syscallNr)
	return []SockFilter{loadNr}
}
// allowSyscall returns a two instruction rule: when the loaded system
// call number equals syscallNum the call is allowed, otherwise the
// return instruction is skipped and evaluation continues after the
// rule.
func allowSyscall(syscallNum uint32) []SockFilter {
	match := bpfJump(bpfJmp|bpfJeq|bpfK, syscallNum, 0, 1)
	allow := bpfStmt(bpfRet|bpfK, seccompRetAllow)
	return []SockFilter{match, allow}
}
// disallowSyscall returns a two instruction rule: when the loaded
// system call number equals syscallNum the call is refused with the
// given errno, otherwise evaluation continues after the rule. Only
// the bits of errno covered by seccompRetData are used.
func disallowSyscall(syscallNum, errno uint32) []SockFilter {
	payload := errno & seccompRetData
	match := bpfJump(bpfJmp|bpfJeq|bpfK, syscallNum, 0, 1)
	refuse := bpfStmt(bpfRet|bpfK, seccompRetErrno|payload)
	return []SockFilter{match, refuse}
}
// killProcess returns a single unconditional instruction whose
// verdict is seccompRetKillProcess.
func killProcess() []SockFilter {
	kill := bpfStmt(bpfRet|bpfK, seccompRetKillProcess)
	return []SockFilter{kill}
}
// notifyProcessAndDie returns a single unconditional instruction
// whose verdict is seccompRetTrap.
func notifyProcessAndDie() []SockFilter {
	trap := bpfStmt(bpfRet|bpfK, seccompRetTrap)
	return []SockFilter{trap}
}
// trapOnSyscall returns a two instruction rule: when the loaded
// system call number equals syscallNum the verdict is seccompRetTrap,
// otherwise evaluation continues after the rule.
func trapOnSyscall(syscallNum uint32) []SockFilter {
	match := bpfJump(bpfJmp|bpfJeq|bpfK, syscallNum, 0, 1)
	trap := bpfStmt(bpfRet|bpfK, seccompRetTrap)
	return []SockFilter{match, trap}
}
// allGood returns the closing instruction of a filter: an
// unconditional seccompRetAllow for anything no earlier rule matched.
func allGood() []SockFilter {
	allow := bpfStmt(bpfRet|bpfK, seccompRetAllow)
	return []SockFilter{allow}
}
// prctl performs the prctl system call with the given option and
// arguments. With the --psx command line argument the call is made
// through psx.Syscall6; otherwise it is a plain raw system call made
// on the calling thread only.
//
// A non-zero errno is returned as the error. After a successful call,
// and only when --delays is set, the function announces itself and
// sleeps for a minute so the process can be inspected.
//
//go:uintptrescapes
func prctl(option, arg1, arg2, arg3, arg4, arg5 uintptr) error {
	var errno syscall.Errno
	switch {
	case *withPSX:
		_, _, errno = psx.Syscall6(syscall.SYS_PRCTL, option, arg1, arg2, arg3, arg4, arg5)
	default:
		_, _, errno = syscall.RawSyscall6(syscall.SYS_PRCTL, option, arg1, arg2, arg3, arg4, arg5)
	}
	if errno != 0 {
		return errno
	}

	if !*delays {
		return nil
	}
	fmt.Println("prctl'd - check now")
	time.Sleep(1 * time.Minute)
	return nil
}
// SeccompSetModeFilter installs prog as the seccomp filter, passing
// the seccompFilterFlagTsync flag with the request. It returns the
// errno reported by the system call, or nil on success.
//
//go:uintptrescapes
func SeccompSetModeFilter(prog *SockFProg) error {
	// The pointer conversion is kept inside the argument list of the
	// system call itself.
	_, _, errno := syscall.RawSyscall(
		sysSeccomp,
		seccompSetModeFilter,
		seccompFilterFlagTsync,
		uintptr(unsafe.Pointer(prog)),
	)
	if errno == 0 {
		return nil
	}
	return errno
}
// empty is a package-level func variable that is never assigned or
// called anywhere in the visible source, so it stays nil.
// NOTE(review): looks like a leftover - confirm before removing.
var empty func()
// lockProcessThread returns with the calling goroutine locked to an
// OS thread whose relationship to the process ID is the one asked
// for: with pick true the thread ID equals the PID, with pick false
// it does not. The caller is responsible for any later
// runtime.UnlockOSThread.
//
// The function keeps trying until the condition holds; there is no
// timeout.
func lockProcessThread(pick bool) {
	pid := uintptr(syscall.Getpid())

	// currentTID reports the ID of the thread we are running on.
	currentTID := func() uintptr {
		id, _, _ := syscall.RawSyscall(syscall.SYS_GETTID, 0, 0, 0)
		return id
	}

	runtime.LockOSThread()
	tid := currentTID()
	for (tid == pid) != pick {
		// Wrong kind of thread: let go of it, give the scheduler
		// some other work and a chance to move us, then lock again
		// and re-check.
		runtime.UnlockOSThread()
		go func() {
			time.Sleep(1 * time.Microsecond)
		}()
		runtime.Gosched()
		runtime.LockOSThread()
		tid = currentTID()
	}
	fmt.Println("validated TID:", tid, "== PID:", pid, "is", pick)
}
// applyPolicy sets no-new-privs and then uploads the program sequence
// prog as the seccomp filter. Any failure terminates the program via
// log.Fatalf.
func applyPolicy(prog *SockFProg) {
	// With PSX the no-new-privs setting is mirrored on all threads.
	// Without it, nothing guarantees that the seccomp call runs on
	// the thread that had new privs disabled, so stay locked to one
	// (non-leader) thread for the duration of this function.
	if usePSX := *withPSX; !usePSX {
		lockProcessThread(false)
		defer runtime.UnlockOSThread()
	}

	// Loading a filter without privilege requires no-new-privs.
	err := prctl(prSetNoNewPrivs, 1, 0, 0, 0, 0)
	if err != nil {
		log.Fatalf("Prctl(PR_SET_NO_NEW_PRIVS): %v", err)
	}

	fmt.Println("Applying syscall policy...")
	err = SeccompSetModeFilter(prog)
	if err != nil {
		log.Fatalf("seccomp_set_mode_filter: %v", err)
	}
	fmt.Println("...Policy applied")
}
// main assembles a filter that singles out setuid, installs it, and
// then attempts setuid(1) from the thread whose TID equals the PID to
// show what the filter does. The --kill and --errno flags choose the
// verdict for the blocked call.
func main() {
	flag.Parse()

	if *delays {
		fmt.Println("check first", syscall.Getpid())
		time.Sleep(60 * time.Second)
	}

	// Decide once how a disallowed system call is to be treated.
	block := trapOnSyscall
	if !*kill {
		block = func(nr uint32) []SockFilter {
			return disallowSyscall(nr, uint32(*errno))
		}
	}

	// Prologue: architecture check, then load the system call number.
	filter := validateArchitecture()
	filter = append(filter, examineSyscall()...)

	// One rule per disallowed system call.
	disallowed := []uint32{
		syscall.SYS_SETUID,
	}
	for _, nr := range disallowed {
		filter = append(filter, block(nr)...)
	}

	// Everything else is permitted.
	filter = append(filter, allGood()...)

	applyPolicy(&SockFProg{
		Len:    uint16(len(filter)),
		Filter: &filter[0],
	})

	// Ensure we are running on the TID=PID.
	lockProcessThread(true)

	log.Print("Now it is time to try to run something privileged...")
	_, _, e := syscall.RawSyscall(syscall.SYS_SETUID, 1, 0, 0)
	if e != 0 {
		log.Fatalf("setuid failed with an error: %v", e)
	}
	log.Print("Looked like that worked, but it really didn't: uid == ", syscall.Getuid(), " != 1")
}
|