Line data Source code
1 : /*
2 : * linux/kernel/seccomp.c
3 : *
4 : * Copyright 2004-2005 Andrea Arcangeli <andrea@cpushare.com>
5 : *
6 : * Copyright (C) 2012 Google, Inc.
7 : * Will Drewry <wad@chromium.org>
8 : *
9 : * This defines a simple but solid secure-computing facility.
10 : *
11 : * Mode 1 uses a fixed list of allowed system calls.
12 : * Mode 2 allows user-defined system call filters in the form
13 : * of Berkeley Packet Filters/Linux Socket Filters.
14 : */
15 :
16 : #include <linux/atomic.h>
17 : #include <linux/audit.h>
18 : #include <linux/compat.h>
19 : #include <linux/sched.h>
20 : #include <linux/seccomp.h>
21 : #include <linux/slab.h>
22 : #include <linux/syscalls.h>
23 :
24 : #ifdef CONFIG_HAVE_ARCH_SECCOMP_FILTER
25 : #include <asm/syscall.h>
26 : #endif
27 :
28 : #ifdef CONFIG_SECCOMP_FILTER
29 : #include <linux/filter.h>
30 : #include <linux/pid.h>
31 : #include <linux/ptrace.h>
32 : #include <linux/security.h>
33 : #include <linux/tracehook.h>
34 : #include <linux/uaccess.h>
35 :
36 : /**
37 : * struct seccomp_filter - container for seccomp BPF programs
38 : *
39 : * @usage: reference count to manage the object lifetime.
40 : * get/put helpers should be used when accessing an instance
41 : * outside of a lifetime-guarded section. In general, this
42 : * is only needed for handling filters shared across tasks.
43 : * @prev: points to a previously installed, or inherited, filter
44 : * @prog: the BPF program to evaluate; its len and insnsi members hold
45 : * the instruction count and the instructions themselves
46 : *
47 : * seccomp_filter objects are organized in a tree linked via the @prev
48 : * pointer. For any task, it appears to be a singly-linked list starting
49 : * with current->seccomp.filter, the most recently attached or inherited filter.
50 : * However, multiple filters may share a @prev node, by way of fork(), which
51 : * results in a unidirectional tree existing in memory. This is similar to
52 : * how namespaces work.
53 : *
54 : * seccomp_filter objects should never be modified after being attached
55 : * to a task_struct (other than @usage).
56 : */
57 : struct seccomp_filter {
58 : atomic_t usage;
59 : struct seccomp_filter *prev;
60 : struct bpf_prog *prog;
61 : };
62 :
63 : /* Limit any path through the tree to 256KB worth of instructions. */
64 : #define MAX_INSNS_PER_PATH ((1 << 18) / sizeof(struct sock_filter))
65 :
66 : /*
67 : * Endianness is explicitly ignored and left for BPF program authors to manage
68 : * as per the specific architecture.
69 : */
70 0 : static void populate_seccomp_data(struct seccomp_data *sd)
71 : {
72 0 : struct task_struct *task = current;
73 0 : struct pt_regs *regs = task_pt_regs(task);
74 : unsigned long args[6];
75 :
76 0 : sd->nr = syscall_get_nr(task, regs);
77 0 : sd->arch = syscall_get_arch();
78 : syscall_get_arguments(task, regs, 0, 6, args);
79 : sd->args[0] = args[0];
80 0 : sd->args[1] = args[1];
81 0 : sd->args[2] = args[2];
82 0 : sd->args[3] = args[3];
83 0 : sd->args[4] = args[4];
84 0 : sd->args[5] = args[5];
85 0 : sd->instruction_pointer = KSTK_EIP(task);
86 0 : }
87 :
88 : /**
89 : * seccomp_check_filter - verify seccomp filter code
90 : * @filter: filter to verify
91 : * @flen: length of filter
92 : *
93 : * Takes a previously checked filter (by bpf_check_classic) and
94 : * redirects all filter code that loads struct sk_buff data
95 : * and related data through seccomp_bpf_load. It also
96 : * enforces length and alignment checking of those loads.
97 : *
98 : * Returns 0 if the rule set is legal or -EINVAL if not.
99 : */
100 0 : static int seccomp_check_filter(struct sock_filter *filter, unsigned int flen)
101 : {
102 : int pc;
103 0 : for (pc = 0; pc < flen; pc++) {
104 0 : struct sock_filter *ftest = &filter[pc];
105 0 : u16 code = ftest->code;
106 0 : u32 k = ftest->k;
107 :
108 0 : switch (code) {
109 : case BPF_LD | BPF_W | BPF_ABS:
110 0 : ftest->code = BPF_LDX | BPF_W | BPF_ABS;
111 : /* 32-bit aligned and not out of bounds. */
112 0 : if (k >= sizeof(struct seccomp_data) || k & 3)
113 : return -EINVAL;
114 0 : continue;
115 : case BPF_LD | BPF_W | BPF_LEN:
116 0 : ftest->code = BPF_LD | BPF_IMM;
117 0 : ftest->k = sizeof(struct seccomp_data);
118 0 : continue;
119 : case BPF_LDX | BPF_W | BPF_LEN:
120 0 : ftest->code = BPF_LDX | BPF_IMM;
121 0 : ftest->k = sizeof(struct seccomp_data);
122 0 : continue;
123 : /* Explicitly include allowed calls. */
124 : case BPF_RET | BPF_K:
125 : case BPF_RET | BPF_A:
126 : case BPF_ALU | BPF_ADD | BPF_K:
127 : case BPF_ALU | BPF_ADD | BPF_X:
128 : case BPF_ALU | BPF_SUB | BPF_K:
129 : case BPF_ALU | BPF_SUB | BPF_X:
130 : case BPF_ALU | BPF_MUL | BPF_K:
131 : case BPF_ALU | BPF_MUL | BPF_X:
132 : case BPF_ALU | BPF_DIV | BPF_K:
133 : case BPF_ALU | BPF_DIV | BPF_X:
134 : case BPF_ALU | BPF_AND | BPF_K:
135 : case BPF_ALU | BPF_AND | BPF_X:
136 : case BPF_ALU | BPF_OR | BPF_K:
137 : case BPF_ALU | BPF_OR | BPF_X:
138 : case BPF_ALU | BPF_XOR | BPF_K:
139 : case BPF_ALU | BPF_XOR | BPF_X:
140 : case BPF_ALU | BPF_LSH | BPF_K:
141 : case BPF_ALU | BPF_LSH | BPF_X:
142 : case BPF_ALU | BPF_RSH | BPF_K:
143 : case BPF_ALU | BPF_RSH | BPF_X:
144 : case BPF_ALU | BPF_NEG:
145 : case BPF_LD | BPF_IMM:
146 : case BPF_LDX | BPF_IMM:
147 : case BPF_MISC | BPF_TAX:
148 : case BPF_MISC | BPF_TXA:
149 : case BPF_LD | BPF_MEM:
150 : case BPF_LDX | BPF_MEM:
151 : case BPF_ST:
152 : case BPF_STX:
153 : case BPF_JMP | BPF_JA:
154 : case BPF_JMP | BPF_JEQ | BPF_K:
155 : case BPF_JMP | BPF_JEQ | BPF_X:
156 : case BPF_JMP | BPF_JGE | BPF_K:
157 : case BPF_JMP | BPF_JGE | BPF_X:
158 : case BPF_JMP | BPF_JGT | BPF_K:
159 : case BPF_JMP | BPF_JGT | BPF_X:
160 : case BPF_JMP | BPF_JSET | BPF_K:
161 : case BPF_JMP | BPF_JSET | BPF_X:
162 0 : continue;
163 : default:
164 : return -EINVAL;
165 : }
166 : }
167 : return 0;
168 : }
169 :
170 : /**
171 : * seccomp_run_filters - evaluates all seccomp filters against @sd
172 : * @sd: seccomp data to evaluate, or NULL to populate it from the current task
173 : *
174 : * Returns valid seccomp BPF response codes.
175 : */
176 0 : static u32 seccomp_run_filters(struct seccomp_data *sd)
177 : {
178 0 : struct seccomp_filter *f = ACCESS_ONCE(current->seccomp.filter);
179 : struct seccomp_data sd_local;
180 : u32 ret = SECCOMP_RET_ALLOW;
181 :
182 : /* Ensure unexpected behavior doesn't result in failing open. */
183 0 : if (unlikely(WARN_ON(f == NULL)))
184 : return SECCOMP_RET_KILL;
185 :
186 : /* Make sure cross-thread synced filter points somewhere sane. */
187 : smp_read_barrier_depends();
188 :
189 0 : if (!sd) {
190 0 : populate_seccomp_data(&sd_local);
191 : sd = &sd_local;
192 : }
193 :
194 : /*
195 : * All filters in the list are evaluated and the lowest BPF return
196 : * value always takes priority (ignoring the DATA).
197 : */
198 0 : for (; f; f = f->prev) {
199 0 : u32 cur_ret = BPF_PROG_RUN(f->prog, (void *)sd);
200 :
201 0 : if ((cur_ret & SECCOMP_RET_ACTION) < (ret & SECCOMP_RET_ACTION))
202 : ret = cur_ret;
203 : }
204 : return ret;
205 : }
206 : #endif /* CONFIG_SECCOMP_FILTER */
207 :
208 : static inline bool seccomp_may_assign_mode(unsigned long seccomp_mode)
209 : {
210 : assert_spin_locked(&current->sighand->siglock);
211 :
212 0 : if (current->seccomp.mode && current->seccomp.mode != seccomp_mode)
213 : return false;
214 :
215 : return true;
216 : }
217 :
218 : static inline void seccomp_assign_mode(struct task_struct *task,
219 : unsigned long seccomp_mode)
220 : {
221 : assert_spin_locked(&task->sighand->siglock);
222 :
223 0 : task->seccomp.mode = seccomp_mode;
224 : /*
225 : * Make sure TIF_SECCOMP cannot be set before the mode (and
226 : * filter) is set.
227 : */
228 0 : smp_mb__before_atomic();
229 : set_tsk_thread_flag(task, TIF_SECCOMP);
230 : }
231 :
232 : #ifdef CONFIG_SECCOMP_FILTER
233 : /* Returns 1 if the parent is an ancestor of the child. */
234 0 : static int is_ancestor(struct seccomp_filter *parent,
235 : struct seccomp_filter *child)
236 : {
237 : /* NULL is the root ancestor. */
238 0 : if (parent == NULL)
239 : return 1;
240 0 : for (; child; child = child->prev)
241 0 : if (child == parent)
242 : return 1;
243 : return 0;
244 : }
245 :
246 : /**
247 : * seccomp_can_sync_threads: checks if all threads can be synchronized
248 : *
249 : * Expects sighand and cred_guard_mutex locks to be held.
250 : *
251 : * Returns 0 on success, -ve on error, or the pid of a thread which was
252 : * either not in the correct seccomp mode or it did not have an ancestral
253 : * seccomp filter.
254 : */
255 : static inline pid_t seccomp_can_sync_threads(void)
256 : {
257 : struct task_struct *thread, *caller;
258 :
259 0 : BUG_ON(!mutex_is_locked(&current->signal->cred_guard_mutex));
260 : assert_spin_locked(&current->sighand->siglock);
261 :
262 : /* Validate all threads being eligible for synchronization. */
263 : caller = current;
264 0 : for_each_thread(caller, thread) {
265 : pid_t failed;
266 :
267 : /* Skip current, since it is initiating the sync. */
268 0 : if (thread == caller)
269 0 : continue;
270 :
271 0 : if (thread->seccomp.mode == SECCOMP_MODE_DISABLED ||
272 0 : (thread->seccomp.mode == SECCOMP_MODE_FILTER &&
273 0 : is_ancestor(thread->seccomp.filter,
274 : caller->seccomp.filter)))
275 0 : continue;
276 :
277 : /* Return the first thread that cannot be synchronized. */
278 : failed = task_pid_vnr(thread);
279 : /* If the pid cannot be resolved, then return -ESRCH */
280 0 : if (unlikely(WARN_ON(failed == 0)))
281 : failed = -ESRCH;
282 : return failed;
283 : }
284 :
285 : return 0;
286 : }
287 :
288 : /**
289 : * seccomp_sync_threads: sets all threads to use current's filter
290 : *
291 : * Expects sighand and cred_guard_mutex locks to be held, and for
292 : * seccomp_can_sync_threads() to have returned success already
293 : * without dropping the locks.
294 : *
295 : */
296 : static inline void seccomp_sync_threads(void)
297 : {
298 : struct task_struct *thread, *caller;
299 :
300 0 : BUG_ON(!mutex_is_locked(&current->signal->cred_guard_mutex));
301 : assert_spin_locked(&current->sighand->siglock);
302 :
303 : /* Synchronize all threads. */
304 : caller = current;
305 0 : for_each_thread(caller, thread) {
306 : /* Skip current, since it needs no changes. */
307 0 : if (thread == caller)
308 0 : continue;
309 :
310 : /* Get a task reference for the new leaf node. */
311 0 : get_seccomp_filter(caller);
312 : /*
313 : * Drop the task reference to the shared ancestor since
314 : * current's path will hold a reference. (This also
315 : * allows a put before the assignment.)
316 : */
317 0 : put_seccomp_filter(thread);
318 0 : smp_store_release(&thread->seccomp.filter,
319 : caller->seccomp.filter);
320 : /*
321 : * Opt the other thread into seccomp if needed.
322 : * As threads are considered to be trust-realm
323 : * equivalent (see ptrace_may_access), it is safe to
324 : * allow one thread to transition the other.
325 : */
326 0 : if (thread->seccomp.mode == SECCOMP_MODE_DISABLED) {
327 : /*
328 : * Don't let an unprivileged task work around
329 : * the no_new_privs restriction by creating
330 : * a thread that sets it up, enters seccomp,
331 : * then dies.
332 : */
333 0 : if (task_no_new_privs(caller))
334 : task_set_no_new_privs(thread);
335 :
336 : seccomp_assign_mode(thread, SECCOMP_MODE_FILTER);
337 : }
338 : }
339 : }
340 :
341 : /**
342 : * seccomp_prepare_filter: Prepares a seccomp filter for use.
343 : * @fprog: BPF program to install
344 : *
345 : * Returns filter on success or an ERR_PTR on failure.
346 : */
347 0 : static struct seccomp_filter *seccomp_prepare_filter(struct sock_fprog *fprog)
348 : {
349 : struct seccomp_filter *filter;
350 : unsigned long fp_size;
351 : struct sock_filter *fp;
352 : int new_len;
353 : long ret;
354 :
355 0 : if (fprog->len == 0 || fprog->len > BPF_MAXINSNS)
356 : return ERR_PTR(-EINVAL);
357 : BUG_ON(INT_MAX / fprog->len < sizeof(struct sock_filter));
358 0 : fp_size = fprog->len * sizeof(struct sock_filter);
359 :
360 : /*
361 : * Installing a seccomp filter requires that the task has
362 : * CAP_SYS_ADMIN in its namespace or be running with no_new_privs.
363 : * This avoids scenarios where unprivileged tasks can affect the
364 : * behavior of privileged children.
365 : */
366 0 : if (!task_no_new_privs(current) &&
367 0 : security_capable_noaudit(current_cred(), current_user_ns(),
368 : CAP_SYS_ADMIN) != 0)
369 : return ERR_PTR(-EACCES);
370 :
371 : fp = kzalloc(fp_size, GFP_KERNEL|__GFP_NOWARN);
372 0 : if (!fp)
373 : return ERR_PTR(-ENOMEM);
374 :
375 : /* Copy the instructions from fprog. */
376 : ret = -EFAULT;
377 0 : if (copy_from_user(fp, fprog->filter, fp_size))
378 : goto free_prog;
379 :
380 : /* Check and rewrite the fprog via the skb checker */
381 0 : ret = bpf_check_classic(fp, fprog->len);
382 0 : if (ret)
383 : goto free_prog;
384 :
385 : /* Check and rewrite the fprog for seccomp use */
386 0 : ret = seccomp_check_filter(fp, fprog->len);
387 0 : if (ret)
388 : goto free_prog;
389 :
390 : /* Convert 'sock_filter' insns to 'bpf_insn' insns */
391 0 : ret = bpf_convert_filter(fp, fprog->len, NULL, &new_len);
392 0 : if (ret)
393 : goto free_prog;
394 :
395 : /* Allocate a new seccomp_filter */
396 : ret = -ENOMEM;
397 : filter = kzalloc(sizeof(struct seccomp_filter),
398 : GFP_KERNEL|__GFP_NOWARN);
399 0 : if (!filter)
400 : goto free_prog;
401 :
402 0 : filter->prog = bpf_prog_alloc(bpf_prog_size(new_len), __GFP_NOWARN);
403 0 : if (!filter->prog)
404 : goto free_filter;
405 :
406 0 : ret = bpf_convert_filter(fp, fprog->len, filter->prog->insnsi, &new_len);
407 0 : if (ret)
408 : goto free_filter_prog;
409 :
410 0 : kfree(fp);
411 0 : atomic_set(&filter->usage, 1);
412 0 : filter->prog->len = new_len;
413 :
414 0 : bpf_prog_select_runtime(filter->prog);
415 :
416 0 : return filter;
417 :
418 : free_filter_prog:
419 0 : __bpf_prog_free(filter->prog);
420 : free_filter:
421 0 : kfree(filter);
422 : free_prog:
423 0 : kfree(fp);
424 0 : return ERR_PTR(ret);
425 : }
426 :
427 : /**
428 : * seccomp_prepare_user_filter - prepares a user-supplied sock_fprog
429 : * @user_filter: pointer to the user data containing a sock_fprog.
430 : *
431 : * Returns the prepared filter on success or an ERR_PTR on failure.
432 : */
433 : static struct seccomp_filter *
434 0 : seccomp_prepare_user_filter(const char __user *user_filter)
435 : {
436 : struct sock_fprog fprog;
437 : struct seccomp_filter *filter = ERR_PTR(-EFAULT);
438 :
439 : #ifdef CONFIG_COMPAT
440 : if (is_compat_task()) {
441 : struct compat_sock_fprog fprog32;
442 : if (copy_from_user(&fprog32, user_filter, sizeof(fprog32)))
443 : goto out;
444 : fprog.len = fprog32.len;
445 : fprog.filter = compat_ptr(fprog32.filter);
446 : } else /* falls through to the if below. */
447 : #endif
448 0 : if (copy_from_user(&fprog, user_filter, sizeof(fprog)))
449 : goto out;
450 0 : filter = seccomp_prepare_filter(&fprog);
451 : out:
452 0 : return filter;
453 : }
454 :
455 : /**
456 : * seccomp_attach_filter: validate and attach filter
457 : * @flags: flags to change filter behavior
458 : * @filter: seccomp filter to add to the current process
459 : *
460 : * Caller must be holding current->sighand->siglock lock.
461 : *
462 : * Returns 0 on success, -ve on error.
463 : */
464 0 : static long seccomp_attach_filter(unsigned int flags,
465 : struct seccomp_filter *filter)
466 : {
467 : unsigned long total_insns;
468 : struct seccomp_filter *walker;
469 :
470 : assert_spin_locked(&current->sighand->siglock);
471 :
472 : /* Validate resulting filter length. */
473 0 : total_insns = filter->prog->len;
474 0 : for (walker = current->seccomp.filter; walker; walker = walker->prev)
475 0 : total_insns += walker->prog->len + 4; /* 4 instr penalty */
476 0 : if (total_insns > MAX_INSNS_PER_PATH)
477 : return -ENOMEM;
478 :
479 : /* If thread sync has been requested, check that it is possible. */
480 0 : if (flags & SECCOMP_FILTER_FLAG_TSYNC) {
481 : int ret;
482 :
483 : ret = seccomp_can_sync_threads();
484 0 : if (ret)
485 : return ret;
486 : }
487 :
488 : /*
489 : * If there is an existing filter, make it the prev and don't drop its
490 : * task reference.
491 : */
492 0 : filter->prev = current->seccomp.filter;
493 0 : current->seccomp.filter = filter;
494 :
495 : /* Now that the new filter is in place, synchronize to all threads. */
496 0 : if (flags & SECCOMP_FILTER_FLAG_TSYNC)
497 : seccomp_sync_threads();
498 :
499 : return 0;
500 : }
501 :
502 : /* get_seccomp_filter - increments the reference count of the filter on @tsk */
503 2993 : void get_seccomp_filter(struct task_struct *tsk)
504 : {
505 2993 : struct seccomp_filter *orig = tsk->seccomp.filter;
506 2993 : if (!orig)
507 2993 : return;
508 : /* Reference count is bounded by the number of total processes. */
509 0 : atomic_inc(&orig->usage);
510 : }
511 :
512 : static inline void seccomp_filter_free(struct seccomp_filter *filter)
513 : {
514 0 : if (filter) {
515 0 : bpf_prog_free(filter->prog);
516 0 : kfree(filter);
517 : }
518 : }
519 :
520 : /* put_seccomp_filter - decrements the ref count of tsk->seccomp.filter */
521 2915 : void put_seccomp_filter(struct task_struct *tsk)
522 : {
523 2915 : struct seccomp_filter *orig = tsk->seccomp.filter;
524 : /* Clean up single-reference branches iteratively. */
525 5830 : while (orig && atomic_dec_and_test(&orig->usage)) {
526 : struct seccomp_filter *freeme = orig;
527 0 : orig = orig->prev;
528 : seccomp_filter_free(freeme);
529 : }
530 2915 : }
531 :
532 : /**
533 : * seccomp_send_sigsys - signals the task to allow in-process syscall emulation
534 : * @syscall: syscall number to send to userland
535 : * @reason: filter-supplied reason code to send to userland (via si_errno)
536 : *
537 : * Forces a SIGSYS with a code of SYS_SECCOMP and related sigsys info.
538 : */
539 0 : static void seccomp_send_sigsys(int syscall, int reason)
540 : {
541 : struct siginfo info;
542 0 : memset(&info, 0, sizeof(info));
543 0 : info.si_signo = SIGSYS;
544 0 : info.si_code = SYS_SECCOMP;
545 0 : info.si_call_addr = (void __user *)KSTK_EIP(current);
546 0 : info.si_errno = reason;
547 0 : info.si_arch = syscall_get_arch();
548 0 : info.si_syscall = syscall;
549 0 : force_sig_info(SIGSYS, &info, current);
550 0 : }
551 : #endif /* CONFIG_SECCOMP_FILTER */
552 :
553 : /*
554 : * Secure computing mode 1 allows only read/write/exit/sigreturn.
555 : * To be fully secure this must be combined with rlimit
556 : * to limit the stack allocations too.
557 : */
558 : static int mode1_syscalls[] = {
559 : __NR_seccomp_read, __NR_seccomp_write, __NR_seccomp_exit, __NR_seccomp_sigreturn,
560 : 0, /* null terminated */
561 : };
562 :
563 : #ifdef CONFIG_COMPAT
564 : static int mode1_syscalls_32[] = {
565 : __NR_seccomp_read_32, __NR_seccomp_write_32, __NR_seccomp_exit_32, __NR_seccomp_sigreturn_32,
566 : 0, /* null terminated */
567 : };
568 : #endif
569 :
570 0 : static void __secure_computing_strict(int this_syscall)
571 : {
572 : int *syscall_whitelist = mode1_syscalls;
573 : #ifdef CONFIG_COMPAT
574 : if (is_compat_task())
575 : syscall_whitelist = mode1_syscalls_32;
576 : #endif
577 : do {
578 0 : if (*syscall_whitelist == this_syscall)
579 0 : return;
580 0 : } while (*++syscall_whitelist);
581 :
582 : #ifdef SECCOMP_DEBUG
583 : dump_stack();
584 : #endif
585 : audit_seccomp(this_syscall, SIGKILL, SECCOMP_RET_KILL);
586 0 : do_exit(SIGKILL);
587 : }
588 :
589 : #ifndef CONFIG_HAVE_ARCH_SECCOMP_FILTER
590 : void secure_computing_strict(int this_syscall)
591 : {
592 : int mode = current->seccomp.mode;
593 :
594 : if (mode == 0)
595 : return;
596 : else if (mode == SECCOMP_MODE_STRICT)
597 : __secure_computing_strict(this_syscall);
598 : else
599 : BUG();
600 : }
601 : #else
602 0 : int __secure_computing(void)
603 : {
604 0 : u32 phase1_result = seccomp_phase1(NULL);
605 :
606 0 : if (likely(phase1_result == SECCOMP_PHASE1_OK))
607 : return 0;
608 0 : else if (likely(phase1_result == SECCOMP_PHASE1_SKIP))
609 : return -1;
610 : else
611 0 : return seccomp_phase2(phase1_result);
612 : }
613 :
614 : #ifdef CONFIG_SECCOMP_FILTER
615 0 : static u32 __seccomp_phase1_filter(int this_syscall, struct seccomp_data *sd)
616 : {
617 : u32 filter_ret, action;
618 : int data;
619 :
620 : /*
621 : * Make sure that any changes to mode from another thread have
622 : * been seen after TIF_SECCOMP was seen.
623 : */
624 0 : rmb();
625 :
626 0 : filter_ret = seccomp_run_filters(sd);
627 0 : data = filter_ret & SECCOMP_RET_DATA;
628 0 : action = filter_ret & SECCOMP_RET_ACTION;
629 :
630 0 : switch (action) {
631 : case SECCOMP_RET_ERRNO:
632 : /* Set the low-order 16-bits as a errno. */
633 0 : syscall_set_return_value(current, task_pt_regs(current),
634 : -data, 0);
635 : goto skip;
636 :
637 : case SECCOMP_RET_TRAP:
638 : /* Show the handler the original registers. */
639 0 : syscall_rollback(current, task_pt_regs(current));
640 : /* Let the filter pass back 16 bits of data. */
641 0 : seccomp_send_sigsys(this_syscall, data);
642 0 : goto skip;
643 :
644 : case SECCOMP_RET_TRACE:
645 0 : return filter_ret; /* Save the rest for phase 2. */
646 :
647 : case SECCOMP_RET_ALLOW:
648 : return SECCOMP_PHASE1_OK;
649 :
650 : case SECCOMP_RET_KILL:
651 : default:
652 : audit_seccomp(this_syscall, SIGSYS, action);
653 0 : do_exit(SIGSYS);
654 : }
655 :
656 : unreachable();
657 :
658 : skip:
659 : audit_seccomp(this_syscall, 0, action);
660 : return SECCOMP_PHASE1_SKIP;
661 : }
662 : #endif
663 :
664 : /**
665 : * seccomp_phase1() - run fast path seccomp checks on the current syscall
666 : * @sd: The seccomp_data or NULL
667 : *
668 : * This only reads pt_regs via the syscall_xyz helpers. The only change
669 : * it will make to pt_regs is via syscall_set_return_value, and it will
670 : * only do that if it returns SECCOMP_PHASE1_SKIP.
671 : *
672 : * If sd is provided, it will not read pt_regs at all.
673 : *
674 : * It may also call do_exit or force a signal; these actions must be
675 : * safe.
676 : *
677 : * If it returns SECCOMP_PHASE1_OK, the syscall passes checks and should
678 : * be processed normally.
679 : *
680 : * If it returns SECCOMP_PHASE1_SKIP, then the syscall should not be
681 : * invoked. In this case, seccomp_phase1 will have set the return value
682 : * using syscall_set_return_value.
683 : *
684 : * If it returns anything else, then the return value should be passed
685 : * to seccomp_phase2 from a context in which ptrace hooks are safe.
686 : */
687 0 : u32 seccomp_phase1(struct seccomp_data *sd)
688 : {
689 0 : int mode = current->seccomp.mode;
690 0 : int this_syscall = sd ? sd->nr :
691 : syscall_get_nr(current, task_pt_regs(current));
692 :
693 0 : switch (mode) {
694 : case SECCOMP_MODE_STRICT:
695 0 : __secure_computing_strict(this_syscall); /* may call do_exit */
696 0 : return SECCOMP_PHASE1_OK;
697 : #ifdef CONFIG_SECCOMP_FILTER
698 : case SECCOMP_MODE_FILTER:
699 0 : return __seccomp_phase1_filter(this_syscall, sd);
700 : #endif
701 : default:
702 : BUG();
703 : }
704 : }
705 :
706 : /**
707 : * seccomp_phase2() - finish slow path seccomp work for the current syscall
708 : * @phase1_result: The return value from seccomp_phase1()
709 : *
710 : * This must be called from a context in which ptrace hooks can be used.
711 : *
712 : * Returns 0 if the syscall should be processed or -1 to skip the syscall.
713 : */
714 0 : int seccomp_phase2(u32 phase1_result)
715 : {
716 0 : struct pt_regs *regs = task_pt_regs(current);
717 : u32 action = phase1_result & SECCOMP_RET_ACTION;
718 0 : int data = phase1_result & SECCOMP_RET_DATA;
719 :
720 : BUG_ON(action != SECCOMP_RET_TRACE);
721 :
722 : audit_seccomp(syscall_get_nr(current, regs), 0, action);
723 :
724 : /* Skip these calls if there is no tracer. */
725 0 : if (!ptrace_event_enabled(current, PTRACE_EVENT_SECCOMP)) {
726 : syscall_set_return_value(current, regs,
727 : -ENOSYS, 0);
728 0 : return -1;
729 : }
730 :
731 : /* Allow the BPF to provide the event message */
732 0 : ptrace_event(PTRACE_EVENT_SECCOMP, data);
733 : /*
734 : * The delivery of a fatal signal during event
735 : * notification may silently skip tracer notification.
736 : * Terminating the task now avoids executing a system
737 : * call that may not be intended.
738 : */
739 0 : if (fatal_signal_pending(current))
740 0 : do_exit(SIGSYS);
741 0 : if (syscall_get_nr(current, regs) < 0)
742 : return -1; /* Explicit request to skip. */
743 :
744 0 : return 0;
745 : }
746 : #endif /* CONFIG_HAVE_ARCH_SECCOMP_FILTER */
747 :
748 0 : long prctl_get_seccomp(void)
749 : {
750 0 : return current->seccomp.mode;
751 : }
752 :
753 : /**
754 : * seccomp_set_mode_strict: internal function for setting strict seccomp
755 : *
756 : * Once current->seccomp.mode is non-zero, it may not be changed.
757 : *
758 : * Returns 0 on success or -EINVAL on failure.
759 : */
760 0 : static long seccomp_set_mode_strict(void)
761 : {
762 : const unsigned long seccomp_mode = SECCOMP_MODE_STRICT;
763 : long ret = -EINVAL;
764 :
765 : spin_lock_irq(&current->sighand->siglock);
766 :
767 0 : if (!seccomp_may_assign_mode(seccomp_mode))
768 : goto out;
769 :
770 : #ifdef TIF_NOTSC
771 : disable_TSC();
772 : #endif
773 : seccomp_assign_mode(current, seccomp_mode);
774 : ret = 0;
775 :
776 : out:
777 : spin_unlock_irq(&current->sighand->siglock);
778 :
779 0 : return ret;
780 : }
781 :
782 : #ifdef CONFIG_SECCOMP_FILTER
783 : /**
784 : * seccomp_set_mode_filter: internal function for setting seccomp filter
785 : * @flags: flags to change filter behavior
786 : * @filter: struct sock_fprog containing filter
787 : *
788 : * This function may be called repeatedly to install additional filters.
789 : * Every filter successfully installed will be evaluated (in reverse order)
790 : * for each system call the task makes.
791 : *
792 : * Once current->seccomp.mode is non-zero, it may not be changed.
793 : *
794 : * Returns 0 on success or a non-zero value (e.g. -EINVAL) on failure.
795 : */
796 0 : static long seccomp_set_mode_filter(unsigned int flags,
797 : const char __user *filter)
798 : {
799 : const unsigned long seccomp_mode = SECCOMP_MODE_FILTER;
800 : struct seccomp_filter *prepared = NULL;
801 : long ret = -EINVAL;
802 :
803 : /* Validate flags. */
804 0 : if (flags & ~SECCOMP_FILTER_FLAG_MASK)
805 : return -EINVAL;
806 :
807 : /* Prepare the new filter before holding any locks. */
808 0 : prepared = seccomp_prepare_user_filter(filter);
809 0 : if (IS_ERR(prepared))
810 0 : return PTR_ERR(prepared);
811 :
812 : /*
813 : * Make sure we cannot change seccomp or nnp state via TSYNC
814 : * while another thread is in the middle of calling exec.
815 : */
816 0 : if (flags & SECCOMP_FILTER_FLAG_TSYNC &&
817 0 : mutex_lock_killable(&current->signal->cred_guard_mutex))
818 : goto out_free;
819 :
820 : spin_lock_irq(&current->sighand->siglock);
821 :
822 0 : if (!seccomp_may_assign_mode(seccomp_mode))
823 : goto out;
824 :
825 0 : ret = seccomp_attach_filter(flags, prepared);
826 0 : if (ret)
827 : goto out;
828 : /* Do not free the successfully attached filter. */
829 : prepared = NULL;
830 :
831 0 : seccomp_assign_mode(current, seccomp_mode);
832 : out:
833 : spin_unlock_irq(&current->sighand->siglock);
834 0 : if (flags & SECCOMP_FILTER_FLAG_TSYNC)
835 0 : mutex_unlock(&current->signal->cred_guard_mutex);
836 : out_free:
837 : seccomp_filter_free(prepared);
838 0 : return ret;
839 : }
840 : #else
841 : static inline long seccomp_set_mode_filter(unsigned int flags,
842 : const char __user *filter)
843 : {
844 : return -EINVAL;
845 : }
846 : #endif
847 :
848 : /* Common entry point for both prctl and syscall. */
849 0 : static long do_seccomp(unsigned int op, unsigned int flags,
850 : const char __user *uargs)
851 : {
852 0 : switch (op) {
853 : case SECCOMP_SET_MODE_STRICT:
854 0 : if (flags != 0 || uargs != NULL)
855 : return -EINVAL;
856 0 : return seccomp_set_mode_strict();
857 : case SECCOMP_SET_MODE_FILTER:
858 0 : return seccomp_set_mode_filter(flags, uargs);
859 : default:
860 : return -EINVAL;
861 : }
862 : }
863 :
864 0 : SYSCALL_DEFINE3(seccomp, unsigned int, op, unsigned int, flags,
865 : const char __user *, uargs)
866 : {
867 0 : return do_seccomp(op, flags, uargs);
868 : }
869 :
870 : /**
871 : * prctl_set_seccomp: configures current->seccomp.mode
872 : * @seccomp_mode: requested mode to use
873 : * @filter: optional struct sock_fprog for use with SECCOMP_MODE_FILTER
874 : *
875 : * Returns 0 on success or -EINVAL on failure.
876 : */
877 0 : long prctl_set_seccomp(unsigned long seccomp_mode, char __user *filter)
878 : {
879 : unsigned int op;
880 : char __user *uargs;
881 :
882 0 : switch (seccomp_mode) {
883 : case SECCOMP_MODE_STRICT:
884 : op = SECCOMP_SET_MODE_STRICT;
885 : /*
886 : * Setting strict mode through prctl always ignored filter,
887 : * so make sure it is always NULL here to pass the internal
888 : * check in do_seccomp().
889 : */
890 : uargs = NULL;
891 : break;
892 : case SECCOMP_MODE_FILTER:
893 : op = SECCOMP_SET_MODE_FILTER;
894 : uargs = filter;
895 0 : break;
896 : default:
897 : return -EINVAL;
898 : }
899 :
900 : /* prctl interface doesn't have flags, so they are always zero. */
901 0 : return do_seccomp(op, 0, uargs);
902 : }
|