LCOV - code coverage report
Current view: top level - kernel - seccomp.c (source / functions) Hit Total Coverage
Test: coverage.info Lines: 8 194 4.1 %
Date: 2015-04-12 14:34:49 Functions: 2 21 9.5 %

          Line data    Source code
       1             : /*
       2             :  * linux/kernel/seccomp.c
       3             :  *
       4             :  * Copyright 2004-2005  Andrea Arcangeli <andrea@cpushare.com>
       5             :  *
       6             :  * Copyright (C) 2012 Google, Inc.
       7             :  * Will Drewry <wad@chromium.org>
       8             :  *
       9             :  * This defines a simple but solid secure-computing facility.
      10             :  *
      11             :  * Mode 1 uses a fixed list of allowed system calls.
      12             :  * Mode 2 allows user-defined system call filters in the form
      13             :  *        of Berkeley Packet Filters/Linux Socket Filters.
      14             :  */
      15             : 
      16             : #include <linux/atomic.h>
      17             : #include <linux/audit.h>
      18             : #include <linux/compat.h>
      19             : #include <linux/sched.h>
      20             : #include <linux/seccomp.h>
      21             : #include <linux/slab.h>
      22             : #include <linux/syscalls.h>
      23             : 
      24             : #ifdef CONFIG_HAVE_ARCH_SECCOMP_FILTER
      25             : #include <asm/syscall.h>
      26             : #endif
      27             : 
      28             : #ifdef CONFIG_SECCOMP_FILTER
      29             : #include <linux/filter.h>
      30             : #include <linux/pid.h>
      31             : #include <linux/ptrace.h>
      32             : #include <linux/security.h>
      33             : #include <linux/tracehook.h>
      34             : #include <linux/uaccess.h>
      35             : 
      36             : /**
      37             :  * struct seccomp_filter - container for seccomp BPF programs
      38             :  *
      39             :  * @usage: reference count to manage the object lifetime.
      40             :  *         get/put helpers should be used when accessing an instance
      41             :  *         outside of a lifetime-guarded section.  In general, this
      42             :  *         is only needed for handling filters shared across tasks.
      43             :  * @prev: points to a previously installed, or inherited, filter
      44             :  * @prog: the BPF program to evaluate; its instruction count is
      45             :  *        read as prog->len
      46             :  *
      47             :  * seccomp_filter objects are organized in a tree linked via the @prev
      48             :  * pointer.  For any task, it appears to be a singly-linked list starting
      49             :  * with current->seccomp.filter, the most recently attached or inherited filter.
      50             :  * However, multiple filters may share a @prev node, by way of fork(), which
      51             :  * results in a unidirectional tree existing in memory.  This is similar to
      52             :  * how namespaces work.
      53             :  *
      54             :  * seccomp_filter objects should never be modified after being attached
      55             :  * to a task_struct (other than @usage).
      56             :  */
      57             : struct seccomp_filter {
      58             :         atomic_t usage;
      59             :         struct seccomp_filter *prev;
      60             :         struct bpf_prog *prog;
      61             : };
      62             : 
      63             : /* Limit any path through the tree to 256KB worth of instructions; checked in seccomp_attach_filter(). */
      64             : #define MAX_INSNS_PER_PATH ((1 << 18) / sizeof(struct sock_filter))
      65             : 
      66             : /*
      67             :  * Fill @sd from current's registers: syscall nr, arch, six args and IP.
      68             :  * Endianness is explicitly ignored; BPF program authors manage it per arch.
      69             :  */
      70           0 : static void populate_seccomp_data(struct seccomp_data *sd)
      71             : {
      72           0 :         struct task_struct *task = current;
      73           0 :         struct pt_regs *regs = task_pt_regs(task);
      74             :         unsigned long args[6];
      75             : 
      76           0 :         sd->nr = syscall_get_nr(task, regs);
      77           0 :         sd->arch = syscall_get_arch();
      78             :         syscall_get_arguments(task, regs, 0, 6, args); /* args[0..5] are copied into sd->args below */
      79             :         sd->args[0] = args[0];
      80           0 :         sd->args[1] = args[1];
      81           0 :         sd->args[2] = args[2];
      82           0 :         sd->args[3] = args[3];
      83           0 :         sd->args[4] = args[4];
      84           0 :         sd->args[5] = args[5];
      85           0 :         sd->instruction_pointer = KSTK_EIP(task);
      86           0 : }
      87             : 
      88             : /**
      89             :  *      seccomp_check_filter - verify seccomp filter code
      90             :  *      @filter: filter to verify
      91             :  *      @flen: length of filter
      92             :  *
      93             :  * Takes a previously checked filter (by bpf_check_classic) and rewrites
      94             :  * it in place: BPF_LD | BPF_W | BPF_ABS becomes BPF_LDX | BPF_W | BPF_ABS
      95             :  * (offset must be 4-byte aligned and within struct seccomp_data), and
      96             :  * BPF_LEN loads become immediate loads of sizeof(struct seccomp_data).
      97             :  *
      98             :  * Returns 0 if the rule set is legal or -EINVAL if not.
      99             :  */
     100           0 : static int seccomp_check_filter(struct sock_filter *filter, unsigned int flen)
     101             : {
     102             :         int pc; /* NOTE(review): signed index compared against unsigned flen below */
     103           0 :         for (pc = 0; pc < flen; pc++) {
     104           0 :                 struct sock_filter *ftest = &filter[pc];
     105           0 :                 u16 code = ftest->code;
     106           0 :                 u32 k = ftest->k;
     107             : 
     108           0 :                 switch (code) {
     109             :                 case BPF_LD | BPF_W | BPF_ABS:
     110           0 :                         ftest->code = BPF_LDX | BPF_W | BPF_ABS;
     111             :                         /* 32-bit aligned and not out of bounds. */
     112           0 :                         if (k >= sizeof(struct seccomp_data) || k & 3)
     113             :                                 return -EINVAL;
     114           0 :                         continue;
     115             :                 case BPF_LD | BPF_W | BPF_LEN:
     116           0 :                         ftest->code = BPF_LD | BPF_IMM; /* length load becomes a constant */
     117           0 :                         ftest->k = sizeof(struct seccomp_data);
     118           0 :                         continue;
     119             :                 case BPF_LDX | BPF_W | BPF_LEN:
     120           0 :                         ftest->code = BPF_LDX | BPF_IMM; /* same rewrite for the X register form */
     121           0 :                         ftest->k = sizeof(struct seccomp_data);
     122           0 :                         continue;
     123             :                 /* Explicitly include allowed calls. */
     124             :                 case BPF_RET | BPF_K:
     125             :                 case BPF_RET | BPF_A:
     126             :                 case BPF_ALU | BPF_ADD | BPF_K:
     127             :                 case BPF_ALU | BPF_ADD | BPF_X:
     128             :                 case BPF_ALU | BPF_SUB | BPF_K:
     129             :                 case BPF_ALU | BPF_SUB | BPF_X:
     130             :                 case BPF_ALU | BPF_MUL | BPF_K:
     131             :                 case BPF_ALU | BPF_MUL | BPF_X:
     132             :                 case BPF_ALU | BPF_DIV | BPF_K:
     133             :                 case BPF_ALU | BPF_DIV | BPF_X:
     134             :                 case BPF_ALU | BPF_AND | BPF_K:
     135             :                 case BPF_ALU | BPF_AND | BPF_X:
     136             :                 case BPF_ALU | BPF_OR | BPF_K:
     137             :                 case BPF_ALU | BPF_OR | BPF_X:
     138             :                 case BPF_ALU | BPF_XOR | BPF_K:
     139             :                 case BPF_ALU | BPF_XOR | BPF_X:
     140             :                 case BPF_ALU | BPF_LSH | BPF_K:
     141             :                 case BPF_ALU | BPF_LSH | BPF_X:
     142             :                 case BPF_ALU | BPF_RSH | BPF_K:
     143             :                 case BPF_ALU | BPF_RSH | BPF_X:
     144             :                 case BPF_ALU | BPF_NEG:
     145             :                 case BPF_LD | BPF_IMM:
     146             :                 case BPF_LDX | BPF_IMM:
     147             :                 case BPF_MISC | BPF_TAX:
     148             :                 case BPF_MISC | BPF_TXA:
     149             :                 case BPF_LD | BPF_MEM:
     150             :                 case BPF_LDX | BPF_MEM:
     151             :                 case BPF_ST:
     152             :                 case BPF_STX:
     153             :                 case BPF_JMP | BPF_JA:
     154             :                 case BPF_JMP | BPF_JEQ | BPF_K:
     155             :                 case BPF_JMP | BPF_JEQ | BPF_X:
     156             :                 case BPF_JMP | BPF_JGE | BPF_K:
     157             :                 case BPF_JMP | BPF_JGE | BPF_X:
     158             :                 case BPF_JMP | BPF_JGT | BPF_K:
     159             :                 case BPF_JMP | BPF_JGT | BPF_X:
     160             :                 case BPF_JMP | BPF_JSET | BPF_K:
     161             :                 case BPF_JMP | BPF_JSET | BPF_X:
     162           0 :                         continue;
     163             :                 default: /* any opcode not listed above is rejected */
     164             :                         return -EINVAL;
     165             :                 }
     166             :         }
     167             :         return 0;
     168             : }
     169             : 
     170             : /**
     171             :  * seccomp_run_filters - evaluates all seccomp filters against @sd
     172             :  * @sd: seccomp data to run the filters on; if NULL it is populated here
     173             :  *
     174             :  * Returns valid seccomp BPF response codes.
     175             :  */
     176           0 : static u32 seccomp_run_filters(struct seccomp_data *sd)
     177             : {
     178           0 :         struct seccomp_filter *f = ACCESS_ONCE(current->seccomp.filter);
     179             :         struct seccomp_data sd_local;
     180             :         u32 ret = SECCOMP_RET_ALLOW; /* kept unless some filter returns a lower action */
     181             : 
     182             :         /* Ensure unexpected behavior doesn't result in failing open. */
     183           0 :         if (unlikely(WARN_ON(f == NULL)))
     184             :                 return SECCOMP_RET_KILL;
     185             : 
     186             :         /* Make sure cross-thread synced filter points somewhere sane. */
     187             :         smp_read_barrier_depends();
     188             : 
     189           0 :         if (!sd) {
     190           0 :                 populate_seccomp_data(&sd_local);
     191             :                 sd = &sd_local;
     192             :         }
     193             : 
     194             :         /*
     195             :          * All filters in the list are evaluated and the lowest BPF return
     196             :          * value always takes priority (ignoring the DATA).
     197             :          */
     198           0 :         for (; f; f = f->prev) { /* starts at current's filter, then follows ->prev */
     199           0 :                 u32 cur_ret = BPF_PROG_RUN(f->prog, (void *)sd);
     200             : 
     201           0 :                 if ((cur_ret & SECCOMP_RET_ACTION) < (ret & SECCOMP_RET_ACTION))
     202             :                         ret = cur_ret;
     203             :         }
     204             :         return ret;
     205             : }
     206             : #endif /* CONFIG_SECCOMP_FILTER */
     207             : 
     208             : static inline bool seccomp_may_assign_mode(unsigned long seccomp_mode) /* may current switch to seccomp_mode? */
     209             : {
     210             :         assert_spin_locked(&current->sighand->siglock); /* caller must hold current's siglock */
     211             : 
     212           0 :         if (current->seccomp.mode && current->seccomp.mode != seccomp_mode)
     213             :                 return false; /* already in a different, non-zero mode */
     214             : 
     215             :         return true;
     216             : }
     217             : 
     218             : static inline void seccomp_assign_mode(struct task_struct *task,
     219             :                                        unsigned long seccomp_mode)
     220             : {
     221             :         assert_spin_locked(&task->sighand->siglock); /* caller must hold task's siglock */
     222             : 
     223           0 :         task->seccomp.mode = seccomp_mode;
     224             :         /*
     225             :          * Make sure TIF_SECCOMP cannot be set before the mode (and
     226             :          * filter) is set.
     227             :          */
     228           0 :         smp_mb__before_atomic();
     229             :         set_tsk_thread_flag(task, TIF_SECCOMP); /* issued only after the mode store and barrier above */
     230             : }
     231             : 
     232             : #ifdef CONFIG_SECCOMP_FILTER
     233             : /* Returns 1 if parent is NULL or is reachable from child via ->prev (child itself included), else 0. */
     234           0 : static int is_ancestor(struct seccomp_filter *parent,
     235             :                        struct seccomp_filter *child)
     236             : {
     237             :         /* NULL is the root ancestor. */
     238           0 :         if (parent == NULL)
     239             :                 return 1;
     240           0 :         for (; child; child = child->prev)
     241           0 :                 if (child == parent)
     242             :                         return 1;
     243             :         return 0;
     244             : }
     245             : 
     246             : /**
     247             :  * seccomp_can_sync_threads: checks if all threads can be synchronized
     248             :  *
     249             :  * Expects sighand and cred_guard_mutex locks to be held.
     250             :  *
     251             :  * Returns 0 on success, -ESRCH if the offending thread's pid cannot be
     252             :  * resolved, or the pid of the first thread which was either not in the
     253             :  * correct seccomp mode or did not have an ancestral seccomp filter.
     254             :  */
     255             : static inline pid_t seccomp_can_sync_threads(void)
     256             : {
     257             :         struct task_struct *thread, *caller;
     258             : 
     259           0 :         BUG_ON(!mutex_is_locked(&current->signal->cred_guard_mutex));
     260             :         assert_spin_locked(&current->sighand->siglock);
     261             : 
     262             :         /* Validate all threads being eligible for synchronization. */
     263             :         caller = current;
     264           0 :         for_each_thread(caller, thread) {
     265             :                 pid_t failed;
     266             : 
     267             :                 /* Skip current, since it is initiating the sync. */
     268           0 :                 if (thread == caller)
     269           0 :                         continue;
     270             : 
     271           0 :                 if (thread->seccomp.mode == SECCOMP_MODE_DISABLED || /* eligible: seccomp not enabled yet */
     272           0 :                     (thread->seccomp.mode == SECCOMP_MODE_FILTER && /* eligible: its filter is on caller's ->prev chain */
     273           0 :                      is_ancestor(thread->seccomp.filter,
     274             :                                  caller->seccomp.filter)))
     275           0 :                         continue;
     276             : 
     277             :                 /* Return the first thread that cannot be synchronized. */
     278             :                 failed = task_pid_vnr(thread);
     279             :                 /* If the pid cannot be resolved, then return -ESRCH */
     280           0 :                 if (unlikely(WARN_ON(failed == 0)))
     281             :                         failed = -ESRCH;
     282             :                 return failed;
     283             :         }
     284             : 
     285             :         return 0;
     286             : }
     287             : 
     288             : /**
     289             :  * seccomp_sync_threads: sets all threads to use current's filter
     290             :  *
     291             :  * Expects sighand and cred_guard_mutex locks to be held, and for
     292             :  * seccomp_can_sync_threads() to have returned success already
     293             :  * without dropping the locks.
     294             :  * Each other thread takes a reference on current's filter and drops its own.
     295             :  */
     296             : static inline void seccomp_sync_threads(void)
     297             : {
     298             :         struct task_struct *thread, *caller;
     299             : 
     300           0 :         BUG_ON(!mutex_is_locked(&current->signal->cred_guard_mutex));
     301             :         assert_spin_locked(&current->sighand->siglock);
     302             : 
     303             :         /* Synchronize all threads. */
     304             :         caller = current;
     305           0 :         for_each_thread(caller, thread) {
     306             :                 /* Skip current, since it needs no changes. */
     307           0 :                 if (thread == caller)
     308           0 :                         continue;
     309             : 
     310             :                 /* Get a task reference for the new leaf node. */
     311           0 :                 get_seccomp_filter(caller);
     312             :                 /*
     313             :                  * Drop the task reference to the shared ancestor since
     314             :                  * current's path will hold a reference.  (This also
     315             :                  * allows a put before the assignment.)
     316             :                  */
     317           0 :                 put_seccomp_filter(thread);
     318           0 :                 smp_store_release(&thread->seccomp.filter,
     319             :                                   caller->seccomp.filter);
     320             :                 /*
     321             :                  * Opt the other thread into seccomp if needed.
     322             :                  * As threads are considered to be trust-realm
     323             :                  * equivalent (see ptrace_may_access), it is safe to
     324             :                  * allow one thread to transition the other.
     325             :                  */
     326           0 :                 if (thread->seccomp.mode == SECCOMP_MODE_DISABLED) {
     327             :                         /*
     328             :                          * Don't let an unprivileged task work around
     329             :                          * the no_new_privs restriction by creating
     330             :                          * a thread that sets it up, enters seccomp,
     331             :                          * then dies.
     332             :                          */
     333           0 :                         if (task_no_new_privs(caller))
     334             :                                 task_set_no_new_privs(thread);
     335             : 
     336             :                         seccomp_assign_mode(thread, SECCOMP_MODE_FILTER); /* sets the mode, then TIF_SECCOMP */
     337             :                 }
     338             :         }
     339             : }
     340             : 
     341             : /**
     342             :  * seccomp_prepare_filter: Prepares a seccomp filter for use.
     343             :  * @fprog: BPF program to install
     344             :  *
     345             :  * Returns filter (usage set to 1) on success or an ERR_PTR on failure.
     346             :  */
     347           0 : static struct seccomp_filter *seccomp_prepare_filter(struct sock_fprog *fprog)
     348             : {
     349             :         struct seccomp_filter *filter;
     350             :         unsigned long fp_size;
     351             :         struct sock_filter *fp;
     352             :         int new_len;
     353             :         long ret;
     354             : 
     355           0 :         if (fprog->len == 0 || fprog->len > BPF_MAXINSNS)
     356             :                 return ERR_PTR(-EINVAL);
     357             :         BUG_ON(INT_MAX / fprog->len < sizeof(struct sock_filter)); /* guards the multiplication on the next line */
     358           0 :         fp_size = fprog->len * sizeof(struct sock_filter);
     359             : 
     360             :         /*
     361             :          * Installing a seccomp filter requires that the task has
     362             :          * CAP_SYS_ADMIN in its namespace or be running with no_new_privs.
     363             :          * This avoids scenarios where unprivileged tasks can affect the
     364             :          * behavior of privileged children.
     365             :          */
     366           0 :         if (!task_no_new_privs(current) &&
     367           0 :             security_capable_noaudit(current_cred(), current_user_ns(),
     368             :                                      CAP_SYS_ADMIN) != 0)
     369             :                 return ERR_PTR(-EACCES);
     370             : 
     371             :         fp = kzalloc(fp_size, GFP_KERNEL|__GFP_NOWARN);
     372           0 :         if (!fp)
     373             :                 return ERR_PTR(-ENOMEM);
     374             : 
     375             :         /* Copy the instructions from fprog. */
     376             :         ret = -EFAULT;
     377           0 :         if (copy_from_user(fp, fprog->filter, fp_size))
     378             :                 goto free_prog;
     379             : 
     380             :         /* Check and rewrite the fprog via the skb checker */
     381           0 :         ret = bpf_check_classic(fp, fprog->len);
     382           0 :         if (ret)
     383             :                 goto free_prog;
     384             : 
     385             :         /* Check and rewrite the fprog for seccomp use */
     386           0 :         ret = seccomp_check_filter(fp, fprog->len);
     387           0 :         if (ret)
     388             :                 goto free_prog;
     389             : 
     390             :         /* Convert 'sock_filter' insns to 'bpf_insn' insns */
     391           0 :         ret = bpf_convert_filter(fp, fprog->len, NULL, &new_len); /* no destination here; new_len sizes the prog below */
     392           0 :         if (ret)
     393             :                 goto free_prog;
     394             : 
     395             :         /* Allocate a new seccomp_filter */
     396             :         ret = -ENOMEM;
     397             :         filter = kzalloc(sizeof(struct seccomp_filter),
     398             :                          GFP_KERNEL|__GFP_NOWARN);
     399           0 :         if (!filter)
     400             :                 goto free_prog;
     401             : 
     402           0 :         filter->prog = bpf_prog_alloc(bpf_prog_size(new_len), __GFP_NOWARN);
     403           0 :         if (!filter->prog)
     404             :                 goto free_filter;
     405             : 
     406           0 :         ret = bpf_convert_filter(fp, fprog->len, filter->prog->insnsi, &new_len); /* second call writes into the prog */
     407           0 :         if (ret)
     408             :                 goto free_filter_prog;
     409             : 
     410           0 :         kfree(fp); /* the copied sock_filter array is not referenced after this point */
     411           0 :         atomic_set(&filter->usage, 1);
     412           0 :         filter->prog->len = new_len;
     413             : 
     414           0 :         bpf_prog_select_runtime(filter->prog);
     415             : 
     416           0 :         return filter;
     417             : 
     418             : free_filter_prog: /* error unwind: each label falls through to the ones below */
     419           0 :         __bpf_prog_free(filter->prog);
     420             : free_filter:
     421           0 :         kfree(filter);
     422             : free_prog:
     423           0 :         kfree(fp);
     424           0 :         return ERR_PTR(ret);
     425             : }
     426             : 
     427             : /**
     428             :  * seccomp_prepare_user_filter - prepares a user-supplied sock_fprog
     429             :  * @user_filter: pointer to the user data containing a sock_fprog.
     430             :  *
     431             :  * Returns the prepared filter on success or an ERR_PTR on failure.
     432             :  */
     433             : static struct seccomp_filter *
     434           0 : seccomp_prepare_user_filter(const char __user *user_filter)
     435             : {
     436             :         struct sock_fprog fprog;
     437             :         struct seccomp_filter *filter = ERR_PTR(-EFAULT); /* returned as-is if either copy_from_user fails */
     438             : 
     439             : #ifdef CONFIG_COMPAT
     440             :         if (is_compat_task()) {
     441             :                 struct compat_sock_fprog fprog32;
     442             :                 if (copy_from_user(&fprog32, user_filter, sizeof(fprog32)))
     443             :                         goto out;
     444             :                 fprog.len = fprog32.len;
     445             :                 fprog.filter = compat_ptr(fprog32.filter);
     446             :         } else /* falls through to the if below. */
     447             : #endif
     448           0 :         if (copy_from_user(&fprog, user_filter, sizeof(fprog)))
     449             :                 goto out;
     450           0 :         filter = seccomp_prepare_filter(&fprog);
     451             : out:
     452           0 :         return filter;
     453             : }
     454             : 
     455             : /**
     456             :  * seccomp_attach_filter: validate and attach filter
     457             :  * @flags:  flags to change filter behavior
     458             :  * @filter: seccomp filter to add to the current process
     459             :  *
     460             :  * Caller must be holding current->sighand->siglock lock.
     461             :  *
     462             :  * Returns 0 on success, -ve on error, or the pid of an unsyncable thread (TSYNC).
     463             :  */
     464           0 : static long seccomp_attach_filter(unsigned int flags,
     465             :                                   struct seccomp_filter *filter)
     466             : {
     467             :         unsigned long total_insns;
     468             :         struct seccomp_filter *walker;
     469             : 
     470             :         assert_spin_locked(&current->sighand->siglock);
     471             : 
     472             :         /* Validate resulting filter length. */
     473           0 :         total_insns = filter->prog->len;
     474           0 :         for (walker = current->seccomp.filter; walker; walker = walker->prev)
     475           0 :                 total_insns += walker->prog->len + 4;  /* 4 instr penalty */
     476           0 :         if (total_insns > MAX_INSNS_PER_PATH)
     477             :                 return -ENOMEM; /* new filter plus existing chain exceeds the per-path limit */
     478             : 
     479             :         /* If thread sync has been requested, check that it is possible. */
     480           0 :         if (flags & SECCOMP_FILTER_FLAG_TSYNC) {
     481             :                 int ret;
     482             : 
     483             :                 ret = seccomp_can_sync_threads();
     484           0 :                 if (ret)
     485             :                         return ret; /* may be a positive pid or -ESRCH; nothing has been attached yet */
     486             :         }
     487             : 
     488             :         /*
     489             :          * If there is an existing filter, make it the prev and don't drop its
     490             :          * task reference.
     491             :          */
     492           0 :         filter->prev = current->seccomp.filter;
     493           0 :         current->seccomp.filter = filter;
     494             : 
     495             :         /* Now that the new filter is in place, synchronize to all threads. */
     496           0 :         if (flags & SECCOMP_FILTER_FLAG_TSYNC)
     497             :                 seccomp_sync_threads();
     498             : 
     499             :         return 0;
     500             : }
     501             : 
     502             : /* get_seccomp_filter - increments the reference count of the filter on @tsk, if it has one */
     503        2993 : void get_seccomp_filter(struct task_struct *tsk)
     504             : {
     505        2993 :         struct seccomp_filter *orig = tsk->seccomp.filter;
     506        2993 :         if (!orig)
     507        2993 :                 return; /* no filter attached: nothing to reference */
     508             :         /* Reference count is bounded by the number of total processes. */
     509           0 :         atomic_inc(&orig->usage);
     510             : }
     511             : 
     512             : static inline void seccomp_filter_free(struct seccomp_filter *filter) /* frees filter->prog and filter; NULL is a no-op */
     513             : {
     514           0 :         if (filter) {
     515           0 :                 bpf_prog_free(filter->prog);
     516           0 :                 kfree(filter);
     517             :         }
     518             : }
     519             : 
     520             : /* put_seccomp_filter - decrements the ref count of tsk->seccomp.filter, freeing filters whose count hits zero */
     521        2915 : void put_seccomp_filter(struct task_struct *tsk)
     522             : {
     523        2915 :         struct seccomp_filter *orig = tsk->seccomp.filter;
     524             :         /* Clean up single-reference branches iteratively. */
     525        5830 :         while (orig && atomic_dec_and_test(&orig->usage)) {
     526             :                 struct seccomp_filter *freeme = orig;
     527           0 :                 orig = orig->prev; /* read ->prev before freeme is freed */
     528             :                 seccomp_filter_free(freeme);
     529             :         }
     530        2915 : }
     531             : 
     532             : /**
     533             :  * seccomp_send_sigsys - signals the task to allow in-process syscall emulation
     534             :  * @syscall: syscall number to send to userland (via si_syscall)
     535             :  * @reason: filter-supplied reason code to send to userland (via si_errno)
     536             :  *
     537             :  * Forces a SIGSYS with a code of SYS_SECCOMP and related sigsys info.
     538             :  */
     539           0 : static void seccomp_send_sigsys(int syscall, int reason)
     540             : {
     541             :         struct siginfo info;
     542           0 :         memset(&info, 0, sizeof(info)); /* zero every field not set explicitly below */
     543           0 :         info.si_signo = SIGSYS;
     544           0 :         info.si_code = SYS_SECCOMP;
     545           0 :         info.si_call_addr = (void __user *)KSTK_EIP(current);
     546           0 :         info.si_errno = reason;
     547           0 :         info.si_arch = syscall_get_arch();
     548           0 :         info.si_syscall = syscall;
     549           0 :         force_sig_info(SIGSYS, &info, current);
     550           0 : }
     551             : #endif  /* CONFIG_SECCOMP_FILTER */
     552             : 
     553             : /*
     554             :  * Secure computing mode 1 allows only read/write/exit/sigreturn.
     555             :  * To be fully secure this must be combined with rlimit
     556             :  * to limit the stack allocations too.
     557             :  */
     558             : static int mode1_syscalls[] = { /* NOTE(review): never written in the visible code; could likely be const */
     559             :         __NR_seccomp_read, __NR_seccomp_write, __NR_seccomp_exit, __NR_seccomp_sigreturn,
     560             :         0, /* null terminated */
     561             : };
     562             : 
     563             : #ifdef CONFIG_COMPAT
     564             : static int mode1_syscalls_32[] = {
     565             :         __NR_seccomp_read_32, __NR_seccomp_write_32, __NR_seccomp_exit_32, __NR_seccomp_sigreturn_32,
     566             :         0, /* null terminated */
     567             : };
     568             : #endif
     569             : 
     570           0 : static void __secure_computing_strict(int this_syscall)
     571             : {
     572             :         int *syscall_whitelist = mode1_syscalls;
     573             : #ifdef CONFIG_COMPAT
     574             :         if (is_compat_task())
     575             :                 syscall_whitelist = mode1_syscalls_32;
     576             : #endif
     577             :         do {
     578           0 :                 if (*syscall_whitelist == this_syscall)
     579           0 :                         return;
     580           0 :         } while (*++syscall_whitelist);
     581             : 
     582             : #ifdef SECCOMP_DEBUG
     583             :         dump_stack();
     584             : #endif
     585             :         audit_seccomp(this_syscall, SIGKILL, SECCOMP_RET_KILL);
     586           0 :         do_exit(SIGKILL);
     587             : }
     588             : 
     589             : #ifndef CONFIG_HAVE_ARCH_SECCOMP_FILTER
     590             : void secure_computing_strict(int this_syscall)
     591             : {
     592             :         int mode = current->seccomp.mode;
     593             : 
     594             :         if (mode == 0)
     595             :                 return;
     596             :         else if (mode == SECCOMP_MODE_STRICT)
     597             :                 __secure_computing_strict(this_syscall);
     598             :         else
     599             :                 BUG();
     600             : }
     601             : #else
     602           0 : int __secure_computing(void)
     603             : {
     604           0 :         u32 phase1_result = seccomp_phase1(NULL);
     605             : 
     606           0 :         if (likely(phase1_result == SECCOMP_PHASE1_OK))
     607             :                 return 0;
     608           0 :         else if (likely(phase1_result == SECCOMP_PHASE1_SKIP))
     609             :                 return -1;
     610             :         else
     611           0 :                 return seccomp_phase2(phase1_result);
     612             : }
     613             : 
     614             : #ifdef CONFIG_SECCOMP_FILTER
     615           0 : static u32 __seccomp_phase1_filter(int this_syscall, struct seccomp_data *sd)
     616             : {
     617             :         u32 filter_ret, action;
     618             :         int data;
     619             : 
     620             :         /*
     621             :          * Make sure that any changes to mode from another thread have
     622             :          * been seen after TIF_SECCOMP was seen.
     623             :          */
     624           0 :         rmb();
     625             : 
     626           0 :         filter_ret = seccomp_run_filters(sd);
     627           0 :         data = filter_ret & SECCOMP_RET_DATA;
     628           0 :         action = filter_ret & SECCOMP_RET_ACTION;
     629             : 
     630           0 :         switch (action) {
     631             :         case SECCOMP_RET_ERRNO:
     632             :                 /* Set the low-order 16-bits as a errno. */
     633           0 :                 syscall_set_return_value(current, task_pt_regs(current),
     634             :                                          -data, 0);
     635             :                 goto skip;
     636             : 
     637             :         case SECCOMP_RET_TRAP:
     638             :                 /* Show the handler the original registers. */
     639           0 :                 syscall_rollback(current, task_pt_regs(current));
     640             :                 /* Let the filter pass back 16 bits of data. */
     641           0 :                 seccomp_send_sigsys(this_syscall, data);
     642           0 :                 goto skip;
     643             : 
     644             :         case SECCOMP_RET_TRACE:
     645           0 :                 return filter_ret;  /* Save the rest for phase 2. */
     646             : 
     647             :         case SECCOMP_RET_ALLOW:
     648             :                 return SECCOMP_PHASE1_OK;
     649             : 
     650             :         case SECCOMP_RET_KILL:
     651             :         default:
     652             :                 audit_seccomp(this_syscall, SIGSYS, action);
     653           0 :                 do_exit(SIGSYS);
     654             :         }
     655             : 
     656             :         unreachable();
     657             : 
     658             : skip:
     659             :         audit_seccomp(this_syscall, 0, action);
     660             :         return SECCOMP_PHASE1_SKIP;
     661             : }
     662             : #endif
     663             : 
     664             : /**
     665             :  * seccomp_phase1() - run fast path seccomp checks on the current syscall
     666             :  * @arg sd: The seccomp_data or NULL
     667             :  *
     668             :  * This only reads pt_regs via the syscall_xyz helpers.  The only change
     669             :  * it will make to pt_regs is via syscall_set_return_value, and it will
     670             :  * only do that if it returns SECCOMP_PHASE1_SKIP.
     671             :  *
     672             :  * If sd is provided, it will not read pt_regs at all.
     673             :  *
     674             :  * It may also call do_exit or force a signal; these actions must be
     675             :  * safe.
     676             :  *
     677             :  * If it returns SECCOMP_PHASE1_OK, the syscall passes checks and should
     678             :  * be processed normally.
     679             :  *
     680             :  * If it returns SECCOMP_PHASE1_SKIP, then the syscall should not be
     681             :  * invoked.  In this case, seccomp_phase1 will have set the return value
     682             :  * using syscall_set_return_value.
     683             :  *
     684             :  * If it returns anything else, then the return value should be passed
     685             :  * to seccomp_phase2 from a context in which ptrace hooks are safe.
     686             :  */
     687           0 : u32 seccomp_phase1(struct seccomp_data *sd)
     688             : {
     689           0 :         int mode = current->seccomp.mode;
     690           0 :         int this_syscall = sd ? sd->nr :
     691             :                 syscall_get_nr(current, task_pt_regs(current));
     692             : 
     693           0 :         switch (mode) {
     694             :         case SECCOMP_MODE_STRICT:
     695           0 :                 __secure_computing_strict(this_syscall);  /* may call do_exit */
     696           0 :                 return SECCOMP_PHASE1_OK;
     697             : #ifdef CONFIG_SECCOMP_FILTER
     698             :         case SECCOMP_MODE_FILTER:
     699           0 :                 return __seccomp_phase1_filter(this_syscall, sd);
     700             : #endif
     701             :         default:
     702             :                 BUG();
     703             :         }
     704             : }
     705             : 
     706             : /**
     707             :  * seccomp_phase2() - finish slow path seccomp work for the current syscall
     708             :  * @phase1_result: The return value from seccomp_phase1()
     709             :  *
     710             :  * This must be called from a context in which ptrace hooks can be used.
     711             :  *
     712             :  * Returns 0 if the syscall should be processed or -1 to skip the syscall.
     713             :  */
     714           0 : int seccomp_phase2(u32 phase1_result)
     715             : {
     716           0 :         struct pt_regs *regs = task_pt_regs(current);
     717             :         u32 action = phase1_result & SECCOMP_RET_ACTION;
     718           0 :         int data = phase1_result & SECCOMP_RET_DATA;
     719             : 
     720             :         BUG_ON(action != SECCOMP_RET_TRACE);
     721             : 
     722             :         audit_seccomp(syscall_get_nr(current, regs), 0, action);
     723             : 
     724             :         /* Skip these calls if there is no tracer. */
     725           0 :         if (!ptrace_event_enabled(current, PTRACE_EVENT_SECCOMP)) {
     726             :                 syscall_set_return_value(current, regs,
     727             :                                          -ENOSYS, 0);
     728           0 :                 return -1;
     729             :         }
     730             : 
     731             :         /* Allow the BPF to provide the event message */
     732           0 :         ptrace_event(PTRACE_EVENT_SECCOMP, data);
     733             :         /*
     734             :          * The delivery of a fatal signal during event
     735             :          * notification may silently skip tracer notification.
     736             :          * Terminating the task now avoids executing a system
     737             :          * call that may not be intended.
     738             :          */
     739           0 :         if (fatal_signal_pending(current))
     740           0 :                 do_exit(SIGSYS);
     741           0 :         if (syscall_get_nr(current, regs) < 0)
     742             :                 return -1;  /* Explicit request to skip. */
     743             : 
     744           0 :         return 0;
     745             : }
     746             : #endif /* CONFIG_HAVE_ARCH_SECCOMP_FILTER */
     747             : 
     748           0 : long prctl_get_seccomp(void)
     749             : {
     750           0 :         return current->seccomp.mode;
     751             : }
     752             : 
     753             : /**
     754             :  * seccomp_set_mode_strict: internal function for setting strict seccomp
     755             :  *
     756             :  * Once current->seccomp.mode is non-zero, it may not be changed.
     757             :  *
     758             :  * Returns 0 on success or -EINVAL on failure.
     759             :  */
     760           0 : static long seccomp_set_mode_strict(void)
     761             : {
     762             :         const unsigned long seccomp_mode = SECCOMP_MODE_STRICT;
     763             :         long ret = -EINVAL;
     764             : 
     765             :         spin_lock_irq(&current->sighand->siglock);
     766             : 
     767           0 :         if (!seccomp_may_assign_mode(seccomp_mode))
     768             :                 goto out;
     769             : 
     770             : #ifdef TIF_NOTSC
     771             :         disable_TSC();
     772             : #endif
     773             :         seccomp_assign_mode(current, seccomp_mode);
     774             :         ret = 0;
     775             : 
     776             : out:
     777             :         spin_unlock_irq(&current->sighand->siglock);
     778             : 
     779           0 :         return ret;
     780             : }
     781             : 
     782             : #ifdef CONFIG_SECCOMP_FILTER
     783             : /**
     784             :  * seccomp_set_mode_filter: internal function for setting seccomp filter
     785             :  * @flags:  flags to change filter behavior
     786             :  * @filter: struct sock_fprog containing filter
     787             :  *
     788             :  * This function may be called repeatedly to install additional filters.
     789             :  * Every filter successfully installed will be evaluated (in reverse order)
     790             :  * for each system call the task makes.
     791             :  *
     792             :  * Once current->seccomp.mode is non-zero, it may not be changed.
     793             :  *
     794             :  * Returns 0 on success or -EINVAL on failure.
     795             :  */
     796           0 : static long seccomp_set_mode_filter(unsigned int flags,
     797             :                                     const char __user *filter)
     798             : {
     799             :         const unsigned long seccomp_mode = SECCOMP_MODE_FILTER;
     800             :         struct seccomp_filter *prepared = NULL;
     801             :         long ret = -EINVAL;
     802             : 
     803             :         /* Validate flags. */
     804           0 :         if (flags & ~SECCOMP_FILTER_FLAG_MASK)
     805             :                 return -EINVAL;
     806             : 
     807             :         /* Prepare the new filter before holding any locks. */
     808           0 :         prepared = seccomp_prepare_user_filter(filter);
     809           0 :         if (IS_ERR(prepared))
     810           0 :                 return PTR_ERR(prepared);
     811             : 
     812             :         /*
     813             :          * Make sure we cannot change seccomp or nnp state via TSYNC
     814             :          * while another thread is in the middle of calling exec.
     815             :          */
     816           0 :         if (flags & SECCOMP_FILTER_FLAG_TSYNC &&
     817           0 :             mutex_lock_killable(&current->signal->cred_guard_mutex))
     818             :                 goto out_free;
     819             : 
     820             :         spin_lock_irq(&current->sighand->siglock);
     821             : 
     822           0 :         if (!seccomp_may_assign_mode(seccomp_mode))
     823             :                 goto out;
     824             : 
     825           0 :         ret = seccomp_attach_filter(flags, prepared);
     826           0 :         if (ret)
     827             :                 goto out;
     828             :         /* Do not free the successfully attached filter. */
     829             :         prepared = NULL;
     830             : 
     831           0 :         seccomp_assign_mode(current, seccomp_mode);
     832             : out:
     833             :         spin_unlock_irq(&current->sighand->siglock);
     834           0 :         if (flags & SECCOMP_FILTER_FLAG_TSYNC)
     835           0 :                 mutex_unlock(&current->signal->cred_guard_mutex);
     836             : out_free:
     837             :         seccomp_filter_free(prepared);
     838           0 :         return ret;
     839             : }
     840             : #else
     841             : static inline long seccomp_set_mode_filter(unsigned int flags,
     842             :                                            const char __user *filter)
     843             : {
     844             :         return -EINVAL;
     845             : }
     846             : #endif
     847             : 
     848             : /* Common entry point for both prctl and syscall. */
     849           0 : static long do_seccomp(unsigned int op, unsigned int flags,
     850             :                        const char __user *uargs)
     851             : {
     852           0 :         switch (op) {
     853             :         case SECCOMP_SET_MODE_STRICT:
     854           0 :                 if (flags != 0 || uargs != NULL)
     855             :                         return -EINVAL;
     856           0 :                 return seccomp_set_mode_strict();
     857             :         case SECCOMP_SET_MODE_FILTER:
     858           0 :                 return seccomp_set_mode_filter(flags, uargs);
     859             :         default:
     860             :                 return -EINVAL;
     861             :         }
     862             : }
     863             : 
     864           0 : SYSCALL_DEFINE3(seccomp, unsigned int, op, unsigned int, flags,
     865             :                          const char __user *, uargs)
     866             : {
     867           0 :         return do_seccomp(op, flags, uargs);
     868             : }
     869             : 
     870             : /**
     871             :  * prctl_set_seccomp: configures current->seccomp.mode
     872             :  * @seccomp_mode: requested mode to use
     873             :  * @filter: optional struct sock_fprog for use with SECCOMP_MODE_FILTER
     874             :  *
     875             :  * Returns 0 on success or -EINVAL on failure.
     876             :  */
     877           0 : long prctl_set_seccomp(unsigned long seccomp_mode, char __user *filter)
     878             : {
     879             :         unsigned int op;
     880             :         char __user *uargs;
     881             : 
     882           0 :         switch (seccomp_mode) {
     883             :         case SECCOMP_MODE_STRICT:
     884             :                 op = SECCOMP_SET_MODE_STRICT;
     885             :                 /*
     886             :                  * Setting strict mode through prctl always ignored filter,
     887             :                  * so make sure it is always NULL here to pass the internal
     888             :                  * check in do_seccomp().
     889             :                  */
     890             :                 uargs = NULL;
     891             :                 break;
     892             :         case SECCOMP_MODE_FILTER:
     893             :                 op = SECCOMP_SET_MODE_FILTER;
     894             :                 uargs = filter;
     895           0 :                 break;
     896             :         default:
     897             :                 return -EINVAL;
     898             :         }
     899             : 
     900             :         /* prctl interface doesn't have flags, so they are always zero. */
     901           0 :         return do_seccomp(op, 0, uargs);
     902             : }

Generated by: LCOV version 1.11