Line data Source code
1 : /*
2 : * kexec.c - kexec system call
3 : * Copyright (C) 2002-2004 Eric Biederman <ebiederm@xmission.com>
4 : *
5 : * This source code is licensed under the GNU General Public License,
6 : * Version 2. See the file COPYING for more details.
7 : */
8 :
9 : #define pr_fmt(fmt) "kexec: " fmt
10 :
11 : #include <linux/capability.h>
12 : #include <linux/mm.h>
13 : #include <linux/file.h>
14 : #include <linux/slab.h>
15 : #include <linux/fs.h>
16 : #include <linux/kexec.h>
17 : #include <linux/mutex.h>
18 : #include <linux/list.h>
19 : #include <linux/highmem.h>
20 : #include <linux/syscalls.h>
21 : #include <linux/reboot.h>
22 : #include <linux/ioport.h>
23 : #include <linux/hardirq.h>
24 : #include <linux/elf.h>
25 : #include <linux/elfcore.h>
26 : #include <linux/utsname.h>
27 : #include <linux/numa.h>
28 : #include <linux/suspend.h>
29 : #include <linux/device.h>
30 : #include <linux/freezer.h>
31 : #include <linux/pm.h>
32 : #include <linux/cpu.h>
33 : #include <linux/console.h>
34 : #include <linux/vmalloc.h>
35 : #include <linux/swap.h>
36 : #include <linux/syscore_ops.h>
37 : #include <linux/compiler.h>
38 : #include <linux/hugetlb.h>
39 :
40 : #include <asm/page.h>
41 : #include <asm/uaccess.h>
42 : #include <asm/io.h>
43 : #include <asm/sections.h>
44 :
45 : #include <crypto/hash.h>
46 : #include <crypto/sha.h>
47 :
48 : /* Per cpu memory for storing cpu states in case of system crash. */
49 : note_buf_t __percpu *crash_notes;
50 :
51 : /* vmcoreinfo stuff */
52 : static unsigned char vmcoreinfo_data[VMCOREINFO_BYTES];
53 : u32 vmcoreinfo_note[VMCOREINFO_NOTE_SIZE/4];
54 : size_t vmcoreinfo_size;
55 : size_t vmcoreinfo_max_size = sizeof(vmcoreinfo_data);
56 :
57 : /* Flag to indicate we are going to kexec a new kernel */
58 : bool kexec_in_progress = false;
59 :
60 : /*
61 : * Declare these symbols weak so that if the architecture provides a purgatory,
62 : * these will be overridden.
63 : */
64 : char __weak kexec_purgatory[0];
65 : size_t __weak kexec_purgatory_size = 0;
66 :
67 : #ifdef CONFIG_KEXEC_FILE
68 : static int kexec_calculate_store_digests(struct kimage *image);
69 : #endif
70 :
71 : /* Location of the reserved area for the crash kernel */
72 : struct resource crashk_res = {
73 : .name = "Crash kernel",
74 : .start = 0,
75 : .end = 0,
76 : .flags = IORESOURCE_BUSY | IORESOURCE_MEM
77 : };
78 : struct resource crashk_low_res = {
79 : .name = "Crash kernel",
80 : .start = 0,
81 : .end = 0,
82 : .flags = IORESOURCE_BUSY | IORESOURCE_MEM
83 : };
84 :
85 0 : int kexec_should_crash(struct task_struct *p)
86 : {
87 0 : if (in_interrupt() || !p->pid || is_global_init(p) || panic_on_oops)
88 : return 1;
89 0 : return 0;
90 : }
91 :
92 : /*
93 : * When kexec transitions to the new kernel there is a one-to-one
94 : * mapping between physical and virtual addresses. On processors
95 : * where you can disable the MMU this is trivial, and easy. For
96 : * others it is still a simple predictable page table to setup.
97 : *
98 : * In that environment kexec copies the new kernel to its final
99 : * resting place. This means I can only support memory whose
100 : * physical address can fit in an unsigned long. In particular
101 : * addresses where (pfn << PAGE_SHIFT) > ULONG_MAX cannot be handled.
102 : * If the assembly stub has more restrictive requirements
103 : * KEXEC_SOURCE_MEMORY_LIMIT and KEXEC_DEST_MEMORY_LIMIT can be
104 : * defined more restrictively in <asm/kexec.h>.
105 : *
106 : * The code for the transition from the current kernel to
107 : * the new kernel is placed in the control_code_buffer, whose size
108 : * is given by KEXEC_CONTROL_PAGE_SIZE. In the best case only a single
109 : * page of memory is necessary, but some architectures require more.
110 : * Because this memory must be identity mapped in the transition from
111 : * virtual to physical addresses it must live in the range
112 : * 0 - TASK_SIZE, as only the user space mappings are arbitrarily
113 : * modifiable.
114 : *
115 : * The assembly stub in the control code buffer is passed a linked list
116 : * of descriptor pages detailing the source pages of the new kernel,
117 : * and the destination addresses of those source pages. As this data
118 : * structure is not used in the context of the current OS, it must
119 : * be self-contained.
120 : *
121 : * The code has been made to work with highmem pages and will use a
122 : * destination page in its final resting place (if it happens
123 : * to allocate it). The end product of this is that most of the
124 : * physical address space, and most of RAM can be used.
125 : *
126 : * Future directions include:
127 : * - allocating a page table with the control code buffer identity
128 : * mapped, to simplify machine_kexec and make kexec_on_panic more
129 : * reliable.
130 : */
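/*
 * To make the descriptor list described above concrete, the following is a
 * rough C rendering of the copy loop that the architecture's relocation stub
 * performs over that list.  This is an illustrative sketch only: the helper
 * name is hypothetical, the real stub runs on physical addresses under the
 * identity mapping (phys_to_virt() below merely stands in for that), and
 * nothing in this file calls it.
 */
static void __maybe_unused kimage_copy_loop_sketch(kimage_entry_t *head)
{
	kimage_entry_t *ptr = head, entry;
	unsigned long dest = 0;

	while ((entry = *ptr) && !(entry & IND_DONE)) {
		if (entry & IND_DESTINATION) {
			/* Remember where the following source pages belong */
			dest = entry & PAGE_MASK;
			ptr++;
		} else if (entry & IND_INDIRECTION) {
			/* Continue walking in the next indirection page */
			ptr = phys_to_virt(entry & PAGE_MASK);
		} else if (entry & IND_SOURCE) {
			/* Copy one source page to its destination */
			memcpy(phys_to_virt(dest),
			       phys_to_virt(entry & PAGE_MASK), PAGE_SIZE);
			dest += PAGE_SIZE;
			ptr++;
		} else {
			ptr++;		/* skip empty slots */
		}
	}
}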
131 :
132 : /*
133 : * KIMAGE_NO_DEST is an impossible destination address, used for
134 : * allocating pages whose destination address we do not care about.
135 : */
136 : #define KIMAGE_NO_DEST (-1UL)
137 :
138 : static int kimage_is_destination_range(struct kimage *image,
139 : unsigned long start, unsigned long end);
140 : static struct page *kimage_alloc_page(struct kimage *image,
141 : gfp_t gfp_mask,
142 : unsigned long dest);
143 :
144 0 : static int copy_user_segment_list(struct kimage *image,
145 : unsigned long nr_segments,
146 : struct kexec_segment __user *segments)
147 : {
148 : int ret;
149 : size_t segment_bytes;
150 :
151 : /* Read in the segments */
152 0 : image->nr_segments = nr_segments;
153 0 : segment_bytes = nr_segments * sizeof(*segments);
154 0 : ret = copy_from_user(image->segment, segments, segment_bytes);
155 0 : if (ret)
156 : ret = -EFAULT;
157 :
158 0 : return ret;
159 : }
160 :
161 0 : static int sanity_check_segment_list(struct kimage *image)
162 : {
163 : int result, i;
164 0 : unsigned long nr_segments = image->nr_segments;
165 :
166 : /*
167 : * Verify we have good destination addresses. The caller is
168 : * responsible for making certain we don't attempt to load
169 : * the new image into invalid or reserved areas of RAM. This
170 : * just verifies it is an address we can use.
171 : *
172 : * Since the kernel does everything in page size chunks ensure
173 : * the destination addresses are page aligned. Too many
174 : * special cases crop up when we don't do this. The most
175 : * insidious is getting overlapping destination addresses
176 : * simply because addresses are changed to page size
177 : * granularity.
178 : */
179 : result = -EADDRNOTAVAIL;
180 0 : for (i = 0; i < nr_segments; i++) {
181 : unsigned long mstart, mend;
182 :
183 0 : mstart = image->segment[i].mem;
184 0 : mend = mstart + image->segment[i].memsz;
185 0 : if ((mstart & ~PAGE_MASK) || (mend & ~PAGE_MASK))
186 : return result;
187 0 : if (mend >= KEXEC_DESTINATION_MEMORY_LIMIT)
188 : return result;
189 : }
190 :
191 : /* Verify our destination addresses do not overlap.
192 : * If we allowed overlapping destination addresses
193 : * through, very weird things can happen with no
194 : * easy explanation as one segment stomps on another.
195 : */
196 : result = -EINVAL;
197 0 : for (i = 0; i < nr_segments; i++) {
198 : unsigned long mstart, mend;
199 : unsigned long j;
200 :
201 0 : mstart = image->segment[i].mem;
202 0 : mend = mstart + image->segment[i].memsz;
203 0 : for (j = 0; j < i; j++) {
204 : unsigned long pstart, pend;
205 0 : pstart = image->segment[j].mem;
206 0 : pend = pstart + image->segment[j].memsz;
207 : /* Do the segments overlap ? */
208 0 : if ((mend > pstart) && (mstart < pend))
209 : return result;
210 : }
211 : }
212 :
213 : /* Ensure our buffer sizes do not exceed
214 : * our memory sizes. This should always be the case,
215 : * and it is easier to check up front than to be surprised
216 : * later on.
217 : */
218 : result = -EINVAL;
219 0 : for (i = 0; i < nr_segments; i++) {
220 0 : if (image->segment[i].bufsz > image->segment[i].memsz)
221 : return result;
222 : }
223 :
224 : /*
225 : * Verify we have good destination addresses. Normally
226 : * the caller is responsible for making certain we don't
227 : * attempt to load the new image into invalid or reserved
228 : * areas of RAM. But crash kernels are preloaded into a
229 : * reserved area of ram. We must ensure the addresses
230 : * are in the reserved area otherwise preloading the
231 : * kernel could corrupt things.
232 : */
233 :
234 0 : if (image->type == KEXEC_TYPE_CRASH) {
235 : result = -EADDRNOTAVAIL;
236 0 : for (i = 0; i < nr_segments; i++) {
237 : unsigned long mstart, mend;
238 :
239 0 : mstart = image->segment[i].mem;
240 0 : mend = mstart + image->segment[i].memsz - 1;
241 : /* Ensure we are within the crash kernel limits */
242 0 : if ((mstart < crashk_res.start) ||
243 0 : (mend > crashk_res.end))
244 : return result;
245 : }
246 : }
247 :
248 : return 0;
249 : }
250 :
251 0 : static struct kimage *do_kimage_alloc_init(void)
252 : {
253 : struct kimage *image;
254 :
255 : /* Allocate a controlling structure */
256 : image = kzalloc(sizeof(*image), GFP_KERNEL);
257 0 : if (!image)
258 : return NULL;
259 :
260 0 : image->head = 0;
261 0 : image->entry = &image->head;
262 0 : image->last_entry = &image->head;
263 0 : image->control_page = ~0; /* By default this does not apply */
264 0 : image->type = KEXEC_TYPE_DEFAULT;
265 :
266 : /* Initialize the list of control pages */
267 0 : INIT_LIST_HEAD(&image->control_pages);
268 :
269 : /* Initialize the list of destination pages */
270 0 : INIT_LIST_HEAD(&image->dest_pages);
271 :
272 : /* Initialize the list of unusable pages */
273 0 : INIT_LIST_HEAD(&image->unusable_pages);
274 :
275 0 : return image;
276 : }
277 :
278 : static void kimage_free_page_list(struct list_head *list);
279 :
280 0 : static int kimage_alloc_init(struct kimage **rimage, unsigned long entry,
281 : unsigned long nr_segments,
282 : struct kexec_segment __user *segments,
283 : unsigned long flags)
284 : {
285 : int ret;
286 : struct kimage *image;
287 0 : bool kexec_on_panic = flags & KEXEC_ON_CRASH;
288 :
289 0 : if (kexec_on_panic) {
290 : /* Verify we have a valid entry point */
291 0 : if ((entry < crashk_res.start) || (entry > crashk_res.end))
292 : return -EADDRNOTAVAIL;
293 : }
294 :
295 : /* Allocate and initialize a controlling structure */
296 0 : image = do_kimage_alloc_init();
297 0 : if (!image)
298 : return -ENOMEM;
299 :
300 0 : image->start = entry;
301 :
302 0 : ret = copy_user_segment_list(image, nr_segments, segments);
303 0 : if (ret)
304 : goto out_free_image;
305 :
306 0 : ret = sanity_check_segment_list(image);
307 0 : if (ret)
308 : goto out_free_image;
309 :
310 : /* Enable the special crash kernel control page allocation policy. */
311 0 : if (kexec_on_panic) {
312 0 : image->control_page = crashk_res.start;
313 0 : image->type = KEXEC_TYPE_CRASH;
314 : }
315 :
316 : /*
317 : * Find a location for the control code buffer, and add it to
318 : * the vector of segments so that its pages will also be
319 : * counted as destination pages.
320 : */
321 : ret = -ENOMEM;
322 0 : image->control_code_page = kimage_alloc_control_pages(image,
323 : get_order(KEXEC_CONTROL_PAGE_SIZE));
324 0 : if (!image->control_code_page) {
325 0 : pr_err("Could not allocate control_code_buffer\n");
326 0 : goto out_free_image;
327 : }
328 :
329 0 : if (!kexec_on_panic) {
330 0 : image->swap_page = kimage_alloc_control_pages(image, 0);
331 0 : if (!image->swap_page) {
332 0 : pr_err("Could not allocate swap buffer\n");
333 : goto out_free_control_pages;
334 : }
335 : }
336 :
337 0 : *rimage = image;
338 0 : return 0;
339 : out_free_control_pages:
340 0 : kimage_free_page_list(&image->control_pages);
341 : out_free_image:
342 0 : kfree(image);
343 0 : return ret;
344 : }
345 :
346 : #ifdef CONFIG_KEXEC_FILE
347 : static int copy_file_from_fd(int fd, void **buf, unsigned long *buf_len)
348 : {
349 : struct fd f = fdget(fd);
350 : int ret;
351 : struct kstat stat;
352 : loff_t pos;
353 : ssize_t bytes = 0;
354 :
355 : if (!f.file)
356 : return -EBADF;
357 :
358 : ret = vfs_getattr(&f.file->f_path, &stat);
359 : if (ret)
360 : goto out;
361 :
362 : if (stat.size > INT_MAX) {
363 : ret = -EFBIG;
364 : goto out;
365 : }
366 :
367 : /* Don't hand 0 to vmalloc, it whines. */
368 : if (stat.size == 0) {
369 : ret = -EINVAL;
370 : goto out;
371 : }
372 :
373 : *buf = vmalloc(stat.size);
374 : if (!*buf) {
375 : ret = -ENOMEM;
376 : goto out;
377 : }
378 :
379 : pos = 0;
380 : while (pos < stat.size) {
381 : bytes = kernel_read(f.file, pos, (char *)(*buf) + pos,
382 : stat.size - pos);
383 : if (bytes < 0) {
384 : vfree(*buf);
385 : ret = bytes;
386 : goto out;
387 : }
388 :
389 : if (bytes == 0)
390 : break;
391 : pos += bytes;
392 : }
393 :
394 : if (pos != stat.size) {
395 : ret = -EBADF;
396 : vfree(*buf);
397 : goto out;
398 : }
399 :
400 : *buf_len = pos;
401 : out:
402 : fdput(f);
403 : return ret;
404 : }
405 :
406 : /* Architectures can provide this probe function */
407 : int __weak arch_kexec_kernel_image_probe(struct kimage *image, void *buf,
408 : unsigned long buf_len)
409 : {
410 : return -ENOEXEC;
411 : }
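/*
 * Illustrative sketch (hypothetical, not part of this file): an architecture
 * that supports kexec_file_load() supplies its own probe, typically by
 * checking for an image format it knows how to load.  A probe that only
 * accepts ELF images might look roughly like this:
 */
static int __maybe_unused example_elf_image_probe(struct kimage *image,
						  void *buf,
						  unsigned long buf_len)
{
	/* Reject anything without the ELF magic; let other loaders try it */
	if (buf_len < SELFMAG || memcmp(buf, ELFMAG, SELFMAG))
		return -ENOEXEC;

	return 0;
}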
412 :
413 : void * __weak arch_kexec_kernel_image_load(struct kimage *image)
414 : {
415 : return ERR_PTR(-ENOEXEC);
416 : }
417 :
418 : void __weak arch_kimage_file_post_load_cleanup(struct kimage *image)
419 : {
420 : }
421 :
422 : int __weak arch_kexec_kernel_verify_sig(struct kimage *image, void *buf,
423 : unsigned long buf_len)
424 : {
425 : return -EKEYREJECTED;
426 : }
427 :
428 : /* Apply relocations of type RELA */
429 : int __weak
430 : arch_kexec_apply_relocations_add(const Elf_Ehdr *ehdr, Elf_Shdr *sechdrs,
431 : unsigned int relsec)
432 : {
433 : pr_err("RELA relocation unsupported.\n");
434 : return -ENOEXEC;
435 : }
436 :
437 : /* Apply relocations of type REL */
438 : int __weak
439 : arch_kexec_apply_relocations(const Elf_Ehdr *ehdr, Elf_Shdr *sechdrs,
440 : unsigned int relsec)
441 : {
442 : pr_err("REL relocation unsupported.\n");
443 : return -ENOEXEC;
444 : }
445 :
446 : /*
447 : * Free up memory used by the kernel, initrd, and command line. These are
448 : * temporary allocations which are no longer needed after the buffers have
449 : * been loaded into separate segments and have been copied elsewhere.
450 : */
451 : static void kimage_file_post_load_cleanup(struct kimage *image)
452 : {
453 : struct purgatory_info *pi = &image->purgatory_info;
454 :
455 : vfree(image->kernel_buf);
456 : image->kernel_buf = NULL;
457 :
458 : vfree(image->initrd_buf);
459 : image->initrd_buf = NULL;
460 :
461 : kfree(image->cmdline_buf);
462 : image->cmdline_buf = NULL;
463 :
464 : vfree(pi->purgatory_buf);
465 : pi->purgatory_buf = NULL;
466 :
467 : vfree(pi->sechdrs);
468 : pi->sechdrs = NULL;
469 :
470 : /* See if architecture has anything to cleanup post load */
471 : arch_kimage_file_post_load_cleanup(image);
472 :
473 : /*
474 : * The above call should have called into the bootloader to free up
475 : * any data stored in kimage->image_loader_data. It should
476 : * be OK now to free it up.
477 : */
478 : kfree(image->image_loader_data);
479 : image->image_loader_data = NULL;
480 : }
481 :
482 : /*
483 : * In file mode the list of segments is prepared by the kernel. Copy the
484 : * relevant data from user space, do error checking, and prepare the segment list.
485 : */
486 : static int
487 : kimage_file_prepare_segments(struct kimage *image, int kernel_fd, int initrd_fd,
488 : const char __user *cmdline_ptr,
489 : unsigned long cmdline_len, unsigned flags)
490 : {
491 : int ret = 0;
492 : void *ldata;
493 :
494 : ret = copy_file_from_fd(kernel_fd, &image->kernel_buf,
495 : &image->kernel_buf_len);
496 : if (ret)
497 : return ret;
498 :
499 : /* Call arch image probe handlers */
500 : ret = arch_kexec_kernel_image_probe(image, image->kernel_buf,
501 : image->kernel_buf_len);
502 :
503 : if (ret)
504 : goto out;
505 :
506 : #ifdef CONFIG_KEXEC_VERIFY_SIG
507 : ret = arch_kexec_kernel_verify_sig(image, image->kernel_buf,
508 : image->kernel_buf_len);
509 : if (ret) {
510 : pr_debug("kernel signature verification failed.\n");
511 : goto out;
512 : }
513 : pr_debug("kernel signature verification successful.\n");
514 : #endif
515 : /* It is possible that no initramfs is being loaded */
516 : if (!(flags & KEXEC_FILE_NO_INITRAMFS)) {
517 : ret = copy_file_from_fd(initrd_fd, &image->initrd_buf,
518 : &image->initrd_buf_len);
519 : if (ret)
520 : goto out;
521 : }
522 :
523 : if (cmdline_len) {
524 : image->cmdline_buf = kzalloc(cmdline_len, GFP_KERNEL);
525 : if (!image->cmdline_buf) {
526 : ret = -ENOMEM;
527 : goto out;
528 : }
529 :
530 : ret = copy_from_user(image->cmdline_buf, cmdline_ptr,
531 : cmdline_len);
532 : if (ret) {
533 : ret = -EFAULT;
534 : goto out;
535 : }
536 :
537 : image->cmdline_buf_len = cmdline_len;
538 :
539 : /* the command line should be a NUL-terminated string */
540 : if (image->cmdline_buf[cmdline_len - 1] != '\0') {
541 : ret = -EINVAL;
542 : goto out;
543 : }
544 : }
545 :
546 : /* Call arch image load handlers */
547 : ldata = arch_kexec_kernel_image_load(image);
548 :
549 : if (IS_ERR(ldata)) {
550 : ret = PTR_ERR(ldata);
551 : goto out;
552 : }
553 :
554 : image->image_loader_data = ldata;
555 : out:
556 : /* In case of error, free up all allocated memory in this function */
557 : if (ret)
558 : kimage_file_post_load_cleanup(image);
559 : return ret;
560 : }
561 :
562 : static int
563 : kimage_file_alloc_init(struct kimage **rimage, int kernel_fd,
564 : int initrd_fd, const char __user *cmdline_ptr,
565 : unsigned long cmdline_len, unsigned long flags)
566 : {
567 : int ret;
568 : struct kimage *image;
569 : bool kexec_on_panic = flags & KEXEC_FILE_ON_CRASH;
570 :
571 : image = do_kimage_alloc_init();
572 : if (!image)
573 : return -ENOMEM;
574 :
575 : image->file_mode = 1;
576 :
577 : if (kexec_on_panic) {
578 : /* Enable special crash kernel control page alloc policy. */
579 : image->control_page = crashk_res.start;
580 : image->type = KEXEC_TYPE_CRASH;
581 : }
582 :
583 : ret = kimage_file_prepare_segments(image, kernel_fd, initrd_fd,
584 : cmdline_ptr, cmdline_len, flags);
585 : if (ret)
586 : goto out_free_image;
587 :
588 : ret = sanity_check_segment_list(image);
589 : if (ret)
590 : goto out_free_post_load_bufs;
591 :
592 : ret = -ENOMEM;
593 : image->control_code_page = kimage_alloc_control_pages(image,
594 : get_order(KEXEC_CONTROL_PAGE_SIZE));
595 : if (!image->control_code_page) {
596 : pr_err("Could not allocate control_code_buffer\n");
597 : goto out_free_post_load_bufs;
598 : }
599 :
600 : if (!kexec_on_panic) {
601 : image->swap_page = kimage_alloc_control_pages(image, 0);
602 : if (!image->swap_page) {
603 : pr_err("Could not allocate swap buffer\n");
604 : goto out_free_control_pages;
605 : }
606 : }
607 :
608 : *rimage = image;
609 : return 0;
610 : out_free_control_pages:
611 : kimage_free_page_list(&image->control_pages);
612 : out_free_post_load_bufs:
613 : kimage_file_post_load_cleanup(image);
614 : out_free_image:
615 : kfree(image);
616 : return ret;
617 : }
618 : #else /* CONFIG_KEXEC_FILE */
619 : static inline void kimage_file_post_load_cleanup(struct kimage *image) { }
620 : #endif /* CONFIG_KEXEC_FILE */
621 :
622 : static int kimage_is_destination_range(struct kimage *image,
623 : unsigned long start,
624 : unsigned long end)
625 : {
626 : unsigned long i;
627 :
628 0 : for (i = 0; i < image->nr_segments; i++) {
629 : unsigned long mstart, mend;
630 :
631 0 : mstart = image->segment[i].mem;
632 0 : mend = mstart + image->segment[i].memsz;
633 0 : if ((end > mstart) && (start < mend))
634 : return 1;
635 : }
636 :
637 : return 0;
638 : }
639 :
640 0 : static struct page *kimage_alloc_pages(gfp_t gfp_mask, unsigned int order)
641 : {
642 : struct page *pages;
643 :
644 : pages = alloc_pages(gfp_mask, order);
645 0 : if (pages) {
646 : unsigned int count, i;
647 0 : pages->mapping = NULL;
648 0 : set_page_private(pages, order);
649 0 : count = 1 << order;
650 0 : for (i = 0; i < count; i++)
651 0 : SetPageReserved(pages + i);
652 : }
653 :
654 0 : return pages;
655 : }
656 :
657 0 : static void kimage_free_pages(struct page *page)
658 : {
659 : unsigned int order, count, i;
660 :
661 0 : order = page_private(page);
662 0 : count = 1 << order;
663 0 : for (i = 0; i < count; i++)
664 0 : ClearPageReserved(page + i);
665 0 : __free_pages(page, order);
666 0 : }
667 :
668 0 : static void kimage_free_page_list(struct list_head *list)
669 : {
670 : struct list_head *pos, *next;
671 :
672 0 : list_for_each_safe(pos, next, list) {
673 : struct page *page;
674 :
675 0 : page = list_entry(pos, struct page, lru);
676 : list_del(&page->lru);
677 0 : kimage_free_pages(page);
678 : }
679 0 : }
680 :
681 0 : static struct page *kimage_alloc_normal_control_pages(struct kimage *image,
682 : unsigned int order)
683 : {
684 : /* Control pages are special, they are the intermediaries
685 : * that are needed while we copy the rest of the pages
686 : * to their final resting place. As such they must
687 : * not conflict with either the destination addresses
688 : * or memory the kernel is already using.
689 : *
690 : * The only case where we really need more than one of
691 : * these is for architectures where we cannot disable
692 : * the MMU and must instead generate an identity mapped
693 : * page table for all of the memory.
694 : *
695 : * At worst this runs in O(N) of the image size.
696 : */
697 : struct list_head extra_pages;
698 : struct page *pages;
699 : unsigned int count;
700 :
701 0 : count = 1 << order;
702 : INIT_LIST_HEAD(&extra_pages);
703 :
704 : /* Loop while I can allocate a page and the page allocated
705 : * is a destination page.
706 : */
707 : do {
708 : unsigned long pfn, epfn, addr, eaddr;
709 :
710 0 : pages = kimage_alloc_pages(GFP_KERNEL, order);
711 0 : if (!pages)
712 : break;
713 0 : pfn = page_to_pfn(pages);
714 0 : epfn = pfn + count;
715 0 : addr = pfn << PAGE_SHIFT;
716 0 : eaddr = epfn << PAGE_SHIFT;
717 0 : if ((epfn >= (KEXEC_CONTROL_MEMORY_LIMIT >> PAGE_SHIFT)) ||
718 : kimage_is_destination_range(image, addr, eaddr)) {
719 0 : list_add(&pages->lru, &extra_pages);
720 : pages = NULL;
721 : }
722 0 : } while (!pages);
723 :
724 0 : if (pages) {
725 : /* Remember the allocated page... */
726 0 : list_add(&pages->lru, &image->control_pages);
727 :
728 : /* Because the page is already in its destination
729 : * location we will never allocate another page at
730 : * that address. Therefore kimage_alloc_pages
731 : * will not return it (again) and we don't need
732 : * to give it an entry in image->segment[].
733 : */
734 : }
735 : /* Deal with the destination pages I have inadvertently allocated.
736 : *
737 : * Ideally I would convert multi-page allocations into single
738 : * page allocations, and add everything to image->dest_pages.
739 : *
740 : * For now it is simpler to just free the pages.
741 : */
742 0 : kimage_free_page_list(&extra_pages);
743 :
744 0 : return pages;
745 : }
746 :
747 0 : static struct page *kimage_alloc_crash_control_pages(struct kimage *image,
748 : unsigned int order)
749 : {
750 : /* Control pages are special, they are the intermediaries
751 : * that are needed while we copy the rest of the pages
752 : * to their final resting place. As such they must
753 : * not conflict with either the destination addresses
754 : * or memory the kernel is already using.
755 : *
756 : * Control pages are also the only pages we must allocate
757 : * when loading a crash kernel. All of the other pages
758 : * are specified by the segments and we just memcpy
759 : * into them directly.
760 : *
761 : * The only case where we really need more than one of
762 : * these is for architectures where we cannot disable
763 : * the MMU and must instead generate an identity mapped
764 : * page table for all of the memory.
765 : *
766 : * Given the low demand this implements a very simple
767 : * allocator that finds the first hole of the appropriate
768 : * size in the reserved memory region, and allocates all
769 : * of the memory up to and including the hole.
770 : */
771 : unsigned long hole_start, hole_end, size;
772 : struct page *pages;
773 :
774 : pages = NULL;
775 0 : size = (1 << order) << PAGE_SHIFT;
776 0 : hole_start = (image->control_page + (size - 1)) & ~(size - 1);
777 0 : hole_end = hole_start + size - 1;
778 0 : while (hole_end <= crashk_res.end) {
779 : unsigned long i;
780 :
781 : if (hole_end > KEXEC_CRASH_CONTROL_MEMORY_LIMIT)
782 : break;
783 : /* See if I overlap any of the segments */
784 0 : for (i = 0; i < image->nr_segments; i++) {
785 : unsigned long mstart, mend;
786 :
787 0 : mstart = image->segment[i].mem;
788 0 : mend = mstart + image->segment[i].memsz - 1;
789 0 : if ((hole_end >= mstart) && (hole_start <= mend)) {
790 : /* Advance the hole to the end of the segment */
791 0 : hole_start = (mend + (size - 1)) & ~(size - 1);
792 0 : hole_end = hole_start + size - 1;
793 0 : break;
794 : }
795 : }
796 : /* If I don't overlap any segments I have found my hole! */
797 0 : if (i == image->nr_segments) {
798 0 : pages = pfn_to_page(hole_start >> PAGE_SHIFT);
799 0 : break;
800 : }
801 : }
802 0 : if (pages)
803 0 : image->control_page = hole_end;
804 :
805 0 : return pages;
806 : }
807 :
808 :
809 0 : struct page *kimage_alloc_control_pages(struct kimage *image,
810 : unsigned int order)
811 : {
812 : struct page *pages = NULL;
813 :
814 0 : switch (image->type) {
815 : case KEXEC_TYPE_DEFAULT:
816 0 : pages = kimage_alloc_normal_control_pages(image, order);
817 0 : break;
818 : case KEXEC_TYPE_CRASH:
819 0 : pages = kimage_alloc_crash_control_pages(image, order);
820 0 : break;
821 : }
822 :
823 0 : return pages;
824 : }
825 :
826 0 : static int kimage_add_entry(struct kimage *image, kimage_entry_t entry)
827 : {
828 0 : if (*image->entry != 0)
829 0 : image->entry++;
830 :
831 0 : if (image->entry == image->last_entry) {
832 : kimage_entry_t *ind_page;
833 : struct page *page;
834 :
835 0 : page = kimage_alloc_page(image, GFP_KERNEL, KIMAGE_NO_DEST);
836 0 : if (!page)
837 : return -ENOMEM;
838 :
839 : ind_page = page_address(page);
840 0 : *image->entry = virt_to_phys(ind_page) | IND_INDIRECTION;
841 0 : image->entry = ind_page;
842 0 : image->last_entry = ind_page +
843 : ((PAGE_SIZE/sizeof(kimage_entry_t)) - 1);
844 : }
845 0 : *image->entry = entry;
846 0 : image->entry++;
847 0 : *image->entry = 0;
848 :
849 0 : return 0;
850 : }
851 :
852 : static int kimage_set_destination(struct kimage *image,
853 : unsigned long destination)
854 : {
855 : int result;
856 :
857 0 : destination &= PAGE_MASK;
858 0 : result = kimage_add_entry(image, destination | IND_DESTINATION);
859 0 : if (result == 0)
860 0 : image->destination = destination;
861 :
862 : return result;
863 : }
864 :
865 :
866 0 : static int kimage_add_page(struct kimage *image, unsigned long page)
867 : {
868 : int result;
869 :
870 0 : page &= PAGE_MASK;
871 0 : result = kimage_add_entry(image, page | IND_SOURCE);
872 0 : if (result == 0)
873 0 : image->destination += PAGE_SIZE;
874 :
875 0 : return result;
876 : }
877 :
878 :
879 : static void kimage_free_extra_pages(struct kimage *image)
880 : {
881 : /* Walk through and free any extra destination pages I may have */
882 0 : kimage_free_page_list(&image->dest_pages);
883 :
884 : /* Walk through and free any unusable pages I have cached */
885 0 : kimage_free_page_list(&image->unusable_pages);
886 :
887 : }
888 0 : static void kimage_terminate(struct kimage *image)
889 : {
890 0 : if (*image->entry != 0)
891 0 : image->entry++;
892 :
893 0 : *image->entry = IND_DONE;
894 0 : }
895 :
896 : #define for_each_kimage_entry(image, ptr, entry) \
897 : for (ptr = &image->head; (entry = *ptr) && !(entry & IND_DONE); \
898 : ptr = (entry & IND_INDIRECTION) ? \
899 : phys_to_virt((entry & PAGE_MASK)) : ptr + 1)
900 :
901 : static void kimage_free_entry(kimage_entry_t entry)
902 : {
903 : struct page *page;
904 :
905 0 : page = pfn_to_page(entry >> PAGE_SHIFT);
906 0 : kimage_free_pages(page);
907 : }
908 :
909 0 : static void kimage_free(struct kimage *image)
910 : {
911 : kimage_entry_t *ptr, entry;
912 : kimage_entry_t ind = 0;
913 :
914 0 : if (!image)
915 0 : return;
916 :
917 : kimage_free_extra_pages(image);
918 0 : for_each_kimage_entry(image, ptr, entry) {
919 0 : if (entry & IND_INDIRECTION) {
920 : /* Free the previous indirection page */
921 0 : if (ind & IND_INDIRECTION)
922 : kimage_free_entry(ind);
923 : /* Save this indirection page until we are
924 : * done with it.
925 : */
926 : ind = entry;
927 0 : } else if (entry & IND_SOURCE)
928 : kimage_free_entry(entry);
929 : }
930 : /* Free the final indirection page */
931 0 : if (ind & IND_INDIRECTION)
932 : kimage_free_entry(ind);
933 :
934 : /* Handle any machine specific cleanup */
935 0 : machine_kexec_cleanup(image);
936 :
937 : /* Free the kexec control pages... */
938 0 : kimage_free_page_list(&image->control_pages);
939 :
940 : /*
941 : * Free up any temporary buffers allocated. This might hit if
942 : * an error occurred much later, after buffer allocation.
943 : */
944 : if (image->file_mode)
945 : kimage_file_post_load_cleanup(image);
946 :
947 0 : kfree(image);
948 : }
949 :
950 0 : static kimage_entry_t *kimage_dst_used(struct kimage *image,
951 : unsigned long page)
952 : {
953 : kimage_entry_t *ptr, entry;
954 : unsigned long destination = 0;
955 :
956 0 : for_each_kimage_entry(image, ptr, entry) {
957 0 : if (entry & IND_DESTINATION)
958 0 : destination = entry & PAGE_MASK;
959 0 : else if (entry & IND_SOURCE) {
960 0 : if (page == destination)
961 : return ptr;
962 0 : destination += PAGE_SIZE;
963 : }
964 : }
965 :
966 : return NULL;
967 : }
968 :
969 0 : static struct page *kimage_alloc_page(struct kimage *image,
970 : gfp_t gfp_mask,
971 : unsigned long destination)
972 : {
973 : /*
974 : * Here we implement safeguards to ensure that a source page
975 : * is not copied to its destination page before the data on
976 : * the destination page is no longer useful.
977 : *
978 : * To do this we maintain the invariant that a source page is
979 : * either its own destination page, or it is not a
980 : * destination page at all.
981 : *
982 : * That is slightly stronger than required, but the proof
983 : * that no problems will occur is trivial, and the
984 : * implementation is simple to verify.
985 : *
986 : * When allocating all pages normally this algorithm will run
987 : * in O(N) time, but in the worst case it will run in O(N^2)
988 : * time. If the runtime is a problem the data structures can
989 : * be fixed.
990 : */
991 : struct page *page;
992 : unsigned long addr;
993 :
994 : /*
995 : * Walk through the list of destination pages, and see if I
996 : * have a match.
997 : */
998 0 : list_for_each_entry(page, &image->dest_pages, lru) {
999 0 : addr = page_to_pfn(page) << PAGE_SHIFT;
1000 0 : if (addr == destination) {
1001 : list_del(&page->lru);
1002 0 : return page;
1003 : }
1004 : }
1005 : page = NULL;
1006 : while (1) {
1007 : kimage_entry_t *old;
1008 :
1009 : /* Allocate a page, if we run out of memory give up */
1010 0 : page = kimage_alloc_pages(gfp_mask, 0);
1011 0 : if (!page)
1012 : return NULL;
1013 : /* If the page cannot be used, file it away */
1014 0 : if (page_to_pfn(page) >
1015 : (KEXEC_SOURCE_MEMORY_LIMIT >> PAGE_SHIFT)) {
1016 0 : list_add(&page->lru, &image->unusable_pages);
1017 0 : continue;
1018 : }
1019 0 : addr = page_to_pfn(page) << PAGE_SHIFT;
1020 :
1021 : /* If it is the destination page we want, use it */
1022 0 : if (addr == destination)
1023 : break;
1024 :
1025 : /* If the page is not a destination page use it */
1026 0 : if (!kimage_is_destination_range(image, addr,
1027 : addr + PAGE_SIZE))
1028 : break;
1029 :
1030 : /*
1031 : * I know that the page is someone's destination page.
1032 : * See if there is already a source page for this
1033 : * destination page. And if so swap the source pages.
1034 : */
1035 0 : old = kimage_dst_used(image, addr);
1036 0 : if (old) {
1037 : /* If so move it */
1038 : unsigned long old_addr;
1039 : struct page *old_page;
1040 :
1041 0 : old_addr = *old & PAGE_MASK;
1042 0 : old_page = pfn_to_page(old_addr >> PAGE_SHIFT);
1043 : copy_highpage(page, old_page);
1044 0 : *old = addr | (*old & ~PAGE_MASK);
1045 :
1046 : /* The old page I have found cannot be a
1047 : * destination page, so return it if its
1048 : * gfp_flags honor the ones passed in.
1049 : */
1050 : if (!(gfp_mask & __GFP_HIGHMEM) &&
1051 : PageHighMem(old_page)) {
1052 : kimage_free_pages(old_page);
1053 : continue;
1054 : }
1055 : addr = old_addr;
1056 : page = old_page;
1057 0 : break;
1058 : } else {
1059 : /* Place the page on the destination list; I
1060 : * will use it later.
1061 : */
1062 0 : list_add(&page->lru, &image->dest_pages);
1063 : }
1064 : }
1065 :
1066 0 : return page;
1067 : }
1068 :
1069 0 : static int kimage_load_normal_segment(struct kimage *image,
1070 : struct kexec_segment *segment)
1071 : {
1072 : unsigned long maddr;
1073 : size_t ubytes, mbytes;
1074 : int result;
1075 : unsigned char __user *buf = NULL;
1076 : unsigned char *kbuf = NULL;
1077 :
1078 : result = 0;
1079 0 : if (image->file_mode)
1080 0 : kbuf = segment->kbuf;
1081 : else
1082 0 : buf = segment->buf;
1083 0 : ubytes = segment->bufsz;
1084 0 : mbytes = segment->memsz;
1085 0 : maddr = segment->mem;
1086 :
1087 : result = kimage_set_destination(image, maddr);
1088 0 : if (result < 0)
1089 : goto out;
1090 :
1091 0 : while (mbytes) {
1092 : struct page *page;
1093 : char *ptr;
1094 : size_t uchunk, mchunk;
1095 :
1096 0 : page = kimage_alloc_page(image, GFP_HIGHUSER, maddr);
1097 0 : if (!page) {
1098 : result = -ENOMEM;
1099 : goto out;
1100 : }
1101 0 : result = kimage_add_page(image, page_to_pfn(page)
1102 : << PAGE_SHIFT);
1103 0 : if (result < 0)
1104 : goto out;
1105 :
1106 : ptr = kmap(page);
1107 : /* Start with a clear page */
1108 0 : clear_page(ptr);
1109 0 : ptr += maddr & ~PAGE_MASK;
1110 0 : mchunk = min_t(size_t, mbytes,
1111 : PAGE_SIZE - (maddr & ~PAGE_MASK));
1112 0 : uchunk = min(ubytes, mchunk);
1113 :
1114 : /* For file based kexec, source pages are in kernel memory */
1115 0 : if (image->file_mode)
1116 0 : memcpy(ptr, kbuf, uchunk);
1117 : else
1118 0 : result = copy_from_user(ptr, buf, uchunk);
1119 : kunmap(page);
1120 0 : if (result) {
1121 : result = -EFAULT;
1122 : goto out;
1123 : }
1124 0 : ubytes -= uchunk;
1125 0 : maddr += mchunk;
1126 0 : if (image->file_mode)
1127 0 : kbuf += mchunk;
1128 : else
1129 0 : buf += mchunk;
1130 0 : mbytes -= mchunk;
1131 : }
1132 : out:
1133 0 : return result;
1134 : }
1135 :
1136 0 : static int kimage_load_crash_segment(struct kimage *image,
1137 : struct kexec_segment *segment)
1138 : {
1139 : /* For crash dump kernels we simply copy the data from
1140 : * user space to its destination.
1141 : * We do things a page at a time for the sake of kmap.
1142 : */
1143 : unsigned long maddr;
1144 : size_t ubytes, mbytes;
1145 : int result;
1146 : unsigned char __user *buf = NULL;
1147 : unsigned char *kbuf = NULL;
1148 :
1149 : result = 0;
1150 0 : if (image->file_mode)
1151 0 : kbuf = segment->kbuf;
1152 : else
1153 0 : buf = segment->buf;
1154 0 : ubytes = segment->bufsz;
1155 0 : mbytes = segment->memsz;
1156 0 : maddr = segment->mem;
1157 0 : while (mbytes) {
1158 : struct page *page;
1159 : char *ptr;
1160 : size_t uchunk, mchunk;
1161 :
1162 0 : page = pfn_to_page(maddr >> PAGE_SHIFT);
1163 0 : if (!page) {
1164 : result = -ENOMEM;
1165 : goto out;
1166 : }
1167 : ptr = kmap(page);
1168 0 : ptr += maddr & ~PAGE_MASK;
1169 0 : mchunk = min_t(size_t, mbytes,
1170 : PAGE_SIZE - (maddr & ~PAGE_MASK));
1171 0 : uchunk = min(ubytes, mchunk);
1172 0 : if (mchunk > uchunk) {
1173 : /* Zero the trailing part of the page */
1174 0 : memset(ptr + uchunk, 0, mchunk - uchunk);
1175 : }
1176 :
1177 : /* For file based kexec, source pages are in kernel memory */
1178 0 : if (image->file_mode)
1179 0 : memcpy(ptr, kbuf, uchunk);
1180 : else
1181 0 : result = copy_from_user(ptr, buf, uchunk);
1182 : kexec_flush_icache_page(page);
1183 : kunmap(page);
1184 0 : if (result) {
1185 : result = -EFAULT;
1186 : goto out;
1187 : }
1188 0 : ubytes -= uchunk;
1189 0 : maddr += mchunk;
1190 0 : if (image->file_mode)
1191 0 : kbuf += mchunk;
1192 : else
1193 0 : buf += mchunk;
1194 0 : mbytes -= mchunk;
1195 : }
1196 : out:
1197 0 : return result;
1198 : }
1199 :
1200 0 : static int kimage_load_segment(struct kimage *image,
1201 : struct kexec_segment *segment)
1202 : {
1203 : int result = -ENOMEM;
1204 :
1205 0 : switch (image->type) {
1206 : case KEXEC_TYPE_DEFAULT:
1207 0 : result = kimage_load_normal_segment(image, segment);
1208 0 : break;
1209 : case KEXEC_TYPE_CRASH:
1210 0 : result = kimage_load_crash_segment(image, segment);
1211 0 : break;
1212 : }
1213 :
1214 0 : return result;
1215 : }
1216 :
1217 : /*
1218 : * Exec Kernel system call: for obvious reasons only root may call it.
1219 : *
1220 : * This call breaks up into three pieces.
1221 : * - A generic part which loads the new kernel from the current
1222 : * address space, and very carefully places the data in the
1223 : * allocated pages.
1224 : *
1225 : * - A generic part that interacts with the kernel and tells all of
1226 : * the devices to shut down, preventing ongoing DMAs and placing
1227 : * the devices in a consistent state so a later kernel can
1228 : * reinitialize them.
1229 : *
1230 : * - A machine specific part that includes the syscall number
1231 : * and then copies the image to its final destination and
1232 : * jumps into the image at entry.
1233 : *
1234 : * kexec does not sync or unmount filesystems, so if you need
1235 : * that to happen you need to do it yourself.
1236 : */
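/*
 * Rough user-space sketch of the load step (illustrative only; in practice
 * the kexec-tools package builds the segment list).  Segment addresses and
 * sizes must be page aligned, and "entry" is the physical entry point of the
 * new kernel:
 *
 *	#include <unistd.h>
 *	#include <sys/syscall.h>
 *	#include <sys/reboot.h>
 *	#include <linux/kexec.h>
 *	#include <linux/reboot.h>
 *
 *	struct kexec_segment seg = {
 *		.buf	= image_buf,		// image bytes in this process
 *		.bufsz	= image_size,
 *		.mem	= (void *)0x100000,	// physical destination (example)
 *		.memsz	= image_size_page_aligned,
 *	};
 *
 *	if (syscall(SYS_kexec_load, entry, 1, &seg, KEXEC_ARCH_DEFAULT))
 *		perror("kexec_load");
 *
 *	// ... later, to actually boot the loaded kernel:
 *	reboot(LINUX_REBOOT_CMD_KEXEC);
 *
 * image_buf, image_size, image_size_page_aligned and entry are placeholders,
 * not names defined anywhere in this file.
 */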
1237 : struct kimage *kexec_image;
1238 : struct kimage *kexec_crash_image;
1239 : int kexec_load_disabled;
1240 :
1241 : static DEFINE_MUTEX(kexec_mutex);
1242 :
1243 0 : SYSCALL_DEFINE4(kexec_load, unsigned long, entry, unsigned long, nr_segments,
1244 : struct kexec_segment __user *, segments, unsigned long, flags)
1245 : {
1246 : struct kimage **dest_image, *image;
1247 : int result;
1248 :
1249 : /* We only trust the superuser with rebooting the system. */
1250 0 : if (!capable(CAP_SYS_BOOT) || kexec_load_disabled)
1251 : return -EPERM;
1252 :
1253 : /*
1254 : * Verify we have a legal set of flags
1255 : * This leaves us room for future extensions.
1256 : */
1257 0 : if ((flags & KEXEC_FLAGS) != (flags & ~KEXEC_ARCH_MASK))
1258 : return -EINVAL;
1259 :
1260 : /* Verify we are on the appropriate architecture */
1261 0 : if (((flags & KEXEC_ARCH_MASK) != KEXEC_ARCH) &&
1262 : ((flags & KEXEC_ARCH_MASK) != KEXEC_ARCH_DEFAULT))
1263 : return -EINVAL;
1264 :
1265 : /* Put an artificial cap on the number
1266 : * of segments passed to kexec_load.
1267 : */
1268 0 : if (nr_segments > KEXEC_SEGMENT_MAX)
1269 : return -EINVAL;
1270 :
1271 0 : image = NULL;
1272 : result = 0;
1273 :
1274 : /* Because we write directly to the reserved memory
1275 : * region when loading crash kernels we need a mutex here to
1276 : * prevent multiple crash kernels from attempting to load
1277 : * simultaneously, and to prevent a crash kernel from loading
1278 : * over the top of an in-use crash kernel.
1279 : *
1280 : * KISS: always take the mutex.
1281 : */
1282 0 : if (!mutex_trylock(&kexec_mutex))
1283 : return -EBUSY;
1284 :
1285 : dest_image = &kexec_image;
1286 0 : if (flags & KEXEC_ON_CRASH)
1287 : dest_image = &kexec_crash_image;
1288 0 : if (nr_segments > 0) {
1289 : unsigned long i;
1290 :
1291 : /* Loading another kernel to reboot into */
1292 0 : if ((flags & KEXEC_ON_CRASH) == 0)
1293 0 : result = kimage_alloc_init(&image, entry, nr_segments,
1294 : segments, flags);
1295 : /* Loading another kernel to switch to if this one crashes */
1296 0 : else if (flags & KEXEC_ON_CRASH) {
1297 : /* Free any current crash dump kernel before
1298 : * we corrupt it.
1299 : */
1300 0 : kimage_free(xchg(&kexec_crash_image, NULL));
1301 0 : result = kimage_alloc_init(&image, entry, nr_segments,
1302 : segments, flags);
1303 0 : crash_map_reserved_pages();
1304 : }
1305 0 : if (result)
1306 : goto out;
1307 :
1308 0 : if (flags & KEXEC_PRESERVE_CONTEXT)
1309 0 : image->preserve_context = 1;
1310 0 : result = machine_kexec_prepare(image);
1311 0 : if (result)
1312 : goto out;
1313 :
1314 0 : for (i = 0; i < nr_segments; i++) {
1315 0 : result = kimage_load_segment(image, &image->segment[i]);
1316 0 : if (result)
1317 : goto out;
1318 : }
1319 0 : kimage_terminate(image);
1320 0 : if (flags & KEXEC_ON_CRASH)
1321 0 : crash_unmap_reserved_pages();
1322 : }
1323 : /* Install the new kernel, and Uninstall the old */
1324 0 : image = xchg(dest_image, image);
1325 :
1326 : out:
1327 0 : mutex_unlock(&kexec_mutex);
1328 0 : kimage_free(image);
1329 :
1330 : return result;
1331 : }
1332 :
1333 : /*
1334 : * Add and remove page tables for crashkernel memory
1335 : *
1336 : * Provide an empty default implementation here -- architecture
1337 : * code may override this
1338 : */
1339 0 : void __weak crash_map_reserved_pages(void)
1340 0 : {}
1341 :
1342 0 : void __weak crash_unmap_reserved_pages(void)
1343 0 : {}
1344 :
1345 : #ifdef CONFIG_COMPAT
1346 : COMPAT_SYSCALL_DEFINE4(kexec_load, compat_ulong_t, entry,
1347 : compat_ulong_t, nr_segments,
1348 : struct compat_kexec_segment __user *, segments,
1349 : compat_ulong_t, flags)
1350 : {
1351 : struct compat_kexec_segment in;
1352 : struct kexec_segment out, __user *ksegments;
1353 : unsigned long i, result;
1354 :
1355 : /* Don't allow clients that don't understand the native
1356 : * architecture to do anything.
1357 : */
1358 : if ((flags & KEXEC_ARCH_MASK) == KEXEC_ARCH_DEFAULT)
1359 : return -EINVAL;
1360 :
1361 : if (nr_segments > KEXEC_SEGMENT_MAX)
1362 : return -EINVAL;
1363 :
1364 : ksegments = compat_alloc_user_space(nr_segments * sizeof(out));
1365 : for (i = 0; i < nr_segments; i++) {
1366 : result = copy_from_user(&in, &segments[i], sizeof(in));
1367 : if (result)
1368 : return -EFAULT;
1369 :
1370 : out.buf = compat_ptr(in.buf);
1371 : out.bufsz = in.bufsz;
1372 : out.mem = in.mem;
1373 : out.memsz = in.memsz;
1374 :
1375 : result = copy_to_user(&ksegments[i], &out, sizeof(out));
1376 : if (result)
1377 : return -EFAULT;
1378 : }
1379 :
1380 : return sys_kexec_load(entry, nr_segments, ksegments, flags);
1381 : }
1382 : #endif
1383 :
1384 : #ifdef CONFIG_KEXEC_FILE
1385 : SYSCALL_DEFINE5(kexec_file_load, int, kernel_fd, int, initrd_fd,
1386 : unsigned long, cmdline_len, const char __user *, cmdline_ptr,
1387 : unsigned long, flags)
1388 : {
1389 : int ret = 0, i;
1390 : struct kimage **dest_image, *image;
1391 :
1392 : /* We only trust the superuser with rebooting the system. */
1393 : if (!capable(CAP_SYS_BOOT) || kexec_load_disabled)
1394 : return -EPERM;
1395 :
1396 : /* Make sure we have a legal set of flags */
1397 : if (flags != (flags & KEXEC_FILE_FLAGS))
1398 : return -EINVAL;
1399 :
1400 : image = NULL;
1401 :
1402 : if (!mutex_trylock(&kexec_mutex))
1403 : return -EBUSY;
1404 :
1405 : dest_image = &kexec_image;
1406 : if (flags & KEXEC_FILE_ON_CRASH)
1407 : dest_image = &kexec_crash_image;
1408 :
1409 : if (flags & KEXEC_FILE_UNLOAD)
1410 : goto exchange;
1411 :
1412 : /*
1413 : * In case of a crash, the new kernel gets loaded into the reserved region,
1414 : * the same memory where an old crash kernel might be loaded. Free any
1415 : * current crash dump kernel before we corrupt it.
1416 : */
1417 : if (flags & KEXEC_FILE_ON_CRASH)
1418 : kimage_free(xchg(&kexec_crash_image, NULL));
1419 :
1420 : ret = kimage_file_alloc_init(&image, kernel_fd, initrd_fd, cmdline_ptr,
1421 : cmdline_len, flags);
1422 : if (ret)
1423 : goto out;
1424 :
1425 : ret = machine_kexec_prepare(image);
1426 : if (ret)
1427 : goto out;
1428 :
1429 : ret = kexec_calculate_store_digests(image);
1430 : if (ret)
1431 : goto out;
1432 :
1433 : for (i = 0; i < image->nr_segments; i++) {
1434 : struct kexec_segment *ksegment;
1435 :
1436 : ksegment = &image->segment[i];
1437 : pr_debug("Loading segment %d: buf=0x%p bufsz=0x%zx mem=0x%lx memsz=0x%zx\n",
1438 : i, ksegment->buf, ksegment->bufsz, ksegment->mem,
1439 : ksegment->memsz);
1440 :
1441 : ret = kimage_load_segment(image, &image->segment[i]);
1442 : if (ret)
1443 : goto out;
1444 : }
1445 :
1446 : kimage_terminate(image);
1447 :
1448 : /*
1449 : * Free up any temporary buffers allocated which are not needed
1450 : * after image has been loaded
1451 : */
1452 : kimage_file_post_load_cleanup(image);
1453 : exchange:
1454 : image = xchg(dest_image, image);
1455 : out:
1456 : mutex_unlock(&kexec_mutex);
1457 : kimage_free(image);
1458 : return ret;
1459 : }
1460 :
1461 : #endif /* CONFIG_KEXEC_FILE */
1462 :
1463 0 : void crash_kexec(struct pt_regs *regs)
1464 : {
1465 : /* Take the kexec_mutex here to prevent sys_kexec_load
1466 : * running on one cpu from replacing the crash kernel
1467 : * we are using after a panic on a different cpu.
1468 : *
1469 : * If the crash kernel was not located in a fixed area
1470 : * of memory the xchg(&kexec_crash_image) would be
1471 : * sufficient. But since I reuse the memory...
1472 : */
1473 0 : if (mutex_trylock(&kexec_mutex)) {
1474 0 : if (kexec_crash_image) {
1475 : struct pt_regs fixed_regs;
1476 :
1477 : crash_setup_regs(&fixed_regs, regs);
1478 0 : crash_save_vmcoreinfo();
1479 0 : machine_crash_shutdown(&fixed_regs);
1480 0 : machine_kexec(kexec_crash_image);
1481 : }
1482 0 : mutex_unlock(&kexec_mutex);
1483 : }
1484 0 : }
1485 :
1486 0 : size_t crash_get_memory_size(void)
1487 : {
1488 : size_t size = 0;
1489 0 : mutex_lock(&kexec_mutex);
1490 0 : if (crashk_res.end != crashk_res.start)
1491 : size = resource_size(&crashk_res);
1492 0 : mutex_unlock(&kexec_mutex);
1493 0 : return size;
1494 : }
1495 :
1496 0 : void __weak crash_free_reserved_phys_range(unsigned long begin,
1497 : unsigned long end)
1498 : {
1499 : unsigned long addr;
1500 :
1501 0 : for (addr = begin; addr < end; addr += PAGE_SIZE)
1502 0 : free_reserved_page(pfn_to_page(addr >> PAGE_SHIFT));
1503 0 : }
1504 :
1505 0 : int crash_shrink_memory(unsigned long new_size)
1506 : {
1507 : int ret = 0;
1508 : unsigned long start, end;
1509 : unsigned long old_size;
1510 : struct resource *ram_res;
1511 :
1512 0 : mutex_lock(&kexec_mutex);
1513 :
1514 0 : if (kexec_crash_image) {
1515 : ret = -ENOENT;
1516 : goto unlock;
1517 : }
1518 0 : start = crashk_res.start;
1519 0 : end = crashk_res.end;
1520 0 : old_size = (end == 0) ? 0 : end - start + 1;
1521 0 : if (new_size >= old_size) {
1522 0 : ret = (new_size == old_size) ? 0 : -EINVAL;
1523 0 : goto unlock;
1524 : }
1525 :
1526 : ram_res = kzalloc(sizeof(*ram_res), GFP_KERNEL);
1527 0 : if (!ram_res) {
1528 : ret = -ENOMEM;
1529 : goto unlock;
1530 : }
1531 :
1532 0 : start = roundup(start, KEXEC_CRASH_MEM_ALIGN);
1533 0 : end = roundup(start + new_size, KEXEC_CRASH_MEM_ALIGN);
1534 :
1535 0 : crash_map_reserved_pages();
1536 0 : crash_free_reserved_phys_range(end, crashk_res.end);
1537 :
1538 0 : if ((start == end) && (crashk_res.parent != NULL))
1539 0 : release_resource(&crashk_res);
1540 :
1541 0 : ram_res->start = end;
1542 0 : ram_res->end = crashk_res.end;
1543 0 : ram_res->flags = IORESOURCE_BUSY | IORESOURCE_MEM;
1544 0 : ram_res->name = "System RAM";
1545 :
1546 0 : crashk_res.end = end - 1;
1547 :
1548 0 : insert_resource(&iomem_resource, ram_res);
1549 0 : crash_unmap_reserved_pages();
1550 :
1551 : unlock:
1552 0 : mutex_unlock(&kexec_mutex);
1553 0 : return ret;
1554 : }
1555 :
1556 1 : static u32 *append_elf_note(u32 *buf, char *name, unsigned type, void *data,
1557 : size_t data_len)
1558 : {
1559 : struct elf_note note;
1560 :
1561 1 : note.n_namesz = strlen(name) + 1;
1562 1 : note.n_descsz = data_len;
1563 1 : note.n_type = type;
1564 1 : memcpy(buf, &note, sizeof(note));
1565 1 : buf += (sizeof(note) + 3)/4;
1566 1 : memcpy(buf, name, note.n_namesz);
1567 1 : buf += (note.n_namesz + 3)/4;
1568 1 : memcpy(buf, data, note.n_descsz);
1569 1 : buf += (note.n_descsz + 3)/4;
1570 :
1571 1 : return buf;
1572 : }
1573 :
1574 : static void final_note(u32 *buf)
1575 : {
1576 : struct elf_note note;
1577 :
1578 1 : note.n_namesz = 0;
1579 1 : note.n_descsz = 0;
1580 1 : note.n_type = 0;
1581 1 : memcpy(buf, &note, sizeof(note));
1582 : }
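/*
 * For reference, the buffer filled by append_elf_note()/final_note() above
 * ends up laid out as below (an illustrative sketch of the standard ELF note
 * format that the dump-capture tools expect):
 *
 *	+------------------------------+
 *	| struct elf_note              |  n_namesz, n_descsz, n_type
 *	+------------------------------+
 *	| name, e.g. "CORE\0"          |  padded to a 4-byte boundary
 *	+------------------------------+
 *	| desc, e.g. an elf_prstatus   |  padded to a 4-byte boundary
 *	+------------------------------+
 *	| all-zero struct elf_note     |  terminator written by final_note()
 *	+------------------------------+
 */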
1583 :
1584 0 : void crash_save_cpu(struct pt_regs *regs, int cpu)
1585 : {
1586 : struct elf_prstatus prstatus;
1587 : u32 *buf;
1588 :
1589 0 : if ((cpu < 0) || (cpu >= nr_cpu_ids))
1590 0 : return;
1591 :
1592 : /* Using ELF notes here is opportunistic.
1593 : * I need a well defined structure format
1594 : * for the data I pass, and I need tags
1595 : * on the data to indicate what information I have
1596 : * squirrelled away. ELF notes happen to provide
1597 : * all of that, so there is no need to invent something new.
1598 : */
1599 0 : buf = (u32 *)per_cpu_ptr(crash_notes, cpu);
1600 0 : if (!buf)
1601 : return;
1602 0 : memset(&prstatus, 0, sizeof(prstatus));
1603 0 : prstatus.pr_pid = current->pid;
1604 : elf_core_copy_kernel_regs(&prstatus.pr_reg, regs);
1605 0 : buf = append_elf_note(buf, KEXEC_CORE_NOTE_NAME, NT_PRSTATUS,
1606 : &prstatus, sizeof(prstatus));
1607 : final_note(buf);
1608 : }
1609 :
1610 1 : static int __init crash_notes_memory_init(void)
1611 : {
1612 : /* Allocate memory for saving cpu registers. */
1613 1 : crash_notes = alloc_percpu(note_buf_t);
1614 1 : if (!crash_notes) {
1615 0 : pr_warn("Kexec: Memory allocation for saving cpu register states failed\n");
1616 0 : return -ENOMEM;
1617 : }
1618 : return 0;
1619 : }
1620 : subsys_initcall(crash_notes_memory_init);
1621 :
1622 :
1623 : /*
1624 : * parsing the "crashkernel" commandline
1625 : *
1626 : * this code is intended to be called from architecture specific code
1627 : */
1628 :
1629 :
1630 : /*
1631 : * This function parses command lines in the format
1632 : *
1633 : * crashkernel=ramsize-range:size[,...][@offset]
1634 : *
1635 : * The function returns 0 on success and -EINVAL on failure.
1636 : */
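/*
 * Illustrative example of this syntax: with
 *
 *	crashkernel=512M-2G:64M,2G-:128M
 *
 * a machine with 1G of RAM reserves 64M, a machine with 4G of RAM reserves
 * 128M, and a machine with less than 512M reserves nothing.  An optional
 * "@offset" after the list pins the physical base of the reservation.
 */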
1637 0 : static int __init parse_crashkernel_mem(char *cmdline,
1638 : unsigned long long system_ram,
1639 : unsigned long long *crash_size,
1640 : unsigned long long *crash_base)
1641 : {
1642 : char *cur = cmdline, *tmp;
1643 :
1644 : /* for each entry of the comma-separated list */
1645 : do {
1646 : unsigned long long start, end = ULLONG_MAX, size;
1647 :
1648 : /* get the start of the range */
1649 0 : start = memparse(cur, &tmp);
1650 0 : if (cur == tmp) {
1651 0 : pr_warn("crashkernel: Memory value expected\n");
1652 0 : return -EINVAL;
1653 : }
1654 : cur = tmp;
1655 0 : if (*cur != '-') {
1656 0 : pr_warn("crashkernel: '-' expected\n");
1657 0 : return -EINVAL;
1658 : }
1659 0 : cur++;
1660 :
1661 : /* if no ':' is here, then we read the end */
1662 0 : if (*cur != ':') {
1663 0 : end = memparse(cur, &tmp);
1664 0 : if (cur == tmp) {
1665 0 : pr_warn("crashkernel: Memory value expected\n");
1666 0 : return -EINVAL;
1667 : }
1668 : cur = tmp;
1669 0 : if (end <= start) {
1670 0 : pr_warn("crashkernel: end <= start\n");
1671 0 : return -EINVAL;
1672 : }
1673 : }
1674 :
1675 0 : if (*cur != ':') {
1676 0 : pr_warn("crashkernel: ':' expected\n");
1677 0 : return -EINVAL;
1678 : }
1679 0 : cur++;
1680 :
1681 0 : size = memparse(cur, &tmp);
1682 0 : if (cur == tmp) {
1683 0 : pr_warn("Memory value expected\n");
1684 0 : return -EINVAL;
1685 : }
1686 : cur = tmp;
1687 0 : if (size >= system_ram) {
1688 0 : pr_warn("crashkernel: invalid size\n");
1689 0 : return -EINVAL;
1690 : }
1691 :
1692 : /* match ? */
1693 0 : if (system_ram >= start && system_ram < end) {
1694 0 : *crash_size = size;
1695 0 : break;
1696 : }
1697 0 : } while (*cur++ == ',');
1698 :
1699 0 : if (*crash_size > 0) {
1700 0 : while (*cur && *cur != ' ' && *cur != '@')
1701 0 : cur++;
1702 0 : if (*cur == '@') {
1703 0 : cur++;
1704 0 : *crash_base = memparse(cur, &tmp);
1705 0 : if (cur == tmp) {
1706 0 : pr_warn("Memory value expected after '@'\n");
1707 0 : return -EINVAL;
1708 : }
1709 : }
1710 : }
1711 :
1712 : return 0;
1713 : }
1714 :
1715 : /*
1716 : * This function parses "simple" (old) crashkernel command lines like
1717 : *
1718 : * crashkernel=size[@offset]
1719 : *
1720 : * It returns 0 on success and -EINVAL on failure.
1721 : */
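/*
 * Example (illustrative): "crashkernel=128M@16M" reserves 128M of memory
 * starting at physical address 16M; without "@offset" the architecture code
 * picks a suitable base itself.
 */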
1722 0 : static int __init parse_crashkernel_simple(char *cmdline,
1723 : unsigned long long *crash_size,
1724 : unsigned long long *crash_base)
1725 : {
1726 0 : char *cur = cmdline;
1727 :
1728 0 : *crash_size = memparse(cmdline, &cur);
1729 0 : if (cmdline == cur) {
1730 0 : pr_warn("crashkernel: memory value expected\n");
1731 0 : return -EINVAL;
1732 : }
1733 :
1734 0 : if (*cur == '@')
1735 0 : *crash_base = memparse(cur+1, &cur);
1736 0 : else if (*cur != ' ' && *cur != '\0') {
1737 0 : pr_warn("crashkernel: unrecognized char\n");
1738 0 : return -EINVAL;
1739 : }
1740 :
1741 : return 0;
1742 : }
1743 :
1744 : #define SUFFIX_HIGH 0
1745 : #define SUFFIX_LOW 1
1746 : #define SUFFIX_NULL 2
1747 : static __initdata char *suffix_tbl[] = {
1748 : [SUFFIX_HIGH] = ",high",
1749 : [SUFFIX_LOW] = ",low",
1750 : [SUFFIX_NULL] = NULL,
1751 : };
1752 :
1753 : /*
1754 : * This function parses "suffix" crashkernel command lines like
1755 : *
1756 : * crashkernel=size,[high|low]
1757 : *
1758 : * It returns 0 on success and -EINVAL on failure.
1759 : */
1760 0 : static int __init parse_crashkernel_suffix(char *cmdline,
1761 : unsigned long long *crash_size,
1762 : const char *suffix)
1763 : {
1764 0 : char *cur = cmdline;
1765 :
1766 0 : *crash_size = memparse(cmdline, &cur);
1767 0 : if (cmdline == cur) {
1768 0 : pr_warn("crashkernel: memory value expected\n");
1769 0 : return -EINVAL;
1770 : }
1771 :
1772 : /* check with suffix */
1773 0 : if (strncmp(cur, suffix, strlen(suffix))) {
1774 0 : pr_warn("crashkernel: unrecognized char\n");
1775 0 : return -EINVAL;
1776 : }
1777 0 : cur += strlen(suffix);
1778 0 : if (*cur != ' ' && *cur != '\0') {
1779 0 : pr_warn("crashkernel: unrecognized char\n");
1780 0 : return -EINVAL;
1781 : }
1782 :
1783 : return 0;
1784 : }
1785 :
1786 1 : static __init char *get_last_crashkernel(char *cmdline,
1787 : const char *name,
1788 : const char *suffix)
1789 : {
1790 : char *p = cmdline, *ck_cmdline = NULL;
1791 :
1792 : /* find crashkernel and use the last one if there are more */
1793 1 : p = strstr(p, name);
1794 2 : while (p) {
1795 0 : char *end_p = strchr(p, ' ');
1796 : char *q;
1797 :
1798 0 : if (!end_p)
1799 0 : end_p = p + strlen(p);
1800 :
1801 0 : if (!suffix) {
1802 : int i;
1803 :
1804 : /* skip the one with any known suffix */
1805 0 : for (i = 0; suffix_tbl[i]; i++) {
1806 0 : q = end_p - strlen(suffix_tbl[i]);
1807 0 : if (!strncmp(q, suffix_tbl[i],
1808 : strlen(suffix_tbl[i])))
1809 : goto next;
1810 : }
1811 : ck_cmdline = p;
1812 : } else {
1813 0 : q = end_p - strlen(suffix);
1814 0 : if (!strncmp(q, suffix, strlen(suffix)))
1815 : ck_cmdline = p;
1816 : }
1817 : next:
1818 0 : p = strstr(p+1, name);
1819 : }
1820 :
1821 1 : if (!ck_cmdline)
1822 : return NULL;
1823 :
1824 0 : return ck_cmdline;
1825 : }
1826 :
1827 1 : static int __init __parse_crashkernel(char *cmdline,
1828 : unsigned long long system_ram,
1829 : unsigned long long *crash_size,
1830 : unsigned long long *crash_base,
1831 : const char *name,
1832 : const char *suffix)
1833 : {
1834 : char *first_colon, *first_space;
1835 : char *ck_cmdline;
1836 :
1837 : BUG_ON(!crash_size || !crash_base);
1838 1 : *crash_size = 0;
1839 1 : *crash_base = 0;
1840 :
1841 1 : ck_cmdline = get_last_crashkernel(cmdline, name, suffix);
1842 :
1843 1 : if (!ck_cmdline)
1844 : return -EINVAL;
1845 :
1846 0 : ck_cmdline += strlen(name);
1847 :
1848 0 : if (suffix)
1849 0 : return parse_crashkernel_suffix(ck_cmdline, crash_size,
1850 : suffix);
1851 : /*
1852 : * if the commandline contains a ':', then that's the extended
1853 : * syntax -- if not, it must be the classic syntax
1854 : */
1855 0 : first_colon = strchr(ck_cmdline, ':');
1856 0 : first_space = strchr(ck_cmdline, ' ');
1857 0 : if (first_colon && (!first_space || first_colon < first_space))
1858 0 : return parse_crashkernel_mem(ck_cmdline, system_ram,
1859 : crash_size, crash_base);
1860 :
1861 0 : return parse_crashkernel_simple(ck_cmdline, crash_size, crash_base);
1862 : }
1863 :
1864 : /*
1865 : * This function is the entry point for command line parsing and should be
1866 : * called from the arch-specific code.
1867 : */
1868 1 : int __init parse_crashkernel(char *cmdline,
1869 : unsigned long long system_ram,
1870 : unsigned long long *crash_size,
1871 : unsigned long long *crash_base)
1872 : {
1873 1 : return __parse_crashkernel(cmdline, system_ram, crash_size, crash_base,
1874 : "crashkernel=", NULL);
1875 : }
1876 :
1877 0 : int __init parse_crashkernel_high(char *cmdline,
1878 : unsigned long long system_ram,
1879 : unsigned long long *crash_size,
1880 : unsigned long long *crash_base)
1881 : {
1882 0 : return __parse_crashkernel(cmdline, system_ram, crash_size, crash_base,
1883 0 : "crashkernel=", suffix_tbl[SUFFIX_HIGH]);
1884 : }
1885 :
1886 0 : int __init parse_crashkernel_low(char *cmdline,
1887 : unsigned long long system_ram,
1888 : unsigned long long *crash_size,
1889 : unsigned long long *crash_base)
1890 : {
1891 0 : return __parse_crashkernel(cmdline, system_ram, crash_size, crash_base,
1892 0 : "crashkernel=", suffix_tbl[SUFFIX_LOW]);
1893 : }
1894 :
1895 1 : static void update_vmcoreinfo_note(void)
1896 : {
1897 : u32 *buf = vmcoreinfo_note;
1898 :
1899 1 : if (!vmcoreinfo_size)
1900 1 : return;
1901 1 : buf = append_elf_note(buf, VMCOREINFO_NOTE_NAME, 0, vmcoreinfo_data,
1902 : vmcoreinfo_size);
1903 : final_note(buf);
1904 : }
1905 :
1906 0 : void crash_save_vmcoreinfo(void)
1907 : {
1908 0 : vmcoreinfo_append_str("CRASHTIME=%ld\n", get_seconds());
1909 0 : update_vmcoreinfo_note();
1910 0 : }
1911 :
1912 53 : void vmcoreinfo_append_str(const char *fmt, ...)
1913 : {
1914 : va_list args;
1915 : char buf[0x50];
1916 : size_t r;
1917 :
1918 53 : va_start(args, fmt);
1919 53 : r = vscnprintf(buf, sizeof(buf), fmt, args);
1920 53 : va_end(args);
1921 :
1922 53 : r = min(r, vmcoreinfo_max_size - vmcoreinfo_size);
1923 :
1924 53 : memcpy(&vmcoreinfo_data[vmcoreinfo_size], buf, r);
1925 :
1926 53 : vmcoreinfo_size += r;
1927 53 : }
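/*
 * Note: the VMCOREINFO_* macros used in crash_save_vmcoreinfo_init() below
 * are expected to be thin wrappers around vmcoreinfo_append_str(); for
 * instance, VMCOREINFO_SYMBOL(foo) should append a line of the form
 * "SYMBOL(foo)=<address>\n" to vmcoreinfo_data.
 */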
1928 :
1929 : /*
1930 : * provide an empty default implementation here -- architecture
1931 : * code may override this
1932 : */
1933 0 : void __weak arch_crash_save_vmcoreinfo(void)
1934 0 : {}
1935 :
1936 0 : unsigned long __weak paddr_vmcoreinfo_note(void)
1937 : {
1938 0 : return __pa((unsigned long)(char *)&vmcoreinfo_note);
1939 : }
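/*
 * The physical address returned above is typically exported to user space
 * (e.g. via /sys/kernel/vmcoreinfo) so that the address and size of the
 * vmcoreinfo note can be passed on to the dump-capture kernel.
 */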
1940 :
1941 1 : static int __init crash_save_vmcoreinfo_init(void)
1942 : {
1943 1 : VMCOREINFO_OSRELEASE(init_uts_ns.name.release);
1944 1 : VMCOREINFO_PAGESIZE(PAGE_SIZE);
1945 :
1946 1 : VMCOREINFO_SYMBOL(init_uts_ns);
1947 1 : VMCOREINFO_SYMBOL(node_online_map);
1948 : #ifdef CONFIG_MMU
1949 1 : VMCOREINFO_SYMBOL(swapper_pg_dir);
1950 : #endif
1951 1 : VMCOREINFO_SYMBOL(_stext);
1952 1 : VMCOREINFO_SYMBOL(vmap_area_list);
1953 :
1954 : #ifndef CONFIG_NEED_MULTIPLE_NODES
1955 1 : VMCOREINFO_SYMBOL(mem_map);
1956 1 : VMCOREINFO_SYMBOL(contig_page_data);
1957 : #endif
1958 : #ifdef CONFIG_SPARSEMEM
1959 : VMCOREINFO_SYMBOL(mem_section);
1960 : VMCOREINFO_LENGTH(mem_section, NR_SECTION_ROOTS);
1961 : VMCOREINFO_STRUCT_SIZE(mem_section);
1962 : VMCOREINFO_OFFSET(mem_section, section_mem_map);
1963 : #endif
1964 1 : VMCOREINFO_STRUCT_SIZE(page);
1965 1 : VMCOREINFO_STRUCT_SIZE(pglist_data);
1966 1 : VMCOREINFO_STRUCT_SIZE(zone);
1967 1 : VMCOREINFO_STRUCT_SIZE(free_area);
1968 1 : VMCOREINFO_STRUCT_SIZE(list_head);
1969 1 : VMCOREINFO_SIZE(nodemask_t);
1970 1 : VMCOREINFO_OFFSET(page, flags);
1971 1 : VMCOREINFO_OFFSET(page, _count);
1972 1 : VMCOREINFO_OFFSET(page, mapping);
1973 1 : VMCOREINFO_OFFSET(page, lru);
1974 1 : VMCOREINFO_OFFSET(page, _mapcount);
1975 1 : VMCOREINFO_OFFSET(page, private);
1976 1 : VMCOREINFO_OFFSET(pglist_data, node_zones);
1977 1 : VMCOREINFO_OFFSET(pglist_data, nr_zones);
1978 : #ifdef CONFIG_FLAT_NODE_MEM_MAP
1979 1 : VMCOREINFO_OFFSET(pglist_data, node_mem_map);
1980 : #endif
1981 1 : VMCOREINFO_OFFSET(pglist_data, node_start_pfn);
1982 1 : VMCOREINFO_OFFSET(pglist_data, node_spanned_pages);
1983 1 : VMCOREINFO_OFFSET(pglist_data, node_id);
1984 1 : VMCOREINFO_OFFSET(zone, free_area);
1985 1 : VMCOREINFO_OFFSET(zone, vm_stat);
1986 1 : VMCOREINFO_OFFSET(zone, spanned_pages);
1987 1 : VMCOREINFO_OFFSET(free_area, free_list);
1988 1 : VMCOREINFO_OFFSET(list_head, next);
1989 1 : VMCOREINFO_OFFSET(list_head, prev);
1990 1 : VMCOREINFO_OFFSET(vmap_area, va_start);
1991 1 : VMCOREINFO_OFFSET(vmap_area, list);
1992 1 : VMCOREINFO_LENGTH(zone.free_area, MAX_ORDER);
1993 1 : log_buf_kexec_setup();
1994 1 : VMCOREINFO_LENGTH(free_area.free_list, MIGRATE_TYPES);
1995 1 : VMCOREINFO_NUMBER(NR_FREE_PAGES);
1996 1 : VMCOREINFO_NUMBER(PG_lru);
1997 1 : VMCOREINFO_NUMBER(PG_private);
1998 1 : VMCOREINFO_NUMBER(PG_swapcache);
1999 1 : VMCOREINFO_NUMBER(PG_slab);
2000 : #ifdef CONFIG_MEMORY_FAILURE
2001 : VMCOREINFO_NUMBER(PG_hwpoison);
2002 : #endif
2003 1 : VMCOREINFO_NUMBER(PG_head_mask);
2004 1 : VMCOREINFO_NUMBER(PAGE_BUDDY_MAPCOUNT_VALUE);
2005 : #ifdef CONFIG_HUGETLBFS
2006 : VMCOREINFO_SYMBOL(free_huge_page);
2007 : #endif
2008 :
2009 1 : arch_crash_save_vmcoreinfo();
2010 1 : update_vmcoreinfo_note();
2011 :
2012 1 : return 0;
2013 : }
2014 :
2015 : subsys_initcall(crash_save_vmcoreinfo_init);
2016 :
2017 : #ifdef CONFIG_KEXEC_FILE
2018 : static int locate_mem_hole_top_down(unsigned long start, unsigned long end,
2019 : struct kexec_buf *kbuf)
2020 : {
2021 : struct kimage *image = kbuf->image;
2022 : unsigned long temp_start, temp_end;
2023 :
2024 : temp_end = min(end, kbuf->buf_max);
2025 : temp_start = temp_end - kbuf->memsz;
2026 :
2027 : do {
2028 : /* align down start */
2029 : temp_start = temp_start & (~(kbuf->buf_align - 1));
2030 :
2031 : if (temp_start < start || temp_start < kbuf->buf_min)
2032 : return 0;
2033 :
2034 : temp_end = temp_start + kbuf->memsz - 1;
2035 :
2036 : /*
2037 : * Make sure this does not conflict with any of the existing
2038 : * segments.
2039 : */
2040 : if (kimage_is_destination_range(image, temp_start, temp_end)) {
2041 : temp_start = temp_start - PAGE_SIZE;
2042 : continue;
2043 : }
2044 :
2045 : /* We found a suitable memory range */
2046 : break;
2047 : } while (1);
2048 :
2049 : /* If we are here, we found a suitable memory range */
2050 : kbuf->mem = temp_start;
2051 :
2052 : /* Success, stop navigating through remaining System RAM ranges */
2053 : return 1;
2054 : }
2055 :
2056 : static int locate_mem_hole_bottom_up(unsigned long start, unsigned long end,
2057 : struct kexec_buf *kbuf)
2058 : {
2059 : struct kimage *image = kbuf->image;
2060 : unsigned long temp_start, temp_end;
2061 :
2062 : temp_start = max(start, kbuf->buf_min);
2063 :
2064 : do {
2065 : temp_start = ALIGN(temp_start, kbuf->buf_align);
2066 : temp_end = temp_start + kbuf->memsz - 1;
2067 :
2068 : if (temp_end > end || temp_end > kbuf->buf_max)
2069 : return 0;
2070 : /*
2071 : * Make sure this does not conflict with any of the existing
2072 : * segments.
2073 : */
2074 : if (kimage_is_destination_range(image, temp_start, temp_end)) {
2075 : temp_start = temp_start + PAGE_SIZE;
2076 : continue;
2077 : }
2078 :
2079 : /* We found a suitable memory range */
2080 : break;
2081 : } while (1);
2082 :
2083 : /* If we are here, we found a suitable memory range */
2084 : kbuf->mem = temp_start;
2085 :
2086 : /* Success, stop navigating through remaining System RAM ranges */
2087 : return 1;
2088 : }
2089 :
2090 : static int locate_mem_hole_callback(u64 start, u64 end, void *arg)
2091 : {
2092 : struct kexec_buf *kbuf = (struct kexec_buf *)arg;
2093 : unsigned long sz = end - start + 1;
2094 :
2095 : /* Returning 0 makes the caller move on to the next memory range */
2096 : if (sz < kbuf->memsz)
2097 : return 0;
2098 :
2099 : if (end < kbuf->buf_min || start > kbuf->buf_max)
2100 : return 0;
2101 :
2102 : /*
2103 : * Allocate memory top-down within the RAM range if requested,
2104 : * otherwise allocate bottom-up.
2105 : */
2106 : if (kbuf->top_down)
2107 : return locate_mem_hole_top_down(start, end, kbuf);
2108 : return locate_mem_hole_bottom_up(start, end, kbuf);
2109 : }
2110 :
2111 : /*
2112 : * Helper function for placing a buffer in a kexec segment. This assumes
2113 : * that kexec_mutex is held.
2114 : */
2115 : int kexec_add_buffer(struct kimage *image, char *buffer, unsigned long bufsz,
2116 : unsigned long memsz, unsigned long buf_align,
2117 : unsigned long buf_min, unsigned long buf_max,
2118 : bool top_down, unsigned long *load_addr)
2119 : {
2120 :
2121 : struct kexec_segment *ksegment;
2122 : struct kexec_buf buf, *kbuf;
2123 : int ret;
2124 :
2125 : /* Currently, adding a segment this way is allowed only in file mode */
2126 : if (!image->file_mode)
2127 : return -EINVAL;
2128 :
2129 : if (image->nr_segments >= KEXEC_SEGMENT_MAX)
2130 : return -EINVAL;
2131 :
2132 : /*
2133 : * Make sure we are not trying to add a buffer after control pages
2134 : * have been allocated. All segments need to be placed before any
2135 : * control pages are allocated, because the control page allocation
2136 : * logic walks the list of segments to make sure there are no
2137 : * destination overlaps.
2138 : */
2139 : if (!list_empty(&image->control_pages)) {
2140 : WARN_ON(1);
2141 : return -EINVAL;
2142 : }
2143 :
2144 : memset(&buf, 0, sizeof(struct kexec_buf));
2145 : kbuf = &buf;
2146 : kbuf->image = image;
2147 : kbuf->buffer = buffer;
2148 : kbuf->bufsz = bufsz;
2149 :
2150 : kbuf->memsz = ALIGN(memsz, PAGE_SIZE);
2151 : kbuf->buf_align = max(buf_align, PAGE_SIZE);
2152 : kbuf->buf_min = buf_min;
2153 : kbuf->buf_max = buf_max;
2154 : kbuf->top_down = top_down;
2155 :
2156 : /* Walk the RAM ranges and allocate a suitable range for the buffer */
2157 : if (image->type == KEXEC_TYPE_CRASH)
2158 : ret = walk_iomem_res("Crash kernel",
2159 : IORESOURCE_MEM | IORESOURCE_BUSY,
2160 : crashk_res.start, crashk_res.end, kbuf,
2161 : locate_mem_hole_callback);
2162 : else
2163 : ret = walk_system_ram_res(0, -1, kbuf,
2164 : locate_mem_hole_callback);
2165 : if (ret != 1) {
2166 : /* A suitable memory range could not be found for buffer */
2167 : return -EADDRNOTAVAIL;
2168 : }
2169 :
2170 : /* Found a suitable memory range */
2171 : ksegment = &image->segment[image->nr_segments];
2172 : ksegment->kbuf = kbuf->buffer;
2173 : ksegment->bufsz = kbuf->bufsz;
2174 : ksegment->mem = kbuf->mem;
2175 : ksegment->memsz = kbuf->memsz;
2176 : image->nr_segments++;
2177 : *load_addr = ksegment->mem;
2178 : return 0;
2179 : }
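/*
 * A minimal usage sketch (hypothetical caller and names, for illustration
 * only):
 *
 *	unsigned long kernel_load_addr;
 *	int ret;
 *
 *	ret = kexec_add_buffer(image, kernel_buf, kernel_len, kernel_len,
 *			       PAGE_SIZE, 0, ULONG_MAX, true,
 *			       &kernel_load_addr);
 *
 * On success the buffer is recorded as a new segment and kernel_load_addr
 * holds the memory address chosen for it.
 */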
2180 :
2181 : /* Calculate and store the digest of segments */
2182 : static int kexec_calculate_store_digests(struct kimage *image)
2183 : {
2184 : struct crypto_shash *tfm;
2185 : struct shash_desc *desc;
2186 : int ret = 0, i, j, zero_buf_sz, sha_region_sz;
2187 : size_t desc_size, nullsz;
2188 : char *digest;
2189 : void *zero_buf;
2190 : struct kexec_sha_region *sha_regions;
2191 : struct purgatory_info *pi = &image->purgatory_info;
2192 :
2193 : zero_buf = __va(page_to_pfn(ZERO_PAGE(0)) << PAGE_SHIFT);
2194 : zero_buf_sz = PAGE_SIZE;
2195 :
2196 : tfm = crypto_alloc_shash("sha256", 0, 0);
2197 : if (IS_ERR(tfm)) {
2198 : ret = PTR_ERR(tfm);
2199 : goto out;
2200 : }
2201 :
2202 : desc_size = crypto_shash_descsize(tfm) + sizeof(*desc);
2203 : desc = kzalloc(desc_size, GFP_KERNEL);
2204 : if (!desc) {
2205 : ret = -ENOMEM;
2206 : goto out_free_tfm;
2207 : }
2208 :
2209 : sha_region_sz = KEXEC_SEGMENT_MAX * sizeof(struct kexec_sha_region);
2210 : sha_regions = vzalloc(sha_region_sz);
2211 : if (!sha_regions)
2212 : goto out_free_desc;
2213 :
2214 : desc->tfm = tfm;
2215 : desc->flags = 0;
2216 :
2217 : ret = crypto_shash_init(desc);
2218 : if (ret < 0)
2219 : goto out_free_sha_regions;
2220 :
2221 : digest = kzalloc(SHA256_DIGEST_SIZE, GFP_KERNEL);
2222 : if (!digest) {
2223 : ret = -ENOMEM;
2224 : goto out_free_sha_regions;
2225 : }
2226 :
2227 : for (j = i = 0; i < image->nr_segments; i++) {
2228 : struct kexec_segment *ksegment;
2229 :
2230 : ksegment = &image->segment[i];
2231 : /*
2232 : * Skip the purgatory segment, as it will still be modified once
2233 : * the digest info is written into it.
2234 : */
2235 : if (ksegment->kbuf == pi->purgatory_buf)
2236 : continue;
2237 :
2238 : ret = crypto_shash_update(desc, ksegment->kbuf,
2239 : ksegment->bufsz);
2240 : if (ret)
2241 : break;
2242 :
2243 : /*
2244 : * Assume the rest of the segment is filled with zeroes and
2245 : * update the digest accordingly.
2246 : */
2247 : nullsz = ksegment->memsz - ksegment->bufsz;
2248 : while (nullsz) {
2249 : unsigned long bytes = nullsz;
2250 :
2251 : if (bytes > zero_buf_sz)
2252 : bytes = zero_buf_sz;
2253 : ret = crypto_shash_update(desc, zero_buf, bytes);
2254 : if (ret)
2255 : break;
2256 : nullsz -= bytes;
2257 : }
2258 :
2259 : if (ret)
2260 : break;
2261 :
2262 : sha_regions[j].start = ksegment->mem;
2263 : sha_regions[j].len = ksegment->memsz;
2264 : j++;
2265 : }
2266 :
2267 : if (!ret) {
2268 : ret = crypto_shash_final(desc, digest);
2269 : if (ret)
2270 : goto out_free_digest;
2271 : ret = kexec_purgatory_get_set_symbol(image, "sha_regions",
2272 : sha_regions, sha_region_sz, 0);
2273 : if (ret)
2274 : goto out_free_digest;
2275 :
2276 : ret = kexec_purgatory_get_set_symbol(image, "sha256_digest",
2277 : digest, SHA256_DIGEST_SIZE, 0);
2278 : if (ret)
2279 : goto out_free_digest;
2280 : }
2281 :
2282 : out_free_digest:
2283 : kfree(digest);
2284 : out_free_sha_regions:
2285 : vfree(sha_regions);
2286 : out_free_desc:
2287 : kfree(desc);
2288 : out_free_tfm:
2289 : kfree(tfm);
2290 : out:
2291 : return ret;
2292 : }
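/*
 * The "sha_regions" and "sha256_digest" symbols patched above are presumably
 * consumed by the purgatory code, which can re-hash the loaded segments
 * before jumping to the new kernel and bail out if any digest differs.
 */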
2293 :
2294 : /* Actually load purgatory. A lot of this code is taken from kexec-tools. */
2295 : static int __kexec_load_purgatory(struct kimage *image, unsigned long min,
2296 : unsigned long max, int top_down)
2297 : {
2298 : struct purgatory_info *pi = &image->purgatory_info;
2299 : unsigned long align, buf_align, bss_align, buf_sz, bss_sz, bss_pad;
2300 : unsigned long memsz, entry, load_addr, curr_load_addr, bss_addr, offset;
2301 : unsigned char *buf_addr, *src;
2302 : int i, ret = 0, entry_sidx = -1;
2303 : const Elf_Shdr *sechdrs_c;
2304 : Elf_Shdr *sechdrs = NULL;
2305 : void *purgatory_buf = NULL;
2306 :
2307 : /*
2308 : * sechdrs_c points to the section headers in purgatory, which are
2309 : * read-only. No modifications allowed.
2310 : */
2311 : sechdrs_c = (void *)pi->ehdr + pi->ehdr->e_shoff;
2312 :
2313 : /*
2314 : * We cannot modify sechdrs_c[] or its fields; they are read-only.
2315 : * Make a local copy in which we can store some temporary data and
2316 : * which we can free at the end. We need to modify the ->sh_addr and
2317 : * ->sh_offset fields to keep track of the permanent and temporary
2318 : * locations of the sections.
2319 : */
2320 : sechdrs = vzalloc(pi->ehdr->e_shnum * sizeof(Elf_Shdr));
2321 : if (!sechdrs)
2322 : return -ENOMEM;
2323 :
2324 : memcpy(sechdrs, sechdrs_c, pi->ehdr->e_shnum * sizeof(Elf_Shdr));
2325 :
2326 : /*
2327 : * There are multiple copies of the sections. The first copy is the one
2328 : * embedded in the kernel in a read-only section. Some of these sections
2329 : * will be copied to a temporary buffer and relocated, and those
2330 : * sections will finally be copied to their destination at segment
2331 : * load time.
2332 : *
2333 : * Use ->sh_offset to reflect the section's current address in memory.
2334 : * It points to the original read-only copy if the section is not
2335 : * allocatable, otherwise to the temporary copy that will be relocated.
2336 : *
2337 : * Use ->sh_addr for the final address of the section, where it will
2338 : * live at execution time.
2339 : */
2340 : for (i = 0; i < pi->ehdr->e_shnum; i++) {
2341 : if (sechdrs[i].sh_type == SHT_NOBITS)
2342 : continue;
2343 :
2344 : sechdrs[i].sh_offset = (unsigned long)pi->ehdr +
2345 : sechdrs[i].sh_offset;
2346 : }
2347 :
2348 : /*
2349 : * Identify entry point section and make entry relative to section
2350 : * start.
2351 : */
2352 : entry = pi->ehdr->e_entry;
2353 : for (i = 0; i < pi->ehdr->e_shnum; i++) {
2354 : if (!(sechdrs[i].sh_flags & SHF_ALLOC))
2355 : continue;
2356 :
2357 : if (!(sechdrs[i].sh_flags & SHF_EXECINSTR))
2358 : continue;
2359 :
2360 : /* Make entry section relative */
2361 : if (sechdrs[i].sh_addr <= pi->ehdr->e_entry &&
2362 : ((sechdrs[i].sh_addr + sechdrs[i].sh_size) >
2363 : pi->ehdr->e_entry)) {
2364 : entry_sidx = i;
2365 : entry -= sechdrs[i].sh_addr;
2366 : break;
2367 : }
2368 : }
2369 :
2370 : /* Determine how much memory is needed to load relocatable object. */
2371 : buf_align = 1;
2372 : bss_align = 1;
2373 : buf_sz = 0;
2374 : bss_sz = 0;
2375 :
2376 : for (i = 0; i < pi->ehdr->e_shnum; i++) {
2377 : if (!(sechdrs[i].sh_flags & SHF_ALLOC))
2378 : continue;
2379 :
2380 : align = sechdrs[i].sh_addralign;
2381 : if (sechdrs[i].sh_type != SHT_NOBITS) {
2382 : if (buf_align < align)
2383 : buf_align = align;
2384 : buf_sz = ALIGN(buf_sz, align);
2385 : buf_sz += sechdrs[i].sh_size;
2386 : } else {
2387 : /* bss section */
2388 : if (bss_align < align)
2389 : bss_align = align;
2390 : bss_sz = ALIGN(bss_sz, align);
2391 : bss_sz += sechdrs[i].sh_size;
2392 : }
2393 : }
2394 :
2395 : /* Determine the bss padding required to align bss properly */
2396 : bss_pad = 0;
2397 : if (buf_sz & (bss_align - 1))
2398 : bss_pad = bss_align - (buf_sz & (bss_align - 1));
2399 :
2400 : memsz = buf_sz + bss_pad + bss_sz;
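/*
 * Worked example with illustrative numbers: if buf_sz = 0x1234 and
 * bss_align = 0x10, then bss_pad = 0x10 - (0x1234 & 0xf) = 0xc, so the bss
 * area starts 16-byte aligned immediately after the progbits sections.
 */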
2401 :
2402 : /* Allocate buffer for purgatory */
2403 : purgatory_buf = vzalloc(buf_sz);
2404 : if (!purgatory_buf) {
2405 : ret = -ENOMEM;
2406 : goto out;
2407 : }
2408 :
2409 : if (buf_align < bss_align)
2410 : buf_align = bss_align;
2411 :
2412 : /* Add buffer to segment list */
2413 : ret = kexec_add_buffer(image, purgatory_buf, buf_sz, memsz,
2414 : buf_align, min, max, top_down,
2415 : &pi->purgatory_load_addr);
2416 : if (ret)
2417 : goto out;
2418 :
2419 : /* Load SHF_ALLOC sections */
2420 : buf_addr = purgatory_buf;
2421 : load_addr = curr_load_addr = pi->purgatory_load_addr;
2422 : bss_addr = load_addr + buf_sz + bss_pad;
2423 :
2424 : for (i = 0; i < pi->ehdr->e_shnum; i++) {
2425 : if (!(sechdrs[i].sh_flags & SHF_ALLOC))
2426 : continue;
2427 :
2428 : align = sechdrs[i].sh_addralign;
2429 : if (sechdrs[i].sh_type != SHT_NOBITS) {
2430 : curr_load_addr = ALIGN(curr_load_addr, align);
2431 : offset = curr_load_addr - load_addr;
2432 : /* We already modified ->sh_offset to hold the source address */
2433 : src = (char *) sechdrs[i].sh_offset;
2434 : memcpy(buf_addr + offset, src, sechdrs[i].sh_size);
2435 :
2436 : /* Store load address and source address of section */
2437 : sechdrs[i].sh_addr = curr_load_addr;
2438 :
2439 : /*
2440 : * This section got copied to temporary buffer. Update
2441 : * ->sh_offset accordingly.
2442 : */
2443 : sechdrs[i].sh_offset = (unsigned long)(buf_addr + offset);
2444 :
2445 : /* Advance to the next address */
2446 : curr_load_addr += sechdrs[i].sh_size;
2447 : } else {
2448 : bss_addr = ALIGN(bss_addr, align);
2449 : sechdrs[i].sh_addr = bss_addr;
2450 : bss_addr += sechdrs[i].sh_size;
2451 : }
2452 : }
2453 :
2454 : /* Update entry point based on load address of text section */
2455 : if (entry_sidx >= 0)
2456 : entry += sechdrs[entry_sidx].sh_addr;
2457 :
2458 : /* Make kernel jump to purgatory after shutdown */
2459 : image->start = entry;
2460 :
2461 : /* Used later to get/set symbol values */
2462 : pi->sechdrs = sechdrs;
2463 :
2464 : /*
2465 : * Used later to identify which segment holds the purgatory and to
2466 : * exclude it from checksumming.
2467 : */
2468 : pi->purgatory_buf = purgatory_buf;
2469 : return ret;
2470 : out:
2471 : vfree(sechdrs);
2472 : vfree(purgatory_buf);
2473 : return ret;
2474 : }
2475 :
2476 : static int kexec_apply_relocations(struct kimage *image)
2477 : {
2478 : int i, ret;
2479 : struct purgatory_info *pi = &image->purgatory_info;
2480 : Elf_Shdr *sechdrs = pi->sechdrs;
2481 :
2482 : /* Apply relocations */
2483 : for (i = 0; i < pi->ehdr->e_shnum; i++) {
2484 : Elf_Shdr *section, *symtab;
2485 :
2486 : if (sechdrs[i].sh_type != SHT_RELA &&
2487 : sechdrs[i].sh_type != SHT_REL)
2488 : continue;
2489 :
2490 : /*
2491 : * For a section of type SHT_RELA/SHT_REL, ->sh_link contains the
2492 : * section header index of the associated symbol table, and
2493 : * ->sh_info contains the section header index of the section to
2494 : * which the relocations apply.
2495 : */
2496 : if (sechdrs[i].sh_info >= pi->ehdr->e_shnum ||
2497 : sechdrs[i].sh_link >= pi->ehdr->e_shnum)
2498 : return -ENOEXEC;
2499 :
2500 : section = &sechdrs[sechdrs[i].sh_info];
2501 : symtab = &sechdrs[sechdrs[i].sh_link];
2502 :
2503 : if (!(section->sh_flags & SHF_ALLOC))
2504 : continue;
2505 :
2506 : /*
2507 : * symtab->sh_link contains the section header index of the
2508 : * associated string table.
2509 : */
2510 : if (symtab->sh_link >= pi->ehdr->e_shnum)
2511 : /* Invalid section number? */
2512 : continue;
2513 :
2514 : /*
2515 : * The respective architecture needs to provide support for applying
2516 : * relocations of type SHT_RELA/SHT_REL.
2517 : */
2518 : if (sechdrs[i].sh_type == SHT_RELA)
2519 : ret = arch_kexec_apply_relocations_add(pi->ehdr,
2520 : sechdrs, i);
2521 : else if (sechdrs[i].sh_type == SHT_REL)
2522 : ret = arch_kexec_apply_relocations(pi->ehdr,
2523 : sechdrs, i);
2524 : if (ret)
2525 : return ret;
2526 : }
2527 :
2528 : return 0;
2529 : }
2530 :
2531 : /* Load relocatable purgatory object and relocate it appropriately */
2532 : int kexec_load_purgatory(struct kimage *image, unsigned long min,
2533 : unsigned long max, int top_down,
2534 : unsigned long *load_addr)
2535 : {
2536 : struct purgatory_info *pi = &image->purgatory_info;
2537 : int ret;
2538 :
2539 : if (kexec_purgatory_size <= 0)
2540 : return -EINVAL;
2541 :
2542 : if (kexec_purgatory_size < sizeof(Elf_Ehdr))
2543 : return -ENOEXEC;
2544 :
2545 : pi->ehdr = (Elf_Ehdr *)kexec_purgatory;
2546 :
2547 : if (memcmp(pi->ehdr->e_ident, ELFMAG, SELFMAG) != 0
2548 : || pi->ehdr->e_type != ET_REL
2549 : || !elf_check_arch(pi->ehdr)
2550 : || pi->ehdr->e_shentsize != sizeof(Elf_Shdr))
2551 : return -ENOEXEC;
2552 :
2553 : if (pi->ehdr->e_shoff >= kexec_purgatory_size
2554 : || (pi->ehdr->e_shnum * sizeof(Elf_Shdr) >
2555 : kexec_purgatory_size - pi->ehdr->e_shoff))
2556 : return -ENOEXEC;
2557 :
2558 : ret = __kexec_load_purgatory(image, min, max, top_down);
2559 : if (ret)
2560 : return ret;
2561 :
2562 : ret = kexec_apply_relocations(image);
2563 : if (ret)
2564 : goto out;
2565 :
2566 : *load_addr = pi->purgatory_load_addr;
2567 : return 0;
2568 : out:
2569 : vfree(pi->sechdrs);
2570 : vfree(pi->purgatory_buf);
2571 : return ret;
2572 : }
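/*
 * A hypothetical arch-side caller (illustration only) might use this as:
 *
 *	unsigned long purgatory_load_addr;
 *
 *	ret = kexec_load_purgatory(image, min_addr, max_addr, 1,
 *				   &purgatory_load_addr);
 *
 * where min_addr/max_addr bound the region in which purgatory may be
 * placed and the fourth argument selects top-down placement.
 */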
2573 :
2574 : static Elf_Sym *kexec_purgatory_find_symbol(struct purgatory_info *pi,
2575 : const char *name)
2576 : {
2577 : Elf_Sym *syms;
2578 : Elf_Shdr *sechdrs;
2579 : Elf_Ehdr *ehdr;
2580 : int i, k;
2581 : const char *strtab;
2582 :
2583 : if (!pi->sechdrs || !pi->ehdr)
2584 : return NULL;
2585 :
2586 : sechdrs = pi->sechdrs;
2587 : ehdr = pi->ehdr;
2588 :
2589 : for (i = 0; i < ehdr->e_shnum; i++) {
2590 : if (sechdrs[i].sh_type != SHT_SYMTAB)
2591 : continue;
2592 :
2593 : if (sechdrs[i].sh_link >= ehdr->e_shnum)
2594 : /* Invalid strtab section number */
2595 : continue;
2596 : strtab = (char *)sechdrs[sechdrs[i].sh_link].sh_offset;
2597 : syms = (Elf_Sym *)sechdrs[i].sh_offset;
2598 :
2599 : /* Go through symbols for a match */
2600 : for (k = 0; k < sechdrs[i].sh_size/sizeof(Elf_Sym); k++) {
2601 : if (ELF_ST_BIND(syms[k].st_info) != STB_GLOBAL)
2602 : continue;
2603 :
2604 : if (strcmp(strtab + syms[k].st_name, name) != 0)
2605 : continue;
2606 :
2607 : if (syms[k].st_shndx == SHN_UNDEF ||
2608 : syms[k].st_shndx >= ehdr->e_shnum) {
2609 : pr_debug("Symbol: %s has bad section index %d.\n",
2610 : name, syms[k].st_shndx);
2611 : return NULL;
2612 : }
2613 :
2614 : /* Found the symbol we are looking for */
2615 : return &syms[k];
2616 : }
2617 : }
2618 :
2619 : return NULL;
2620 : }
2621 :
2622 : void *kexec_purgatory_get_symbol_addr(struct kimage *image, const char *name)
2623 : {
2624 : struct purgatory_info *pi = &image->purgatory_info;
2625 : Elf_Sym *sym;
2626 : Elf_Shdr *sechdr;
2627 :
2628 : sym = kexec_purgatory_find_symbol(pi, name);
2629 : if (!sym)
2630 : return ERR_PTR(-EINVAL);
2631 :
2632 : sechdr = &pi->sechdrs[sym->st_shndx];
2633 :
2634 : /*
2635 : * Return the address where the symbol will finally be loaded after
2636 : * kexec_load_segment().
2637 : */
2638 : return (void *)(sechdr->sh_addr + sym->st_value);
2639 : }
2640 :
2641 : /*
2642 : * Get or set the value of a symbol. If "get_value" is true, the symbol's
2643 : * value is returned in buf; otherwise the symbol's value is set from buf.
2644 : */
2645 : int kexec_purgatory_get_set_symbol(struct kimage *image, const char *name,
2646 : void *buf, unsigned int size, bool get_value)
2647 : {
2648 : Elf_Sym *sym;
2649 : Elf_Shdr *sechdrs;
2650 : struct purgatory_info *pi = &image->purgatory_info;
2651 : char *sym_buf;
2652 :
2653 : sym = kexec_purgatory_find_symbol(pi, name);
2654 : if (!sym)
2655 : return -EINVAL;
2656 :
2657 : if (sym->st_size != size) {
2658 : pr_err("symbol %s size mismatch: expected %lu actual %u\n",
2659 : name, (unsigned long)sym->st_size, size);
2660 : return -EINVAL;
2661 : }
2662 :
2663 : sechdrs = pi->sechdrs;
2664 :
2665 : if (sechdrs[sym->st_shndx].sh_type == SHT_NOBITS) {
2666 : pr_err("symbol %s is in a bss section. Cannot %s\n", name,
2667 : get_value ? "get" : "set");
2668 : return -EINVAL;
2669 : }
2670 :
2671 : sym_buf = (unsigned char *)sechdrs[sym->st_shndx].sh_offset +
2672 : sym->st_value;
2673 :
2674 : if (get_value)
2675 : memcpy((void *)buf, sym_buf, size);
2676 : else
2677 : memcpy((void *)sym_buf, buf, size);
2678 :
2679 : return 0;
2680 : }
2681 : #endif /* CONFIG_KEXEC_FILE */
2682 :
2683 : /*
2684 : * Move into place and start executing a preloaded standalone
2685 : * executable. If nothing was preloaded, return an error.
2686 : */
2687 0 : int kernel_kexec(void)
2688 : {
2689 : int error = 0;
2690 :
2691 0 : if (!mutex_trylock(&kexec_mutex))
2692 : return -EBUSY;
2693 0 : if (!kexec_image) {
2694 : error = -EINVAL;
2695 : goto Unlock;
2696 : }
2697 :
2698 : #ifdef CONFIG_KEXEC_JUMP
2699 : if (kexec_image->preserve_context) {
2700 : lock_system_sleep();
2701 : pm_prepare_console();
2702 : error = freeze_processes();
2703 : if (error) {
2704 : error = -EBUSY;
2705 : goto Restore_console;
2706 : }
2707 : suspend_console();
2708 : error = dpm_suspend_start(PMSG_FREEZE);
2709 : if (error)
2710 : goto Resume_console;
2711 : /* At this point, dpm_suspend_start() has been called,
2712 : * but *not* dpm_suspend_end(). We *must* call
2713 : * dpm_suspend_end() now. Otherwise, drivers for
2714 : * some devices (e.g. interrupt controllers) become
2715 : * desynchronized with the actual state of the
2716 : * hardware at resume time, and evil weirdness ensues.
2717 : */
2718 : error = dpm_suspend_end(PMSG_FREEZE);
2719 : if (error)
2720 : goto Resume_devices;
2721 : error = disable_nonboot_cpus();
2722 : if (error)
2723 : goto Enable_cpus;
2724 : local_irq_disable();
2725 : error = syscore_suspend();
2726 : if (error)
2727 : goto Enable_irqs;
2728 : } else
2729 : #endif
2730 : {
2731 0 : kexec_in_progress = true;
2732 0 : kernel_restart_prepare(NULL);
2733 0 : migrate_to_reboot_cpu();
2734 :
2735 : /*
2736 : * migrate_to_reboot_cpu() disables CPU hotplug assuming that
2737 : * no further code needs to use CPU hotplug (which is true in
2738 : * the reboot case). However, the kexec path depends on using
2739 : * CPU hotplug again; so re-enable it here.
2740 : */
2741 : cpu_hotplug_enable();
2742 0 : pr_emerg("Starting new kernel\n");
2743 0 : machine_shutdown();
2744 : }
2745 :
2746 0 : machine_kexec(kexec_image);
2747 :
2748 : #ifdef CONFIG_KEXEC_JUMP
2749 : if (kexec_image->preserve_context) {
2750 : syscore_resume();
2751 : Enable_irqs:
2752 : local_irq_enable();
2753 : Enable_cpus:
2754 : enable_nonboot_cpus();
2755 : dpm_resume_start(PMSG_RESTORE);
2756 : Resume_devices:
2757 : dpm_resume_end(PMSG_RESTORE);
2758 : Resume_console:
2759 : resume_console();
2760 : thaw_processes();
2761 : Restore_console:
2762 : pm_restore_console();
2763 : unlock_system_sleep();
2764 : }
2765 : #endif
2766 :
2767 : Unlock:
2768 0 : mutex_unlock(&kexec_mutex);
2769 0 : return error;
2770 : }
|