Line data Source code
1 : /*
2 : * Dynamic DMA mapping support.
3 : *
4 : * This implementation is a fallback for platforms that do not support
5 : * I/O TLBs (aka DMA address translation hardware).
6 : * Copyright (C) 2000 Asit Mallick <Asit.K.Mallick@intel.com>
7 : * Copyright (C) 2000 Goutham Rao <goutham.rao@intel.com>
8 : * Copyright (C) 2000, 2003 Hewlett-Packard Co
9 : * David Mosberger-Tang <davidm@hpl.hp.com>
10 : *
11 : * 03/05/07 davidm Switch from PCI-DMA to generic device DMA API.
12 : * 00/12/13 davidm Rename to swiotlb.c and add mark_clean() to avoid
13 : * unnecessary i-cache flushing.
14 : * 04/07/.. ak Better overflow handling. Assorted fixes.
15 : * 05/09/10 linville Add support for syncing ranges, support syncing for
16 : * DMA_BIDIRECTIONAL mappings, miscellaneous cleanup.
17 : * 08/12/11 beckyb Add highmem support
18 : */
19 :
20 : #include <linux/cache.h>
21 : #include <linux/dma-mapping.h>
22 : #include <linux/mm.h>
23 : #include <linux/export.h>
24 : #include <linux/spinlock.h>
25 : #include <linux/string.h>
26 : #include <linux/swiotlb.h>
27 : #include <linux/pfn.h>
28 : #include <linux/types.h>
29 : #include <linux/ctype.h>
30 : #include <linux/highmem.h>
31 : #include <linux/gfp.h>
32 :
33 : #include <asm/io.h>
34 : #include <asm/dma.h>
35 : #include <asm/scatterlist.h>
36 :
37 : #include <linux/init.h>
38 : #include <linux/bootmem.h>
39 : #include <linux/iommu-helper.h>
40 :
41 : #define CREATE_TRACE_POINTS
42 : #include <trace/events/swiotlb.h>
43 :
44 : #define OFFSET(val,align) ((unsigned long) \
45 : ( (val) & ( (align) - 1)))
46 :
47 : #define SLABS_PER_PAGE (1 << (PAGE_SHIFT - IO_TLB_SHIFT))
48 :
49 : /*
50 : * Minimum IO TLB size to bother booting with. Systems with mainly
51 : * 64bit capable cards will only lightly use the swiotlb. If we can't
52 : * allocate a contiguous 1MB, we're probably in trouble anyway.
53 : */
54 : #define IO_TLB_MIN_SLABS ((1<<20) >> IO_TLB_SHIFT)
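/*
 * With the usual IO_TLB_SHIFT of 11 (2 KB slabs, see <linux/swiotlb.h>),
 * this works out to (1 << 20) >> 11 = 512 slabs.
 */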
55 :
56 : int swiotlb_force;
57 :
58 : /*
59 : * Used to do a quick range check in swiotlb_tbl_unmap_single and
60 : * swiotlb_tbl_sync_single_*, to see if the memory was in fact allocated by this
61 : * API.
62 : */
63 : static phys_addr_t io_tlb_start, io_tlb_end;
64 :
65 : /*
66 : * The number of IO TLB blocks (in groups of 64) between io_tlb_start and
67 : * io_tlb_end. This is command line adjustable via setup_io_tlb_npages.
68 : */
69 : static unsigned long io_tlb_nslabs;
70 :
71 : /*
72 : * When the IOMMU overflows we return a fallback buffer. This sets the size.
73 : */
74 : static unsigned long io_tlb_overflow = 32*1024;
75 :
76 : static phys_addr_t io_tlb_overflow_buffer;
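/*
 * When a streaming mapping cannot be bounced, swiotlb_map_page() below hands
 * back the DMA address of this emergency buffer instead of failing outright;
 * swiotlb_dma_mapping_error() recognizes that address, so drivers that check
 * for mapping errors can still recover.
 */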
77 :
78 : /*
79 : * This is a free list describing the number of free entries available from
80 : * each index
81 : */
82 : static unsigned int *io_tlb_list;
83 : static unsigned int io_tlb_index;
84 :
85 : /*
86 : * We need to save away the original address corresponding to a mapped entry
87 : * for the sync operations.
88 : */
89 : #define INVALID_PHYS_ADDR (~(phys_addr_t)0)
90 : static phys_addr_t *io_tlb_orig_addr;
91 :
92 : /*
93 : * Protect the above data structures in the map and unmap calls
94 : */
95 : static DEFINE_SPINLOCK(io_tlb_lock);
96 :
97 : static int late_alloc;
98 :
99 : static int __init
100 0 : setup_io_tlb_npages(char *str)
101 : {
102 0 : if (isdigit(*str)) {
103 0 : io_tlb_nslabs = simple_strtoul(str, &str, 0);
104 : /* avoid tail segment of size < IO_TLB_SEGSIZE */
105 0 : io_tlb_nslabs = ALIGN(io_tlb_nslabs, IO_TLB_SEGSIZE);
106 : }
107 0 : if (*str == ',')
108 0 : ++str;
109 0 : if (!strcmp(str, "force"))
110 0 : swiotlb_force = 1;
111 :
112 0 : return 0;
113 : }
114 : early_param("swiotlb", setup_io_tlb_npages);
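/*
 * Accepted forms on the kernel command line, as parsed above:
 *
 *	swiotlb=<nslabs>	size the pool to <nslabs> slabs (rounded up
 *				to a multiple of IO_TLB_SEGSIZE)
 *	swiotlb=<nslabs>,force	as above, and also bounce all DMA through
 *				the swiotlb
 *	swiotlb=force		keep the default size, force bouncing
 */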
115 : /* make io_tlb_overflow tunable too? */
116 :
117 0 : unsigned long swiotlb_nr_tbl(void)
118 : {
119 0 : return io_tlb_nslabs;
120 : }
121 : EXPORT_SYMBOL_GPL(swiotlb_nr_tbl);
122 :
123 : /* default to 64MB */
124 : #define IO_TLB_DEFAULT_SIZE (64UL<<20)
125 0 : unsigned long swiotlb_size_or_default(void)
126 : {
127 : unsigned long size;
128 :
129 0 : size = io_tlb_nslabs << IO_TLB_SHIFT;
130 :
131 0 : return size ? size : (IO_TLB_DEFAULT_SIZE);
132 : }
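/*
 * If no size was configured this reports the 64 MB default above; with the
 * usual 2 KB slab size that corresponds to 32768 slabs.
 */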
133 :
134 : /* Note that this doesn't work with highmem pages */
135 : static dma_addr_t swiotlb_virt_to_bus(struct device *hwdev,
136 : volatile void *address)
137 : {
138 : return phys_to_dma(hwdev, virt_to_phys(address));
139 : }
140 :
141 : static bool no_iotlb_memory;
142 :
143 0 : void swiotlb_print_info(void)
144 : {
145 0 : unsigned long bytes = io_tlb_nslabs << IO_TLB_SHIFT;
146 : unsigned char *vstart, *vend;
147 :
148 0 : if (no_iotlb_memory) {
149 0 : pr_warn("software IO TLB: No low mem\n");
150 0 : return;
151 : }
152 :
153 0 : vstart = phys_to_virt(io_tlb_start);
154 0 : vend = phys_to_virt(io_tlb_end);
155 :
156 0 : printk(KERN_INFO "software IO TLB [mem %#010llx-%#010llx] (%luMB) mapped at [%p-%p]\n",
157 : (unsigned long long)io_tlb_start,
158 : (unsigned long long)io_tlb_end,
159 : bytes >> 20, vstart, vend - 1);
160 : }
161 :
162 0 : int __init swiotlb_init_with_tbl(char *tlb, unsigned long nslabs, int verbose)
163 : {
164 : void *v_overflow_buffer;
165 : unsigned long i, bytes;
166 :
167 0 : bytes = nslabs << IO_TLB_SHIFT;
168 :
169 0 : io_tlb_nslabs = nslabs;
170 0 : io_tlb_start = __pa(tlb);
171 0 : io_tlb_end = io_tlb_start + bytes;
172 :
173 : /*
174 : * Get the overflow emergency buffer
175 : */
176 0 : v_overflow_buffer = memblock_virt_alloc_low_nopanic(
177 0 : PAGE_ALIGN(io_tlb_overflow),
178 : PAGE_SIZE);
179 0 : if (!v_overflow_buffer)
180 : return -ENOMEM;
181 :
182 0 : io_tlb_overflow_buffer = __pa(v_overflow_buffer);
183 :
184 : /*
185 : * Allocate and initialize the free list array. This array is used
186 : * to find contiguous free memory regions of size up to IO_TLB_SEGSIZE
187 : * between io_tlb_start and io_tlb_end.
188 : */
189 0 : io_tlb_list = memblock_virt_alloc(
190 0 : PAGE_ALIGN(io_tlb_nslabs * sizeof(int)),
191 : PAGE_SIZE);
192 0 : io_tlb_orig_addr = memblock_virt_alloc(
193 0 : PAGE_ALIGN(io_tlb_nslabs * sizeof(phys_addr_t)),
194 : PAGE_SIZE);
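/*
 * Each free-list entry records how many free slots remain from that index up
 * to the end of its IO_TLB_SEGSIZE-slot segment (typically 128 slots), so a
 * single allocation never crosses a segment boundary.
 */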
195 0 : for (i = 0; i < io_tlb_nslabs; i++) {
196 0 : io_tlb_list[i] = IO_TLB_SEGSIZE - OFFSET(i, IO_TLB_SEGSIZE);
197 0 : io_tlb_orig_addr[i] = INVALID_PHYS_ADDR;
198 : }
199 0 : io_tlb_index = 0;
200 :
201 0 : if (verbose)
202 0 : swiotlb_print_info();
203 :
204 : return 0;
205 : }
206 :
207 : /*
208 : * Statically reserve bounce buffer space and initialize bounce buffer data
209 : * structures for the software IO TLB used to implement the DMA API.
210 : */
211 : void __init
212 0 : swiotlb_init(int verbose)
213 : {
214 : size_t default_size = IO_TLB_DEFAULT_SIZE;
215 : unsigned char *vstart;
216 : unsigned long bytes;
217 :
218 0 : if (!io_tlb_nslabs) {
219 0 : io_tlb_nslabs = (default_size >> IO_TLB_SHIFT);
220 : io_tlb_nslabs = ALIGN(io_tlb_nslabs, IO_TLB_SEGSIZE);
221 : }
222 :
223 0 : bytes = io_tlb_nslabs << IO_TLB_SHIFT;
224 :
225 : /* Get IO TLB memory from the low pages */
226 0 : vstart = memblock_virt_alloc_low_nopanic(PAGE_ALIGN(bytes), PAGE_SIZE);
227 0 : if (vstart && !swiotlb_init_with_tbl(vstart, io_tlb_nslabs, verbose))
228 0 : return;
229 :
230 0 : if (io_tlb_start)
231 0 : memblock_free_early(io_tlb_start,
232 0 : PAGE_ALIGN(io_tlb_nslabs << IO_TLB_SHIFT));
233 0 : pr_warn("Cannot allocate SWIOTLB buffer");
234 0 : no_iotlb_memory = true;
235 : }
236 :
237 : /*
238 : * Systems with larger DMA zones (those that don't support ISA) can
239 : * initialize the swiotlb later using the slab allocator if needed.
240 : * This should be just like above, but with some error catching.
241 : */
242 : int
243 0 : swiotlb_late_init_with_default_size(size_t default_size)
244 : {
245 0 : unsigned long bytes, req_nslabs = io_tlb_nslabs;
246 : unsigned char *vstart = NULL;
247 : unsigned int order;
248 : int rc = 0;
249 :
250 0 : if (!io_tlb_nslabs) {
251 0 : io_tlb_nslabs = (default_size >> IO_TLB_SHIFT);
252 0 : io_tlb_nslabs = ALIGN(io_tlb_nslabs, IO_TLB_SEGSIZE);
253 : }
254 :
255 : /*
256 : * Get IO TLB memory from the low pages
257 : */
258 0 : order = get_order(io_tlb_nslabs << IO_TLB_SHIFT);
259 0 : io_tlb_nslabs = SLABS_PER_PAGE << order;
260 0 : bytes = io_tlb_nslabs << IO_TLB_SHIFT;
261 :
262 0 : while ((SLABS_PER_PAGE << order) > IO_TLB_MIN_SLABS) {
263 0 : vstart = (void *)__get_free_pages(GFP_DMA | __GFP_NOWARN,
264 : order);
265 0 : if (vstart)
266 : break;
267 0 : order--;
268 : }
269 :
270 0 : if (!vstart) {
271 0 : io_tlb_nslabs = req_nslabs;
272 0 : return -ENOMEM;
273 : }
274 0 : if (order != get_order(bytes)) {
275 0 : printk(KERN_WARNING "Warning: only able to allocate %ld MB "
276 0 : "for software IO TLB\n", (PAGE_SIZE << order) >> 20);
277 0 : io_tlb_nslabs = SLABS_PER_PAGE << order;
278 : }
279 0 : rc = swiotlb_late_init_with_tbl(vstart, io_tlb_nslabs);
280 0 : if (rc)
281 0 : free_pages((unsigned long)vstart, order);
282 0 : return rc;
283 : }
284 :
285 : int
286 0 : swiotlb_late_init_with_tbl(char *tlb, unsigned long nslabs)
287 : {
288 : unsigned long i, bytes;
289 : unsigned char *v_overflow_buffer;
290 :
291 0 : bytes = nslabs << IO_TLB_SHIFT;
292 :
293 0 : io_tlb_nslabs = nslabs;
294 0 : io_tlb_start = virt_to_phys(tlb);
295 0 : io_tlb_end = io_tlb_start + bytes;
296 :
297 0 : memset(tlb, 0, bytes);
298 :
299 : /*
300 : * Get the overflow emergency buffer
301 : */
302 0 : v_overflow_buffer = (void *)__get_free_pages(GFP_DMA,
303 0 : get_order(io_tlb_overflow));
304 0 : if (!v_overflow_buffer)
305 : goto cleanup2;
306 :
307 0 : io_tlb_overflow_buffer = virt_to_phys(v_overflow_buffer);
308 :
309 : /*
310 : * Allocate and initialize the free list array. This array is used
311 : * to find contiguous free memory regions of size up to IO_TLB_SEGSIZE
312 : * between io_tlb_start and io_tlb_end.
313 : */
314 0 : io_tlb_list = (unsigned int *)__get_free_pages(GFP_KERNEL,
315 0 : get_order(io_tlb_nslabs * sizeof(int)));
316 0 : if (!io_tlb_list)
317 : goto cleanup3;
318 :
319 0 : io_tlb_orig_addr = (phys_addr_t *)
320 0 : __get_free_pages(GFP_KERNEL,
321 0 : get_order(io_tlb_nslabs *
322 : sizeof(phys_addr_t)));
323 0 : if (!io_tlb_orig_addr)
324 : goto cleanup4;
325 :
326 0 : for (i = 0; i < io_tlb_nslabs; i++) {
327 0 : io_tlb_list[i] = IO_TLB_SEGSIZE - OFFSET(i, IO_TLB_SEGSIZE);
328 0 : io_tlb_orig_addr[i] = INVALID_PHYS_ADDR;
329 : }
330 0 : io_tlb_index = 0;
331 :
332 0 : swiotlb_print_info();
333 :
334 0 : late_alloc = 1;
335 :
336 0 : return 0;
337 :
338 : cleanup4:
339 0 : free_pages((unsigned long)io_tlb_list, get_order(io_tlb_nslabs *
340 : sizeof(int)));
341 0 : io_tlb_list = NULL;
342 : cleanup3:
343 0 : free_pages((unsigned long)v_overflow_buffer,
344 0 : get_order(io_tlb_overflow));
345 0 : io_tlb_overflow_buffer = 0;
346 : cleanup2:
347 0 : io_tlb_end = 0;
348 0 : io_tlb_start = 0;
349 0 : io_tlb_nslabs = 0;
350 0 : return -ENOMEM;
351 : }
352 :
353 0 : void __init swiotlb_free(void)
354 : {
355 0 : if (!io_tlb_orig_addr)
356 0 : return;
357 :
358 0 : if (late_alloc) {
359 0 : free_pages((unsigned long)phys_to_virt(io_tlb_overflow_buffer),
360 0 : get_order(io_tlb_overflow));
361 0 : free_pages((unsigned long)io_tlb_orig_addr,
362 0 : get_order(io_tlb_nslabs * sizeof(phys_addr_t)));
363 0 : free_pages((unsigned long)io_tlb_list, get_order(io_tlb_nslabs *
364 : sizeof(int)));
365 0 : free_pages((unsigned long)phys_to_virt(io_tlb_start),
366 0 : get_order(io_tlb_nslabs << IO_TLB_SHIFT));
367 : } else {
368 0 : memblock_free_late(io_tlb_overflow_buffer,
369 0 : PAGE_ALIGN(io_tlb_overflow));
370 0 : memblock_free_late(__pa(io_tlb_orig_addr),
371 0 : PAGE_ALIGN(io_tlb_nslabs * sizeof(phys_addr_t)));
372 0 : memblock_free_late(__pa(io_tlb_list),
373 0 : PAGE_ALIGN(io_tlb_nslabs * sizeof(int)));
374 0 : memblock_free_late(io_tlb_start,
375 0 : PAGE_ALIGN(io_tlb_nslabs << IO_TLB_SHIFT));
376 : }
377 0 : io_tlb_nslabs = 0;
378 : }
379 :
380 0 : int is_swiotlb_buffer(phys_addr_t paddr)
381 : {
382 0 : return paddr >= io_tlb_start && paddr < io_tlb_end;
383 : }
384 :
385 : /*
386 : * Bounce: copy the swiotlb buffer from or back to the original dma location
387 : */
388 0 : static void swiotlb_bounce(phys_addr_t orig_addr, phys_addr_t tlb_addr,
389 : size_t size, enum dma_data_direction dir)
390 : {
391 : unsigned long pfn = PFN_DOWN(orig_addr);
392 : unsigned char *vaddr = phys_to_virt(tlb_addr);
393 :
394 : if (PageHighMem(pfn_to_page(pfn))) {
395 : /* The buffer does not have a mapping. Map it in and copy */
396 : unsigned int offset = orig_addr & ~PAGE_MASK;
397 : char *buffer;
398 : unsigned int sz = 0;
399 : unsigned long flags;
400 :
401 : while (size) {
402 : sz = min_t(size_t, PAGE_SIZE - offset, size);
403 :
404 : local_irq_save(flags);
405 : buffer = kmap_atomic(pfn_to_page(pfn));
406 : if (dir == DMA_TO_DEVICE)
407 : memcpy(vaddr, buffer + offset, sz);
408 : else
409 : memcpy(buffer + offset, vaddr, sz);
410 : kunmap_atomic(buffer);
411 : local_irq_restore(flags);
412 :
413 : size -= sz;
414 : pfn++;
415 : vaddr += sz;
416 : offset = 0;
417 : }
418 0 : } else if (dir == DMA_TO_DEVICE) {
419 0 : memcpy(vaddr, phys_to_virt(orig_addr), size);
420 : } else {
421 0 : memcpy(phys_to_virt(orig_addr), vaddr, size);
422 : }
423 0 : }
424 :
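/*
 * Allocate a bounce-buffer slot for orig_addr and, for DMA_TO_DEVICE or
 * DMA_BIDIRECTIONAL mappings, copy the caller's data into it.  tbl_dma_addr
 * is the DMA address of the start of the bounce pool as seen by hwdev; it is
 * only used for the segment-boundary checks below.  Returns the physical
 * address of the slot inside the pool, or SWIOTLB_MAP_ERROR if no suitable
 * run of free slots could be found.
 */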
425 0 : phys_addr_t swiotlb_tbl_map_single(struct device *hwdev,
426 : dma_addr_t tbl_dma_addr,
427 : phys_addr_t orig_addr, size_t size,
428 : enum dma_data_direction dir)
429 : {
430 : unsigned long flags;
431 : phys_addr_t tlb_addr;
432 : unsigned int nslots, stride, index, wrap;
433 : int i;
434 : unsigned long mask;
435 : unsigned long offset_slots;
436 : unsigned long max_slots;
437 :
438 0 : if (no_iotlb_memory)
439 0 : panic("Can not allocate SWIOTLB buffer earlier and can't now provide you with the DMA bounce buffer");
440 :
441 : mask = dma_get_seg_boundary(hwdev);
442 :
443 0 : tbl_dma_addr &= mask;
444 :
445 0 : offset_slots = ALIGN(tbl_dma_addr, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT;
446 :
447 : /*
448 : * Carefully handle integer overflow which can occur when mask == ~0UL.
449 : */
450 : max_slots = mask + 1
451 0 : ? ALIGN(mask + 1, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT
452 0 : : 1UL << (BITS_PER_LONG - IO_TLB_SHIFT);
453 :
454 : /*
455 : * For mappings greater than a page, we limit the stride (and
456 : * hence alignment) to a page size.
457 : */
458 0 : nslots = ALIGN(size, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT;
459 0 : if (size > PAGE_SIZE)
460 : stride = (1 << (PAGE_SHIFT - IO_TLB_SHIFT));
461 : else
462 : stride = 1;
463 :
464 : BUG_ON(!nslots);
465 :
466 : /*
467 : * Find a suitable number of IO TLB entries that will fit this
468 : * request and allocate a buffer from that IO TLB pool.
469 : */
470 0 : spin_lock_irqsave(&io_tlb_lock, flags);
471 0 : index = ALIGN(io_tlb_index, stride);
472 0 : if (index >= io_tlb_nslabs)
473 : index = 0;
474 : wrap = index;
475 :
476 : do {
477 0 : while (iommu_is_span_boundary(index, nslots, offset_slots,
478 : max_slots)) {
479 0 : index += stride;
480 0 : if (index >= io_tlb_nslabs)
481 : index = 0;
482 0 : if (index == wrap)
483 : goto not_found;
484 : }
485 :
486 : /*
487 : * If we find a slot that indicates we have 'nslots' number of
488 : * contiguous buffers, we allocate the buffers from that slot
489 : * and mark the entries as '0' indicating unavailable.
490 : */
491 0 : if (io_tlb_list[index] >= nslots) {
492 : int count = 0;
493 :
494 0 : for (i = index; i < (int) (index + nslots); i++)
495 0 : io_tlb_list[i] = 0;
496 0 : for (i = index - 1; (OFFSET(i, IO_TLB_SEGSIZE) != IO_TLB_SEGSIZE - 1) && io_tlb_list[i]; i--)
497 0 : io_tlb_list[i] = ++count;
498 0 : tlb_addr = io_tlb_start + (index << IO_TLB_SHIFT);
499 :
500 : /*
501 : * Update the indices to avoid searching in the next
502 : * round.
503 : */
504 0 : io_tlb_index = ((index + nslots) < io_tlb_nslabs
505 0 : ? (index + nslots) : 0);
506 :
507 : goto found;
508 : }
509 0 : index += stride;
510 0 : if (index >= io_tlb_nslabs)
511 : index = 0;
512 0 : } while (index != wrap);
513 :
514 : not_found:
515 : spin_unlock_irqrestore(&io_tlb_lock, flags);
516 0 : if (printk_ratelimit())
517 0 : dev_warn(hwdev, "swiotlb buffer is full (sz: %zd bytes)\n", size);
518 : return SWIOTLB_MAP_ERROR;
519 : found:
520 : spin_unlock_irqrestore(&io_tlb_lock, flags);
521 :
522 : /*
523 : * Save away the mapping from the original address to the DMA address.
524 : * This is needed when we sync the memory. Then we sync the buffer if
525 : * needed.
526 : */
527 0 : for (i = 0; i < nslots; i++)
528 0 : io_tlb_orig_addr[index+i] = orig_addr + (i << IO_TLB_SHIFT);
529 0 : if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL)
530 0 : swiotlb_bounce(orig_addr, tlb_addr, size, DMA_TO_DEVICE);
531 :
532 0 : return tlb_addr;
533 : }
534 : EXPORT_SYMBOL_GPL(swiotlb_tbl_map_single);
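/*
 * Illustrative sketch only (it mirrors map_single() and swiotlb_map_page()
 * below, and is not a required calling sequence): a typical bounce cycle is
 *
 *	phys_addr_t map = swiotlb_tbl_map_single(hwdev,
 *				phys_to_dma(hwdev, io_tlb_start),
 *				phys, size, dir);
 *	if (map == SWIOTLB_MAP_ERROR)
 *		report the failure to the caller;
 *	dev_addr = phys_to_dma(hwdev, map);
 *	... the device performs DMA to/from dev_addr ...
 *	swiotlb_tbl_unmap_single(hwdev, map, size, dir);
 */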
535 :
536 : /*
537 : * Allocates bounce buffer and returns its kernel virtual address.
538 : */
539 :
540 0 : phys_addr_t map_single(struct device *hwdev, phys_addr_t phys, size_t size,
541 : enum dma_data_direction dir)
542 : {
543 0 : dma_addr_t start_dma_addr = phys_to_dma(hwdev, io_tlb_start);
544 :
545 0 : return swiotlb_tbl_map_single(hwdev, start_dma_addr, phys, size, dir);
546 : }
547 :
548 : /*
549 : * tlb_addr is the physical address of the bounce buffer to unmap.
550 : */
551 0 : void swiotlb_tbl_unmap_single(struct device *hwdev, phys_addr_t tlb_addr,
552 : size_t size, enum dma_data_direction dir)
553 : {
554 : unsigned long flags;
555 0 : int i, count, nslots = ALIGN(size, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT;
556 0 : int index = (tlb_addr - io_tlb_start) >> IO_TLB_SHIFT;
557 0 : phys_addr_t orig_addr = io_tlb_orig_addr[index];
558 :
559 : /*
560 : * First, sync the memory before unmapping the entry
561 : */
562 0 : if (orig_addr != INVALID_PHYS_ADDR &&
563 0 : ((dir == DMA_FROM_DEVICE) || (dir == DMA_BIDIRECTIONAL)))
564 0 : swiotlb_bounce(orig_addr, tlb_addr, size, DMA_FROM_DEVICE);
565 :
566 : /*
567 : * Return the buffer to the free list by setting the corresponding
568 : * entries to indicate the number of contiguous entries available.
569 : * While returning the entries to the free list, we merge the entries
570 : * with slots below and above the pool being returned.
571 : */
572 0 : spin_lock_irqsave(&io_tlb_lock, flags);
573 : {
574 0 : count = ((index + nslots) < ALIGN(index + 1, IO_TLB_SEGSIZE) ?
575 0 : io_tlb_list[index + nslots] : 0);
576 : /*
577 : * Step 1: return the slots to the free list, merging the
578 : * slots with succeeding slots
579 : */
580 0 : for (i = index + nslots - 1; i >= index; i--) {
581 0 : io_tlb_list[i] = ++count;
582 0 : io_tlb_orig_addr[i] = INVALID_PHYS_ADDR;
583 : }
584 : /*
585 : * Step 2: merge the returned slots with the preceding slots,
586 : * if available (non-zero)
587 : */
588 0 : for (i = index - 1; (OFFSET(i, IO_TLB_SEGSIZE) != IO_TLB_SEGSIZE - 1) && io_tlb_list[i]; i--)
589 0 : io_tlb_list[i] = ++count;
590 : }
591 : spin_unlock_irqrestore(&io_tlb_lock, flags);
592 0 : }
593 : EXPORT_SYMBOL_GPL(swiotlb_tbl_unmap_single);
594 :
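/*
 * Sync one bounce-buffer slot with its original page: SYNC_FOR_CPU copies
 * device-written data back out of the bounce buffer (DMA_FROM_DEVICE or
 * DMA_BIDIRECTIONAL), SYNC_FOR_DEVICE copies CPU-written data into it
 * (DMA_TO_DEVICE or DMA_BIDIRECTIONAL).
 */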
595 0 : void swiotlb_tbl_sync_single(struct device *hwdev, phys_addr_t tlb_addr,
596 : size_t size, enum dma_data_direction dir,
597 : enum dma_sync_target target)
598 : {
599 0 : int index = (tlb_addr - io_tlb_start) >> IO_TLB_SHIFT;
600 0 : phys_addr_t orig_addr = io_tlb_orig_addr[index];
601 :
602 0 : if (orig_addr == INVALID_PHYS_ADDR)
603 0 : return;
604 0 : orig_addr += (unsigned long)tlb_addr & ((1 << IO_TLB_SHIFT) - 1);
605 :
606 0 : switch (target) {
607 : case SYNC_FOR_CPU:
608 0 : if (likely(dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL))
609 0 : swiotlb_bounce(orig_addr, tlb_addr,
610 : size, DMA_FROM_DEVICE);
611 : else
612 : BUG_ON(dir != DMA_TO_DEVICE);
613 : break;
614 : case SYNC_FOR_DEVICE:
615 0 : if (likely(dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL))
616 0 : swiotlb_bounce(orig_addr, tlb_addr,
617 : size, DMA_TO_DEVICE);
618 : else
619 : BUG_ON(dir != DMA_FROM_DEVICE);
620 : break;
621 : default:
622 : BUG();
623 : }
624 : }
625 : EXPORT_SYMBOL_GPL(swiotlb_tbl_sync_single);
626 :
627 : void *
628 0 : swiotlb_alloc_coherent(struct device *hwdev, size_t size,
629 : dma_addr_t *dma_handle, gfp_t flags)
630 : {
631 : dma_addr_t dev_addr;
632 : void *ret;
633 0 : int order = get_order(size);
634 : u64 dma_mask = DMA_BIT_MASK(32);
635 :
636 0 : if (hwdev && hwdev->coherent_dma_mask)
637 : dma_mask = hwdev->coherent_dma_mask;
638 :
639 0 : ret = (void *)__get_free_pages(flags, order);
640 0 : if (ret) {
641 : dev_addr = swiotlb_virt_to_bus(hwdev, ret);
642 0 : if (dev_addr + size - 1 > dma_mask) {
643 : /*
644 : * The allocated memory isn't reachable by the device.
645 : */
646 0 : free_pages((unsigned long) ret, order);
647 : ret = NULL;
648 : }
649 : }
650 0 : if (!ret) {
651 : /*
652 : * We are either out of memory or the device can't DMA to
653 : * GFP_DMA memory; fall back on map_single(), which
654 : * will grab memory from the lowest available address range.
655 : */
656 0 : phys_addr_t paddr = map_single(hwdev, 0, size, DMA_FROM_DEVICE);
657 0 : if (paddr == SWIOTLB_MAP_ERROR)
658 : return NULL;
659 :
660 : ret = phys_to_virt(paddr);
661 : dev_addr = phys_to_dma(hwdev, paddr);
662 :
663 : /* Confirm address can be DMA'd by device */
664 0 : if (dev_addr + size - 1 > dma_mask) {
665 0 : printk("hwdev DMA mask = 0x%016Lx, dev_addr = 0x%016Lx\n",
666 : (unsigned long long)dma_mask,
667 : (unsigned long long)dev_addr);
668 :
669 : /* DMA_TO_DEVICE to avoid memcpy in unmap_single */
670 0 : swiotlb_tbl_unmap_single(hwdev, paddr,
671 : size, DMA_TO_DEVICE);
672 0 : return NULL;
673 : }
674 : }
675 :
676 0 : *dma_handle = dev_addr;
677 0 : memset(ret, 0, size);
678 :
679 0 : return ret;
680 : }
681 : EXPORT_SYMBOL(swiotlb_alloc_coherent);
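/*
 * Illustrative sketch only; drivers normally reach this through
 * dma_alloc_coherent() on configurations whose dma_map_ops are backed by
 * swiotlb (an assumption about the platform, not something this file
 * enforces).  Called directly, it pairs up as:
 *
 *	dma_addr_t dma_handle;
 *	void *cpu_addr = swiotlb_alloc_coherent(dev, size, &dma_handle,
 *						GFP_KERNEL);
 *	if (!cpu_addr)
 *		fail or fall back;
 *	...
 *	swiotlb_free_coherent(dev, size, cpu_addr, dma_handle);
 */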
682 :
683 : void
684 0 : swiotlb_free_coherent(struct device *hwdev, size_t size, void *vaddr,
685 : dma_addr_t dev_addr)
686 : {
687 : phys_addr_t paddr = dma_to_phys(hwdev, dev_addr);
688 :
689 : WARN_ON(irqs_disabled());
690 0 : if (!is_swiotlb_buffer(paddr))
691 0 : free_pages((unsigned long)vaddr, get_order(size));
692 : else
693 : /* DMA_TO_DEVICE to avoid memcpy in swiotlb_tbl_unmap_single */
694 0 : swiotlb_tbl_unmap_single(hwdev, paddr, size, DMA_TO_DEVICE);
695 0 : }
696 : EXPORT_SYMBOL(swiotlb_free_coherent);
697 :
698 : static void
699 0 : swiotlb_full(struct device *dev, size_t size, enum dma_data_direction dir,
700 : int do_panic)
701 : {
702 : /*
703 : * Ran out of IOMMU space for this operation. This is very bad.
704 : * Unfortunately the drivers cannot handle this operation properly
705 : * unless they check for dma_mapping_error (most don't).
706 : * When the mapping is small enough, return a static buffer to limit
707 : * the damage, or panic when the transfer is too big.
708 : */
709 0 : printk(KERN_ERR "DMA: Out of SW-IOMMU space for %zu bytes at "
710 : "device %s\n", size, dev ? dev_name(dev) : "?");
711 :
712 0 : if (size <= io_tlb_overflow || !do_panic)
713 0 : return;
714 :
715 0 : if (dir == DMA_BIDIRECTIONAL)
716 0 : panic("DMA: Random memory could be DMA accessed\n");
717 0 : if (dir == DMA_FROM_DEVICE)
718 0 : panic("DMA: Random memory could be DMA written\n");
719 0 : if (dir == DMA_TO_DEVICE)
720 0 : panic("DMA: Random memory could be DMA read\n");
721 : }
722 :
723 : /*
724 : * Map a single buffer of the indicated size for DMA in streaming mode. The
725 : * physical address to use is returned.
726 : *
727 : * Once the device is given the dma address, the device owns this memory until
728 : * either swiotlb_unmap_page() or one of the swiotlb_sync_single_for_* calls is performed.
729 : */
730 0 : dma_addr_t swiotlb_map_page(struct device *dev, struct page *page,
731 : unsigned long offset, size_t size,
732 : enum dma_data_direction dir,
733 : struct dma_attrs *attrs)
734 : {
735 0 : phys_addr_t map, phys = page_to_phys(page) + offset;
736 : dma_addr_t dev_addr = phys_to_dma(dev, phys);
737 :
738 : BUG_ON(dir == DMA_NONE);
739 : /*
740 : * If the address happens to be in the device's DMA window,
741 : * we can safely return the device addr and not worry about bounce
742 : * buffering it.
743 : */
744 0 : if (dma_capable(dev, dev_addr, size) && !swiotlb_force)
745 : return dev_addr;
746 :
747 : trace_swiotlb_bounced(dev, dev_addr, size, swiotlb_force);
748 :
749 : /* Oh well, have to allocate and map a bounce buffer. */
750 0 : map = map_single(dev, phys, size, dir);
751 0 : if (map == SWIOTLB_MAP_ERROR) {
752 0 : swiotlb_full(dev, size, dir, 1);
753 0 : return phys_to_dma(dev, io_tlb_overflow_buffer);
754 : }
755 :
756 : dev_addr = phys_to_dma(dev, map);
757 :
758 : /* Ensure that the address returned is DMA'ble */
759 0 : if (!dma_capable(dev, dev_addr, size)) {
760 0 : swiotlb_tbl_unmap_single(dev, map, size, dir);
761 0 : return phys_to_dma(dev, io_tlb_overflow_buffer);
762 : }
763 :
764 : return dev_addr;
765 : }
766 : EXPORT_SYMBOL_GPL(swiotlb_map_page);
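/*
 * Illustrative sketch only, using functions from this file: a streaming
 * mapping that copes with a full bounce pool.
 *
 *	dma_addr_t dev_addr = swiotlb_map_page(dev, page, offset, size,
 *					       dir, NULL);
 *	if (swiotlb_dma_mapping_error(dev, dev_addr))
 *		the overflow buffer was returned; do not use the mapping
 *	...
 *	swiotlb_unmap_page(dev, dev_addr, size, dir, NULL);
 */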
767 :
768 : /*
769 : * Unmap a single streaming mode DMA translation. The dma_addr and size must
770 : * match what was provided in a previous swiotlb_map_page call. All
771 : * other usages are undefined.
772 : *
773 : * After this call, reads by the cpu to the buffer are guaranteed to see
774 : * whatever the device wrote there.
775 : */
776 0 : static void unmap_single(struct device *hwdev, dma_addr_t dev_addr,
777 : size_t size, enum dma_data_direction dir)
778 : {
779 : phys_addr_t paddr = dma_to_phys(hwdev, dev_addr);
780 :
781 : BUG_ON(dir == DMA_NONE);
782 :
783 0 : if (is_swiotlb_buffer(paddr)) {
784 0 : swiotlb_tbl_unmap_single(hwdev, paddr, size, dir);
785 0 : return;
786 : }
787 :
788 : if (dir != DMA_FROM_DEVICE)
789 : return;
790 :
791 : /*
792 : * phys_to_virt doesn't work with highmem pages but we could
793 : * call dma_mark_clean() with a highmem page here. However, we
794 : * are fine since dma_mark_clean() is null on POWERPC. We can
795 : * make dma_mark_clean() take a physical address if necessary.
796 : */
797 : dma_mark_clean(phys_to_virt(paddr), size);
798 : }
799 :
800 0 : void swiotlb_unmap_page(struct device *hwdev, dma_addr_t dev_addr,
801 : size_t size, enum dma_data_direction dir,
802 : struct dma_attrs *attrs)
803 : {
804 0 : unmap_single(hwdev, dev_addr, size, dir);
805 0 : }
806 : EXPORT_SYMBOL_GPL(swiotlb_unmap_page);
807 :
808 : /*
809 : * Make physical memory consistent for a single streaming mode DMA translation
810 : * after a transfer.
811 : *
812 : * If you perform a swiotlb_map_page() but wish to interrogate the buffer
813 : * using the cpu, yet do not wish to tear down the dma mapping, you must
814 : * call this function before doing so. At the next point you give the dma
815 : * address back to the card, you must first perform a
816 : * swiotlb_sync_single_for_device(), and then the device again owns the buffer.
817 : */
818 : static void
819 0 : swiotlb_sync_single(struct device *hwdev, dma_addr_t dev_addr,
820 : size_t size, enum dma_data_direction dir,
821 : enum dma_sync_target target)
822 : {
823 : phys_addr_t paddr = dma_to_phys(hwdev, dev_addr);
824 :
825 : BUG_ON(dir == DMA_NONE);
826 :
827 0 : if (is_swiotlb_buffer(paddr)) {
828 0 : swiotlb_tbl_sync_single(hwdev, paddr, size, dir, target);
829 0 : return;
830 : }
831 :
832 : if (dir != DMA_FROM_DEVICE)
833 : return;
834 :
835 : dma_mark_clean(phys_to_virt(paddr), size);
836 : }
837 :
838 : void
839 0 : swiotlb_sync_single_for_cpu(struct device *hwdev, dma_addr_t dev_addr,
840 : size_t size, enum dma_data_direction dir)
841 : {
842 0 : swiotlb_sync_single(hwdev, dev_addr, size, dir, SYNC_FOR_CPU);
843 0 : }
844 : EXPORT_SYMBOL(swiotlb_sync_single_for_cpu);
845 :
846 : void
847 0 : swiotlb_sync_single_for_device(struct device *hwdev, dma_addr_t dev_addr,
848 : size_t size, enum dma_data_direction dir)
849 : {
850 0 : swiotlb_sync_single(hwdev, dev_addr, size, dir, SYNC_FOR_DEVICE);
851 0 : }
852 : EXPORT_SYMBOL(swiotlb_sync_single_for_device);
853 :
854 : /*
855 : * Map a set of buffers described by scatterlist in streaming mode for DMA.
856 : * This is the scatter-gather version of the above swiotlb_map_page
857 : * interface. Here the scatter gather list elements are each tagged with the
858 : * appropriate dma address and length. They are obtained via
859 : * sg_dma_{address,length}(SG).
860 : *
861 : * NOTE: An implementation may be able to use a smaller number of
862 : * DMA address/length pairs than there are SG table elements.
863 : * (for example via virtual mapping capabilities)
864 : * The routine returns the number of addr/length pairs actually
865 : * used, at most nents.
866 : *
867 : * Device ownership issues as mentioned above for swiotlb_map_page are the
868 : * same here.
869 : */
870 : int
871 0 : swiotlb_map_sg_attrs(struct device *hwdev, struct scatterlist *sgl, int nelems,
872 : enum dma_data_direction dir, struct dma_attrs *attrs)
873 : {
874 : struct scatterlist *sg;
875 : int i;
876 :
877 : BUG_ON(dir == DMA_NONE);
878 :
879 0 : for_each_sg(sgl, sg, nelems, i) {
880 : phys_addr_t paddr = sg_phys(sg);
881 : dma_addr_t dev_addr = phys_to_dma(hwdev, paddr);
882 :
883 0 : if (swiotlb_force ||
884 0 : !dma_capable(hwdev, dev_addr, sg->length)) {
885 0 : phys_addr_t map = map_single(hwdev, sg_phys(sg),
886 : sg->length, dir);
887 0 : if (map == SWIOTLB_MAP_ERROR) {
888 : /* Don't panic here, we expect map_sg users
889 : to do proper error handling. */
890 0 : swiotlb_full(hwdev, sg->length, dir, 0);
891 0 : swiotlb_unmap_sg_attrs(hwdev, sgl, i, dir,
892 : attrs);
893 0 : sg_dma_len(sgl) = 0;
894 0 : return 0;
895 : }
896 0 : sg->dma_address = phys_to_dma(hwdev, map);
897 : } else
898 0 : sg->dma_address = dev_addr;
899 : sg_dma_len(sg) = sg->length;
900 : }
901 : return nelems;
902 : }
903 : EXPORT_SYMBOL(swiotlb_map_sg_attrs);
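/*
 * Illustrative sketch only: on failure swiotlb_map_sg_attrs() has already
 * unwound its partial mappings and returns 0, so the caller just reports
 * the error.
 *
 *	int mapped = swiotlb_map_sg_attrs(hwdev, sgl, nelems, dir, NULL);
 *	if (!mapped)
 *		report the failure (e.g. -ENOMEM);
 *	...
 *	swiotlb_unmap_sg_attrs(hwdev, sgl, mapped, dir, NULL);
 */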
904 :
905 : int
906 0 : swiotlb_map_sg(struct device *hwdev, struct scatterlist *sgl, int nelems,
907 : enum dma_data_direction dir)
908 : {
909 0 : return swiotlb_map_sg_attrs(hwdev, sgl, nelems, dir, NULL);
910 : }
911 : EXPORT_SYMBOL(swiotlb_map_sg);
912 :
913 : /*
914 : * Unmap a set of streaming mode DMA translations. Again, cpu read rules
915 : * concerning calls here are the same as for swiotlb_unmap_page() above.
916 : */
917 : void
918 0 : swiotlb_unmap_sg_attrs(struct device *hwdev, struct scatterlist *sgl,
919 : int nelems, enum dma_data_direction dir, struct dma_attrs *attrs)
920 : {
921 : struct scatterlist *sg;
922 : int i;
923 :
924 : BUG_ON(dir == DMA_NONE);
925 :
926 0 : for_each_sg(sgl, sg, nelems, i)
927 0 : unmap_single(hwdev, sg->dma_address, sg_dma_len(sg), dir);
928 :
929 0 : }
930 : EXPORT_SYMBOL(swiotlb_unmap_sg_attrs);
931 :
932 : void
933 0 : swiotlb_unmap_sg(struct device *hwdev, struct scatterlist *sgl, int nelems,
934 : enum dma_data_direction dir)
935 : {
936 0 : return swiotlb_unmap_sg_attrs(hwdev, sgl, nelems, dir, NULL);
937 : }
938 : EXPORT_SYMBOL(swiotlb_unmap_sg);
939 :
940 : /*
941 : * Make physical memory consistent for a set of streaming mode DMA translations
942 : * after a transfer.
943 : *
944 : * The same as swiotlb_sync_single_* but for a scatter-gather list, same rules
945 : * and usage.
946 : */
947 : static void
948 0 : swiotlb_sync_sg(struct device *hwdev, struct scatterlist *sgl,
949 : int nelems, enum dma_data_direction dir,
950 : enum dma_sync_target target)
951 : {
952 : struct scatterlist *sg;
953 : int i;
954 :
955 0 : for_each_sg(sgl, sg, nelems, i)
956 0 : swiotlb_sync_single(hwdev, sg->dma_address,
957 : sg_dma_len(sg), dir, target);
958 0 : }
959 :
960 : void
961 0 : swiotlb_sync_sg_for_cpu(struct device *hwdev, struct scatterlist *sg,
962 : int nelems, enum dma_data_direction dir)
963 : {
964 0 : swiotlb_sync_sg(hwdev, sg, nelems, dir, SYNC_FOR_CPU);
965 0 : }
966 : EXPORT_SYMBOL(swiotlb_sync_sg_for_cpu);
967 :
968 : void
969 0 : swiotlb_sync_sg_for_device(struct device *hwdev, struct scatterlist *sg,
970 : int nelems, enum dma_data_direction dir)
971 : {
972 0 : swiotlb_sync_sg(hwdev, sg, nelems, dir, SYNC_FOR_DEVICE);
973 0 : }
974 : EXPORT_SYMBOL(swiotlb_sync_sg_for_device);
975 :
976 : int
977 0 : swiotlb_dma_mapping_error(struct device *hwdev, dma_addr_t dma_addr)
978 : {
979 0 : return (dma_addr == phys_to_dma(hwdev, io_tlb_overflow_buffer));
980 : }
981 : EXPORT_SYMBOL(swiotlb_dma_mapping_error);
982 :
983 : /*
984 : * Return whether the given device DMA address mask can be supported
985 : * properly. For example, if your device can only drive the low 24-bits
986 : * during bus mastering, then you would pass 0x00ffffff as the mask to
987 : * this function.
988 : */
989 : int
990 0 : swiotlb_dma_supported(struct device *hwdev, u64 mask)
991 : {
992 0 : return phys_to_dma(hwdev, io_tlb_end - 1) <= mask;
993 : }
994 : EXPORT_SYMBOL(swiotlb_dma_supported);
|