File drivers/gpu/alga/amd/si/bus/ba.c changed (mode: 100644) (index fb0db1b..e9b650b)

 #include "bus/ba_private.h"
 #include "bus/core_coherent.h"
 #include "bus/core_sg_kernel.h"
+#include "bus/sg_user.h"
 
 void tlb_flush(struct pci_dev *dev)
 {

... static void cleanup(struct pci_dev *dev, struct ba_map *m, u8 flgs)

 	case BA_MAP_KERNEL_SG:
 		core_sg_kernel_cleanup(dev, m, flgs);
 		break;
-	default:
-		dev_err(&dev->dev, "ba:unable to cleanup unknown type mapping %u\n",
-			m->type);
+	case BA_MAP_USER_SG:
+		sg_user_cleanup(dev, m, flgs);
 		break;
 	}
 }

... err:

 	return -BA_ERR;
 }
 
-long ba_map(struct pci_dev *dev, struct scatterlist *sgl, int nents, u8 flgs)
+long ba_map(struct pci_dev *dev, struct sg_table *sg_tbl, int nents)
 {
-	struct ba_map *m;
-	struct scatterlist *sg;
-	int i;
-	struct dev_drv_data *dd;
-	long r;
-	u64 ps_of;
-
-	m = kzalloc(GFP_KERNEL, sizeof(*m));
-	if (m == NULL) {
-		dev_err(&dev->dev, "ba:unable to allocate memory for sg mapping\n");
-		goto err;
-	}
-
-	m->type = BA_MAP_USER_SG;
-	m->ptes_n = 0;
-
-	/* first count how many gpu ps we need */
-	for_each_sg(sgl, sg, nents, i) {
-		dma_addr_t bus_segment_addr = sg_dma_address(sg);
-		unsigned int bus_segment_sz = sg_dma_len(sg);
-
-		if (!IS_GPU_PAGE_ALIGNED(bus_segment_addr)) {
-			dev_err(&dev->dev, "ba:trying to map a bus segment not aligned on a gpu page\n");
-			goto err_free_map;
-		}
-
-		if (!IS_GPU_PAGE_ALIGNED(bus_segment_sz)) {
-			dev_err(&dev->dev, "ba:trying to map a bus segment of size not aligned on gpu page size\n");
-			goto err_free_map;
-		}
-		m->ptes_n += bus_segment_sz / GPU_PAGE_SZ;
-	}
-
-	dd = pci_get_drvdata(dev);
-
-	/* allocate a range of the aperture */
-	r = rng_alloc_align(&m->gpu_addr, &dd->ba.mng, GPU_PAGE_SZ * m->ptes_n,
-		GPU_PAGE_SZ);
-	if (r == -ALGA_ERR) {
-		dev_err(&dev->dev, "ba:unable to allocate gpu address space for sg mapping\n");
-		goto err_free_map;
-	}
-
-	ps_of = (m->gpu_addr - dd->ba.mng.s) / GPU_PAGE_SZ;
-	m->ptes_start = dd->ba.pt_start + ps_of * PTE_SZ;
-
-	/* TODO:need a *sg* core mapping of the PT */
-	/* TODO:implement PT update with DMA_PKT_WR packets */
-	return 0;
-
-err_free_map:
-	kfree(m);
-err:
-	return -BA_ERR;
+	return sg_user_map(dev, sg_tbl, nents);
+}
+
+void ba_unmap(struct pci_dev *dev, void __iomem *cpu_addr)
+{
+	struct ba_map *pos;
+	struct ba_map *tmp;
+	struct dev_drv_data *dd;
+
+	dd = pci_get_drvdata(dev);
+
+	list_for_each_entry_safe(pos, tmp, &dd->ba.maps, n) {
+		if (pos->cpu_addr == cpu_addr) {
+			list_del(&pos->n);
+			sg_user_cleanup(dev, pos, 0);
+			break;
+		}
+	}
 }
 
 /* we need finer unmapping for suspend support, then the flgs */
File drivers/gpu/alga/amd/si/bus/core_sg_kernel.c changed (mode: 100644) (index 9a8bb0c..304c8bc)

... static void cpu_kernel_unmap(struct ba_map *m)

 static long cpu_kernel_map(struct pci_dev *dev, struct ba_map *m)
 {
 	struct dev_drv_data *dd;
-	u64 cpu_page;
+	u64 cpu_p;
+	u64 map_sz;
 
 	dd = pci_get_drvdata(dev);
 
-	m->cpu_ps_n = (m->ptes_n * GPU_PAGE_SZ) >> PAGE_SHIFT;
-	m->cpu_ps = kzalloc(m->cpu_ps_n * sizeof(m->cpu_ps), GFP_KERNEL);
+	map_sz = m->ptes_n << GPU_PAGE_SHIFT;
+	if (map_sz & ~PAGE_MASK) {
+		dev_err(&dev->dev, "ba:core_sg_kernel:map size is not aligned on CPU PAGE_SIZE\n");
+		goto err;
+	}
+	m->cpu_ps_n = map_sz >> PAGE_SHIFT;
+	m->cpu_ps = kzalloc(m->cpu_ps_n * sizeof(*m->cpu_ps), GFP_KERNEL);
 
-	for (cpu_page = 0; cpu_page < m->cpu_ps_n; ++cpu_page) {
-		m->cpu_ps[cpu_page] = alloc_page(GFP_KERNEL | __GFP_ZERO
+	for (cpu_p = 0; cpu_p < m->cpu_ps_n; ++cpu_p) {
+		m->cpu_ps[cpu_p] = alloc_page(GFP_KERNEL | __GFP_ZERO
 							| __GFP_NOWARN);
-		if (!m->cpu_ps[cpu_page]) {
-			if (cpu_page)
-				cpu_ps_free_partial(m, cpu_page - 1);
-			return -BA_ERR;
+		if (!m->cpu_ps[cpu_p]) {
+			if (cpu_p)
+				cpu_ps_free_partial(m, cpu_p - 1);
+			goto err;
 		}
 	}
 
-	m->cpu_addr = vmap(m->cpu_ps, m->cpu_ps_n, VM_MAP, PAGE_KERNEL_IO);
+	m->cpu_addr = vmap(m->cpu_ps, m->cpu_ps_n, VM_MAP, PAGE_KERNEL);
 	if (m->cpu_addr == NULL)
 		goto err_free_ps;
 	return 0;
 
 err_free_ps:
 	cpu_ps_free(m);
+err:
 	return -BA_ERR;
 }
 

... err:

 
 /* return the updated pte_addr */
 static u64 bus_segment_map(struct pci_dev *dev, struct ba_map *m,
-	u64 bus_segment_addr, u64 bus_segment_len, u64 pte_addr)
+	u64 bus_segment_addr, u64 bus_segment_sz, u64 pte_addr)
 {
 	u64 gpu_ps_n;
 	u64 gpu_p;
 	u64 bus_addr;
 
 	bus_addr = bus_segment_addr;
-	gpu_ps_n = bus_segment_len / GPU_PAGE_SZ;
+	gpu_ps_n = GPU_PAGE_IDX(bus_segment_sz);
 
 	for (gpu_p = 0; gpu_p < gpu_ps_n; ++gpu_p) {
 		pte_mmio_regs_install(dev, pte_addr, bus_addr);

... static long gpu_bus_map(struct pci_dev *dev, struct ba_map *m)

 	struct scatterlist *sg;
 	int i;
 	long r;
-	u64 ps_of;
+	u64 first_pte_idx;
 	u64 pte_addr;
+	u64 map_sz;
 
 	dd = pci_get_drvdata(dev);
 
-	r = rng_alloc_align(&m->gpu_addr, &dd->ba.mng, m->ptes_n * GPU_PAGE_SZ,
-		GPU_PAGE_SZ);
+	map_sz = m->ptes_n << GPU_PAGE_SHIFT;
+	r = rng_alloc_align(&m->gpu_addr, &dd->ba.mng, map_sz, GPU_PAGE_SZ);
 	if (r == -ALGA_ERR)
 		goto err;
 
-	ps_of = (m->gpu_addr - dd->ba.mng.s) / GPU_PAGE_SZ;
-	m->ptes_start = dd->ba.pt_start + ps_of * PTE_SZ;
+	first_pte_idx = GPU_PAGE_IDX(m->gpu_addr - dd->ba.mng.s);
+	m->ptes_start = dd->ba.pt_start + first_pte_idx * PTE_SZ;
 	pte_addr = m->ptes_start;
 
 	for_each_sg(m->sg_tbl.sgl, sg, m->sg_tbl_list_nents, i) {
-		dma_addr_t bus_segment_addr = sg_dma_address(sg);
-		unsigned int bus_segment_sz = sg_dma_len(sg);
+		dma_addr_t bus_segment_addr;
+		unsigned int bus_segment_sz;
+
+		bus_segment_addr = sg_dma_address(sg);
+		bus_segment_sz = sg_dma_len(sg);
 
 		if (!IS_GPU_PAGE_ALIGNED(bus_segment_addr)) {
 			dev_err(&dev->dev, "ba:core_sg_kernel:trying to map a bus segment not aligned on a gpu page\n");

... err:

 	return -BA_ERR;
 }
 
+/* just in case... */
+static void cpu_ps_dummy_fill(struct pci_dev *dev, struct ba_map *m)
+{
+	struct dev_drv_data *dd;
+	u64 __iomem *pte_addr;
+	u64 ptes_n;
+	u64 dummy_pte;
+
+	dd = pci_get_drvdata(dev);
+
+	dummy_pte = dd->ba.dummy_bus_addr | PTE_VALID | PTE_SYSTEM
+				| PTE_SNOOPED | PTE_READABLE | PTE_WRITEABLE;
+
+	pte_addr = m->cpu_addr;
+	ptes_n = m->ptes_n;
+	while (ptes_n--)
+		*pte_addr++ = dummy_pte;
+}
+
 long core_sg_kernel_map(struct pci_dev *dev, u64 sz, struct ba_map **m)
 {
 	struct dev_drv_data *dd;
 	long r;
 
-	if (sz % GPU_PAGE_SZ) {
+	if (!IS_GPU_PAGE_ALIGNED(sz)) {
 		dev_err(&dev->dev, "ba:core_sg_kernel:size not aligned on gpu page size\n");
 		goto err;
 	}

... long core_sg_kernel_map(struct pci_dev *dev, u64 sz, struct ba_map **m)

 		dev_err(&dev->dev, "ba:core_sg_kernel:unable to create cpu mapping\n");
 		goto err_free_map;
 	}
+
+	cpu_ps_dummy_fill(dev, *m);
 
 	r = cpu_bus_map(dev, *m);
 	if (r == -BA_ERR) {
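
Both the kernel-sg path above and the user-sg path in the new file below locate a mapping's page table entries with the same arithmetic: the GPU address allocated out of the bus aperture is converted to a page index relative to the aperture start, and that index selects a slot in the page table. A minimal illustration of that computation, assuming GPU_PAGE_IDX(x) stands for x >> GPU_PAGE_SHIFT and PTE_SZ is the byte size of one 64-bit entry (both inferred from this patch, not confirmed elsewhere in the tree):

/* illustration only, not part of the patch */
static u64 pte_addr_of(struct dev_drv_data *dd, u64 gpu_addr)
{
	/* how many GPU pages the address sits past the aperture start */
	u64 pte_idx = GPU_PAGE_IDX(gpu_addr - dd->ba.mng.s);

	/* byte address of the matching entry inside the page table */
	return dd->ba.pt_start + pte_idx * PTE_SZ;
}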
File drivers/gpu/alga/amd/si/bus/sg_user.c added (mode: 100644) (index 0000000..78227c6)

/*
  author Sylvain Bertrand <digital.ragnarok@gmail.com>
  Protected by GNU Affero GPL v3 with some exceptions.
  See README at root of alga tree.
*/
#include <linux/pci.h>
#include <asm/byteorder.h>
#include <linux/cdev.h>
#include <linux/vmalloc.h>

#include <alga/alga.h>
#include <alga/rng_mng.h>
#include <alga/timing.h>
#include <alga/pixel_fmts.h>
#include <alga/amd/dce6/dce6.h>

#include "mc.h"
#include "ih.h"
#include "rlc.h"
#include "dmas.h"
#include "bus/ba.h"
#include "gpu/cps.h"
#include "gpu/gpu.h"
#include "drv.h"

#include "regs.h"

#include "bus/ba_private.h"

static long ptes_account(struct pci_dev *dev, struct sg_table *sg_tbl,
	int nents, struct ba_map *m)
{
	struct scatterlist *sg;
	int i;

	for_each_sg(sg_tbl->sgl, sg, nents, i) {
		dma_addr_t bus_segment_addr;
		unsigned int bus_segment_sz;

		bus_segment_addr = sg_dma_address(sg);
		bus_segment_sz = sg_dma_len(sg);

		if (!IS_GPU_PAGE_ALIGNED(bus_segment_addr)) {
			dev_err(&dev->dev, "ba:sg_user:trying to map a bus segment not aligned on a gpu page\n");
			return -BA_ERR;
		}

		if (!IS_GPU_PAGE_ALIGNED(bus_segment_sz)) {
			dev_err(&dev->dev, "ba:sg_user:trying to map a bus segment of size not aligned on gpu page size\n");
			return -BA_ERR;
		}
		m->ptes_n += GPU_PAGE_IDX(bus_segment_sz);
	}
	return 0;
}

/* return the updated pte_cpu_addr */
static u64 __iomem *bus_segment_map(struct pci_dev *dev, u64 bus_segment_addr,
	u64 bus_segment_sz, u64 __iomem *pte_cpu_addr)
{
	u64 gpu_ps_n;
	u64 gpu_p;
	u64 bus_addr;

	bus_addr = bus_segment_addr;
	gpu_ps_n = GPU_PAGE_IDX(bus_segment_sz);

	for (gpu_p = 0; gpu_p < gpu_ps_n; ++gpu_p) {
		*pte_cpu_addr = cpu_to_le64(bus_addr | PTE_VALID | PTE_SYSTEM
				| PTE_SNOOPED | PTE_READABLE | PTE_WRITEABLE);
		++pte_cpu_addr;
		bus_addr += GPU_PAGE_SZ;
	}
	return pte_cpu_addr;
}

static void ptes_cpu_side_update(struct pci_dev *dev, struct ba_map *m,
	struct sg_table *sg_tbl, int nents)
{
	struct dev_drv_data *dd;
	u64 __iomem *pte_cpu_addr;
	struct scatterlist *sg;
	int i;

	dd = pci_get_drvdata(dev);

	/* locate the first pte of this mapping cpu side */
	pte_cpu_addr = dd->ba.pt_map->cpu_addr;
	pte_cpu_addr += GPU_PAGE_IDX(m->gpu_addr - dd->ba.mng.s);

	for_each_sg(sg_tbl->sgl, sg, nents, i) {
		dma_addr_t bus_segment_addr;
		unsigned int bus_segment_sz;

		bus_segment_addr = sg_dma_address(sg);
		bus_segment_sz = sg_dma_len(sg);

		pte_cpu_addr = bus_segment_map(dev, bus_segment_addr,
			bus_segment_sz, pte_cpu_addr);
	}
}

static void ptes_gpu_side_update(struct pci_dev *dev, struct ba_map *m)
{
	struct dev_drv_data *dd;
	u8 dma;
	u64 remaining_dma_sz;
	u64 ptes_start_aperture; /* source */
	u64 ptes_start_vram; /* destination */

	dd = pci_get_drvdata(dev);

	remaining_dma_sz = m->ptes_n * PTE_SZ;

	ptes_start_vram = m->ptes_start;
	ptes_start_aperture = dd->ba.pt_map->gpu_addr
		+ GPU_PAGE_IDX(m->gpu_addr - dd->ba.mng.s) * PTE_SZ;
	/*
	 * WARNING: there are 3 nents: the original one, the one from the
	 * sg table init, then the final one from the actual mapping.
	 * The dma api functions must be called using the nents from the sg
	 * table init. The parsing of the scatterlist must be done using the
	 * final one.
	 */
	dma_sync_sg_for_device(&dev->dev, dd->ba.pt_map->sg_tbl.sgl,
		dd->ba.pt_map->sg_tbl.nents, DMA_TO_DEVICE);

	dma = dmas_select(dev);
	/*
	 * TODO:
	 * - should do a ring reservation mechanism able to wait
	 *   based on the rptr writeback value
	 */
	spin_lock(&dd->dmas[dma].lock);
	while (remaining_dma_sz) {
		u64 dma_sz;

		dma_sz = remaining_dma_sz;
		if (dma_sz > 0xfffff)
			dma_sz = 0xfffff;
		remaining_dma_sz -= dma_sz;

		dma_wr(dev, dma, DMA_PKT(DMA_PKT_CPY, 1, 0, 0, dma_sz));
		dma_wr(dev, dma, lower_32_bits(ptes_start_vram));
		dma_wr(dev, dma, lower_32_bits(ptes_start_aperture));
		dma_wr(dev, dma, upper_32_bits(ptes_start_vram) & 0xff);
		dma_wr(dev, dma, upper_32_bits(ptes_start_aperture) & 0xff);
		ptes_start_aperture += dma_sz;
		ptes_start_vram += dma_sz;
	}

	/* TODO: add a fence system */

	/* write the fence */
	dma_wr(dev, dma, DMA_PKT(DMA_PKT_FENCE, 0, 0, 0, 0));
	dma_wr(dev, dma, 0x80000000 & 0xfffffffc);
	dma_wr(dev, dma, upper_32_bits(0x80000000) & 0xff);
	dma_wr(dev, dma, 0xcafedead);

	/* generate an interrupt */
	dma_wr(dev, dma, DMA_PKT(DMA_PKT_TRAP, 0, 0, 0, 0));

	/* XXX: why?? */
	dma_wr(dev, dma, DMA_PKT(DMA_PKT_SRBM_WR, 0, 0, 0, 0));
	dma_wr(dev, dma, (0xf << 16) | (VM_CTX_0_PT_BASE_ADDR >> 2));
	dma_wr(dev, dma, GPU_PAGE_IDX(dd->ba.pt_start));

	/* flush hdp cache */
	dma_wr(dev, dma, DMA_PKT(DMA_PKT_SRBM_WR, 0, 0, 0, 0));
	dma_wr(dev, dma, (0xf << 16) | (HDP_MEM_COHERENCY_FLUSH_CTL >> 2));
	dma_wr(dev, dma, 1);

	/* bits 0-15 are the vm contexts 0-15 */
	dma_wr(dev, dma, DMA_PKT(DMA_PKT_SRBM_WR, 0, 0, 0, 0));
	dma_wr(dev, dma, (0xf << 16) | (VM_INVALIDATE_REQ >> 2));
	dma_wr(dev, dma, 1);
	dma_commit(dev, dma);
	spin_unlock(&dd->dmas[dma].lock);
}

long sg_user_map(struct pci_dev *dev, struct sg_table *sg_tbl, int nents)
{
	struct ba_map *m;
	struct dev_drv_data *dd;
	long r;

	m = kzalloc(sizeof(*m), GFP_KERNEL);
	if (m == NULL) {
		dev_err(&dev->dev, "ba:sg_user:unable to allocate memory for map\n");
		goto err;
	}

	m->type = BA_MAP_USER_SG;
	m->ptes_n = 0;

	r = ptes_account(dev, sg_tbl, nents, m);
	if (r == -BA_ERR)
		goto err_free_map;

	dd = pci_get_drvdata(dev);

	/* allocate a range of the aperture */
	r = rng_alloc_align(&m->gpu_addr, &dd->ba.mng, GPU_PAGE_SZ * m->ptes_n,
		GPU_PAGE_SZ);
	if (r == -ALGA_ERR) {
		dev_err(&dev->dev, "ba:sg_user:unable to allocate gpu address space\n");
		goto err_free_map;
	}

	m->ptes_start = dd->ba.pt_start
		+ GPU_PAGE_IDX(m->gpu_addr - dd->ba.mng.s) * PTE_SZ;
	ptes_cpu_side_update(dev, m, sg_tbl, nents);
	ptes_gpu_side_update(dev, m);

	list_add(&m->n, &dd->ba.maps);
	return 0;

err_free_map:
	kfree(m);
err:
	return -BA_ERR;
}

static void dummy_ptes_cpu_side_update(struct pci_dev *dev, struct ba_map *m)
{
	struct dev_drv_data *dd;
	u64 __iomem *pte_cpu_addr;
	u64 ptes_n;
	u64 dummy_pte;

	dd = pci_get_drvdata(dev);

	dummy_pte = dd->ba.dummy_bus_addr | PTE_VALID | PTE_SYSTEM
			| PTE_SNOOPED | PTE_READABLE | PTE_WRITEABLE;

	/* locate the first pte of this mapping cpu side */
	pte_cpu_addr = dd->ba.pt_map->cpu_addr;
	pte_cpu_addr += GPU_PAGE_IDX(m->gpu_addr - dd->ba.mng.s);

	ptes_n = m->ptes_n;
	while (ptes_n--)
		*pte_cpu_addr++ = dummy_pte;
}

void sg_user_cleanup(struct pci_dev *dev, struct ba_map *m, u8 flgs)
{
	struct dev_drv_data *dd;

	dd = pci_get_drvdata(dev);

	if (!(flgs & BA_NO_PT_UPDATE)) {
		dummy_ptes_cpu_side_update(dev, m);
		ptes_gpu_side_update(dev, m);
	}
	rng_free(&dd->ba.mng, m->gpu_addr);
}
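
The WARNING comment in ptes_gpu_side_update() describes a general DMA API rule rather than anything specific to this driver: dma_map_sg() may coalesce entries and returns the number of bus segments it actually produced, and that returned count is what segment iteration should use, while dma_sync_sg_for_device() and dma_unmap_sg() keep taking the count that was originally handed to dma_map_sg(). A generic sketch of that convention (names and structure here are illustrative, not taken from this tree):

#include <linux/device.h>
#include <linux/dma-mapping.h>
#include <linux/errno.h>
#include <linux/scatterlist.h>

static int map_tbl_for_device(struct device *dev, struct sg_table *tbl)
{
	struct scatterlist *sg;
	int mapped_nents;
	int i;

	mapped_nents = dma_map_sg(dev, tbl->sgl, tbl->orig_nents,
		DMA_BIDIRECTIONAL);
	if (mapped_nents == 0)
		return -ENOMEM;

	/* walk the (possibly coalesced) bus segments with the returned count */
	for_each_sg(tbl->sgl, sg, mapped_nents, i) {
		dma_addr_t bus_addr = sg_dma_address(sg);
		unsigned int bus_len = sg_dma_len(sg);

		dev_dbg(dev, "segment %d: %pad+%u\n", i, &bus_addr, bus_len);
	}

	/* sync (and later unmap) with the count handed to dma_map_sg() */
	dma_sync_sg_for_device(dev, tbl->sgl, tbl->orig_nents, DMA_TO_DEVICE);
	return mapped_nents;
}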
File drivers/gpu/alga/amd/si/dmas.c changed (mode: 100644) (index 2d73d3e..4eefe52)

... void dmas_resume(struct pci_dev *dev)

 
 	for (dma = 0; dma < DMAS_N; ++dma)
 		resume(dev, dma);
+
+}
+
+static void intr_ena(struct pci_dev *dev, u8 dma)
+{
+	u32 dma_ctl;
+	dma_ctl = rr32(dev, regs_dma_ctl[dma]);
+	dma_ctl |= DC_TRAP_ENA;
+	wr32(dev, dma_ctl, regs_dma_ctl[dma]);
+}
+
+void dmas_intr_ena(struct pci_dev *dev)
+{
+	u8 dma;
+
+	for (dma = 0; dma < DMAS_N; ++dma)
+		intr_ena(dev, dma);
+}
+
+static void intr_reset(struct pci_dev *dev, u8 dma)
+{
+	u32 dma_ctl;
+	dma_ctl = rr32(dev, regs_dma_ctl[dma]);
+	dma_ctl &= ~DC_TRAP_ENA;
+	wr32(dev, dma_ctl, regs_dma_ctl[dma]);
+}
+
+void dmas_intr_reset(struct pci_dev *dev)
+{
+	u8 dma;
+
+	for (dma = 0; dma < DMAS_N; ++dma)
+		intr_reset(dev, dma);
 }
 
 void dmas_init_once(struct pci_dev *dev)

... void dmas_init_once(struct pci_dev *dev)

 
 	dd = pci_get_drvdata(dev);
 
-	for (dma = 0; dma < DMAS_N; ++dma) {
+	for (dma = 0; dma < DMAS_N; ++dma)
 		spin_lock_init(&dd->dmas[dma].lock);
-	}
 }
 
 void dma_wr(struct pci_dev *dev, u8 dma, u32 v)

... void dma_commit(struct pci_dev *dev, u8 dma)

 	wr32(dev, dd->dmas[dma].wptr, regs_dma_rb_wptr[dma]);
 	rr32(dev, regs_dma_rb_wptr[dma]);
 }
+
+u8 dmas_select(struct pci_dev *dev)
+{
+	struct dev_drv_data *dd;
+	int selector;
+
+	dd = pci_get_drvdata(dev);
+	selector = atomic_inc_return(&dd->dmas_selector);
+	return selector & 1;
+}
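
The new dmas_select()/dma_wr()/dma_commit() helpers are meant to be used as a unit, under the per-engine spinlock, exactly as bus/sg_user.c does above. A condensed sketch of that calling pattern (the fence packet payload is copied from sg_user.c and serves only as an example):

/* illustration only, not part of the patch */
static void dma_ring_usage_sketch(struct pci_dev *dev)
{
	struct dev_drv_data *dd = pci_get_drvdata(dev);
	u8 dma = dmas_select(dev);	/* alternate between the two engines */

	spin_lock(&dd->dmas[dma].lock);
	dma_wr(dev, dma, DMA_PKT(DMA_PKT_FENCE, 0, 0, 0, 0));
	dma_wr(dev, dma, 0x80000000 & 0xfffffffc);
	dma_wr(dev, dma, upper_32_bits(0x80000000) & 0xff);
	dma_wr(dev, dma, 0xcafedead);
	dma_commit(dev, dma);		/* publish the new ring write pointer */
	spin_unlock(&dd->dmas[dma].lock);
}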
File drivers/gpu/alga/amd/si/drv.c changed (mode: 100644) (index 514b6cf..20dc6a7)

... void vram_w32(struct pci_dev *dev, u32 val, u64 aligned_of)

 	writel(cpu_to_le32(lower_32_bits(aligned_of) | MI_VRAM),
 						dd->regs + MM_IDX);
 	writel(cpu_to_le32(val), dd->regs + MM_DATA);
+	mb();
 }
 
 static u32 extern_rr32(struct device *dev, u32 of)

... static int probe(struct pci_dev *dev, const struct pci_device_id *id)

 	int r;
 	struct dev_drv_data *dd;
 	struct atb_pp_defaults atb_pp_defaults;
-	//TEST DMA -------------------------------------------------------------
-	u64 gpu_addr;
-	u32 i;
-	//TEST DMA -------------------------------------------------------------
 
 	r = pci_enable_device(dev);
 	if (r) {

... static int probe(struct pci_dev *dev, const struct pci_device_id *id)

 	pci_set_master(dev);
 	dce6_hpds_intr_ena(dd->dce);
 	cps_intr_ena(dev);
+	dmas_intr_ena(dev);
 
 	//TEST MANUAL PM -------------------------------------------------------
 	atb_pp(dd->atb);

... static int probe(struct pci_dev *dev, const struct pci_device_id *id)

 	//atb_mem_clk_set(dd->atb, 15000);/* 150MHz */
 	//atb_voltage_set(dd->atb, ATB_VOLTAGE_TYPE_VDDC, 850);/* 850 mV */
 	//TEST MANUAL PM -------------------------------------------------------
-
-	//TEST DMA -------------------------------------------------------------
-	rng_alloc_align(&gpu_addr, &dd->vram.mng, 2*GPU_PAGE_SZ, GPU_PAGE_SZ);
-	printk(KERN_INFO "GPU_ADDR=0x%016llx\n", gpu_addr);
-	vram_w32(dev, 0xdeadbeef, gpu_addr);
-	vram_w32(dev, 0xcafedead, gpu_addr + GPU_PAGE_SZ);
-	printk(KERN_INFO "BEFORE:0x%08x\n", vram_r32(dev, gpu_addr
-						+ GPU_PAGE_SZ));
-
-	spin_lock(&dd->dmas[0].lock);
-
-	dma_wr(dev, 0, DMA_PKT(DMA_PKT_CPY, 1, 0, 0, GPU_PAGE_SZ));
-	dma_wr(dev, 0, lower_32_bits(gpu_addr + GPU_PAGE_SZ));
-	dma_wr(dev, 0, lower_32_bits(gpu_addr));
-	dma_wr(dev, 0, upper_32_bits(gpu_addr + GPU_PAGE_SZ) & 0xff);
-	dma_wr(dev, 0, upper_32_bits(gpu_addr) & 0xff);
-	//dma_wr(dev, 0, DMA_PKT(DMA_PKT_WR, 0, 0, 0, 1));
-	//dma_wr(dev, 0, lower_32_bits(gpu_addr + GPU_PAGE_SZ));
-	//dma_wr(dev, 0, upper_32_bits(gpu_addr + GPU_PAGE_SZ) & 0xff);
-	//dma_wr(dev, 0, 0xdeadbeef);
-
-	dma_commit(dev, 0);
-	spin_unlock(&dd->dmas[0].lock);
-
-	for (i = 0; i < 100000; ++i) {
-		/* must read the vram location */
-		u32 val = vram_r32(dev, gpu_addr + GPU_PAGE_SZ);
-		if (val == 0xdeadbeef)
-			break;
-		udelay(1);
-	}
-	printk(KERN_INFO "AFTER:0x%08x\n", vram_r32(dev, gpu_addr
-						+ GPU_PAGE_SZ));
-	rng_free(&dd->vram.mng, gpu_addr);
-	//TEST DMA -------------------------------------------------------------
-
 	dev_info(&dev->dev, "ready\n");
 	return 0;
 
File drivers/gpu/alga/amd/si/fops.c changed (mode: 100644) (index 6250349..bd3d59d)

... static void vma_open(struct vm_area_struct *vma)

 static void vma_close(struct vm_area_struct *vma)
 {
 	struct vma_private_data *vma_data;
+	struct file_private_data *f_data;
+
 	vma_data = vma->vm_private_data;
+	f_data = vma->vm_file->private_data;
 
 	if (!atomic_dec_and_test(&vma_data->refs_n))
 		return;
 
 	/* we are the last */
 
+	ba_unmap(f_data->d, (void __iomem *)vma->vm_start);
 	cpu_bus_mapping_remove(vma);
 	cpu_mapping_remove(vma);
-	vma_data = NULL;
 }
 
 static int vma_fault(struct vm_area_struct *vma, struct vm_fault *vmf)

... static int cpu_mapping_init_once(struct vm_area_struct *vma)

 	vma->vm_ops = &vm_ops;
 	vma->vm_private_data = vma_data;
 	vma->vm_flags |= VM_IO | VM_DONTEXPAND | VM_DONTDUMP;
-	/* XXX: PAGE_KERNEL_IO_WC usefull for vma->vm_page_prot ? */
 	return 0;
 
 err_free_cpu_ps_mem:

... static int gpu_bus_mapping_init(struct vm_area_struct *vma)

 	vma_data = vma->vm_private_data;
 	f_data = vma->vm_file->private_data;
 
-	/* TODO: KEEP GOING HERE ONCE BA_MAP IMPLEMENTED (need DMA engines) */
-	ba_map(f_data->d, vma_data->sg_tbl.sgl, vma_data->sg_tbl_list_nents,
-		BA_USE_MMIO_REGS);
-	return 0;
+	return ba_map(f_data->d, &vma_data->sg_tbl,
+		vma_data->sg_tbl_list_nents);
 }
 
-/* TODO: check if mmap is serialized */
 static int mmap(struct file *f, struct vm_area_struct *vma)
 {
 	int r;

... static int mmap(struct file *f, struct vm_area_struct *vma)

 		goto err_remove_cpu_mapping;
 
 	r = gpu_bus_mapping_init(vma);
-	if (r !=0 )
+	if (r != 0)
 		goto err_remove_gpu_bus_mapping;
 
 	vma_open(vma);