List of commits:
Subject Hash Author Date (UTC)
triangle basic pattern working 97faa34485d6d8b7b19dd54740d5f71fdae7691f Sylvain BERTRAND 2012-10-09 00:55:39
more regs b04101d3e2fcc6d5bac47c3b7999a63b8e70ec86 Sylvain BERTRAND 2012-10-06 13:49:36
add CPs interrupts 35f9940a0ac89e875d852b8ca32009954d92faa5 Sylvain BERTRAND 2012-10-06 13:23:06
tiling, hdp fixes aacf6796f68771f4f6b680844ae495147849ec64 Sylvain BERTRAND 2012-10-06 02:51:51
minor cps cleanup ed2a417ef5177972951226b2d6988257ab66c935 Sylvain BERTRAND 2012-10-05 11:04:15
pattern: triangle, not crashing the GPU anymore 8d21ea32a699a9796e2c89fd69469d4f3da3f96d Sylvain BERTRAND 2012-09-27 13:09:55
triangle pattern code, not working 750cb80c49e44266398ff9c920192b5b5d7ba5a1 Sylvain BERTRAND 2012-09-26 18:13:46
linux 3.5.3 abd704f81d01024f1398b51fbb29e429f6f60ec2 Sylvain BERTRAND 2012-09-12 12:08:29
hw deals properly with pwr gating crtc pairs b5c8a75222e00819814dcff0a4042cd186ebb4ec Sylvain BERTRAND 2012-09-03 09:39:54
upstream: agressive shutdown of memory requests 1e88e5a202ba227e7d5e95f7047b8ee7ca647792 Sylvain BERTRAND 2012-08-22 23:19:59
towards basic GPU programming 01acf921a2efddf521967487865a49347f3f4045 Sylvain BERTRAND 2012-08-16 00:28:19
triangle pattern plumbering 853a73e0052f48b9183f16d02ea9398a2556d040 Sylvain BERTRAND 2012-08-13 15:26:43
pattern ioctl, framebuffer fill pattern 9c59bb9b9c16bbb822587b2a44ab38f38a4371ac Sylvain BERTRAND 2012-08-12 10:51:09
context clear of command processors 9e02a78b903706e3df0af0abc24322c62317c13e Sylvain BERTRAND 2012-08-09 23:09:38
register love de4d6c0aa40f2ca9f2d69190d14bc651f7612b06 Sylvain BERTRAND 2012-08-09 22:55:11
mainly cosmetics 11a2377b2c41cc118078170e6e6fb589fc5937a0 Sylvain BERTRAND 2012-08-09 08:04:21
try to improve stability 25211097e7ecfd370feed5c5d682d26d80c96ae2 Sylvain BERTRAND 2012-08-05 18:40:17
wrong micro engine magic init magic numbers 0d2d45c69b909788418b0d87b7c6727292d2ef70 Sylvain BERTRAND 2012-08-05 16:57:34
relocate set pci master b62baf5e967e3ad6b4f4c5319beec6f7a5225de0 Sylvain BERTRAND 2012-08-04 21:10:37
don't be too hasty at dp hotplug connexion 17a2687ef35639fe70d860b46e45ed061b04999f Sylvain BERTRAND 2012-08-04 20:24:12
Commit 97faa34485d6d8b7b19dd54740d5f71fdae7691f - triangle basic pattern working
Author: Sylvain BERTRAND
Author date (UTC): 2012-10-09 00:55
Committer name: Sylvain BERTRAND
Committer date (UTC): 2012-10-09 00:55
Parent(s): b04101d3e2fcc6d5bac47c3b7999a63b8e70ec86
Signer:
Signing key:
Signing status: N
Tree: 846b8fc1ca75790bacbf4042f5a5d8c40c92298a
File Lines added Lines deleted
drivers/gpu/alga/amd/si/bus/ba.c 7 14
drivers/gpu/alga/amd/si/gpu/regs_ctx.h 67 18
drivers/gpu/alga/amd/si/ih.c 3 0
drivers/gpu/alga/amd/si/patterns/tri.c 451 393
drivers/gpu/alga/amd/si/regs.h 4 3
drivers/gpu/alga/amd/si/silicium_blks/mc 5 5
File drivers/gpu/alga/amd/si/bus/ba.c changed (mode: 100644) (index 5d9fad0..f8aafb7)
... ... static void ctx0_tlb_flush(struct pci_dev *dev)
42 42 wr32(dev, 1, VM_INVALIDATE_REQ); wr32(dev, 1, VM_INVALIDATE_REQ);
43 43 } }
44 44
45 static void ctx_tlb_flush(struct ctx *ctx)
46 {
47 /* only ctx0 in currently supported */
48 ctx0_tlb_flush(ctx->dev);
49 }
50
51 45 static int dummy_page(struct pci_dev *dev) static int dummy_page(struct pci_dev *dev)
52 46 { {
53 47 struct dev_drv_data *dd; struct dev_drv_data *dd;
 
... ... static int ctx_init(struct pci_dev *dev, struct ctx *ctx, u64 start, u64 sz)
95 89 return 0; return 0;
96 90 } }
97 91
98 /* only ctx0 */
99 92 int ba_init(struct pci_dev *dev) int ba_init(struct pci_dev *dev)
100 93 { {
101 94 struct dev_drv_data *dd; struct dev_drv_data *dd;
 
... ... int ba_init(struct pci_dev *dev)
136 129
137 130 ctx0_tlb_flush(dev); ctx0_tlb_flush(dev);
138 131
139 /* setup tlb control, MX, X is generic for various blocks */
132 /* setup tlb control, MX, X is generic for various client blocks */
140 133 wr32(dev, (0xa << 7)/* ??? */ wr32(dev, (0xa << 7)/* ??? */
141 134 | MVMLTC_ENA_L1_TLB | MVMLTC_ENA_L1_TLB
142 | set(MVMLTC_SYS_ACCESS_MODE, MVMLTC_NOT_IN_SYS)
135 | set(MVMLTC_SYS_ACCESS_MODE, MVMLTC_MAPPED_ACCESS_NOT_IN_SYS)
143 136 | MVMLTC_ENA_ADVANCED_DRIVER_MODEL | MVMLTC_ENA_ADVANCED_DRIVER_MODEL
144 137 | set(MVMLTC_SYS_APER_UNMAPPED_ACCESS, MVMLTC_PASS_THRU), | set(MVMLTC_SYS_APER_UNMAPPED_ACCESS, MVMLTC_PASS_THRU),
145 138 MC_VM_MX_L1_TLB_CTL); MC_VM_MX_L1_TLB_CTL);
 
... ... int ba_init(struct pci_dev *dev)
170 163 wr32(dev, 0, 0x15d8); wr32(dev, 0, 0x15d8);
171 164 wr32(dev, 0, 0x15dc); wr32(dev, 0, 0x15dc);
172 165
173 /* indentity to max 4GB */
166 /* the context 1 registers actually hold the settings for contexts 1-15 */
174 167 wr32(dev, 0, VM_CTX_1_PT_START_ADDR); wr32(dev, 0, VM_CTX_1_PT_START_ADDR);
175 168 wr32(dev, 1 << 20, VM_CTX_1_PT_END_ADDR); wr32(dev, 1 << 20, VM_CTX_1_PT_END_ADDR);
176 169 for (i = 1; i < VM_CTXS_N; ++i) for (i = 1; i < VM_CTXS_N; ++i)
170 /* fake page table */
177 171 wr32(dev, GPU_PAGE_IDX(dd->ba.ctx0.pt_start), wr32(dev, GPU_PAGE_IDX(dd->ba.ctx0.pt_start),
178 172 regs_vm_ctx_pt_base_addr[i]); regs_vm_ctx_pt_base_addr[i]);
179 173 wr32(dev, GPU_PAGE_IDX(dd->ba.dummy_bus_addr), wr32(dev, GPU_PAGE_IDX(dd->ba.dummy_bus_addr),
 
... ... int ba_init(struct pci_dev *dev)
186 180 return 0; return 0;
187 181 } }
188 182
189 /* only ctx0 */
190 183 void ba_shutdown(struct pci_dev *dev) void ba_shutdown(struct pci_dev *dev)
191 184 { {
192 185 struct dev_drv_data *dd; struct dev_drv_data *dd;
 
... ... void ba_shutdown(struct pci_dev *dev)
196 189 wr32(dev, 0, VM_CTX_1_CTL_0); wr32(dev, 0, VM_CTX_1_CTL_0);
197 190
198 191 /* setup tlb control */ /* setup tlb control */
199 wr32(dev, set(MVMLTC_SYS_ACCESS_MODE, MVMLTC_NOT_IN_SYS)
192 wr32(dev, set(MVMLTC_SYS_ACCESS_MODE, MVMLTC_MAPPED_ACCESS_NOT_IN_SYS)
200 193 | set(MVMLTC_SYS_APER_UNMAPPED_ACCESS, MVMLTC_PASS_THRU), | set(MVMLTC_SYS_APER_UNMAPPED_ACCESS, MVMLTC_PASS_THRU),
201 194 MC_VM_MX_L1_TLB_CTL); MC_VM_MX_L1_TLB_CTL);
202 195
 
... ... static struct ba_map *ctx_map_coherent_contig(struct ctx *ctx, unsigned gpu_ps_n
305 298 wmb(); wmb();
306 299
307 300 /* flush ctx tlb to make live the mapping on the gpu */ /* flush ctx tlb to make live the mapping on the gpu */
308 ctx_tlb_flush(ctx);
301 ctx0_tlb_flush(ctx->dev);
309 302 return m; return m;
310 303
311 304 err_free_rng: err_free_rng:
 
... ... static void unmap(struct ba_map *m)
357 350 wmb(); wmb();
358 351
359 352 /* flush ctx tlb to make live the restored dummy pages */ /* flush ctx tlb to make live the restored dummy pages */
360 ctx_tlb_flush(ctx);
353 ctx0_tlb_flush(ctx->dev);
361 354 } }
362 355
363 356 /* just an helper for the core mappings */ /* just an helper for the core mappings */
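A quick check of the VM context range programmed above (a hedged reading, assuming the 4 KiB GPU page granularity implied by GPU_PAGE_IDX):

	/* PT_START_ADDR = 0 and PT_END_ADDR = 1 << 20 are in GPU pages:
	 * (1 << 20) pages * 4 KiB = 4 GiB of GPU virtual address space for
	 * contexts 1-15, matching the dropped "identity to max 4GB" note;
	 * each of those contexts is pointed at ctx0's page table as a
	 * placeholder ("fake page table"). */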
File drivers/gpu/alga/amd/si/gpu/regs_ctx.h changed (mode: 100644) (index 9112d61..63aad21)
974 974 #define VPIE_RESET_ON_EOI_DIS BIT(1) #define VPIE_RESET_ON_EOI_DIS BIT(1)
975 975
976 976 #define VGT_PRIM_ID_RESET 0x28a8c #define VGT_PRIM_ID_RESET 0x28a8c
977
977 #define VGT_EVENT_INITIATOR 0x28a90
978 #define VEI_EVENT_TYPE 0x0000003f
979 #define VEI_SAMPLE_STREAMOUTSTATS1 0x01
980 #define VEI_SAMPLE_STREAMOUTSTATS2 0x02
981 #define VEI_SAMPLE_STREAMOUTSTATS3 0x03
982 #define VEI_CACHE_FLUSH_TS 0x04
983 #define VEI_CONTEXT_DONE 0x05
984 #define VEI_CACHE_FLUSH 0x06
985 #define VEI_CS_PARTIAL_FLUSH 0x07
986 #define VEI_VGT_STREAMOUT_SYNC 0x08
987 #define VEI_VGT_STREAMOUT_RESET 0x0a
988 #define VEI_END_OF_PIPE_INCR_DE 0x0b
989 #define VEI_END_OF_PIPE_IB_END 0x0c
990 #define VEI_RST_PIX_CNT 0x0d
991 #define VEI_VS_PARTIAL_FLUSH 0x0f
992 #define VEI_PS_PARTIAL_FLUSH 0x10
993 #define VEI_FLUSH_HS_OUTPUT 0x11
994 #define VEI_FLUSH_LS_OUTPUT 0x12
995 #define VEI_CACHE_FLUSH_AND_INV_TS_EVENT 0x14
996 #define VEI_ZPASS_DONE 0x15
997 #define VEI_CACHE_FLUSH_AND_INV_EVENT 0x16
998 #define VEI_PERFCOUNTER_START 0x17
999 #define VEI_PERFCOUNTER_STOP 0x18
1000 #define VEI_PIPELINESTAT_START 0x19
1001 #define VEI_PIPELINESTAT_STOP 0x1a
1002 #define VEI_PERFCOUNTER_SAMPLE 0x1b
1003 #define VEI_FLUSH_ES_OUTPUT 0x1c
1004 #define VEI_FLUSH_GS_OUTPUT 0x1d
1005 #define VEI_SAMPLE_PIPELINESTAT 0x1e
1006 #define VEI_SO_VGTSTREAMOUT_FLUSH 0x1f
1007 #define VEI_SAMPLE_STREAMOUTSTATS 0x20
1008 #define VEI_RESET_VTX_CNT 0x21
1009 #define VEI_BLOCK_CONTEXT_DONE 0x22
1010 #define VEI_CS_CONTEXT_DONE 0x23
1011 #define VEI_VGT_FLUSH 0x24
1012 #define VEI_SC_SEND_DB_VPZ 0x27
1013 #define VEI_BOTTOM_OF_PIPE_TS 0x28
1014 #define VEI_DB_CACHE_FLUSH_AND_INV 0x2a
1015 #define VEI_FLUSH_AND_INV_DB_DATA_TS 0x2b
1016 #define VEI_FLUSH_AND_INV_DB_META 0x2c
1017 #define VEI_FLUSH_AND_INV_CB_DATA_TS 0x2d
1018 #define VEI_FLUSH_AND_INV_CB_META 0x2e
1019 #define VEI_CS_DONE 0x2f
1020 #define VEI_PS_DONE 0x30
1021 #define VEI_FLUSH_AND_INV_CB_PIXEL_DATA 0x31
1022 #define VEI_THREAD_TRACE_START 0x33
1023 #define VEI_THREAD_TRACE_STOP 0x34
1024 #define VEI_THREAD_TRACE_MARKER 0x35
1025 #define VEI_THREAD_TRACE_FLUSH 0x36
1026 #define VEI_THREAD_TRACE_FINISH 0x37
978 1027 #define VGT_MULTI_PRIM_IB_RESET_ENA 0x28a94 #define VGT_MULTI_PRIM_IB_RESET_ENA 0x28a94
979 1028 #define VMPIRE_RESET_ENA BIT(0) #define VMPIRE_RESET_ENA BIT(0)
980 1029
 
992 1041 #define VGT_VTX_CNT_ENA 0x28ab8 #define VGT_VTX_CNT_ENA 0x28ab8
993 1042 #define VVCE_VTX_CNT_ENA BIT(0) #define VVCE_VTX_CNT_ENA BIT(0)
994 1043
995 #define VGT_SHADER_STAGES_ENA 0x28b54
996 #define VSSE_LS_ENA 0x00000003
997 #define VSSE_LS_STAGE_OFF 0
998 #define VSSE_LS_STAGE_ON 1
999 #define VSSE_CS_STAGE_ON 2
1000 #define VSSE_HS_ENA BIT(2)
1001 #define VSSE_ES_ENA 0x00000018
1002 #define VSSE_ES_STAGE_OFF 0
1003 #define VSSE_ES_STAGE_DS 1
1004 #define VSSE_ES_STAGE_REAL 2
1005 #define VSSE_GS_ENA BIT(5)
1006 #define VSSE_VS_ENA 0x000000c0
1007 #define VSSE_VS_STAGE_REAL 0
1008 #define VSSE_VS_STAGE_DS 1
1009 #define VSSE_VS_STAGE_COPY_SHADER 2
1010 #define VSSE_DYNAMIC_HS BIT(8)
1011
1012 1044 #define DB_SRESULTS_CMP_STATE_0 0x28ac0 #define DB_SRESULTS_CMP_STATE_0 0x28ac0
1013 1045 #define DSCS_CMP_FUNC 0x00000007 #define DSCS_CMP_FUNC 0x00000007
1014 1046 #define DSCS_REF_NEVER 0 #define DSCS_REF_NEVER 0
 
1029 1061 #define DPC_MAX_X 0x00ff0000 #define DPC_MAX_X 0x00ff0000
1030 1062 #define DPC_MAX_Y 0xff000000 #define DPC_MAX_Y 0xff000000
1031 1063
1064 #define VGT_SHADER_STAGES_ENA 0x28b54
1065 #define VSSE_LS_ENA 0x00000003
1066 #define VSSE_LS_STAGE_OFF 0
1067 #define VSSE_LS_STAGE_ON 1
1068 #define VSSE_CS_STAGE_ON 2
1069 #define VSSE_HS_ENA BIT(2)
1070 #define VSSE_ES_ENA 0x00000018
1071 #define VSSE_ES_STAGE_OFF 0
1072 #define VSSE_ES_STAGE_DS 1
1073 #define VSSE_ES_STAGE_REAL 2
1074 #define VSSE_GS_ENA BIT(5)
1075 #define VSSE_VS_ENA 0x000000c0
1076 #define VSSE_VS_STAGE_REAL 0
1077 #define VSSE_VS_STAGE_DS 1
1078 #define VSSE_VS_STAGE_COPY_SHADER 2
1079 #define VSSE_DYNAMIC_HS BIT(8)
1080
1032 1081 #define DB_ALPHA_TO_MASK 0x28b70 #define DB_ALPHA_TO_MASK 0x28b70
1033 1082 #define DATM_ALPHA_TO_MASK_ENA BIT(0) #define DATM_ALPHA_TO_MASK_ENA BIT(0)
1034 1083 #define DATM_ALPHA_TO_MASK_OF_0 0x00000300 #define DATM_ALPHA_TO_MASK_OF_0 0x00000300
File drivers/gpu/alga/amd/si/ih.c changed (mode: 100644) (index 24d8670..402eceb)
... ... bool ih_parse(struct pci_dev *dev)
218 218 data = le32_to_cpup(dd->ba.ih_ring_map->cpu_addr + rp data = le32_to_cpup(dd->ba.ih_ring_map->cpu_addr + rp
219 219 + sizeof(id)) & 0xfffffff; + sizeof(id)) & 0xfffffff;
220 220
221 /* DEBUG */
222 printk(KERN_INFO "IRQ vector:id=0x%08x;data0=0x%08x;data1=0x%08x\n",
223 id, data, le32_to_cpup(dd->ba.ih_ring_map->cpu_addr + 8));
221 224 vector(dev, id, data, &irq_thd); vector(dev, id, data, &irq_thd);
222 225
223 226 rp += VECTOR_SZ; rp += VECTOR_SZ;
File drivers/gpu/alga/amd/si/patterns/tri.c changed (mode: 100644) (index 07614b5..6c83341)
24 24 #include "gpu/gpu.h" #include "gpu/gpu.h"
25 25 #include "drv.h" #include "drv.h"
26 26
27 /*
28 * TODO: properly manage endianness.
29 * This code is made to work on little-endian 64-bit host systems.
30 */
31
32 #define IB_DWS_N_MAX (16 * 64)
33 struct ib {
34 u64 gpu_addr;
35 unsigned dws;
36 u32 d[IB_DWS_N_MAX];
37 };
38
27 39 static u64 vtx_buf; static u64 vtx_buf;
28 40 static u64 vs_buf; static u64 vs_buf;
29 41 static u64 ps_buf; static u64 ps_buf;
30 static u64 buf_res_descs_buf;
42 static struct ib ib;
31 43
32 44 struct vertex { struct vertex {
33 45 float position[4]; float position[4];
34 float color[4];
46 float param0[4];
35 47 }; };
36 48
37 /* the last vertex is "null" */
38 49 static struct vertex vertices[4] = { static struct vertex vertices[4] = {
39 50 { {
40 { 0.0f, -0.9f, 0.0f, 1.0f },
51 { -0.2f, -0.9f, 0.0f, 1.0f },
41 52 { 1.0f, 0.0f, 0.0f, 1.0f } { 1.0f, 0.0f, 0.0f, 1.0f }
42 53 }, },
43 54 { {
 
... ... static struct vertex vertices[4] = {
56 67
57 68 static u32 buf_res_descs[] = { static u32 buf_res_descs[] = {
58 69 /* init with the vram lower 32 bits vertex position buffer address */ /* init with the vram lower 32 bits vertex position buffer address */
59 0xaaaaaaaa,
70 0x00000000,
60 71 /* /*
61 72 * oring the upper 8 remaining bits of buffer address. * oring the upper 8 remaining bits of buffer address.
62 73 * stride=0x20 (8 floats (4 position+4 color components) of 4 bytes. * stride=0x20 (8 floats (4 position+4 color components) of 4 bytes.
63 74 */ */
64 0x002000aa,
75 0x00200000,
65 76 /* 4 records, namely 4 vertices, the last one in "null" */ /* 4 records, namely 4 vertices, the last one in "null" */
66 77 0x00000004, 0x00000004,
67 78 /* /*
 
... ... static u32 buf_res_descs[] = {
70 81 */ */
71 82 0x00077fac, 0x00077fac,
72 83 /*--------------------------------------------------------------------*/ /*--------------------------------------------------------------------*/
73 /* init with the vram lower 32 bits vertex colors buffer address */
74 0xaaaaaaaa,
84 /* init with the vram lower 32 bits vertex param 0 buffer address */
85 0x00000000,
75 86 /* /*
76 87 * oring the upper 8 remaining bits of buffer address. * oring the upper 8 remaining bits of buffer address.
77 * stride=0x20 (8 floats (4 position+4 color components) of 4 bytes.
88 * stride=0x20 (8 floats (4 position+4 param 0 components) of 4 bytes.
78 89 */ */
79 0x002000aa,
90 0x00200000,
80 91 /* 4 records, namely 4 vertices, the last one in "null" */ /* 4 records, namely 4 vertices, the last one in "null" */
81 92 0x00000004, 0x00000004,
82 93 /* /*
83 94 * dst_sel_x=4(r) dst_sel_y=5(g) dst_sel_z=6(b) dst_sel_w=7(a) * dst_sel_x=4(r) dst_sel_y=5(g) dst_sel_z=6(b) dst_sel_w=7(a)
95 * (customary to use color terminology for params)
84 96 * nfmt=7(float) dfmt=14(32_32_32_32) * nfmt=7(float) dfmt=14(32_32_32_32)
85 97 */ */
86 98 0x00077fac 0x00077fac
87 99 }; };
88 100
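The two address dwords of each descriptor above are initialized to zero; per the comments, the lower 32 bits of the buffer's vram address go into the first dword and the remaining upper 8 bits are or'ed into the second. A hedged sketch of that patching for the position descriptor (the param 0 descriptor would be filled the same way from its own base address):

	/* hypothetical runtime init, once vtx_buf is known */
	buf_res_descs[0] = lower_32_bits(vtx_buf);
	buf_res_descs[1] |= upper_32_bits(vtx_buf) & 0xff;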
89 101 /* /*
90 * This shader uses 8 sgprs and 8 vgprs, does 2 exports which 1 is a param
91 * export (1 param export is mandatory for the hw).
92 * Load the buffer resource descriptor of the vertex positions, from the vram
93 * little endian pointer stored in sgpr6-7, in sgpr0-3.
94 * Load the vertex position components (XYZW), using the buffer resource
95 * descriptor stored in sgpr0-3 and index in vgpr0 (from spi/vgt blocks), in
96 * vgpr4-7.
97 * Load the buffer resource descriptor of the vertex colors, from the vram
98 * little endian pointer stored in sgpr6-7 + 4 dws, in sgpr0-3.
99 * Load the vertex color components (RGBA), using the buffer resource descriptor
100 * stored in sgpr0-3 and index in vgpr0 (from spi/vgt blocks), in vgpr0-3.
101 * Export the color components (RGBA) stored in vgpr0-3 to parameter 0 buffer.
102 * Export the position components (XYZW) stored in vgpr4-7 to position 0 buffer.
103 * note: the buffer resource descriptor buffer point gets loaded in sgpr6-7
104 * that forces the load of the previous 6 user sgprs (SPI_SH_USER_DATA_VS_X).
102 * o USER_SGPR[3:0]<--buffer resource descriptor of the buffer of vertex
103 * positions
104 * o USER_SGPR[7:4]<--buffer resource descriptor of the buffer of vertex
105 * parameter 0 (unused here)
106 * note: the done bit in export instructions is set only on the vertex position export.
105 107 */ */
106 static u32 vs[] = {
107 0xc0800700,/* s_load_dwordx4 sbase=3 imm=1 */
108 0xbf8c0000,/* s_waitcnt */
108 static const unsigned vs_vgprs_n=9;
109 static const unsigned vs_user_sgprs_n=8;
110 static const unsigned vs_sgprs_n=8;/* at least vs_user_sgprs_n */
111 static const unsigned vs_exported_params_n=1;
112 static const u32 vs[] = {
109 113 0xe00c2000,/* buffer_load_format_xyzw idxen=1 */ 0xe00c2000,/* buffer_load_format_xyzw idxen=1 */
110 0x80000400,/* soffset=128(=0) vdata=4 */
111 0xbf8c0000,/* s_waitcnt */
112 0xc0800704,/* s_load_dwordx4 sbase=3 imm=1 offset=4 */
114 0x80000100,/* soffset=128(=0) vdata=1 */
113 115 0xbf8c0000,/* s_waitcnt */ 0xbf8c0000,/* s_waitcnt */
114 116 0xe00c2000,/* buffer_load_format_xyzw idxen=1 */ 0xe00c2000,/* buffer_load_format_xyzw idxen=1 */
115 0x80000000,/* soffset=128(=0) */
117 0x80010500,/* soffset=128(=0) srsrc=1(sgprs[4:7]) vdata=5 */
116 118 0xbf8c0000,/* s_waitcnt */ 0xbf8c0000,/* s_waitcnt */
117 0xf800020f,/* export en=0b1111 tgt=32(param0) */
118 0x03020100,/* vsrc1=1 vsrc2=2 vsrc3=3 */
119 0xf80008cf,/* export en=0b1111 done=1 tgt=12(pos0) */
120 0x04030201,/* vsrc0=1 vsrc1=2 vsrc2=3 vsrc3=4 */
119 121 0xbf8c0000,/* s_waitcnt */ 0xbf8c0000,/* s_waitcnt */
120 0xf80008cf,/* export en=0b1111 tgt=12(pos0) done=1 */
121 0x07060504,/* vsrc0=4 vsrc1=5 vsrc2=6 vsrc3=7 */
122 0xf800020f,/* export en=0b1111 tgt=32(param0) */
123 0x08070605,/* vsrc0=5 vsrc1=6 vsrc2=7 vsrc3=8 */
122 124 0xbf8c0000,/* s_waitcnt */ 0xbf8c0000,/* s_waitcnt */
123 125 0xbf810000/* s_endpgm */ 0xbf810000/* s_endpgm */
124 126 }; };
125 127
126 128 /* /*
127 * This shader uses 6 sgprs (+1 fo m0) and 5 vgprs
128 * Enable all threads in enabled thread groups of exec_lo (4 threads per group).
129 * Init the m0 reg mandatory for the following interpolation instructions.
130 * Interpolate the parameters 0 (colors) of the primitive (triangle) vertices in
131 * order to compute the final pixel color which is exported to the db0/cb0(mrt0)
132 * blocks.
133 * note: the spi block puts proper m0 in sgpr6 (=the first sgpr after the loaded
134 * user sgpr, here which are 6) and i/j barycentric coords in vgpr0 and vgpr1
135 * before telling the sq block to run the shader. The spi is told to do so by
136 * enabling the PERSP_CENTER_ENA bit in SPI_PS_INPUT_ENA, for this example.
129 * m0 is put by the spi right after the last pre-loaded user sgpr. m0 must
130 * be loaded in order to properly index the parameters in lds.
131 * note: we don't deal with the "valid mask" for pixels in the exec register.
137 132 */ */
133 static const unsigned ps_vgprs_n=4;
134 static const unsigned ps_user_sgprs_n=0;
135 static const unsigned ps_sgprs_n=0;/* at least ps_user_sgprs_n */
138 136 static u32 ps[] = { static u32 ps[] = {
139 0xbefe0a7e,/* s_wq_b64 sdst=126(exec_lo) ssrc=126(exec_lo) */
140 0xbefc0306,/* s_mov_b32 sdst=124(m0) ssrc0=6 */
141 0xc8080300,/* v_interp_p1_f32 vdst=2 attrchan=3 */
142 0xc8090301,/* v_interp_p2_f32 vdst=2 attrchan=3 vsrc=1 */
143 0xc80c0200,/* v_interp_p1_f32 vdst=3 attrchan=2 */
144 0xc80d0201,/* v_interp_p2_f32 vdst=3 attrchan=2 vsrc=1 */
145 0xc8100100,/* v_interp_p1_f32 vdst=4 attrchan=1 */
146 0xc8110101,/* v_interp_p2_f32 vdst=4 attrchan=1 vsrc=1 */
147 0xc8000000,/* v_interp_p1_f32 */
148 0xc8010001,/* v_interp_p2_f32 vsrc =1 */
149 0xf800180f,/* export en=0b1111 tgt=0(mrt0) done=1 vm=1 */
150 0x02030400,/* vsrc1=4 vsrc2=3 vsrc3=2 */
137 0x7e0002f2,/* v_mov_b32 src0=242(1.0f) vdst=0 */
138 0xbf8c0000,/* s_waitcnt */
139 0x7e0202f2,/* v_mov_b32 src0=242(1.0f) vdst=1 */
140 0xbf8c0000,/* s_waitcnt */
141 0x7e0402f2,/* v_mov_b32 src0=242(1.0f) vdst=2 */
142 0xbf8c0000,/* s_waitcnt */
143 0x7e0602f2,/* v_mov_b32 src0=242(1.0f) vdst=3 */
144 0xbf8c0000,/* s_waitcnt */
145 0x5e000300,/* v_cvt_pkrtz_f16_f32 vdst=0 vsrc1=1 src0=256(vgpr0) */
146 0x5e020702,/* v_cvt_pkrtz_f16_f32 vdst=1 vsrc1=3 src0=258(vgpr2) */
147 0xf8001c0f,/* exp vm=1 done=1 compr=1 en=0b1111 */
148 0x01000100,/* vsrc3=1 vsrc2=0 vsrc1=1 vsrc0=0 */
151 149 0xbf8c0000,/* s_waitcnt */ 0xbf8c0000,/* s_waitcnt */
152 150 0xbf810000/* s_endpgm */ 0xbf810000/* s_endpgm */
153 151 }; };
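For orientation, a hedged C-style rendering of what the rewritten pixel shader above computes (pack_half2 below is a made-up stand-in for v_cvt_pkrtz_f16_f32, which packs two f32 values into one dword of two half floats):

	float r = 1.0f, g = 1.0f, b = 1.0f, a = 1.0f;   /* the four v_mov_b32 */
	u32 dw0 = pack_half2(r, g);                     /* vgpr0              */
	u32 dw1 = pack_half2(b, a);                     /* vgpr1              */
	/* compressed (compr=1) export of dw0/dw1 to mrt0, matching the
	 * SSCF_FP16_ABGR color format programmed below */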
 
... ... static u32 ps[] = {
155 153 static void ctx_vgt(struct pci_dev *dev) static void ctx_vgt(struct pci_dev *dev)
156 154 { {
157 155 /* VGT (Vertex Grouper and Tesselator block) */ /* VGT (Vertex Grouper and Tesselator block) */
158 cp0_wr(dev, PKT3(PKT3_SET_CTX_REG, 5));
159 cp0_wr(dev, CTX_REG_IDX(VGT_MAX_VTX_IDX));
156 ib.d[ib.dws++]=PKT3(PKT3_SET_CTX_REG, 5);
157 ib.d[ib.dws++]=CTX_REG_IDX(VGT_MAX_VTX_IDX);
160 158 /* VGT_MAX_VTX_IDX */ /* VGT_MAX_VTX_IDX */
161 cp0_wr(dev, ~0);
159 ib.d[ib.dws++]=~0;
162 160 /* VGT_MIN_VTX_IDX */ /* VGT_MIN_VTX_IDX */
163 cp0_wr(dev, 0);
161 ib.d[ib.dws++]=0;
164 162 /* VGT_IDX_OF */ /* VGT_IDX_OF */
165 cp0_wr(dev, 0);
163 ib.d[ib.dws++]=0;
166 164 /* VGT_MULTI_PRIM_IB_RESET_IDX */ /* VGT_MULTI_PRIM_IB_RESET_IDX */
167 cp0_wr(dev, 0);
165 ib.d[ib.dws++]=0;
168 166
169 cp0_wr(dev, PKT3(PKT3_SET_CTX_REG, 2));
170 cp0_wr(dev, CTX_REG_IDX(VGT_MULTI_PRIM_IB_RESET_ENA));
167 ib.d[ib.dws++]=PKT3(PKT3_SET_CTX_REG, 2);
168 ib.d[ib.dws++]=CTX_REG_IDX(VGT_MULTI_PRIM_IB_RESET_ENA);
171 169 /* VGT_MULTI_PRIM_IB_RESET_ENA */ /* VGT_MULTI_PRIM_IB_RESET_ENA */
172 cp0_wr(dev, 0);
170 ib.d[ib.dws++]=0;
173 171 } }
174 172
175 173 static void ctx_spi_sh_vs(struct pci_dev *dev) static void ctx_spi_sh_vs(struct pci_dev *dev)
176 174 { {
177 175 /* setup specific for the vertex shader */ /* setup specific for the vertex shader */
178 cp0_wr(dev, PKT3(PKT3_SET_SH_REG, 5));
179 cp0_wr(dev, SH_REG_IDX(SPI_SH_PGM_LO_VS));
176
177 /* Tell the spi to pre-load the buffer descriptors in user sgprs */
178 ib.d[ib.dws++]=PKT3(PKT3_SET_SH_REG,9);
179 ib.d[ib.dws++]=SH_REG_IDX(SPI_SH_USER_DATA_VS_0);
180 /* SPI_SH_USER_DATA_VS_0 */
181 ib.d[ib.dws++]=buf_res_descs[0];
182 /* SPI_SH_USER_DATA_VS_1 */
183 ib.d[ib.dws++]=buf_res_descs[1];
184 /* SPI_SH_USER_DATA_VS_2 */
185 ib.d[ib.dws++]=buf_res_descs[2];
186 /* SPI_SH_USER_DATA_VS_3 */
187 ib.d[ib.dws++]=buf_res_descs[3];
188 /* SPI_SH_USER_DATA_VS_4 */
189 ib.d[ib.dws++]=buf_res_descs[4];
190 /* SPI_SH_USER_DATA_VS_5 */
191 ib.d[ib.dws++]=buf_res_descs[5];
192 /* SPI_SH_USER_DATA_VS_6 */
193 ib.d[ib.dws++]=buf_res_descs[6];
194 /* SPI_SH_USER_DATA_VS_7 */
195 ib.d[ib.dws++]=buf_res_descs[7];
196
197 ib.d[ib.dws++]=PKT3(PKT3_SET_SH_REG, 5);
198 ib.d[ib.dws++]=SH_REG_IDX(SPI_SH_PGM_LO_VS);
180 199 /* SPI_SH_PGM_LO_VS */ /* SPI_SH_PGM_LO_VS */
181 cp0_wr(dev, lower_32_bits(vs_buf));
200 ib.d[ib.dws++]=lower_32_bits(vs_buf>>8);
182 201 /* SPI_SH_PGM_HI_VS */ /* SPI_SH_PGM_HI_VS */
183 cp0_wr(dev, set(SSPHV_MEM_BASE, upper_32_bits(vs_buf)));
202 ib.d[ib.dws++]=set(SSPHV_MEM_BASE, upper_32_bits(vs_buf>>8));
184 203 /* /*
185 204 * SPI_SH_PGM_RSRC_VS_0: the vgrs are allocated using units of 4 vgprs, * SPI_SH_PGM_RSRC_VS_0: the vgrs are allocated using units of 4 vgprs,
186 205 * sgprs using units of 8 sgprs. Don't forget to book 2 additionnal * sgprs using units of 8 sgprs. Don't forget to book 2 additionnal
187 206 * sgprs for vcc. Both counts are minus one unit. * sgprs for vcc. Both counts are minus one unit.
188 207 */ */
189 cp0_wr(dev, set(SSPRV_VGPRS, ((8 - 1) / 4))
190 | set (SSPRV_SGPRS, ((8 + 2) - 1) / 8));
208 ib.d[ib.dws++]=set(SSPRV_VGPRS, ((vs_vgprs_n - 1) / 4))
209 | set (SSPRV_SGPRS, ((vs_sgprs_n + 2) - 1) / 8);
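Plugging the counts declared above into the granularity described in the comment (a worked check):

	/* vs_vgprs_n = 9:  (9 - 1) / 4     = 2 -> (2 + 1) * 4 = 12 vgprs booked
	 * vs_sgprs_n = 8:  (8 + 2 - 1) / 8 = 1 -> (1 + 1) * 8 = 16 sgprs booked
	 * (the +2 reserves the vcc pair; both fields are "minus one unit") */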
191 210 /* /*
192 211 * SPI_SH_PGM_RSRC_VS_1: tell the spi the count of sgprs which are not * SPI_SH_PGM_RSRC_VS_1: tell the spi the count of sgprs which are not
193 212 * vcc. * vcc.
194 213 */ */
195 cp0_wr(dev, set(SSPRV_USER_SGPR, 8));
196
197 /*
198 * Tell the spi to pre-load the pointer of the buffer resource
199 * descriptors in the sgpr6-7 for our vertex shader.
200 */
201 cp0_wr(dev, PKT3(PKT3_SET_SH_REG, 3));
202 cp0_wr(dev, SH_REG_IDX(SPI_SH_USER_DATA_VS_6));
203 /* SPI_SH_USER_DATA_VS_6 */
204 cp0_wr(dev, lower_32_bits(buf_res_descs_buf));
205 /* SPI_SH_USER_DATA_VS_7 */
206 cp0_wr(dev, upper_32_bits(buf_res_descs_buf));
214 ib.d[ib.dws++]=set(SSPRV_USER_SGPR, vs_user_sgprs_n);
207 215
208 216 /* our vertex shader export only the color as parameter */ /* our vertex shader export only the color as parameter */
209 cp0_wr(dev, PKT3(PKT3_SET_CTX_REG, 2));
210 cp0_wr(dev, CTX_REG_IDX(SPI_VS_OUT_CFG));
217 ib.d[ib.dws++]=PKT3(PKT3_SET_CTX_REG, 2);
218 ib.d[ib.dws++]=CTX_REG_IDX(SPI_VS_OUT_CFG);
211 219 /* SPI_VS_OUT_CFG */ /* SPI_VS_OUT_CFG */
212 cp0_wr(dev, set(SVOC_VS_PARAM_EXPORT_COUNT, 1 - 1));
220 ib.d[ib.dws++]=set(SVOC_VS_PARAM_EXPORT_COUNT,
221 vs_exported_params_n - 1);
213 222
214 223 /* /*
215 224 * The spi needs to be told what packing format is used by the vertex * The spi needs to be told what packing format is used by the vertex
216 225 * shader to export the position. * shader to export the position.
217 226 */ */
218 cp0_wr(dev, PKT3(PKT3_SET_CTX_REG, 2));
219 cp0_wr(dev, CTX_REG_IDX(SPI_SH_POS_FMT));
227 ib.d[ib.dws++]=PKT3(PKT3_SET_CTX_REG, 2);
228 ib.d[ib.dws++]=CTX_REG_IDX(SPI_SH_POS_FMT);
220 229 /* SPI_SH_POS_FMT */ /* SPI_SH_POS_FMT */
221 cp0_wr(dev, set(SSPF_POS_0_EXPORT_FMT, SSPF_4COMP));
230 ib.d[ib.dws++]=set(SSPF_POS_0_EXPORT_FMT, SSPF_4COMP);
222 231 } }
223 232
224 233 static void ctx_spi_sh_ps(struct pci_dev *dev) static void ctx_spi_sh_ps(struct pci_dev *dev)
225 234 { {
226 235 /* setup specific for the pixel/fragment shader */ /* setup specific for the pixel/fragment shader */
227 cp0_wr(dev, PKT3(PKT3_SET_CTX_REG, 3));
228 cp0_wr(dev, CTX_REG_IDX(SPI_PS_INPUT_ENA));
229 /* SPI_PS_INPUT_ENA */
230 cp0_wr(dev, SPIE_PERSP_CENTER_ENA);
231 /* SPI_PS_INPUT_ADDR */
232 cp0_wr(dev, SPIA_PERSP_CENTER_ENA);
233
234 cp0_wr(dev, PKT3(PKT3_SET_CTX_REG, 2));
235 cp0_wr(dev, CTX_REG_IDX(SPI_PS_IN_CTL));
236 //XXX: the original code wants to interpolate 2 parameters
237 /* SPI_PS_IN_CTL: 1 parameter to interpolate, the color */
238 cp0_wr(dev, set(SPIC_INTERP_N, 1));
239
240 cp0_wr(dev, PKT3(PKT3_SET_CTX_REG, 2));
241 cp0_wr(dev, CTX_REG_IDX(SPI_SH_Z_FMT));
242 /* SPI_SH_Z_FMT */
243 cp0_wr(dev, set(SSZF_Z_EXPORT_FMT, SSZF_ZERO));
244
245 cp0_wr(dev, PKT3(PKT3_SET_SH_REG, 5));
246 cp0_wr(dev, SH_REG_IDX(SPI_SH_PGM_LO_PS));
236 ib.d[ib.dws++]=PKT3(PKT3_SET_SH_REG, 5);
237 ib.d[ib.dws++]=SH_REG_IDX(SPI_SH_PGM_LO_PS);
247 238 /* SPI_SH_PGM_LO_PS */ /* SPI_SH_PGM_LO_PS */
248 cp0_wr(dev, lower_32_bits(ps_buf));
239 ib.d[ib.dws++]=lower_32_bits(ps_buf>>8);
249 240 /* SPI_SH_PGM_HI_PS */ /* SPI_SH_PGM_HI_PS */
250 cp0_wr(dev, set(SSPHP_MEM_BASE, upper_32_bits(ps_buf)));
241 ib.d[ib.dws++]=set(SSPHP_MEM_BASE, upper_32_bits(ps_buf>>8));
251 242 /* /*
252 243 * SPI_SH_PGM_RSRC_PS_0: we must account 1 additional sgpr for m0 since * SPI_SH_PGM_RSRC_PS_0: we must account 1 additional sgpr for m0 since
253 244 * which will be loaded in the sgpr right after the last user sgpr. * which will be loaded in the sgpr right after the last user sgpr.
254 245 */ */
255 cp0_wr(dev, set(SSPRP_VGPRS, ((5 - 1) / 4))
256 | set(SSPRP_SGPRS, ((6 + 1 + 2) - 1) / 8));
246 ib.d[ib.dws++]=set(SSPRP_VGPRS, ((ps_vgprs_n - 1) / 4))
247 | set(SSPRP_SGPRS, ((ps_sgprs_n + 1 + 2) - 1) / 8);
257 248 /* /*
258 249 * SPI_SH_PGM_RSRC_PS_1: same constrains than the vertex shaders * SPI_SH_PGM_RSRC_PS_1: same constrains than the vertex shaders
259 250 * plus the fact the spi will load the m0 in the first sgpr after the * plus the fact the spi will load the m0 in the first sgpr after the
260 251 * last user loaded sgpr, namely sgpr6 in this case. * last user loaded sgpr, namely sgpr6 in this case.
261 252 */ */
262 cp0_wr(dev, set(SSPRP_USER_SGPR, 6));
253 ib.d[ib.dws++]=set(SSPRP_USER_SGPR, ps_sgprs_n);
254
255 /*
256 * tell the spi that the pixel/fragment shader needs perspective center
257 * interpolation data as input (mandatory, otherwise the gpu hangs)
258 */
259 ib.d[ib.dws++]=PKT3(PKT3_SET_CTX_REG, 3);
260 ib.d[ib.dws++]=CTX_REG_IDX(SPI_PS_INPUT_ENA);
261 /* SPI_PS_INPUT_ENA */
262 ib.d[ib.dws++]=SPIE_PERSP_CENTER_ENA;
263 /* SPI_PS_INPUT_ADDR */
264 ib.d[ib.dws++]=SPIA_PERSP_CENTER_ENA;
265
266 ib.d[ib.dws++]=PKT3(PKT3_SET_CTX_REG, 2);
267 ib.d[ib.dws++]=CTX_REG_IDX(SPI_PS_IN_CTL);
268 /* SPI_PS_IN_CTL: 1 parameter to interpolate. Must have at least one */
269 ib.d[ib.dws++]=set(SPIC_INTERP_N, 1);
270
271 /* don't care about z depth export */
272 ib.d[ib.dws++]=PKT3(PKT3_SET_CTX_REG, 2);
273 ib.d[ib.dws++]=CTX_REG_IDX(SPI_SH_Z_FMT);
274 /* SPI_SH_Z_FMT */
275 ib.d[ib.dws++]=set(SSZF_Z_EXPORT_FMT, SSZF_ZERO);
263 276
264 //XXX: the orginal code wants to interpolate 2 parameters
265 277 /* only 1 input param on 32, then only SPI_PS_INPUT_CTL_00 */ /* only 1 input param on 32, then only SPI_PS_INPUT_CTL_00 */
266 cp0_wr(dev, PKT3(PKT3_SET_CTX_REG, 2));
267 cp0_wr(dev, CTX_REG_IDX(SPI_PS_INPUT_CTL_00));
278 ib.d[ib.dws++]=PKT3(PKT3_SET_CTX_REG, 2);
279 ib.d[ib.dws++]=CTX_REG_IDX(SPI_PS_INPUT_CTL_00);
268 280 /* SPI_PS_INPUT_CTL_00 */ /* SPI_PS_INPUT_CTL_00 */
269 cp0_wr(dev, 0);
281 ib.d[ib.dws++]=0;
270 282
271 283 /* /*
272 284 * The spi sends the pixel color exported by a pixel/fragment shader to * The spi sends the pixel color exported by a pixel/fragment shader to
273 285 * a cb, it needs to be told about the special color packing format the * a cb, it needs to be told about the special color packing format the
274 286 * shader used. * shader used.
275 287 */ */
276 cp0_wr(dev, PKT3(PKT3_SET_CTX_REG, 2));
277 cp0_wr(dev, CTX_REG_IDX(SPI_SH_COLOR_FMT));
288 ib.d[ib.dws++]=PKT3(PKT3_SET_CTX_REG, 2);
289 ib.d[ib.dws++]=CTX_REG_IDX(SPI_SH_COLOR_FMT);
278 290 /* SPI_SH_COLOR_FMT */ /* SPI_SH_COLOR_FMT */
279 cp0_wr(dev, set(SSCF_COLOR_0_EXPORT_FMT, SSCF_FP16_ABGR));
291 ib.d[ib.dws++]=set(SSCF_COLOR_0_EXPORT_FMT, SSCF_FP16_ABGR);
280 292 } }
281 293
282 294 static void ctx_spi_sh(struct pci_dev *dev) static void ctx_spi_sh(struct pci_dev *dev)
 
... ... static void ctx_spi(struct pci_dev *dev)
290 302 { {
291 303 /* SPI (Shader Processor Interpolator) */ /* SPI (Shader Processor Interpolator) */
292 304 /* disable the point primitive sprite */ /* disable the point primitive sprite */
293 cp0_wr(dev, PKT3(PKT3_SET_CTX_REG, 2));
294 cp0_wr(dev, CTX_REG_IDX(SPI_INTERPOL_CTL_0));
305 ib.d[ib.dws++]=PKT3(PKT3_SET_CTX_REG, 2);
306 ib.d[ib.dws++]=CTX_REG_IDX(SPI_INTERPOL_CTL_0);
295 307 /* SPI_INTERPOL_CTL_0 */ /* SPI_INTERPOL_CTL_0 */
296 cp0_wr(dev, 0);
308 ib.d[ib.dws++]=0;
297 309
298 cp0_wr(dev, PKT3(PKT3_SET_CTX_REG, 2));
299 cp0_wr(dev, CTX_REG_IDX(SPI_BARYC_CTL));
300 /* SPI_BARYC_CTL */
301 cp0_wr(dev, SBC_PERSP_CENTER_CTL);
310 ib.d[ib.dws++]=PKT3(PKT3_SET_CTX_REG, 2);
311 ib.d[ib.dws++]=CTX_REG_IDX(SPI_BARYC_CTL);
312 /* SPI_BARYC_CTL: the working sample uses 0 */
313 ib.d[ib.dws++]=0;
302 314
303 315 ctx_spi_sh(dev); ctx_spi_sh(dev);
304 316 } }
 
... ... static void ctx_dbs(struct pci_dev *dev)
307 319 { {
308 320 /* DBs (Depth Blocks) */ /* DBs (Depth Blocks) */
309 321 /* disable the depth stencil/z-buffer */ /* disable the depth stencil/z-buffer */
310 cp0_wr(dev, PKT3(PKT3_SET_CTX_REG, 3));
311 cp0_wr(dev, CTX_REG_IDX(DB_Z_INFO));
322 ib.d[ib.dws++]=PKT3(PKT3_SET_CTX_REG, 3);
323 ib.d[ib.dws++]=CTX_REG_IDX(DB_Z_INFO);
312 324 /* DB_Z_INFO */ /* DB_Z_INFO */
313 cp0_wr(dev, 0);
325 ib.d[ib.dws++]=0;
314 326 /* DB_STENCIL_INFO */ /* DB_STENCIL_INFO */
315 cp0_wr(dev, 0);
327 ib.d[ib.dws++]=0;
316 328
317 329 /* even if disabled, setup some clean values in a few regs */ /* even if disabled, setup some clean values in a few regs */
318 330
319 cp0_wr(dev, PKT3(PKT3_SET_CTX_REG, 5));
320 cp0_wr(dev, CTX_REG_IDX(DB_DEPTH_BOUNDS_MIN));
331 ib.d[ib.dws++]=PKT3(PKT3_SET_CTX_REG, 2);
332 ib.d[ib.dws++]=CTX_REG_IDX(DB_DEPTH_CTL);
333 /* DB_DEPTH_CTL */
334 ib.d[ib.dws++]=0;
335
336 ib.d[ib.dws++]=PKT3(PKT3_SET_CTX_REG, 5);
337 ib.d[ib.dws++]=CTX_REG_IDX(DB_DEPTH_BOUNDS_MIN);
321 338 /* DB_DEPTH_BOUNDS_MIN */ /* DB_DEPTH_BOUNDS_MIN */
322 cp0_wr(dev, 0);
339 ib.d[ib.dws++]=0;
323 340 /* DB_DEPTH_BOUNDS_MAX */ /* DB_DEPTH_BOUNDS_MAX */
324 cp0_wr(dev, 0);
341 ib.d[ib.dws++]=0;
325 342 /* DB_STENCIL_CLR */ /* DB_STENCIL_CLR */
326 cp0_wr(dev, 0);
343 ib.d[ib.dws++]=0;
327 344 /* DB_DEPTH_CLR */ /* DB_DEPTH_CLR */
328 cp0_wr(dev, f2u(1.0f));
345 ib.d[ib.dws++]=f2u(1.0f);
329 346
330 cp0_wr(dev, PKT3(PKT3_SET_CTX_REG, 2));
331 cp0_wr(dev, CTX_REG_IDX(DB_RENDER_CTL));
347 ib.d[ib.dws++]=PKT3(PKT3_SET_CTX_REG, 2);
348 ib.d[ib.dws++]=CTX_REG_IDX(DB_RENDER_CTL);
332 349 /* DB_RENDER_CTL */ /* DB_RENDER_CTL */
333 cp0_wr(dev, 0);
350 ib.d[ib.dws++]=0;
334 351
335 cp0_wr(dev, PKT3(PKT3_SET_CTX_REG, 2));
336 cp0_wr(dev, CTX_REG_IDX(DB_RENDER_OVERRIDE_0));
352 ib.d[ib.dws++]=PKT3(PKT3_SET_CTX_REG, 2);
353 ib.d[ib.dws++]=CTX_REG_IDX(DB_RENDER_OVERRIDE_0);
337 354 /* DB_RENDER_OVERRIDE_0 */ /* DB_RENDER_OVERRIDE_0 */
338 cp0_wr(dev, set(DRO_FORCE_HIZ_ENA, DRO_FORCE_DIS)
355 ib.d[ib.dws++]=set(DRO_FORCE_HIZ_ENA, DRO_FORCE_DIS)
339 356 | set(DRO_FORCE_HIS_ENA_0, DRO_FORCE_DIS) | set(DRO_FORCE_HIS_ENA_0, DRO_FORCE_DIS)
340 | set(DRO_FORCE_HIS_ENA_1, DRO_FORCE_DIS));
357 | set(DRO_FORCE_HIS_ENA_1, DRO_FORCE_DIS);
341 358
342 cp0_wr(dev, PKT3(PKT3_SET_CTX_REG, 2));
343 cp0_wr(dev, CTX_REG_IDX(DB_STENCIL_CTL));
359 ib.d[ib.dws++]=PKT3(PKT3_SET_CTX_REG, 2);
360 ib.d[ib.dws++]=CTX_REG_IDX(DB_STENCIL_CTL);
344 361 /* DB_STENCIL_CTL */ /* DB_STENCIL_CTL */
345 cp0_wr(dev, 0);
362 ib.d[ib.dws++]=0;
346 363
347 cp0_wr(dev, PKT3(PKT3_SET_CTX_REG, 4));
348 cp0_wr(dev, CTX_REG_IDX(DB_SRESULTS_CMP_STATE_0));
364 ib.d[ib.dws++]=PKT3(PKT3_SET_CTX_REG, 4);
365 ib.d[ib.dws++]=CTX_REG_IDX(DB_SRESULTS_CMP_STATE_0);
349 366 /* DB_SRESULTS_CMP_STATE_0 */ /* DB_SRESULTS_CMP_STATE_0 */
350 cp0_wr(dev, 0);
367 ib.d[ib.dws++]=0;
351 368 /* DB_SRESULTS_CMP_STATE_1 */ /* DB_SRESULTS_CMP_STATE_1 */
352 cp0_wr(dev, 0);
369 ib.d[ib.dws++]=0;
353 370 /* DB_PRELOAD_CTL */ /* DB_PRELOAD_CTL */
354 cp0_wr(dev, 0);
371 ib.d[ib.dws++]=0;
355 372
356 cp0_wr(dev, PKT3(PKT3_SET_CTX_REG, 2));
357 cp0_wr(dev, CTX_REG_IDX(DB_ALPHA_TO_MASK));
373 ib.d[ib.dws++]=PKT3(PKT3_SET_CTX_REG, 2);
374 ib.d[ib.dws++]=CTX_REG_IDX(DB_ALPHA_TO_MASK);
358 375 /* DB_ALPHA_TO_MASK */ /* DB_ALPHA_TO_MASK */
359 cp0_wr(dev, set(DATM_ALPHA_TO_MASK_OF_0, 2)
376 ib.d[ib.dws++]=set(DATM_ALPHA_TO_MASK_OF_0, 2)
360 377 | set(DATM_ALPHA_TO_MASK_OF_1, 2) | set(DATM_ALPHA_TO_MASK_OF_1, 2)
361 378 | set(DATM_ALPHA_TO_MASK_OF_2, 2) | set(DATM_ALPHA_TO_MASK_OF_2, 2)
362 | set(DATM_ALPHA_TO_MASK_OF_3, 2));
379 | set(DATM_ALPHA_TO_MASK_OF_3, 2);
363 380
364 cp0_wr(dev, PKT3(PKT3_SET_CTX_REG, 3));
365 cp0_wr(dev, CTX_REG_IDX(DB_STENCILREFMASK));
381 ib.d[ib.dws++]=PKT3(PKT3_SET_CTX_REG, 3);
382 ib.d[ib.dws++]=CTX_REG_IDX(DB_STENCILREFMASK);
366 383 /* DB_STENCILREFMASK */ /* DB_STENCILREFMASK */
367 cp0_wr(dev, 0);
384 ib.d[ib.dws++]=0;
368 385 /* DB_STENCILREFMASK_BF */ /* DB_STENCILREFMASK_BF */
369 cp0_wr(dev, 0);
386 ib.d[ib.dws++]=0;
370 387
371 cp0_wr(dev, PKT3(PKT3_SET_CTX_REG, 2));
372 cp0_wr(dev, CTX_REG_IDX(DB_SH_CTL));
388 ib.d[ib.dws++]=PKT3(PKT3_SET_CTX_REG, 2);
389 ib.d[ib.dws++]=CTX_REG_IDX(DB_SH_CTL);
373 390 /* DB_SH_CTL */ /* DB_SH_CTL */
374 cp0_wr(dev, set(DSC_Z_ORDER, DSC_EARLY_Z_THEN_LATE_Z));
391 ib.d[ib.dws++]=set(DSC_Z_ORDER, DSC_EARLY_Z_THEN_LATE_Z);
375 392 } }
376 393
377 394 static void ctx_cb_0(struct pci_dev *dev, struct ptn_tri *p) static void ctx_cb_0(struct pci_dev *dev, struct ptn_tri *p)
378 395 { {
379 396 /* CB 0 (Color Block 0) */ /* CB 0 (Color Block 0) */
380 cp0_wr(dev, PKT3(PKT3_SET_CTX_REG, 7));
381 cp0_wr(dev, CTX_REG_IDX(CB_0_COLOR_BASE));
397 ib.d[ib.dws++]=PKT3(PKT3_SET_CTX_REG, 7);
398 ib.d[ib.dws++]=CTX_REG_IDX(CB_0_COLOR_BASE);
382 399 /* CB_0_COLOR_BASE */ /* CB_0_COLOR_BASE */
383 cp0_wr(dev, p->fb_gpu_addr >> 8);
400 ib.d[ib.dws++]=p->fb_gpu_addr >> 8;
384 401 /* CB_0_COLOR_PITCH: a thin1 tile is 8x8 pixels */ /* CB_0_COLOR_PITCH: a thin1 tile is 8x8 pixels */
385 cp0_wr(dev, set(CCP_TILE_MAX, p->w / 8 - 1));
402 ib.d[ib.dws++]=set(CCP_TILE_MAX, p->w / 8 - 1);
386 403 /* CB_0_COLOR_SLICE: a thin1 tile is 8x8 pixels */ /* CB_0_COLOR_SLICE: a thin1 tile is 8x8 pixels */
387 cp0_wr(dev, set(CCS_TILE_MAX, p->w * p->h / 64 - 1));
404 ib.d[ib.dws++]=set(CCS_TILE_MAX, p->w * p->h / 64 - 1);
388 405 /* CB_0_COLOR_VIEW: 0, or last tile index for an array of slices */ /* CB_0_COLOR_VIEW: 0, or last tile index for an array of slices */
389 cp0_wr(dev, 0);
390 /* CB_0_COLOR_INFO */
391 cp0_wr(dev, set(CCI_ENDIAN, CCI_ENDIAN_NONE)
406 ib.d[ib.dws++]=0;
407 /*
408 * CB_0_COLOR_INFO: for an sRGB color space in 8-bit little endian argb,
409 * the color component swap is ALT for the color components coming from
410 * the pixel/fragment shader, and values must be clamped to the mrt range
411 * before and after blending.
412 */
413 ib.d[ib.dws++]=set(CCI_ENDIAN, CCI_ENDIAN_NONE)
392 414 | set(CCI_FMT, CCI_COLOR_8_8_8_8) | set(CCI_FMT, CCI_COLOR_8_8_8_8)
393 | set(CCI_COMP_SWAP, CCI_SWAP_STD)
394 | set(CCI_NUMBER_TYPE, CCI_NUMBER_UINT)
395 | CCI_BLEND_BYPASS);
415 | set(CCI_COMP_SWAP, CCI_SWAP_ALT)
416 | set(CCI_NUMBER_TYPE, CCI_NUMBER_UNORM)
417 | CCI_BLEND_CLAMP;
396 418 /* CB_0_COLOR_ATTRIB: see gpu/tiling.c */ /* CB_0_COLOR_ATTRIB: see gpu/tiling.c */
397 cp0_wr(dev, set(CCA_TILE_MODE_IDX, 8));
419 ib.d[ib.dws++]=set(CCA_TILE_MODE_IDX, 8);
398 420 } }
399 421
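A worked instance of the tile arithmetic above, for a hypothetical 1024x768 framebuffer (a thin1 tile being 8x8 pixels):

	/* CCP_TILE_MAX = 1024 / 8 - 1        = 127    (tiles per row, minus 1)
	 * CCS_TILE_MAX = 1024 * 768 / 64 - 1 = 12287  (tiles per slice, minus 1) */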
400 422 static void ctx_cbs_blend(struct pci_dev *dev) static void ctx_cbs_blend(struct pci_dev *dev)
401 423 { {
402 424 /* blend blocks of CBs (Color Blocks) */ /* blend blocks of CBs (Color Blocks) */
403 cp0_wr(dev, PKT3(PKT3_SET_CTX_REG, 9));
404 cp0_wr(dev, CTX_REG_IDX(CB_0_BLEND_CTL));
425 ib.d[ib.dws++]=PKT3(PKT3_SET_CTX_REG, 9);
426 ib.d[ib.dws++]=CTX_REG_IDX(CB_0_BLEND_CTL);
405 427 /* CB_0_BLEND_CTL: disable blending */ /* CB_0_BLEND_CTL: disable blending */
406 cp0_wr(dev, 0);
428 ib.d[ib.dws++]=0;
407 429 /* CB_1_BLEND_CTL: disable blending */ /* CB_1_BLEND_CTL: disable blending */
408 cp0_wr(dev, 0);
430 ib.d[ib.dws++]=0;
409 431 /* CB_2_BLEND_CTL: disable blending */ /* CB_2_BLEND_CTL: disable blending */
410 cp0_wr(dev, 0);
432 ib.d[ib.dws++]=0;
411 433 /* CB_3_BLEND_CTL: disable blending */ /* CB_3_BLEND_CTL: disable blending */
412 cp0_wr(dev, 0);
434 ib.d[ib.dws++]=0;
413 435 /* CB_4_BLEND_CTL: disable blending */ /* CB_4_BLEND_CTL: disable blending */
414 cp0_wr(dev, 0);
436 ib.d[ib.dws++]=0;
415 437 /* CB_5_BLEND_CTL: disable blending */ /* CB_5_BLEND_CTL: disable blending */
416 cp0_wr(dev, 0);
438 ib.d[ib.dws++]=0;
417 439 /* CB_6_BLEND_CTL: disable blending */ /* CB_6_BLEND_CTL: disable blending */
418 cp0_wr(dev, 0);
440 ib.d[ib.dws++]=0;
419 441 /* CB_7_BLEND_CTL: disable blending */ /* CB_7_BLEND_CTL: disable blending */
420 cp0_wr(dev, 0);
442 ib.d[ib.dws++]=0;
421 443 } }
422 444
423 445 static void ctx_cbs(struct pci_dev *dev, struct ptn_tri *p) static void ctx_cbs(struct pci_dev *dev, struct ptn_tri *p)
 
... ... static void ctx_cbs(struct pci_dev *dev, struct ptn_tri *p)
432 454 * to be used by the CB 0 and do enable CB 0 to output all computed * to be used by the CB 0 and do enable CB 0 to output all computed
433 455 * color components to target (here our framebuffer) * color components to target (here our framebuffer)
434 456 */ */
435 cp0_wr(dev, PKT3(PKT3_SET_CTX_REG, 3));
436 cp0_wr(dev, CTX_REG_IDX(CB_TGT_MASK));
457 ib.d[ib.dws++]=PKT3(PKT3_SET_CTX_REG, 3);
458 ib.d[ib.dws++]=CTX_REG_IDX(CB_TGT_MASK);
437 459 /* CB_TGT_MASK */ /* CB_TGT_MASK */
438 cp0_wr(dev, set(CTM_TGT_0_ENA, CTM_TGT_RED | CTM_TGT_GREEN
439 | CTM_TGT_BLUE | CTM_TGT_ALPHA));
460 ib.d[ib.dws++]=set(CTM_TGT_0_ENA, CTM_TGT_RED | CTM_TGT_GREEN
461 | CTM_TGT_BLUE | CTM_TGT_ALPHA);
440 462 /* CB_SH_MASK */ /* CB_SH_MASK */
441 cp0_wr(dev, set(CSM_OUTPUT_0_ENA, CSM_OUTPUT_RED | CSM_OUTPUT_GREEN
442 | CSM_OUTPUT_BLUE | CSM_OUTPUT_ALPHA));
463 ib.d[ib.dws++]=set(CSM_OUTPUT_0_ENA, CSM_OUTPUT_RED | CSM_OUTPUT_GREEN
464 | CSM_OUTPUT_BLUE | CSM_OUTPUT_ALPHA);
443 465
444 cp0_wr(dev, PKT3(PKT3_SET_CTX_REG, 2));
445 cp0_wr(dev, CTX_REG_IDX(CB_COLOR_CTL));
466 ib.d[ib.dws++]=PKT3(PKT3_SET_CTX_REG, 2);
467 ib.d[ib.dws++]=CTX_REG_IDX(CB_COLOR_CTL);
446 468 /* CB_COLOR_CTL: switch normal mode for all CBs */ /* CB_COLOR_CTL: switch normal mode for all CBs */
447 cp0_wr(dev, set(CCC_MODE, CCC_CB_NORMAL) | set(CCC_ROP3, CCC_0XCC));
469 ib.d[ib.dws++]=set(CCC_MODE, CCC_CB_NORMAL) | set(CCC_ROP3, CCC_0XCC);
448 470 } }
449 471
450 472 static void ctx_pa_su(struct pci_dev *dev) static void ctx_pa_su(struct pci_dev *dev)
451 473 { {
452 474 /* PA (Primitive Assembler) SU (Setup Unit) */ /* PA (Primitive Assembler) SU (Setup Unit) */
453 cp0_wr(dev, PKT3(PKT3_SET_CTX_REG, 2));
454 cp0_wr(dev, CTX_REG_IDX(PA_SU_VTX_CTL));
475 ib.d[ib.dws++]=PKT3(PKT3_SET_CTX_REG, 2);
476 ib.d[ib.dws++]=CTX_REG_IDX(PA_SU_VTX_CTL);
455 477 /* /*
456 478 * PA_SU_VTX_CTL: tells the PA (Primitive Assembler) SU (Setup Unit) * PA_SU_VTX_CTL: tells the PA (Primitive Assembler) SU (Setup Unit)
457 479 * to place the(?) pixel at the center of the vertex? * to place the(?) pixel at the center of the vertex?
458 480 */ */
459 cp0_wr(dev, PSVC_PIX_CENTER);
481 ib.d[ib.dws++]=PSVC_PIX_CENTER;
460 482
461 483 /* /*
462 484 * setup for the PA (Primitive Assembler) SU (Setup Unit) for the * setup for the PA (Primitive Assembler) SU (Setup Unit) for the
463 485 * point/line primitive rendering: we do not render point * point/line primitive rendering: we do not render point
464 486 * or line primitives. * or line primitives.
487 * Set them to 8, as in the working samples.
465 488 */ */
466 cp0_wr(dev, PKT3(PKT3_SET_CTX_REG, 4));
467 cp0_wr(dev, CTX_REG_IDX(PA_SU_POINT_SZ));
489 ib.d[ib.dws++]=PKT3(PKT3_SET_CTX_REG, 4);
490 ib.d[ib.dws++]=CTX_REG_IDX(PA_SU_POINT_SZ);
468 491 /* PA_SU_POINT_SZ */ /* PA_SU_POINT_SZ */
469 cp0_wr(dev, 0);
492 ib.d[ib.dws++]=set(PSPS_H, 8) | set(PSPS_W, 8);
470 493 /* PA_SU_POINT_MINMAX */ /* PA_SU_POINT_MINMAX */
471 cp0_wr(dev, 0);
494 ib.d[ib.dws++]=set(PSPM_MIN, 8) | set(PSPM_MAX, 8);
472 495 /* PA_SU_LINE_CTL */ /* PA_SU_LINE_CTL */
473 cp0_wr(dev, 0);
496 ib.d[ib.dws++]=set(PSLC_W, 8);
474 497
475 cp0_wr(dev, PKT3(PKT3_SET_CTX_REG, 2));
476 cp0_wr(dev, CTX_REG_IDX(PA_SU_POLY_OF_CLAMP));
498 ib.d[ib.dws++]=PKT3(PKT3_SET_CTX_REG, 2);
499 ib.d[ib.dws++]=CTX_REG_IDX(PA_SU_POLY_OF_CLAMP);
477 500 /* /*
478 501 * PA_SU_POLY_OF_CLAMP: tell the PA (Primitive Assembler) SU * PA_SU_POLY_OF_CLAMP: tell the PA (Primitive Assembler) SU
479 502 * (Setup Unit) for polygon not to clamp something ? * (Setup Unit) for polygon not to clamp something ?
480 503 */ */
481 cp0_wr(dev, 0);
504 ib.d[ib.dws++]=0;
482 505
483 506 /* related to the SC (Scan Converter) */ /* related to the SC (Scan Converter) */
484 cp0_wr(dev, PKT3(PKT3_SET_CTX_REG, 2));
485 cp0_wr(dev, CTX_REG_IDX(PA_SU_SC_MODE_CTL));
486 /* PA_SU_SC_MODE_CTL */
487 cp0_wr(dev, PSSMC_FACE
488 | set(PSSMC_POLY_MODE_FRONT_PTYPE, PSSMC_DRAW_TRIANGLES)
507 ib.d[ib.dws++]=PKT3(PKT3_SET_CTX_REG, 2);
508 ib.d[ib.dws++]=CTX_REG_IDX(PA_SU_SC_MODE_CTL);
509 /* PA_SU_SC_MODE_CTL: removed FACE to follow working samples */
510 ib.d[ib.dws++]=set(PSSMC_POLY_MODE_FRONT_PTYPE, PSSMC_DRAW_TRIANGLES)
489 511 | set(PSSMC_POLY_MODE_BACK_PTYPE, PSSMC_DRAW_TRIANGLES) | set(PSSMC_POLY_MODE_BACK_PTYPE, PSSMC_DRAW_TRIANGLES)
490 | PSSMC_PROVOKING_VTX_LAST);
512 | PSSMC_PROVOKING_VTX_LAST;
513
514 ib.d[ib.dws++]=PKT3(PKT3_SET_CTX_REG, 2);
515 ib.d[ib.dws++]=CTX_REG_IDX(PA_SU_PRIM_FILTER_CTL);
516 /* PA_SU_PRIM_FILTER_CTL */
517 ib.d[ib.dws++]=0;
491 518 } }
492 519
493 520 static void ctx_pa_cl(struct pci_dev *dev) static void ctx_pa_cl(struct pci_dev *dev)
494 521 { {
495 522 /* PA (Primitive Assembler) CL (CLipper) */ /* PA (Primitive Assembler) CL (CLipper) */
496 cp0_wr(dev, PKT3(PKT3_SET_CTX_REG, 5));
497 cp0_wr(dev, CTX_REG_IDX(PA_CL_GB_VERT_CLIP_ADJ));
523 ib.d[ib.dws++]=PKT3(PKT3_SET_CTX_REG, 5);
524 ib.d[ib.dws++]=CTX_REG_IDX(PA_CL_GB_VERT_CLIP_ADJ);
498 525 /* disable GB (Guard Band) by setting those registers to 1.0f */ /* disable GB (Guard Band) by setting those registers to 1.0f */
499 526 /* PA_CL_GB_VERT_CLIP_ADJ */ /* PA_CL_GB_VERT_CLIP_ADJ */
500 cp0_wr(dev, f2u(1.0f));
527 ib.d[ib.dws++]=f2u(1.0f);
501 528 /* PA_CL_GB_VERT_DISC_ADJ */ /* PA_CL_GB_VERT_DISC_ADJ */
502 cp0_wr(dev, f2u(1.0f));
529 ib.d[ib.dws++]=f2u(1.0f);
503 530 /* PA_CL_GB_HORZ_CLIP_ADJ */ /* PA_CL_GB_HORZ_CLIP_ADJ */
504 cp0_wr(dev, f2u(1.0f));
531 ib.d[ib.dws++]=f2u(1.0f);
505 532 /* PA_CL_GB_HORZ_DISC_ADJ */ /* PA_CL_GB_HORZ_DISC_ADJ */
506 cp0_wr(dev, f2u(1.0f));
533 ib.d[ib.dws++]=f2u(1.0f);
507 534
508 535 /* /*
509 536 * define the way the PA (Primitive Assembler) CL (CLipper) will * define the way the PA (Primitive Assembler) CL (CLipper) will
510 537 * behave regarding NAN (Not A Number) and INF (INFinity) values * behave regarding NAN (Not A Number) and INF (INFinity) values
511 538 */ */
512 cp0_wr(dev, PKT3(PKT3_SET_CTX_REG, 2));
513 cp0_wr(dev, CTX_REG_IDX(PA_CL_NANINF_CTL));
539 ib.d[ib.dws++]=PKT3(PKT3_SET_CTX_REG, 2);
540 ib.d[ib.dws++]=CTX_REG_IDX(PA_CL_NANINF_CTL);
514 541 /* PA_CL_NANINF_CTL: to hardware default behaviour */ /* PA_CL_NANINF_CTL: to hardware default behaviour */
515 cp0_wr(dev, 0);
542 ib.d[ib.dws++]=0;
516 543
517 544 /* no clipping done on the input from the vertex shader */ /* no clipping done on the input from the vertex shader */
518 cp0_wr(dev, PKT3(PKT3_SET_CTX_REG, 2));
519 cp0_wr(dev, CTX_REG_IDX(PA_CL_VS_OUT_CTL));
545 ib.d[ib.dws++]=PKT3(PKT3_SET_CTX_REG, 2);
546 ib.d[ib.dws++]=CTX_REG_IDX(PA_CL_VS_OUT_CTL);
520 547 /* PA_CL_VS_OUT_CTL */ /* PA_CL_VS_OUT_CTL */
521 cp0_wr(dev, 0);
548 ib.d[ib.dws++]=0;
522 549
523 cp0_wr(dev, PKT3(PKT3_SET_CTX_REG, 2));
524 cp0_wr(dev, CTX_REG_IDX(PA_CL_CLIP_CTL));
525 /* PA_CL_CLIP_CTL */
526 cp0_wr(dev, set(PCCC_PS_UCP_MODE, 3) | PCCC_DX_LINEAR_ATTR_CLIP_ENA);
550 ib.d[ib.dws++]=PKT3(PKT3_SET_CTX_REG, 2);
551 ib.d[ib.dws++]=CTX_REG_IDX(PA_CL_CLIP_CTL);
552 /* PA_CL_CLIP_CTL: ucp mode 3=always expand and clip as trifan */
553 ib.d[ib.dws++]=set(PCCC_PS_UCP_MODE, 3) | PCCC_DX_LINEAR_ATTR_CLIP_ENA;
527 554 } }
528 555
529 556 static void ctx_pa_sc_vport_0_te(struct pci_dev *dev, struct ptn_tri *p) static void ctx_pa_sc_vport_0_te(struct pci_dev *dev, struct ptn_tri *p)
530 557 { {
531 float near;
532 float far;
533
534 /* XXX:need for the sample to run */
535 near = 30.0f;
536 far = 1000.0f;
537
538 558 /* /*
539 559 * PA (Primitive Assembler) SC (Scan Converter) VPORT (ViewPORT) 0 TE * PA (Primitive Assembler) SC (Scan Converter) VPORT (ViewPORT) 0 TE
540 560 * (Transform Engine) * (Transform Engine)
541 561 */ */
542 cp0_wr(dev, PKT3(PKT3_SET_CTX_REG, 7));
543 cp0_wr(dev, CTX_REG_IDX(PA_SC_VPORT_0_TE_X_SCALE));
562 ib.d[ib.dws++]=PKT3(PKT3_SET_CTX_REG, 7);
563 ib.d[ib.dws++]=CTX_REG_IDX(PA_SC_VPORT_0_TE_X_SCALE);
544 564 /* PA_SC_VPORT_0_TE_X_SCALE */ /* PA_SC_VPORT_0_TE_X_SCALE */
545 cp0_wr(dev, f2u(p->w / 2.0f));
565 ib.d[ib.dws++]=f2u(p->w / 2.0f);
546 566 /* PA_SC_VPORT_0_TE_X_OF */ /* PA_SC_VPORT_0_TE_X_OF */
547 cp0_wr(dev, f2u(p->w / 2.0f));
567 ib.d[ib.dws++]=f2u(p->w / 2.0f);
548 568 /* PA_SC_VPORT_0_TE_Y_SCALE */ /* PA_SC_VPORT_0_TE_Y_SCALE */
549 cp0_wr(dev, f2u(p->h / 2.0f));
569 ib.d[ib.dws++]=f2u(p->h / 2.0f);
550 570 /* PA_SC_VPORT_0_TE_Y_OF */ /* PA_SC_VPORT_0_TE_Y_OF */
551 cp0_wr(dev, f2u(p->h / 2.0f));
552 /* PA_SC_VPORT_0_TE_Z_SCALE */
553 cp0_wr(dev, f2u((far - near) / 2.0f));
554 /* PA_SC_VPORT_0_TE_Z_OF */
555 cp0_wr(dev, f2u((far - near) / 2.0f + near));
556
557 cp0_wr(dev, PKT3(PKT3_SET_CTX_REG, 3));
558 cp0_wr(dev, CTX_REG_IDX(PA_SC_VPORT_0_TE_ZMIN));
571 ib.d[ib.dws++]=f2u(p->h / 2.0f);
572 /* PA_SC_VPORT_0_TE_Z_SCALE: stick to working sample values */
573 ib.d[ib.dws++]=f2u(0.5f);
574 /* PA_SC_VPORT_0_TE_Z_OF: stick to working sample values */
575 ib.d[ib.dws++]=f2u(0.5f);
576
577 ib.d[ib.dws++]=PKT3(PKT3_SET_CTX_REG, 3);
578 ib.d[ib.dws++]=CTX_REG_IDX(PA_SC_VPORT_0_TE_ZMIN);
559 579 /* PA_SC_VPORT_0_TE_ZMIN: min Z value from VPORT TE */ /* PA_SC_VPORT_0_TE_ZMIN: min Z value from VPORT TE */
560 cp0_wr(dev, 0);
580 ib.d[ib.dws++]=f2u(0.0f);
561 581 /* PA_SC_VPORT_0_TE_ZMAX: max Z value from VPORT TE */ /* PA_SC_VPORT_0_TE_ZMAX: max Z value from VPORT TE */
562 cp0_wr(dev, f2u(1.0f));
582 ib.d[ib.dws++]=f2u(1.0f);
563 583 } }
564 584
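The scale/offset pairs above amount to the usual viewport transform; a hedged sketch of the mapping they program:

	/* x_screen = x_ndc * (w / 2) + w / 2    maps [-1, 1] to [0, w]
	 * y_screen = y_ndc * (h / 2) + h / 2    maps [-1, 1] to [0, h]
	 * z_screen = z_ndc * 0.5 + 0.5          maps [-1, 1] to [0, 1],
	 *            then bounded by ZMIN/ZMAX = 0.0/1.0 */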
565 585 static void ctx_pa_sc_vport_0(struct pci_dev *dev, struct ptn_tri *p) static void ctx_pa_sc_vport_0(struct pci_dev *dev, struct ptn_tri *p)
566 586 { {
567 587 /* PA (Primitive Assembler) SC (Scan Converter) VPORT (ViewPORT) 0 */ /* PA (Primitive Assembler) SC (Scan Converter) VPORT (ViewPORT) 0 */
568 cp0_wr(dev, PKT3(PKT3_SET_CTX_REG, 3));
569 cp0_wr(dev, CTX_REG_IDX(PA_SC_VPORT_0_SCISSOR_TL));
588 ib.d[ib.dws++]=PKT3(PKT3_SET_CTX_REG, 3);
589 ib.d[ib.dws++]=CTX_REG_IDX(PA_SC_VPORT_0_SCISSOR_TL);
570 590 /* PA_SC_VPORT_0_SCISSOR_TL */ /* PA_SC_VPORT_0_SCISSOR_TL */
571 cp0_wr(dev, set(PSVST_X, 0) | set(PSVST_Y, 0));
591 ib.d[ib.dws++]=set(PSVST_X, 0) | set(PSVST_Y, 0);
572 592 /* PA_SC_VPORT_0_SCISSOR_BR */ /* PA_SC_VPORT_0_SCISSOR_BR */
573 cp0_wr(dev, set(PSVSB_X, p->w) | set(PSVSB_Y, p->h));
593 ib.d[ib.dws++]=set(PSVSB_X, p->w) | set(PSVSB_Y, p->h);
574 594
575 595 ctx_pa_sc_vport_0_te(dev, p); ctx_pa_sc_vport_0_te(dev, p);
576 596 } }
 
... ... static void ctx_pa_sc_vports_te(struct pci_dev *dev)
581 601 * PA (Primitive Assembler) SC (Scan Converter) VPORT (ViewPORT) * PA (Primitive Assembler) SC (Scan Converter) VPORT (ViewPORT)
582 602 * TE (Transform Engine) * TE (Transform Engine)
583 603 */ */
584 cp0_wr(dev, PKT3(PKT3_SET_CTX_REG, 2));
585 cp0_wr(dev, CTX_REG_IDX(PA_SC_VPORT_TE_CTL));
586 /* PA_SC_VPORT_TE_CTL */
587 cp0_wr(dev, PSVTC_VPORT_X_SCALE_ENA | PSVTC_VPORT_X_OF_ENA
604 ib.d[ib.dws++]=PKT3(PKT3_SET_CTX_REG, 2);
605 ib.d[ib.dws++]=CTX_REG_IDX(PA_SC_VPORT_TE_CTL);
606 /* PA_SC_VPORT_TE_CTL: no so-called perspective division */
607 ib.d[ib.dws++]=PSVTC_VPORT_X_SCALE_ENA | PSVTC_VPORT_X_OF_ENA
588 608 | PSVTC_VPORT_Y_SCALE_ENA | PSVTC_VPORT_Y_OF_ENA | PSVTC_VPORT_Y_SCALE_ENA | PSVTC_VPORT_Y_OF_ENA
589 609 | PSVTC_VPORT_Z_SCALE_ENA | PSVTC_VPORT_Z_OF_ENA | PSVTC_VPORT_Z_SCALE_ENA | PSVTC_VPORT_Z_OF_ENA
590 | PSVTC_VTX_W0_FMT);
610 | PSVTC_VTX_W0_FMT;
591 611 } }
592 612
593 613 static void ctx_pa_sc_vports(struct pci_dev *dev, struct ptn_tri *p) static void ctx_pa_sc_vports(struct pci_dev *dev, struct ptn_tri *p)
 
... ... static void ctx_pa_sc_vports(struct pci_dev *dev, struct ptn_tri *p)
600 620 static void ctx_pa_sc(struct pci_dev *dev, struct ptn_tri *p) static void ctx_pa_sc(struct pci_dev *dev, struct ptn_tri *p)
601 621 { {
602 622 /* PA (Primitive Assembler) SC (Scan Converter) */ /* PA (Primitive Assembler) SC (Scan Converter) */
603 cp0_wr(dev, PKT3(PKT3_SET_CTX_REG, 3));
604 cp0_wr(dev, CTX_REG_IDX(PA_SC_MODE_CTL_0));
623 ib.d[ib.dws++]=PKT3(PKT3_SET_CTX_REG, 3);
624 ib.d[ib.dws++]=CTX_REG_IDX(PA_SC_MODE_CTL_0);
605 625 /* PA_SC_MODE_CTL_0 */ /* PA_SC_MODE_CTL_0 */
606 cp0_wr(dev, 0);
626 ib.d[ib.dws++]=0;
607 627 /* PA_SC_MODE_CTL_1 */ /* PA_SC_MODE_CTL_1 */
608 cp0_wr(dev, 0);
628 ib.d[ib.dws++]=0;
609 629
610 630 /* defines how to render the edge of primitives */ /* defines how to render the edge of primitives */
611 cp0_wr(dev, PKT3(PKT3_SET_CTX_REG, 2));
612 cp0_wr(dev, CTX_REG_IDX(PA_SC_EDGERULE));
631 ib.d[ib.dws++]=PKT3(PKT3_SET_CTX_REG, 2);
632 ib.d[ib.dws++]=CTX_REG_IDX(PA_SC_EDGERULE);
613 633 /* PA_SC_EDGERULE */ /* PA_SC_EDGERULE */
614 cp0_wr(dev, 0xaaaaaaaa);
634 ib.d[ib.dws++]=0xaaaaaaaa;
615 635
616 636 /*--------------------------------------------------------------------*/ /*--------------------------------------------------------------------*/
617 637 /* Anti-Aliasing... probably */ /* Anti-Aliasing... probably */
618 cp0_wr(dev, PKT3(PKT3_SET_CTX_REG, 2));
619 cp0_wr(dev, CTX_REG_IDX(PA_SC_AA_CFG));
638 ib.d[ib.dws++]=PKT3(PKT3_SET_CTX_REG, 2);
639 ib.d[ib.dws++]=CTX_REG_IDX(PA_SC_AA_CFG);
620 640 /* PA_SC_AA_CFG */ /* PA_SC_AA_CFG */
621 cp0_wr(dev, 0);
641 ib.d[ib.dws++]=0;
622 642
623 643 /* do something AA related */ /* do something AA related */
624 cp0_wr(dev, PKT3(PKT3_SET_CTX_REG, 3));
625 cp0_wr(dev, CTX_REG_IDX(PA_SC_AA_MASK_X0Y0_X1Y0));
644 ib.d[ib.dws++]=PKT3(PKT3_SET_CTX_REG, 3);
645 ib.d[ib.dws++]=CTX_REG_IDX(PA_SC_AA_MASK_X0Y0_X1Y0);
626 646 /* PA_SC_AA_MASK_X0Y0_X1Y0 */ /* PA_SC_AA_MASK_X0Y0_X1Y0 */
627 cp0_wr(dev, 0xffffffff);
647 ib.d[ib.dws++]=0xffffffff;
628 648 /* PA_SC_AA_MASK_X0Y1_X1Y1 */ /* PA_SC_AA_MASK_X0Y1_X1Y1 */
629 cp0_wr(dev, 0xffffffff);
649 ib.d[ib.dws++]=0xffffffff;
630 650 /*--------------------------------------------------------------------*/ /*--------------------------------------------------------------------*/
631 651
632 cp0_wr(dev, PKT3(PKT3_SET_CTX_REG, 2));
633 cp0_wr(dev, CTX_REG_IDX(PA_SC_CLIPRECT_RULE));
652 ib.d[ib.dws++]=PKT3(PKT3_SET_CTX_REG, 10);
653 ib.d[ib.dws++]=CTX_REG_IDX(PA_SC_CLIPRECT_RULE);
634 654 /* /*
635 655 * PA_SC_CLIPRECT_RULE: no scissor required then clip rule * PA_SC_CLIPRECT_RULE: no scissor required then clip rule
636 656 * is 0xffff (no specs provided) * is 0xffff (no specs provided)
637 657 */ */
638 cp0_wr(dev, set(PSCR_CLIP_RULE, 0xffff));
658 ib.d[ib.dws++]=set(PSCR_CLIP_RULE, 0xffff);
659 /* PA_SC_CLIPRECT_0_TL */
660 ib.d[ib.dws++]=0;
661 /* PA_SC_CLIPRECT_0_BR */
662 ib.d[ib.dws++]=set(PSCB_X, p->w) | set(PSCB_Y, p->h);
663 /* PA_SC_CLIPRECT_1_TL */
664 ib.d[ib.dws++]=0;
665 /* PA_SC_CLIPRECT_1_BR */
666 ib.d[ib.dws++]=set(PSCB_X, p->w) | set(PSCB_Y, p->h);
667 /* PA_SC_CLIPRECT_2_TL */
668 ib.d[ib.dws++]=0;
669 /* PA_SC_CLIPRECT_2_BR */
670 ib.d[ib.dws++]=set(PSCB_X, p->w) | set(PSCB_Y, p->h);
671 /* PA_SC_CLIPRECT_3_TL */
672 ib.d[ib.dws++]=0;
673 /* PA_SC_CLIPRECT_3_BR */
674 ib.d[ib.dws++]=set(PSCB_X, p->w) | set(PSCB_Y, p->h);
639 675
640 676 /*--------------------------------------------------------------------*/ /*--------------------------------------------------------------------*/
641 677 /* /*
 
... ... static void ctx_pa_sc(struct pci_dev *dev, struct ptn_tri *p)
644 680 * XXX: ORed register? Because if not will set all bits to 0! * XXX: ORed register? Because if not will set all bits to 0!
645 681 * We only want to set to 0 LINE_STIPPLE_ENA. * We only want to set to 0 LINE_STIPPLE_ENA.
646 682 */ */
647 cp0_wr(dev, PKT3(PKT3_SET_CTX_REG, 2));
648 cp0_wr(dev, CTX_REG_IDX(PA_SC_LINE_STIPPLE));
683 ib.d[ib.dws++]=PKT3(PKT3_SET_CTX_REG, 2);
684 ib.d[ib.dws++]=CTX_REG_IDX(PA_SC_LINE_STIPPLE);
649 685 /* PA_SC_LINE_STIPPLE */ /* PA_SC_LINE_STIPPLE */
650 cp0_wr(dev, 0);
686 ib.d[ib.dws++]=0;
651 687
652 688 /* /*
653 689 * Even if we are not rendering line primitives, tells the * Even if we are not rendering line primitives, tells the
654 690 * PA (Primitive Assembler) SC (scan converter/rasteriser) * PA (Primitive Assembler) SC (scan converter/rasteriser)
655 691 * to do "something with the last pixel * to do "something with the last pixel
656 692 */ */
657 cp0_wr(dev, PKT3(PKT3_SET_CTX_REG, 2));
658 cp0_wr(dev, CTX_REG_IDX(PA_SC_LINE_CTL));
693 ib.d[ib.dws++]=PKT3(PKT3_SET_CTX_REG, 2);
694 ib.d[ib.dws++]=CTX_REG_IDX(PA_SC_LINE_CTL);
659 695 /* PA_SC_LINE_CTL */ /* PA_SC_LINE_CTL */
660 cp0_wr(dev, PSLC_LAST_PIXEL);
696 ib.d[ib.dws++]=PSLC_LAST_PIXEL;
661 697 /*--------------------------------------------------------------------*/ /*--------------------------------------------------------------------*/
662 698
663 699 /*--------------------------------------------------------------------*/ /*--------------------------------------------------------------------*/
664 700 /* set the value of the scissors */ /* set the value of the scissors */
665 cp0_wr(dev, PKT3(PKT3_SET_CTX_REG, 3));
666 cp0_wr(dev, CTX_REG_IDX(PA_SC_GENERIC_SCISSOR_TL));
701 ib.d[ib.dws++]=PKT3(PKT3_SET_CTX_REG, 3);
702 ib.d[ib.dws++]=CTX_REG_IDX(PA_SC_GENERIC_SCISSOR_TL);
667 703 /* PA_SC_GENERIC_SCISSOR_TL */ /* PA_SC_GENERIC_SCISSOR_TL */
668 cp0_wr(dev, set(PSGST_X, 0) | set(PSGST_Y, 0));
704 ib.d[ib.dws++]=set(PSGST_X, 0) | set(PSGST_Y, 0);
669 705 /* PA_SC_GENERIC_SCISSOR_BR */ /* PA_SC_GENERIC_SCISSOR_BR */
670 cp0_wr(dev, set(PSGSB_X, p->w) | set(PSGSB_Y, p->h));
706 ib.d[ib.dws++]=set(PSGSB_X, p->w) | set(PSGSB_Y, p->h);
671 707
672 cp0_wr(dev, PKT3(PKT3_SET_CTX_REG, 3));
673 cp0_wr(dev, CTX_REG_IDX(PA_SC_SCREEN_SCISSOR_TL));
708 ib.d[ib.dws++]=PKT3(PKT3_SET_CTX_REG, 3);
709 ib.d[ib.dws++]=CTX_REG_IDX(PA_SC_SCREEN_SCISSOR_TL);
674 710 /* PA_SC_SCREEN_SCISSOR_TL */ /* PA_SC_SCREEN_SCISSOR_TL */
675 cp0_wr(dev, set(PSSST_X, 0) | set(PSSST_Y, 0));
711 ib.d[ib.dws++]=set(PSSST_X, 0) | set(PSSST_Y, 0);
676 712 /* PA_SC_SCREEN_SCISSOR_BR */ /* PA_SC_SCREEN_SCISSOR_BR */
677 cp0_wr(dev, set(PSSSB_X, p->w) | set(PSSSB_Y, p->h));
713 ib.d[ib.dws++]=set(PSSSB_X, p->w) | set(PSSSB_Y, p->h);
678 714
679 cp0_wr(dev, PKT3(PKT3_SET_CTX_REG, 4));
680 cp0_wr(dev, CTX_REG_IDX(PA_SC_WND_OF));
715 ib.d[ib.dws++]=PKT3(PKT3_SET_CTX_REG, 4);
716 ib.d[ib.dws++]=CTX_REG_IDX(PA_SC_WND_OF);
681 717 /* /*
682 718 * PA_SC_WND_OF: the window offset in the screen which can be used by * PA_SC_WND_OF: the window offset in the screen which can be used by
683 719 * many scissors. * many scissors.
684 720 */ */
685 cp0_wr(dev, 0);
721 ib.d[ib.dws++]=0;
686 722 /* PA_SC_WND_SCISSOR_TL */ /* PA_SC_WND_SCISSOR_TL */
687 cp0_wr(dev, set(PSWST_X, 0) | set(PSWST_Y, 0));
723 ib.d[ib.dws++]=set(PSWST_X, 0) | set(PSWST_Y, 0);
688 724 /* PA_SC_WND_SCISSOR_BR */ /* PA_SC_WND_SCISSOR_BR */
689 cp0_wr(dev, set(PSWSB_X, p->w) | set(PSWSB_Y, p->h));
725 ib.d[ib.dws++]=set(PSWSB_X, p->w) | set(PSWSB_Y, p->h);
690 726 /*--------------------------------------------------------------------*/ /*--------------------------------------------------------------------*/
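
The bottom-right scissor values are packed with the set() helper from regs.h, the counterpart of the get(mask, v) accessor visible in the regs.h hunk below. Its actual definition is not shown in this commit; a minimal sketch of how such a mask-based field helper is typically written (shift the value to the mask's lowest bit, then mask) could be:

        /* Sketch only, assuming a mask-based field helper; the real set()
         * lives in regs.h and may differ. */
        static inline u32 set(u32 mask, u32 v)
        {
                return (v << (ffs(mask) - 1)) & mask;
        }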
691 727
692 728 ctx_pa_sc_vports(dev, p); ctx_pa_sc_vports(dev, p);
 
... ... void ctx_misc_init(struct pci_dev *dev)
704 740 { {
705 741 /*--------------------------------------------------------------------*/ /*--------------------------------------------------------------------*/
706 742 /* basic init GPU context, XXX: not using the CLR_CTX command ??? */ /* basic init GPU context, XXX: not using the CLR_CTX command ??? */
707 cp0_wr(dev, PKT3(PKT3_SET_CTX_REG, 14));
708 cp0_wr(dev, CTX_REG_IDX(VGT_OUTPUT_PATH_CTL));
743 ib.d[ib.dws++]=PKT3(PKT3_SET_CTX_REG, 14);
744 ib.d[ib.dws++]=CTX_REG_IDX(VGT_OUTPUT_PATH_CTL);
709 745 /* VGT_OUTPUT_PATH_CTL */ /* VGT_OUTPUT_PATH_CTL */
710 cp0_wr(dev, 0);
746 ib.d[ib.dws++]=0;
711 747 /* VGT_HOS_CTL */ /* VGT_HOS_CTL */
712 cp0_wr(dev, 0);
748 ib.d[ib.dws++]=0;
713 749 /* VGT_HOS_MAX_TESS_LVL */ /* VGT_HOS_MAX_TESS_LVL */
714 cp0_wr(dev, 0);
750 ib.d[ib.dws++]=0;
715 751 /* VGT_HOS_MIN_TESS_LVL */ /* VGT_HOS_MIN_TESS_LVL */
716 cp0_wr(dev, 0);
752 ib.d[ib.dws++]=0;
717 753 /* VGT_HOS_REUSE_DEPTH */ /* VGT_HOS_REUSE_DEPTH */
718 cp0_wr(dev, 0);
754 ib.d[ib.dws++]=0;
719 755 /* VGT_GROUP_PRIM_TYPE */ /* VGT_GROUP_PRIM_TYPE */
720 cp0_wr(dev, 0);
756 ib.d[ib.dws++]=0;
721 757 /* VGT_GROUP_FIRST_DECR */ /* VGT_GROUP_FIRST_DECR */
722 cp0_wr(dev, 0);
758 ib.d[ib.dws++]=0;
723 759 /* VGT_GROUP_DECR */ /* VGT_GROUP_DECR */
724 cp0_wr(dev, 0);
760 ib.d[ib.dws++]=0;
725 761 /* VGT_GROUP_VECT_0_CTL */ /* VGT_GROUP_VECT_0_CTL */
726 cp0_wr(dev, 0);
762 ib.d[ib.dws++]=0;
727 763 /* VGT_GROUP_VECT_1_CTL */ /* VGT_GROUP_VECT_1_CTL */
728 cp0_wr(dev, 0);
764 ib.d[ib.dws++]=0;
729 765 /* VGT_GROUP_VECT_0_FMT_CTL */ /* VGT_GROUP_VECT_0_FMT_CTL */
730 cp0_wr(dev, 0);
766 ib.d[ib.dws++]=0;
731 767 /* VGT_GROUP_VECT_1_FMT_CTL */ /* VGT_GROUP_VECT_1_FMT_CTL */
732 cp0_wr(dev, 0);
768 ib.d[ib.dws++]=0;
733 769 /* VGT_GS_MODE */ /* VGT_GS_MODE */
734 cp0_wr(dev, 0);
770 ib.d[ib.dws++]=0;
735 771
736 cp0_wr(dev, PKT3(PKT3_SET_CTX_REG, 2));
737 cp0_wr(dev, CTX_REG_IDX(VGT_PRIM_ID_ENA));
772 ib.d[ib.dws++]=PKT3(PKT3_SET_CTX_REG, 2);
773 ib.d[ib.dws++]=CTX_REG_IDX(VGT_PRIM_ID_ENA);
738 774 /* VGT_PRIM_ID_ENA */ /* VGT_PRIM_ID_ENA */
739 cp0_wr(dev, 0);
775 ib.d[ib.dws++]=0;
740 776
741 cp0_wr(dev, PKT3(PKT3_SET_CTX_REG, 2));
742 cp0_wr(dev, CTX_REG_IDX(VGT_PRIM_ID_RESET));
777 ib.d[ib.dws++]=PKT3(PKT3_SET_CTX_REG, 2);
778 ib.d[ib.dws++]=CTX_REG_IDX(VGT_PRIM_ID_RESET);
743 779 /* VGT_PRIM_ID_RESET */ /* VGT_PRIM_ID_RESET */
744 cp0_wr(dev, 0);
780 ib.d[ib.dws++]=0;
745 781
746 cp0_wr(dev, PKT3(PKT3_SET_CTX_REG, 3));
747 cp0_wr(dev, CTX_REG_IDX(VGT_STRMOUT_CFG));
782 ib.d[ib.dws++]=PKT3(PKT3_SET_CTX_REG, 3);
783 ib.d[ib.dws++]=CTX_REG_IDX(VGT_STRMOUT_CFG);
748 784 /* VGT_STRMOUT_CFG */ /* VGT_STRMOUT_CFG */
749 cp0_wr(dev, 0);
785 ib.d[ib.dws++]=0;
750 786 /* VGT_STRMOUT_BUF_CFG */ /* VGT_STRMOUT_BUF_CFG */
751 cp0_wr(dev, 0);
787 ib.d[ib.dws++]=0;
752 788
753 cp0_wr(dev, PKT3(PKT3_SET_CTX_REG, 2));
754 cp0_wr(dev, CTX_REG_IDX(IA_MULTI_VGT_PARAM));
789 ib.d[ib.dws++]=PKT3(PKT3_SET_CTX_REG, 2);
790 ib.d[ib.dws++]=CTX_REG_IDX(IA_MULTI_VGT_PARAM);
755 791 /* IA_MULTI_VGT_PARAM */ /* IA_MULTI_VGT_PARAM */
756 cp0_wr(dev, IMVP_SWITCH_ON_EOP | IMVP_PARTIAL_VS_WAVE_ON
757 | set(IMVP_PRIM_GROUP_SZ, 63));
792 ib.d[ib.dws++]=IMVP_SWITCH_ON_EOP | IMVP_PARTIAL_VS_WAVE_ON
793 | set(IMVP_PRIM_GROUP_SZ, 63);
758 794
759 cp0_wr(dev, PKT3(PKT3_SET_CTX_REG, 3));
760 cp0_wr(dev, CTX_REG_IDX(VGT_REUSE_OFF));
795 ib.d[ib.dws++]=PKT3(PKT3_SET_CTX_REG, 3);
796 ib.d[ib.dws++]=CTX_REG_IDX(VGT_REUSE_OFF);
761 797 /* VGT_REUSE_OFF */ /* VGT_REUSE_OFF */
762 cp0_wr(dev, 0);
798 ib.d[ib.dws++]=0;
763 799 /* VGT_VTX_CNT_ENA */ /* VGT_VTX_CNT_ENA */
764 cp0_wr(dev, 0);
800 ib.d[ib.dws++]=0;
765 801
766 cp0_wr(dev, PKT3(PKT3_SET_CTX_REG, 2));
767 cp0_wr(dev, CTX_REG_IDX(VGT_SHADER_STAGES_ENA));
802 ib.d[ib.dws++]=PKT3(PKT3_SET_CTX_REG, 2);
803 ib.d[ib.dws++]=CTX_REG_IDX(VGT_SHADER_STAGES_ENA);
768 804 /* VGT_SHADER_STAGES_ENA */ /* VGT_SHADER_STAGES_ENA */
769 cp0_wr(dev, 0);
805 ib.d[ib.dws++]=0;
770 806
771 cp0_wr(dev, PKT3(PKT3_SET_CTX_REG, 3));
772 cp0_wr(dev, CTX_REG_IDX(PA_SC_CENTROID_PRIORITY_0));
807 ib.d[ib.dws++]=PKT3(PKT3_SET_CTX_REG, 3);
808 ib.d[ib.dws++]=CTX_REG_IDX(PA_SC_CENTROID_PRIORITY_0);
773 809 /* PA_SC_CENTROID_PRIORITY_0 */ /* PA_SC_CENTROID_PRIORITY_0 */
774 cp0_wr(dev, 0x76543210);
810 ib.d[ib.dws++]=0x76543210;
775 811 /* PA_SC_CENTROID_PRIORITY_1 */ /* PA_SC_CENTROID_PRIORITY_1 */
776 cp0_wr(dev, 0xfedcba98);
812 ib.d[ib.dws++]=0xfedcba98;
777 813
778 cp0_wr(dev, PKT3(PKT3_SET_CTX_REG, 2));
779 cp0_wr(dev, CTX_REG_IDX(DB_EQAA));
814 ib.d[ib.dws++]=PKT3(PKT3_SET_CTX_REG, 2);
815 ib.d[ib.dws++]=CTX_REG_IDX(DB_EQAA);
780 816 /* DB_EQAA */ /* DB_EQAA */
781 cp0_wr(dev, 0x00110000);
782 /*--------------------------------------------------------------------*/
783
784 //XXX: may not be of use
785 /*--------------------------------------------------------------------*/
786 /* disable predicate rendering */
787 cp0_wr(dev, PKT3(PKT3_SET_PREDICATION, 2));
788 cp0_wr(dev, 0);
789 cp0_wr(dev, set(PRED_OP, PRED_OP_CLR));
817 ib.d[ib.dws++]=0x00110000;
790 818 /*--------------------------------------------------------------------*/ /*--------------------------------------------------------------------*/
791 819 } }
792 820
 
... ... static void cfg(struct pci_dev *dev, struct ptn_tri *p)
799 827 { {
800 828 /*--------------------------------------------------------------------*/ /*--------------------------------------------------------------------*/
801 829 /* VGT (Vertex Grouper and Tesselator block) */ /* VGT (Vertex Grouper and Tesselator block) */
802 cp0_wr(dev, PKT3(PKT3_SET_CFG_REG, 2));
803 cp0_wr(dev, CFG_REG_IDX(VGT_PRIM_TYPE));
830 ib.d[ib.dws++]=PKT3(PKT3_SET_CFG_REG, 2);
831 ib.d[ib.dws++]=CFG_REG_IDX(VGT_PRIM_TYPE);
804 832 /* VGT_PRIM_TYPE */ /* VGT_PRIM_TYPE */
805 cp0_wr(dev, set(VPT_PRIM_TYPE, VPT_TRILIST));
833 ib.d[ib.dws++]=set(VPT_PRIM_TYPE, VPT_TRILIST);
806 834 /*--------------------------------------------------------------------*/ /*--------------------------------------------------------------------*/
807 835
808 836 /*--------------------------------------------------------------------*/ /*--------------------------------------------------------------------*/
809 837 /* PA (Primitive Assembler) CL (CLipper) */ /* PA (Primitive Assembler) CL (CLipper) */
810 cp0_wr(dev, PKT3(PKT3_SET_CFG_REG, 2));
811 cp0_wr(dev, CFG_REG_IDX(PA_CL_ENHANCE));
838 ib.d[ib.dws++]=PKT3(PKT3_SET_CFG_REG, 2);
839 ib.d[ib.dws++]=CFG_REG_IDX(PA_CL_ENHANCE);
812 840 /* PA_CL_ENHANCE */ /* PA_CL_ENHANCE */
813 cp0_wr(dev, set(PCE_CLIP_SEQ_N, 3) | PCE_CLIP_VTX_REORDER_ENA);
841 ib.d[ib.dws++]=set(PCE_CLIP_SEQ_N, 3) | PCE_CLIP_VTX_REORDER_ENA;
814 842 /*--------------------------------------------------------------------*/ /*--------------------------------------------------------------------*/
815 843 } }
816 844
 
... ... int ptn_tri(struct pci_dev *dev, struct ptn_tri *p)
843 871 " buffer in (v)ram\n"); " buffer in (v)ram\n");
844 872 return -ENOMEM; return -ENOMEM;
845 873 } }
846 memcpy(dd->vram.bar0 + vtx_buf, &vertices, sizeof(vertices));
874 memcpy_toio(dd->vram.bar0 + vtx_buf, &vertices, sizeof(vertices));
847 875 /*--------------------------------------------------------------------*/ /*--------------------------------------------------------------------*/
848 876
849 877 /*--------------------------------------------------------------------*/ /*--------------------------------------------------------------------*/
 
... ... int ptn_tri(struct pci_dev *dev, struct ptn_tri *p)
851 879
852 880 /* vertex position buffer start address */ /* vertex position buffer start address */
853 881 buf_res_descs[0] = lower_32_bits(vtx_buf); buf_res_descs[0] = lower_32_bits(vtx_buf);
854 buf_res_descs[1] |= upper_32_bits(vtx_buf) & 0xff; /* 40 bits address */
882 buf_res_descs[1] |= upper_32_bits(vtx_buf) & 0xff;
855 883 /* vertex color buffer start address */ /* vertex color buffer start address */
856 884 buf_res_descs[4] = lower_32_bits(vtx_buf + 4 * sizeof(float)); buf_res_descs[4] = lower_32_bits(vtx_buf + 4 * sizeof(float));
857 885 buf_res_descs[5] |= upper_32_bits(vtx_buf + 4 * sizeof(float)) & 0xff; buf_res_descs[5] |= upper_32_bits(vtx_buf + 4 * sizeof(float)) & 0xff;
858
859 r = rng_alloc_align(&buf_res_descs_buf, &dd->vram.mng,
860 sizeof(buf_res_descs), 256);
861 if (r != 0) {
862 dev_err(&dev->dev, "pattern triangle: unable to allocate buffer"
863 " resource descriptors buffer in (v)ram\n");
864 return -ENOMEM;
865 }
866 memcpy(dd->vram.bar0 + buf_res_descs_buf, &buf_res_descs,
867 sizeof(buf_res_descs));
868 886 /*--------------------------------------------------------------------*/ /*--------------------------------------------------------------------*/
869 887
870 888 /*--------------------------------------------------------------------*/ /*--------------------------------------------------------------------*/
 
... ... int ptn_tri(struct pci_dev *dev, struct ptn_tri *p)
875 893 " shader buffer in (v)ram\n"); " shader buffer in (v)ram\n");
876 894 return -ENOMEM; return -ENOMEM;
877 895 } }
878 memcpy(dd->vram.bar0 + vs_buf, &vs, sizeof(vs));
896 memcpy_toio(dd->vram.bar0 + vs_buf, &vs, sizeof(vs));
879 897 /*--------------------------------------------------------------------*/ /*--------------------------------------------------------------------*/
880 898
881 899 /*--------------------------------------------------------------------*/ /*--------------------------------------------------------------------*/
 
... ... int ptn_tri(struct pci_dev *dev, struct ptn_tri *p)
886 904 "/fragment shader buffer in (v)ram\n"); "/fragment shader buffer in (v)ram\n");
887 905 return -ENOMEM; return -ENOMEM;
888 906 } }
889 memcpy(dd->vram.bar0 + ps_buf, &ps, sizeof(ps));
907 memcpy_toio(dd->vram.bar0 + ps_buf, &ps, sizeof(ps));
908 /*--------------------------------------------------------------------*/
909
910 /*--------------------------------------------------------------------*/
911 /* create the indirect buffer */
912 r = rng_alloc_align(&ib.gpu_addr, &dd->vram.mng, sizeof(ib.d), 16 * 4);
913 if (r != 0) {
914 dev_err(&dev->dev, "pattern triangle: unable to allocate indirect"
915 " buffer in (v)ram\n");
916 return -ENOMEM;
917 }
918 ib.dws=0;
890 919 /*--------------------------------------------------------------------*/ /*--------------------------------------------------------------------*/
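
The indirect buffer bookkeeping used above and below is a file-scope object in tri.c whose definition is outside this hunk. The shape inferred from the accesses in this commit (a staging array of dwords, a write index, and the VRAM address it is copied to) is roughly:

        /* Assumed layout, not taken verbatim from the driver; the array size
         * (2048 dwords) is a placeholder. */
        static struct {
                u32 d[2048];    /* command dwords staged in system memory */
                u32 dws;        /* number of dwords written so far */
                u64 gpu_addr;   /* VRAM offset the buffer is copied to */
        } ib;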
891 920
892 921 /*--------------------------------------------------------------------*/ /*--------------------------------------------------------------------*/
893 922 /* sync shader caches, texture cache, color block caches */ /* sync shader caches, texture cache, color block caches */
894 cp0_wr(dev, PKT3(PKT3_SURF_SYNC, 4));
895 /* CP_COHER_CTL */
896 cp0_wr(dev, CCC_SH_ICACHE_ACTION_ENA | CCC_SH_KCACHE_ACTION_ENA
923 ib.d[ib.dws++]=PKT3(PKT3_SET_CFG_REG, 2);
924 ib.d[ib.dws++]=CFG_REG_IDX(CP_COHER_CTL_1);
925 ib.d[ib.dws++]=0;
926 ib.d[ib.dws++]=PKT3(PKT3_SURF_SYNC, 4);
927 /* CP_COHER_CTL_0 */
928 ib.d[ib.dws++]=CCC_SH_ICACHE_ACTION_ENA | CCC_SH_KCACHE_ACTION_ENA
897 929 | CCC_TC_ACTION_ENA | CCC_CB_ACTION_ENA | CCC_TC_ACTION_ENA | CCC_CB_ACTION_ENA
898 | CCC_CB0_DEST_BASE_ENA);
930 | CCC_CB0_DEST_BASE_ENA;
899 931 /* CP_COHER_SZ */ /* CP_COHER_SZ */
900 cp0_wr(dev, 0xffffffff);
932 ib.d[ib.dws++]=0xffffffff;
901 933 /* CP_COHER_BASE */ /* CP_COHER_BASE */
902 cp0_wr(dev, 0);
903 cp0_wr(dev, 0x0000000a);/* polling interval, 0xa(10) * 16 clocks */
934 ib.d[ib.dws++]=0;
935 ib.d[ib.dws++]=0x0000000a;/* polling interval, 0xa(10) * 16 clocks */
904 936 /*--------------------------------------------------------------------*/ /*--------------------------------------------------------------------*/
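
The commit now pairs each SURF_SYNC with an explicit clear of CP_COHER_CTL_1 through a configuration-register write, since the 4-dword packet itself only carries CP_COHER_CTL_0, the size, the base and the poll interval. A hypothetical wrapper mirroring the sequence above (not driver code):

        /* Hypothetical helper, not in this commit: clear CP_COHER_CTL_1, then
         * emit the SURF_SYNC packet with the requested CP_COHER_CTL_0 bits,
         * covering the full address range. */
        static void ib_surf_sync(u32 coher_ctl_0)
        {
                ib.d[ib.dws++] = PKT3(PKT3_SET_CFG_REG, 2);
                ib.d[ib.dws++] = CFG_REG_IDX(CP_COHER_CTL_1);
                ib.d[ib.dws++] = 0;
                ib.d[ib.dws++] = PKT3(PKT3_SURF_SYNC, 4);
                ib.d[ib.dws++] = coher_ctl_0;   /* CP_COHER_CTL_0 */
                ib.d[ib.dws++] = 0xffffffff;    /* CP_COHER_SZ: whole range */
                ib.d[ib.dws++] = 0;             /* CP_COHER_BASE */
                ib.d[ib.dws++] = 0x0000000a;    /* poll interval, 10 * 16 clocks */
        }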
905 937
906 938 /*--------------------------------------------------------------------*/ /*--------------------------------------------------------------------*/
907 /* seems mandatory at the start of a command stream (IB?) */
908 cp0_wr(dev, PKT3(PKT3_CTX_CTL, 2));
909 cp0_wr(dev, 0x80000000);
910 cp0_wr(dev, 0x80000000);
939 /* seems mandatory at the start of a command stream */
940 ib.d[ib.dws++]=PKT3(PKT3_CTX_CTL, 2);
941 ib.d[ib.dws++]=0x80000000;
942 ib.d[ib.dws++]=0x80000000;
911 943 /*--------------------------------------------------------------------*/ /*--------------------------------------------------------------------*/
912 944
913 945 /*====================================================================*/ /*====================================================================*/
 
... ... int ptn_tri(struct pci_dev *dev, struct ptn_tri *p)
918 950
919 951 /*--------------------------------------------------------------------*/ /*--------------------------------------------------------------------*/
920 952 /* the draw command launch */ /* the draw command launch */
921 cp0_wr(dev, PKT3(PKT3_IDX_TYPE, 1));
922 cp0_wr(dev, set(PKT3_SZ, PKT3_16BITS));
953 ib.d[ib.dws++]=PKT3(PKT3_IDX_TYPE, 1);
954 ib.d[ib.dws++]=set(PKT3_SZ, PKT3_16BITS);
923 955
924 cp0_wr(dev, PKT3(PKT3_INST_N, 1));
925 cp0_wr(dev, 1);
956 ib.d[ib.dws++]=PKT3(PKT3_INST_N, 1);
957 ib.d[ib.dws++]=1;
926 958
927 cp0_wr(dev, PKT3(PKT3_DRAW_IDX_AUTO, 2));
959 ib.d[ib.dws++]=PKT3(PKT3_DRAW_IDX_AUTO, 2);
928 960 /* 3 indices to generate */ /* 3 indices to generate */
929 cp0_wr(dev, 3);
961 ib.d[ib.dws++]=3;
930 962 /* VGT_DRAW_INITIATOR */ /* VGT_DRAW_INITIATOR */
931 cp0_wr(dev, set(VDI_SRC_SELECT, VDI_AUTO_IDX));
963 ib.d[ib.dws++]=set(VDI_SRC_SELECT, VDI_AUTO_IDX);
932 964 /*--------------------------------------------------------------------*/ /*--------------------------------------------------------------------*/
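
The launch itself is three packets: select 16-bit indices, set the instance count to one, then DRAW_IDX_AUTO with the number of indices to auto-generate and the VGT_DRAW_INITIATOR source select. As a hypothetical helper restating the dwords above (not driver code):

        /* Hypothetical helper, not in this commit. */
        static void ib_draw_auto(u32 idx_n)
        {
                ib.d[ib.dws++] = PKT3(PKT3_IDX_TYPE, 1);
                ib.d[ib.dws++] = set(PKT3_SZ, PKT3_16BITS);
                ib.d[ib.dws++] = PKT3(PKT3_INST_N, 1);
                ib.d[ib.dws++] = 1;             /* one instance */
                ib.d[ib.dws++] = PKT3(PKT3_DRAW_IDX_AUTO, 2);
                ib.d[ib.dws++] = idx_n;         /* indices to generate */
                ib.d[ib.dws++] = set(VDI_SRC_SELECT, VDI_AUTO_IDX);     /* VGT_DRAW_INITIATOR */
        }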
933 965
934 966 /*--------------------------------------------------------------------*/ /*--------------------------------------------------------------------*/
935 967 /* flush CBs and DB, XXX: miss the main CB? */ /* flush CBs and DB, XXX: miss the main CB? */
936 cp0_wr(dev, PKT3(PKT3_SURF_SYNC, 4));
937 /* CP_COHER_CTL */
938 cp0_wr(dev, CCC_CB0_DEST_BASE_ENA | CCC_CB1_DEST_BASE_ENA
968 ib.d[ib.dws++]=PKT3(PKT3_SET_CFG_REG, 2);
969 ib.d[ib.dws++]=CFG_REG_IDX(CP_COHER_CTL_1);
970 ib.d[ib.dws++]=0;
971 ib.d[ib.dws++]=PKT3(PKT3_SURF_SYNC, 4);
972 /* CP_COHER_CTL_0 */
973 ib.d[ib.dws++]=CCC_CB0_DEST_BASE_ENA | CCC_CB1_DEST_BASE_ENA
939 974 | CCC_CB2_DEST_BASE_ENA | CCC_CB3_DEST_BASE_ENA | CCC_CB2_DEST_BASE_ENA | CCC_CB3_DEST_BASE_ENA
940 975 | CCC_CB4_DEST_BASE_ENA | CCC_CB5_DEST_BASE_ENA | CCC_CB4_DEST_BASE_ENA | CCC_CB5_DEST_BASE_ENA
941 976 | CCC_CB6_DEST_BASE_ENA | CCC_CB7_DEST_BASE_ENA | CCC_CB6_DEST_BASE_ENA | CCC_CB7_DEST_BASE_ENA
942 | CCC_DB_DEST_BASE_ENA | CCC_DB_ACTION_ENA);
977 | CCC_DB_DEST_BASE_ENA | CCC_DB_ACTION_ENA | CCC_TCL1_ACTION_ENA
978 | CCC_TC_ACTION_ENA | CCC_SH_KCACHE_ACTION_ENA
979 | CCC_SH_ICACHE_ACTION_ENA;
943 980 /* CP_COHER_SZ */ /* CP_COHER_SZ */
944 cp0_wr(dev, 0xffffffff);
981 ib.d[ib.dws++]=0xffffffff;
945 982 /* CP_COHER_BASE */ /* CP_COHER_BASE */
946 cp0_wr(dev, 0);
947 cp0_wr(dev, 0x0000000a);/* polling interval, 0xa(10) * 16 clocks */
983 ib.d[ib.dws++]=0;
984 ib.d[ib.dws++]=0x0000000a;/* polling interval, 0xa(10) * 16 clocks */
985 /*--------------------------------------------------------------------*/
986
987 /*--------------------------------------------------------------------*/
988 /* EOP event with some cache flushes and invalidations */
989 ib.d[ib.dws++]=PKT3(PKT3_EVENT_WR_EOP, 5);
990 ib.d[ib.dws++]=set(PKT3_EVENT_IDX, 5) | set(VEI_EVENT_TYPE,
991 VEI_CACHE_FLUSH_AND_INV_TS_EVENT);
992 ib.d[ib.dws++]=lower_32_bits(p->fb_gpu_addr);
993 ib.d[ib.dws++]=(upper_32_bits(p->fb_gpu_addr) & 0xff)
994 | set(PKT3_DATA_SEL, 2) | set(PKT3_INT_SEL, 2);
995 ib.d[ib.dws++]=0xdeadbeef;
996 ib.d[ib.dws++]=0xcafedead;
948 997 /*--------------------------------------------------------------------*/ /*--------------------------------------------------------------------*/
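
The EVENT_WR_EOP packet writes the 0xcafedead:0xdeadbeef fence once the pipeline has drained and the caches have been flushed, so software can poll that location to know the draw has finished. A minimal check, assuming the fence lands at fb_gpu_addr and that VRAM GPU offsets map one-to-one onto the BAR0 mapping, as they do for the vertex and shader uploads above (a real path would bound the loop or use the EOP interrupt):

        /* Completion poll sketch; address-mapping assumption noted above. */
        while (readl(dd->vram.bar0 + p->fb_gpu_addr) != 0xdeadbeef)
                cpu_relax();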
949 998
999 /* align size, upload and run the indirect buffer */
1000 while ((ib.dws & CP_RING_PFP_DW_MASK) != 0)
1001 ib.d[ib.dws++] = PKT2;
1002 memcpy_toio(dd->vram.bar0 + ib.gpu_addr, ib.d,
1003 ib.dws * sizeof(ib.d[0]));
1004 cp0_wr(dev, PKT3(PKT3_IB, 3));
1005 cp0_wr(dev, lower_32_bits(ib.gpu_addr));
1006 cp0_wr(dev, upper_32_bits(ib.gpu_addr));
1007 cp0_wr(dev, ib.dws);
950 1008 cp0_commit(dev); cp0_commit(dev);
951 1009 return 0; return 0;
952 1010 } }
File drivers/gpu/alga/amd/si/regs.h changed (mode: 100644) (index 783b9e7..06ea6da)
... ... static inline u32 get(u32 mask, u32 v)
133 133 #define MVMLTC_ENA_L1_FRAG_PROCESSING BIT(1) #define MVMLTC_ENA_L1_FRAG_PROCESSING BIT(1)
134 134 #define MVMLTC_SYS_ACCESS_MODE 0x00000018 #define MVMLTC_SYS_ACCESS_MODE 0x00000018
135 135 #define MVMLTC_PA_ONLY 0 #define MVMLTC_PA_ONLY 0
136 #define MVMLTC_USE_SYS_MAP 1
137 #define MVMLTC_IN_SYS 2
138 #define MVMLTC_NOT_IN_SYS 3
136 #define MVMLTC_ALWAYS_USE_SYS_MAP 1
137 #define MVMLTC_MAPPED_ACCESS_IS_IN_SYS_APER 2
138 #define MVMLTC_MAPPED_ACCESS_NOT_IN_SYS 3
139 139 #define MVMLTC_SYS_APER_UNMAPPED_ACCESS 0xffffffe0 #define MVMLTC_SYS_APER_UNMAPPED_ACCESS 0xffffffe0
140 140 #define MVMLTC_PASS_THRU 0 #define MVMLTC_PASS_THRU 0
141 #define MVMLTC_DISCARD_WR_RD_DEFAULT 1
141 142 #define MVMLTC_ENA_ADVANCED_DRIVER_MODEL BIT(6) #define MVMLTC_ENA_ADVANCED_DRIVER_MODEL BIT(6)
142 143
143 144 #define MC_SHARED_BLACKOUT_CTL 0x20ac #define MC_SHARED_BLACKOUT_CTL 0x20ac
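
The renamed values spell out what the SYS_ACCESS_MODE field actually selects (always use the system page map, treat mapped accesses as inside or outside the system aperture), and a default discard policy is added for unmapped system-aperture accesses. Illustratively, such fields would be combined with set() when programming the L1 TLB control register; the register name below (MC_VM_MX_L1_TLB_CTL) and the chosen policy are assumptions for the example, not taken from this commit:

        /* Illustrative composition only; register name and policy assumed. */
        wr32(dev, MVMLTC_ENA_L1_FRAG_PROCESSING
                | set(MVMLTC_SYS_ACCESS_MODE, MVMLTC_MAPPED_ACCESS_NOT_IN_SYS)
                | set(MVMLTC_SYS_APER_UNMAPPED_ACCESS, MVMLTC_PASS_THRU)
                | MVMLTC_ENA_ADVANCED_DRIVER_MODEL, MC_VM_MX_L1_TLB_CTL);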
File drivers/gpu/alga/amd/si/silicium_blks/mc changed (mode: 100644) (index 344605b..8b2a650)
... ... address in the system aperture.
23 23 The VM (historically page unit 0) has 8 contexts which each maps a GPU address The VM (historically page unit 0) has 8 contexts which each maps a GPU address
24 24 range to other address ranges using a page table (for us, it will be one bus range to other address ranges using a page table (for us, it will be one bus
25 25 address range). Only the context 0 is used. The context 0 does more for the VM address range). Only the context 0 is used. The context 0 does more for the VM
26 than the other 7. Then the start GPU address of the range is written in
27 VM_CTX_0_PT_START_ADDR register, end GPU address of the range is written
28 in VM_CTX_0_PT_END_ADDR register. The bus address range is defined by a
29 table which maps a GPU pages to a bus addresses. This table is stored in VRAM.
30 The VRAM GPU address of this table is to be written in
26 than the other 7; it is called the "system context". Then the start GPU address
27 of the range is written in the VM_CTX_0_PT_START_ADDR register, and the end GPU
28 address of the range is written in the VM_CTX_0_PT_END_ADDR register. The bus
29 address range is defined by a table which maps GPU pages to bus addresses. This
30 table is stored in VRAM. The VRAM GPU address of this table is to be written in
31 31 VM_CTX_0_PT_BASE_ADDR register. Of course, this GPU address is to be VM_CTX_0_PT_BASE_ADDR register. Of course, this GPU address is to be
32 32 compatible with the previously chosen addresses for the memory controller. compatible with the previously chosen addresses for the memory controller.
33 33 If there is no page in order to map a GPU address from a request to a bus If there is no page in order to map a GPU address from a request to a bus
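
To make the paragraph concrete, the three registers it names would be programmed along these lines; the page-number encoding (4 KiB pages, hence the shift by 12) and the variable names are assumptions for the sketch, only the register names come from the text:

        /* Sketch only; encodings and variable names are assumed. */
        wr32(dev, gpu_range_start >> 12, VM_CTX_0_PT_START_ADDR);
        wr32(dev, gpu_range_end >> 12, VM_CTX_0_PT_END_ADDR);
        wr32(dev, pt_vram_gpu_addr >> 12, VM_CTX_0_PT_BASE_ADDR);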