File src/compute/cmd_unit.cpp changed (mode: 100644) (index 26655d9..1141cac) |
... |
... |
read_from_allocation(jen::DeviceBufferPart *p_src, void *p_dst, |
66 |
66 |
|
|
67 |
67 |
|
|
68 |
68 |
struct jen::ComputeCmdUnit::Data { |
struct jen::ComputeCmdUnit::Data { |
69 |
|
[[nodiscard]] Result init(Device *p_dev) { |
|
70 |
|
this->p_dev = p_dev; |
|
71 |
|
Result res; |
|
72 |
|
res = compute_cmds |
|
73 |
|
.init(*p_dev, p_dev->queue_indices.compute.family, |
|
74 |
|
vkw::CmdPoolFlag::MANUAL_CMD_RESET); |
|
75 |
|
if (res != VK_SUCCESS) |
|
76 |
|
return res; |
|
77 |
|
|
|
78 |
|
res = transfer_cmds |
|
79 |
|
.init(*p_dev, p_dev->queue_indices.transfer.family, |
|
80 |
|
vkw::CmdPoolFlag::MANUAL_CMD_RESET); |
|
81 |
|
if (res != VK_SUCCESS) |
|
82 |
|
goto CCC; |
|
83 |
|
|
|
84 |
|
res = syncs.init(*p_dev); |
|
85 |
|
if (res != VK_SUCCESS) |
|
86 |
|
goto CTC; |
|
87 |
|
|
|
88 |
|
wait_transfer_write = wait_transfer_read = wait_compute = false; |
|
89 |
|
reset_fence = {}; |
|
90 |
|
return VK_SUCCESS; |
|
|
69 |
|
[[nodiscard]] Result |
|
70 |
|
init(Device*); |
|
71 |
|
void |
|
72 |
|
destroy(); |
|
73 |
|
[[nodiscard]] jen::Result |
|
74 |
|
wait(); |
|
75 |
|
[[nodiscard]] Result |
|
76 |
|
proceed_writes(BufferTransfers, ImagesTransfers); |
|
77 |
|
[[nodiscard]] Result |
|
78 |
|
proceed_staging_reads(BufferTransfers, ImagesTransfers); |
91 |
79 |
|
|
|
80 |
|
struct SyncCounts : vk::SyncContainerCounts { |
|
81 |
|
constexpr static const uint32_t FENCES = 2; |
|
82 |
|
constexpr static const uint32_t SEMAPHORES = 2; |
|
83 |
|
}; |
|
84 |
|
Device *p_dev; |
|
85 |
|
vk::CmdPoolContainer<1, 0> compute_cmds; |
|
86 |
|
vk::CmdPoolContainer<2, 0> transfer_cmds; |
|
87 |
|
vk::SyncContainer<SyncCounts> syncs; |
|
88 |
|
bool wait_transfer_write; |
|
89 |
|
bool wait_compute; |
|
90 |
|
bool wait_transfer_read; |
|
91 |
|
jl::array<bool, SyncCounts::FENCES> reset_fence; |
|
92 |
|
}; |
|
93 |
|
[[nodiscard]] Result ComputeCmdUnit::Data:: |
|
94 |
|
init(Device *p_dev) { |
|
95 |
|
this->p_dev = p_dev; |
|
96 |
|
Result res; |
|
97 |
|
res = compute_cmds |
|
98 |
|
.init(*p_dev, p_dev->queue_indices.compute.family, |
|
99 |
|
vkw::CmdPoolFlag::MANUAL_CMD_RESET); |
|
100 |
|
if (res != VK_SUCCESS) |
|
101 |
|
return res; |
|
102 |
|
res = transfer_cmds |
|
103 |
|
.init(*p_dev, p_dev->queue_indices.transfer.family, |
|
104 |
|
vkw::CmdPoolFlag::MANUAL_CMD_RESET); |
|
105 |
|
if (res != VK_SUCCESS) |
|
106 |
|
goto CCC; |
|
107 |
|
res = syncs.init(*p_dev); |
|
108 |
|
if (res != VK_SUCCESS) |
|
109 |
|
goto CTC; |
|
110 |
|
wait_transfer_write = wait_transfer_read = wait_compute = false; |
|
111 |
|
reset_fence = {}; |
|
112 |
|
return VK_SUCCESS; |
92 |
113 |
CTC: |
CTC: |
93 |
|
transfer_cmds.destroy(*p_dev); |
|
|
114 |
|
transfer_cmds.destroy(*p_dev); |
94 |
115 |
CCC: |
CCC: |
95 |
|
compute_cmds.destroy(*p_dev); |
|
96 |
|
return res; |
|
|
116 |
|
compute_cmds.destroy(*p_dev); |
|
117 |
|
return res; |
|
118 |
|
} |
|
119 |
|
/// Allocates the private Data object and initializes it for the device
/// reachable through the given compute module.
///
/// @param mc  compute module handle; the address of mc.p->device is handed
///            to Data::init.
/// @return VK_ERROR_OUT_OF_HOST_MEMORY when jl::allocate fails; otherwise
///         the result of Data::init. On any non-success result the Data
///         object is deallocated again before returning.
[[nodiscard]] Result ComputeCmdUnit::
init(ModuleCompute mc) {
  if (not jl::allocate(&p)) {
    return VK_ERROR_OUT_OF_HOST_MEMORY;
  }

  const Result init_result = p->init(&mc.p->device);
  if (init_result != VK_SUCCESS) {
    // Data::init failed: release the storage allocated above.
    jl::deallocate(&p);
  }
  return init_result;
}
|
128 |
|
/// Tears down the private Data object and then releases its storage.
void ComputeCmdUnit::destroy() {
  auto &impl = *p;
  // Destroy the contents of Data first ...
  impl.destroy();
  // ... then free the allocation made in init().
  jl::deallocate(&p);
}
|
132 |
|
void ComputeCmdUnit::Data:: |
|
133 |
|
destroy() { |
|
134 |
|
transfer_cmds.destroy(*p_dev); |
|
135 |
|
compute_cmds.destroy(*p_dev); |
|
136 |
|
syncs.destroy(*p_dev); |
|
137 |
|
} |
|
138 |
|
[[nodiscard]] Result ComputeCmdUnit::Data:: |
|
139 |
|
wait() { |
|
140 |
|
jen::Result res; |
|
141 |
|
if (wait_transfer_write or wait_compute) { |
|
142 |
|
res = syncs.fences[0].wait(*p_dev, vkw::TIMEOUT_INFINITE); |
|
143 |
|
if (res != VK_SUCCESS) |
|
144 |
|
return res; |
|
145 |
|
wait_compute = false; |
|
146 |
|
wait_transfer_write = false; |
97 |
147 |
} |
} |
98 |
|
void destroy() { |
|
99 |
|
transfer_cmds.destroy(*p_dev); |
|
100 |
|
compute_cmds.destroy(*p_dev); |
|
101 |
|
syncs.destroy(*p_dev); |
|
|
148 |
|
if (wait_transfer_read) { |
|
149 |
|
res = syncs.fences[1].wait_and_reset(*p_dev, vkw::TIMEOUT_INFINITE); |
|
150 |
|
if (res != VK_SUCCESS) |
|
151 |
|
return res; |
|
152 |
|
wait_transfer_read = false; |
102 |
153 |
} |
} |
103 |
|
|
|
104 |
|
[[nodiscard]] jen::Result |
|
105 |
|
wait() { |
|
106 |
|
jen::Result res; |
|
107 |
|
if (wait_transfer_write or wait_compute) { |
|
108 |
|
res = syncs.fences[0].wait(*p_dev, vkw::TIMEOUT_INFINITE); |
|
|
154 |
|
for (uint32_t i = 0; i < reset_fence.count(); ++i) { |
|
155 |
|
if (reset_fence[i]) { |
|
156 |
|
res = syncs.fences[i].reset(*p_dev); |
109 |
157 |
if (res != VK_SUCCESS) |
if (res != VK_SUCCESS) |
110 |
158 |
return res; |
return res; |
111 |
|
wait_compute = false; |
|
112 |
|
wait_transfer_write = false; |
|
|
159 |
|
reset_fence[i] = false; |
113 |
160 |
} |
} |
114 |
|
if (wait_transfer_read) { |
|
115 |
|
res = syncs.fences[1].wait_and_reset(*p_dev, vkw::TIMEOUT_INFINITE); |
|
|
161 |
|
} |
|
162 |
|
return VK_SUCCESS; |
|
163 |
|
} |
|
164 |
|
[[nodiscard]] Result ComputeCmdUnit::Data:: |
|
165 |
|
proceed_writes(BufferTransfers buffer_writes, |
|
166 |
|
ImagesTransfers images_writes) |
|
167 |
|
{ |
|
168 |
|
auto &cmd = transfer_cmds.primary[0]; |
|
169 |
|
auto begin = [&cmd, this]() -> jen::Result { |
|
170 |
|
if (not wait_transfer_write) { |
|
171 |
|
jen::Result res; |
|
172 |
|
res = cmd.begin(vkw::CmdUsage::ONE_TIME_SUBMIT); |
116 |
173 |
if (res != VK_SUCCESS) |
if (res != VK_SUCCESS) |
117 |
174 |
return res; |
return res; |
118 |
|
wait_transfer_read = false; |
|
119 |
|
} |
|
120 |
|
for (uint32_t i = 0; i < reset_fence.count(); ++i) { |
|
121 |
|
if (reset_fence[i]) { |
|
122 |
|
res = syncs.fences[i].reset(*p_dev); |
|
123 |
|
if (res != VK_SUCCESS) |
|
124 |
|
return res; |
|
125 |
|
reset_fence[i] = false; |
|
126 |
|
} |
|
|
175 |
|
wait_transfer_write = true; |
127 |
176 |
} |
} |
128 |
177 |
return VK_SUCCESS; |
return VK_SUCCESS; |
129 |
|
} |
|
130 |
|
|
|
131 |
|
[[nodiscard]] jen::Result |
|
132 |
|
proceed_writes(BufferTransfers buffer_writes, |
|
133 |
|
ImagesTransfers images_writes) |
|
134 |
|
{ |
|
135 |
|
auto &cmd = transfer_cmds.primary[0]; |
|
136 |
|
|
|
137 |
|
auto begin = [&cmd, this]() -> jen::Result { |
|
138 |
|
if (not wait_transfer_write) { |
|
139 |
|
jen::Result res; |
|
140 |
|
res = cmd.begin(vkw::CmdUsage::ONE_TIME_SUBMIT); |
|
141 |
|
if (res != VK_SUCCESS) |
|
142 |
|
return res; |
|
143 |
|
wait_transfer_write = true; |
|
144 |
|
} |
|
145 |
|
return VK_SUCCESS; |
|
146 |
|
}; |
|
147 |
|
|
|
148 |
|
for (uint32_t i = 0; i < buffer_writes.count(); ++i) { |
|
149 |
|
auto &write = buffer_writes[i]; |
|
150 |
|
auto &buffer = *write.p_buffer; |
|
151 |
|
|
|
152 |
|
jen::DeviceBufferPart *p_part; |
|
153 |
|
if (buffer.use_staging) |
|
154 |
|
p_part = &buffer.staging; |
|
155 |
|
else |
|
156 |
|
p_part = &buffer.part; |
|
157 |
|
|
|
158 |
|
write_to_allocation(write.p_data, p_part, write.offset, write.size); |
|
159 |
|
|
|
160 |
|
if (buffer.use_staging) { |
|
161 |
|
vkw::BufferChange bs; |
|
162 |
|
bs.src = buffer.staging.buffer; |
|
163 |
|
bs.dst = buffer.part.buffer; |
|
164 |
|
vkw::BufferRegion region; |
|
165 |
|
region.offsets.src = buffer.staging.offset(); |
|
166 |
|
region.offsets.dst = buffer.part.offset(); |
|
167 |
|
region.size = write.size; |
|
168 |
|
auto res = begin(); |
|
169 |
|
if (res != VK_SUCCESS) |
|
170 |
|
return res; |
|
171 |
|
cmd.cmd_cp_buffer(bs, region); |
|
172 |
|
} |
|
173 |
|
} |
|
174 |
|
|
|
175 |
|
for (uint32_t i = 0; i < images_writes.count(); ++i) { |
|
|
178 |
|
}; |
|
179 |
|
for (uint32_t i = 0; i < buffer_writes.count(); ++i) { |
|
180 |
|
auto &write = buffer_writes[i]; |
|
181 |
|
auto &buffer = *write.p_buffer; |
|
182 |
|
jen::DeviceBufferPart *p_part; |
|
183 |
|
if (buffer.use_staging) |
|
184 |
|
p_part = &buffer.staging; |
|
185 |
|
else |
|
186 |
|
p_part = &buffer.part; |
|
187 |
|
write_to_allocation(write.p_data, p_part, write.offset, write.size); |
|
188 |
|
if (buffer.use_staging) { |
|
189 |
|
vkw::BufferChange bs; |
|
190 |
|
bs.src = buffer.staging.buffer; |
|
191 |
|
bs.dst = buffer.part.buffer; |
|
192 |
|
vkw::BufferRegion region; |
|
193 |
|
region.offsets.src = buffer.staging.offset(); |
|
194 |
|
region.offsets.dst = buffer.part.offset(); |
|
195 |
|
region.size = write.size; |
176 |
196 |
auto res = begin(); |
auto res = begin(); |
177 |
197 |
if (res != VK_SUCCESS) |
if (res != VK_SUCCESS) |
178 |
198 |
return res; |
return res; |
179 |
|
|
|
180 |
|
auto &w = images_writes[i]; |
|
181 |
|
auto &im = *w.p_image; |
|
182 |
|
|
|
183 |
|
if (im.layout != vkw::ImLayout::TRANSFER_DST) { |
|
184 |
|
vkw::StageMaskChange stages; |
|
185 |
|
stages.src = vkw::StageFlag::TOP_OF_PIPE; |
|
186 |
|
stages.dst = vkw::StageFlag::TRANSFER; |
|
187 |
|
transitionLayout(&im, &cmd, vkw::ImLayout::TRANSFER_DST, stages); |
|
|
199 |
|
cmd.cmd_cp_buffer(bs, region); |
|
200 |
|
} |
|
201 |
|
} |
|
202 |
|
for (uint32_t i = 0; i < images_writes.count(); ++i) { |
|
203 |
|
auto res = begin(); |
|
204 |
|
if (res != VK_SUCCESS) |
|
205 |
|
return res; |
|
206 |
|
auto &w = images_writes[i]; |
|
207 |
|
auto &im = *w.p_image; |
|
208 |
|
if (im.layout != vkw::ImLayout::TRANSFER_DST) { |
|
209 |
|
vkw::StageMaskChange stages; |
|
210 |
|
stages.src = vkw::StageFlag::TOP_OF_PIPE; |
|
211 |
|
stages.dst = vkw::StageFlag::TRANSFER; |
|
212 |
|
transitionLayout(&im, &cmd, vkw::ImLayout::TRANSFER_DST, stages); |
|
213 |
|
} |
|
214 |
|
uint32_t x_size = vkw::format_size(im.format); |
|
215 |
|
for (auto &r : w.transfers) { |
|
216 |
|
auto ext = im.extent; |
|
217 |
|
uint64_t moffset = 0; |
|
218 |
|
for (uint32_t i = 1; i < r.mip_level; ++i) { |
|
219 |
|
moffset += ext.all_scale() * x_size; |
|
220 |
|
ext /= 2; |
|
221 |
|
ext.x = jl::max(ext.x, 1u); |
|
222 |
|
ext.y = jl::max(ext.y, 1u); |
|
223 |
|
ext.z = jl::max(ext.z, 1u); |
188 |
224 |
} |
} |
189 |
|
|
|
190 |
|
vkw::DeviceSize offset = 0; |
|
191 |
|
for (auto &r : w.transfers) { |
|
192 |
|
auto size = r.extent.volume() * vkw::format_size(im.format) |
|
193 |
|
* r.layer_count; |
|
194 |
|
write_to_allocation(r.p_data, &im.staging, offset, size); |
|
195 |
|
|
|
196 |
|
vkw::BufferAndImageRegion region; { |
|
197 |
|
region.bufferOffset = im.staging.offset() + offset; |
|
198 |
|
region.bufferRowLength = region.bufferImageHeight = 0; |
|
199 |
|
region.imageSubresource = { |
|
200 |
|
vkw::ImAspect::COLOR, |
|
201 |
|
r.mip_level, |
|
202 |
|
r.layer_offset, |
|
203 |
|
r.layer_count |
|
204 |
|
}; |
|
205 |
|
region.imageOffset.x = int32_t(r.offset.x); |
|
206 |
|
region.imageOffset.y = int32_t(r.offset.y); |
|
207 |
|
region.imageOffset.z = int32_t(r.offset.z); |
|
208 |
|
region.imageExtent.width = r.extent.x; |
|
209 |
|
region.imageExtent.height = r.extent.y; |
|
210 |
|
region.imageExtent.depth = r.extent.z; |
|
|
225 |
|
uint64_t y_size = ext.x * x_size; |
|
226 |
|
uint64_t z_size = ext.y * y_size; |
|
227 |
|
uint64_t l_size = ext.z * z_size; |
|
228 |
|
uint64_t write_size = (r.extent.x - r.offset.x) * x_size; |
|
229 |
|
uint8_t *p_user = reinterpret_cast<uint8_t*>(r.p_data); |
|
230 |
|
uint64_t l_offset = r.layer_offset * l_size; |
|
231 |
|
uint64_t z_offset = l_offset + r.offset.z * z_size; |
|
232 |
|
for (uint32_t z = 0; z < r.extent.z; ++z) { |
|
233 |
|
uint64_t y_offset = z_offset + r.offset.y * y_size; |
|
234 |
|
for (uint32_t y = 0; y < r.extent.y; ++y) { |
|
235 |
|
write_to_allocation(p_user, &im.staging, y_offset, write_size); |
|
236 |
|
p_user += write_size; |
|
237 |
|
y_offset += y_size; |
211 |
238 |
} |
} |
212 |
|
cmd.cmd_cp_buffer_to_image({im.staging.buffer, im.image.image}, |
|
213 |
|
region, vkw::ImLayout::TRANSFER_DST); |
|
214 |
|
|
|
215 |
|
offset += size; |
|
|
239 |
|
z_offset += z_size; |
|
240 |
|
} |
|
241 |
|
uint64_t offset = moffset; |
|
242 |
|
offset += r.layer_offset * l_size; |
|
243 |
|
offset += r.offset.z * z_size; |
|
244 |
|
offset += r.offset.y * y_size; |
|
245 |
|
vkw::BufferAndImageRegion region; { |
|
246 |
|
region.bufferOffset = im.staging.offset() + offset; |
|
247 |
|
region.bufferRowLength = ext.x; |
|
248 |
|
region.bufferImageHeight = ext.y; |
|
249 |
|
region.imageSubresource = { |
|
250 |
|
vkw::ImAspect::COLOR, r.mip_level, r.layer_offset, 1 |
|
251 |
|
}; |
|
252 |
|
region.imageOffset.x = int32_t(r.offset.x); |
|
253 |
|
region.imageOffset.y = int32_t(r.offset.y); |
|
254 |
|
region.imageOffset.z = int32_t(r.offset.z); |
|
255 |
|
region.imageExtent.width = r.extent.x; |
|
256 |
|
region.imageExtent.height = r.extent.y; |
|
257 |
|
region.imageExtent.depth = r.extent.z; |
216 |
258 |
} |
} |
|
259 |
|
cmd.cmd_cp_buffer_to_image({im.staging.buffer, im.image.image}, |
|
260 |
|
region, vkw::ImLayout::TRANSFER_DST); |
217 |
261 |
} |
} |
218 |
|
|
|
219 |
|
if (wait_transfer_write) { |
|
|
262 |
|
} |
|
263 |
|
if (wait_transfer_write) { |
|
264 |
|
jen::Result res; |
|
265 |
|
res = cmd.end(); |
|
266 |
|
if (res != VK_SUCCESS) |
|
267 |
|
return res; |
|
268 |
|
vkw::QueueSignal signal(syncs.semaphores[0].p_vk); |
|
269 |
|
vkw::QueueSubmit submit(cmd, {}, signal); |
|
270 |
|
res = p_dev->queues.transfer.submit_locked(submit); |
|
271 |
|
if (res != VK_SUCCESS) |
|
272 |
|
return res; |
|
273 |
|
for (uint32_t i = 0; i < images_writes.count(); ++i) |
|
274 |
|
images_writes[i].p_image->layout = vkw::ImLayout::TRANSFER_DST; |
|
275 |
|
} |
|
276 |
|
return VK_SUCCESS; |
|
277 |
|
} |
|
278 |
|
[[nodiscard]] Result ComputeCmdUnit::Data:: |
|
279 |
|
proceed_staging_reads(BufferTransfers buffer_reads, |
|
280 |
|
ImagesTransfers images_reads) |
|
281 |
|
{ |
|
282 |
|
auto &cmd = transfer_cmds.primary[1]; |
|
283 |
|
auto begin = [&cmd, this]() -> jen::Result { |
|
284 |
|
if (not wait_transfer_read) { |
220 |
285 |
jen::Result res; |
jen::Result res; |
221 |
|
res = cmd.end(); |
|
222 |
|
if (res != VK_SUCCESS) |
|
223 |
|
return res; |
|
224 |
|
vkw::QueueSignal signal(syncs.semaphores[0].p_vk); |
|
225 |
|
vkw::QueueSubmit submit(cmd, {}, signal); |
|
226 |
|
res = p_dev->queues.transfer.submit_locked(submit); |
|
|
286 |
|
res = cmd.begin(vkw::CmdUsage::ONE_TIME_SUBMIT); |
227 |
287 |
if (res != VK_SUCCESS) |
if (res != VK_SUCCESS) |
228 |
288 |
return res; |
return res; |
229 |
|
|
|
230 |
|
for (uint32_t i = 0; i < images_writes.count(); ++i) |
|
231 |
|
images_writes[i].p_image->layout = vkw::ImLayout::TRANSFER_DST; |
|
|
289 |
|
wait_transfer_read = true; |
232 |
290 |
} |
} |
233 |
|
|
|
234 |
291 |
return VK_SUCCESS; |
return VK_SUCCESS; |
235 |
|
} |
|
236 |
|
|
|
237 |
|
[[nodiscard]] Result |
|
238 |
|
proceed_staging_reads(BufferTransfers buffer_reads, |
|
239 |
|
ImagesTransfers images_reads) |
|
240 |
|
{ |
|
241 |
|
auto &cmd = transfer_cmds.primary[1]; |
|
242 |
|
auto begin = [&cmd, this]() -> jen::Result { |
|
243 |
|
if (not wait_transfer_read) { |
|
244 |
|
jen::Result res; |
|
245 |
|
res = cmd.begin(vkw::CmdUsage::ONE_TIME_SUBMIT); |
|
246 |
|
if (res != VK_SUCCESS) |
|
247 |
|
return res; |
|
248 |
|
wait_transfer_read = true; |
|
249 |
|
} |
|
250 |
|
return VK_SUCCESS; |
|
251 |
|
}; |
|
252 |
|
|
|
253 |
|
for (uint32_t i = 0; i < buffer_reads.count(); ++i) { |
|
254 |
|
auto &read = buffer_reads[i]; |
|
255 |
|
auto &buffer = *read.p_buffer; |
|
256 |
|
|
|
257 |
|
if (buffer.use_staging) { |
|
258 |
|
vkw::BufferChange bs; |
|
259 |
|
bs.src = buffer.part.buffer; |
|
260 |
|
bs.dst = buffer.staging.buffer; |
|
261 |
|
vkw::BufferRegion region; |
|
262 |
|
region.offsets.src = buffer.part.offset(); |
|
263 |
|
region.offsets.dst = buffer.staging.offset(); |
|
264 |
|
region.size = read.size; |
|
265 |
|
auto res = begin(); |
|
266 |
|
if (res != VK_SUCCESS) |
|
267 |
|
return res; |
|
268 |
|
cmd.cmd_cp_buffer(bs, region); |
|
269 |
|
} |
|
270 |
|
} |
|
|
292 |
|
}; |
|
293 |
|
for (uint32_t i = 0; i < buffer_reads.count(); ++i) { |
|
294 |
|
auto &read = buffer_reads[i]; |
|
295 |
|
auto &buffer = *read.p_buffer; |
271 |
296 |
|
|
272 |
|
for (uint32_t i = 0; i < images_reads.count(); ++i) { |
|
|
297 |
|
if (buffer.use_staging) { |
|
298 |
|
vkw::BufferChange bs; |
|
299 |
|
bs.src = buffer.part.buffer; |
|
300 |
|
bs.dst = buffer.staging.buffer; |
|
301 |
|
vkw::BufferRegion region; |
|
302 |
|
region.offsets.src = buffer.part.offset(); |
|
303 |
|
region.offsets.dst = buffer.staging.offset(); |
|
304 |
|
region.size = read.size; |
273 |
305 |
auto res = begin(); |
auto res = begin(); |
274 |
306 |
if (res != VK_SUCCESS) |
if (res != VK_SUCCESS) |
275 |
307 |
return res; |
return res; |
276 |
|
|
|
277 |
|
auto &w = images_reads[i]; |
|
278 |
|
auto &im = *w.p_image; |
|
279 |
|
|
|
280 |
|
if (im.layout != vkw::ImLayout::TRANSFER_SRC) { |
|
281 |
|
vkw::StageMaskChange stages; |
|
282 |
|
stages.src = vkw::StageFlag::TOP_OF_PIPE; |
|
283 |
|
stages.dst = vkw::StageFlag::TRANSFER; |
|
284 |
|
transitionLayout(&im, &cmd, vkw::ImLayout::TRANSFER_SRC, stages); |
|
|
308 |
|
cmd.cmd_cp_buffer(bs, region); |
|
309 |
|
} |
|
310 |
|
} |
|
311 |
|
for (uint32_t i = 0; i < images_reads.count(); ++i) { |
|
312 |
|
auto res = begin(); |
|
313 |
|
if (res != VK_SUCCESS) |
|
314 |
|
return res; |
|
315 |
|
auto &w = images_reads[i]; |
|
316 |
|
auto &im = *w.p_image; |
|
317 |
|
if (im.layout != vkw::ImLayout::TRANSFER_SRC) { |
|
318 |
|
vkw::StageMaskChange stages; |
|
319 |
|
stages.src = vkw::StageFlag::TOP_OF_PIPE; |
|
320 |
|
stages.dst = vkw::StageFlag::TRANSFER; |
|
321 |
|
transitionLayout(&im, &cmd, vkw::ImLayout::TRANSFER_SRC, stages); |
|
322 |
|
} |
|
323 |
|
uint32_t x_size = vkw::format_size(im.format); |
|
324 |
|
for (auto &r : w.transfers) { |
|
325 |
|
auto ext = im.extent; |
|
326 |
|
uint64_t moffset = 0; |
|
327 |
|
for (uint32_t i = 1; i < r.mip_level; ++i) { |
|
328 |
|
moffset += ext.all_scale() * x_size; |
|
329 |
|
ext /= 2; |
|
330 |
|
ext.x = jl::max(ext.x, 1u); |
|
331 |
|
ext.y = jl::max(ext.y, 1u); |
|
332 |
|
ext.z = jl::max(ext.z, 1u); |
285 |
333 |
} |
} |
286 |
|
|
|
287 |
|
vkw::DeviceSize offset = 0; |
|
288 |
|
for (auto &r : w.transfers) { |
|
289 |
|
vkw::BufferAndImageRegion region; { |
|
290 |
|
region.bufferOffset = im.staging.offset() + offset; |
|
291 |
|
region.bufferRowLength = region.bufferImageHeight = 0; |
|
292 |
|
region.imageSubresource = { |
|
293 |
|
vkw::ImAspect::COLOR, |
|
294 |
|
r.mip_level, |
|
295 |
|
r.layer_offset, |
|
296 |
|
r.layer_count |
|
297 |
|
}; |
|
298 |
|
region.imageOffset.x = int32_t(r.offset.x); |
|
299 |
|
region.imageOffset.y = int32_t(r.offset.y); |
|
300 |
|
region.imageOffset.z = int32_t(r.offset.z); |
|
301 |
|
region.imageExtent.width = r.extent.x; |
|
302 |
|
region.imageExtent.height = r.extent.y; |
|
303 |
|
region.imageExtent.depth = r.extent.z; |
|
304 |
|
} |
|
305 |
|
cmd.cmd_cp_image_to_buffer({im.image.image, im.staging.buffer}, |
|
306 |
|
region, vkw::ImLayout::TRANSFER_SRC); |
|
307 |
|
|
|
308 |
|
offset += r.extent.volume() * vkw::format_size(im.format) * r.layer_count; |
|
|
334 |
|
uint64_t y_size = ext.x * x_size; |
|
335 |
|
uint64_t z_size = ext.y * y_size; |
|
336 |
|
uint64_t l_size = ext.z * z_size; |
|
337 |
|
uint64_t offset = moffset; |
|
338 |
|
offset += r.layer_offset * l_size; |
|
339 |
|
offset += r.offset.z * z_size; |
|
340 |
|
offset += r.offset.y * y_size; |
|
341 |
|
vkw::BufferAndImageRegion region; { |
|
342 |
|
region.bufferOffset = im.staging.offset() + offset; |
|
343 |
|
region.bufferRowLength = ext.x; |
|
344 |
|
region.bufferImageHeight = ext.y; |
|
345 |
|
region.imageSubresource = { |
|
346 |
|
vkw::ImAspect::COLOR, r.mip_level, r.layer_offset, 1 |
|
347 |
|
}; |
|
348 |
|
region.imageOffset.x = int32_t(r.offset.x); |
|
349 |
|
region.imageOffset.y = int32_t(r.offset.y); |
|
350 |
|
region.imageOffset.z = int32_t(r.offset.z); |
|
351 |
|
region.imageExtent.width = r.extent.x; |
|
352 |
|
region.imageExtent.height = r.extent.y; |
|
353 |
|
region.imageExtent.depth = r.extent.z; |
309 |
354 |
} |
} |
|
355 |
|
cmd.cmd_cp_image_to_buffer({im.image.image, im.staging.buffer}, |
|
356 |
|
region, vkw::ImLayout::TRANSFER_SRC); |
310 |
357 |
} |
} |
311 |
|
|
|
312 |
|
if (wait_transfer_read) { |
|
313 |
|
wait_compute = false; |
|
314 |
|
jen::Result res; |
|
315 |
|
res = transfer_cmds.primary[1].end(); |
|
316 |
|
if (res != VK_SUCCESS) |
|
317 |
|
return res; |
|
318 |
|
vkw::StageMask stage_mask = vkw::StageFlag::COMPUTE_SHADER; |
|
319 |
|
vkw::QueueWait wait; |
|
320 |
|
wait.semaphores = syncs.semaphores[1].p_vk; |
|
321 |
|
wait.stage_masks = stage_mask; |
|
322 |
|
vkw::QueueSubmit submit(cmd, wait); |
|
323 |
|
res = p_dev->queues.transfer.submit_locked(submit, syncs.fences[1]); |
|
324 |
|
if (res != VK_SUCCESS) |
|
325 |
|
return res; |
|
326 |
|
reset_fence[1] = true; |
|
327 |
|
|
|
328 |
|
for (uint32_t i = 0; i < images_reads.count(); ++i) |
|
329 |
|
images_reads[i].p_image->layout = vkw::ImLayout::TRANSFER_SRC; |
|
330 |
|
} |
|
331 |
|
|
|
332 |
|
return VK_SUCCESS; |
|
333 |
358 |
} |
} |
334 |
|
|
|
335 |
|
|
|
336 |
|
struct SyncCounts : vk::SyncContainerCounts { |
|
337 |
|
constexpr static const uint32_t FENCES = 2; |
|
338 |
|
constexpr static const uint32_t SEMAPHORES = 2; |
|
339 |
|
}; |
|
340 |
|
|
|
341 |
|
Device *p_dev; |
|
342 |
|
vk::CmdPoolContainer<1, 0> compute_cmds; |
|
343 |
|
vk::CmdPoolContainer<2, 0> transfer_cmds; |
|
344 |
|
vk::SyncContainer<SyncCounts> syncs; |
|
345 |
|
bool wait_transfer_write; |
|
346 |
|
bool wait_compute; |
|
347 |
|
bool wait_transfer_read; |
|
348 |
|
jl::array<bool, SyncCounts::FENCES> reset_fence; |
|
349 |
|
}; |
|
350 |
|
|
|
351 |
|
[[nodiscard]] Result ComputeCmdUnit:: |
|
352 |
|
init(ModuleCompute mc) { |
|
353 |
|
if (not jl::allocate(&p)) |
|
354 |
|
return VK_ERROR_OUT_OF_HOST_MEMORY; |
|
355 |
|
Result res = p->init(&mc.p->device); |
|
356 |
|
if (res != VK_SUCCESS) |
|
357 |
|
jl::deallocate(&p); |
|
358 |
|
return res; |
|
359 |
|
} |
|
360 |
|
void ComputeCmdUnit::destroy() { |
|
361 |
|
p->destroy(); |
|
362 |
|
jl::deallocate(&p); |
|
|
359 |
|
if (wait_transfer_read) { |
|
360 |
|
wait_compute = false; |
|
361 |
|
jen::Result res; |
|
362 |
|
res = transfer_cmds.primary[1].end(); |
|
363 |
|
if (res != VK_SUCCESS) |
|
364 |
|
return res; |
|
365 |
|
vkw::StageMask stage_mask = vkw::StageFlag::COMPUTE_SHADER; |
|
366 |
|
vkw::QueueWait wait; |
|
367 |
|
wait.semaphores = syncs.semaphores[1].p_vk; |
|
368 |
|
wait.stage_masks = stage_mask; |
|
369 |
|
vkw::QueueSubmit submit(cmd, wait); |
|
370 |
|
res = p_dev->queues.transfer.submit_locked(submit, syncs.fences[1]); |
|
371 |
|
if (res != VK_SUCCESS) |
|
372 |
|
return res; |
|
373 |
|
reset_fence[1] = true; |
|
374 |
|
for (uint32_t i = 0; i < images_reads.count(); ++i) |
|
375 |
|
images_reads[i].p_image->layout = vkw::ImLayout::TRANSFER_SRC; |
|
376 |
|
} |
|
377 |
|
return VK_SUCCESS; |
363 |
378 |
} |
} |
364 |
|
|
|
365 |
|
|
|
366 |
379 |
[[nodiscard]] Result |
[[nodiscard]] Result |
367 |
|
check_computeInfo(const Device &device, |
|
368 |
|
const ComputeInfo &info) { |
|
|
380 |
|
check_computeInfo(const Device &device, const ComputeInfo &info) { |
369 |
381 |
for (int i = 0; i < 3; ++i) |
for (int i = 0; i < 3; ++i) |
370 |
382 |
if (info.group_count[i] > |
if (info.group_count[i] > |
371 |
383 |
device.properties.limits.maxComputeWorkGroupCount[i]) { |
device.properties.limits.maxComputeWorkGroupCount[i]) { |
|
... |
... |
check_computeInfo(const Device &device, |
379 |
391 |
} |
} |
380 |
392 |
return VK_SUCCESS; |
return VK_SUCCESS; |
381 |
393 |
} |
} |
382 |
|
|
|
383 |
394 |
[[nodiscard]] Result ComputeCmdUnit:: |
[[nodiscard]] Result ComputeCmdUnit:: |
384 |
395 |
compute_status() { |
compute_status() { |
385 |
396 |
jen::Result res; |
jen::Result res; |
|
... |
... |
compute_status() { |
395 |
406 |
} |
} |
396 |
407 |
return VK_SUCCESS; |
return VK_SUCCESS; |
397 |
408 |
} |
} |
398 |
|
|
|
399 |
409 |
[[nodiscard]] Result ComputeCmdUnit:: |
[[nodiscard]] Result ComputeCmdUnit:: |
400 |
410 |
compute(const ComputeInfo &info) |
compute(const ComputeInfo &info) |
401 |
411 |
{ |
{ |
|
... |
... |
compute(const ComputeInfo &info) |
403 |
413 |
res = check_computeInfo(*p->p_dev, info); |
res = check_computeInfo(*p->p_dev, info); |
404 |
414 |
if (res != VK_SUCCESS) |
if (res != VK_SUCCESS) |
405 |
415 |
return res; |
return res; |
406 |
|
|
|
407 |
416 |
res = p->wait(); |
res = p->wait(); |
408 |
417 |
if (res != VK_SUCCESS) |
if (res != VK_SUCCESS) |
409 |
418 |
return res; |
return res; |
410 |
|
|
|
411 |
419 |
res = p->proceed_writes(info.buffer_writes, info.images_writes); |
res = p->proceed_writes(info.buffer_writes, info.images_writes); |
412 |
420 |
if (res != VK_SUCCESS) |
if (res != VK_SUCCESS) |
413 |
421 |
return res; |
return res; |
414 |
|
|
|
415 |
422 |
auto &syncs = p->syncs; |
auto &syncs = p->syncs; |
416 |
423 |
auto &cmds = p->compute_cmds; |
auto &cmds = p->compute_cmds; |
417 |
424 |
auto &pipeline = info.p_pipeline->pipeline; |
auto &pipeline = info.p_pipeline->pipeline; |
418 |
425 |
auto &pipelineLayout = info.p_pipeline->layout; |
auto &pipelineLayout = info.p_pipeline->layout; |
419 |
426 |
auto &set = info.p_binding_set->set; |
auto &set = info.p_binding_set->set; |
420 |
|
|
|
421 |
427 |
auto &cmd = cmds.primary[0]; |
auto &cmd = cmds.primary[0]; |
422 |
428 |
res = cmd.begin(vkw::CmdUsage::ONE_TIME_SUBMIT); |
res = cmd.begin(vkw::CmdUsage::ONE_TIME_SUBMIT); |
423 |
429 |
if (res != VK_SUCCESS) |
if (res != VK_SUCCESS) |
424 |
430 |
return res; |
return res; |
425 |
|
|
|
426 |
431 |
for (auto &im : info.p_bindings->storage_image) { |
for (auto &im : info.p_bindings->storage_image) { |
427 |
432 |
auto l = vkw::ImLayout::GENERAL; |
auto l = vkw::ImLayout::GENERAL; |
428 |
433 |
if (im.p_image->layout == l) |
if (im.p_image->layout == l) |
|
... |
... |
compute(const ComputeInfo &info) |
432 |
437 |
stages.dst = vkw::StageFlag::COMPUTE_SHADER; |
stages.dst = vkw::StageFlag::COMPUTE_SHADER; |
433 |
438 |
transitionLayout(im.p_image, &cmd, l, stages); |
transitionLayout(im.p_image, &cmd, l, stages); |
434 |
439 |
} |
} |
435 |
|
|
|
436 |
440 |
cmd.cmd_set_pipeline(pipeline, vkw::BindPoint::COMPUTE); |
cmd.cmd_set_pipeline(pipeline, vkw::BindPoint::COMPUTE); |
437 |
|
|
|
438 |
441 |
cmd.cmd_set_descr_sets(vkw::BindPoint::COMPUTE, pipelineLayout, set, 0); |
cmd.cmd_set_descr_sets(vkw::BindPoint::COMPUTE, pipelineLayout, set, 0); |
439 |
442 |
cmd.cmd_dispatch(*reinterpret_cast<const vkw::Vector3D*>(&info.group_count)); |
cmd.cmd_dispatch(*reinterpret_cast<const vkw::Vector3D*>(&info.group_count)); |
440 |
|
|
|
441 |
443 |
res = cmd.end(); |
res = cmd.end(); |
442 |
444 |
if (res != VK_SUCCESS) |
if (res != VK_SUCCESS) |
443 |
445 |
return res; |
return res; |
444 |
|
|
|
445 |
446 |
bool use_read_semaphore = false; |
bool use_read_semaphore = false; |
446 |
447 |
if (info.images_reads.count() > 0) |
if (info.images_reads.count() > 0) |
447 |
448 |
use_read_semaphore = true; |
use_read_semaphore = true; |
|
... |
... |
compute(const ComputeInfo &info) |
451 |
452 |
break; |
break; |
452 |
453 |
} |
} |
453 |
454 |
} |
} |
454 |
|
|
|
455 |
455 |
vkw::QueueWait wait; |
vkw::QueueWait wait; |
456 |
456 |
vkw::StageMask wait_mask = vkw::StageFlag::TRANSFER; |
vkw::StageMask wait_mask = vkw::StageFlag::TRANSFER; |
457 |
457 |
if (p->wait_transfer_write) { |
if (p->wait_transfer_write) { |
|
... |
... |
compute(const ComputeInfo &info) |
466 |
466 |
else |
else |
467 |
467 |
signal = {}; |
signal = {}; |
468 |
468 |
vkw::QueueSubmit submit(cmd, wait, signal); |
vkw::QueueSubmit submit(cmd, wait, signal); |
469 |
|
|
|
470 |
469 |
res = p->p_dev->queues.compute.submit_locked(submit, syncs.fences[0]); |
res = p->p_dev->queues.compute.submit_locked(submit, syncs.fences[0]); |
471 |
470 |
if (res != VK_SUCCESS) |
if (res != VK_SUCCESS) |
472 |
471 |
return res; |
return res; |
473 |
472 |
p->reset_fence[0] = true; |
p->reset_fence[0] = true; |
474 |
|
|
|
475 |
473 |
for (auto &im : info.p_bindings->storage_image) { |
for (auto &im : info.p_bindings->storage_image) { |
476 |
474 |
auto l = vkw::ImLayout::GENERAL; |
auto l = vkw::ImLayout::GENERAL; |
477 |
475 |
im.p_image->layout = l; |
im.p_image->layout = l; |
478 |
476 |
} |
} |
479 |
|
|
|
480 |
477 |
p->wait_compute = true; |
p->wait_compute = true; |
481 |
|
|
|
482 |
478 |
return p->proceed_staging_reads(info.buffer_reads, info.images_reads); |
return p->proceed_staging_reads(info.buffer_reads, info.images_reads); |
483 |
479 |
} |
} |
484 |
|
|
|
485 |
|
|
|
486 |
480 |
[[nodiscard]] Result ComputeCmdUnit:: |
[[nodiscard]] Result ComputeCmdUnit:: |
487 |
481 |
read_result(BufferTransfers buffer_reads, ImagesTransfers images_reads) { |
read_result(BufferTransfers buffer_reads, ImagesTransfers images_reads) { |
488 |
482 |
Result res; |
Result res; |
489 |
483 |
res = p->wait(); |
res = p->wait(); |
490 |
484 |
if (res != VK_SUCCESS) |
if (res != VK_SUCCESS) |
491 |
485 |
return res; |
return res; |
492 |
|
|
|
493 |
486 |
for (uint32_t i = 0; i < buffer_reads.count(); ++i) { |
for (uint32_t i = 0; i < buffer_reads.count(); ++i) { |
494 |
487 |
auto &read = buffer_reads[i]; |
auto &read = buffer_reads[i]; |
495 |
488 |
auto &buffer = *read.p_buffer; |
auto &buffer = *read.p_buffer; |
496 |
|
|
|
497 |
489 |
jen::DeviceBufferPart *p_part; |
jen::DeviceBufferPart *p_part; |
498 |
490 |
if (buffer.use_staging) |
if (buffer.use_staging) |
499 |
491 |
p_part = &buffer.staging; |
p_part = &buffer.staging; |
500 |
492 |
else |
else |
501 |
493 |
p_part = &buffer.part; |
p_part = &buffer.part; |
502 |
|
|
|
503 |
494 |
read_from_allocation(p_part, read.p_data, read.offset, read.size); |
read_from_allocation(p_part, read.p_data, read.offset, read.size); |
504 |
495 |
} |
} |
505 |
|
|
|
506 |
496 |
for (uint32_t i = 0; i < images_reads.count(); ++i) { |
for (uint32_t i = 0; i < images_reads.count(); ++i) { |
507 |
497 |
auto &read = images_reads[i]; |
auto &read = images_reads[i]; |
508 |
498 |
auto &im = *read.p_image; |
auto &im = *read.p_image; |
509 |
499 |
auto p_part = &im.staging; |
auto p_part = &im.staging; |
510 |
|
|
|
511 |
|
vkw::DeviceSize offset = 0; |
|
|
500 |
|
uint32_t x_size = vkw::format_size(im.format); |
512 |
501 |
for (auto &r : read.transfers) { |
for (auto &r : read.transfers) { |
513 |
|
auto size = r.extent.volume() * vkw::format_size(im.format) |
|
514 |
|
* r.layer_count; |
|
515 |
|
read_from_allocation(p_part, r.p_data, offset, size); |
|
516 |
|
offset += size; |
|
|
502 |
|
auto ext = im.extent; |
|
503 |
|
uint64_t moffset = 0; |
|
504 |
|
for (uint32_t i = 1; i < r.mip_level; ++i) { |
|
505 |
|
moffset += ext.all_scale() * x_size; |
|
506 |
|
ext /= 2; |
|
507 |
|
ext.x = jl::max(ext.x, 1u); |
|
508 |
|
ext.y = jl::max(ext.y, 1u); |
|
509 |
|
ext.z = jl::max(ext.z, 1u); |
|
510 |
|
} |
|
511 |
|
uint64_t y_size = ext.x * x_size; |
|
512 |
|
uint64_t z_size = ext.y * y_size; |
|
513 |
|
uint64_t l_size = ext.z * z_size; |
|
514 |
|
uint64_t read_size = (r.extent.x - r.offset.x) * x_size; |
|
515 |
|
uint8_t *p_user = reinterpret_cast<uint8_t*>(r.p_data); |
|
516 |
|
uint64_t l_offset = moffset + r.layer_offset * l_size; |
|
517 |
|
uint64_t z_offset = l_offset + r.offset.z * z_size; |
|
518 |
|
for (uint32_t z = 0; z < r.extent.z; ++z) { |
|
519 |
|
uint64_t y_offset = z_offset + r.offset.y * y_size; |
|
520 |
|
for (uint32_t y = 0; y < r.extent.y; ++y) { |
|
521 |
|
read_from_allocation(p_part, p_user, y_offset, read_size); |
|
522 |
|
p_user += read_size; |
|
523 |
|
y_offset += y_size; |
|
524 |
|
} |
|
525 |
|
z_offset += z_size; |
|
526 |
|
} |
517 |
527 |
} |
} |
518 |
528 |
} |
} |
519 |
529 |
return VK_SUCCESS; |
return VK_SUCCESS; |