Img2Num C++ (Internal Developer Docs) dev
API Documentation
Loading...
Searching...
No Matches
kmeans_gpu.cpp
1
2#include "internal/kmeans_gpu.h"
3
4#include <algorithm>
5#include <cmath>
6#include <cstddef>
7#include <cstdint>
8#include <cstdlib>
9#include <cstring>
10#include <ctime>
11#include <functional>
12#include <limits>
13#include <numeric>
14#include <random>
15#include <type_traits> // Required for std::is_same_v
16#include <vector>
17
18#include "img2num.h"
19#include "internal/Image.h"
20#include "internal/LABAPixel.h"
21#include "internal/PixelConverters.h"
22#include "internal/RGBAPixel.h"
23#include "internal/cielab.h"
24#include "internal/gpu.h"
25
26static constexpr uint8_t COLOR_SPACE_OPTION_CIELAB{0};
27static constexpr uint8_t COLOR_SPACE_OPTION_RGB{1};
28
/// Uniform block uploaded for the k-means compute passes: the problem size
/// followed by padding that rounds the struct up to 16 bytes.
struct __attribute__((packed)) Params {
  uint32_t numPoints;     ///< Number of pixels being clustered.
  uint32_t numCentroids;  ///< Number of clusters (k).
  uint32_t pad[2];        ///< Unused filler words.
};
static_assert(sizeof(Params) == 4 * sizeof(uint32_t), "Params must stay 16 bytes");
34
36 int32_t sumR;
37 int32_t sumG;
38 int32_t sumB;
39 uint32_t count;
40} __attribute__((packed));
41
43 float r, g, b, a;
44 uint32_t width;
45 uint32_t pad[3]; // Padding to align to 16 bytes
46} __attribute__((packed));
47
48// The K-Means++ Initialization Function
49template <typename PixelT>
50void kMeansPlusPlusInitGpu(const ImageLib::Image<PixelT>& pixels,
51 ImageLib::Image<PixelT>& out_centroids, int k,
52 const uint8_t color_space) {
53 if (k <= 0) return;
54
55 size_t width = pixels.getWidth();
56 size_t height = pixels.getHeight();
57 size_t num_pixels = width * height;
58
59 std::vector<PixelT> centroids;
60
61 // --- WEBGPU SETUP START ---
62 // (Assuming 'device' and 'queue' are globally available or passed in)
63 // 1. Upload Image Texture
64 wgpu::TextureDescriptor texDesc = {};
65 texDesc.size = {static_cast<uint32_t>(width), static_cast<uint32_t>(height), 1};
66 texDesc.format = wgpu::TextureFormat::RGBA32Float;
67 texDesc.usage = wgpu::TextureUsage::TextureBinding | wgpu::TextureUsage::CopyDst;
68 texDesc.label = "inputTextureInit";
69 wgpu::Texture inputTexture = GPU::getClassInstance().get_device().CreateTexture(&texDesc);
70
71 // Upload pixel data (Normalization to 0.0-1.0 assumed)
72 std::vector<float> gpu_pixels;
73 gpu_pixels.reserve(num_pixels * 4);
74
75 for (int i = 0; i < num_pixels; i++) {
76 PixelT p = pixels[i];
77 if constexpr (std::is_same_v<PixelT, ImageLib::LABAPixel<float>>) {
78 gpu_pixels.push_back(p.l / 255.0f);
79 gpu_pixels.push_back(p.a / 255.0f);
80 gpu_pixels.push_back(p.b / 255.0f);
81 gpu_pixels.push_back(p.alpha / 255.0f);
82 } else {
83 gpu_pixels.push_back(p.red / 255.0f);
84 gpu_pixels.push_back(p.green / 255.0f);
85 gpu_pixels.push_back(p.blue / 255.0f);
86 gpu_pixels.push_back(p.alpha / 255.0f);
87 }
88 }
89
90 wgpu::TexelCopyTextureInfo texDst = {};
91 texDst.texture = inputTexture;
92 wgpu::TexelCopyBufferLayout texLayout = {};
93 texLayout.bytesPerRow = width * 16;
94 texLayout.rowsPerImage = height;
95 GPU::getClassInstance().get_queue().WriteTexture(
96 &texDst, gpu_pixels.data(), gpu_pixels.size() * 4, &texLayout, &texDesc.size);
97
98 // 2. Create MinDist Buffer (Storage)
99 // Initialize with FLT_MAX so the first centroid overwrites everything
100 std::vector<float> initial_dists(num_pixels, std::numeric_limits<float>::max());
101
102 wgpu::BufferDescriptor distDesc = {};
103 distDesc.size = num_pixels * sizeof(float);
104 distDesc.usage =
105 wgpu::BufferUsage::Storage | wgpu::BufferUsage::CopySrc | wgpu::BufferUsage::CopyDst;
106 wgpu::Buffer minDistBuffer = GPU::getClassInstance().get_device().CreateBuffer(&distDesc);
107 GPU::getClassInstance().get_queue().WriteBuffer(minDistBuffer, 0, initial_dists.data(),
108 distDesc.size);
109
110 // 3. Create Uniform Buffer (For passing new centroid color)
111
112 wgpu::BufferDescriptor uniDesc = {};
113 uniDesc.size = sizeof(CentroidParams);
114 uniDesc.usage = wgpu::BufferUsage::Uniform | wgpu::BufferUsage::CopyDst;
115 wgpu::Buffer paramBuffer = GPU::getClassInstance().get_device().CreateBuffer(&uniDesc);
116
117 // 4. Create Readback Buffer
118 wgpu::BufferDescriptor readDesc = {};
119 readDesc.size = num_pixels * sizeof(float);
120 readDesc.usage = wgpu::BufferUsage::MapRead | wgpu::BufferUsage::CopyDst;
121 wgpu::Buffer readBuffer = GPU::getClassInstance().get_device().CreateBuffer(&readDesc);
122
123 // 5. Compile Shader & Pipeline
124 wgpu::ComputePipeline pipeline =
125 GPU::getClassInstance().createPipeline("dist_shader", "updateDistShader");
126
127 // 6. Bind Group
128 wgpu::BindGroupEntry entries[3];
129 entries[0].binding = 0;
130 entries[0].textureView = inputTexture.CreateView();
131 entries[1].binding = 1;
132 entries[1].buffer = minDistBuffer;
133 entries[1].size = distDesc.size;
134 entries[2].binding = 2;
135 entries[2].buffer = paramBuffer;
136 entries[2].size = uniDesc.size;
137
138 wgpu::BindGroupDescriptor bgDesc = {};
139 bgDesc.layout = pipeline.GetBindGroupLayout(0);
140 bgDesc.entryCount = 3;
141 bgDesc.entries = entries;
142 wgpu::BindGroup bindGroup = GPU::getClassInstance().get_device().CreateBindGroup(&bgDesc);
143 // --- WEBGPU SETUP END ---
144
145 // RNG Setup
146 std::random_device rd;
147 std::mt19937 gen(rd());
148
149 // --- Step 1: Choose the first centroid randomly ---
150 std::uniform_int_distribution<> dis(0, num_pixels - 1);
151 int first_index = dis(gen);
152 centroids.push_back(pixels[first_index]);
153
154 // --- Step 2 & 3: Repeat until we have k centroids ---
155 // static volatile bool done = false;
156 bool* done = new bool(false);
157
158 for (int i = 1; i < k; ++i) {
159 *done = false;
160 // A. Upload Current Centroid to GPU
161 PixelT c = centroids.back();
162 CentroidParams params;
163 if constexpr (std::is_same_v<PixelT, ImageLib::LABAPixel<float>>) {
164 params = CentroidParams{c.l / 255.0f, c.a / 255.0f, c.b / 255.0f, 1.0f,
165 static_cast<uint32_t>(width)};
166 } else {
167 params = CentroidParams{c.red / 255.0f, c.green / 255.0f, c.blue / 255.0f, 1.0f,
168 static_cast<uint32_t>(width)};
169 }
170
171 GPU::getClassInstance().get_queue().WriteBuffer(paramBuffer, 0, &params,
172 sizeof(CentroidParams));
173
174 // B. Dispatch Shader (Updates min_dist buffer on GPU)
175 wgpu::CommandEncoder encoder = GPU::getClassInstance().get_device().CreateCommandEncoder();
176 wgpu::ComputePassEncoder pass = encoder.BeginComputePass();
177 pass.SetPipeline(pipeline);
178 pass.SetBindGroup(0, bindGroup);
179 pass.DispatchWorkgroups((width + 15) / 16, (height + 15) / 16, 1);
180 pass.End();
181
182 // C. Copy Result to ReadBuffer
183 encoder.CopyBufferToBuffer(minDistBuffer, 0, readBuffer, 0, readDesc.size);
184 wgpu::CommandBuffer commands = encoder.Finish();
185 GPU::getClassInstance().get_queue().Submit(1, &commands);
186
187 // D. Map and Read
188
189 readBuffer.MapAsync(
190 wgpu::MapMode::Read, 0, readDesc.size, wgpu::CallbackMode::AllowProcessEvents,
191 [](wgpu::MapAsyncStatus status, wgpu::StringView msg, void* userdata) {
192 bool* flag = static_cast<bool*>(userdata);
193 bool success = false;
194 if (status == wgpu::MapAsyncStatus::Success) {
195 // std::cout << "Map success: " << msg.data << std::endl;
196 success = true;
197 } else {
198 // Handle error
199 // std::cerr << "Map failed: " << msg.data << std::endl;
200 success = false;
201 }
202 *flag = true;
203 },
204 (void*)done);
205
206 // E. Wait for GPU
207 while (!*done) {
208 GPU::getClassInstance().get_instance().ProcessEvents();
209#if defined(__EMSCRIPTEN__)
210 emscripten_sleep(10);
211#endif
212 }
213
214 const float* dists = (const float*)readBuffer.GetConstMappedRange();
215 // --- CPU SIDE: Selection Logic ---
216 double sum_dist_sq = 0.0;
217
218 // 1. Sum (We have to iterate anyway for roulette, so sum here)
219 // Note: dists[] contains the SQUARED distance because shader calculated distSq
220 for (size_t j = 0; j < num_pixels; ++j) {
221 sum_dist_sq += dists[j];
222 }
223
224 // 2. Select
225 std::uniform_real_distribution<> dist_selector(0.0, sum_dist_sq);
226 double random_value = dist_selector(gen);
227 double current_sum = 0.0;
228 int selected_index = -1;
229
230 for (size_t j = 0; j < num_pixels; ++j) {
231 current_sum += dists[j];
232 if (current_sum >= random_value) {
233 selected_index = j;
234 break;
235 }
236 }
237
238 if (selected_index == -1) selected_index = num_pixels - 1;
239
240 // Add new centroid
241 centroids.push_back(pixels[selected_index]);
242 readBuffer.Unmap();
243#if defined(__EMSCRIPTEN__)
244 emscripten_sleep(10);
245#endif
246 }
247
248 std::copy(centroids.begin(), centroids.end(), out_centroids.begin());
249
250 // explicit clean up
251 if (inputTexture) inputTexture.Destroy();
252 readBuffer.Destroy();
253 minDistBuffer.Destroy();
254 paramBuffer.Destroy();
255 delete done;
256
257#if defined(__EMSCRIPTEN__)
258 emscripten_sleep(50);
259#endif
260}
261
265 ImageLib::Image<ImageLib::LABAPixel<float>>& centroids_lab, const int32_t width,
266 const int32_t height, const int32_t k, wgpu::Texture& inputTexture,
267 wgpu::Texture& labelTexture, wgpu::Texture& centroidTexture,
268 wgpu::TextureDescriptor& labelDesc, wgpu::TextureDescriptor& centroidDesc,
269 wgpu::ComputePipeline& pipeline1, wgpu::ComputePipeline& pipeline2,
270 wgpu::BindGroup& bindGroup1, wgpu::BindGroup& bindGroup2, const uint8_t color_space) {
271 int bytesPerPixel{16};
272 const int32_t num_pixels{pixels.getSize()};
273
274 wgpu::TextureDescriptor texDesc = {};
275 texDesc.size = {static_cast<uint32_t>(width), static_cast<uint32_t>(height), 1};
276 texDesc.format = wgpu::TextureFormat::RGBA32Float;
277 texDesc.usage = wgpu::TextureUsage::TextureBinding | wgpu::TextureUsage::CopyDst;
278 texDesc.label = "inputTexture";
279 inputTexture = GPU::getClassInstance().get_device().CreateTexture(&texDesc);
280
281 wgpu::TexelCopyTextureInfo dst = {};
282 dst.texture = inputTexture;
283 wgpu::TexelCopyBufferLayout layout = {};
284 layout.offset = 0;
285 layout.bytesPerRow = width * bytesPerPixel; // Tightly packed for upload
286 layout.rowsPerImage = height;
287
288 std::vector<float> pixels_;
289 for (int i = 0; i < num_pixels; i++) {
290 switch (color_space) {
291 case COLOR_SPACE_OPTION_RGB: {
292 auto p = pixels[i];
293 pixels_.push_back(p.red / 255.0f);
294 pixels_.push_back(p.green / 255.0f);
295 pixels_.push_back(p.blue / 255.0f);
296 pixels_.push_back(p.alpha / 255.0f);
297 break;
298 }
299 case COLOR_SPACE_OPTION_CIELAB: {
300 auto p = lab[i];
301 pixels_.push_back(p.l / 255.0f);
302 pixels_.push_back(p.a / 255.0f);
303 pixels_.push_back(p.b / 255.0f);
304 pixels_.push_back(p.alpha / 255.0f);
305 break;
306 }
307 }
308 }
309
310 GPU::getClassInstance().get_queue().WriteTexture(
311 &dst, pixels_.data(), pixels_.size() * sizeof(float), &layout, &texDesc.size);
312
313 // centroids
314 centroidDesc.size = {static_cast<uint32_t>(k), 1, 1};
315 centroidDesc.format = wgpu::TextureFormat::RGBA32Float;
316 centroidDesc.usage = wgpu::TextureUsage::TextureBinding | wgpu::TextureUsage::StorageBinding |
317 wgpu::TextureUsage::CopyDst | wgpu::TextureUsage::CopySrc;
318 centroidDesc.label = "centroidTexture";
319 centroidTexture = GPU::getClassInstance().get_device().CreateTexture(&centroidDesc);
320
321 wgpu::TexelCopyTextureInfo cdst = {};
322 cdst.texture = centroidTexture;
323 wgpu::TexelCopyBufferLayout clayout = {};
324 clayout.offset = 0;
325 clayout.bytesPerRow = k * bytesPerPixel; // Tightly packed for upload
326 clayout.rowsPerImage = 1;
327
328 std::vector<float> centroids_; // rgba
329 switch (color_space) {
330 case COLOR_SPACE_OPTION_RGB: {
331 for (int i = 0; i < k; i++) {
332 auto p = centroids[i];
333 centroids_.push_back(p.red / 255.0f);
334 centroids_.push_back(p.green / 255.0f);
335 centroids_.push_back(p.blue / 255.0f);
336 centroids_.push_back(p.alpha / 255.0f);
337 }
338 break;
339 }
340 case COLOR_SPACE_OPTION_CIELAB: {
341 for (int i = 0; i < k; i++) {
342 auto p = centroids_lab[i];
343 centroids_.push_back(p.l / 255.0f);
344 centroids_.push_back(p.a / 255.0f);
345 centroids_.push_back(p.b / 255.0f);
346 centroids_.push_back(p.alpha / 255.0f);
347 }
348 break;
349 }
350 }
351
352 GPU::getClassInstance().get_queue().WriteTexture(
353 &cdst, centroids_.data(), centroids_.size() * sizeof(float), &clayout, &centroidDesc.size);
354
355 // labels
356 labelDesc.size = {static_cast<uint32_t>(width), static_cast<uint32_t>(height), 1};
357 labelDesc.format = wgpu::TextureFormat::RGBA32Uint;
358 labelDesc.usage = wgpu::TextureUsage::TextureBinding | wgpu::TextureUsage::StorageBinding |
359 wgpu::TextureUsage::CopyDst | wgpu::TextureUsage::CopySrc;
360 labelDesc.label = "labelTexture";
361 labelTexture = GPU::getClassInstance().get_device().CreateTexture(&labelDesc);
362
363 // params
364 Params params = {static_cast<uint32_t>(num_pixels), static_cast<uint32_t>(k)};
365 wgpu::BufferDescriptor bufDesc = {};
366 bufDesc.size = sizeof(Params);
367 bufDesc.usage = wgpu::BufferUsage::Uniform | wgpu::BufferUsage::CopyDst;
368 wgpu::Buffer paramBuffer = GPU::getClassInstance().get_device().CreateBuffer(&bufDesc);
369 GPU::getClassInstance().get_queue().WriteBuffer(paramBuffer, 0, &params, sizeof(Params));
370
371 // centroid accumulator
372 std::vector<ClusterAccumulator> reset_centroids(k, {0, 0, 0, 0});
373 wgpu::BufferDescriptor accDesc = {};
374 accDesc.size = sizeof(ClusterAccumulator) * k;
375 accDesc.usage = wgpu::BufferUsage::Storage | wgpu::BufferUsage::CopyDst;
376 wgpu::Buffer accBuffer = GPU::getClassInstance().get_device().CreateBuffer(&accDesc);
377 GPU::getClassInstance().get_queue().WriteBuffer(accBuffer, 0, reset_centroids.data(),
378 accDesc.size);
379
380 // shaders
381 pipeline1 =
382 GPU::getClassInstance().createPipeline("assign_update_shader", "assignUpdateShader");
383 pipeline2 = GPU::getClassInstance().createPipeline("resolve_shader", "resolveShader");
384
385 // binding groups
386 wgpu::BindGroupDescriptor bindGroupDesc1 = {};
387 bindGroupDesc1.layout = pipeline1.GetBindGroupLayout(0);
388 wgpu::BindGroupEntry entries1[5]; // 4
389 // Entry 0: Input Texture View
390 entries1[0].binding = 0;
391 entries1[0].textureView = inputTexture.CreateView();
392 // Entry 1: Centroid Texture View
393 entries1[1].binding = 1;
394 entries1[1].textureView = centroidTexture.CreateView();
395 // Entry 2: Label Texture View
396 entries1[2].binding = 2;
397 entries1[2].textureView = labelTexture.CreateView();
398 // Entry 2: Uniform Buffer
399 entries1[3].binding = 3;
400 entries1[3].buffer = paramBuffer;
401 entries1[3].size = sizeof(Params);
402
403 entries1[4].binding = 4;
404 entries1[4].buffer = accBuffer;
405 entries1[4].size = sizeof(ClusterAccumulator) * k;
406
407 bindGroupDesc1.entryCount = 5; // 4;
408 bindGroupDesc1.entries = entries1;
409 bindGroup1 = GPU::getClassInstance().get_device().CreateBindGroup(&bindGroupDesc1);
410
411 wgpu::BindGroupDescriptor bindGroupDesc2 = {};
412 bindGroupDesc2.layout = pipeline2.GetBindGroupLayout(0);
413 wgpu::BindGroupEntry entries2[2];
414 entries2[0].binding = 0;
415 entries2[0].buffer = accBuffer;
416 entries2[0].size = accDesc.size;
417 entries2[1].binding = 1;
418 entries2[1].textureView = centroidTexture.CreateView();
419 bindGroupDesc2.entryCount = 2;
420 bindGroupDesc2.entries = entries2;
421 bindGroup2 = GPU::getClassInstance().get_device().CreateBindGroup(&bindGroupDesc2);
422}
423
424void kmeans_gpu(const uint8_t* data, uint8_t* out_data, int32_t* out_labels, const int32_t width,
425 const int32_t height, const int32_t k, const int32_t max_iter,
426 const uint8_t color_space) {
428 pixels.loadFromBuffer(data, width, height, ImageLib::RGBA_CONVERTER<float>);
429 const int32_t num_pixels{pixels.getSize()};
430
431 // width = k, height = 1
432 // k centroids, initialized to rgba(0,0,0,255)
433 // Init of each pixel is from default in Image constructor
436 std::vector<int32_t> labels(num_pixels, -1);
437
438 ImageLib::Image<ImageLib::LABAPixel<float>> lab(pixels.getWidth(), pixels.getHeight());
439
440 if (color_space == COLOR_SPACE_OPTION_CIELAB) {
441 for (int i{0}; i < pixels.getSize(); ++i) {
442 rgb_to_lab<float, float>(pixels[i], lab[i]);
443 }
444 }
445
446 std::cout << "starting" << std::endl;
447 // Step 2: Initialize centroids
448
449 switch (color_space) {
450 case COLOR_SPACE_OPTION_RGB: {
451 kMeansPlusPlusInitGpu<ImageLib::RGBAPixel<float>>(pixels, centroids, k, color_space);
452 break;
453 }
454 case COLOR_SPACE_OPTION_CIELAB: {
455 kMeansPlusPlusInitGpu<ImageLib::LABAPixel<float>>(lab, centroids_lab, k, color_space);
456 break;
457 }
458 }
459 std::cout << "kmeans++ init done" << std::endl;
460 // Step 3: Run k-means iterations
461
462 int bytesPerPixel{16}; // float pixels
463
464 // shaders - 2 pipelines:
465 // 1. assign and update clusters
466 // 2. resolve cluster centroids
467 wgpu::ComputePipeline pipeline1;
468 wgpu::ComputePipeline pipeline2;
469 wgpu::BindGroup bindGroup1;
470 wgpu::BindGroup bindGroup2;
471 wgpu::Texture inputTexture;
472 wgpu::Texture labelTexture;
473 wgpu::Texture centroidTexture;
474 wgpu::TextureDescriptor labelDesc = {};
475 wgpu::TextureDescriptor centroidDesc = {};
476
477 // setup all textures and buffers needed for the kmeans loop on gpu
478 setup(pixels, lab, centroids, centroids_lab, width, height, k, inputTexture, labelTexture,
479 centroidTexture, labelDesc, centroidDesc, pipeline1, pipeline2, bindGroup1, bindGroup2,
480 color_space);
481
482 uint32_t wgX = (width + 15) / 16;
483 uint32_t wgY = (height + 15) / 16;
484
485 // Label Readback RGBA32Uint is 16 bytes/ pixel
486 uint32_t bytesPerRowLabels =
487 GPU::getAlignedBytesPerRow(width, static_cast<uint32_t>(bytesPerPixel));
488 wgpu::BufferDescriptor readLabelsDesc = {};
489 readLabelsDesc.size = bytesPerRowLabels * height;
490 readLabelsDesc.usage = wgpu::BufferUsage::MapRead | wgpu::BufferUsage::CopyDst;
491 wgpu::Buffer readLabelsBuffer =
492 GPU::getClassInstance().get_device().CreateBuffer(&readLabelsDesc);
493
494 // Centroid Readback
495 uint32_t bytesPerRowCentroids =
496 GPU::getAlignedBytesPerRow(width, static_cast<uint32_t>(bytesPerPixel));
497 wgpu::BufferDescriptor readCentroidsDesc = {};
498 readCentroidsDesc.size = bytesPerRowCentroids; // Height is 1
499 readCentroidsDesc.usage = wgpu::BufferUsage::MapRead | wgpu::BufferUsage::CopyDst;
500 wgpu::Buffer readCentroidsBuffer =
501 GPU::getClassInstance().get_device().CreateBuffer(&readCentroidsDesc);
502
503 // This is the actual KMeans loop
504 std::cout << "start iterations" << std::endl;
505 wgpu::CommandEncoder encoder = GPU::getClassInstance().get_device().CreateCommandEncoder();
506 for (int32_t iter{0}; iter < max_iter; ++iter) {
507 wgpu::ComputePassEncoder pass1 = encoder.BeginComputePass();
508 pass1.SetPipeline(pipeline1);
509 pass1.SetBindGroup(0, bindGroup1);
510 pass1.DispatchWorkgroups(wgX, wgY);
511 pass1.End();
512
513 wgpu::ComputePassEncoder pass2 = encoder.BeginComputePass();
514 pass2.SetPipeline(pipeline2);
515 pass2.SetBindGroup(0, bindGroup2);
516 pass2.DispatchWorkgroups((k + 255) / 256, 1);
517 pass2.End();
518 }
519
520 // 3. Readback (After Loop Finishes)
521
522 // Copy Labels
523 wgpu::TexelCopyTextureInfo srcLabels = {};
524 srcLabels.texture = labelTexture;
525 wgpu::TexelCopyBufferInfo dstLabels = {};
526 dstLabels.buffer = readLabelsBuffer;
527 dstLabels.layout.bytesPerRow = bytesPerRowLabels;
528 dstLabels.layout.rowsPerImage = height;
529 encoder.CopyTextureToBuffer(&srcLabels, &dstLabels, &labelDesc.size);
530
531 // Copy Centroids
532 wgpu::TexelCopyTextureInfo srcCentroids = {};
533 srcCentroids.texture = centroidTexture;
534 wgpu::TexelCopyBufferInfo dstCentroids = {};
535 dstCentroids.buffer = readCentroidsBuffer;
536 dstCentroids.layout.bytesPerRow = bytesPerRowCentroids;
537 dstCentroids.layout.rowsPerImage = 1;
538 encoder.CopyTextureToBuffer(&srcCentroids, &dstCentroids, &centroidDesc.size);
539
540 wgpu::CommandBuffer commands = encoder.Finish();
541 GPU::getClassInstance().get_queue().Submit(1, &commands);
542 std::cout << "done iterations" << std::endl;
543
544 // 4. Map Async & Wait
545 bool* done1 = new bool(false);
546 bool* done2 = new bool(false);
547
548 // Map Labels
549 readLabelsBuffer.MapAsync(
550 wgpu::MapMode::Read, 0, readLabelsDesc.size, wgpu::CallbackMode::AllowProcessEvents,
551 [](wgpu::MapAsyncStatus status, wgpu::StringView msg, void* userdata) {
552 bool* flag = static_cast<bool*>(userdata);
553 bool success = false;
554 if (status == wgpu::MapAsyncStatus::Success) {
555 // std::cout << "Map success" << std::endl;
556 success = true;
557 }
558 *flag = true;
559 },
560 (void*)done1);
561
562 std::cout << "read out" << std::endl;
563
564 while (!*done1) {
565 GPU::getClassInstance().get_instance().ProcessEvents();
566#if defined(__EMSCRIPTEN__)
567 emscripten_sleep(10);
568#endif
569 }
570
571 std::cout << "mapping labels" << std::endl;
572 const uint8_t* mappedData = (const uint8_t*)readLabelsBuffer.GetConstMappedRange();
573 // ... Copy data to your C++ vector ...
574 // Copy row by row to remove padding and put data into 'result'
575 for (size_t y = 0; y < height; ++y) {
576 const uint8_t* rowPtr = mappedData + (y * bytesPerRowLabels);
577 for (size_t x = 0; x < width; ++x) {
578 const uint8_t* pixelPtr = rowPtr + (x * bytesPerPixel);
579 uint32_t r = 0;
580 std::memcpy(&r, pixelPtr, sizeof(uint32_t));
581
582 size_t dstIndex = y * width + x;
583 labels[dstIndex] = static_cast<int32_t>(r);
584 }
585 }
586
587 readLabelsBuffer.Unmap();
588
589 // Map Centroids
590 readCentroidsBuffer.MapAsync(
591 wgpu::MapMode::Read, 0, readCentroidsDesc.size, wgpu::CallbackMode::AllowProcessEvents,
592 [](wgpu::MapAsyncStatus status, wgpu::StringView msg, void* userdata) {
593 bool* flag = static_cast<bool*>(userdata);
594 bool success = false;
595 if (status == wgpu::MapAsyncStatus::Success) {
596 // std::cout << "Map success" << std::endl;
597 success = true;
598 }
599 *flag = true; // Signal completion
600 },
601 (void*)done2);
602
603 while (!*done2) {
604 GPU::getClassInstance().get_instance().ProcessEvents();
605#if defined(__EMSCRIPTEN__)
606 emscripten_sleep(10);
607#endif
608 }
609
610 std::cout << "mapping centroids" << std::endl;
611 const float* mappedDataFloat = (const float*)readCentroidsBuffer.GetConstMappedRange();
612 // ... Copy data to your C++ vector ...
613
614 for (int i = 0; i < k; i++) {
615 // if CIELAB color space these represent l, a, b, alpha
616 const float* centroidPtr = mappedDataFloat + (i * 4);
617
618 float r = *(centroidPtr);
619 float g = *(centroidPtr + 1);
620 float b = *(centroidPtr + 2);
621 float a = *(centroidPtr + 3);
622 switch (color_space) {
623 case COLOR_SPACE_OPTION_RGB: {
624 centroids[i] =
625 ImageLib::RGBAPixel<float>(r * 255.f, g * 255.f, b * 255.f, a * 255.f);
626 break;
627 }
628 case COLOR_SPACE_OPTION_CIELAB: {
629 centroids_lab[i] =
630 ImageLib::LABAPixel<float>(r * 255.f, g * 255.f, b * 255.f, a * 255.f);
631 break;
632 }
633 }
634 }
635
636 readCentroidsBuffer.Unmap();
637
638 // Write the final centroid values to each pixel in the cluster
639 if (color_space == COLOR_SPACE_OPTION_CIELAB) {
640 for (int32_t i{0}; i < k; ++i) {
641 lab_to_rgb<float, float>(centroids_lab[i], centroids[i]);
642 }
643 }
644
645 for (int32_t i = 0; i < num_pixels; ++i) {
646 const int32_t cluster = labels[i];
647 out_data[i * 4 + 0] = static_cast<uint8_t>(centroids[cluster].red);
648 out_data[i * 4 + 1] = static_cast<uint8_t>(centroids[cluster].green);
649 out_data[i * 4 + 2] = static_cast<uint8_t>(centroids[cluster].blue);
650 out_data[i * 4 + 3] = 255;
651 }
652
653 // Write labels to out_labels
654 std::cout << "copying labels out" << std::endl;
655 std::memcpy(out_labels, labels.data(), labels.size() * sizeof(int32_t));
656
657 if (inputTexture) inputTexture.Destroy();
658 if (labelTexture) labelTexture.Destroy();
659 if (centroidTexture) centroidTexture.Destroy();
660 readLabelsBuffer.Destroy();
661 readCentroidsBuffer.Destroy();
662 delete done1;
663 delete done2;
664
665 labels.clear();
666 labels.shrink_to_fit();
667#if defined(__EMSCRIPTEN__)
668 emscripten_sleep(50);
669#endif
670}
Core image processing functions for img2num project.