Img2Num C++ (Internal Developer Docs) dev
API Documentation
Loading...
Searching...
No Matches
kmeans_gpu.cpp
1
#include "internal/kmeans_gpu.h"

#include "img2num.h"
#include "internal/cielab.h"
#include "internal/gpu.h"
#include "internal/Image.h"
#include "internal/LABAPixel.h"
#include "internal/PixelConverters.h"
#include "internal/RGBAPixel.h"

#include <algorithm>
#include <cmath>
#include <cstddef>
#include <cstdint>
#include <cstdlib>
#include <cstring>
#include <ctime>
#include <functional>
#include <iostream>
#include <limits>
#include <memory>
#include <numeric>
#include <random>
#include <type_traits> // Required for std::is_same_v
#include <vector>
25
/// Value of the `color_space` argument that selects clustering on CIELAB pixels.
static constexpr uint8_t COLOR_SPACE_OPTION_CIELAB = 0;
/// Value of the `color_space` argument that selects clustering on RGB pixels.
static constexpr uint8_t COLOR_SPACE_OPTION_RGB = 1;
28
#ifdef _MSC_VER
#pragma pack(push, 1)
#endif
/**
 * Host-side image of the uniform block consumed by the assign/update compute pass.
 *
 * setup() fills the first two fields and uploads the struct byte-for-byte with
 * Queue::WriteBuffer, so the layout here has to match what the shader declares.
 */
struct Params {
    uint32_t numPoints;    ///< Total number of pixels in the image.
    uint32_t numCentroids; ///< Number of clusters (k).
    uint32_t pad[2];       ///< Unused; rounds the block up to 16 bytes.
}
#ifndef _MSC_VER
__attribute__((packed))
#endif
;
#ifdef _MSC_VER
#pragma pack(pop)
#endif

// The struct is copied verbatim into a uniform buffer: catch accidental layout changes at
// compile time instead of as a GPU validation error.
static_assert(sizeof(Params) == 16, "Params must be exactly 16 bytes");
44
#ifdef _MSC_VER
#pragma pack(push, 1)
#endif
/**
 * One element of the per-cluster accumulator storage buffer.
 *
 * setup() allocates k of these, zero-filled, and binds the buffer to both compute
 * pipelines. The field meanings below follow the names only; the shaders that write
 * them are not part of this file.
 * NOTE(review): presumably sumR/sumG/sumB are integer (fixed-point) channel sums and
 * count is the number of pixels assigned to the cluster -- confirm against the WGSL.
 */
struct ClusterAccumulator {
    int32_t sumR;
    int32_t sumG;
    int32_t sumB;
    uint32_t count;
}
#ifndef _MSC_VER
__attribute__((packed))
#endif
;
#ifdef _MSC_VER
#pragma pack(pop)
#endif

// Elements are uploaded as a raw array; the stride must stay at 16 bytes.
static_assert(sizeof(ClusterAccumulator) == 16, "ClusterAccumulator must be exactly 16 bytes");
61
#ifdef _MSC_VER
#pragma pack(push, 1)
#endif
/**
 * Uniform block handed to the k-means++ distance shader for each new centroid.
 *
 * kMeansPlusPlusInitGpu() fills r/g/b with the centroid's channels divided by 255
 * (l/a/b when clustering in CIELAB), sets a to 1.0 and width to the image width, then
 * uploads the struct byte-for-byte.
 */
struct CentroidParams {
    float r, g, b, a;
    uint32_t width;
    uint32_t pad[3]; // Padding to align to 16 bytes
}
#ifndef _MSC_VER
__attribute__((packed))
#endif
;
#ifdef _MSC_VER
#pragma pack(pop)
#endif

// 4 floats + width + 3 words of padding; keep the size a multiple of 16 bytes.
static_assert(sizeof(CentroidParams) == 32, "CentroidParams must be exactly 32 bytes");
77
78// The K-Means++ Initialization Function
79template <typename PixelT>
80void kMeansPlusPlusInitGpu(
81 const ImageLib::Image<PixelT>& pixels, ImageLib::Image<PixelT>& out_centroids, int k,
82 const uint8_t color_space
83) {
84 if (k <= 0)
85 return;
86
87 size_t width = pixels.getWidth();
88 size_t height = pixels.getHeight();
89 size_t num_pixels = width * height;
90
91 std::vector<PixelT> centroids;
92
93 // --- WEBGPU SETUP START ---
94 // (Assuming 'device' and 'queue' are globally available or passed in)
95 // 1. Upload Image Texture
96 wgpu::TextureDescriptor texDesc = {};
97 texDesc.size = {static_cast<uint32_t>(width), static_cast<uint32_t>(height), 1};
98 texDesc.format = wgpu::TextureFormat::RGBA32Float;
99 texDesc.usage = wgpu::TextureUsage::TextureBinding | wgpu::TextureUsage::CopyDst;
100 texDesc.label = "inputTextureInit";
101 wgpu::Texture inputTexture = GPU::getClassInstance().get_device().CreateTexture(&texDesc);
102
103 // Upload pixel data (Normalization to 0.0-1.0 assumed)
104 std::vector<float> gpu_pixels;
105 gpu_pixels.reserve(num_pixels * 4);
106
107 for (int i = 0; i < num_pixels; i++) {
108 PixelT p = pixels[i];
109 if constexpr (std::is_same_v<PixelT, ImageLib::LABAPixel<float>>) {
110 gpu_pixels.push_back(p.l / 255.0f);
111 gpu_pixels.push_back(p.a / 255.0f);
112 gpu_pixels.push_back(p.b / 255.0f);
113 gpu_pixels.push_back(p.alpha / 255.0f);
114 } else {
115 gpu_pixels.push_back(p.red / 255.0f);
116 gpu_pixels.push_back(p.green / 255.0f);
117 gpu_pixels.push_back(p.blue / 255.0f);
118 gpu_pixels.push_back(p.alpha / 255.0f);
119 }
120 }
121
122 wgpu::TexelCopyTextureInfo texDst = {};
123 texDst.texture = inputTexture;
124 wgpu::TexelCopyBufferLayout texLayout = {};
125 texLayout.bytesPerRow = width * 16;
126 texLayout.rowsPerImage = height;
127 GPU::getClassInstance().get_queue().WriteTexture(
128 &texDst, gpu_pixels.data(), gpu_pixels.size() * 4, &texLayout, &texDesc.size
129 );
130
131 // 2. Create MinDist Buffer (Storage)
132 // Initialize with FLT_MAX so the first centroid overwrites everything
133 std::vector<float> initial_dists(num_pixels, std::numeric_limits<float>::max());
134
135 wgpu::BufferDescriptor distDesc = {};
136 distDesc.size = num_pixels * sizeof(float);
137 distDesc.usage =
138 wgpu::BufferUsage::Storage | wgpu::BufferUsage::CopySrc | wgpu::BufferUsage::CopyDst;
139 wgpu::Buffer minDistBuffer = GPU::getClassInstance().get_device().CreateBuffer(&distDesc);
140 GPU::getClassInstance().get_queue().WriteBuffer(
141 minDistBuffer, 0, initial_dists.data(), distDesc.size
142 );
143
144 // 3. Create Uniform Buffer (For passing new centroid color)
145
146 wgpu::BufferDescriptor uniDesc = {};
147 uniDesc.size = sizeof(CentroidParams);
148 uniDesc.usage = wgpu::BufferUsage::Uniform | wgpu::BufferUsage::CopyDst;
149 wgpu::Buffer paramBuffer = GPU::getClassInstance().get_device().CreateBuffer(&uniDesc);
150
151 // 4. Create Readback Buffer
152 wgpu::BufferDescriptor readDesc = {};
153 readDesc.size = num_pixels * sizeof(float);
154 readDesc.usage = wgpu::BufferUsage::MapRead | wgpu::BufferUsage::CopyDst;
155 wgpu::Buffer readBuffer = GPU::getClassInstance().get_device().CreateBuffer(&readDesc);
156
157 // 5. Compile Shader & Pipeline
158 wgpu::ComputePipeline pipeline =
159 GPU::getClassInstance().createPipeline("dist_shader", "updateDistShader");
160
161 // 6. Bind Group
162 wgpu::BindGroupEntry entries[3];
163 entries[0].binding = 0;
164 entries[0].textureView = inputTexture.CreateView();
165 entries[1].binding = 1;
166 entries[1].buffer = minDistBuffer;
167 entries[1].size = distDesc.size;
168 entries[2].binding = 2;
169 entries[2].buffer = paramBuffer;
170 entries[2].size = uniDesc.size;
171
172 wgpu::BindGroupDescriptor bgDesc = {};
173 bgDesc.layout = pipeline.GetBindGroupLayout(0);
174 bgDesc.entryCount = 3;
175 bgDesc.entries = entries;
176 wgpu::BindGroup bindGroup = GPU::getClassInstance().get_device().CreateBindGroup(&bgDesc);
177 // --- WEBGPU SETUP END ---
178
179 // RNG Setup
180 std::random_device rd;
181 std::mt19937 gen(rd());
182
183 // --- Step 1: Choose the first centroid randomly ---
184 std::uniform_int_distribution<> dis(0, num_pixels - 1);
185 int first_index = dis(gen);
186 centroids.push_back(pixels[first_index]);
187
188 // --- Step 2 & 3: Repeat until we have k centroids ---
189 // static volatile bool done = false;
190 bool* done = new bool(false);
191
192 for (int i = 1; i < k; ++i) {
193 *done = false;
194 // A. Upload Current Centroid to GPU
195 PixelT c = centroids.back();
196 CentroidParams params;
197 if constexpr (std::is_same_v<PixelT, ImageLib::LABAPixel<float>>) {
198 params = CentroidParams {
199 c.l / 255.0f, c.a / 255.0f, c.b / 255.0f, 1.0f, static_cast<uint32_t>(width)};
200 } else {
201 params = CentroidParams {
202 c.red / 255.0f, c.green / 255.0f, c.blue / 255.0f, 1.0f,
203 static_cast<uint32_t>(width)};
204 }
205
206 GPU::getClassInstance().get_queue().WriteBuffer(
207 paramBuffer, 0, &params, sizeof(CentroidParams)
208 );
209
210 // B. Dispatch Shader (Updates min_dist buffer on GPU)
211 wgpu::CommandEncoder encoder = GPU::getClassInstance().get_device().CreateCommandEncoder();
212 wgpu::ComputePassEncoder pass = encoder.BeginComputePass();
213 pass.SetPipeline(pipeline);
214 pass.SetBindGroup(0, bindGroup);
215 pass.DispatchWorkgroups((width + 15) / 16, (height + 15) / 16, 1);
216 pass.End();
217
218 // C. Copy Result to ReadBuffer
219 encoder.CopyBufferToBuffer(minDistBuffer, 0, readBuffer, 0, readDesc.size);
220 wgpu::CommandBuffer commands = encoder.Finish();
221 GPU::getClassInstance().get_queue().Submit(1, &commands);
222
223 // D. Map and Read
224
225 readBuffer.MapAsync(
226 wgpu::MapMode::Read, 0, readDesc.size, wgpu::CallbackMode::AllowProcessEvents,
227 [](wgpu::MapAsyncStatus status, wgpu::StringView msg, void* userdata) {
228 bool* flag = static_cast<bool*>(userdata);
229 bool success = false;
230 if (status == wgpu::MapAsyncStatus::Success) {
231 // std::cout << "Map success: " << msg.data << std::endl;
232 success = true;
233 } else {
234 // Handle error
235 // std::cerr << "Map failed: " << msg.data << std::endl;
236 success = false;
237 }
238 *flag = true;
239 },
240 (void*)done
241 );
242
243 // E. Wait for GPU
244 while (!*done) {
245 GPU::getClassInstance().get_instance().ProcessEvents();
246#if defined(__EMSCRIPTEN__)
247 emscripten_sleep(10);
248#endif
249 }
250
251 const float* dists = (const float*)readBuffer.GetConstMappedRange();
252 // --- CPU SIDE: Selection Logic ---
253 double sum_dist_sq = 0.0;
254
255 // 1. Sum (We have to iterate anyway for roulette, so sum here)
256 // Note: dists[] contains the SQUARED distance because shader calculated distSq
257 for (size_t j = 0; j < num_pixels; ++j) {
258 sum_dist_sq += dists[j];
259 }
260
261 // 2. Select
262 std::uniform_real_distribution<> dist_selector(0.0, sum_dist_sq);
263 double random_value = dist_selector(gen);
264 double current_sum = 0.0;
265 int selected_index = -1;
266
267 for (size_t j = 0; j < num_pixels; ++j) {
268 current_sum += dists[j];
269 if (current_sum >= random_value) {
270 selected_index = j;
271 break;
272 }
273 }
274
275 if (selected_index == -1)
276 selected_index = num_pixels - 1;
277
278 // Add new centroid
279 centroids.push_back(pixels[selected_index]);
280 readBuffer.Unmap();
281#if defined(__EMSCRIPTEN__)
282 emscripten_sleep(10);
283#endif
284 }
285
286 std::copy(centroids.begin(), centroids.end(), out_centroids.begin());
287
288 // explicit clean up
289 if (inputTexture)
290 inputTexture.Destroy();
291 readBuffer.Destroy();
292 minDistBuffer.Destroy();
293 paramBuffer.Destroy();
294 delete done;
295
296#if defined(__EMSCRIPTEN__)
297 emscripten_sleep(50);
298#endif
299}
300
301void setup(
305 ImageLib::Image<ImageLib::LABAPixel<float>>& centroids_lab, const int32_t width,
306 const int32_t height, const int32_t k, wgpu::Texture& inputTexture, wgpu::Texture& labelTexture,
307 wgpu::Texture& centroidTexture, wgpu::TextureDescriptor& labelDesc,
308 wgpu::TextureDescriptor& centroidDesc, wgpu::ComputePipeline& pipeline1,
309 wgpu::ComputePipeline& pipeline2, wgpu::BindGroup& bindGroup1, wgpu::BindGroup& bindGroup2,
310 const uint8_t color_space
311) {
312 int bytesPerPixel {16};
313 const int32_t num_pixels {pixels.getSize()};
314
315 wgpu::TextureDescriptor texDesc = {};
316 texDesc.size = {static_cast<uint32_t>(width), static_cast<uint32_t>(height), 1};
317 texDesc.format = wgpu::TextureFormat::RGBA32Float;
318 texDesc.usage = wgpu::TextureUsage::TextureBinding | wgpu::TextureUsage::CopyDst;
319 texDesc.label = "inputTexture";
320 inputTexture = GPU::getClassInstance().get_device().CreateTexture(&texDesc);
321
322 wgpu::TexelCopyTextureInfo dst = {};
323 dst.texture = inputTexture;
324 wgpu::TexelCopyBufferLayout layout = {};
325 layout.offset = 0;
326 layout.bytesPerRow = width * bytesPerPixel; // Tightly packed for upload
327 layout.rowsPerImage = height;
328
329 std::vector<float> pixels_;
330 for (int i = 0; i < num_pixels; i++) {
331 switch (color_space) {
332 case COLOR_SPACE_OPTION_RGB: {
333 auto p = pixels[i];
334 pixels_.push_back(p.red / 255.0f);
335 pixels_.push_back(p.green / 255.0f);
336 pixels_.push_back(p.blue / 255.0f);
337 pixels_.push_back(p.alpha / 255.0f);
338 break;
339 }
340 case COLOR_SPACE_OPTION_CIELAB: {
341 auto p = lab[i];
342 pixels_.push_back(p.l / 255.0f);
343 pixels_.push_back(p.a / 255.0f);
344 pixels_.push_back(p.b / 255.0f);
345 pixels_.push_back(p.alpha / 255.0f);
346 break;
347 }
348 }
349 }
350
351 GPU::getClassInstance().get_queue().WriteTexture(
352 &dst, pixels_.data(), pixels_.size() * sizeof(float), &layout, &texDesc.size
353 );
354
355 // centroids
356 centroidDesc.size = {static_cast<uint32_t>(k), 1, 1};
357 centroidDesc.format = wgpu::TextureFormat::RGBA32Float;
358 centroidDesc.usage = wgpu::TextureUsage::TextureBinding | wgpu::TextureUsage::StorageBinding |
359 wgpu::TextureUsage::CopyDst | wgpu::TextureUsage::CopySrc;
360 centroidDesc.label = "centroidTexture";
361 centroidTexture = GPU::getClassInstance().get_device().CreateTexture(&centroidDesc);
362
363 wgpu::TexelCopyTextureInfo cdst = {};
364 cdst.texture = centroidTexture;
365 wgpu::TexelCopyBufferLayout clayout = {};
366 clayout.offset = 0;
367 clayout.bytesPerRow = k * bytesPerPixel; // Tightly packed for upload
368 clayout.rowsPerImage = 1;
369
370 std::vector<float> centroids_; // rgba
371 switch (color_space) {
372 case COLOR_SPACE_OPTION_RGB: {
373 for (int i = 0; i < k; i++) {
374 auto p = centroids[i];
375 centroids_.push_back(p.red / 255.0f);
376 centroids_.push_back(p.green / 255.0f);
377 centroids_.push_back(p.blue / 255.0f);
378 centroids_.push_back(p.alpha / 255.0f);
379 }
380 break;
381 }
382 case COLOR_SPACE_OPTION_CIELAB: {
383 for (int i = 0; i < k; i++) {
384 auto p = centroids_lab[i];
385 centroids_.push_back(p.l / 255.0f);
386 centroids_.push_back(p.a / 255.0f);
387 centroids_.push_back(p.b / 255.0f);
388 centroids_.push_back(p.alpha / 255.0f);
389 }
390 break;
391 }
392 }
393
394 GPU::getClassInstance().get_queue().WriteTexture(
395 &cdst, centroids_.data(), centroids_.size() * sizeof(float), &clayout, &centroidDesc.size
396 );
397
398 // labels
399 labelDesc.size = {static_cast<uint32_t>(width), static_cast<uint32_t>(height), 1};
400 labelDesc.format = wgpu::TextureFormat::RGBA32Uint;
401 labelDesc.usage = wgpu::TextureUsage::TextureBinding | wgpu::TextureUsage::StorageBinding |
402 wgpu::TextureUsage::CopyDst | wgpu::TextureUsage::CopySrc;
403 labelDesc.label = "labelTexture";
404 labelTexture = GPU::getClassInstance().get_device().CreateTexture(&labelDesc);
405
406 // params
407 Params params = {static_cast<uint32_t>(num_pixels), static_cast<uint32_t>(k)};
408 wgpu::BufferDescriptor bufDesc = {};
409 bufDesc.size = sizeof(Params);
410 bufDesc.usage = wgpu::BufferUsage::Uniform | wgpu::BufferUsage::CopyDst;
411 wgpu::Buffer paramBuffer = GPU::getClassInstance().get_device().CreateBuffer(&bufDesc);
412 GPU::getClassInstance().get_queue().WriteBuffer(paramBuffer, 0, &params, sizeof(Params));
413
414 // centroid accumulator
415 std::vector<ClusterAccumulator> reset_centroids(k, {0, 0, 0, 0});
416 wgpu::BufferDescriptor accDesc = {};
417 accDesc.size = sizeof(ClusterAccumulator) * k;
418 accDesc.usage = wgpu::BufferUsage::Storage | wgpu::BufferUsage::CopyDst;
419 wgpu::Buffer accBuffer = GPU::getClassInstance().get_device().CreateBuffer(&accDesc);
420 GPU::getClassInstance().get_queue().WriteBuffer(
421 accBuffer, 0, reset_centroids.data(), accDesc.size
422 );
423
424 // shaders
425 pipeline1 =
426 GPU::getClassInstance().createPipeline("assign_update_shader", "assignUpdateShader");
427 pipeline2 = GPU::getClassInstance().createPipeline("resolve_shader", "resolveShader");
428
429 // binding groups
430 wgpu::BindGroupDescriptor bindGroupDesc1 = {};
431 bindGroupDesc1.layout = pipeline1.GetBindGroupLayout(0);
432 wgpu::BindGroupEntry entries1[5]; // 4
433 // Entry 0: Input Texture View
434 entries1[0].binding = 0;
435 entries1[0].textureView = inputTexture.CreateView();
436 // Entry 1: Centroid Texture View
437 entries1[1].binding = 1;
438 entries1[1].textureView = centroidTexture.CreateView();
439 // Entry 2: Label Texture View
440 entries1[2].binding = 2;
441 entries1[2].textureView = labelTexture.CreateView();
442 // Entry 2: Uniform Buffer
443 entries1[3].binding = 3;
444 entries1[3].buffer = paramBuffer;
445 entries1[3].size = sizeof(Params);
446
447 entries1[4].binding = 4;
448 entries1[4].buffer = accBuffer;
449 entries1[4].size = sizeof(ClusterAccumulator) * k;
450
451 bindGroupDesc1.entryCount = 5; // 4;
452 bindGroupDesc1.entries = entries1;
453 bindGroup1 = GPU::getClassInstance().get_device().CreateBindGroup(&bindGroupDesc1);
454
455 wgpu::BindGroupDescriptor bindGroupDesc2 = {};
456 bindGroupDesc2.layout = pipeline2.GetBindGroupLayout(0);
457 wgpu::BindGroupEntry entries2[2];
458 entries2[0].binding = 0;
459 entries2[0].buffer = accBuffer;
460 entries2[0].size = accDesc.size;
461 entries2[1].binding = 1;
462 entries2[1].textureView = centroidTexture.CreateView();
463 bindGroupDesc2.entryCount = 2;
464 bindGroupDesc2.entries = entries2;
465 bindGroup2 = GPU::getClassInstance().get_device().CreateBindGroup(&bindGroupDesc2);
466}
467
468void kmeans_gpu(
469 const uint8_t* data, uint8_t* out_data, int32_t* out_labels, const int32_t width,
470 const int32_t height, const int32_t k, const int32_t max_iter, const uint8_t color_space
471) {
473 pixels.loadFromBuffer(data, width, height, ImageLib::RGBA_CONVERTER<float>);
474 const int32_t num_pixels {pixels.getSize()};
475
476 // width = k, height = 1
477 // k centroids, initialized to rgba(0,0,0,255)
478 // Init of each pixel is from default in Image constructor
480 ImageLib::Image<ImageLib::LABAPixel<float>> centroids_lab {k, 1};
481 std::vector<int32_t> labels(num_pixels, -1);
482
483 ImageLib::Image<ImageLib::LABAPixel<float>> lab(pixels.getWidth(), pixels.getHeight());
484
485 if (color_space == COLOR_SPACE_OPTION_CIELAB) {
486 for (int i {0}; i < pixels.getSize(); ++i) {
487 rgb_to_lab<float, float>(pixels[i], lab[i]);
488 }
489 }
490
491 std::cout << "starting" << std::endl;
492 // Step 2: Initialize centroids
493
494 switch (color_space) {
495 case COLOR_SPACE_OPTION_RGB: {
496 kMeansPlusPlusInitGpu<ImageLib::RGBAPixel<float>>(pixels, centroids, k, color_space);
497 break;
498 }
499 case COLOR_SPACE_OPTION_CIELAB: {
500 kMeansPlusPlusInitGpu<ImageLib::LABAPixel<float>>(lab, centroids_lab, k, color_space);
501 break;
502 }
503 }
504 std::cout << "kmeans++ init done" << std::endl;
505 // Step 3: Run k-means iterations
506
507 int bytesPerPixel {16}; // float pixels
508
509 // shaders - 2 pipelines:
510 // 1. assign and update clusters
511 // 2. resolve cluster centroids
512 wgpu::ComputePipeline pipeline1;
513 wgpu::ComputePipeline pipeline2;
514 wgpu::BindGroup bindGroup1;
515 wgpu::BindGroup bindGroup2;
516 wgpu::Texture inputTexture;
517 wgpu::Texture labelTexture;
518 wgpu::Texture centroidTexture;
519 wgpu::TextureDescriptor labelDesc = {};
520 wgpu::TextureDescriptor centroidDesc = {};
521
522 // setup all textures and buffers needed for the kmeans loop on gpu
523 setup(
524 pixels, lab, centroids, centroids_lab, width, height, k, inputTexture, labelTexture,
525 centroidTexture, labelDesc, centroidDesc, pipeline1, pipeline2, bindGroup1, bindGroup2,
526 color_space
527 );
528
529 uint32_t wgX = (width + 15) / 16;
530 uint32_t wgY = (height + 15) / 16;
531
532 // Label Readback RGBA32Uint is 16 bytes/ pixel
533 uint32_t bytesPerRowLabels =
534 GPU::getAlignedBytesPerRow(width, static_cast<uint32_t>(bytesPerPixel));
535 wgpu::BufferDescriptor readLabelsDesc = {};
536 readLabelsDesc.size = bytesPerRowLabels * height;
537 readLabelsDesc.usage = wgpu::BufferUsage::MapRead | wgpu::BufferUsage::CopyDst;
538 wgpu::Buffer readLabelsBuffer =
539 GPU::getClassInstance().get_device().CreateBuffer(&readLabelsDesc);
540
541 // Centroid Readback
542 uint32_t bytesPerRowCentroids =
543 GPU::getAlignedBytesPerRow(width, static_cast<uint32_t>(bytesPerPixel));
544 wgpu::BufferDescriptor readCentroidsDesc = {};
545 readCentroidsDesc.size = bytesPerRowCentroids; // Height is 1
546 readCentroidsDesc.usage = wgpu::BufferUsage::MapRead | wgpu::BufferUsage::CopyDst;
547 wgpu::Buffer readCentroidsBuffer =
548 GPU::getClassInstance().get_device().CreateBuffer(&readCentroidsDesc);
549
550 // This is the actual KMeans loop
551 std::cout << "start iterations" << std::endl;
552 wgpu::CommandEncoder encoder = GPU::getClassInstance().get_device().CreateCommandEncoder();
553 for (int32_t iter {0}; iter < max_iter; ++iter) {
554 wgpu::ComputePassEncoder pass1 = encoder.BeginComputePass();
555 pass1.SetPipeline(pipeline1);
556 pass1.SetBindGroup(0, bindGroup1);
557 pass1.DispatchWorkgroups(wgX, wgY);
558 pass1.End();
559
560 wgpu::ComputePassEncoder pass2 = encoder.BeginComputePass();
561 pass2.SetPipeline(pipeline2);
562 pass2.SetBindGroup(0, bindGroup2);
563 pass2.DispatchWorkgroups((k + 255) / 256, 1);
564 pass2.End();
565 }
566
567 // 3. Readback (After Loop Finishes)
568
569 // Copy Labels
570 wgpu::TexelCopyTextureInfo srcLabels = {};
571 srcLabels.texture = labelTexture;
572 wgpu::TexelCopyBufferInfo dstLabels = {};
573 dstLabels.buffer = readLabelsBuffer;
574 dstLabels.layout.bytesPerRow = bytesPerRowLabels;
575 dstLabels.layout.rowsPerImage = height;
576 encoder.CopyTextureToBuffer(&srcLabels, &dstLabels, &labelDesc.size);
577
578 // Copy Centroids
579 wgpu::TexelCopyTextureInfo srcCentroids = {};
580 srcCentroids.texture = centroidTexture;
581 wgpu::TexelCopyBufferInfo dstCentroids = {};
582 dstCentroids.buffer = readCentroidsBuffer;
583 dstCentroids.layout.bytesPerRow = bytesPerRowCentroids;
584 dstCentroids.layout.rowsPerImage = 1;
585 encoder.CopyTextureToBuffer(&srcCentroids, &dstCentroids, &centroidDesc.size);
586
587 wgpu::CommandBuffer commands = encoder.Finish();
588 GPU::getClassInstance().get_queue().Submit(1, &commands);
589 std::cout << "done iterations" << std::endl;
590
591 // 4. Map Async & Wait
592 bool* done1 = new bool(false);
593 bool* done2 = new bool(false);
594
595 // Map Labels
596 readLabelsBuffer.MapAsync(
597 wgpu::MapMode::Read, 0, readLabelsDesc.size, wgpu::CallbackMode::AllowProcessEvents,
598 [](wgpu::MapAsyncStatus status, wgpu::StringView msg, void* userdata) {
599 bool* flag = static_cast<bool*>(userdata);
600 bool success = false;
601 if (status == wgpu::MapAsyncStatus::Success) {
602 // std::cout << "Map success" << std::endl;
603 success = true;
604 }
605 *flag = true;
606 },
607 (void*)done1
608 );
609
610 std::cout << "read out" << std::endl;
611
612 while (!*done1) {
613 GPU::getClassInstance().get_instance().ProcessEvents();
614#if defined(__EMSCRIPTEN__)
615 emscripten_sleep(10);
616#endif
617 }
618
619 std::cout << "mapping labels" << std::endl;
620 const uint8_t* mappedData = (const uint8_t*)readLabelsBuffer.GetConstMappedRange();
621 // ... Copy data to your C++ vector ...
622 // Copy row by row to remove padding and put data into 'result'
623 for (size_t y = 0; y < height; ++y) {
624 const uint8_t* rowPtr = mappedData + (y * bytesPerRowLabels);
625 for (size_t x = 0; x < width; ++x) {
626 const uint8_t* pixelPtr = rowPtr + (x * bytesPerPixel);
627 uint32_t r = 0;
628 std::memcpy(&r, pixelPtr, sizeof(uint32_t));
629
630 size_t dstIndex = y * width + x;
631 labels[dstIndex] = static_cast<int32_t>(r);
632 }
633 }
634
635 readLabelsBuffer.Unmap();
636
637 // Map Centroids
638 readCentroidsBuffer.MapAsync(
639 wgpu::MapMode::Read, 0, readCentroidsDesc.size, wgpu::CallbackMode::AllowProcessEvents,
640 [](wgpu::MapAsyncStatus status, wgpu::StringView msg, void* userdata) {
641 bool* flag = static_cast<bool*>(userdata);
642 bool success = false;
643 if (status == wgpu::MapAsyncStatus::Success) {
644 // std::cout << "Map success" << std::endl;
645 success = true;
646 }
647 *flag = true; // Signal completion
648 },
649 (void*)done2
650 );
651
652 while (!*done2) {
653 GPU::getClassInstance().get_instance().ProcessEvents();
654#if defined(__EMSCRIPTEN__)
655 emscripten_sleep(10);
656#endif
657 }
658
659 std::cout << "mapping centroids" << std::endl;
660 const float* mappedDataFloat = (const float*)readCentroidsBuffer.GetConstMappedRange();
661 // ... Copy data to your C++ vector ...
662
663 for (int i = 0; i < k; i++) {
664 // if CIELAB color space these represent l, a, b, alpha
665 const float* centroidPtr = mappedDataFloat + (i * 4);
666
667 float r = *(centroidPtr);
668 float g = *(centroidPtr + 1);
669 float b = *(centroidPtr + 2);
670 float a = *(centroidPtr + 3);
671 switch (color_space) {
672 case COLOR_SPACE_OPTION_RGB: {
673 centroids[i] = ImageLib::RGBAPixel<float>(r * 255.f, g * 255.f, b * 255.f, a * 255.f);
674 break;
675 }
676 case COLOR_SPACE_OPTION_CIELAB: {
677 centroids_lab[i] =
678 ImageLib::LABAPixel<float>(r * 255.f, g * 255.f, b * 255.f, a * 255.f);
679 break;
680 }
681 }
682 }
683
684 readCentroidsBuffer.Unmap();
685
686 // Write the final centroid values to each pixel in the cluster
687 if (color_space == COLOR_SPACE_OPTION_CIELAB) {
688 for (int32_t i {0}; i < k; ++i) {
689 lab_to_rgb<float, float>(centroids_lab[i], centroids[i]);
690 }
691 }
692
693 for (int32_t i = 0; i < num_pixels; ++i) {
694 const int32_t cluster = labels[i];
695 out_data[i * 4 + 0] = static_cast<uint8_t>(centroids[cluster].red);
696 out_data[i * 4 + 1] = static_cast<uint8_t>(centroids[cluster].green);
697 out_data[i * 4 + 2] = static_cast<uint8_t>(centroids[cluster].blue);
698 out_data[i * 4 + 3] = 255;
699 }
700
701 // Write labels to out_labels
702 std::cout << "copying labels out" << std::endl;
703 std::memcpy(out_labels, labels.data(), labels.size() * sizeof(int32_t));
704
705 if (inputTexture)
706 inputTexture.Destroy();
707 if (labelTexture)
708 labelTexture.Destroy();
709 if (centroidTexture)
710 centroidTexture.Destroy();
711 readLabelsBuffer.Destroy();
712 readCentroidsBuffer.Destroy();
713 delete done1;
714 delete done2;
715
716 labels.clear();
717 labels.shrink_to_fit();
718#if defined(__EMSCRIPTEN__)
719 emscripten_sleep(50);
720#endif
721}
Core image processing functions for img2num project.