Img2Num C++ (Internal Developer Docs)  dev
API Documentation
image_utils.cpp
1 #include "internal/image_utils.h"
2 
3 #include <algorithm>
4 #include <cmath>
5 #include <cstdint>
6 #include <cstring>
7 #include <limits>
8 #include <vector>
9 
10 #include "img2num.h"
11 #include "internal/Image.h"
12 #include "internal/PixelConverters.h"
13 #include "internal/RGBAPixel.h"
14 #include "internal/fft_iterative.h"
15 
// Snap `value` to the midpoint of the `region_size`-wide bucket it falls in.
//
// value:       channel intensity to quantize
// region_size: width of one bucket; 0 disables quantization and returns
//              `value` unchanged
//
// When the midpoint of the topmost bucket would not fit in a uint8_t, the
// midpoint of the bucket directly below it is returned instead.
uint8_t quantize(uint8_t value, uint8_t region_size) {
  if (region_size == 0) return value;

  // Work in int so the range check below is explicit rather than relying on
  // uint8_t wrap-around.
  const int half_width = region_size / 2;
  const int index = value / region_size;  // Which bucket `value` lands in.
  int midpoint = index * region_size + half_width;

  // Only the last bucket can exceed the representable range; step back
  // exactly one bucket in that case.
  if (midpoint > UINT8_MAX) {
    midpoint -= region_size;
  }

  return static_cast<uint8_t>(midpoint);
}
38 
39 namespace img2num {
40 // image: pointer to RGBA data
41 // width, height: dimensions
42 // sigma: standard deviation of Gaussian blur
43 void gaussian_blur_fft(uint8_t *image, size_t width, size_t height, double sigma_pixels) {
44  if (!image || width == 0 || height == 0 || sigma_pixels <= 0) return;
45 
46  const size_t Npix = width * height;
47 
48  // Compute padded dimensions (next power of two)
49  const size_t W = fft::next_power_of_two(width);
50  const size_t H = fft::next_power_of_two(height);
51  const size_t Npix_padded = W * H;
52 
53  // Frequency coordinates helper (DC at corner)
54  auto freq_coord = [](int k, int dim) -> double {
55  return (k <= dim / 2) ? double(k) / dim : double(k - dim) / dim;
56  };
57 
58  // Precompute Gaussian factor in frequency domain
59  const double two_pi2_sigma2 = 2.0 * M_PI * M_PI * sigma_pixels * sigma_pixels;
60 
61  for (int channel = 0; channel < 3; channel++) {
62  // Allocate padded buffer
63  std::vector<fft::cd> data(Npix_padded, {0.0, 0.0});
64 
65  // Copy original image channel into padded buffer
66  for (size_t y = 0; y < height; y++)
67  for (size_t x = 0; x < width; x++)
68  data[y * W + x] = fft::cd(image[(y * width + x) * 4 + channel], 0.0);
69 
70  // Forward 2D FFT
71  fft::iterative_fft_2d(data, W, H, false);
72 
73  // Apply Gaussian filter in frequency domain
74  for (size_t y = 0; y < H; y++) {
75  double fy2 = freq_coord(y, H) * freq_coord(y, H);
76  for (size_t x = 0; x < W; x++) {
77  double fx2 = freq_coord(x, W) * freq_coord(x, W);
78  double gain = std::exp(-two_pi2_sigma2 * (fx2 + fy2));
79  data[y * W + x] *= gain;
80  }
81  }
82 
83  // Inverse 2D FFT
84  fft::iterative_fft_2d(data, W, H, true);
85 
86  // Copy back only the original width/height and clamp
87  for (size_t y = 0; y < height; y++)
88  for (size_t x = 0; x < width; x++) {
89  double v = data[y * W + x].real();
90  v = std::clamp(v, 0.0, 255.0);
91  image[(y * width + x) * 4 + channel] = static_cast<uint8_t>(std::lrint(v));
92  }
93  }
94 
95  // Alpha channel remains unchanged
96 }
97 
98 // Called from JS. `ptr` points to RGBA bytes.
99 void invert_image(uint8_t *ptr, int width, int height) {
101  img.loadFromBuffer(ptr, width, height, ImageLib::RGBA_CONVERTER<uint8_t>);
102 
103  for (ImageLib::RGBAPixel<uint8_t> &p : img) {
104  p.red = 255 - p.red;
105  p.blue = 255 - p.blue;
106  p.green = 255 - p.green;
107  }
108 
109  const auto &modified = img.getData();
110  std::memcpy(ptr, modified.data(), modified.size() * sizeof(ImageLib::RGBAPixel<uint8_t>));
111 }
112 
113 void threshold_image(uint8_t *ptr, const int width, const int height, const int num_thresholds) {
114  const uint8_t REGION_SIZE(255 / num_thresholds); // Size of buckets per colour
115 
117  img.loadFromBuffer(ptr, width, height, ImageLib::RGBA_CONVERTER<uint8_t>);
118 
119  const auto imgWidth{img.getWidth()}, imgHeight{img.getHeight()};
120  for (ImageLib::RGBAPixel<uint8_t> &p : img) {
121  p.red = quantize(p.red, REGION_SIZE);
122  p.green = quantize(p.green, REGION_SIZE);
123  p.blue = quantize(p.blue, REGION_SIZE);
124  }
125 
126  const auto &modified = img.getData();
127  std::memcpy(ptr, modified.data(), modified.size() * sizeof(ImageLib::RGBAPixel<uint8_t>));
128 }
129 
130 void black_threshold_image(uint8_t *ptr, const int width, const int height,
131  const int num_thresholds) {
133  img.loadFromBuffer(ptr, width, height, ImageLib::RGBA_CONVERTER<uint8_t>);
134 
135  const auto imgWidth{img.getWidth()}, imgHeight{img.getHeight()};
136  for (ImageLib::RGBAPixel<uint8_t> &p : img) {
137  const bool R{p.red < num_thresholds};
138  const bool G{p.green < num_thresholds};
139  const bool B{p.blue < num_thresholds};
140  if (R && B && G) {
141  p.setGray(0);
142  }
143  }
144 
145  const auto &modified = img.getData();
146  std::memcpy(ptr, modified.data(), modified.size() * sizeof(ImageLib::RGBAPixel<uint8_t>));
147 }
148 } // namespace img2num
Core image processing functions for img2num project.
Definition: Error.h:7
void black_threshold_image(uint8_t *ptr, const int width, const int height, const int num_thresholds)
Apply black-thresholding to an image.
void gaussian_blur_fft(uint8_t *image, size_t width, size_t height, double sigma)
Apply a Gaussian blur to an image using FFT.
Definition: image_utils.cpp:43
void threshold_image(uint8_t *ptr, const int width, const int height, const int num_thresholds)
Apply a thresholding operation to an image.
void invert_image(uint8_t *ptr, int width, int height)
Invert the pixel values of an image.
Definition: image_utils.cpp:99