Grok 20.3.2
GrkImageSIMD.h
Go to the documentation of this file.
1/*
2 * Copyright (C) 2016-2026 Grok Image Compression Inc.
3 *
4 * This source code is free software: you can redistribute it and/or modify
5 * it under the terms of the GNU Affero General Public License, version 3,
6 * as published by the Free Software Foundation.
7 *
8 * This source code is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU Affero General Public License for more details.
12 *
13 * You should have received a copy of the GNU Affero General Public License
14 * along with this program. If not, see <http://www.gnu.org/licenses/>.
15 *
16 */
17
18#pragma once
19
20#include <cstdint>
21#include "grok.h"
22
23/* Visibility macro for functions that need to be accessible from the codec library.
 *
 * GRK_SIMD_API expands to:
 *   - nothing when GRK_STATIC is defined (checked first on _WIN32, and as the
 *     fallback branch on other platforms);
 *   - __declspec(dllexport) on _WIN32 when GRK_EXPORTS is defined;
 *   - __declspec(dllimport) on _WIN32 when neither GRK_STATIC nor GRK_EXPORTS
 *     is defined;
 *   - __attribute__((visibility("default"))) on non-_WIN32 builds without
 *     GRK_STATIC.
 */
24#if defined(_WIN32)
25#ifdef GRK_STATIC
26#define GRK_SIMD_API
27#elif defined(GRK_EXPORTS)
28#define GRK_SIMD_API __declspec(dllexport)
29#else
30#define GRK_SIMD_API __declspec(dllimport)
31#endif
32#elif !defined(GRK_STATIC)
33#define GRK_SIMD_API __attribute__((visibility("default")))
34#else
35#define GRK_SIMD_API
36#endif
37
38namespace grk
39{
40
41/* Clamp int32_t image data to [minVal, maxVal] using Highway SIMD.
 *
 * data           - samples to clamp; the pointer is non-const, so presumably
 *                  the data is clamped in place (TODO confirm)
 * w, h           - presumably the width and height of the region, in samples
 * stride         - presumably the distance between row starts, in samples
 *                  (TODO confirm units against the implementation)
 * minVal, maxVal - bounds of the [minVal, maxVal] clamp range
 */
42void hwy_clip_i32(int32_t* data, uint32_t w, uint32_t h, uint32_t stride, int32_t minVal,
43 int32_t maxVal);
44
45/* Scale int32_t image data: multiply each element by scale.
 *
 * data   - samples to scale; non-const, so presumably modified in place
 * w, h   - presumably the width and height of the region, in samples
 * stride - presumably the distance between row starts, in samples (TODO confirm)
 * scale  - multiplier applied to each element
 *
 * NOTE(review): behaviour when the int32_t product overflows is not stated in
 * this header - confirm against the implementation.
 */
46void hwy_scale_mul_i32(int32_t* data, uint32_t w, uint32_t h, uint32_t stride, int32_t scale);
47
48/* Scale int32_t image data: divide each element by scale (truncation toward zero).
 *
 * data   - samples to scale; non-const, so presumably modified in place
 * w, h   - presumably the width and height of the region, in samples
 * stride - presumably the distance between row starts, in samples (TODO confirm)
 * scale  - divisor applied to each element
 *
 * NOTE(review): behaviour for scale == 0 is not stated in this header -
 * confirm whether callers must guarantee a non-zero divisor.
 */
49void hwy_scale_div_i32(int32_t* data, uint32_t w, uint32_t h, uint32_t stride, int32_t scale);
50
51/* YCC 4:4:4 to RGB conversion using Highway SIMD.
52 * Reads from planar y/cb/cr, writes to planar r/g/b.
53 * offset = 1 << (prec - 1), upb = (1 << prec) - 1
 *
 * y, cb, cr  - read-only source planes
 * r, g, b    - destination planes
 * w, h       - presumably the width and height of the planes, in samples
 * src_stride - presumably the row pitch of y/cb/cr, in samples (TODO confirm)
 * dst_stride - presumably the row pitch of r/g/b, in samples (TODO confirm)
 * offset     - see formula above; prec is presumably the sample precision in bits
 * upb        - see formula above; presumably the upper bound used to clamp results
 */
54void hwy_sycc444_to_rgb_i32(const int32_t* y, const int32_t* cb, const int32_t* cr, int32_t* r,
55 int32_t* g, int32_t* b, uint32_t w, uint32_t h, uint32_t src_stride,
56 uint32_t dst_stride, int32_t offset, int32_t upb);
57
58/* eYCC to RGB conversion using Highway SIMD.
59 * In-place: reads/writes from the same yd/bd/rd arrays.
 *
 * yd, bd, rd - the three component arrays, converted in place
 * w, h       - presumably the width and height of the arrays, in samples
 * stride     - presumably the row pitch shared by all three arrays (TODO confirm)
 *
 * NOTE(review): the meaning of max_value, flip_value, sign1 and sign2 is not
 * described in this header; consult the implementation before changing callers.
 */
60void hwy_esycc_to_rgb_i32(int32_t* yd, int32_t* bd, int32_t* rd, uint32_t w, uint32_t h,
61 uint32_t stride, int32_t max_value, int32_t flip_value, bool sign1,
62 bool sign2);
63
64/* Planar int32_t (3 components) → packed uint8_t RGB.
 *
 * r, g, b    - read-only source planes
 * out        - destination for the packed RGB bytes
 * w, h       - presumably the width and height of the image, in pixels
 * src_stride - presumably the row pitch of the source planes, in samples
 *
 * NOTE(review): there is no output stride parameter, so output rows are
 * presumably tightly packed; how int32_t values are narrowed to uint8_t is not
 * stated here - confirm both against the implementation.
 */
65void hwy_planar_to_packed_8(const int32_t* r, const int32_t* g, const int32_t* b, uint8_t* out,
66 uint32_t w, uint32_t h, uint32_t src_stride);
67
68/* Packed uint8_t RGB → planar int32_t (3 components).
 *
 * in         - read-only packed RGB bytes
 * r, g, b    - destination planes
 * w, h       - presumably the width and height of the image, in pixels
 * dst_stride - presumably the row pitch of the destination planes, in samples
 *
 * NOTE(review): there is no input stride parameter, so input rows are
 * presumably tightly packed - confirm against the implementation.
 */
69void hwy_packed_to_planar_8(const uint8_t* in, int32_t* r, int32_t* g, int32_t* b, uint32_t w,
70 uint32_t h, uint32_t dst_stride);
71
72/* Planar int32_t (3 components) → packed uint16_t RGB.
 *
 * r, g, b    - read-only source planes
 * out        - destination for the packed RGB uint16_t samples
 * w, h       - presumably the width and height of the image, in pixels
 * src_stride - presumably the row pitch of the source planes, in samples
 *
 * NOTE(review): there is no output stride parameter, so output rows are
 * presumably tightly packed; how int32_t values are narrowed to uint16_t is not
 * stated here - confirm both against the implementation.
 */
73void hwy_planar_to_packed_16(const int32_t* r, const int32_t* g, const int32_t* b, uint16_t* out,
74 uint32_t w, uint32_t h, uint32_t src_stride);
75
76/* Packed uint16_t RGB → planar int32_t (3 components).
 *
 * in         - read-only packed RGB uint16_t samples
 * r, g, b    - destination planes
 * w, h       - presumably the width and height of the image, in pixels
 * dst_stride - presumably the row pitch of the destination planes, in samples
 *
 * NOTE(review): there is no input stride parameter, so input rows are
 * presumably tightly packed - confirm against the implementation.
 */
77void hwy_packed_to_planar_16(const uint16_t* in, int32_t* r, int32_t* g, int32_t* b, uint32_t w,
78 uint32_t h, uint32_t dst_stride);
79
/* Copy a decoded tile image (int32_t planar) into a swath output buffer
 * described by grk_swath_buffer.
 *
 * tile_img - read-only source tile image
 * buf      - read-only descriptor of the destination swath buffer
 */
87void hwy_copy_tile_to_swath(const grk_image* tile_img, const grk_swath_buffer* buf);
88
89/* ─── Format-level conversion SIMD primitives ─── */
90
91/* Unpack packed uint8 bytes → int32 array, with optional bitwise invert (XOR 0xFF).
92 * Equivalent to the N=8 unsigned path of convertToOutput.
 *
 * src    - read-only packed input bytes
 * dest   - destination int32 array
 * w      - presumably the number of samples to convert (TODO confirm)
 * invert - when true, apply the XOR 0xFF invert described above
 */
93GRK_SIMD_API void hwy_unpack_8u_to_i32(const uint8_t* src, int32_t* dest, size_t w, bool invert);
94
95/* Unpack packed uint8 bytes → int32 array with sign extension from 8 bits,
96 * plus optional bitwise invert.
97 * Equivalent to the N=8 signed path of convertToOutput.
 *
 * src    - read-only packed input bytes (declared uint8_t even though the
 *          samples are treated as signed)
 * dest   - destination int32 array
 * w      - presumably the number of samples to convert (TODO confirm)
 * invert - when true, apply the bitwise invert
 *
 * NOTE(review): whether the invert is applied before or after sign extension
 * is not stated here - confirm against the implementation.
 */
98GRK_SIMD_API void hwy_unpack_8s_to_i32(const uint8_t* src, int32_t* dest, size_t w, bool invert);
99
100/* Unpack big-endian uint16 pairs → int32 array, with optional 16-bit XOR invert.
101 * Used by PNG decode (N=16 path).
 *
 * src    - read-only input bytes; each sample is a big-endian byte pair
 * dest   - destination int32 array
 * w      - presumably the number of 16-bit samples (not bytes) to convert
 *          (TODO confirm)
 * invert - when true, apply the 16-bit XOR invert
 */
102GRK_SIMD_API void hwy_unpack_16be_to_i32(const uint8_t* src, int32_t* dest, size_t w, bool invert);
103
104/* Unpack machine-endian uint16 → int32 array, with optional 16-bit XOR invert.
105 * Used by TIFF decode (N=16 path, libtiff already decoded to native byte order).
 *
 * src    - read-only uint16 samples
 * dest   - destination int32 array
 * w      - presumably the number of samples to convert (TODO confirm)
 * invert - when true, apply the 16-bit XOR invert
 *
 * NOTE(review): the function name says "16le" while the description says
 * machine-endian / native byte order; these differ on big-endian hosts -
 * confirm which one the implementation actually provides.
 */
106GRK_SIMD_API void hwy_unpack_16le_to_i32(const uint16_t* src, int32_t* dest, size_t w, bool invert);
107
108/* Deinterleave packed int32 buffer [R0,G0,B0,R1,G1,B1,...] into separate component
109 * planes. Optimised for numComps == 3 and 4; falls back to scalar for others.
 *
 * src      - read-only interleaved input samples
 * dest     - array of destination plane pointers; presumably one entry per
 *            component, i.e. numComps entries (TODO confirm)
 * w        - presumably the number of pixels, so src holds w * numComps samples
 *            (TODO confirm)
 * numComps - number of interleaved components
 */
110GRK_SIMD_API void hwy_deinterleave_i32(const int32_t* src, int32_t* const* dest, uint32_t w,
111 uint16_t numComps);
112
113/* Pack N planar int32 components into interleaved uint8 output, one row at a time.
114 * Each src[k] points to the start of the k-th component for this row.
115 * adjust is added to each sample before narrowing to uint8.
 *
 * src       - array of read-only row pointers, one per component
 * numPlanes - presumably the N in the description, i.e. the number of src entries
 * dest      - destination for the interleaved bytes
 * w         - presumably the number of pixels in the row (TODO confirm)
 * adjust    - value added to each sample before narrowing
 *
 * NOTE(review): whether narrowing saturates or truncates is not stated here -
 * confirm against the implementation.
 */
116GRK_SIMD_API void hwy_pack_planar_to_8(const int32_t* const* src, uint32_t numPlanes, uint8_t* dest,
117 uint32_t w, int32_t adjust);
118
119/* Pack N planar int32 components into interleaved machine-endian uint16 output.
120 * Same semantics as hwy_pack_planar_to_8 but for 16-bit output.
 *
 * src       - array of read-only row pointers, one per component
 * numPlanes - presumably the N in the description, i.e. the number of src entries
 * dest      - destination for the interleaved uint16 samples
 * w         - presumably the number of pixels in the row (TODO confirm)
 * adjust    - value added to each sample before narrowing (per the
 *             hwy_pack_planar_to_8 description)
 */
121GRK_SIMD_API void hwy_pack_planar_to_16(const int32_t* const* src, uint32_t numPlanes,
122 uint16_t* dest, uint32_t w, int32_t adjust);
123
124/* Pack N planar int32 components into interleaved big-endian uint16 output.
125 * Same as hwy_pack_planar_to_16 but each uint16 is stored in big-endian byte order.
 *
 * src       - array of read-only row pointers, one per component
 * numPlanes - presumably the N in the description, i.e. the number of src entries
 * dest      - destination bytes; declared uint8_t* (unlike hwy_pack_planar_to_16),
 *             with each sample occupying two bytes in big-endian order
 * w         - presumably the number of pixels in the row (TODO confirm)
 * adjust    - presumably added to each sample before narrowing, as in
 *             hwy_pack_planar_to_16 (TODO confirm)
 */
126GRK_SIMD_API void hwy_pack_planar_to_16be(const int32_t* const* src, uint32_t numPlanes,
127 uint8_t* dest, uint32_t w, int32_t adjust);
128
129/* Scale int32 component data by power-of-two multiply, with stride.
 *
 * data   - samples to scale; non-const, so presumably modified in place
 * w, h   - presumably the width and height of the component, in samples
 * stride - presumably the distance between row starts, in samples (TODO confirm)
 *
 * NOTE(review): it is not stated whether scale is the power-of-two factor
 * itself or a shift count - confirm against the implementation before use.
 */
130GRK_SIMD_API void hwy_scale_component_up(int32_t* data, uint32_t w, uint32_t h, uint32_t stride,
131 int32_t scale);
132
133/* Scale int32 component data by power-of-two divide, with stride.
 *
 * data   - samples to scale; non-const, so presumably modified in place
 * w, h   - presumably the width and height of the component, in samples
 * stride - presumably the distance between row starts, in samples (TODO confirm)
 *
 * NOTE(review): it is not stated whether scale is the power-of-two divisor
 * itself or a shift count, nor how negative samples are rounded - confirm
 * against the implementation before use.
 */
134GRK_SIMD_API void hwy_scale_component_down(int32_t* data, uint32_t w, uint32_t h, uint32_t stride,
135 int32_t scale);
136
137} // namespace grk
#define GRK_SIMD_API
Definition GrkImageSIMD.h:33
ResWindow.
Definition CompressedChunkCache.h:36
GRK_SIMD_API void hwy_unpack_16be_to_i32(const uint8_t *src, int32_t *dest, size_t w, bool invert)
void hwy_scale_mul_i32(int32_t *data, uint32_t w, uint32_t h, uint32_t stride, int32_t scale)
GRK_SIMD_API void hwy_scale_component_up(int32_t *data, uint32_t w, uint32_t h, uint32_t stride, int32_t scale)
GRK_SIMD_API void hwy_pack_planar_to_8(const int32_t *const *src, uint32_t numPlanes, uint8_t *dest, uint32_t w, int32_t adjust)
void hwy_planar_to_packed_8(const int32_t *r, const int32_t *g, const int32_t *b, uint8_t *out, uint32_t w, uint32_t h, uint32_t src_stride)
GRK_SIMD_API void hwy_unpack_8s_to_i32(const uint8_t *src, int32_t *dest, size_t w, bool invert)
void hwy_planar_to_packed_16(const int32_t *r, const int32_t *g, const int32_t *b, uint16_t *out, uint32_t w, uint32_t h, uint32_t src_stride)
GRK_SIMD_API void hwy_unpack_16le_to_i32(const uint16_t *src, int32_t *dest, size_t w, bool invert)
GRK_SIMD_API void hwy_deinterleave_i32(const int32_t *src, int32_t *const *dest, uint32_t w, uint16_t numComps)
void hwy_copy_tile_to_swath(const grk_image *tile_img, const grk_swath_buffer *buf)
Copy a decoded tile image (int32_t planar) into a swath output buffer described by grk_swath_buffer.
void hwy_sycc444_to_rgb_i32(const int32_t *y, const int32_t *cb, const int32_t *cr, int32_t *r, int32_t *g, int32_t *b, uint32_t w, uint32_t h, uint32_t src_stride, uint32_t dst_stride, int32_t offset, int32_t upb)
void hwy_packed_to_planar_8(const uint8_t *in, int32_t *r, int32_t *g, int32_t *b, uint32_t w, uint32_t h, uint32_t dst_stride)
GRK_SIMD_API void hwy_pack_planar_to_16(const int32_t *const *src, uint32_t numPlanes, uint16_t *dest, uint32_t w, int32_t adjust)
void hwy_packed_to_planar_16(const uint16_t *in, int32_t *r, int32_t *g, int32_t *b, uint32_t w, uint32_t h, uint32_t dst_stride)
const double scale
Definition RateControl.cpp:167
void hwy_clip_i32(int32_t *data, uint32_t w, uint32_t h, uint32_t stride, int32_t minVal, int32_t maxVal)
GRK_SIMD_API void hwy_unpack_8u_to_i32(const uint8_t *src, int32_t *dest, size_t w, bool invert)
GRK_SIMD_API void hwy_pack_planar_to_16be(const int32_t *const *src, uint32_t numPlanes, uint8_t *dest, uint32_t w, int32_t adjust)
void hwy_esycc_to_rgb_i32(int32_t *yd, int32_t *bd, int32_t *rd, uint32_t w, uint32_t h, uint32_t stride, int32_t max_value, int32_t flip_value, bool sign1, bool sign2)
void hwy_scale_div_i32(int32_t *data, uint32_t w, uint32_t h, uint32_t stride, int32_t scale)
GRK_SIMD_API void hwy_scale_component_down(int32_t *data, uint32_t w, uint32_t h, uint32_t stride, int32_t scale)
Grok image. Note: do not directly create a grk_image object.
User-managed output buffer for asynchronous swath tile copy-and-convert.
Definition grok.h:974