stb_image_resize.h - randomcrap - random crap programs of varying quality
 (HTM) git clone git://git.codemadness.org/randomcrap
 (DIR) Log
 (DIR) Files
 (DIR) Refs
 (DIR) README
 (DIR) LICENSE
       ---
       stb_image_resize.h (50864B)
       ---
            1 #include <math.h>
            2 #include <stdint.h>
            3 #include <stdlib.h>
            4 #include <string.h>
            5 
// Resampling kernels. The numeric value is used directly as an index into
// stbir__filter_info_table, so the order here must match that table.
typedef enum
{
        STBIR_FILTER_DEFAULT      = 0,  // use same filter type that easy-to-use API chooses
        STBIR_FILTER_BOX          = 1,  // A trapezoid w/1-pixel wide ramps, same result as box for integer scale ratios
        STBIR_FILTER_TRIANGLE     = 2,  // On upsampling, produces same results as bilinear texture filtering
        STBIR_FILTER_CUBICBSPLINE = 3,  // The cubic b-spline (aka Mitchell-Netravali with B=1,C=0), gaussian-esque
        STBIR_FILTER_CATMULLROM   = 4,  // An interpolating cubic spline
        STBIR_FILTER_MITCHELL     = 5,  // Mitchell-Netravali filter with B=1/3, C=1/3
} stbir_filter;
           15 
// Forward declaration of the 16-bit-per-sample resize entry point.
// Both strides are expressed in bytes; `filter` selects the resampling kernel.
// NOTE(review): the definition is not visible from here -- confirm the return
// value convention and whether alpha_channel may be negative ("no alpha").
static int stbir_resize_uint16_generic(const uint16_t *input_pixels, int input_w, int input_h, int input_stride_in_bytes,
                                        uint16_t *output_pixels, int output_w, int output_h, int output_stride_in_bytes,
                                        int num_channels, int alpha_channel, stbir_filter filter);
           19 
           20 // (s0, t0) & (s1, t1) are the top-left and bottom right corner (uv addressing style: [0, 1]x[0, 1])
           21 // of a region of the input image to use.
           22 
// Compile-time check: the array size becomes -1 (a compile error) if
// uint32_t is not exactly 4 bytes wide.
typedef unsigned char stbir__validate_uint32[sizeof(uint32_t) == 4 ? 1 : -1];

// Number of elements in a true array (does not work on pointers).
#define STBIR__ARRAY_SIZE(a) (sizeof((a))/sizeof((a)[0]))

// Filters substituted for the default filter when enlarging / shrinking.
#define STBIR_DEFAULT_FILTER_UPSAMPLE    STBIR_FILTER_CATMULLROM
#define STBIR_DEFAULT_FILTER_DOWNSAMPLE  STBIR_FILTER_MITCHELL

// Optional progress hook; expands to nothing unless defined before this point.
#ifndef STBIR_PROGRESS_REPORT
#define STBIR_PROGRESS_REPORT(float_0_to_1)
#endif

// 2^-80 as a float: a tiny bias added to alpha before premultiplying so a
// zero alpha does not wipe out the color channels.
#define STBIR_ALPHA_EPSILON ((float)1 / (1 << 20) / (1 << 20) / (1 << 20) / (1 << 20))
// Marks a parameter as deliberately unused; sizeof does not evaluate it.
#define STBIR__UNUSED_PARAM(v)  (void)sizeof(v)
           37 
// One resampling filter: its weight function and its support radius.
// Field order matters: stbir__filter_info_table initializes positionally.
typedef struct
{
        float (*kernel)(float, float);  // weight at (distance from center, scale argument)
        float (*support)(float);        // support radius for the given scale argument
} stbir__filter_info;
           43 
// Inclusive pixel range [n0, n1] associated with one coefficient row.
// When upsampling, the contributors are which source pixels contribute.
// When downsampling, the contributors are which destination pixels are contributed to.
typedef struct
{
        int n0; // First contributing pixel
        int n1; // Last contributing pixel
} stbir__contributors;
           51 
// All state for one resize operation: image descriptions, filter choice,
// precomputed filter tables and the scratch buffers used while resampling.
typedef struct
{
        // Source image.
        const void* input_data;
        int input_w;
        int input_h;
        int input_stride_bytes;

        // Destination image.
        void* output_data;
        int output_w;
        int output_h;
        int output_stride_bytes;

        // Corners of the input region to use, in [0, 1] uv addressing.
        float s0, t0, s1, t1;

        float horizontal_shift; // Units: output pixels
        float vertical_shift;   // Units: output pixels
        float horizontal_scale; // > 1 selects the upsampling path for this axis
        float vertical_scale;   // > 1 selects the upsampling path for this axis

        int channels;
        int alpha_channel;      // used as a per-pixel channel index when decoding
        uint32_t flags;
        stbir_filter horizontal_filter;
        stbir_filter vertical_filter;

        // Per-axis filter tables: one contributor range plus one row of
        // coefficients per contributor.
        stbir__contributors* horizontal_contributors;
        float* horizontal_coefficients;

        stbir__contributors* vertical_contributors;
        float* vertical_coefficients;

        int decode_buffer_pixels;
        float* decode_buffer;   // raw base; logical pixel 0 sits horizontal_filter_pixel_margin pixels in

        float* horizontal_buffer;

        // cache these because ceil/floor are inexplicably showing up in profile
        int horizontal_coefficient_width;
        int vertical_coefficient_width;
        int horizontal_filter_pixel_width;
        int vertical_filter_pixel_width;
        int horizontal_filter_pixel_margin;
        int vertical_filter_pixel_margin;
        int horizontal_num_contributors;
        int vertical_num_contributors;

        int ring_buffer_length_bytes;   // The length of an individual entry in the ring buffer. The total number of ring buffers is stbir__get_filter_pixel_width(filter)
        int ring_buffer_num_entries;    // Total number of entries in the ring buffer.
        int ring_buffer_first_scanline;
        int ring_buffer_last_scanline;
        int ring_buffer_begin_index;    // first_scanline is at this index in the ring buffer; negative means no entry has been added yet
        float* ring_buffer;

        float* encode_buffer; // A temporary buffer to store floats so we don't lose precision while we do multiply-adds.

        // Sizes of the buffers above.
        // NOTE(review): presumably byte counts -- confirm at the allocation site.
        int horizontal_contributors_size;
        int horizontal_coefficients_size;
        int vertical_contributors_size;
        int vertical_coefficients_size;
        int decode_buffer_size;
        int horizontal_buffer_size;
        int ring_buffer_size;
        int encode_buffer_size;
} stbir__info;
          116 
// Divisor that maps a 16-bit sample (0..65535) onto the 0..1 float range.
static const float stbir__max_uint16_as_float = 65535.0f;
          118 
          119 static inline int stbir__min(int a, int b)
          120 {
          121         return a < b ? a : b;
          122 }
          123 
          124 static inline float stbir__saturate(float x)
          125 {
          126         if (x < 0)
          127                 return 0;
          128 
          129         if (x > 1)
          130                 return 1;
          131 
          132         return x;
          133 }
          134 static float stbir__support_zero(float s)
          135 {
          136         STBIR__UNUSED_PARAM(s);
          137         return 0;
          138 }
          139 
          140 static float stbir__support_one(float s)
          141 {
          142         STBIR__UNUSED_PARAM(s);
          143         return 1;
          144 }
          145 
          146 static float stbir__support_two(float s)
          147 {
          148         STBIR__UNUSED_PARAM(s);
          149         return 2;
          150 }
          151 
          152 static float stbir__support_trapezoid(float scale)
          153 {
          154         return 0.5f + scale / 2;
          155 }
          156 
          157 static float stbir__filter_trapezoid(float x, float scale)
          158 {
          159         float halfscale = scale / 2;
          160         float t = 0.5f + halfscale;
          161 
          162         x = (float)fabs(x);
          163 
          164         if (x >= t)
          165                 return 0;
          166         else
          167         {
          168                 float r = 0.5f - halfscale;
          169                 if (x <= r)
          170                         return 1;
          171                 else
          172                         return (t - x) / scale;
          173         }
          174 }
          175 
          176 static float stbir__filter_triangle(float x, float s)
          177 {
          178         STBIR__UNUSED_PARAM(s);
          179 
          180         x = (float)fabs(x);
          181 
          182         if (x <= 1.0f)
          183                 return 1 - x;
          184         else
          185                 return 0;
          186 }
          187 
          188 static float stbir__filter_cubic(float x, float s)
          189 {
          190         STBIR__UNUSED_PARAM(s);
          191 
          192         x = (float)fabs(x);
          193 
          194         if (x < 1.0f)
          195                 return (4 + x*x*(3*x - 6))/6;
          196         else if (x < 2.0f)
          197                 return (8 + x*(-12 + x*(6 - x)))/6;
          198 
          199         return (0.0f);
          200 }
          201 
          202 static float stbir__filter_catmullrom(float x, float s)
          203 {
          204         STBIR__UNUSED_PARAM(s);
          205 
          206         x = (float)fabs(x);
          207 
          208         if (x < 1.0f)
          209                 return 1 - x*x*(2.5f - 1.5f*x);
          210         else if (x < 2.0f)
          211                 return 2 - x*(4 + x*(0.5f*x - 2.5f));
          212 
          213         return (0.0f);
          214 }
          215 
          216 static float stbir__filter_mitchell(float x, float s)
          217 {
          218         STBIR__UNUSED_PARAM(s);
          219 
          220         x = (float)fabs(x);
          221 
          222         if (x < 1.0f)
          223                 return (16 + x*x*(21 * x - 36))/18;
          224         else if (x < 2.0f)
          225                 return (32 + x*(-60 + x*(36 - 7*x)))/18;
          226 
          227         return (0.0f);
          228 }
          229 
// Kernel/support pair for every stbir_filter value, indexed by the enum value.
// Entry 0 (STBIR_FILTER_DEFAULT) has no kernel, so its kernel must never be called.
static stbir__filter_info stbir__filter_info_table[] = {
        { NULL,                     stbir__support_zero },
        { stbir__filter_trapezoid,  stbir__support_trapezoid },
        { stbir__filter_triangle,   stbir__support_one },
        { stbir__filter_cubic,      stbir__support_two },
        { stbir__filter_catmullrom, stbir__support_two },
        { stbir__filter_mitchell,   stbir__support_two },
};
          238 
          239 static inline int stbir__use_upsampling(float ratio)
          240 {
          241         return ratio > 1;
          242 }
          243 
          244 static inline int stbir__use_width_upsampling(stbir__info* stbir_info)
          245 {
          246         return stbir__use_upsampling(stbir_info->horizontal_scale);
          247 }
          248 
          249 static inline int stbir__use_height_upsampling(stbir__info* stbir_info)
          250 {
          251         return stbir__use_upsampling(stbir_info->vertical_scale);
          252 }
          253 
          254 // This is the maximum number of input samples that can affect an output sample
          255 // with the given filter
          256 static int stbir__get_filter_pixel_width(stbir_filter filter, float scale)
          257 {
          258         if (stbir__use_upsampling(scale))
          259                 return (int)ceil(stbir__filter_info_table[filter].support(1/scale) * 2);
          260         else
          261                 return (int)ceil(stbir__filter_info_table[filter].support(scale) * 2 / scale);
          262 }
          263 
          264 // This is how much to expand buffers to account for filters seeking outside
          265 // the image boundaries.
          266 static int stbir__get_filter_pixel_margin(stbir_filter filter, float scale)
          267 {
          268         return stbir__get_filter_pixel_width(filter, scale) / 2;
          269 }
          270 
          271 static int stbir__get_coefficient_width(stbir_filter filter, float scale)
          272 {
          273         if (stbir__use_upsampling(scale))
          274                 return (int)ceil(stbir__filter_info_table[filter].support(1 / scale) * 2);
          275         else
          276                 return (int)ceil(stbir__filter_info_table[filter].support(scale) * 2);
          277 }
          278 
          279 static int stbir__get_contributors(float scale, stbir_filter filter, int input_size, int output_size)
          280 {
          281         if (stbir__use_upsampling(scale))
          282                 return output_size;
          283         else
          284                 return (input_size + stbir__get_filter_pixel_margin(filter, scale) * 2);
          285 }
          286 
          287 static int stbir__get_total_horizontal_coefficients(stbir__info* info)
          288 {
          289         return info->horizontal_num_contributors
          290                 * stbir__get_coefficient_width      (info->horizontal_filter, info->horizontal_scale);
          291 }
          292 
          293 static int stbir__get_total_vertical_coefficients(stbir__info* info)
          294 {
          295         return info->vertical_num_contributors
          296                 * stbir__get_coefficient_width      (info->vertical_filter, info->vertical_scale);
          297 }
          298 
          299 static stbir__contributors* stbir__get_contributor(stbir__contributors* contributors, int n)
          300 {
          301         return &contributors[n];
          302 }
          303 
          304 // For perf reasons this code is duplicated in stbir__resample_horizontal_upsample/downsample,
          305 // if you change it here change it there too.
          306 static float* stbir__get_coefficient(float* coefficients, stbir_filter filter, float scale, int n, int c)
          307 {
          308         int width = stbir__get_coefficient_width(filter, scale);
          309         return &coefficients[width*n + c];
          310 }
          311 
          312 static inline int stbir__edge_wrap(int n, int max)
          313 {
          314         // avoid per-pixel switch
          315         if (n >= 0 && n < max)
          316                 return n;
          317         return 0; // was: STBIR_EDGE_ZERO
          318 }
          319 
          320 // What input pixels contribute to this output pixel?
          321 static void stbir__calculate_sample_range_upsample(int n, float out_filter_radius, float scale_ratio, float out_shift, int* in_first_pixel, int* in_last_pixel, float* in_center_of_out)
          322 {
          323         float out_pixel_center = (float)n + 0.5f;
          324         float out_pixel_influence_lowerbound = out_pixel_center - out_filter_radius;
          325         float out_pixel_influence_upperbound = out_pixel_center + out_filter_radius;
          326 
          327         float in_pixel_influence_lowerbound = (out_pixel_influence_lowerbound + out_shift) / scale_ratio;
          328         float in_pixel_influence_upperbound = (out_pixel_influence_upperbound + out_shift) / scale_ratio;
          329 
          330         *in_center_of_out = (out_pixel_center + out_shift) / scale_ratio;
          331         *in_first_pixel = (int)(floor(in_pixel_influence_lowerbound + 0.5));
          332         *in_last_pixel = (int)(floor(in_pixel_influence_upperbound - 0.5));
          333 }
          334 
          335 // What output pixels does this input pixel contribute to?
          336 static void stbir__calculate_sample_range_downsample(int n, float in_pixels_radius, float scale_ratio, float out_shift, int* out_first_pixel, int* out_last_pixel, float* out_center_of_in)
          337 {
          338         float in_pixel_center = (float)n + 0.5f;
          339         float in_pixel_influence_lowerbound = in_pixel_center - in_pixels_radius;
          340         float in_pixel_influence_upperbound = in_pixel_center + in_pixels_radius;
          341 
          342         float out_pixel_influence_lowerbound = in_pixel_influence_lowerbound * scale_ratio - out_shift;
          343         float out_pixel_influence_upperbound = in_pixel_influence_upperbound * scale_ratio - out_shift;
          344 
          345         *out_center_of_in = in_pixel_center * scale_ratio - out_shift;
          346         *out_first_pixel = (int)(floor(out_pixel_influence_lowerbound + 0.5));
          347         *out_last_pixel = (int)(floor(out_pixel_influence_upperbound - 0.5));
          348 }
          349 
// Compute the normalized filter weights for one output pixel when upsampling.
// coefficient_group receives one weight per input pixel in
// [in_first_pixel, in_last_pixel]; contributor receives the (possibly trimmed)
// first and last contributing input pixel. in_center_of_out is the output
// pixel's center in input space. The kernel is called with 1 / scale.
static void stbir__calculate_coefficients_upsample(stbir_filter filter, float scale, int in_first_pixel, int in_last_pixel, float in_center_of_out, stbir__contributors* contributor, float* coefficient_group)
{
        int i;
        float total_filter = 0;
        float filter_scale;

        contributor->n0 = in_first_pixel;
        contributor->n1 = in_last_pixel;

        for (i = 0; i <= in_last_pixel - in_first_pixel; i++)
        {
                float in_pixel_center = (float)(i + in_first_pixel) + 0.5f;
                coefficient_group[i] = stbir__filter_info_table[filter].kernel(in_center_of_out - in_pixel_center, 1 / scale);

                // If the coefficient is zero, skip it. (Don't do the <0 check here, we want the influence of those outside pixels.)
                if (i == 0 && !coefficient_group[i])
                {
                        // Drop a leading zero weight: move the window start forward and
                        // recompute slot 0 (the i-- cancels the loop's i++). The loop
                        // bound shrinks with it because in_first_pixel was advanced.
                        contributor->n0 = ++in_first_pixel;
                        i--;
                        continue;
                }
                total_filter += coefficient_group[i];
        }

        // Make sure the sum of all coefficients is 1.
        // NOTE(review): total_filter is not checked against zero before dividing.
        filter_scale = 1 / total_filter;

        for (i = 0; i <= in_last_pixel - in_first_pixel; i++)
                coefficient_group[i] *= filter_scale;

        // Trim trailing zero weights by pulling n1 back; stop at the first
        // non-zero weight when scanning from the end.
        for (i = in_last_pixel - in_first_pixel; i >= 0; i--)
        {
                if (coefficient_group[i])
                        break;

                // This line has no weight. We can skip it.
                contributor->n1 = contributor->n0 + i - 1;
        }
}
          389 
          390 static void stbir__calculate_coefficients_downsample(stbir_filter filter, float scale_ratio, int out_first_pixel, int out_last_pixel, float out_center_of_in, stbir__contributors* contributor, float* coefficient_group)
          391 {
          392         int i;
          393 
          394         contributor->n0 = out_first_pixel;
          395         contributor->n1 = out_last_pixel;
          396 
          397         for (i = 0; i <= out_last_pixel - out_first_pixel; i++)
          398         {
          399                 float out_pixel_center = (float)(i + out_first_pixel) + 0.5f;
          400                 float x = out_pixel_center - out_center_of_in;
          401                 coefficient_group[i] = stbir__filter_info_table[filter].kernel(x, scale_ratio) * scale_ratio;
          402         }
          403 
          404         for (i = out_last_pixel - out_first_pixel; i >= 0; i--)
          405         {
          406                 if (coefficient_group[i])
          407                         break;
          408 
          409                 // This line has no weight. We can skip it.
          410                 contributor->n1 = contributor->n0 + i - 1;
          411         }
          412 }
          413 
// Post-process the downsampling filter table for one axis:
//  1. scale the weights so that everything landing on each output pixel sums to 1,
//  2. compact each contributor's row so it starts at its first useful weight
//     (non-zero and not addressing a negative output pixel),
//  3. clamp each contributor's last output pixel to output_size - 1.
static void stbir__normalize_downsample_coefficients(stbir__contributors* contributors, float* coefficients, stbir_filter filter, float scale_ratio, int input_size, int output_size)
{
        int num_contributors = stbir__get_contributors(scale_ratio, filter, input_size, output_size);
        int num_coefficients = stbir__get_coefficient_width(filter, scale_ratio);
        int i, j;
        int skip;

        // Pass 1: per output pixel i, sum the weights from every contributor
        // whose range covers i, then rescale those weights by 1 / sum.
        for (i = 0; i < output_size; i++)
        {
                float scale;
                float total = 0;

                for (j = 0; j < num_contributors; j++)
                {
                        if (i >= contributors[j].n0 && i <= contributors[j].n1)
                        {
                                float coefficient = *stbir__get_coefficient(coefficients, filter, scale_ratio, j, i - contributors[j].n0);
                                total += coefficient;
                        }
                        // Early exit; presumably relies on n0 being non-decreasing in j.
                        else if (i < contributors[j].n0)
                                break;
                }

                // NOTE(review): total is not checked against zero before dividing.
                scale = 1 / total;

                for (j = 0; j < num_contributors; j++)
                {
                        if (i >= contributors[j].n0 && i <= contributors[j].n1)
                                *stbir__get_coefficient(coefficients, filter, scale_ratio, j, i - contributors[j].n0) *= scale;
                        else if (i < contributors[j].n0)
                                break;
                }
        }

        // Optimize: Skip zero coefficients and contributions outside of image bounds.
        // Do this after normalizing because normalization depends on the n0/n1 values.
        for (j = 0; j < num_contributors; j++)
        {
                int range, max, width;

                // Count leading zero weights.
                // NOTE(review): this scan has no upper bound; it assumes the row
                // (or the memory following it) holds a non-zero weight.
                skip = 0;
                while (*stbir__get_coefficient(coefficients, filter, scale_ratio, j, skip) == 0)
                        skip++;

                contributors[j].n0 += skip;

                // Also drop weights that would land on negative output pixels.
                while (contributors[j].n0 < 0)
                {
                        contributors[j].n0++;
                        skip++;
                }

                range = contributors[j].n1 - contributors[j].n0 + 1;
                max = stbir__min(num_coefficients, range);

                // Shift the remaining weights to the front of the row, never
                // reading past the end of the row.
                width = stbir__get_coefficient_width(filter, scale_ratio);
                for (i = 0; i < max; i++)
                {
                        if (i + skip >= width)
                                break;

                        *stbir__get_coefficient(coefficients, filter, scale_ratio, j, i) = *stbir__get_coefficient(coefficients, filter, scale_ratio, j, i + skip);
                }
                continue;
        }

        // Using min to avoid writing into invalid pixels.
        for (i = 0; i < num_contributors; i++)
                contributors[i].n1 = stbir__min(contributors[i].n1, output_size - 1);
}
          484 
          485 // Each scan line uses the same kernel values so we should calculate the kernel
          486 // values once and then we can use them for every scan line.
          487 static void stbir__calculate_filters(stbir__contributors* contributors, float* coefficients, stbir_filter filter, float scale_ratio, float shift, int input_size, int output_size)
          488 {
          489         int n;
          490         int total_contributors = stbir__get_contributors(scale_ratio, filter, input_size, output_size);
          491 
          492         if (stbir__use_upsampling(scale_ratio))
          493         {
          494                 float out_pixels_radius = stbir__filter_info_table[filter].support(1 / scale_ratio) * scale_ratio;
          495 
          496                 // Looping through out pixels
          497                 for (n = 0; n < total_contributors; n++)
          498                 {
          499                         float in_center_of_out; // Center of the current out pixel in the in pixel space
          500                         int in_first_pixel, in_last_pixel;
          501 
          502                         stbir__calculate_sample_range_upsample(n, out_pixels_radius, scale_ratio, shift, &in_first_pixel, &in_last_pixel, &in_center_of_out);
          503 
          504                         stbir__calculate_coefficients_upsample(filter, scale_ratio, in_first_pixel, in_last_pixel, in_center_of_out, stbir__get_contributor(contributors, n), stbir__get_coefficient(coefficients, filter, scale_ratio, n, 0));
          505                 }
          506         }
          507         else
          508         {
          509                 float in_pixels_radius = stbir__filter_info_table[filter].support(scale_ratio) / scale_ratio;
          510 
          511                 // Looping through in pixels
          512                 for (n = 0; n < total_contributors; n++)
          513                 {
          514                         float out_center_of_in; // Center of the current out pixel in the in pixel space
          515                         int out_first_pixel, out_last_pixel;
          516                         int n_adjusted = n - stbir__get_filter_pixel_margin(filter, scale_ratio);
          517 
          518                         stbir__calculate_sample_range_downsample(n_adjusted, in_pixels_radius, scale_ratio, shift, &out_first_pixel, &out_last_pixel, &out_center_of_in);
          519                         stbir__calculate_coefficients_downsample(filter, scale_ratio, out_first_pixel, out_last_pixel, out_center_of_in, stbir__get_contributor(contributors, n), stbir__get_coefficient(coefficients, filter, scale_ratio, n, 0));
          520                 }
          521 
          522                 stbir__normalize_downsample_coefficients(contributors, coefficients, filter, scale_ratio, input_size, output_size);
          523         }
          524 }
          525 
          526 static float* stbir__get_decode_buffer(stbir__info* stbir_info)
          527 {
          528         // The 0 index of the decode buffer starts after the margin. This makes
          529         // it okay to use negative indexes on the decode buffer.
          530         return &stbir_info->decode_buffer[stbir_info->horizontal_filter_pixel_margin * stbir_info->channels];
          531 }
          532 
          533 /* TODO: remove */
          534 //#define STBIR__DECODE(type, colorspace) ((type) * (STBIR_MAX_COLORSPACES) + (colorspace))
          535 
          536 static void stbir__decode_scanline(stbir__info* stbir_info, int n)
          537 {
          538         int c;
          539         int channels = stbir_info->channels;
          540         int alpha_channel = stbir_info->alpha_channel;
          541         int input_w = stbir_info->input_w;
          542         size_t input_stride_bytes = stbir_info->input_stride_bytes;
          543         float* decode_buffer = stbir__get_decode_buffer(stbir_info);
          544         size_t in_buffer_row_offset = stbir__edge_wrap(n, stbir_info->input_h) * input_stride_bytes;
          545         const void* input_data = (char *) stbir_info->input_data + in_buffer_row_offset;
          546         int max_x = input_w + stbir_info->horizontal_filter_pixel_margin;
          547         int x = -stbir_info->horizontal_filter_pixel_margin;
          548 
          549         // special handling for STBIR_EDGE_ZERO because it needs to return an item that doesn't appear in the input,
          550         // and we want to avoid paying overhead on every pixel if not STBIR_EDGE_ZERO
          551         if ((n < 0 || n >= stbir_info->input_h))
          552         {
          553                 for (; x < max_x; x++)
          554                         for (c = 0; c < channels; c++)
          555                                 decode_buffer[x*channels + c] = 0;
          556                 return;
          557         }
          558 
          559         for (; x < max_x; x++)
          560         {
          561                 int decode_pixel_index = x * channels;
          562                 int input_pixel_index = stbir__edge_wrap(x, input_w) * channels;
          563                 for (c = 0; c < channels; c++)
          564                         decode_buffer[decode_pixel_index + c] = ((float)((const unsigned short*)input_data)[input_pixel_index + c]) / stbir__max_uint16_as_float;
          565         }
          566 
          567         for (x = -stbir_info->horizontal_filter_pixel_margin; x < max_x; x++)
          568         {
          569                 int decode_pixel_index = x * channels;
          570 
          571                 // If the alpha value is 0 it will clobber the color values. Make sure it's not.
          572                 float alpha = decode_buffer[decode_pixel_index + alpha_channel];
          573 
          574                 alpha += STBIR_ALPHA_EPSILON;
          575                 decode_buffer[decode_pixel_index + alpha_channel] = alpha;
          576 
          577                 for (c = 0; c < channels; c++)
          578                 {
          579                         if (c == alpha_channel)
          580                                 continue;
          581 
          582                         decode_buffer[decode_pixel_index + c] *= alpha;
          583                 }
          584         }
          585 
          586         for (x = -stbir_info->horizontal_filter_pixel_margin; x < 0; x++)
          587         {
          588                 for (c = 0; c < channels; c++)
          589                         decode_buffer[x*channels + c] = 0;
          590         }
          591         for (x = input_w; x < max_x; x++)
          592         {
          593                 for (c = 0; c < channels; c++)
          594                         decode_buffer[x*channels + c] = 0;
          595         }
          596 }
          597 
          598 static float* stbir__get_ring_buffer_entry(float* ring_buffer, int index, int ring_buffer_length)
          599 {
          600         return &ring_buffer[index * ring_buffer_length];
          601 }
          602 
          603 static float* stbir__add_empty_ring_buffer_entry(stbir__info* stbir_info, int n)
          604 {
          605         int ring_buffer_index;
          606         float* ring_buffer;
          607 
          608         stbir_info->ring_buffer_last_scanline = n;
          609 
          610         if (stbir_info->ring_buffer_begin_index < 0)
          611         {
          612                 ring_buffer_index = stbir_info->ring_buffer_begin_index = 0;
          613                 stbir_info->ring_buffer_first_scanline = n;
          614         }
          615         else
          616         {
          617                 ring_buffer_index = (stbir_info->ring_buffer_begin_index + (stbir_info->ring_buffer_last_scanline - stbir_info->ring_buffer_first_scanline)) % stbir_info->ring_buffer_num_entries;
          618         }
          619 
          620         ring_buffer = stbir__get_ring_buffer_entry(stbir_info->ring_buffer, ring_buffer_index, stbir_info->ring_buffer_length_bytes / sizeof(float));
          621         memset(ring_buffer, 0, stbir_info->ring_buffer_length_bytes);
          622 
          623         return ring_buffer;
          624 }
          625 
          626 
          627 static void stbir__resample_horizontal_upsample(stbir__info* stbir_info, float* output_buffer)
          628 {
          629         int x, k;
          630         int output_w = stbir_info->output_w;
          631         int channels = stbir_info->channels;
          632         float* decode_buffer = stbir__get_decode_buffer(stbir_info);
          633         stbir__contributors* horizontal_contributors = stbir_info->horizontal_contributors;
          634         float* horizontal_coefficients = stbir_info->horizontal_coefficients;
          635         int coefficient_width = stbir_info->horizontal_coefficient_width;
          636 
          637         for (x = 0; x < output_w; x++)
          638         {
          639                 int n0 = horizontal_contributors[x].n0;
          640                 int n1 = horizontal_contributors[x].n1;
          641 
          642                 int out_pixel_index = x * channels;
          643                 int coefficient_group = coefficient_width * x;
          644                 int coefficient_counter = 0;
          645 
          646                 switch (channels) {
          647                 case 1:
          648                         for (k = n0; k <= n1; k++)
          649                         {
          650                                 int in_pixel_index = k * 1;
          651                                 float coefficient = horizontal_coefficients[coefficient_group + coefficient_counter++];
          652                                 output_buffer[out_pixel_index + 0] += decode_buffer[in_pixel_index + 0] * coefficient;
          653                         }
          654                         break;
          655                 case 2:
          656                         for (k = n0; k <= n1; k++)
          657                         {
          658                                 int in_pixel_index = k * 2;
          659                                 float coefficient = horizontal_coefficients[coefficient_group + coefficient_counter++];
          660                                 output_buffer[out_pixel_index + 0] += decode_buffer[in_pixel_index + 0] * coefficient;
          661                                 output_buffer[out_pixel_index + 1] += decode_buffer[in_pixel_index + 1] * coefficient;
          662                         }
          663                         break;
          664                 case 3:
          665                         for (k = n0; k <= n1; k++)
          666                         {
          667                                 int in_pixel_index = k * 3;
          668                                 float coefficient = horizontal_coefficients[coefficient_group + coefficient_counter++];
          669                                 output_buffer[out_pixel_index + 0] += decode_buffer[in_pixel_index + 0] * coefficient;
          670                                 output_buffer[out_pixel_index + 1] += decode_buffer[in_pixel_index + 1] * coefficient;
          671                                 output_buffer[out_pixel_index + 2] += decode_buffer[in_pixel_index + 2] * coefficient;
          672                         }
          673                         break;
          674                 case 4:
          675                         for (k = n0; k <= n1; k++)
          676                         {
          677                                 int in_pixel_index = k * 4;
          678                                 float coefficient = horizontal_coefficients[coefficient_group + coefficient_counter++];
          679                                 output_buffer[out_pixel_index + 0] += decode_buffer[in_pixel_index + 0] * coefficient;
          680                                 output_buffer[out_pixel_index + 1] += decode_buffer[in_pixel_index + 1] * coefficient;
          681                                 output_buffer[out_pixel_index + 2] += decode_buffer[in_pixel_index + 2] * coefficient;
          682                                 output_buffer[out_pixel_index + 3] += decode_buffer[in_pixel_index + 3] * coefficient;
          683                         }
          684                         break;
          685                 default:
          686                         for (k = n0; k <= n1; k++)
          687                         {
          688                                 int in_pixel_index = k * channels;
          689                                 float coefficient = horizontal_coefficients[coefficient_group + coefficient_counter++];
          690                                 int c;
          691                                 for (c = 0; c < channels; c++)
          692                                         output_buffer[out_pixel_index + c] += decode_buffer[in_pixel_index + c] * coefficient;
          693                         }
          694                         break;
          695                 }
          696         }
          697 }
          698 
          699 static void stbir__resample_horizontal_downsample(stbir__info* stbir_info, float* output_buffer)
          700 {
          701         int x, k;
          702         int input_w = stbir_info->input_w;
          703         int channels = stbir_info->channels;
          704         float* decode_buffer = stbir__get_decode_buffer(stbir_info);
          705         stbir__contributors* horizontal_contributors = stbir_info->horizontal_contributors;
          706         float* horizontal_coefficients = stbir_info->horizontal_coefficients;
          707         int coefficient_width = stbir_info->horizontal_coefficient_width;
          708         int filter_pixel_margin = stbir_info->horizontal_filter_pixel_margin;
          709         int max_x = input_w + filter_pixel_margin * 2;
          710 
          711         switch (channels) {
          712                 case 1:
          713                         for (x = 0; x < max_x; x++)
          714                         {
          715                                 int n0 = horizontal_contributors[x].n0;
          716                                 int n1 = horizontal_contributors[x].n1;
          717 
          718                                 int in_x = x - filter_pixel_margin;
          719                                 int in_pixel_index = in_x * 1;
          720                                 int max_n = n1;
          721                                 int coefficient_group = coefficient_width * x;
          722 
          723                                 for (k = n0; k <= max_n; k++)
          724                                 {
          725                                         int out_pixel_index = k * 1;
          726                                         float coefficient = horizontal_coefficients[coefficient_group + k - n0];
          727                                         output_buffer[out_pixel_index + 0] += decode_buffer[in_pixel_index + 0] * coefficient;
          728                                 }
          729                         }
          730                         break;
          731                 case 2:
          732                         for (x = 0; x < max_x; x++)
          733                         {
          734                                 int n0 = horizontal_contributors[x].n0;
          735                                 int n1 = horizontal_contributors[x].n1;
          736 
          737                                 int in_x = x - filter_pixel_margin;
          738                                 int in_pixel_index = in_x * 2;
          739                                 int max_n = n1;
          740                                 int coefficient_group = coefficient_width * x;
          741 
          742                                 for (k = n0; k <= max_n; k++)
          743                                 {
          744                                         int out_pixel_index = k * 2;
          745                                         float coefficient = horizontal_coefficients[coefficient_group + k - n0];
          746                                         output_buffer[out_pixel_index + 0] += decode_buffer[in_pixel_index + 0] * coefficient;
          747                                         output_buffer[out_pixel_index + 1] += decode_buffer[in_pixel_index + 1] * coefficient;
          748                                 }
          749                         }
          750                         break;
          751                 case 3:
          752                         for (x = 0; x < max_x; x++)
          753                         {
          754                                 int n0 = horizontal_contributors[x].n0;
          755                                 int n1 = horizontal_contributors[x].n1;
          756 
          757                                 int in_x = x - filter_pixel_margin;
          758                                 int in_pixel_index = in_x * 3;
          759                                 int max_n = n1;
          760                                 int coefficient_group = coefficient_width * x;
          761 
          762                                 for (k = n0; k <= max_n; k++)
          763                                 {
          764                                         int out_pixel_index = k * 3;
          765                                         float coefficient = horizontal_coefficients[coefficient_group + k - n0];
          766                                         output_buffer[out_pixel_index + 0] += decode_buffer[in_pixel_index + 0] * coefficient;
          767                                         output_buffer[out_pixel_index + 1] += decode_buffer[in_pixel_index + 1] * coefficient;
          768                                         output_buffer[out_pixel_index + 2] += decode_buffer[in_pixel_index + 2] * coefficient;
          769                                 }
          770                         }
          771                         break;
          772                 case 4:
          773                         for (x = 0; x < max_x; x++)
          774                         {
          775                                 int n0 = horizontal_contributors[x].n0;
          776                                 int n1 = horizontal_contributors[x].n1;
          777 
          778                                 int in_x = x - filter_pixel_margin;
          779                                 int in_pixel_index = in_x * 4;
          780                                 int max_n = n1;
          781                                 int coefficient_group = coefficient_width * x;
          782 
          783                                 for (k = n0; k <= max_n; k++)
          784                                 {
          785                                         int out_pixel_index = k * 4;
          786                                         float coefficient = horizontal_coefficients[coefficient_group + k - n0];
          787                                         output_buffer[out_pixel_index + 0] += decode_buffer[in_pixel_index + 0] * coefficient;
          788                                         output_buffer[out_pixel_index + 1] += decode_buffer[in_pixel_index + 1] * coefficient;
          789                                         output_buffer[out_pixel_index + 2] += decode_buffer[in_pixel_index + 2] * coefficient;
          790                                         output_buffer[out_pixel_index + 3] += decode_buffer[in_pixel_index + 3] * coefficient;
          791                                 }
          792                         }
          793                         break;
          794                 default:
          795                         for (x = 0; x < max_x; x++)
          796                         {
          797                                 int n0 = horizontal_contributors[x].n0;
          798                                 int n1 = horizontal_contributors[x].n1;
          799 
          800                                 int in_x = x - filter_pixel_margin;
          801                                 int in_pixel_index = in_x * channels;
          802                                 int max_n = n1;
          803                                 int coefficient_group = coefficient_width * x;
          804 
          805                                 for (k = n0; k <= max_n; k++)
          806                                 {
          807                                         int c;
          808                                         int out_pixel_index = k * channels;
          809                                         float coefficient = horizontal_coefficients[coefficient_group + k - n0];
          810                                         for (c = 0; c < channels; c++)
          811                                                 output_buffer[out_pixel_index + c] += decode_buffer[in_pixel_index + c] * coefficient;
          812                                 }
          813                         }
          814                         break;
          815         }
          816 }
          817 
          818 static void stbir__decode_and_resample_upsample(stbir__info* stbir_info, int n)
          819 {
          820         // Decode the nth scanline from the source image into the decode buffer.
          821         stbir__decode_scanline(stbir_info, n);
          822 
          823         // Now resample it into the ring buffer.
          824         if (stbir__use_width_upsampling(stbir_info))
          825                 stbir__resample_horizontal_upsample(stbir_info, stbir__add_empty_ring_buffer_entry(stbir_info, n));
          826         else
          827                 stbir__resample_horizontal_downsample(stbir_info, stbir__add_empty_ring_buffer_entry(stbir_info, n));
          828 
          829         // Now it's sitting in the ring buffer ready to be used as source for the vertical sampling.
          830 }
          831 
          832 static void stbir__decode_and_resample_downsample(stbir__info* stbir_info, int n)
          833 {
          834         // Decode the nth scanline from the source image into the decode buffer.
          835         stbir__decode_scanline(stbir_info, n);
          836 
          837         memset(stbir_info->horizontal_buffer, 0, stbir_info->output_w * stbir_info->channels * sizeof(float));
          838 
          839         // Now resample it into the horizontal buffer.
          840         if (stbir__use_width_upsampling(stbir_info))
          841                 stbir__resample_horizontal_upsample(stbir_info, stbir_info->horizontal_buffer);
          842         else
          843                 stbir__resample_horizontal_downsample(stbir_info, stbir_info->horizontal_buffer);
          844 
          845         // Now it's sitting in the horizontal buffer ready to be distributed into the ring buffers.
          846 }
          847 
          // Look up the ring buffer entry that holds scanline get_scanline.
          //   begin_index             - entry index of the oldest scanline in the ring buffer.
          //   first_scanline          - scanline number stored at begin_index.
          //   ring_buffer_num_entries - entry count; the entry index wraps around it.
          //   ring_buffer_length      - size of one entry, counted in floats.
          static float* stbir__get_ring_buffer_scanline(int get_scanline, float* ring_buffer, int begin_index, int first_scanline, int ring_buffer_num_entries, int ring_buffer_length)
          {
                  const int scanlines_past_first = get_scanline - first_scanline;
                  const int slot = (begin_index + scanlines_past_first) % ring_buffer_num_entries;

                  return stbir__get_ring_buffer_entry(ring_buffer, slot, ring_buffer_length);
          }
          854 
          855 static void stbir__encode_scanline(int num_pixels, void *output_buffer, float *encode_buffer, int channels, int alpha_channel)
          856 {
          857         int x;
          858         int n;
          859 
          860         for (x=0; x < num_pixels; ++x)
          861         {
          862                 int pixel_index = x*channels;
          863 
          864                 float alpha = encode_buffer[pixel_index + alpha_channel];
          865                 float reciprocal_alpha = alpha ? 1.0f / alpha : 0;
          866 
          867                 // unrolling this produced a 1% slowdown upscaling a large RGBA linear-space image on my machine - stb
          868                 for (n = 0; n < channels; n++)
          869                         if (n != alpha_channel)
          870                                 encode_buffer[pixel_index + n] *= reciprocal_alpha;
          871 
          872                 // We added in a small epsilon to prevent the color channel from being deleted with zero alpha.
          873                 // Because we only add it for integer types, it will automatically be discarded on integer
          874                 // conversion, so we don't need to subtract it back out (which would be problematic for
          875                 // numeric precision reasons).
          876         }
          877 
          878 #define STBIR__ROUND_INT(f)    ((int)          ((f)+0.5))
          879 #define STBIR__ENCODE_LINEAR16(f)  (unsigned short) STBIR__ROUND_INT(stbir__saturate(f) * stbir__max_uint16_as_float)
          880 
          881         for (x=0; x < num_pixels; ++x)
          882         {
          883                 int pixel_index = x*channels;
          884 
          885                 for (n = 0; n < channels; n++)
          886                 {
          887                         int index = pixel_index + n;
          888                         ((unsigned short*)output_buffer)[index] = STBIR__ENCODE_LINEAR16(encode_buffer[index]);
          889                 }
          890         }
          891 }
          892 
          893 static void stbir__resample_vertical_upsample(stbir__info* stbir_info, int n)
          894 {
          895         int x, k;
          896         int output_w = stbir_info->output_w;
          897         stbir__contributors* vertical_contributors = stbir_info->vertical_contributors;
          898         float* vertical_coefficients = stbir_info->vertical_coefficients;
          899         int channels = stbir_info->channels;
          900         int alpha_channel = stbir_info->alpha_channel;
          901 //        int type = stbir_info->type;
          902         //int colorspace = stbir_info->colorspace;
          903         int ring_buffer_entries = stbir_info->ring_buffer_num_entries;
          904         void* output_data = stbir_info->output_data;
          905         float* encode_buffer = stbir_info->encode_buffer;
          906         //int decode = STBIR__DECODE(type, colorspace);
          907         int coefficient_width = stbir_info->vertical_coefficient_width;
          908         int coefficient_counter;
          909         int contributor = n;
          910 
          911         float* ring_buffer = stbir_info->ring_buffer;
          912         int ring_buffer_begin_index = stbir_info->ring_buffer_begin_index;
          913         int ring_buffer_first_scanline = stbir_info->ring_buffer_first_scanline;
          914         int ring_buffer_length = stbir_info->ring_buffer_length_bytes/sizeof(float);
          915 
          916         int n0,n1, output_row_start;
          917         int coefficient_group = coefficient_width * contributor;
          918 
          919         n0 = vertical_contributors[contributor].n0;
          920         n1 = vertical_contributors[contributor].n1;
          921 
          922         output_row_start = n * stbir_info->output_stride_bytes;
          923 
          924         memset(encode_buffer, 0, output_w * sizeof(float) * channels);
          925 
          926         // I tried reblocking this for better cache usage of encode_buffer
          927         // (using x_outer, k, x_inner), but it lost speed. -- stb
          928 
          929         coefficient_counter = 0;
          930         switch (channels) {
          931         case 1:
          932                 for (k = n0; k <= n1; k++)
          933                 {
          934                         int coefficient_index = coefficient_counter++;
          935                         float* ring_buffer_entry = stbir__get_ring_buffer_scanline(k, ring_buffer, ring_buffer_begin_index, ring_buffer_first_scanline, ring_buffer_entries, ring_buffer_length);
          936                         float coefficient = vertical_coefficients[coefficient_group + coefficient_index];
          937                         for (x = 0; x < output_w; ++x)
          938                         {
          939                                 int in_pixel_index = x * 1;
          940                                 encode_buffer[in_pixel_index + 0] += ring_buffer_entry[in_pixel_index + 0] * coefficient;
          941                         }
          942                 }
          943                 break;
          944         case 2:
          945                 for (k = n0; k <= n1; k++)
          946                 {
          947                         int coefficient_index = coefficient_counter++;
          948                         float* ring_buffer_entry = stbir__get_ring_buffer_scanline(k, ring_buffer, ring_buffer_begin_index, ring_buffer_first_scanline, ring_buffer_entries, ring_buffer_length);
          949                         float coefficient = vertical_coefficients[coefficient_group + coefficient_index];
          950                         for (x = 0; x < output_w; ++x)
          951                         {
          952                                 int in_pixel_index = x * 2;
          953                                 encode_buffer[in_pixel_index + 0] += ring_buffer_entry[in_pixel_index + 0] * coefficient;
          954                                 encode_buffer[in_pixel_index + 1] += ring_buffer_entry[in_pixel_index + 1] * coefficient;
          955                         }
          956                 }
          957                 break;
          958         case 3:
          959                 for (k = n0; k <= n1; k++)
          960                 {
          961                         int coefficient_index = coefficient_counter++;
          962                         float* ring_buffer_entry = stbir__get_ring_buffer_scanline(k, ring_buffer, ring_buffer_begin_index, ring_buffer_first_scanline, ring_buffer_entries, ring_buffer_length);
          963                         float coefficient = vertical_coefficients[coefficient_group + coefficient_index];
          964                         for (x = 0; x < output_w; ++x)
          965                         {
          966                                 int in_pixel_index = x * 3;
          967                                 encode_buffer[in_pixel_index + 0] += ring_buffer_entry[in_pixel_index + 0] * coefficient;
          968                                 encode_buffer[in_pixel_index + 1] += ring_buffer_entry[in_pixel_index + 1] * coefficient;
          969                                 encode_buffer[in_pixel_index + 2] += ring_buffer_entry[in_pixel_index + 2] * coefficient;
          970                         }
          971                 }
          972                 break;
          973         case 4:
          974                 for (k = n0; k <= n1; k++)
          975                 {
          976                         int coefficient_index = coefficient_counter++;
          977                         float* ring_buffer_entry = stbir__get_ring_buffer_scanline(k, ring_buffer, ring_buffer_begin_index, ring_buffer_first_scanline, ring_buffer_entries, ring_buffer_length);
          978                         float coefficient = vertical_coefficients[coefficient_group + coefficient_index];
          979                         for (x = 0; x < output_w; ++x)
          980                         {
          981                                 int in_pixel_index = x * 4;
          982                                 encode_buffer[in_pixel_index + 0] += ring_buffer_entry[in_pixel_index + 0] * coefficient;
          983                                 encode_buffer[in_pixel_index + 1] += ring_buffer_entry[in_pixel_index + 1] * coefficient;
          984                                 encode_buffer[in_pixel_index + 2] += ring_buffer_entry[in_pixel_index + 2] * coefficient;
          985                                 encode_buffer[in_pixel_index + 3] += ring_buffer_entry[in_pixel_index + 3] * coefficient;
          986                         }
          987                 }
          988                 break;
          989         default:
          990                 for (k = n0; k <= n1; k++)
          991                 {
          992                         int coefficient_index = coefficient_counter++;
          993                         float* ring_buffer_entry = stbir__get_ring_buffer_scanline(k, ring_buffer, ring_buffer_begin_index, ring_buffer_first_scanline, ring_buffer_entries, ring_buffer_length);
          994                         float coefficient = vertical_coefficients[coefficient_group + coefficient_index];
          995                         for (x = 0; x < output_w; ++x)
          996                         {
          997                                 int in_pixel_index = x * channels;
          998                                 int c;
          999                                 for (c = 0; c < channels; c++)
         1000                                         encode_buffer[in_pixel_index + c] += ring_buffer_entry[in_pixel_index + c] * coefficient;
         1001                         }
         1002                 }
         1003                 break;
         1004         }
         1005         stbir__encode_scanline(output_w, (char *) output_data + output_row_start, encode_buffer, channels, alpha_channel);
         1006 }
         1007 
         1008 static void stbir__resample_vertical_downsample(stbir__info* stbir_info, int n)
         1009 {
         1010         int x, k;
         1011         int output_w = stbir_info->output_w;
         1012         stbir__contributors* vertical_contributors = stbir_info->vertical_contributors;
         1013         float* vertical_coefficients = stbir_info->vertical_coefficients;
         1014         int channels = stbir_info->channels;
         1015         int ring_buffer_entries = stbir_info->ring_buffer_num_entries;
         1016         float* horizontal_buffer = stbir_info->horizontal_buffer;
         1017         int coefficient_width = stbir_info->vertical_coefficient_width;
         1018         int contributor = n + stbir_info->vertical_filter_pixel_margin;
         1019 
         1020         float* ring_buffer = stbir_info->ring_buffer;
         1021         int ring_buffer_begin_index = stbir_info->ring_buffer_begin_index;
         1022         int ring_buffer_first_scanline = stbir_info->ring_buffer_first_scanline;
         1023         int ring_buffer_length = stbir_info->ring_buffer_length_bytes/sizeof(float);
         1024         int n0,n1;
         1025 
         1026         n0 = vertical_contributors[contributor].n0;
         1027         n1 = vertical_contributors[contributor].n1;
         1028 
         1029         for (k = n0; k <= n1; k++)
         1030         {
         1031                 int coefficient_index = k - n0;
         1032                 int coefficient_group = coefficient_width * contributor;
         1033                 float coefficient = vertical_coefficients[coefficient_group + coefficient_index];
         1034 
         1035                 float* ring_buffer_entry = stbir__get_ring_buffer_scanline(k, ring_buffer, ring_buffer_begin_index, ring_buffer_first_scanline, ring_buffer_entries, ring_buffer_length);
         1036 
         1037                 switch (channels) {
         1038                 case 1:
         1039                         for (x = 0; x < output_w; x++)
         1040                         {
         1041                                 int in_pixel_index = x * 1;
         1042                                 ring_buffer_entry[in_pixel_index + 0] += horizontal_buffer[in_pixel_index + 0] * coefficient;
         1043                         }
         1044                         break;
         1045                 case 2:
         1046                         for (x = 0; x < output_w; x++)
         1047                         {
         1048                                 int in_pixel_index = x * 2;
         1049                                 ring_buffer_entry[in_pixel_index + 0] += horizontal_buffer[in_pixel_index + 0] * coefficient;
         1050                                 ring_buffer_entry[in_pixel_index + 1] += horizontal_buffer[in_pixel_index + 1] * coefficient;
         1051                         }
         1052                         break;
         1053                 case 3:
         1054                         for (x = 0; x < output_w; x++)
         1055                         {
         1056                                 int in_pixel_index = x * 3;
         1057                                 ring_buffer_entry[in_pixel_index + 0] += horizontal_buffer[in_pixel_index + 0] * coefficient;
         1058                                 ring_buffer_entry[in_pixel_index + 1] += horizontal_buffer[in_pixel_index + 1] * coefficient;
         1059                                 ring_buffer_entry[in_pixel_index + 2] += horizontal_buffer[in_pixel_index + 2] * coefficient;
         1060                         }
         1061                         break;
         1062                 case 4:
         1063                         for (x = 0; x < output_w; x++)
         1064                         {
         1065                                 int in_pixel_index = x * 4;
         1066                                 ring_buffer_entry[in_pixel_index + 0] += horizontal_buffer[in_pixel_index + 0] * coefficient;
         1067                                 ring_buffer_entry[in_pixel_index + 1] += horizontal_buffer[in_pixel_index + 1] * coefficient;
         1068                                 ring_buffer_entry[in_pixel_index + 2] += horizontal_buffer[in_pixel_index + 2] * coefficient;
         1069                                 ring_buffer_entry[in_pixel_index + 3] += horizontal_buffer[in_pixel_index + 3] * coefficient;
         1070                         }
         1071                         break;
         1072                 default:
         1073                         for (x = 0; x < output_w; x++)
         1074                         {
         1075                                 int in_pixel_index = x * channels;
         1076                                 int c;
         1077                                 for (c = 0; c < channels; c++)
         1078                                         ring_buffer_entry[in_pixel_index + c] += horizontal_buffer[in_pixel_index + c] * coefficient;
         1079                         }
         1080                         break;
         1081                 }
         1082         }
         1083 }
         1084 
         1085 static void stbir__buffer_loop_upsample(stbir__info* stbir_info)
         1086 {
         1087         int y;
         1088         float scale_ratio = stbir_info->vertical_scale;
         1089         float out_scanlines_radius = stbir__filter_info_table[stbir_info->vertical_filter].support(1/scale_ratio) * scale_ratio;
         1090 
         1091         for (y = 0; y < stbir_info->output_h; y++)
         1092         {
         1093                 float in_center_of_out = 0; // Center of the current out scanline in the in scanline space
         1094                 int in_first_scanline = 0, in_last_scanline = 0;
         1095 
         1096                 stbir__calculate_sample_range_upsample(y, out_scanlines_radius, scale_ratio, stbir_info->vertical_shift, &in_first_scanline, &in_last_scanline, &in_center_of_out);
         1097 
         1098                 if (stbir_info->ring_buffer_begin_index >= 0)
         1099                 {
         1100                         // Get rid of whatever we don't need anymore.
         1101                         while (in_first_scanline > stbir_info->ring_buffer_first_scanline)
         1102                         {
         1103                                 if (stbir_info->ring_buffer_first_scanline == stbir_info->ring_buffer_last_scanline)
         1104                                 {
         1105                                         // We just popped the last scanline off the ring buffer.
         1106                                         // Reset it to the empty state.
         1107                                         stbir_info->ring_buffer_begin_index = -1;
         1108                                         stbir_info->ring_buffer_first_scanline = 0;
         1109                                         stbir_info->ring_buffer_last_scanline = 0;
         1110                                         break;
         1111                                 }
         1112                                 else
         1113                                 {
         1114                                         stbir_info->ring_buffer_first_scanline++;
         1115                                         stbir_info->ring_buffer_begin_index = (stbir_info->ring_buffer_begin_index + 1) % stbir_info->ring_buffer_num_entries;
         1116                                 }
         1117                         }
         1118                 }
         1119 
         1120                 // Load in new ones.
         1121                 if (stbir_info->ring_buffer_begin_index < 0)
         1122                         stbir__decode_and_resample_upsample(stbir_info, in_first_scanline);
         1123 
         1124                 while (in_last_scanline > stbir_info->ring_buffer_last_scanline)
         1125                         stbir__decode_and_resample_upsample(stbir_info, stbir_info->ring_buffer_last_scanline + 1);
         1126 
         1127                 // Now all buffers should be ready to write a row of vertical sampling.
         1128                 stbir__resample_vertical_upsample(stbir_info, y);
         1129 
         1130                 STBIR_PROGRESS_REPORT((float)y / stbir_info->output_h);
         1131         }
         1132 }
         1133 
         1134 static void stbir__empty_ring_buffer(stbir__info* stbir_info, int first_necessary_scanline)
         1135 {
         1136         int output_stride_bytes = stbir_info->output_stride_bytes;
         1137         int channels = stbir_info->channels;
         1138         int alpha_channel = stbir_info->alpha_channel;
         1139         int output_w = stbir_info->output_w;
         1140         void* output_data = stbir_info->output_data;
         1141 
         1142         float* ring_buffer = stbir_info->ring_buffer;
         1143         int ring_buffer_length = stbir_info->ring_buffer_length_bytes/sizeof(float);
         1144 
         1145         if (stbir_info->ring_buffer_begin_index >= 0)
         1146         {
         1147                 // Get rid of whatever we don't need anymore.
         1148                 while (first_necessary_scanline > stbir_info->ring_buffer_first_scanline)
         1149                 {
         1150                         if (stbir_info->ring_buffer_first_scanline >= 0 && stbir_info->ring_buffer_first_scanline < stbir_info->output_h)
         1151                         {
         1152                                 int output_row_start = stbir_info->ring_buffer_first_scanline * output_stride_bytes;
         1153                                 float* ring_buffer_entry = stbir__get_ring_buffer_entry(ring_buffer, stbir_info->ring_buffer_begin_index, ring_buffer_length);
         1154                                 stbir__encode_scanline(output_w, (char *) output_data + output_row_start, ring_buffer_entry, channels, alpha_channel);
         1155                                 STBIR_PROGRESS_REPORT((float)stbir_info->ring_buffer_first_scanline / stbir_info->output_h);
         1156                         }
         1157 
         1158                         if (stbir_info->ring_buffer_first_scanline == stbir_info->ring_buffer_last_scanline)
         1159                         {
         1160                                 // We just popped the last scanline off the ring buffer.
         1161                                 // Reset it to the empty state.
         1162                                 stbir_info->ring_buffer_begin_index = -1;
         1163                                 stbir_info->ring_buffer_first_scanline = 0;
         1164                                 stbir_info->ring_buffer_last_scanline = 0;
         1165                                 break;
         1166                         }
         1167                         else
         1168                         {
         1169                                 stbir_info->ring_buffer_first_scanline++;
         1170                                 stbir_info->ring_buffer_begin_index = (stbir_info->ring_buffer_begin_index + 1) % stbir_info->ring_buffer_num_entries;
         1171                         }
         1172                 }
         1173         }
         1174 }
         1175 
         1176 static void stbir__buffer_loop_downsample(stbir__info* stbir_info)
         1177 {
         1178         int y;
         1179         float scale_ratio = stbir_info->vertical_scale;
         1180         int output_h = stbir_info->output_h;
         1181         float in_pixels_radius = stbir__filter_info_table[stbir_info->vertical_filter].support(scale_ratio) / scale_ratio;
         1182         int pixel_margin = stbir_info->vertical_filter_pixel_margin;
         1183         int max_y = stbir_info->input_h + pixel_margin;
         1184 
         1185         for (y = -pixel_margin; y < max_y; y++)
         1186         {
         1187                 float out_center_of_in; // Center of the current out scanline in the in scanline space
         1188                 int out_first_scanline, out_last_scanline;
         1189 
         1190                 stbir__calculate_sample_range_downsample(y, in_pixels_radius, scale_ratio, stbir_info->vertical_shift, &out_first_scanline, &out_last_scanline, &out_center_of_in);
         1191 
         1192                 if (out_last_scanline < 0 || out_first_scanline >= output_h)
         1193                         continue;
         1194 
         1195                 stbir__empty_ring_buffer(stbir_info, out_first_scanline);
         1196                 stbir__decode_and_resample_downsample(stbir_info, y);
         1197 
         1198                 // Load in new ones.
         1199                 if (stbir_info->ring_buffer_begin_index < 0)
         1200                         stbir__add_empty_ring_buffer_entry(stbir_info, out_first_scanline);
         1201 
         1202                 while (out_last_scanline > stbir_info->ring_buffer_last_scanline)
         1203                         stbir__add_empty_ring_buffer_entry(stbir_info, stbir_info->ring_buffer_last_scanline + 1);
         1204 
         1205                 // Now the horizontal buffer is ready to write to all ring buffer rows.
         1206                 stbir__resample_vertical_downsample(stbir_info, y);
         1207         }
         1208 
         1209         stbir__empty_ring_buffer(stbir_info, stbir_info->output_h);
         1210 }
         1211 
         1212 static void stbir__setup(stbir__info *info, int input_w, int input_h, int output_w, int output_h, int channels)
         1213 {
         1214         info->input_w = input_w;
         1215         info->input_h = input_h;
         1216         info->output_w = output_w;
         1217         info->output_h = output_h;
         1218         info->channels = channels;
         1219 }
         1220 
         1221 static void stbir__calculate_transform(stbir__info *info, float s0, float t0, float s1, float t1, float *transform)
         1222 {
         1223         info->s0 = s0;
         1224         info->t0 = t0;
         1225         info->s1 = s1;
         1226         info->t1 = t1;
         1227 
         1228         if (transform)
         1229         {
         1230                 info->horizontal_scale = transform[0];
         1231                 info->vertical_scale   = transform[1];
         1232                 info->horizontal_shift = transform[2];
         1233                 info->vertical_shift   = transform[3];
         1234         }
         1235         else
         1236         {
         1237                 info->horizontal_scale = ((float)info->output_w / info->input_w) / (s1 - s0);
         1238                 info->vertical_scale = ((float)info->output_h / info->input_h) / (t1 - t0);
         1239 
         1240                 info->horizontal_shift = s0 * info->output_w / (s1 - s0);
         1241                 info->vertical_shift = t0 * info->output_h / (t1 - t0);
         1242         }
         1243 }
         1244 
         1245 static void stbir__choose_filter(stbir__info *info, stbir_filter h_filter, stbir_filter v_filter)
         1246 {
         1247         if (h_filter == 0)
         1248                 h_filter = stbir__use_upsampling(info->horizontal_scale) ? STBIR_DEFAULT_FILTER_UPSAMPLE : STBIR_DEFAULT_FILTER_DOWNSAMPLE;
         1249         if (v_filter == 0)
         1250                 v_filter = stbir__use_upsampling(info->vertical_scale)   ? STBIR_DEFAULT_FILTER_UPSAMPLE : STBIR_DEFAULT_FILTER_DOWNSAMPLE;
         1251         info->horizontal_filter = h_filter;
         1252         info->vertical_filter = v_filter;
         1253 }
         1254 
         1255 static uint32_t stbir__calculate_memory(stbir__info *info)
         1256 {
         1257         int pixel_margin = stbir__get_filter_pixel_margin(info->horizontal_filter, info->horizontal_scale);
         1258         int filter_height = stbir__get_filter_pixel_width(info->vertical_filter, info->vertical_scale);
         1259 
         1260         info->horizontal_num_contributors = stbir__get_contributors(info->horizontal_scale, info->horizontal_filter, info->input_w, info->output_w);
         1261         info->vertical_num_contributors   = stbir__get_contributors(info->vertical_scale  , info->vertical_filter  , info->input_h, info->output_h);
         1262 
         1263         // One extra entry because floating point precision problems sometimes cause an extra to be necessary.
         1264         info->ring_buffer_num_entries = filter_height + 1;
         1265 
         1266         info->horizontal_contributors_size = info->horizontal_num_contributors * sizeof(stbir__contributors);
         1267         info->horizontal_coefficients_size = stbir__get_total_horizontal_coefficients(info) * sizeof(float);
         1268         info->vertical_contributors_size = info->vertical_num_contributors * sizeof(stbir__contributors);
         1269         info->vertical_coefficients_size = stbir__get_total_vertical_coefficients(info) * sizeof(float);
         1270         info->decode_buffer_size = (info->input_w + pixel_margin * 2) * info->channels * sizeof(float);
         1271         info->horizontal_buffer_size = info->output_w * info->channels * sizeof(float);
         1272         info->ring_buffer_size = info->output_w * info->channels * info->ring_buffer_num_entries * sizeof(float);
         1273         info->encode_buffer_size = info->output_w * info->channels * sizeof(float);
         1274 
         1275         if (stbir__use_height_upsampling(info))
         1276                 // The horizontal buffer is for when we're downsampling the height and we
         1277                 // can't output the result of sampling the decode buffer directly into the
         1278                 // ring buffers.
         1279                 info->horizontal_buffer_size = 0;
         1280         else
         1281                 // The encode buffer is to retain precision in the height upsampling method
         1282                 // and isn't used when height downsampling.
         1283                 info->encode_buffer_size = 0;
         1284 
         1285         return info->horizontal_contributors_size + info->horizontal_coefficients_size
         1286                 + info->vertical_contributors_size + info->vertical_coefficients_size
         1287                 + info->decode_buffer_size + info->horizontal_buffer_size
         1288                 + info->ring_buffer_size + info->encode_buffer_size;
         1289 }
         1290 
         1291 /* NOTE: do not call directly (anymore) */
         1292 static int stbir__resize_allocated(stbir__info *info,
         1293         const void* input_data, int input_stride_in_bytes,
         1294         void* output_data, int output_stride_in_bytes,
         1295         int alpha_channel,
         1296         void* tempmem, size_t tempmem_size_in_bytes)
         1297 {
         1298         size_t memory_required = stbir__calculate_memory(info);
         1299 
         1300         int width_stride_input = input_stride_in_bytes ? input_stride_in_bytes : info->channels * info->input_w * 2 /*stbir__type_size[type]*/;
         1301         int width_stride_output = output_stride_in_bytes ? output_stride_in_bytes : info->channels * info->output_w * 2 /*stbir__type_size[type]*/;
         1302 
         1303         if (info->channels < 0 || info->channels > 64)
         1304                 return 0;
         1305         if (info->horizontal_filter >= STBIR__ARRAY_SIZE(stbir__filter_info_table))
         1306                 return 0;
         1307         if (info->vertical_filter >= STBIR__ARRAY_SIZE(stbir__filter_info_table))
         1308                 return 0;
         1309         if (alpha_channel >= info->channels)
         1310                 return 0;
         1311         if (tempmem_size_in_bytes < memory_required)
         1312                 return 0;
         1313 
         1314         info->input_data = input_data;
         1315         info->input_stride_bytes = width_stride_input;
         1316         info->output_data = output_data;
         1317         info->output_stride_bytes = width_stride_output;
         1318         info->alpha_channel = alpha_channel;
         1319 
         1320         info->horizontal_coefficient_width   = stbir__get_coefficient_width  (info->horizontal_filter, info->horizontal_scale);
         1321         info->vertical_coefficient_width     = stbir__get_coefficient_width  (info->vertical_filter  , info->vertical_scale  );
         1322         info->horizontal_filter_pixel_width  = stbir__get_filter_pixel_width (info->horizontal_filter, info->horizontal_scale);
         1323         info->vertical_filter_pixel_width    = stbir__get_filter_pixel_width (info->vertical_filter  , info->vertical_scale  );
         1324         info->horizontal_filter_pixel_margin = stbir__get_filter_pixel_margin(info->horizontal_filter, info->horizontal_scale);
         1325         info->vertical_filter_pixel_margin   = stbir__get_filter_pixel_margin(info->vertical_filter  , info->vertical_scale  );
         1326 
         1327         info->ring_buffer_length_bytes = info->output_w * info->channels * sizeof(float);
         1328         info->decode_buffer_pixels = info->input_w + info->horizontal_filter_pixel_margin * 2;
         1329 
         1330 #define STBIR__NEXT_MEMPTR(current, newtype) (newtype*)(((unsigned char*)current) + current##_size)
         1331 
         1332         info->horizontal_contributors = (stbir__contributors *) tempmem;
         1333         info->horizontal_coefficients = STBIR__NEXT_MEMPTR(info->horizontal_contributors, float);
         1334         info->vertical_contributors = STBIR__NEXT_MEMPTR(info->horizontal_coefficients, stbir__contributors);
         1335         info->vertical_coefficients = STBIR__NEXT_MEMPTR(info->vertical_contributors, float);
         1336         info->decode_buffer = STBIR__NEXT_MEMPTR(info->vertical_coefficients, float);
         1337 
         1338         if (stbir__use_height_upsampling(info))
         1339         {
         1340                 info->horizontal_buffer = NULL;
         1341                 info->ring_buffer = STBIR__NEXT_MEMPTR(info->decode_buffer, float);
         1342                 info->encode_buffer = STBIR__NEXT_MEMPTR(info->ring_buffer, float);
         1343         }
         1344         else
         1345         {
         1346                 info->horizontal_buffer = STBIR__NEXT_MEMPTR(info->decode_buffer, float);
         1347                 info->ring_buffer = STBIR__NEXT_MEMPTR(info->horizontal_buffer, float);
         1348                 info->encode_buffer = NULL;
         1349         }
         1350 
         1351 #undef STBIR__NEXT_MEMPTR
         1352 
         1353         // This signals that the ring buffer is empty
         1354         info->ring_buffer_begin_index = -1;
         1355 
         1356         stbir__calculate_filters(info->horizontal_contributors, info->horizontal_coefficients, info->horizontal_filter, info->horizontal_scale, info->horizontal_shift, info->input_w, info->output_w);
         1357         stbir__calculate_filters(info->vertical_contributors, info->vertical_coefficients, info->vertical_filter, info->vertical_scale, info->vertical_shift, info->input_h, info->output_h);
         1358 
         1359         STBIR_PROGRESS_REPORT(0);
         1360 
         1361         if (stbir__use_height_upsampling(info))
         1362                 stbir__buffer_loop_upsample(info);
         1363         else
         1364                 stbir__buffer_loop_downsample(info);
         1365 
         1366         STBIR_PROGRESS_REPORT(1);
         1367 
         1368         return 1;
         1369 }
         1370 
         1371 static int stbir__resize_arbitrary(
         1372         const void* input_data, int input_w, int input_h, int input_stride_in_bytes,
         1373         void* output_data, int output_w, int output_h, int output_stride_in_bytes,
         1374         float s0, float t0, float s1, float t1, float *transform,
         1375         int channels, int alpha_channel,
         1376         stbir_filter h_filter, stbir_filter v_filter)
         1377 {
         1378         stbir__info info;
         1379         int result;
         1380         size_t memory_required;
         1381         void* extra_memory;
         1382 
         1383         stbir__setup(&info, input_w, input_h, output_w, output_h, channels);
         1384         stbir__calculate_transform(&info, s0,t0,s1,t1,transform);
         1385         stbir__choose_filter(&info, h_filter, v_filter);
         1386         memory_required = stbir__calculate_memory(&info);
         1387         extra_memory = calloc(memory_required, 1);
         1388         if (!extra_memory)
         1389                 return 0;
         1390 
         1391         result = stbir__resize_allocated(&info, input_data, input_stride_in_bytes,
         1392                                             output_data, output_stride_in_bytes, 
         1393                                             alpha_channel, extra_memory, memory_required);
         1394 
         1395         free(extra_memory);
         1396 
         1397         return result;
         1398 }
         1399 
         1400 static int stbir_resize_uint16_generic(const uint16_t *input_pixels  , int input_w , int input_h , int input_stride_in_bytes,
         1401                                                uint16_t *output_pixels , int output_w, int output_h, int output_stride_in_bytes,
         1402                                          int num_channels, int alpha_channel, stbir_filter filter)
         1403 {
         1404         return stbir__resize_arbitrary(input_pixels, input_w, input_h, input_stride_in_bytes,
         1405                 output_pixels, output_w, output_h, output_stride_in_bytes,
         1406                 0,0,1,1,NULL,num_channels,alpha_channel, filter, filter);
         1407 }