Yet Another Game Engine
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
stb_image.h
Go to the documentation of this file.
1 /* stb_image - v2.16 - public domain image loader -
2 http://nothings.org/stb_image.h no warranty implied; use at your own risk
3 
4  Do this:
5  #define STB_IMAGE_IMPLEMENTATION
6  before you include this file in *one* C or C++ file to create the
7 implementation.
8 
9  // i.e. it should look like this:
10  #include ...
11  #include ...
12  #include ...
13  #define STB_IMAGE_IMPLEMENTATION
14  #include "stb_image.h"
15 
16  You can #define STBI_ASSERT(x) before the #include to avoid using assert.h.
17  And #define STBI_MALLOC, STBI_REALLOC, and STBI_FREE to avoid using
18 malloc,realloc,free
19 
20 
21  QUICK NOTES:
22  Primarily of interest to game developers and other people who can
23  avoid problematic images and only need the trivial interface
24 
25  JPEG baseline & progressive (12 bpc/arithmetic not supported, same as stock IJG lib)
26  PNG 1/2/4/8/16-bit-per-channel
27 
28  TGA (not sure what subset, if a subset)
29  BMP non-1bpp, non-RLE
30  PSD (composited view only, no extra channels, 8/16 bit-per-channel)
31 
32  GIF (*comp always reports as 4-channel)
33  HDR (radiance rgbE format)
34  PIC (Softimage PIC)
35  PNM (PPM and PGM binary only)
36 
37  Animated GIF still needs a proper API, but here's one way to do it:
38  http://gist.github.com/urraka/685d9a6340b26b830d49
39 
40  - decode from memory or through FILE (define STBI_NO_STDIO to remove code)
41  - decode from arbitrary I/O callbacks
42  - SIMD acceleration on x86/x64 (SSE2) and ARM (NEON)
43 
44  Full documentation under "DOCUMENTATION" below.
45 
46 
47 LICENSE
48 
49  See end of file for license information.
50 
51 RECENT REVISION HISTORY:
52 
53  2.16 (2017-07-23) all functions have 16-bit variants; optimizations; bugfixes
54  2.15 (2017-03-18) fix png-1,2,4; all Imagenet JPGs; no runtime SSE detection on GCC
55  2.14 (2017-03-03) remove deprecated STBI_JPEG_OLD; fixes for Imagenet JPGs
56  2.13 (2016-12-04) experimental 16-bit API, only for PNG so far; fixes
57  2.12 (2016-04-02) fix typo in 2.11 PSD fix that caused crashes
58  2.11 (2016-04-02) 16-bit PNGS; enable SSE2 in non-gcc x64
59                    RGB-format JPEG; remove white matting in PSD; allocate large structures on the stack; correct channel count for PNG & BMP
60  2.10 (2016-01-22) avoid warning introduced in 2.09
61  2.09 (2016-01-16) 16-bit TGA; comments in PNM files; STBI_REALLOC_SIZED
62 
63  See end of file for full revision history.
64 
65 
66  ============================ Contributors =========================
67 
68  Image formats Extensions, features
69  Sean Barrett (jpeg, png, bmp) Jetro Lauha (stbi_info)
70  Nicolas Schulz (hdr, psd) Martin "SpartanJ" Golini (stbi_info)
71  Jonathan Dummer (tga) James "moose2000" Brown (iPhone PNG)
72  Jean-Marc Lienher (gif) Ben "Disch" Wenger (io callbacks)
73  Tom Seddon (pic) Omar Cornut (1/2/4-bit PNG)
74  Thatcher Ulrich (psd) Nicolas Guillemot (vertical flip)
75  Ken Miller (pgm, ppm) Richard Mitton (16-bit PSD)
76  github:urraka (animated gif) Junggon Kim (PNM comments)
77  Daniel Gibson (16-bit TGA)
78  socks-the-fox (16-bit PNG)
79  Jeremy Sawicki (handle all ImageNet
80 JPGs) Optimizations & bugfixes Fabian "ryg" Giesen Arseny Kapoulkine John-Mark
81 Allen
82 
83  Bug & warning fixes
84  Marc LeBlanc David Woo Guillaume George Martins
85 Mozeiko Christpher Lloyd Jerry Jansson Joseph Thomson Phil
86 Jordan Dave Moore Roy Eltham Hayaki Saito Nathan Reed
87  Won Chun Luke Graham Johan Duparc Nick Verigakis
88  the Horde3D community Thomas Ruf Ronny Chevalier Baldur
89 Karlsson Janez Zemva John Bartholomew Michal Cichon
90 github:rlyeh Jonathan Blow Ken Hamada Tero Hanninen
91 github:romigrou Laurent Gomila Cort Stratton Sergio Gonzalez
92 github:svdijk Aruelien Pocheville Thibault Reuille Cass Everitt
93 github:snagar Ryamond Barbiero Paul Du Bois Engin Manap
94 github:Zelex Michaelangel007@github Philipp Wiesemann Dale Weiler
95 github:grim210 Oriol Ferrer Mesia Josh Tobin Matthew Gregan
96 github:sammyhw Blazej Dariusz Roszkowski Gregory Mullen
97 github:phprus Christian Floisand Kevin Schmidt
98 github:poppolopoppo
99 */
100 
101 #ifndef STBI_INCLUDE_STB_IMAGE_H
102 #define STBI_INCLUDE_STB_IMAGE_H
103 
104 // DOCUMENTATION
105 //
106 // Limitations:
107 // - no 16-bit-per-channel PNG
108 // - no 12-bit-per-channel JPEG
109 // - no JPEGs with arithmetic coding
110 // - no 1-bit BMP
111 // - GIF always returns *comp=4
112 //
113 // Basic usage (see HDR discussion below for HDR usage):
114 // int x,y,n;
115 // unsigned char *data = stbi_load(filename, &x, &y, &n, 0);
116 // // ... process data if not NULL ...
117 // // ... x = width, y = height, n = # 8-bit components per pixel ...
118 // // ... replace '0' with '1'..'4' to force that many components per pixel
119 // // ... but 'n' will always be the number that it would have been if you said 0
120 // stbi_image_free(data)
121 //
122 // Standard parameters:
123 // int *x -- outputs image width in pixels
124 // int *y -- outputs image height in pixels
125 // int *channels_in_file -- outputs # of image components in image file
126 // int desired_channels -- if non-zero, # of image components requested in
127 // result
128 //
129 // The return value from an image loader is an 'unsigned char *' which points
130 // to the pixel data, or NULL on an allocation failure or if the image is
131 // corrupt or invalid. The pixel data consists of *y scanlines of *x pixels,
132 // with each pixel consisting of N interleaved 8-bit components; the first
133 // pixel pointed to is top-left-most in the image. There is no padding between
134 // image scanlines or between pixels, regardless of format. The number of
135 // components N is 'desired_channels' if desired_channels is non-zero, or
136 // *channels_in_file otherwise. If desired_channels is non-zero,
137 // *channels_in_file has the number of components that _would_ have been
138 // output otherwise. E.g. if you set desired_channels to 4, you will always
139 // get RGBA output, but you can check *channels_in_file to see if it's trivially
140 // opaque because e.g. there were only 3 channels in the source image.
141 //
142 // An output image with N components has the following components interleaved
143 // in this order in each pixel:
144 //
145 // N=#comp components
146 // 1 grey
147 // 2 grey, alpha
148 // 3 red, green, blue
149 // 4 red, green, blue, alpha
150 //
151 // If image loading fails for any reason, the return value will be NULL,
152 // and *x, *y, *channels_in_file will be unchanged. The function
153 // stbi_failure_reason() can be queried for an extremely brief, end-user
154 // unfriendly explanation of why the load failed. Define STBI_NO_FAILURE_STRINGS
155 // to avoid compiling these strings at all, and STBI_FAILURE_USERMSG to get
156 // slightly more user-friendly ones.
157 //
158 // Paletted PNG, BMP, GIF, and PIC images are automatically depalettized.
159 //
160 // ===========================================================================
161 //
162 // Philosophy
163 //
164 // stb libraries are designed with the following priorities:
165 //
166 // 1. easy to use
167 // 2. easy to maintain
168 // 3. good performance
169 //
170 // Sometimes I let "good performance" creep up in priority over "easy to
171 // maintain", and for best performance I may provide less-easy-to-use APIs that
172 // give higher performance, in addition to the easy to use ones. Nevertheless,
173 // it's important to keep in mind that from the standpoint of you, a client of
174 // this library, all you care about is #1 and #3, and stb libraries DO NOT
175 // emphasize #3 above all.
176 //
177 // Some secondary priorities arise directly from the first two, some of which
178 // make more explicit reasons why performance can't be emphasized.
179 //
180 // - Portable ("ease of use")
181 // - Small source code footprint ("easy to maintain")
182 // - No dependencies ("ease of use")
183 //
184 // ===========================================================================
185 //
186 // I/O callbacks
187 //
188 // I/O callbacks allow you to read from arbitrary sources, like packaged
189 // files or some other source. Data read from callbacks are processed
190 // through a small internal buffer (currently 128 bytes) to try to reduce
191 // overhead.
192 //
193 // The three functions you must define are "read" (reads some bytes of data),
194 // "skip" (skips some bytes of data), "eof" (reports if the stream is at the
195 // end).
196 //
197 // ===========================================================================
198 //
199 // SIMD support
200 //
201 // The JPEG decoder will try to automatically use SIMD kernels on x86 when
202 // supported by the compiler. For ARM Neon support, you must explicitly
203 // request it.
204 //
205 // (The old do-it-yourself SIMD API is no longer supported in the current
206 // code.)
207 //
208 // On x86, SSE2 will automatically be used when available based on a run-time
209 // test; if not, the generic C versions are used as a fall-back. On ARM targets,
210 // the typical path is to have separate builds for NEON and non-NEON devices
211 // (at least this is true for iOS and Android). Therefore, the NEON support is
212 // toggled by a build flag: define STBI_NEON to get NEON loops.
213 //
214 // If for some reason you do not want to use any of SIMD code, or if
215 // you have issues compiling it, you can disable it entirely by
216 // defining STBI_NO_SIMD.
217 //
218 // ===========================================================================
219 //
220 // HDR image support (disable by defining STBI_NO_HDR)
221 //
222 // stb_image now supports loading HDR images in general, and currently
223 // the Radiance .HDR file format, although the support is provided
224 // generically. You can still load any file through the existing interface;
225 // if you attempt to load an HDR file, it will be automatically remapped to
226 // LDR, assuming gamma 2.2 and an arbitrary scale factor defaulting to 1;
227 // both of these constants can be reconfigured through this interface:
228 //
229 // stbi_hdr_to_ldr_gamma(2.2f);
230 // stbi_hdr_to_ldr_scale(1.0f);
231 //
232 // (note, do not use _inverse_ constants; stb_image will invert them
233 // appropriately).
234 //
235 // Additionally, there is a new, parallel interface for loading files as
236 // (linear) floats to preserve the full dynamic range:
237 //
238 // float *data = stbi_loadf(filename, &x, &y, &n, 0);
239 //
240 // If you load LDR images through this interface, those images will
241 // be promoted to floating point values, run through the inverse of
242 // constants corresponding to the above:
243 //
244 // stbi_ldr_to_hdr_scale(1.0f);
245 // stbi_ldr_to_hdr_gamma(2.2f);
246 //
247 // Finally, given a filename (or an open file or memory block--see header
248 // file for details) containing image data, you can query for the "most
249 // appropriate" interface to use (that is, whether the image is HDR or
250 // not), using:
251 //
252 // stbi_is_hdr(char *filename);
253 //
254 // ===========================================================================
255 //
256 // iPhone PNG support:
257 //
258 // By default we convert iphone-formatted PNGs back to RGB, even though
259 // they are internally encoded differently. You can disable this conversion
260 // by calling stbi_convert_iphone_png_to_rgb(0), in which case
261 // you will always just get the native iphone "format" through (which
262 // is BGR stored in RGB).
263 //
264 // Call stbi_set_unpremultiply_on_load(1) as well to force a divide per
265 // pixel to remove any premultiplied alpha *only* if the image file explicitly
266 // says there's premultiplied data (currently only happens in iPhone images,
267 // and only if iPhone convert-to-rgb processing is on).
268 //
269 // ===========================================================================
270 //
271 // ADDITIONAL CONFIGURATION
272 //
273 // - You can suppress implementation of any of the decoders to reduce
274 // your code footprint by #defining one or more of the following
275 // symbols before creating the implementation.
276 //
277 // STBI_NO_JPEG
278 // STBI_NO_PNG
279 // STBI_NO_BMP
280 // STBI_NO_PSD
281 // STBI_NO_TGA
282 // STBI_NO_GIF
283 // STBI_NO_HDR
284 // STBI_NO_PIC
285 // STBI_NO_PNM (.ppm and .pgm)
286 //
287 // - You can request *only* certain decoders and suppress all other ones
288 // (this will be more forward-compatible, as addition of new decoders
289 // doesn't require you to disable them explicitly):
290 //
291 // STBI_ONLY_JPEG
292 // STBI_ONLY_PNG
293 // STBI_ONLY_BMP
294 // STBI_ONLY_PSD
295 // STBI_ONLY_TGA
296 // STBI_ONLY_GIF
297 // STBI_ONLY_HDR
298 // STBI_ONLY_PIC
299 // STBI_ONLY_PNM (.ppm and .pgm)
300 //
301 // - If you use STBI_NO_PNG (or _ONLY_ without PNG), and you still
302 // want the zlib decoder to be available, #define STBI_SUPPORT_ZLIB
303 //
304 
305 #ifndef STBI_NO_STDIO
306 #include <stdio.h>
307 #endif // STBI_NO_STDIO
308 
309 #define STBI_VERSION 1
310 
// Channel counts accepted for 'desired_channels'. Apart from STBI_default,
// the numeric value of each enumerator is the number of interleaved
// components per pixel (1 grey; 2 grey, alpha; 3 red, green, blue;
// 4 red, green, blue, alpha).
enum {
    STBI_default = 0, // only used for desired_channels

    STBI_grey = 1,
    STBI_grey_alpha = 2,
    STBI_rgb = 3,
    STBI_rgb_alpha = 4
};
319 
320 typedef unsigned char stbi_uc;
321 typedef unsigned short stbi_us;
322 
323 #ifdef __cplusplus
324 extern "C" {
325 #endif
326 
327 #ifdef STB_IMAGE_STATIC
328 #define STBIDEF static
329 #else
330 #define STBIDEF extern
331 #endif
332 
334 //
335 // PRIMARY API - works on images of any type
336 //
337 
338 //
339 // load image by filename, open file, or memory buffer
340 //
341 
// User-supplied I/O hooks used to decode from an arbitrary data source.
// Each hook receives the opaque 'user' pointer that was passed alongside
// the callbacks.
typedef struct {
    // fill 'data' with 'size' bytes. return number of bytes actually read
    int (*read)(void *user, char *data, int size);
    // skip the next 'n' bytes, or 'unget' the last -n bytes if negative
    void (*skip)(void *user, int n);
    // returns nonzero if we are at end of file/data
    int (*eof)(void *user);
} stbi_io_callbacks;
350 
352 //
353 // 8-bits-per-channel interface
354 //
355 
356 STBIDEF stbi_uc *stbi_load_from_memory(stbi_uc const *buffer, int len, int *x,
357  int *y, int *channels_in_file,
358  int desired_channels);
360  void *user, int *x, int *y,
361  int *channels_in_file,
362  int desired_channels);
363 
364 #ifndef STBI_NO_STDIO
365 STBIDEF stbi_uc *stbi_load(char const *filename, int *x, int *y,
366  int *channels_in_file, int desired_channels);
367 STBIDEF stbi_uc *stbi_load_from_file(FILE *f, int *x, int *y,
368  int *channels_in_file,
369  int desired_channels);
370 // for stbi_load_from_file, file pointer is left pointing immediately after
371 // image
372 #endif
373 
375 //
376 // 16-bits-per-channel interface
377 //
378 
379 STBIDEF stbi_us *stbi_load_16_from_memory(stbi_uc const *buffer, int len,
380  int *x, int *y, int *channels_in_file,
381  int desired_channels);
383  void *user, int *x, int *y,
384  int *channels_in_file,
385  int desired_channels);
386 
387 #ifndef STBI_NO_STDIO
388 STBIDEF stbi_us *stbi_load_16(char const *filename, int *x, int *y,
389  int *channels_in_file, int desired_channels);
390 STBIDEF stbi_us *stbi_load_from_file_16(FILE *f, int *x, int *y,
391  int *channels_in_file,
392  int desired_channels);
393 #endif
394 
396 //
397 // float-per-channel interface
398 //
399 #ifndef STBI_NO_LINEAR
400 STBIDEF float *stbi_loadf_from_memory(stbi_uc const *buffer, int len, int *x,
401  int *y, int *channels_in_file,
402  int desired_channels);
404  void *user, int *x, int *y,
405  int *channels_in_file,
406  int desired_channels);
407 
408 #ifndef STBI_NO_STDIO
409 STBIDEF float *stbi_loadf(char const *filename, int *x, int *y,
410  int *channels_in_file, int desired_channels);
411 STBIDEF float *stbi_loadf_from_file(FILE *f, int *x, int *y,
412  int *channels_in_file,
413  int desired_channels);
414 #endif
415 #endif
416 
417 #ifndef STBI_NO_HDR
418 STBIDEF void stbi_hdr_to_ldr_gamma(float gamma);
419 STBIDEF void stbi_hdr_to_ldr_scale(float scale);
420 #endif // STBI_NO_HDR
421 
422 #ifndef STBI_NO_LINEAR
423 STBIDEF void stbi_ldr_to_hdr_gamma(float gamma);
424 STBIDEF void stbi_ldr_to_hdr_scale(float scale);
425 #endif // STBI_NO_LINEAR
426 
427 // stbi_is_hdr is always defined, but always returns false if STBI_NO_HDR
429  void *user);
430 STBIDEF int stbi_is_hdr_from_memory(stbi_uc const *buffer, int len);
431 #ifndef STBI_NO_STDIO
432 STBIDEF int stbi_is_hdr(char const *filename);
433 STBIDEF int stbi_is_hdr_from_file(FILE *f);
434 #endif // STBI_NO_STDIO
435 
436 // get a VERY brief reason for failure
437 // NOT THREADSAFE
438 STBIDEF const char *stbi_failure_reason(void);
439 
440 // free the loaded image -- this is just free()
441 STBIDEF void stbi_image_free(void *retval_from_stbi_load);
442 
443 // get image dimensions & components without fully decoding
444 STBIDEF int stbi_info_from_memory(stbi_uc const *buffer, int len, int *x,
445  int *y, int *comp);
446 STBIDEF int stbi_info_from_callbacks(stbi_io_callbacks const *clbk, void *user,
447  int *x, int *y, int *comp);
448 
449 #ifndef STBI_NO_STDIO
450 STBIDEF int stbi_info(char const *filename, int *x, int *y, int *comp);
451 STBIDEF int stbi_info_from_file(FILE *f, int *x, int *y, int *comp);
452 
453 #endif
454 
455 // for image formats that explicitly notate that they have premultiplied alpha,
456 // we just return the colors as stored in the file. set this flag to force
457 // unpremultiplication. results are undefined if the unpremultiply overflows.
458 STBIDEF void
459 stbi_set_unpremultiply_on_load(int flag_true_if_should_unpremultiply);
460 
461 // indicate whether we should process iphone images back to canonical format,
462 // or just pass them through "as-is"
463 STBIDEF void stbi_convert_iphone_png_to_rgb(int flag_true_if_should_convert);
464 
465 // flip the image vertically, so the first pixel in the output array is the
466 // bottom left
467 STBIDEF void stbi_set_flip_vertically_on_load(int flag_true_if_should_flip);
468 
469 // ZLIB client - used by PNG, available for other purposes
470 
471 STBIDEF char *stbi_zlib_decode_malloc_guesssize(const char *buffer, int len,
472  int initial_size, int *outlen);
474  int len,
475  int initial_size,
476  int *outlen,
477  int parse_header);
478 STBIDEF char *stbi_zlib_decode_malloc(const char *buffer, int len, int *outlen);
479 STBIDEF int stbi_zlib_decode_buffer(char *obuffer, int olen,
480  const char *ibuffer, int ilen);
481 
482 STBIDEF char *stbi_zlib_decode_noheader_malloc(const char *buffer, int len,
483  int *outlen);
484 STBIDEF int stbi_zlib_decode_noheader_buffer(char *obuffer, int olen,
485  const char *ibuffer, int ilen);
486 
487 #ifdef __cplusplus
488 }
489 #endif
490 
491 //
492 //
494 #endif // STBI_INCLUDE_STB_IMAGE_H
495 
496 #ifdef STB_IMAGE_IMPLEMENTATION
497 
498 #if defined(STBI_ONLY_JPEG) || defined(STBI_ONLY_PNG) || \
499  defined(STBI_ONLY_BMP) || defined(STBI_ONLY_TGA) || \
500  defined(STBI_ONLY_GIF) || defined(STBI_ONLY_PSD) || \
501  defined(STBI_ONLY_HDR) || defined(STBI_ONLY_PIC) || \
502  defined(STBI_ONLY_PNM) || defined(STBI_ONLY_ZLIB)
503 #ifndef STBI_ONLY_JPEG
504 #define STBI_NO_JPEG
505 #endif
506 #ifndef STBI_ONLY_PNG
507 #define STBI_NO_PNG
508 #endif
509 #ifndef STBI_ONLY_BMP
510 #define STBI_NO_BMP
511 #endif
512 #ifndef STBI_ONLY_PSD
513 #define STBI_NO_PSD
514 #endif
515 #ifndef STBI_ONLY_TGA
516 #define STBI_NO_TGA
517 #endif
518 #ifndef STBI_ONLY_GIF
519 #define STBI_NO_GIF
520 #endif
521 #ifndef STBI_ONLY_HDR
522 #define STBI_NO_HDR
523 #endif
524 #ifndef STBI_ONLY_PIC
525 #define STBI_NO_PIC
526 #endif
527 #ifndef STBI_ONLY_PNM
528 #define STBI_NO_PNM
529 #endif
530 #endif
531 
532 #if defined(STBI_NO_PNG) && !defined(STBI_SUPPORT_ZLIB) && \
533  !defined(STBI_NO_ZLIB)
534 #define STBI_NO_ZLIB
535 #endif
536 
537 #include <limits.h>
538 #include <stdarg.h>
539 #include <stddef.h> // ptrdiff_t on osx
540 #include <stdlib.h>
541 #include <string.h>
542 
543 #if !defined(STBI_NO_LINEAR) || !defined(STBI_NO_HDR)
544 #include <math.h> // ldexp
545 #endif
546 
547 #ifndef STBI_NO_STDIO
548 #include <stdio.h>
549 #endif
550 
551 #ifndef STBI_ASSERT
552 #include <assert.h>
553 #define STBI_ASSERT(x) assert(x)
554 #endif
555 
556 #ifndef _MSC_VER
557 #ifdef __cplusplus
558 #define stbi_inline inline
559 #else
560 #define stbi_inline
561 #endif
562 #else
563 #define stbi_inline __forceinline
564 #endif
565 
566 #ifdef _MSC_VER
567 typedef unsigned short stbi__uint16;
568 typedef signed short stbi__int16;
569 typedef unsigned int stbi__uint32;
570 typedef signed int stbi__int32;
571 #else
572 #include <stdint.h>
573 typedef uint16_t stbi__uint16;
574 typedef int16_t stbi__int16;
575 typedef uint32_t stbi__uint32;
576 typedef int32_t stbi__int32;
577 #endif
578 
579 // should produce compiler error if size is wrong
580 typedef unsigned char validate_uint32[sizeof(stbi__uint32) == 4 ? 1 : -1];
581 
582 #ifdef _MSC_VER
583 #define STBI_NOTUSED(v) (void)(v)
584 #else
585 #define STBI_NOTUSED(v) (void)sizeof(v)
586 #endif
587 
588 #ifdef _MSC_VER
589 #define STBI_HAS_LROTL
590 #endif
591 
// stbi_lrot(x, y): rotate x left by y bits within 32 bits.
// x is expected to be an unsigned 32-bit value and 0 <= y < 32.
#ifdef STBI_HAS_LROTL
#define stbi_lrot(x, y) _lrotl(x, y)
#else
// The right-shift count is masked so that y == 0 shifts by 0 rather than by
// 32; shifting a 32-bit value by its full width is undefined behaviour in C.
#define stbi_lrot(x, y) (((x) << (y)) | ((x) >> ((32 - (y)) & 31)))
#endif
597 
598 #if defined(STBI_MALLOC) && defined(STBI_FREE) && \
599  (defined(STBI_REALLOC) || defined(STBI_REALLOC_SIZED))
600 // ok
601 #elif !defined(STBI_MALLOC) && !defined(STBI_FREE) && \
602  !defined(STBI_REALLOC) && !defined(STBI_REALLOC_SIZED)
603 // ok
604 #else
605 #error \
606  "Must define all or none of STBI_MALLOC, STBI_FREE, and STBI_REALLOC (or STBI_REALLOC_SIZED)."
607 #endif
608 
609 #ifndef STBI_MALLOC
610 #define STBI_MALLOC(sz) malloc(sz)
611 #define STBI_REALLOC(p, newsz) realloc(p, newsz)
612 #define STBI_FREE(p) free(p)
613 #endif
614 
615 #ifndef STBI_REALLOC_SIZED
616 #define STBI_REALLOC_SIZED(p, oldsz, newsz) STBI_REALLOC(p, newsz)
617 #endif
618 
619 // x86/x64 detection
620 #if defined(__x86_64__) || defined(_M_X64)
621 #define STBI__X64_TARGET
622 #elif defined(__i386) || defined(_M_IX86)
623 #define STBI__X86_TARGET
624 #endif
625 
626 #if defined(__GNUC__) && defined(STBI__X86_TARGET) && !defined(__SSE2__) && \
627  !defined(STBI_NO_SIMD)
628 // gcc doesn't support sse2 intrinsics unless you compile with -msse2,
629 // which in turn means it gets to use SSE2 everywhere. This is unfortunate,
630 // but previous attempts to provide the SSE2 functions with runtime
631 // detection caused numerous issues. The way architecture extensions are
632 // exposed in GCC/Clang is, sadly, not really suited for one-file libs.
633 // New behavior: if compiled with -msse2, we use SSE2 without any
634 // detection; if not, we don't use it at all.
635 #define STBI_NO_SIMD
636 #endif
637 
638 #if defined(__MINGW32__) && defined(STBI__X86_TARGET) && \
639  !defined(STBI_MINGW_ENABLE_SSE2) && !defined(STBI_NO_SIMD)
640 // Note that __MINGW32__ doesn't actually mean 32-bit, so we have to avoid
641 // STBI__X64_TARGET
642 //
643 // 32-bit MinGW wants ESP to be 16-byte aligned, but this is not in the
644 // Windows ABI and VC++ as well as Windows DLLs don't maintain that invariant.
645 // As a result, enabling SSE2 on 32-bit MinGW is dangerous when not
646 // simultaneously enabling "-mstackrealign".
647 //
648 // See https://github.com/nothings/stb/issues/81 for more information.
649 //
650 // So default to no SSE2 on 32-bit MinGW. If you've read this far and added
651 // -mstackrealign to your build settings, feel free to #define
652 // STBI_MINGW_ENABLE_SSE2.
653 #define STBI_NO_SIMD
654 #endif
655 
656 #if !defined(STBI_NO_SIMD) && \
657  (defined(STBI__X86_TARGET) || defined(STBI__X64_TARGET))
658 #define STBI_SSE2
659 #include <emmintrin.h>
660 
661 #ifdef _MSC_VER
662 
663 #if _MSC_VER >= 1400 // not VC6
664 #include <intrin.h> // __cpuid
// Runs CPUID leaf 1 through the MSVC intrinsic and returns the fourth
// output word (the inline-asm variant of this function returns EDX).
static int stbi__cpuid3(void)
{
    int regs[4];
    __cpuid(regs, 1);
    return regs[3];
}
671 #else
672 static int stbi__cpuid3(void)
673 {
674  int res;
675  __asm {
676  mov eax,1
677  cpuid
678  mov res,edx
679  }
680  return res;
681 }
682 #endif
683 
684 #define STBI_SIMD_ALIGN(type, name) __declspec(align(16)) type name
685 
// Run-time SSE2 check: returns 1 when bit 26 of the value reported by
// stbi__cpuid3() is set, 0 otherwise.
static int stbi__sse2_available(void)
{
    return (stbi__cpuid3() >> 26) & 1;
}
691 #else // assume GCC-style if not VC++
692 #define STBI_SIMD_ALIGN(type, name) type name __attribute__((aligned(16)))
693 
// No run-time detection on GCC/Clang: this code is only compiled when
// -msse2 is in effect, in which case the compiler may already emit SSE2
// instructions anywhere, so SSE2 is unconditionally reported as available.
static int stbi__sse2_available(void)
{
    return 1;
}
701 #endif
702 #endif
703 
704 // ARM NEON
705 #if defined(STBI_NO_SIMD) && defined(STBI_NEON)
706 #undef STBI_NEON
707 #endif
708 
709 #ifdef STBI_NEON
710 #include <arm_neon.h>
711 // assume GCC or Clang on ARM targets
712 #define STBI_SIMD_ALIGN(type, name) type name __attribute__((aligned(16)))
713 #endif
714 
715 #ifndef STBI_SIMD_ALIGN
716 #define STBI_SIMD_ALIGN(type, name) type name
717 #endif
718 
720 //
721 // stbi__context struct and start_xxx functions
722 
723 // stbi__context structure is our basic context used by all images, so it
724 // contains all the IO context, plus some basic image information
725 typedef struct {
726  stbi__uint32 img_x, img_y;
727  int img_n, img_out_n;
728 
730  void *io_user_data;
731 
732  int read_from_callbacks;
733  int buflen;
734  stbi_uc buffer_start[128];
735 
736  stbi_uc *img_buffer, *img_buffer_end;
737  stbi_uc *img_buffer_original, *img_buffer_original_end;
738 } stbi__context;
739 
740 static void stbi__refill_buffer(stbi__context *s);
741 
742 // initialize a memory-decode context
743 static void stbi__start_mem(stbi__context *s, stbi_uc const *buffer, int len)
744 {
745  s->io.read = NULL;
746  s->read_from_callbacks = 0;
747  s->img_buffer = s->img_buffer_original = (stbi_uc *)buffer;
748  s->img_buffer_end = s->img_buffer_original_end = (stbi_uc *)buffer + len;
749 }
750 
751 // initialize a callback-based context
752 static void stbi__start_callbacks(stbi__context *s, stbi_io_callbacks *c,
753  void *user)
754 {
755  s->io = *c;
756  s->io_user_data = user;
757  s->buflen = sizeof(s->buffer_start);
758  s->read_from_callbacks = 1;
759  s->img_buffer_original = s->buffer_start;
760  stbi__refill_buffer(s);
761  s->img_buffer_original_end = s->img_buffer_end;
762 }
763 
764 #ifndef STBI_NO_STDIO
765 
// 'read' callback for FILE-backed contexts: reads up to 'size' bytes from the
// FILE passed as 'user' into 'data' and returns how many bytes were read.
static int stbi__stdio_read(void *user, char *data, int size)
{
    FILE *stream = (FILE *)user;
    size_t got = fread(data, 1, size, stream);
    return (int)got;
}
770 
// 'skip' callback for FILE-backed contexts: moves the position of the FILE
// passed as 'user' by 'n' bytes relative to the current position (a negative
// 'n' moves backwards).
static void stbi__stdio_skip(void *user, int n)
{
    FILE *f = (FILE *)user;
    int ch;

    fseek(f, n, SEEK_CUR);
    // A successful fseek clears the end-of-file indicator, so feof() would
    // keep reporting "not at end" after skipping to or past the end of the
    // data. Probe one byte so the indicator is accurate, and push the byte
    // back if there was one.
    ch = fgetc(f);
    if (ch != EOF)
        ungetc(ch, f);
}
775 
// 'eof' callback for FILE-backed contexts: returns nonzero when no more data
// can be read from the FILE passed as 'user'. A stream in the error state is
// reported as ended as well, so a failing read cannot look like "more data
// pending" forever.
static int stbi__stdio_eof(void *user)
{
    FILE *f = (FILE *)user;
    return feof(f) || ferror(f);
}
780 
781 static stbi_io_callbacks stbi__stdio_callbacks = {
782  stbi__stdio_read,
783  stbi__stdio_skip,
784  stbi__stdio_eof,
785 };
786 
787 static void stbi__start_file(stbi__context *s, FILE *f)
788 {
789  stbi__start_callbacks(s, &stbi__stdio_callbacks, (void *)f);
790 }
791 
792 // static void stop_file(stbi__context *s) { }
793 
794 #endif // !STBI_NO_STDIO
795 
796 static void stbi__rewind(stbi__context *s)
797 {
798  // conceptually rewind SHOULD rewind to the beginning of the stream,
799  // but we just rewind to the beginning of the initial buffer, because
800  // we only use it after doing 'test', which only ever looks at at most 92
801  // bytes
802  s->img_buffer = s->img_buffer_original;
803  s->img_buffer_end = s->img_buffer_original_end;
804 }
805 
806 enum { STBI_ORDER_RGB, STBI_ORDER_BGR };
807 
808 typedef struct {
809  int bits_per_channel;
810  int num_channels;
811  int channel_order;
812 } stbi__result_info;
813 
814 #ifndef STBI_NO_JPEG
815 static int stbi__jpeg_test(stbi__context *s);
816 static void *stbi__jpeg_load(stbi__context *s, int *x, int *y, int *comp,
817  int req_comp, stbi__result_info *ri);
818 static int stbi__jpeg_info(stbi__context *s, int *x, int *y, int *comp);
819 #endif
820 
821 #ifndef STBI_NO_PNG
822 static int stbi__png_test(stbi__context *s);
823 static void *stbi__png_load(stbi__context *s, int *x, int *y, int *comp,
824  int req_comp, stbi__result_info *ri);
825 static int stbi__png_info(stbi__context *s, int *x, int *y, int *comp);
826 #endif
827 
828 #ifndef STBI_NO_BMP
829 static int stbi__bmp_test(stbi__context *s);
830 static void *stbi__bmp_load(stbi__context *s, int *x, int *y, int *comp,
831  int req_comp, stbi__result_info *ri);
832 static int stbi__bmp_info(stbi__context *s, int *x, int *y, int *comp);
833 #endif
834 
835 #ifndef STBI_NO_TGA
836 static int stbi__tga_test(stbi__context *s);
837 static void *stbi__tga_load(stbi__context *s, int *x, int *y, int *comp,
838  int req_comp, stbi__result_info *ri);
839 static int stbi__tga_info(stbi__context *s, int *x, int *y, int *comp);
840 #endif
841 
842 #ifndef STBI_NO_PSD
843 static int stbi__psd_test(stbi__context *s);
844 static void *stbi__psd_load(stbi__context *s, int *x, int *y, int *comp,
845  int req_comp, stbi__result_info *ri, int bpc);
846 static int stbi__psd_info(stbi__context *s, int *x, int *y, int *comp);
847 #endif
848 
849 #ifndef STBI_NO_HDR
850 static int stbi__hdr_test(stbi__context *s);
851 static float *stbi__hdr_load(stbi__context *s, int *x, int *y, int *comp,
852  int req_comp, stbi__result_info *ri);
853 static int stbi__hdr_info(stbi__context *s, int *x, int *y, int *comp);
854 #endif
855 
856 #ifndef STBI_NO_PIC
857 static int stbi__pic_test(stbi__context *s);
858 static void *stbi__pic_load(stbi__context *s, int *x, int *y, int *comp,
859  int req_comp, stbi__result_info *ri);
860 static int stbi__pic_info(stbi__context *s, int *x, int *y, int *comp);
861 #endif
862 
863 #ifndef STBI_NO_GIF
864 static int stbi__gif_test(stbi__context *s);
865 static void *stbi__gif_load(stbi__context *s, int *x, int *y, int *comp,
866  int req_comp, stbi__result_info *ri);
867 static int stbi__gif_info(stbi__context *s, int *x, int *y, int *comp);
868 #endif
869 
870 #ifndef STBI_NO_PNM
871 static int stbi__pnm_test(stbi__context *s);
872 static void *stbi__pnm_load(stbi__context *s, int *x, int *y, int *comp,
873  int req_comp, stbi__result_info *ri);
874 static int stbi__pnm_info(stbi__context *s, int *x, int *y, int *comp);
875 #endif
876 
877 // this is not threadsafe
878 static const char *stbi__g_failure_reason;
879 
880 STBIDEF const char *stbi_failure_reason(void)
881 {
882  return stbi__g_failure_reason;
883 }
884 
885 static int stbi__err(const char *str)
886 {
887  stbi__g_failure_reason = str;
888  return 0;
889 }
890 
891 static void *stbi__malloc(size_t size)
892 {
893  return STBI_MALLOC(size);
894 }
895 
896 // stb_image uses ints pervasively, including for offset calculations.
897 // therefore the largest decoded image size we can support with the
898 // current code, even on 64-bit targets, is INT_MAX. this is not a
899 // significant limitation for the intended use case.
900 //
901 // we do, however, need to make sure our size calculations don't
902 // overflow. hence a few helper functions for size calculations that
903 // multiply integers together, making sure that they're non-negative
904 // and no overflow occurs.
905 
// return 1 if the sum a + b is valid, 0 on overflow.
// negative terms are considered invalid (this applies to both a and b).
static int stbi__addsizes_valid(int a, int b)
{
    if (a < 0 || b < 0)
        return 0;
    // now 0 <= b <= INT_MAX, hence also
    // 0 <= INT_MAX - b <= INT_MAX.
    // And "a + b <= INT_MAX" (which might overflow) is the
    // same as a <= INT_MAX - b (no overflow)
    return a <= INT_MAX - b;
}
918 
// Overflow check for the product a * b: returns 1 when both factors are
// non-negative and the product fits in an int, 0 otherwise.
static int stbi__mul2sizes_valid(int a, int b)
{
    if (a < 0 || b < 0)
        return 0;
    // A zero 'b' can never overflow (and must not reach the division).
    // Otherwise "a * b <= INT_MAX" is tested as "a <= INT_MAX / b", which
    // cannot itself overflow.
    return b == 0 || a <= INT_MAX / b;
}
930 
// Returns 1 if "a*b + add" has no negative terms/factors and does not
// overflow int; 0 otherwise.
static int stbi__mad2sizes_valid(int a, int b, int add)
{
    if (!stbi__mul2sizes_valid(a, b))
        return 0;
    // a * b is only evaluated once it is known not to overflow
    return stbi__addsizes_valid(a * b, add) != 0;
}
936 
// Returns 1 if "a*b*c + add" has no negative terms/factors and does not
// overflow int; 0 otherwise.
static int stbi__mad3sizes_valid(int a, int b, int c, int add)
{
    // Each partial product is validated before it is actually computed.
    if (!stbi__mul2sizes_valid(a, b))
        return 0;
    if (!stbi__mul2sizes_valid(a * b, c))
        return 0;
    return stbi__addsizes_valid(a * b * c, add) != 0;
}
943 
// Returns 1 if "a*b*c*d + add" has no negative terms/factors and does not
// overflow int; 0 otherwise.
static int stbi__mad4sizes_valid(int a, int b, int c, int d, int add)
{
    // Each partial product is validated before it is actually computed.
    if (!stbi__mul2sizes_valid(a, b))
        return 0;
    if (!stbi__mul2sizes_valid(a * b, c))
        return 0;
    if (!stbi__mul2sizes_valid(a * b * c, d))
        return 0;
    return stbi__addsizes_valid(a * b * c * d, add) != 0;
}
952 
// Overflow-checked allocation of "a*b + add" bytes.
// Returns NULL when the size expression is invalid (see
// stbi__mad2sizes_valid); otherwise returns stbi__malloc's result.
static void *stbi__malloc_mad2(int a, int b, int add)
{
    return stbi__mad2sizes_valid(a, b, add) ? stbi__malloc(a * b + add)
                                            : NULL;
}
960 
// Overflow-checked allocation of "a*b*c + add" bytes.
// Returns NULL when the size expression is invalid (see
// stbi__mad3sizes_valid); otherwise returns stbi__malloc's result.
static void *stbi__malloc_mad3(int a, int b, int c, int add)
{
    return stbi__mad3sizes_valid(a, b, c, add)
               ? stbi__malloc(a * b * c + add)
               : NULL;
}
967 
// Overflow-checked allocation of "a*b*c*d + add" bytes.
// Returns NULL when the size expression is invalid (see
// stbi__mad4sizes_valid); otherwise returns stbi__malloc's result.
static void *stbi__malloc_mad4(int a, int b, int c, int d, int add)
{
    return stbi__mad4sizes_valid(a, b, c, d, add)
               ? stbi__malloc(a * b * c * d + add)
               : NULL;
}
974 
// Error-reporting macros. Each takes a short message and a longer
// user-facing message and selects one according to the build configuration:
//   stbi__err    - evaluates to 0 (int)
//   stbi__errpf  - evaluates to a NULL float pointer
//   stbi__errpuc - evaluates to a NULL unsigned char pointer

#if defined(STBI_NO_FAILURE_STRINGS)
#define stbi__err(brief, verbose) 0
#elif defined(STBI_FAILURE_USERMSG)
#define stbi__err(brief, verbose) stbi__err(verbose)
#else
#define stbi__err(brief, verbose) stbi__err(brief)
#endif

// Both arms of the conditional are NULL; the condition exists only so that
// stbi__err is evaluated for its side effect.
#define stbi__errpf(brief, verbose) \
    ((float *)(size_t)(stbi__err(brief, verbose) ? NULL : NULL))
#define stbi__errpuc(brief, verbose) \
    ((unsigned char *)(size_t)(stbi__err(brief, verbose) ? NULL : NULL))
990 
991 STBIDEF void stbi_image_free(void *retval_from_stbi_load)
992 {
993  STBI_FREE(retval_from_stbi_load);
994 }
995 
996 #ifndef STBI_NO_LINEAR
997 static float *stbi__ldr_to_hdr(stbi_uc *data, int x, int y, int comp);
998 #endif
999 
1000 #ifndef STBI_NO_HDR
1001 static stbi_uc *stbi__hdr_to_ldr(float *data, int x, int y, int comp);
1002 #endif
1003 
1004 static int stbi__vertically_flip_on_load = 0;
1005 
1006 STBIDEF void stbi_set_flip_vertically_on_load(int flag_true_if_should_flip)
1007 {
1008  stbi__vertically_flip_on_load = flag_true_if_should_flip;
1009 }
1010 
1011 static void *stbi__load_main(stbi__context *s, int *x, int *y, int *comp,
1012  int req_comp, stbi__result_info *ri, int bpc)
1013 {
1014  memset(ri, 0,
1015  sizeof(*ri)); // make sure it's initialized if we add new fields
1016  ri->bits_per_channel =
1017  8; // default is 8 so most paths don't have to be changed
1018  ri->channel_order = STBI_ORDER_RGB; // all current input & output are this,
1019  // but this is here so we can add BGR
1020  // order
1021  ri->num_channels = 0;
1022 
1023 #ifndef STBI_NO_JPEG
1024  if (stbi__jpeg_test(s))
1025  return stbi__jpeg_load(s, x, y, comp, req_comp, ri);
1026 #endif
1027 #ifndef STBI_NO_PNG
1028  if (stbi__png_test(s))
1029  return stbi__png_load(s, x, y, comp, req_comp, ri);
1030 #endif
1031 #ifndef STBI_NO_BMP
1032  if (stbi__bmp_test(s))
1033  return stbi__bmp_load(s, x, y, comp, req_comp, ri);
1034 #endif
1035 #ifndef STBI_NO_GIF
1036  if (stbi__gif_test(s))
1037  return stbi__gif_load(s, x, y, comp, req_comp, ri);
1038 #endif
1039 #ifndef STBI_NO_PSD
1040  if (stbi__psd_test(s))
1041  return stbi__psd_load(s, x, y, comp, req_comp, ri, bpc);
1042 #endif
1043 #ifndef STBI_NO_PIC
1044  if (stbi__pic_test(s))
1045  return stbi__pic_load(s, x, y, comp, req_comp, ri);
1046 #endif
1047 #ifndef STBI_NO_PNM
1048  if (stbi__pnm_test(s))
1049  return stbi__pnm_load(s, x, y, comp, req_comp, ri);
1050 #endif
1051 
1052 #ifndef STBI_NO_HDR
1053  if (stbi__hdr_test(s)) {
1054  float *hdr = stbi__hdr_load(s, x, y, comp, req_comp, ri);
1055  return stbi__hdr_to_ldr(hdr, *x, *y, req_comp ? req_comp : *comp);
1056  }
1057 #endif
1058 
1059 #ifndef STBI_NO_TGA
1060  // test tga last because it's a crappy test!
1061  if (stbi__tga_test(s))
1062  return stbi__tga_load(s, x, y, comp, req_comp, ri);
1063 #endif
1064 
1065  return stbi__errpuc("unknown image type",
1066  "Image not of any known type, or corrupt");
1067 }
1068 
1069 static stbi_uc *stbi__convert_16_to_8(stbi__uint16 *orig, int w, int h,
1070  int channels)
1071 {
1072  int i;
1073  int img_len = w * h * channels;
1074  stbi_uc *reduced;
1075 
1076  reduced = (stbi_uc *)stbi__malloc(img_len);
1077  if (reduced == NULL)
1078  return stbi__errpuc("outofmem", "Out of memory");
1079 
1080  for (i = 0; i < img_len; ++i)
1081  reduced[i] = (stbi_uc)((orig[i] >> 8) & 0xFF); // top half of each byte
1082  // is sufficient approx
1083  // of 16->8 bit scaling
1084 
1085  STBI_FREE(orig);
1086  return reduced;
1087 }
1088 
1089 static stbi__uint16 *stbi__convert_8_to_16(stbi_uc *orig, int w, int h,
1090  int channels)
1091 {
1092  int i;
1093  int img_len = w * h * channels;
1094  stbi__uint16 *enlarged;
1095 
1096  enlarged = (stbi__uint16 *)stbi__malloc(img_len * 2);
1097  if (enlarged == NULL)
1098  return (stbi__uint16 *)stbi__errpuc("outofmem", "Out of memory");
1099 
1100  for (i = 0; i < img_len; ++i)
1101  enlarged[i] = (stbi__uint16)(
1102  (orig[i] << 8) +
1103  orig[i]); // replicate to high and low byte, maps 0->0, 255->0xffff
1104 
1105  STBI_FREE(orig);
1106  return enlarged;
1107 }
1108 
1109 static void stbi__vertical_flip(void *image, int w, int h, int bytes_per_pixel)
1110 {
1111  int row;
1112  size_t bytes_per_row = (size_t)w * bytes_per_pixel;
1113  stbi_uc temp[2048];
1114  stbi_uc *bytes = (stbi_uc *)image;
1115 
1116  for (row = 0; row < (h >> 1); row++) {
1117  stbi_uc *row0 = bytes + row * bytes_per_row;
1118  stbi_uc *row1 = bytes + (h - row - 1) * bytes_per_row;
1119  // swap row0 with row1
1120  size_t bytes_left = bytes_per_row;
1121  while (bytes_left) {
1122  size_t bytes_copy =
1123  (bytes_left < sizeof(temp)) ? bytes_left : sizeof(temp);
1124  memcpy(temp, row0, bytes_copy);
1125  memcpy(row0, row1, bytes_copy);
1126  memcpy(row1, temp, bytes_copy);
1127  row0 += bytes_copy;
1128  row1 += bytes_copy;
1129  bytes_left -= bytes_copy;
1130  }
1131  }
1132 }
1133 
1134 static unsigned char *stbi__load_and_postprocess_8bit(stbi__context *s, int *x,
1135  int *y, int *comp,
1136  int req_comp)
1137 {
1138  stbi__result_info ri;
1139  void *result = stbi__load_main(s, x, y, comp, req_comp, &ri, 8);
1140 
1141  if (result == NULL)
1142  return NULL;
1143 
1144  if (ri.bits_per_channel != 8) {
1145  STBI_ASSERT(ri.bits_per_channel == 16);
1146  result = stbi__convert_16_to_8((stbi__uint16 *)result, *x, *y,
1147  req_comp == 0 ? *comp : req_comp);
1148  ri.bits_per_channel = 8;
1149  }
1150 
1151  // @TODO: move stbi__convert_format to here
1152 
1153  if (stbi__vertically_flip_on_load) {
1154  int channels = req_comp ? req_comp : *comp;
1155  stbi__vertical_flip(result, *x, *y, channels * sizeof(stbi_uc));
1156  }
1157 
1158  return (unsigned char *)result;
1159 }
1160 
1161 static stbi__uint16 *stbi__load_and_postprocess_16bit(stbi__context *s, int *x,
1162  int *y, int *comp,
1163  int req_comp)
1164 {
1165  stbi__result_info ri;
1166  void *result = stbi__load_main(s, x, y, comp, req_comp, &ri, 16);
1167 
1168  if (result == NULL)
1169  return NULL;
1170 
1171  if (ri.bits_per_channel != 16) {
1172  STBI_ASSERT(ri.bits_per_channel == 8);
1173  result = stbi__convert_8_to_16((stbi_uc *)result, *x, *y,
1174  req_comp == 0 ? *comp : req_comp);
1175  ri.bits_per_channel = 16;
1176  }
1177 
1178  // @TODO: move stbi__convert_format16 to here
1179  // @TODO: special case RGB-to-Y (and RGBA-to-YA) for 8-bit-to-16-bit case to
1180  // keep more precision
1181 
1182  if (stbi__vertically_flip_on_load) {
1183  int channels = req_comp ? req_comp : *comp;
1184  stbi__vertical_flip(result, *x, *y, channels * sizeof(stbi__uint16));
1185  }
1186 
1187  return (stbi__uint16 *)result;
1188 }
1189 
1190 #ifndef STBI_NO_HDR
1191 static void stbi__float_postprocess(float *result, int *x, int *y, int *comp,
1192  int req_comp)
1193 {
1194  if (stbi__vertically_flip_on_load && result != NULL) {
1195  int channels = req_comp ? req_comp : *comp;
1196  stbi__vertical_flip(result, *x, *y, channels * sizeof(float));
1197  }
1198 }
1199 #endif
1200 
1201 #ifndef STBI_NO_STDIO
1202 
// Opens `filename` with `mode`, using fopen_s on MSVC 2005 and later and
// plain fopen elsewhere. Returns the stream, or a null pointer on failure.
static FILE *stbi__fopen(char const *filename, char const *mode)
{
#if defined(_MSC_VER) && _MSC_VER >= 1400
    FILE *handle;
    if (fopen_s(&handle, filename, mode) != 0)
        handle = 0;
    return handle;
#else
    return fopen(filename, mode);
#endif
}
1214 
1215 STBIDEF stbi_uc *stbi_load(char const *filename, int *x, int *y, int *comp,
1216  int req_comp)
1217 {
1218  FILE *f = stbi__fopen(filename, "rb");
1219  unsigned char *result;
1220  if (!f)
1221  return stbi__errpuc("can't fopen", "Unable to open file");
1222  result = stbi_load_from_file(f, x, y, comp, req_comp);
1223  fclose(f);
1224  return result;
1225 }
1226 
1227 STBIDEF stbi_uc *stbi_load_from_file(FILE *f, int *x, int *y, int *comp,
1228  int req_comp)
1229 {
1230  unsigned char *result;
1231  stbi__context s;
1232  stbi__start_file(&s, f);
1233  result = stbi__load_and_postprocess_8bit(&s, x, y, comp, req_comp);
1234  if (result) {
1235  // need to 'unget' all the characters in the IO buffer
1236  fseek(f, -(int)(s.img_buffer_end - s.img_buffer), SEEK_CUR);
1237  }
1238  return result;
1239 }
1240 
1241 STBIDEF stbi__uint16 *stbi_load_from_file_16(FILE *f, int *x, int *y, int *comp,
1242  int req_comp)
1243 {
1244  stbi__uint16 *result;
1245  stbi__context s;
1246  stbi__start_file(&s, f);
1247  result = stbi__load_and_postprocess_16bit(&s, x, y, comp, req_comp);
1248  if (result) {
1249  // need to 'unget' all the characters in the IO buffer
1250  fseek(f, -(int)(s.img_buffer_end - s.img_buffer), SEEK_CUR);
1251  }
1252  return result;
1253 }
1254 
1255 STBIDEF stbi_us *stbi_load_16(char const *filename, int *x, int *y, int *comp,
1256  int req_comp)
1257 {
1258  FILE *f = stbi__fopen(filename, "rb");
1259  stbi__uint16 *result;
1260  if (!f)
1261  return (stbi_us *)stbi__errpuc("can't fopen", "Unable to open file");
1262  result = stbi_load_from_file_16(f, x, y, comp, req_comp);
1263  fclose(f);
1264  return result;
1265 }
1266 
1267 #endif
1268 
1269 STBIDEF stbi_us *stbi_load_16_from_memory(stbi_uc const *buffer, int len,
1270  int *x, int *y, int *channels_in_file,
1271  int desired_channels)
1272 {
1273  stbi__context s;
1274  stbi__start_mem(&s, buffer, len);
1275  return stbi__load_and_postprocess_16bit(&s, x, y, channels_in_file,
1276  desired_channels);
1277 }
1278 
1280  void *user, int *x, int *y,
1281  int *channels_in_file,
1282  int desired_channels)
1283 {
1284  stbi__context s;
1285  stbi__start_callbacks(&s, (stbi_io_callbacks *)clbk, user);
1286  return stbi__load_and_postprocess_16bit(&s, x, y, channels_in_file,
1287  desired_channels);
1288 }
1289 
1290 STBIDEF stbi_uc *stbi_load_from_memory(stbi_uc const *buffer, int len, int *x,
1291  int *y, int *comp, int req_comp)
1292 {
1293  stbi__context s;
1294  stbi__start_mem(&s, buffer, len);
1295  return stbi__load_and_postprocess_8bit(&s, x, y, comp, req_comp);
1296 }
1297 
1299  void *user, int *x, int *y, int *comp,
1300  int req_comp)
1301 {
1302  stbi__context s;
1303  stbi__start_callbacks(&s, (stbi_io_callbacks *)clbk, user);
1304  return stbi__load_and_postprocess_8bit(&s, x, y, comp, req_comp);
1305 }
1306 
1307 #ifndef STBI_NO_LINEAR
1308 static float *stbi__loadf_main(stbi__context *s, int *x, int *y, int *comp,
1309  int req_comp)
1310 {
1311  unsigned char *data;
1312 #ifndef STBI_NO_HDR
1313  if (stbi__hdr_test(s)) {
1314  stbi__result_info ri;
1315  float *hdr_data = stbi__hdr_load(s, x, y, comp, req_comp, &ri);
1316  if (hdr_data)
1317  stbi__float_postprocess(hdr_data, x, y, comp, req_comp);
1318  return hdr_data;
1319  }
1320 #endif
1321  data = stbi__load_and_postprocess_8bit(s, x, y, comp, req_comp);
1322  if (data)
1323  return stbi__ldr_to_hdr(data, *x, *y, req_comp ? req_comp : *comp);
1324  return stbi__errpf("unknown image type",
1325  "Image not of any known type, or corrupt");
1326 }
1327 
1328 STBIDEF float *stbi_loadf_from_memory(stbi_uc const *buffer, int len, int *x,
1329  int *y, int *comp, int req_comp)
1330 {
1331  stbi__context s;
1332  stbi__start_mem(&s, buffer, len);
1333  return stbi__loadf_main(&s, x, y, comp, req_comp);
1334 }
1335 
1337  void *user, int *x, int *y, int *comp,
1338  int req_comp)
1339 {
1340  stbi__context s;
1341  stbi__start_callbacks(&s, (stbi_io_callbacks *)clbk, user);
1342  return stbi__loadf_main(&s, x, y, comp, req_comp);
1343 }
1344 
1345 #ifndef STBI_NO_STDIO
1346 STBIDEF float *stbi_loadf(char const *filename, int *x, int *y, int *comp,
1347  int req_comp)
1348 {
1349  float *result;
1350  FILE *f = stbi__fopen(filename, "rb");
1351  if (!f)
1352  return stbi__errpf("can't fopen", "Unable to open file");
1353  result = stbi_loadf_from_file(f, x, y, comp, req_comp);
1354  fclose(f);
1355  return result;
1356 }
1357 
1358 STBIDEF float *stbi_loadf_from_file(FILE *f, int *x, int *y, int *comp,
1359  int req_comp)
1360 {
1361  stbi__context s;
1362  stbi__start_file(&s, f);
1363  return stbi__loadf_main(&s, x, y, comp, req_comp);
1364 }
1365 #endif // !STBI_NO_STDIO
1366 
1367 #endif // !STBI_NO_LINEAR
1368 
1369 // these is-hdr-or-not queries are defined regardless of whether HDR
1370 // support is compiled in, for API simplicity; if STBI_NO_HDR is defined,
1371 // they always report false!
1372 
1373 STBIDEF int stbi_is_hdr_from_memory(stbi_uc const *buffer, int len)
1374 {
1375 #ifndef STBI_NO_HDR
1376  stbi__context s;
1377  stbi__start_mem(&s, buffer, len);
1378  return stbi__hdr_test(&s);
1379 #else
1380  STBI_NOTUSED(buffer);
1381  STBI_NOTUSED(len);
1382  return 0;
1383 #endif
1384 }
1385 
1386 #ifndef STBI_NO_STDIO
1387 STBIDEF int stbi_is_hdr(char const *filename)
1388 {
1389  FILE *f = stbi__fopen(filename, "rb");
1390  int result = 0;
1391  if (f) {
1392  result = stbi_is_hdr_from_file(f);
1393  fclose(f);
1394  }
1395  return result;
1396 }
1397 
1398 STBIDEF int stbi_is_hdr_from_file(FILE *f)
1399 {
1400 #ifndef STBI_NO_HDR
1401  stbi__context s;
1402  stbi__start_file(&s, f);
1403  return stbi__hdr_test(&s);
1404 #else
1405  STBI_NOTUSED(f);
1406  return 0;
1407 #endif
1408 }
1409 #endif // !STBI_NO_STDIO
1410 
1412  void *user)
1413 {
1414 #ifndef STBI_NO_HDR
1415  stbi__context s;
1416  stbi__start_callbacks(&s, (stbi_io_callbacks *)clbk, user);
1417  return stbi__hdr_test(&s);
1418 #else
1419  STBI_NOTUSED(clbk);
1420  STBI_NOTUSED(user);
1421  return 0;
1422 #endif
1423 }
1424 
1425 #ifndef STBI_NO_LINEAR
1426 static float stbi__l2h_gamma = 2.2f, stbi__l2h_scale = 1.0f;
1427 
1428 STBIDEF void stbi_ldr_to_hdr_gamma(float gamma)
1429 {
1430  stbi__l2h_gamma = gamma;
1431 }
1432 STBIDEF void stbi_ldr_to_hdr_scale(float scale)
1433 {
1434  stbi__l2h_scale = scale;
1435 }
1436 #endif
1437 
1438 static float stbi__h2l_gamma_i = 1.0f / 2.2f, stbi__h2l_scale_i = 1.0f;
1439 
1440 STBIDEF void stbi_hdr_to_ldr_gamma(float gamma)
1441 {
1442  stbi__h2l_gamma_i = 1 / gamma;
1443 }
1444 STBIDEF void stbi_hdr_to_ldr_scale(float scale)
1445 {
1446  stbi__h2l_scale_i = 1 / scale;
1447 }
1448 
1450 //
1451 // Common code used by all image loaders
1452 //
1453 
// Scan-mode constants; values are 0, 1, 2 in declaration order.
enum {
    STBI__SCAN_load = 0,
    STBI__SCAN_type,
    STBI__SCAN_header
};
1455 
1456 static void stbi__refill_buffer(stbi__context *s)
1457 {
1458  int n = (s->io.read)(s->io_user_data, (char *)s->buffer_start, s->buflen);
1459  if (n == 0) {
1460  // at end of file, treat same as if from memory, but need to handle case
1461  // where s->img_buffer isn't pointing to safe memory, e.g. 0-byte file
1462  s->read_from_callbacks = 0;
1463  s->img_buffer = s->buffer_start;
1464  s->img_buffer_end = s->buffer_start + 1;
1465  *s->img_buffer = 0;
1466  } else {
1467  s->img_buffer = s->buffer_start;
1468  s->img_buffer_end = s->buffer_start + n;
1469  }
1470 }
1471 
1472 stbi_inline static stbi_uc stbi__get8(stbi__context *s)
1473 {
1474  if (s->img_buffer < s->img_buffer_end)
1475  return *s->img_buffer++;
1476  if (s->read_from_callbacks) {
1477  stbi__refill_buffer(s);
1478  return *s->img_buffer++;
1479  }
1480  return 0;
1481 }
1482 
1483 stbi_inline static int stbi__at_eof(stbi__context *s)
1484 {
1485  if (s->io.read) {
1486  if (!(s->io.eof)(s->io_user_data))
1487  return 0;
1488  // if feof() is true, check if buffer = end
1489  // special case: we've only got the special 0 character at the end
1490  if (s->read_from_callbacks == 0)
1491  return 1;
1492  }
1493 
1494  return s->img_buffer >= s->img_buffer_end;
1495 }
1496 
1497 static void stbi__skip(stbi__context *s, int n)
1498 {
1499  if (n < 0) {
1500  s->img_buffer = s->img_buffer_end;
1501  return;
1502  }
1503  if (s->io.read) {
1504  int blen = (int)(s->img_buffer_end - s->img_buffer);
1505  if (blen < n) {
1506  s->img_buffer = s->img_buffer_end;
1507  (s->io.skip)(s->io_user_data, n - blen);
1508  return;
1509  }
1510  }
1511  s->img_buffer += n;
1512 }
1513 
1514 static int stbi__getn(stbi__context *s, stbi_uc *buffer, int n)
1515 {
1516  if (s->io.read) {
1517  int blen = (int)(s->img_buffer_end - s->img_buffer);
1518  if (blen < n) {
1519  int res, count;
1520 
1521  memcpy(buffer, s->img_buffer, blen);
1522 
1523  count =
1524  (s->io.read)(s->io_user_data, (char *)buffer + blen, n - blen);
1525  res = (count == (n - blen));
1526  s->img_buffer = s->img_buffer_end;
1527  return res;
1528  }
1529  }
1530 
1531  if (s->img_buffer + n <= s->img_buffer_end) {
1532  memcpy(buffer, s->img_buffer, n);
1533  s->img_buffer += n;
1534  return 1;
1535  } else
1536  return 0;
1537 }
1538 
1539 static int stbi__get16be(stbi__context *s)
1540 {
1541  int z = stbi__get8(s);
1542  return (z << 8) + stbi__get8(s);
1543 }
1544 
1545 static stbi__uint32 stbi__get32be(stbi__context *s)
1546 {
1547  stbi__uint32 z = stbi__get16be(s);
1548  return (z << 16) + stbi__get16be(s);
1549 }
1550 
1551 #if defined(STBI_NO_BMP) && defined(STBI_NO_TGA) && defined(STBI_NO_GIF)
1552 // nothing
1553 #else
1554 static int stbi__get16le(stbi__context *s)
1555 {
1556  int z = stbi__get8(s);
1557  return z + (stbi__get8(s) << 8);
1558 }
1559 #endif
1560 
1561 #ifndef STBI_NO_BMP
1562 static stbi__uint32 stbi__get32le(stbi__context *s)
1563 {
1564  stbi__uint32 z = stbi__get16le(s);
1565  return z + (stbi__get16le(s) << 16);
1566 }
1567 #endif
1568 
// truncate int to byte without warnings: keeps only the low 8 bits
#define STBI__BYTECAST(v) ((stbi_uc)((v) & 0xFF))
1571 
1573 //
1574 // generic converter from built-in img_n to req_comp
1575 // individual types do this automatically as much as possible (e.g. jpeg
1576 // does all cases internally since it needs to colorspace convert anyway,
1577 // and it never has alpha, so very few cases ). png can automatically
1578 // interleave an alpha=255 channel, but falls back to this for other cases
1579 //
1580 // assume data buffer is malloced, so malloc a new one and free that one
1581 // only failure mode is malloc failing
1582 
1583 static stbi_uc stbi__compute_y(int r, int g, int b)
1584 {
1585  return (stbi_uc)(((r * 77) + (g * 150) + (29 * b)) >> 8);
1586 }
1587 
// Converts an 8-bit image with img_n components per pixel into a newly
// allocated image with req_comp components per pixel (x by y pixels).
//
// Ownership: if req_comp == img_n, `data` is returned unchanged. Otherwise
// `data` is always freed, on success and on every failure path.
//
// Returns the converted buffer, or NULL with the failure reason set when
// the allocation fails or the (img_n, req_comp) pair is unsupported.
static unsigned char *stbi__convert_format(unsigned char *data, int img_n,
                                           int req_comp, unsigned int x,
                                           unsigned int y)
{
    int i, j;
    unsigned char *good;

    if (req_comp == img_n)
        return data;
    STBI_ASSERT(req_comp >= 1 && req_comp <= 4);

    good = (unsigned char *)stbi__malloc_mad3(req_comp, x, y, 0);
    if (good == NULL) {
        STBI_FREE(data);
        return stbi__errpuc("outofmem", "Out of memory");
    }

    for (j = 0; j < (int)y; ++j) {
        unsigned char *src = data + j * x * img_n;
        unsigned char *dest = good + j * x * req_comp;

#define STBI__COMBO(a, b) ((a)*8 + (b))
#define STBI__CASE(a, b)    \
    case STBI__COMBO(a, b): \
        for (i = x - 1; i >= 0; --i, src += a, dest += b)
        // convert source image with img_n components to one with req_comp
        // components; avoid switch per pixel, so use switch per scanline and
        // massive macros
        switch (STBI__COMBO(img_n, req_comp)) {
            STBI__CASE(1, 2) { dest[0] = src[0], dest[1] = 255; }
            break;
            STBI__CASE(1, 3) { dest[0] = dest[1] = dest[2] = src[0]; }
            break;
            STBI__CASE(1, 4)
            {
                dest[0] = dest[1] = dest[2] = src[0], dest[3] = 255;
            }
            break;
            STBI__CASE(2, 1) { dest[0] = src[0]; }
            break;
            STBI__CASE(2, 3) { dest[0] = dest[1] = dest[2] = src[0]; }
            break;
            STBI__CASE(2, 4)
            {
                dest[0] = dest[1] = dest[2] = src[0], dest[3] = src[1];
            }
            break;
            STBI__CASE(3, 4)
            {
                dest[0] = src[0], dest[1] = src[1], dest[2] = src[2],
                dest[3] = 255;
            }
            break;
            STBI__CASE(3, 1)
            {
                dest[0] = stbi__compute_y(src[0], src[1], src[2]);
            }
            break;
            STBI__CASE(3, 2)
            {
                dest[0] = stbi__compute_y(src[0], src[1], src[2]),
                dest[1] = 255;
            }
            break;
            STBI__CASE(4, 1)
            {
                dest[0] = stbi__compute_y(src[0], src[1], src[2]);
            }
            break;
            STBI__CASE(4, 2)
            {
                dest[0] = stbi__compute_y(src[0], src[1], src[2]),
                dest[1] = src[3];
            }
            break;
            STBI__CASE(4, 3)
            {
                dest[0] = src[0], dest[1] = src[1], dest[2] = src[2];
            }
            break;
        default:
            // STBI_ASSERT may be compiled out; without the cleanup below an
            // unsupported pair would silently return an uninitialised
            // buffer to the caller.
            STBI_ASSERT(0);
            STBI_FREE(data);
            STBI_FREE(good);
            return stbi__errpuc("unsupported",
                                "Unsupported format conversion");
        }
#undef STBI__CASE
    }

    STBI_FREE(data);
    return good;
}
1677 
1678 static stbi__uint16 stbi__compute_y_16(int r, int g, int b)
1679 {
1680  return (stbi__uint16)(((r * 77) + (g * 150) + (29 * b)) >> 8);
1681 }
1682 
1683 static stbi__uint16 *stbi__convert_format16(stbi__uint16 *data, int img_n,
1684  int req_comp, unsigned int x,
1685  unsigned int y)
1686 {
1687  int i, j;
1688  stbi__uint16 *good;
1689 
1690  if (req_comp == img_n)
1691  return data;
1692  STBI_ASSERT(req_comp >= 1 && req_comp <= 4);
1693 
1694  good = (stbi__uint16 *)stbi__malloc(req_comp * x * y * 2);
1695  if (good == NULL) {
1696  STBI_FREE(data);
1697  return (stbi__uint16 *)stbi__errpuc("outofmem", "Out of memory");
1698  }
1699 
1700  for (j = 0; j < (int)y; ++j) {
1701  stbi__uint16 *src = data + j * x * img_n;
1702  stbi__uint16 *dest = good + j * x * req_comp;
1703 
1704 #define STBI__COMBO(a, b) ((a)*8 + (b))
1705 #define STBI__CASE(a, b) \
1706  case STBI__COMBO(a, b): \
1707  for (i = x - 1; i >= 0; --i, src += a, dest += b)
1708  // convert source image with img_n components to one with req_comp
1709  // components; avoid switch per pixel, so use switch per scanline and
1710  // massive macros
1711  switch (STBI__COMBO(img_n, req_comp)) {
1712  STBI__CASE(1, 2) { dest[0] = src[0], dest[1] = 0xffff; }
1713  break;
1714  STBI__CASE(1, 3) { dest[0] = dest[1] = dest[2] = src[0]; }
1715  break;
1716  STBI__CASE(1, 4)
1717  {
1718  dest[0] = dest[1] = dest[2] = src[0], dest[3] = 0xffff;
1719  }
1720  break;
1721  STBI__CASE(2, 1) { dest[0] = src[0]; }
1722  break;
1723  STBI__CASE(2, 3) { dest[0] = dest[1] = dest[2] = src[0]; }
1724  break;
1725  STBI__CASE(2, 4)
1726  {
1727  dest[0] = dest[1] = dest[2] = src[0], dest[3] = src[1];
1728  }
1729  break;
1730  STBI__CASE(3, 4)
1731  {
1732  dest[0] = src[0], dest[1] = src[1], dest[2] = src[2],
1733  dest[3] = 0xffff;
1734  }
1735  break;
1736  STBI__CASE(3, 1)
1737  {
1738  dest[0] = stbi__compute_y_16(src[0], src[1], src[2]);
1739  }
1740  break;
1741  STBI__CASE(3, 2)
1742  {
1743  dest[0] = stbi__compute_y_16(src[0], src[1], src[2]),
1744  dest[1] = 0xffff;
1745  }
1746  break;
1747  STBI__CASE(4, 1)
1748  {
1749  dest[0] = stbi__compute_y_16(src[0], src[1], src[2]);
1750  }
1751  break;
1752  STBI__CASE(4, 2)
1753  {
1754  dest[0] = stbi__compute_y_16(src[0], src[1], src[2]),
1755  dest[1] = src[3];
1756  }
1757  break;
1758  STBI__CASE(4, 3)
1759  {
1760  dest[0] = src[0], dest[1] = src[1], dest[2] = src[2];
1761  }
1762  break;
1763  default:
1764  STBI_ASSERT(0);
1765  }
1766 #undef STBI__CASE
1767  }
1768 
1769  STBI_FREE(data);
1770  return good;
1771 }
1772 
1773 #ifndef STBI_NO_LINEAR
1774 static float *stbi__ldr_to_hdr(stbi_uc *data, int x, int y, int comp)
1775 {
1776  int i, k, n;
1777  float *output;
1778  if (!data)
1779  return NULL;
1780  output = (float *)stbi__malloc_mad4(x, y, comp, sizeof(float), 0);
1781  if (output == NULL) {
1782  STBI_FREE(data);
1783  return stbi__errpf("outofmem", "Out of memory");
1784  }
1785  // compute number of non-alpha components
1786  if (comp & 1)
1787  n = comp;
1788  else
1789  n = comp - 1;
1790  for (i = 0; i < x * y; ++i) {
1791  for (k = 0; k < n; ++k) {
1792  output[i * comp + k] =
1793  (float)(pow(data[i * comp + k] / 255.0f, stbi__l2h_gamma) *
1794  stbi__l2h_scale);
1795  }
1796  if (k < comp)
1797  output[i * comp + k] = data[i * comp + k] / 255.0f;
1798  }
1799  STBI_FREE(data);
1800  return output;
1801 }
1802 #endif
1803 
1804 #ifndef STBI_NO_HDR
1805 #define stbi__float2int(x) ((int)(x))
1806 static stbi_uc *stbi__hdr_to_ldr(float *data, int x, int y, int comp)
1807 {
1808  int i, k, n;
1809  stbi_uc *output;
1810  if (!data)
1811  return NULL;
1812  output = (stbi_uc *)stbi__malloc_mad3(x, y, comp, 0);
1813  if (output == NULL) {
1814  STBI_FREE(data);
1815  return stbi__errpuc("outofmem", "Out of memory");
1816  }
1817  // compute number of non-alpha components
1818  if (comp & 1)
1819  n = comp;
1820  else
1821  n = comp - 1;
1822  for (i = 0; i < x * y; ++i) {
1823  for (k = 0; k < n; ++k) {
1824  float z = (float)pow(data[i * comp + k] * stbi__h2l_scale_i,
1825  stbi__h2l_gamma_i) *
1826  255 +
1827  0.5f;
1828  if (z < 0)
1829  z = 0;
1830  if (z > 255)
1831  z = 255;
1832  output[i * comp + k] = (stbi_uc)stbi__float2int(z);
1833  }
1834  if (k < comp) {
1835  float z = data[i * comp + k] * 255 + 0.5f;
1836  if (z < 0)
1837  z = 0;
1838  if (z > 255)
1839  z = 255;
1840  output[i * comp + k] = (stbi_uc)stbi__float2int(z);
1841  }
1842  }
1843  STBI_FREE(data);
1844  return output;
1845 }
1846 #endif
1847 
1849 //
1850 // "baseline" JPEG/JFIF decoder
1851 //
1852 // simple implementation
1853 // - doesn't support delayed output of y-dimension
1854 // - simple interface (only one output format: 8-bit interleaved RGB)
1855 // - doesn't try to recover corrupt jpegs
1856 // - doesn't allow partial loading, loading multiple at once
1857 // - still fast on x86 (copying globals into locals doesn't help x86)
1858 // - allocates lots of intermediate memory (full size of all
1859 // components)
1860 // - non-interleaved case requires this anyway
1861 // - allows good upsampling (see next)
1862 // high-quality
1863 // - upsampled channels are bilinearly interpolated, even across blocks
1864 // - quality integer IDCT derived from IJG's 'slow'
1865 // performance
1866 // - fast huffman; reasonable integer IDCT
1867 // - some SIMD kernels for common paths on targets with SSE2/NEON
1868 // - uses a lot of intermediate memory, could cache poorly
1869 
1870 #ifndef STBI_NO_JPEG
1871 
1872 // huffman decoding acceleration
1873 #define FAST_BITS 9 // larger handles more cases; smaller stomps less cache
1874 
1875 typedef struct {
1876  stbi_uc fast[1 << FAST_BITS];
1877  // weirdly, repacking this into AoS is a 10% speed loss, instead of a win
1878  stbi__uint16 code[256];
1879  stbi_uc values[256];
1880  stbi_uc size[257];
1881  unsigned int maxcode[18];
1882  int delta[17]; // old 'firstsymbol' - old 'firstcode'
1883 } stbi__huffman;
1884 
1885 typedef struct {
1886  stbi__context *s;
1887  stbi__huffman huff_dc[4];
1888  stbi__huffman huff_ac[4];
1889  stbi__uint16 dequant[4][64];
1890  stbi__int16 fast_ac[4][1 << FAST_BITS];
1891 
1892  // sizes for components, interleaved MCUs
1893  int img_h_max, img_v_max;
1894  int img_mcu_x, img_mcu_y;
1895  int img_mcu_w, img_mcu_h;
1896 
1897  // definition of jpeg image component
1898  struct {
1899  int id;
1900  int h, v;
1901  int tq;
1902  int hd, ha;
1903  int dc_pred;
1904 
1905  int x, y, w2, h2;
1906  stbi_uc *data;
1907  void *raw_data, *raw_coeff;
1908  stbi_uc *linebuf;
1909  short *coeff; // progressive only
1910  int coeff_w, coeff_h; // number of 8x8 coefficient blocks
1911  } img_comp[4];
1912 
1913  stbi__uint32 code_buffer; // jpeg entropy-coded buffer
1914  int code_bits; // number of valid bits
1915  unsigned char marker; // marker seen while filling entropy buffer
1916  int nomore; // flag if we saw a marker so must stop
1917 
1918  int progressive;
1919  int spec_start;
1920  int spec_end;
1921  int succ_high;
1922  int succ_low;
1923  int eob_run;
1924  int jfif;
1925  int app14_color_transform; // Adobe APP14 tag
1926  int rgb;
1927 
1928  int scan_n, order[4];
1929  int restart_interval, todo;
1930 
1931  // kernels
1932  void (*idct_block_kernel)(stbi_uc *out, int out_stride, short data[64]);
1933  void (*YCbCr_to_RGB_kernel)(stbi_uc *out, const stbi_uc *y,
1934  const stbi_uc *pcb, const stbi_uc *pcr,
1935  int count, int step);
1936  stbi_uc *(*resample_row_hv_2_kernel)(stbi_uc *out, stbi_uc *in_near,
1937  stbi_uc *in_far, int w, int hs);
1938 } stbi__jpeg;
1939 
1940 static int stbi__build_huffman(stbi__huffman *h, int *count)
1941 {
1942  int i, j, k = 0, code;
1943  // build size list for each symbol (from JPEG spec)
1944  for (i = 0; i < 16; ++i)
1945  for (j = 0; j < count[i]; ++j)
1946  h->size[k++] = (stbi_uc)(i + 1);
1947  h->size[k] = 0;
1948 
1949  // compute actual symbols (from jpeg spec)
1950  code = 0;
1951  k = 0;
1952  for (j = 1; j <= 16; ++j) {
1953  // compute delta to add to code to compute symbol id
1954  h->delta[j] = k - code;
1955  if (h->size[k] == j) {
1956  while (h->size[k] == j)
1957  h->code[k++] = (stbi__uint16)(code++);
1958  if (code - 1 >= (1 << j))
1959  return stbi__err("bad code lengths", "Corrupt JPEG");
1960  }
1961  // compute largest code + 1 for this size, preshifted as needed later
1962  h->maxcode[j] = code << (16 - j);
1963  code <<= 1;
1964  }
1965  h->maxcode[j] = 0xffffffff;
1966 
1967  // build non-spec acceleration table; 255 is flag for not-accelerated
1968  memset(h->fast, 255, 1 << FAST_BITS);
1969  for (i = 0; i < k; ++i) {
1970  int s = h->size[i];
1971  if (s <= FAST_BITS) {
1972  int c = h->code[i] << (FAST_BITS - s);
1973  int m = 1 << (FAST_BITS - s);
1974  for (j = 0; j < m; ++j) {
1975  h->fast[c + j] = (stbi_uc)i;
1976  }
1977  }
1978  }
1979  return 1;
1980 }
1981 
1982 // build a table that decodes both magnitude and value of small ACs in
1983 // one go.
1984 static void stbi__build_fast_ac(stbi__int16 *fast_ac, stbi__huffman *h)
1985 {
1986  int i;
1987  for (i = 0; i < (1 << FAST_BITS); ++i) {
1988  stbi_uc fast = h->fast[i];
1989  fast_ac[i] = 0;
1990  if (fast < 255) {
1991  int rs = h->values[fast];
1992  int run = (rs >> 4) & 15;
1993  int magbits = rs & 15;
1994  int len = h->size[fast];
1995 
1996  if (magbits && len + magbits <= FAST_BITS) {
1997  // magnitude code followed by receive_extend code
1998  int k = ((i << len) & ((1 << FAST_BITS) - 1)) >>
1999  (FAST_BITS - magbits);
2000  int m = 1 << (magbits - 1);
2001  if (k < m)
2002  k += (~0U << magbits) + 1;
2003  // if the result is small enough, we can fit it in fast_ac table
2004  if (k >= -128 && k <= 127)
2005  fast_ac[i] =
2006  (stbi__int16)((k << 8) + (run << 4) + (len + magbits));
2007  }
2008  }
2009  }
2010 }
2011 
2012 static void stbi__grow_buffer_unsafe(stbi__jpeg *j)
2013 {
2014  do {
2015  int b = j->nomore ? 0 : stbi__get8(j->s);
2016  if (b == 0xff) {
2017  int c = stbi__get8(j->s);
2018  while (c == 0xff)
2019  c = stbi__get8(j->s); // consume fill bytes
2020  if (c != 0) {
2021  j->marker = (unsigned char)c;
2022  j->nomore = 1;
2023  return;
2024  }
2025  }
2026  j->code_buffer |= b << (24 - j->code_bits);
2027  j->code_bits += 8;
2028  } while (j->code_bits <= 24);
2029 }
2030 
2031 // (1 << n) - 1
2032 static stbi__uint32 stbi__bmask[17] = {0, 1, 3, 7, 15, 31,
2033  63, 127, 255, 511, 1023, 2047,
2034  4095, 8191, 16383, 32767, 65535};
2035 
2036 // decode a jpeg huffman value from the bitstream
2037 stbi_inline static int stbi__jpeg_huff_decode(stbi__jpeg *j, stbi__huffman *h)
2038 {
2039  unsigned int temp;
2040  int c, k;
2041 
2042  if (j->code_bits < 16)
2043  stbi__grow_buffer_unsafe(j);
2044 
2045  // look at the top FAST_BITS and determine what symbol ID it is,
2046  // if the code is <= FAST_BITS
2047  c = (j->code_buffer >> (32 - FAST_BITS)) & ((1 << FAST_BITS) - 1);
2048  k = h->fast[c];
2049  if (k < 255) {
2050  int s = h->size[k];
2051  if (s > j->code_bits)
2052  return -1;
2053  j->code_buffer <<= s;
2054  j->code_bits -= s;
2055  return h->values[k];
2056  }
2057 
2058  // naive test is to shift the code_buffer down so k bits are
2059  // valid, then test against maxcode. To speed this up, we've
2060  // preshifted maxcode left so that it has (16-k) 0s at the
2061  // end; in other words, regardless of the number of bits, it
2062  // wants to be compared against something shifted to have 16;
2063  // that way we don't need to shift inside the loop.
2064  temp = j->code_buffer >> 16;
2065  for (k = FAST_BITS + 1;; ++k)
2066  if (temp < h->maxcode[k])
2067  break;
2068  if (k == 17) {
2069  // error! code not found
2070  j->code_bits -= 16;
2071  return -1;
2072  }
2073 
2074  if (k > j->code_bits)
2075  return -1;
2076 
2077  // convert the huffman code to the symbol id
2078  c = ((j->code_buffer >> (32 - k)) & stbi__bmask[k]) + h->delta[k];
2079  STBI_ASSERT((((j->code_buffer) >> (32 - h->size[c])) &
2080  stbi__bmask[h->size[c]]) == h->code[c]);
2081 
2082  // convert the id to a symbol
2083  j->code_bits -= k;
2084  j->code_buffer <<= k;
2085  return h->values[c];
2086 }
2087 
// stbi__jbias[n] == 1 - (1 << n): the offset added to an n-bit field to
// obtain a negative coefficient, for n = 0..15
static int const stbi__jbias[16] = {
    0,    -1,    -3,    -7,     // n =  0..3
    -15,  -31,   -63,   -127,   // n =  4..7
    -255, -511,  -1023, -2047,  // n =  8..11
    -4095, -8191, -16383, -32767 // n = 12..15
};
2092 
2093 // combined JPEG 'receive' and JPEG 'extend', since baseline
2094 // always extends everything it receives.
2095 stbi_inline static int stbi__extend_receive(stbi__jpeg *j, int n)
2096 {
2097  unsigned int k;
2098  int sgn;
2099  if (j->code_bits < n)
2100  stbi__grow_buffer_unsafe(j);
2101 
2102  sgn = (stbi__int32)j->code_buffer >> 31; // sign bit is always in MSB
2103  k = stbi_lrot(j->code_buffer, n);
2104  STBI_ASSERT(n >= 0 &&
2105  n < (int)(sizeof(stbi__bmask) / sizeof(*stbi__bmask)));
2106  j->code_buffer = k & ~stbi__bmask[n];
2107  k &= stbi__bmask[n];
2108  j->code_bits -= n;
2109  return k + (stbi__jbias[n] & ~sgn);
2110 }
2111 
2112 // get some unsigned bits
2113 stbi_inline static int stbi__jpeg_get_bits(stbi__jpeg *j, int n)
2114 {
2115  unsigned int k;
2116  if (j->code_bits < n)
2117  stbi__grow_buffer_unsafe(j);
2118  k = stbi_lrot(j->code_buffer, n);
2119  j->code_buffer = k & ~stbi__bmask[n];
2120  k &= stbi__bmask[n];
2121  j->code_bits -= n;
2122  return k;
2123 }
2124 
2125 stbi_inline static int stbi__jpeg_get_bit(stbi__jpeg *j)
2126 {
2127  unsigned int k;
2128  if (j->code_bits < 1)
2129  stbi__grow_buffer_unsafe(j);
2130  k = j->code_buffer;
2131  j->code_buffer <<= 1;
2132  --j->code_bits;
2133  return k & 0x80000000;
2134 }
2135 
2136 // given a value that's at position X in the zigzag stream,
2137 // where does it appear in the 8x8 matrix coded as row-major?
2138 static stbi_uc stbi__jpeg_dezigzag[64 + 15] = {
2139  0, 1, 8, 16, 9, 2, 3, 10, 17, 24, 32, 25, 18, 11, 4, 5, 12, 19, 26, 33, 40,
2140  48, 41, 34, 27, 20, 13, 6, 7, 14, 21, 28, 35, 42, 49, 56, 57, 50, 43, 36,
2141  29, 22, 15, 23, 30, 37, 44, 51, 58, 59, 52, 45, 38, 31, 39, 46, 53, 60, 61,
2142  54, 47, 55, 62, 63,
2143  // let corrupt input sample past end
2144  63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63};
2145 
2146 // decode one 64-entry block--
2147 static int stbi__jpeg_decode_block(stbi__jpeg *j, short data[64],
2148  stbi__huffman *hdc, stbi__huffman *hac,
2149  stbi__int16 *fac, int b,
2150  stbi__uint16 *dequant)
2151 {
2152  int diff, dc, k;
2153  int t;
2154 
2155  if (j->code_bits < 16)
2156  stbi__grow_buffer_unsafe(j);
2157  t = stbi__jpeg_huff_decode(j, hdc);
2158  if (t < 0)
2159  return stbi__err("bad huffman code", "Corrupt JPEG");
2160 
2161  // 0 all the ac values now so we can do it 32-bits at a time
2162  memset(data, 0, 64 * sizeof(data[0]));
2163 
2164  diff = t ? stbi__extend_receive(j, t) : 0;
2165  dc = j->img_comp[b].dc_pred + diff;
2166  j->img_comp[b].dc_pred = dc;
2167  data[0] = (short)(dc * dequant[0]);
2168 
2169  // decode AC components, see JPEG spec
2170  k = 1;
2171  do {
2172  unsigned int zig;
2173  int c, r, s;
2174  if (j->code_bits < 16)
2175  stbi__grow_buffer_unsafe(j);
2176  c = (j->code_buffer >> (32 - FAST_BITS)) & ((1 << FAST_BITS) - 1);
2177  r = fac[c];
2178  if (r) { // fast-AC path
2179  k += (r >> 4) & 15; // run
2180  s = r & 15; // combined length
2181  j->code_buffer <<= s;
2182  j->code_bits -= s;
2183  // decode into unzigzag'd location
2184  zig = stbi__jpeg_dezigzag[k++];
2185  data[zig] = (short)((r >> 8) * dequant[zig]);
2186  } else {
2187  int rs = stbi__jpeg_huff_decode(j, hac);
2188  if (rs < 0)
2189  return stbi__err("bad huffman code", "Corrupt JPEG");
2190  s = rs & 15;
2191  r = rs >> 4;
2192  if (s == 0) {
2193  if (rs != 0xf0)
2194  break; // end block
2195  k += 16;
2196  } else {
2197  k += r;
2198  // decode into unzigzag'd location
2199  zig = stbi__jpeg_dezigzag[k++];
2200  data[zig] = (short)(stbi__extend_receive(j, s) * dequant[zig]);
2201  }
2202  }
2203  } while (k < 64);
2204  return 1;
2205 }
2206 
2207 static int stbi__jpeg_decode_block_prog_dc(stbi__jpeg *j, short data[64],
2208  stbi__huffman *hdc, int b)
2209 {
2210  int diff, dc;
2211  int t;
2212  if (j->spec_end != 0)
2213  return stbi__err("can't merge dc and ac", "Corrupt JPEG");
2214 
2215  if (j->code_bits < 16)
2216  stbi__grow_buffer_unsafe(j);
2217 
2218  if (j->succ_high == 0) {
2219  // first scan for DC coefficient, must be first
2220  memset(data, 0, 64 * sizeof(data[0])); // 0 all the ac values now
2221  t = stbi__jpeg_huff_decode(j, hdc);
2222  diff = t ? stbi__extend_receive(j, t) : 0;
2223 
2224  dc = j->img_comp[b].dc_pred + diff;
2225  j->img_comp[b].dc_pred = dc;
2226  data[0] = (short)(dc << j->succ_low);
2227  } else {
2228  // refinement scan for DC coefficient
2229  if (stbi__jpeg_get_bit(j))
2230  data[0] += (short)(1 << j->succ_low);
2231  }
2232  return 1;
2233 }
2234 
2235 // @OPTIMIZE: store non-zigzagged during the decode passes,
2236 // and only de-zigzag when dequantizing
2237 static int stbi__jpeg_decode_block_prog_ac(stbi__jpeg *j, short data[64],
2238  stbi__huffman *hac, stbi__int16 *fac)
2239 {
2240  int k;
2241  if (j->spec_start == 0)
2242  return stbi__err("can't merge dc and ac", "Corrupt JPEG");
2243 
2244  if (j->succ_high == 0) {
2245  int shift = j->succ_low;
2246 
2247  if (j->eob_run) {
2248  --j->eob_run;
2249  return 1;
2250  }
2251 
2252  k = j->spec_start;
2253  do {
2254  unsigned int zig;
2255  int c, r, s;
2256  if (j->code_bits < 16)
2257  stbi__grow_buffer_unsafe(j);
2258  c = (j->code_buffer >> (32 - FAST_BITS)) & ((1 << FAST_BITS) - 1);
2259  r = fac[c];
2260  if (r) { // fast-AC path
2261  k += (r >> 4) & 15; // run
2262  s = r & 15; // combined length
2263  j->code_buffer <<= s;
2264  j->code_bits -= s;
2265  zig = stbi__jpeg_dezigzag[k++];
2266  data[zig] = (short)((r >> 8) << shift);
2267  } else {
2268  int rs = stbi__jpeg_huff_decode(j, hac);
2269  if (rs < 0)
2270  return stbi__err("bad huffman code", "Corrupt JPEG");
2271  s = rs & 15;
2272  r = rs >> 4;
2273  if (s == 0) {
2274  if (r < 15) {
2275  j->eob_run = (1 << r);
2276  if (r)
2277  j->eob_run += stbi__jpeg_get_bits(j, r);
2278  --j->eob_run;
2279  break;
2280  }
2281  k += 16;
2282  } else {
2283  k += r;
2284  zig = stbi__jpeg_dezigzag[k++];
2285  data[zig] = (short)(stbi__extend_receive(j, s) << shift);
2286  }
2287  }
2288  } while (k <= j->spec_end);
2289  } else {
2290  // refinement scan for these AC coefficients
2291 
2292  short bit = (short)(1 << j->succ_low);
2293 
2294  if (j->eob_run) {
2295  --j->eob_run;
2296  for (k = j->spec_start; k <= j->spec_end; ++k) {
2297  short *p = &data[stbi__jpeg_dezigzag[k]];
2298  if (*p != 0)
2299  if (stbi__jpeg_get_bit(j))
2300  if ((*p & bit) == 0) {
2301  if (*p > 0)
2302  *p += bit;
2303  else
2304  *p -= bit;
2305  }
2306  }
2307  } else {
2308  k = j->spec_start;
2309  do {
2310  int r, s;
2311  int rs = stbi__jpeg_huff_decode(j, hac); // @OPTIMIZE see if we
2312  // can use the fast
2313  // path here,
2314  // advance-by-r is so
2315  // slow, eh
2316  if (rs < 0)
2317  return stbi__err("bad huffman code", "Corrupt JPEG");
2318  s = rs & 15;
2319  r = rs >> 4;
2320  if (s == 0) {
2321  if (r < 15) {
2322  j->eob_run = (1 << r) - 1;
2323  if (r)
2324  j->eob_run += stbi__jpeg_get_bits(j, r);
2325  r = 64; // force end of block
2326  } else {
2327  // r=15 s=0 should write 16 0s, so we just do
2328  // a run of 15 0s and then write s (which is 0),
2329  // so we don't have to do anything special here
2330  }
2331  } else {
2332  if (s != 1)
2333  return stbi__err("bad huffman code", "Corrupt JPEG");
2334  // sign bit
2335  if (stbi__jpeg_get_bit(j))
2336  s = bit;
2337  else
2338  s = -bit;
2339  }
2340 
2341  // advance by r
2342  while (k <= j->spec_end) {
2343  short *p = &data[stbi__jpeg_dezigzag[k++]];
2344  if (*p != 0) {
2345  if (stbi__jpeg_get_bit(j))
2346  if ((*p & bit) == 0) {
2347  if (*p > 0)
2348  *p += bit;
2349  else
2350  *p -= bit;
2351  }
2352  } else {
2353  if (r == 0) {
2354  *p = (short)s;
2355  break;
2356  }
2357  --r;
2358  }
2359  }
2360  } while (k <= j->spec_end);
2361  }
2362  }
2363  return 1;
2364 }
2365 
2366 // take a -128..127 value and stbi__clamp it and convert to 0..255
2367 stbi_inline static stbi_uc stbi__clamp(int x)
2368 {
2369  // trick to use a single test to catch both cases
2370  if ((unsigned int)x > 255) {
2371  if (x < 0)
2372  return 0;
2373  if (x > 255)
2374  return 255;
2375  }
2376  return (stbi_uc)x;
2377 }
2378 
// Fixed-point helpers for the integer IDCT; both scale by 4096 (1 << 12).
// stbi__f2f converts a floating-point constant, adding 0.5 before the
// truncating cast. stbi__fsh scales an integer expression; it multiplies
// rather than shifts because its argument can be negative and left-shifting
// a negative value is undefined behavior.
#define stbi__f2f(x) ((int)(((x)*4096 + 0.5)))
#define stbi__fsh(x) ((x) * 4096)

// derived from jidctint -- DCT_ISLOW
//
// One 8-point 1-D IDCT over the inputs s0..s7. The macro declares its own
// working variables (t0..t3, p1..p5, x0..x3) in the enclosing scope, so it
// can be expanded only once per scope. On exit x0..x3 hold the even part and
// t0..t3 the odd part, both scaled by 1 << 12; the caller combines them as
// x0 +/- t3, x1 +/- t2, x2 +/- t1, x3 +/- t0.
#define STBI__IDCT_1D(s0, s1, s2, s3, s4, s5, s6, s7)                         \
    int t0, t1, t2, t3, p1, p2, p3, p4, p5, x0, x1, x2, x3;                   \
    p2 = s2;                                                                  \
    p3 = s6;                                                                  \
    p1 = (p2 + p3) * stbi__f2f(0.5411961f);                                   \
    t2 = p1 + p3 * stbi__f2f(-1.847759065f);                                  \
    t3 = p1 + p2 * stbi__f2f(0.765366865f);                                   \
    p2 = s0;                                                                  \
    p3 = s4;                                                                  \
    t0 = stbi__fsh(p2 + p3);                                                  \
    t1 = stbi__fsh(p2 - p3);                                                  \
    x0 = t0 + t3;                                                             \
    x3 = t0 - t3;                                                             \
    x1 = t1 + t2;                                                             \
    x2 = t1 - t2;                                                             \
    t0 = s7;                                                                  \
    t1 = s5;                                                                  \
    t2 = s3;                                                                  \
    t3 = s1;                                                                  \
    p3 = t0 + t2;                                                             \
    p4 = t1 + t3;                                                             \
    p1 = t0 + t3;                                                             \
    p2 = t1 + t2;                                                             \
    p5 = (p3 + p4) * stbi__f2f(1.175875602f);                                 \
    t0 = t0 * stbi__f2f(0.298631336f);                                        \
    t1 = t1 * stbi__f2f(2.053119869f);                                        \
    t2 = t2 * stbi__f2f(3.072711026f);                                        \
    t3 = t3 * stbi__f2f(1.501321110f);                                        \
    p1 = p5 + p1 * stbi__f2f(-0.899976223f);                                  \
    p2 = p5 + p2 * stbi__f2f(-2.562915447f);                                  \
    p3 = p3 * stbi__f2f(-1.961570560f);                                       \
    p4 = p4 * stbi__f2f(-0.390180644f);                                       \
    t3 += p1 + p4;                                                            \
    t2 += p2 + p3;                                                            \
    t1 += p2 + p4;                                                            \
    t0 += p1 + p3;
2419 
2420 static void stbi__idct_block(stbi_uc *out, int out_stride, short data[64])
2421 {
2422  int i, val[64], *v = val;
2423  stbi_uc *o;
2424  short *d = data;
2425 
2426  // columns
2427  for (i = 0; i < 8; ++i, ++d, ++v) {
2428  // if all zeroes, shortcut -- this avoids dequantizing 0s and IDCTing
2429  if (d[8] == 0 && d[16] == 0 && d[24] == 0 && d[32] == 0 && d[40] == 0 &&
2430  d[48] == 0 && d[56] == 0) {
2431  // no shortcut 0 seconds
2432  // (1|2|3|4|5|6|7)==0 0 seconds
2433  // all separate -0.047 seconds
2434  // 1 && 2|3 && 4|5 && 6|7: -0.047 seconds
2435  int dcterm = d[0] << 2;
2436  v[0] = v[8] = v[16] = v[24] = v[32] = v[40] = v[48] = v[56] =
2437  dcterm;
2438  } else {
2439  STBI__IDCT_1D(d[0], d[8], d[16], d[24], d[32], d[40], d[48], d[56])
2440  // constants scaled things up by 1<<12; let's bring them back
2441  // down, but keep 2 extra bits of precision
2442  x0 += 512;
2443  x1 += 512;
2444  x2 += 512;
2445  x3 += 512;
2446  v[0] = (x0 + t3) >> 10;
2447  v[56] = (x0 - t3) >> 10;
2448  v[8] = (x1 + t2) >> 10;
2449  v[48] = (x1 - t2) >> 10;
2450  v[16] = (x2 + t1) >> 10;
2451  v[40] = (x2 - t1) >> 10;
2452  v[24] = (x3 + t0) >> 10;
2453  v[32] = (x3 - t0) >> 10;
2454  }
2455  }
2456 
2457  for (i = 0, v = val, o = out; i < 8; ++i, v += 8, o += out_stride) {
2458  // no fast case since the first 1D IDCT spread components out
2459  STBI__IDCT_1D(v[0], v[1], v[2], v[3], v[4], v[5], v[6], v[7])
2460  // constants scaled things up by 1<<12, plus we had 1<<2 from first
2461  // loop, plus horizontal and vertical each scale by sqrt(8) so together
2462  // we've got an extra 1<<3, so 1<<17 total we need to remove.
2463  // so we want to round that, which means adding 0.5 * 1<<17,
2464  // aka 65536. Also, we'll end up with -128 to 127 that we want
2465  // to encode as 0..255 by adding 128, so we'll add that before the shift
2466  x0 += 65536 + (128 << 17);
2467  x1 += 65536 + (128 << 17);
2468  x2 += 65536 + (128 << 17);
2469  x3 += 65536 + (128 << 17);
2470  // tried computing the shifts into temps, or'ing the temps to see
2471  // if any were out of range, but that was slower
2472  o[0] = stbi__clamp((x0 + t3) >> 17);
2473  o[7] = stbi__clamp((x0 - t3) >> 17);
2474  o[1] = stbi__clamp((x1 + t2) >> 17);
2475  o[6] = stbi__clamp((x1 - t2) >> 17);
2476  o[2] = stbi__clamp((x2 + t1) >> 17);
2477  o[5] = stbi__clamp((x2 - t1) >> 17);
2478  o[3] = stbi__clamp((x3 + t0) >> 17);
2479  o[4] = stbi__clamp((x3 - t0) >> 17);
2480  }
2481 }
2482 
2483 #ifdef STBI_SSE2
2484 // sse2 integer IDCT. not the fastest possible implementation but it
2485 // produces bit-identical results to the generic C version so it's
2486 // fully "transparent".
//
//   out         destination for the 8x8 block of 8-bit samples; each of the
//               8 rows receives 8 bytes
//   out_stride  distance in bytes between consecutive output rows
//   data        the 64 input coefficients, 8 per row; read with aligned
//               128-bit loads, so it must be 16-byte aligned
2487 static void stbi__idct_simd(stbi_uc *out, int out_stride, short data[64])
2488 {
2489  // This is constructed to match our regular (generic) integer IDCT exactly.
2490  __m128i row0, row1, row2, row3, row4, row5, row6, row7;
2491  __m128i tmp;
2492 
2493 // dot product constant: even elems=x, odd elems=y
2494 #define dct_const(x, y) _mm_setr_epi16((x), (y), (x), (y), (x), (y), (x), (y))
2495 
2496 // out(0) = c0[even]*x + c0[odd]*y (c0, x, y 16-bit, out 32-bit)
2497 // out(1) = c1[even]*x + c1[odd]*y
// x and y are interleaved so that each multiply-add produces one 32-bit
// result per element; the macro declares out0/out1 as _l and _h halves.
2498 #define dct_rot(out0, out1, x, y, c0, c1) \
2499  __m128i c0##lo = _mm_unpacklo_epi16((x), (y)); \
2500  __m128i c0##hi = _mm_unpackhi_epi16((x), (y)); \
2501  __m128i out0##_l = _mm_madd_epi16(c0##lo, c0); \
2502  __m128i out0##_h = _mm_madd_epi16(c0##hi, c0); \
2503  __m128i out1##_l = _mm_madd_epi16(c0##lo, c1); \
2504  __m128i out1##_h = _mm_madd_epi16(c0##hi, c1)
2505 
2506 // out = in << 12 (in 16-bit, out 32-bit)
// interleaving zeros below each value puts it in the top half of a 32-bit
// element (in << 16); the arithmetic shift right by 4 leaves in << 12 with
// the sign preserved.
2507 #define dct_widen(out, in) \
2508  __m128i out##_l = \
2509  _mm_srai_epi32(_mm_unpacklo_epi16(_mm_setzero_si128(), (in)), 4); \
2510  __m128i out##_h = \
2511  _mm_srai_epi32(_mm_unpackhi_epi16(_mm_setzero_si128(), (in)), 4)
2512 
2513 // wide add
2514 #define dct_wadd(out, a, b) \
2515  __m128i out##_l = _mm_add_epi32(a##_l, b##_l); \
2516  __m128i out##_h = _mm_add_epi32(a##_h, b##_h)
2517 
2518 // wide sub
2519 #define dct_wsub(out, a, b) \
2520  __m128i out##_l = _mm_sub_epi32(a##_l, b##_l); \
2521  __m128i out##_h = _mm_sub_epi32(a##_h, b##_h)
2522 
2523 // butterfly a/b, add bias, then shift by "s" and pack
// (the pack narrows the 32-bit results to 16 bits with signed saturation)
2524 #define dct_bfly32o(out0, out1, a, b, bias, s) \
2525  { \
2526  __m128i abiased_l = _mm_add_epi32(a##_l, bias); \
2527  __m128i abiased_h = _mm_add_epi32(a##_h, bias); \
2528  dct_wadd(sum, abiased, b); \
2529  dct_wsub(dif, abiased, b); \
2530  out0 = _mm_packs_epi32(_mm_srai_epi32(sum_l, s), \
2531  _mm_srai_epi32(sum_h, s)); \
2532  out1 = _mm_packs_epi32(_mm_srai_epi32(dif_l, s), \
2533  _mm_srai_epi32(dif_h, s)); \
2534  }
2535 
2536 // 8-bit interleave step (for transposes)
2537 #define dct_interleave8(a, b) \
2538  tmp = a; \
2539  a = _mm_unpacklo_epi8(a, b); \
2540  b = _mm_unpackhi_epi8(tmp, b)
2541 
2542 // 16-bit interleave step (for transposes)
2543 #define dct_interleave16(a, b) \
2544  tmp = a; \
2545  a = _mm_unpacklo_epi16(a, b); \
2546  b = _mm_unpackhi_epi16(tmp, b)
2547 
// one full 1-D IDCT over row0..row7, result written back to row0..row7.
// the body is wrapped in braces because the helper macros declare local
// variables, and the pass is expanded twice in this function.
2548 #define dct_pass(bias, shift) \
2549  { \
2550  /* even part */ \
2551  dct_rot(t2e, t3e, row2, row6, rot0_0, rot0_1); \
2552  __m128i sum04 = _mm_add_epi16(row0, row4); \
2553  __m128i dif04 = _mm_sub_epi16(row0, row4); \
2554  dct_widen(t0e, sum04); \
2555  dct_widen(t1e, dif04); \
2556  dct_wadd(x0, t0e, t3e); \
2557  dct_wsub(x3, t0e, t3e); \
2558  dct_wadd(x1, t1e, t2e); \
2559  dct_wsub(x2, t1e, t2e); \
2560  /* odd part */ \
2561  dct_rot(y0o, y2o, row7, row3, rot2_0, rot2_1); \
2562  dct_rot(y1o, y3o, row5, row1, rot3_0, rot3_1); \
2563  __m128i sum17 = _mm_add_epi16(row1, row7); \
2564  __m128i sum35 = _mm_add_epi16(row3, row5); \
2565  dct_rot(y4o, y5o, sum17, sum35, rot1_0, rot1_1); \
2566  dct_wadd(x4, y0o, y4o); \
2567  dct_wadd(x5, y1o, y5o); \
2568  dct_wadd(x6, y2o, y5o); \
2569  dct_wadd(x7, y3o, y4o); \
2570  dct_bfly32o(row0, row7, x0, x7, bias, shift); \
2571  dct_bfly32o(row1, row6, x1, x6, bias, shift); \
2572  dct_bfly32o(row2, row5, x2, x5, bias, shift); \
2573  dct_bfly32o(row3, row4, x3, x4, bias, shift); \
2574  }
2575 
// multiplier pairs for dct_rot. each pair folds an addition of the scalar
// IDCT into the constants, e.g. (p2 + p3) * c followed by + p3 * c2 becomes
// p2 * c + p3 * (c + c2), which is a single multiply-add.
2576  __m128i rot0_0 =
2577  dct_const(stbi__f2f(0.5411961f),
2578  stbi__f2f(0.5411961f) + stbi__f2f(-1.847759065f));
2579  __m128i rot0_1 = dct_const(stbi__f2f(0.5411961f) + stbi__f2f(0.765366865f),
2580  stbi__f2f(0.5411961f));
2581  __m128i rot1_0 =
2582  dct_const(stbi__f2f(1.175875602f) + stbi__f2f(-0.899976223f),
2583  stbi__f2f(1.175875602f));
2584  __m128i rot1_1 =
2585  dct_const(stbi__f2f(1.175875602f),
2586  stbi__f2f(1.175875602f) + stbi__f2f(-2.562915447f));
2587  __m128i rot2_0 =
2588  dct_const(stbi__f2f(-1.961570560f) + stbi__f2f(0.298631336f),
2589  stbi__f2f(-1.961570560f));
2590  __m128i rot2_1 =
2591  dct_const(stbi__f2f(-1.961570560f),
2592  stbi__f2f(-1.961570560f) + stbi__f2f(3.072711026f));
2593  __m128i rot3_0 =
2594  dct_const(stbi__f2f(-0.390180644f) + stbi__f2f(2.053119869f),
2595  stbi__f2f(-0.390180644f));
2596  __m128i rot3_1 =
2597  dct_const(stbi__f2f(-0.390180644f),
2598  stbi__f2f(-0.390180644f) + stbi__f2f(1.501321110f));
2599 
2600  // rounding biases in column/row passes, see stbi__idct_block for
2601  // explanation.
2602  __m128i bias_0 = _mm_set1_epi32(512);
2603  __m128i bias_1 = _mm_set1_epi32(65536 + (128 << 17));
2604 
2605  // load
 // (aligned loads: data has to sit on a 16-byte boundary)
2606  row0 = _mm_load_si128((const __m128i *)(data + 0 * 8));
2607  row1 = _mm_load_si128((const __m128i *)(data + 1 * 8));
2608  row2 = _mm_load_si128((const __m128i *)(data + 2 * 8));
2609  row3 = _mm_load_si128((const __m128i *)(data + 3 * 8));
2610  row4 = _mm_load_si128((const __m128i *)(data + 4 * 8));
2611  row5 = _mm_load_si128((const __m128i *)(data + 5 * 8));
2612  row6 = _mm_load_si128((const __m128i *)(data + 6 * 8));
2613  row7 = _mm_load_si128((const __m128i *)(data + 7 * 8));
2614 
2615  // column pass
2616  dct_pass(bias_0, 10);
2617 
2618  {
2619  // 16bit 8x8 transpose pass 1
2620  dct_interleave16(row0, row4);
2621  dct_interleave16(row1, row5);
2622  dct_interleave16(row2, row6);
2623  dct_interleave16(row3, row7);
2624 
2625  // transpose pass 2
2626  dct_interleave16(row0, row2);
2627  dct_interleave16(row1, row3);
2628  dct_interleave16(row4, row6);
2629  dct_interleave16(row5, row7);
2630 
2631  // transpose pass 3
2632  dct_interleave16(row0, row1);
2633  dct_interleave16(row2, row3);
2634  dct_interleave16(row4, row5);
2635  dct_interleave16(row6, row7);
2636  }
2637 
2638  // row pass
2639  dct_pass(bias_1, 17);
2640 
2641  {
2642  // pack
 // (narrows to 8 bits with unsigned saturation, which does the job that
 // stbi__clamp does in the generic C version)
2643  __m128i p0 = _mm_packus_epi16(row0, row1); // a0a1a2a3...a7b0b1b2b3...b7
2644  __m128i p1 = _mm_packus_epi16(row2, row3);
2645  __m128i p2 = _mm_packus_epi16(row4, row5);
2646  __m128i p3 = _mm_packus_epi16(row6, row7);
2647 
2648  // 8bit 8x8 transpose pass 1
2649  dct_interleave8(p0, p2); // a0e0a1e1...
2650  dct_interleave8(p1, p3); // c0g0c1g1...
2651 
2652  // transpose pass 2
2653  dct_interleave8(p0, p1); // a0c0e0g0...
2654  dct_interleave8(p2, p3); // b0d0f0h0...
2655 
2656  // transpose pass 3
2657  dct_interleave8(p0, p2); // a0b0c0d0...
2658  dct_interleave8(p1, p3); // a4b4c4d4...
2659 
2660  // store
 // each register holds two output rows; the low 8 bytes are stored
 // directly and the 0x4e shuffle swaps the 64-bit halves so the other
 // row can be stored the same way
2661  _mm_storel_epi64((__m128i *)out, p0);
2662  out += out_stride;
2663  _mm_storel_epi64((__m128i *)out, _mm_shuffle_epi32(p0, 0x4e));
2664  out += out_stride;
2665  _mm_storel_epi64((__m128i *)out, p2);
2666  out += out_stride;
2667  _mm_storel_epi64((__m128i *)out, _mm_shuffle_epi32(p2, 0x4e));
2668  out += out_stride;
2669  _mm_storel_epi64((__m128i *)out, p1);
2670  out += out_stride;
2671  _mm_storel_epi64((__m128i *)out, _mm_shuffle_epi32(p1, 0x4e));
2672  out += out_stride;
2673  _mm_storel_epi64((__m128i *)out, p3);
2674  out += out_stride;
2675  _mm_storel_epi64((__m128i *)out, _mm_shuffle_epi32(p3, 0x4e));
2676  }
2677 
2678 #undef dct_const
2679 #undef dct_rot
2680 #undef dct_widen
2681 #undef dct_wadd
2682 #undef dct_wsub
2683 #undef dct_bfly32o
2684 #undef dct_interleave8
2685 #undef dct_interleave16
2686 #undef dct_pass
2687 }
2688 
2689 #endif // STBI_SSE2
2690 
2691 #ifdef STBI_NEON
2692 
2693 // NEON integer IDCT. should produce bit-identical
2694 // results to the generic C version.
//
//   out         destination for the 8x8 block of 8-bit samples; each of the
//               8 rows receives 8 bytes
//   out_stride  distance in bytes between consecutive output rows
//   data        the 64 input coefficients, 8 per row
2695 static void stbi__idct_simd(stbi_uc *out, int out_stride, short data[64])
2696 {
2697  int16x8_t row0, row1, row2, row3, row4, row5, row6, row7;
2698 
 // the same fixed-point constants as STBI__IDCT_1D, each broadcast to
 // four 16-bit lanes for use with the widening multiplies below
2699  int16x4_t rot0_0 = vdup_n_s16(stbi__f2f(0.5411961f));
2700  int16x4_t rot0_1 = vdup_n_s16(stbi__f2f(-1.847759065f));
2701  int16x4_t rot0_2 = vdup_n_s16(stbi__f2f(0.765366865f));
2702  int16x4_t rot1_0 = vdup_n_s16(stbi__f2f(1.175875602f));
2703  int16x4_t rot1_1 = vdup_n_s16(stbi__f2f(-0.899976223f));
2704  int16x4_t rot1_2 = vdup_n_s16(stbi__f2f(-2.562915447f));
2705  int16x4_t rot2_0 = vdup_n_s16(stbi__f2f(-1.961570560f));
2706  int16x4_t rot2_1 = vdup_n_s16(stbi__f2f(-0.390180644f));
2707  int16x4_t rot3_0 = vdup_n_s16(stbi__f2f(0.298631336f));
2708  int16x4_t rot3_1 = vdup_n_s16(stbi__f2f(2.053119869f));
2709  int16x4_t rot3_2 = vdup_n_s16(stbi__f2f(3.072711026f));
2710  int16x4_t rot3_3 = vdup_n_s16(stbi__f2f(1.501321110f));
2711 
// out = inq * coeff (16-bit inputs, 32-bit result in _l and _h halves)
2712 #define dct_long_mul(out, inq, coeff) \
2713  int32x4_t out##_l = vmull_s16(vget_low_s16(inq), coeff); \
2714  int32x4_t out##_h = vmull_s16(vget_high_s16(inq), coeff)
2715 
// out = acc + inq * coeff (32-bit accumulator, 16-bit inputs)
2716 #define dct_long_mac(out, acc, inq, coeff) \
2717  int32x4_t out##_l = vmlal_s16(acc##_l, vget_low_s16(inq), coeff); \
2718  int32x4_t out##_h = vmlal_s16(acc##_h, vget_high_s16(inq), coeff)
2719 
// out = inq << 12 (16-bit input, 32-bit result)
2720 #define dct_widen(out, inq) \
2721  int32x4_t out##_l = vshll_n_s16(vget_low_s16(inq), 12); \
2722  int32x4_t out##_h = vshll_n_s16(vget_high_s16(inq), 12)
2723 
2724 // wide add
2725 #define dct_wadd(out, a, b) \
2726  int32x4_t out##_l = vaddq_s32(a##_l, b##_l); \
2727  int32x4_t out##_h = vaddq_s32(a##_h, b##_h)
2728 
2729 // wide sub
2730 #define dct_wsub(out, a, b) \
2731  int32x4_t out##_l = vsubq_s32(a##_l, b##_l); \
2732  int32x4_t out##_h = vsubq_s32(a##_h, b##_h)
2733 
2734 // butterfly a/b, then shift using "shiftop" by "s" and pack
2735 #define dct_bfly32o(out0, out1, a, b, shiftop, s) \
2736  { \
2737  dct_wadd(sum, a, b); \
2738  dct_wsub(dif, a, b); \
2739  out0 = vcombine_s16(shiftop(sum_l, s), shiftop(sum_h, s)); \
2740  out1 = vcombine_s16(shiftop(dif_l, s), shiftop(dif_h, s)); \
2741  }
2742 
// one full 1-D IDCT over row0..row7, result written back to row0..row7.
// the body is wrapped in braces because the helper macros declare local
// variables, and the pass is expanded twice in this function.
2743 #define dct_pass(shiftop, shift) \
2744  { \
2745  /* even part */ \
2746  int16x8_t sum26 = vaddq_s16(row2, row6); \
2747  dct_long_mul(p1e, sum26, rot0_0); \
2748  dct_long_mac(t2e, p1e, row6, rot0_1); \
2749  dct_long_mac(t3e, p1e, row2, rot0_2); \
2750  int16x8_t sum04 = vaddq_s16(row0, row4); \
2751  int16x8_t dif04 = vsubq_s16(row0, row4); \
2752  dct_widen(t0e, sum04); \
2753  dct_widen(t1e, dif04); \
2754  dct_wadd(x0, t0e, t3e); \
2755  dct_wsub(x3, t0e, t3e); \
2756  dct_wadd(x1, t1e, t2e); \
2757  dct_wsub(x2, t1e, t2e); \
2758  /* odd part */ \
2759  int16x8_t sum15 = vaddq_s16(row1, row5); \
2760  int16x8_t sum17 = vaddq_s16(row1, row7); \
2761  int16x8_t sum35 = vaddq_s16(row3, row5); \
2762  int16x8_t sum37 = vaddq_s16(row3, row7); \
2763  int16x8_t sumodd = vaddq_s16(sum17, sum35); \
2764  dct_long_mul(p5o, sumodd, rot1_0); \
2765  dct_long_mac(p1o, p5o, sum17, rot1_1); \
2766  dct_long_mac(p2o, p5o, sum35, rot1_2); \
2767  dct_long_mul(p3o, sum37, rot2_0); \
2768  dct_long_mul(p4o, sum15, rot2_1); \
2769  dct_wadd(sump13o, p1o, p3o); \
2770  dct_wadd(sump24o, p2o, p4o); \
2771  dct_wadd(sump23o, p2o, p3o); \
2772  dct_wadd(sump14o, p1o, p4o); \
2773  dct_long_mac(x4, sump13o, row7, rot3_0); \
2774  dct_long_mac(x5, sump24o, row5, rot3_1); \
2775  dct_long_mac(x6, sump23o, row3, rot3_2); \
2776  dct_long_mac(x7, sump14o, row1, rot3_3); \
2777  dct_bfly32o(row0, row7, x0, x7, shiftop, shift); \
2778  dct_bfly32o(row1, row6, x1, x6, shiftop, shift); \
2779  dct_bfly32o(row2, row5, x2, x5, shiftop, shift); \
2780  dct_bfly32o(row3, row4, x3, x4, shiftop, shift); \
2781  }
2782 
2783  // load
2784  row0 = vld1q_s16(data + 0 * 8);
2785  row1 = vld1q_s16(data + 1 * 8);
2786  row2 = vld1q_s16(data + 2 * 8);
2787  row3 = vld1q_s16(data + 3 * 8);
2788  row4 = vld1q_s16(data + 4 * 8);
2789  row5 = vld1q_s16(data + 5 * 8);
2790  row6 = vld1q_s16(data + 6 * 8);
2791  row7 = vld1q_s16(data + 7 * 8);
2792 
2793  // add DC bias
 // (adds 1024 to lane 0 of row0 only, i.e. to data[0].
 // NOTE(review): presumably this is the +128 output offset of the
 // generic version folded into the DC coefficient -- confirm)
2794  row0 = vaddq_s16(row0, vsetq_lane_s16(1024, vdupq_n_s16(0), 0));
2795 
2796  // column pass
 // (rounding shift by 10, where the generic version adds 512 and then
 // shifts by 10)
2797  dct_pass(vrshrn_n_s32, 10);
2798 
2799  // 16bit 8x8 transpose
2800  {
2801 // these three map to a single VTRN.16, VTRN.32, and VSWP, respectively.
2802 // whether compilers actually get this is another story, sadly.
2803 #define dct_trn16(x, y) \
2804  { \
2805  int16x8x2_t t = vtrnq_s16(x, y); \
2806  x = t.val[0]; \
2807  y = t.val[1]; \
2808  }
2809 #define dct_trn32(x, y) \
2810  { \
2811  int32x4x2_t t = \
2812  vtrnq_s32(vreinterpretq_s32_s16(x), vreinterpretq_s32_s16(y)); \
2813  x = vreinterpretq_s16_s32(t.val[0]); \
2814  y = vreinterpretq_s16_s32(t.val[1]); \
2815  }
2816 #define dct_trn64(x, y) \
2817  { \
2818  int16x8_t x0 = x; \
2819  int16x8_t y0 = y; \
2820  x = vcombine_s16(vget_low_s16(x0), vget_low_s16(y0)); \
2821  y = vcombine_s16(vget_high_s16(x0), vget_high_s16(y0)); \
2822  }
2823 
2824  // pass 1
2825  dct_trn16(row0, row1); // a0b0a2b2a4b4a6b6
2826  dct_trn16(row2, row3);
2827  dct_trn16(row4, row5);
2828  dct_trn16(row6, row7);
2829 
2830  // pass 2
2831  dct_trn32(row0, row2); // a0b0c0d0a4b4c4d4
2832  dct_trn32(row1, row3);
2833  dct_trn32(row4, row6);
2834  dct_trn32(row5, row7);
2835 
2836  // pass 3
2837  dct_trn64(row0, row4); // a0b0c0d0e0f0g0h0
2838  dct_trn64(row1, row5);
2839  dct_trn64(row2, row6);
2840  dct_trn64(row3, row7);
2841 
2842 #undef dct_trn16
2843 #undef dct_trn32
2844 #undef dct_trn64
2845  }
2846 
2847  // row pass
2848  // vrshrn_n_s32 only supports shifts up to 16, we need
2849  // 17. so do a non-rounding shift of 16 first then follow
2850  // up with a rounding shift by 1.
2851  dct_pass(vshrn_n_s32, 16);
2852 
2853  {
2854  // pack and round
 // (rounding shift by 1 that narrows to 8 bits with unsigned
 // saturation, which does the job that stbi__clamp does in the generic
 // C version)
2855  uint8x8_t p0 = vqrshrun_n_s16(row0, 1);
2856  uint8x8_t p1 = vqrshrun_n_s16(row1, 1);
2857  uint8x8_t p2 = vqrshrun_n_s16(row2, 1);
2858  uint8x8_t p3 = vqrshrun_n_s16(row3, 1);
2859  uint8x8_t p4 = vqrshrun_n_s16(row4, 1);
2860  uint8x8_t p5 = vqrshrun_n_s16(row5, 1);
2861  uint8x8_t p6 = vqrshrun_n_s16(row6, 1);
2862  uint8x8_t p7 = vqrshrun_n_s16(row7, 1);
2863 
2864  // again, these can translate into one instruction, but often don't.
2865 #define dct_trn8_8(x, y) \
2866  { \
2867  uint8x8x2_t t = vtrn_u8(x, y); \
2868  x = t.val[0]; \
2869  y = t.val[1]; \
2870  }
2871 #define dct_trn8_16(x, y) \
2872  { \
2873  uint16x4x2_t t = \
2874  vtrn_u16(vreinterpret_u16_u8(x), vreinterpret_u16_u8(y)); \
2875  x = vreinterpret_u8_u16(t.val[0]); \
2876  y = vreinterpret_u8_u16(t.val[1]); \
2877  }
2878 #define dct_trn8_32(x, y) \
2879  { \
2880  uint32x2x2_t t = \
2881  vtrn_u32(vreinterpret_u32_u8(x), vreinterpret_u32_u8(y)); \
2882  x = vreinterpret_u8_u32(t.val[0]); \
2883  y = vreinterpret_u8_u32(t.val[1]); \
2884  }
2885 
2886  // sadly can't use interleaved stores here since we only write
2887  // 8 bytes to each scan line!
2888 
2889  // 8x8 8-bit transpose pass 1
2890  dct_trn8_8(p0, p1);
2891  dct_trn8_8(p2, p3);
2892  dct_trn8_8(p4, p5);
2893  dct_trn8_8(p6, p7);
2894 
2895  // pass 2
2896  dct_trn8_16(p0, p2);
2897  dct_trn8_16(p1, p3);
2898  dct_trn8_16(p4, p6);
2899  dct_trn8_16(p5, p7);
2900 
2901  // pass 3
2902  dct_trn8_32(p0, p4);
2903  dct_trn8_32(p1, p5);
2904  dct_trn8_32(p2, p6);
2905  dct_trn8_32(p3, p7);
2906 
2907  // store
 // (one 8-byte row per register, rows out_stride bytes apart)
2908  vst1_u8(out, p0);
2909  out += out_stride;
2910  vst1_u8(out, p1);
2911  out += out_stride;
2912  vst1_u8(out, p2);
2913  out += out_stride;
2914  vst1_u8(out, p3);
2915  out += out_stride;
2916  vst1_u8(out, p4);
2917  out += out_stride;
2918  vst1_u8(out, p5);
2919  out += out_stride;
2920  vst1_u8(out, p6);
2921  out += out_stride;
2922  vst1_u8(out, p7);
2923 
2924 #undef dct_trn8_8
2925 #undef dct_trn8_16
2926 #undef dct_trn8_32
2927  }
2928 
2929 #undef dct_long_mul
2930 #undef dct_long_mac
2931 #undef dct_widen
2932 #undef dct_wadd
2933 #undef dct_wsub
2934 #undef dct_bfly32o
2935 #undef dct_pass
2936 }
2937 
2938 #endif // STBI_NEON
2939 
2940 #define STBI__MARKER_none 0xff
2941 // if there's a pending marker from the entropy stream, return that
2942 // otherwise, fetch from the stream and get a marker. if there's no
2943 // marker, return 0xff, which is never a valid marker value
2944 static stbi_uc stbi__get_marker(stbi__jpeg *j)
2945 {
2946  stbi_uc x;
2947  if (j->marker != STBI__MARKER_none) {
2948  x = j->marker;
2949  j->marker = STBI__MARKER_none;
2950  return x;
2951  }
2952  x = stbi__get8(j->s);
2953  if (x != 0xff)
2954  return STBI__MARKER_none;
2955  while (x == 0xff)
2956  x = stbi__get8(j->s); // consume repeated 0xff fill bytes
2957  return x;
2958 }
2959 
2960 // in each scan, we'll have scan_n components, and the order
2961 // of the components is specified by order[]
// STBI__RESTART(x) is nonzero when x is a restart marker code, i.e. lies in
// 0xd0..0xd7. Note that the argument is evaluated twice.
2962 #define STBI__RESTART(x) ((x) >= 0xd0 && (x) <= 0xd7)
2963 
2964 // after a restart interval, stbi__jpeg_reset the entropy decoder and
2965 // the dc prediction
2966 static void stbi__jpeg_reset(stbi__jpeg *j)
2967 {
2968  j->code_bits = 0;
2969  j->code_buffer = 0;
2970  j->nomore = 0;
2971  j->img_comp[0].dc_pred = j->img_comp[1].dc_pred = j->img_comp[2].dc_pred =
2972  j->img_comp[3].dc_pred = 0;
2973  j->marker = STBI__MARKER_none;
2974  j->todo = j->restart_interval ? j->restart_interval : 0x7fffffff;
2975  j->eob_run = 0;
2976  // no more than 1<<31 MCUs if no restart_interal? that's plenty safe,
2977  // since we don't even allow 1<<30 pixels
2978 }
2979 
2980 static int stbi__parse_entropy_coded_data(stbi__jpeg *z)
2981 {
2982  stbi__jpeg_reset(z);
2983  if (!z->progressive) {
2984  if (z->scan_n == 1) {
2985  int i, j;
2986  STBI_SIMD_ALIGN(short, data[64]);
2987  int n = z->order[0];
2988  // non-interleaved data, we just need to process one block at a
2989  // time, in trivial scanline order number of blocks to do just
2990  // depends on how many actual "pixels" this component has,
2991  // independent of interleaved MCU blocking and such
2992  int w = (z->img_comp[n].x + 7) >> 3;
2993  int h = (z->img_comp[n].y + 7) >> 3;
2994  for (j = 0; j < h; ++j) {
2995  for (i = 0; i < w; ++i) {
2996  int ha = z->img_comp[n].ha;
2997  if (!stbi__jpeg_decode_block(
2998  z, data, z->huff_dc + z->img_comp[n].hd,
2999  z->huff_ac + ha, z->fast_ac[ha], n,
3000  z->dequant[z->img_comp[n].tq]))
3001  return 0;
3002  z->idct_block_kernel(z->img_comp[n].data +
3003  z->img_comp[n].w2 * j * 8 + i * 8,
3004  z->img_comp[n].w2, data);
3005  // every data block is an MCU, so countdown the restart
3006  // interval
3007  if (--z->todo <= 0) {
3008  if (z->code_bits < 24)
3009  stbi__grow_buffer_unsafe(z);
3010  // if it's NOT a restart, then just bail, so we get
3011  // corrupt data rather than no data
3012  if (!STBI__RESTART(z->marker))
3013  return 1;
3014  stbi__jpeg_reset(z);
3015  }
3016  }
3017  }
3018  return 1;
3019  } else { // interleaved
3020  int i, j, k, x, y;
3021  STBI_SIMD_ALIGN(short, data[64]);
3022  for (j = 0; j < z->img_mcu_y; ++j) {
3023  for (i = 0; i < z->img_mcu_x; ++i) {
3024  // scan an interleaved mcu... process scan_n components in
3025  // order
3026  for (k = 0; k < z->scan_n; ++k) {
3027  int n = z->order[k];
3028  // scan out an mcu's worth of this component; that's
3029  // just determined by the basic H and V specified for
3030  // the component
3031  for (y = 0; y < z->img_comp[n].v; ++y) {
3032  for (x = 0; x < z->img_comp[n].h; ++x) {
3033  int x2 = (i * z->img_comp[n].h + x) * 8;
3034  int y2 = (j * z->img_comp[n].v + y) * 8;
3035  int ha = z->img_comp[n].ha;
3036  if (!stbi__jpeg_decode_block(
3037  z, data, z->huff_dc + z->img_comp[n].hd,
3038  z->huff_ac + ha, z->fast_ac[ha], n,
3039  z->dequant[z->img_comp[n].tq]))
3040  return 0;
3041  z->idct_block_kernel(
3042  z->img_comp[n].data +
3043  z->img_comp[n].w2 * y2 + x2,
3044  z->img_comp[n].w2, data);
3045  }
3046  }
3047  }
3048  // after all interleaved components, that's an interleaved
3049  // MCU, so now count down the restart interval
3050  if (--z->todo <= 0) {
3051  if (z->code_bits < 24)
3052  stbi__grow_buffer_unsafe(z);
3053  if (!STBI__RESTART(z->marker))
3054  return 1;
3055  stbi__jpeg_reset(z);
3056  }
3057  }
3058  }
3059  return 1;
3060  }
3061  } else {
3062  if (z->scan_n == 1) {
3063  int i, j;
3064  int n = z->order[0];
3065  // non-interleaved data, we just need to process one block at a
3066  // time, in trivial scanline order number of blocks to do just
3067  // depends on how many actual "pixels" this component has,
3068  // independent of interleaved MCU blocking and such
3069  int w = (z->img_comp[n].x + 7) >> 3;
3070  int h = (z->img_comp[n].y + 7) >> 3;
3071  for (j = 0; j < h; ++j) {
3072  for (i = 0; i < w; ++i) {
3073  short *data = z->img_comp[n].coeff +
3074  64 * (i + j * z->img_comp[n].coeff_w);
3075  if (z->spec_start == 0) {
3076  if (!stbi__jpeg_decode_block_prog_dc(
3077  z, data, &z->huff_dc[z->img_comp[n].hd], n))
3078  return 0;
3079  } else {
3080  int ha = z->img_comp[n].ha;
3081  if (!stbi__jpeg_decode_block_prog_ac(
3082  z, data, &z->huff_ac[ha], z->fast_ac[ha]))
3083  return 0;
3084  }
3085  // every data block is an MCU, so countdown the restart
3086  // interval
3087  if (--z->todo <= 0) {
3088  if (z->code_bits < 24)
3089  stbi__grow_buffer_unsafe(z);
3090  if (!STBI__RESTART(z->marker))
3091  return 1;
3092  stbi__jpeg_reset(z);
3093  }
3094  }
3095  }
3096  return 1;
3097  } else { // interleaved
3098  int i, j, k, x, y;
3099  for (j = 0; j < z->img_mcu_y; ++j) {
3100  for (i = 0; i < z->img_mcu_x; ++i) {
3101  // scan an interleaved mcu... process scan_n components in
3102  // order
3103  for (k = 0; k < z->scan_n; ++k) {
3104  int n = z->order[k];
3105  // scan out an mcu's worth of this component; that's
3106  // just determined by the basic H and V specified for
3107  // the component
3108  for (y = 0; y < z->img_comp[n].v; ++y) {
3109  for (x = 0; x < z->img_comp[n].h; ++x) {
3110  int x2 = (i * z->img_comp[n].h + x);
3111  int y2 = (j * z->img_comp[n].v + y);
3112  short *data =
3113  z->img_comp[n].coeff +
3114  64 * (x2 + y2 * z->img_comp[n].coeff_w);
3115  if (!stbi__jpeg_decode_block_prog_dc(
3116  z, data, &z->huff_dc[z->img_comp[n].hd],
3117  n))
3118  return 0;
3119  }
3120  }
3121  }
3122  // after all interleaved components, that's an interleaved
3123  // MCU, so now count down the restart interval
3124  if (--z->todo <= 0) {
3125  if (z->code_bits < 24)
3126  stbi__grow_buffer_unsafe(z);
3127  if (!STBI__RESTART(z->marker))
3128  return 1;
3129  stbi__jpeg_reset(z);
3130  }
3131  }
3132  }
3133  return 1;
3134  }
3135  }
3136 }
3137 
3138 static void stbi__jpeg_dequantize(short *data, stbi__uint16 *dequant)
3139 {
3140  int i;
3141  for (i = 0; i < 64; ++i)
3142  data[i] *= dequant[i];
3143 }
3144 
3145 static void stbi__jpeg_finish(stbi__jpeg *z)
3146 {
3147  if (z->progressive) {
3148  // dequantize and idct the data
3149  int i, j, n;
3150  for (n = 0; n < z->s->img_n; ++n) {
3151  int w = (z->img_comp[n].x + 7) >> 3;
3152  int h = (z->img_comp[n].y + 7) >> 3;
3153  for (j = 0; j < h; ++j) {
3154  for (i = 0; i < w; ++i) {
3155  short *data = z->img_comp[n].coeff +
3156  64 * (i + j * z->img_comp[n].coeff_w);
3157  stbi__jpeg_dequantize(data, z->dequant[z->img_comp[n].tq]);
3158  z->idct_block_kernel(z->img_comp[n].data +
3159  z->img_comp[n].w2 * j * 8 + i * 8,
3160  z->img_comp[n].w2, data);
3161  }
3162  }
3163  }
3164  }
3165 }
3166 
3167 static int stbi__process_marker(stbi__jpeg *z, int m)
3168 {
3169  int L;
3170  switch (m) {
3171  case STBI__MARKER_none: // no marker found
3172  return stbi__err("expected marker", "Corrupt JPEG");
3173 
3174  case 0xDD: // DRI - specify restart interval
3175  if (stbi__get16be(z->s) != 4)
3176  return stbi__err("bad DRI len", "Corrupt JPEG");
3177  z->restart_interval = stbi__get16be(z->s);
3178  return 1;
3179 
3180  case 0xDB: // DQT - define quantization table
3181  L = stbi__get16be(z->s) - 2;
3182  while (L > 0) {
3183  int q = stbi__get8(z->s);
3184  int p = q >> 4, sixteen = (p != 0);
3185  int t = q & 15, i;
3186  if (p != 0 && p != 1)
3187  return stbi__err("bad DQT type", "Corrupt JPEG");
3188  if (t > 3)
3189  return stbi__err("bad DQT table", "Corrupt JPEG");
3190 
3191  for (i = 0; i < 64; ++i)
3192  z->dequant[t][stbi__jpeg_dezigzag[i]] = (stbi__uint16)(
3193  sixteen ? stbi__get16be(z->s) : stbi__get8(z->s));
3194  L -= (sixteen ? 129 : 65);
3195  }
3196  return L == 0;
3197 
3198  case 0xC4: // DHT - define huffman table
3199  L = stbi__get16be(z->s) - 2;
3200  while (L > 0) {
3201  stbi_uc *v;
3202  int sizes[16], i, n = 0;
3203  int q = stbi__get8(z->s);
3204  int tc = q >> 4;
3205  int th = q & 15;
3206  if (tc > 1 || th > 3)
3207  return stbi__err("bad DHT header", "Corrupt JPEG");
3208  for (i = 0; i < 16; ++i) {
3209  sizes[i] = stbi__get8(z->s);
3210  n += sizes[i];
3211  }
3212  L -= 17;
3213  if (tc == 0) {
3214  if (!stbi__build_huffman(z->huff_dc + th, sizes))
3215  return 0;
3216  v = z->huff_dc[th].values;
3217  } else {
3218  if (!stbi__build_huffman(z->huff_ac + th, sizes))
3219  return 0;
3220  v = z->huff_ac[th].values;
3221  }
3222  for (i = 0; i < n; ++i)
3223  v[i] = stbi__get8(z->s);
3224  if (tc != 0)
3225  stbi__build_fast_ac(z->fast_ac[th], z->huff_ac + th);
3226  L -= n;
3227  }
3228  return L == 0;
3229  }
3230 
3231  // check for comment block or APP blocks
3232  if ((m >= 0xE0 && m <= 0xEF) || m == 0xFE) {
3233  L = stbi__get16be(z->s);
3234  if (L < 2) {
3235  if (m == 0xFE)
3236  return stbi__err("bad COM len", "Corrupt JPEG");
3237  else
3238  return stbi__err("bad APP len", "Corrupt JPEG");
3239  }
3240  L -= 2;
3241 
3242  if (m == 0xE0 && L >= 5) { // JFIF APP0 segment
3243  static const unsigned char tag[5] = {'J', 'F', 'I', 'F', '\0'};
3244  int ok = 1;
3245  int i;
3246  for (i = 0; i < 5; ++i)
3247  if (stbi__get8(z->s) != tag[i])
3248  ok = 0;
3249  L -= 5;
3250  if (ok)
3251  z->jfif = 1;
3252  } else if (m == 0xEE && L >= 12) { // Adobe APP14 segment
3253  static const unsigned char tag[6] = {'A', 'd', 'o', 'b', 'e', '\0'};
3254  int ok = 1;
3255  int i;
3256  for (i = 0; i < 6; ++i)
3257  if (stbi__get8(z->s) != tag[i])
3258  ok = 0;
3259  L -= 6;
3260  if (ok) {
3261  stbi__get8(z->s); // version
3262  stbi__get16be(z->s); // flags0
3263  stbi__get16be(z->s); // flags1
3264  z->app14_color_transform = stbi__get8(z->s); // color transform
3265  L -= 6;
3266  }
3267  }
3268 
3269  stbi__skip(z->s, L);
3270  return 1;
3271  }
3272 
3273  return stbi__err("unknown marker", "Corrupt JPEG");
3274 }
3275 
3276 // after we see SOS
3277 static int stbi__process_scan_header(stbi__jpeg *z)
3278 {
3279  int i;
3280  int Ls = stbi__get16be(z->s);
3281  z->scan_n = stbi__get8(z->s);
3282  if (z->scan_n < 1 || z->scan_n > 4 || z->scan_n > (int)z->s->img_n)
3283  return stbi__err("bad SOS component count", "Corrupt JPEG");
3284  if (Ls != 6 + 2 * z->scan_n)
3285  return stbi__err("bad SOS len", "Corrupt JPEG");
3286  for (i = 0; i < z->scan_n; ++i) {
3287  int id = stbi__get8(z->s), which;
3288  int q = stbi__get8(z->s);
3289  for (which = 0; which < z->s->img_n; ++which)
3290  if (z->img_comp[which].id == id)
3291  break;
3292  if (which == z->s->img_n)
3293  return 0; // no match
3294  z->img_comp[which].hd = q >> 4;
3295  if (z->img_comp[which].hd > 3)
3296  return stbi__err("bad DC huff", "Corrupt JPEG");
3297  z->img_comp[which].ha = q & 15;
3298  if (z->img_comp[which].ha > 3)
3299  return stbi__err("bad AC huff", "Corrupt JPEG");
3300  z->order[i] = which;
3301  }
3302 
3303  {
3304  int aa;
3305  z->spec_start = stbi__get8(z->s);
3306  z->spec_end = stbi__get8(z->s); // should be 63, but might be 0
3307  aa = stbi__get8(z->s);
3308  z->succ_high = (aa >> 4);
3309  z->succ_low = (aa & 15);
3310  if (z->progressive) {
3311  if (z->spec_start > 63 || z->spec_end > 63 ||
3312  z->spec_start > z->spec_end || z->succ_high > 13 ||
3313  z->succ_low > 13)
3314  return stbi__err("bad SOS", "Corrupt JPEG");
3315  } else {
3316  if (z->spec_start != 0)
3317  return stbi__err("bad SOS", "Corrupt JPEG");
3318  if (z->succ_high != 0 || z->succ_low != 0)
3319  return stbi__err("bad SOS", "Corrupt JPEG");
3320  z->spec_end = 63;
3321  }
3322  }
3323 
3324  return 1;
3325 }
3326 
3327 static int stbi__free_jpeg_components(stbi__jpeg *z, int ncomp, int why)
3328 {
3329  int i;
3330  for (i = 0; i < ncomp; ++i) {
3331  if (z->img_comp[i].raw_data) {
3332  STBI_FREE(z->img_comp[i].raw_data);
3333  z->img_comp[i].raw_data = NULL;
3334  z->img_comp[i].data = NULL;
3335  }
3336  if (z->img_comp[i].raw_coeff) {
3337  STBI_FREE(z->img_comp[i].raw_coeff);
3338  z->img_comp[i].raw_coeff = 0;
3339  z->img_comp[i].coeff = 0;
3340  }
3341  if (z->img_comp[i].linebuf) {
3342  STBI_FREE(z->img_comp[i].linebuf);
3343  z->img_comp[i].linebuf = NULL;
3344  }
3345  }
3346  return why;
3347 }
3348 
3349 static int stbi__process_frame_header(stbi__jpeg *z, int scan)
3350 {
3351  stbi__context *s = z->s;
3352  int Lf, p, i, q, h_max = 1, v_max = 1, c;
3353  Lf = stbi__get16be(s);
3354  if (Lf < 11)
3355  return stbi__err("bad SOF len", "Corrupt JPEG"); // JPEG
3356  p = stbi__get8(s);
3357  if (p != 8)
3358  return stbi__err(
3359  "only 8-bit",
3360  "JPEG format not supported: 8-bit only"); // JPEG baseline
3361  s->img_y = stbi__get16be(s);
3362  if (s->img_y == 0)
3363  return stbi__err(
3364  "no header height",
3365  "JPEG format not supported: delayed height"); // Legal, but we don't
3366  // handle it--but
3367  // neither does IJG
3368  s->img_x = stbi__get16be(s);
3369  if (s->img_x == 0)
3370  return stbi__err("0 width", "Corrupt JPEG"); // JPEG requires
3371  c = stbi__get8(s);
3372  if (c != 3 && c != 1 && c != 4)
3373  return stbi__err("bad component count", "Corrupt JPEG");
3374  s->img_n = c;
3375  for (i = 0; i < c; ++i) {
3376  z->img_comp[i].data = NULL;
3377  z->img_comp[i].linebuf = NULL;
3378  }
3379 
3380  if (Lf != 8 + 3 * s->img_n)
3381  return stbi__err("bad SOF len", "Corrupt JPEG");
3382 
3383  z->rgb = 0;
3384  for (i = 0; i < s->img_n; ++i) {
3385  static unsigned char rgb[3] = {'R', 'G', 'B'};
3386  z->img_comp[i].id = stbi__get8(s);
3387  if (s->img_n == 3 && z->img_comp[i].id == rgb[i])
3388  ++z->rgb;
3389  q = stbi__get8(s);
3390  z->img_comp[i].h = (q >> 4);
3391  if (!z->img_comp[i].h || z->img_comp[i].h > 4)
3392  return stbi__err("bad H", "Corrupt JPEG");
3393  z->img_comp[i].v = q & 15;
3394  if (!z->img_comp[i].v || z->img_comp[i].v > 4)
3395  return stbi__err("bad V", "Corrupt JPEG");
3396  z->img_comp[i].tq = stbi__get8(s);
3397  if (z->img_comp[i].tq > 3)
3398  return stbi__err("bad TQ", "Corrupt JPEG");
3399  }
3400 
3401  if (scan != STBI__SCAN_load)
3402  return 1;
3403 
3404  if (!stbi__mad3sizes_valid(s->img_x, s->img_y, s->img_n, 0))
3405  return stbi__err("too large", "Image too large to decode");
3406 
3407  for (i = 0; i < s->img_n; ++i) {
3408  if (z->img_comp[i].h > h_max)
3409  h_max = z->img_comp[i].h;
3410  if (z->img_comp[i].v > v_max)
3411  v_max = z->img_comp[i].v;
3412  }
3413 
3414  // compute interleaved mcu info
3415  z->img_h_max = h_max;
3416  z->img_v_max = v_max;
3417  z->img_mcu_w = h_max * 8;
3418  z->img_mcu_h = v_max * 8;
3419  // these sizes can't be more than 17 bits
3420  z->img_mcu_x = (s->img_x + z->img_mcu_w - 1) / z->img_mcu_w;
3421  z->img_mcu_y = (s->img_y + z->img_mcu_h - 1) / z->img_mcu_h;
3422 
3423  for (i = 0; i < s->img_n; ++i) {
3424  // number of effective pixels (e.g. for non-interleaved MCU)
3425  z->img_comp[i].x = (s->img_x * z->img_comp[i].h + h_max - 1) / h_max;
3426  z->img_comp[i].y = (s->img_y * z->img_comp[i].v + v_max - 1) / v_max;
3427  // to simplify generation, we'll allocate enough memory to decode
3428  // the bogus oversized data from using interleaved MCUs and their
3429  // big blocks (e.g. a 16x16 iMCU on an image of width 33); we won't
3430  // discard the extra data until colorspace conversion
3431  //
3432  // img_mcu_x, img_mcu_y: <=17 bits; comp[i].h and .v are <=4 (checked
3433  // earlier) so these muls can't overflow with 32-bit ints (which we
3434  // require)
3435  z->img_comp[i].w2 = z->img_mcu_x * z->img_comp[i].h * 8;
3436  z->img_comp[i].h2 = z->img_mcu_y * z->img_comp[i].v * 8;
3437  z->img_comp[i].coeff = 0;
3438  z->img_comp[i].raw_coeff = 0;
3439  z->img_comp[i].linebuf = NULL;
3440  z->img_comp[i].raw_data =
3441  stbi__malloc_mad2(z->img_comp[i].w2, z->img_comp[i].h2, 15);
3442  if (z->img_comp[i].raw_data == NULL)
3443  return stbi__free_jpeg_components(
3444  z, i + 1, stbi__err("outofmem", "Out of memory"));
3445  // align blocks for idct using mmx/sse
3446  z->img_comp[i].data =
3447  (stbi_uc *)(((size_t)z->img_comp[i].raw_data + 15) & ~15);
3448  if (z->progressive) {
3449  // w2, h2 are multiples of 8 (see above)
3450  z->img_comp[i].coeff_w = z->img_comp[i].w2 / 8;
3451  z->img_comp[i].coeff_h = z->img_comp[i].h2 / 8;
3452  z->img_comp[i].raw_coeff = stbi__malloc_mad3(
3453  z->img_comp[i].w2, z->img_comp[i].h2, sizeof(short), 15);
3454  if (z->img_comp[i].raw_coeff == NULL)
3455  return stbi__free_jpeg_components(
3456  z, i + 1, stbi__err("outofmem", "Out of memory"));
3457  z->img_comp[i].coeff =
3458  (short *)(((size_t)z->img_comp[i].raw_coeff + 15) & ~15);
3459  }
3460  }
3461 
3462  return 1;
3463 }
3464 
// Marker-code predicates, applied to the value returned by stbi__get_marker.
// Each macro may expand its argument more than once, so pass a plain value.
3465 // use comparisons since in some cases we handle more than one case (e.g. SOF)
3466 #define stbi__DNL(x) ((x) == 0xdc)
3467 #define stbi__SOI(x) ((x) == 0xd8)
3468 #define stbi__EOI(x) ((x) == 0xd9)
// any of the three frame-header codes this decoder accepts
3469 #define stbi__SOF(x) ((x) == 0xc0 || (x) == 0xc1 || (x) == 0xc2)
3470 #define stbi__SOS(x) ((x) == 0xda)
3471 
// the one SOF code that switches the decoder into progressive mode
3472 #define stbi__SOF_progressive(x) ((x) == 0xc2)
3473 
3474 static int stbi__decode_jpeg_header(stbi__jpeg *z, int scan)
3475 {
3476  int m;
3477  z->jfif = 0;
3478  z->app14_color_transform = -1; // valid values are 0,1,2
3479  z->marker = STBI__MARKER_none; // initialize cached marker to empty
3480  m = stbi__get_marker(z);
3481  if (!stbi__SOI(m))
3482  return stbi__err("no SOI", "Corrupt JPEG");
3483  if (scan == STBI__SCAN_type)
3484  return 1;
3485  m = stbi__get_marker(z);
3486  while (!stbi__SOF(m)) {
3487  if (!stbi__process_marker(z, m))
3488  return 0;
3489  m = stbi__get_marker(z);
3490  while (m == STBI__MARKER_none) {
3491  // some files have extra padding after their blocks, so ok, we'll
3492  // scan
3493  if (stbi__at_eof(z->s))
3494  return stbi__err("no SOF", "Corrupt JPEG");
3495  m = stbi__get_marker(z);
3496  }
3497  }
3498  z->progressive = stbi__SOF_progressive(m);
3499  if (!stbi__process_frame_header(z, scan))
3500  return 0;
3501  return 1;
3502 }
3503 
3504 // decode image to YCbCr format
3505 static int stbi__decode_jpeg_image(stbi__jpeg *j)
3506 {
3507  int m;
3508  for (m = 0; m < 4; m++) {
3509  j->img_comp[m].raw_data = NULL;
3510  j->img_comp[m].raw_coeff = NULL;
3511  }
3512  j->restart_interval = 0;
3513  if (!stbi__decode_jpeg_header(j, STBI__SCAN_load))
3514  return 0;
3515  m = stbi__get_marker(j);
3516  while (!stbi__EOI(m)) {
3517  if (stbi__SOS(m)) {
3518  if (!stbi__process_scan_header(j))
3519  return 0;
3520  if (!stbi__parse_entropy_coded_data(j))
3521  return 0;
3522  if (j->marker == STBI__MARKER_none) {
3523  // handle 0s at the end of image data from IP Kamera 9060
3524  while (!stbi__at_eof(j->s)) {
3525  int x = stbi__get8(j->s);
3526  if (x == 255) {
3527  j->marker = stbi__get8(j->s);
3528  break;
3529  }
3530  }
3531  // if we reach eof without hitting a marker, stbi__get_marker()
3532  // below will fail and we'll eventually return 0
3533  }
3534  } else if (stbi__DNL(m)) {
3535  int Ld = stbi__get16be(j->s);
3536  stbi__uint32 NL = stbi__get16be(j->s);
3537  if (Ld != 4)
3538  stbi__err("bad DNL len", "Corrupt JPEG");
3539  if (NL != j->s->img_y)
3540  stbi__err("bad DNL height", "Corrupt JPEG");
3541  } else {
3542  if (!stbi__process_marker(j, m))
3543  return 0;
3544  }
3545  m = stbi__get_marker(j);
3546  }
3547  if (j->progressive)
3548  stbi__jpeg_finish(j);
3549  return 1;
3550 }
3551 
3552 // static jfif-centered resampling (across block boundaries)
3553 
// Signature shared by all row resamplers in this section: given two input
// rows (in0, in1) of w samples, a horizontal expansion factor hs and a
// destination buffer, return a pointer to the expanded row. The returned
// pointer is not always `out` (resample_row_1 returns its input row).
3554 typedef stbi_uc *(*resample_row_func)(stbi_uc *out, stbi_uc *in0, stbi_uc *in1,
3555  int w, int hs);
3556 
// divide by 4 with a right shift and narrow to a byte; callers add the
// rounding bias (+2) themselves before invoking it
3557 #define stbi__div4(x) ((stbi_uc)((x) >> 2))
3558 
3559 static stbi_uc *resample_row_1(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far,
3560  int w, int hs)
3561 {
3562  STBI_NOTUSED(out);
3563  STBI_NOTUSED(in_far);
3564  STBI_NOTUSED(w);
3565  STBI_NOTUSED(hs);
3566  return in_near;
3567 }
3568 
3569 static stbi_uc *stbi__resample_row_v_2(stbi_uc *out, stbi_uc *in_near,
3570  stbi_uc *in_far, int w, int hs)
3571 {
3572  // need to generate two samples vertically for every one in input
3573  int i;
3574  STBI_NOTUSED(hs);
3575  for (i = 0; i < w; ++i)
3576  out[i] = stbi__div4(3 * in_near[i] + in_far[i] + 2);
3577  return out;
3578 }
3579 
3580 static stbi_uc *stbi__resample_row_h_2(stbi_uc *out, stbi_uc *in_near,
3581  stbi_uc *in_far, int w, int hs)
3582 {
3583  // need to generate two samples horizontally for every one in input
3584  int i;
3585  stbi_uc *input = in_near;
3586 
3587  if (w == 1) {
3588  // if only one sample, can't do any interpolation
3589  out[0] = out[1] = input[0];
3590  return out;
3591  }
3592 
3593  out[0] = input[0];
3594  out[1] = stbi__div4(input[0] * 3 + input[1] + 2);
3595  for (i = 1; i < w - 1; ++i) {
3596  int n = 3 * input[i] + 2;
3597  out[i * 2 + 0] = stbi__div4(n + input[i - 1]);
3598  out[i * 2 + 1] = stbi__div4(n + input[i + 1]);
3599  }
3600  out[i * 2 + 0] = stbi__div4(input[w - 2] * 3 + input[w - 1] + 2);
3601  out[i * 2 + 1] = input[w - 1];
3602 
3603  STBI_NOTUSED(in_far);
3604  STBI_NOTUSED(hs);
3605 
3606  return out;
3607 }
3608 
3609 #define stbi__div16(x) ((stbi_uc)((x) >> 4))
3610 
3611 static stbi_uc *stbi__resample_row_hv_2(stbi_uc *out, stbi_uc *in_near,
3612  stbi_uc *in_far, int w, int hs)
3613 {
3614  // need to generate 2x2 samples for every one in input
3615  int i, t0, t1;
3616  if (w == 1) {
3617  out[0] = out[1] = stbi__div4(3 * in_near[0] + in_far[0] + 2);
3618  return out;
3619  }
3620 
3621  t1 = 3 * in_near[0] + in_far[0];
3622  out[0] = stbi__div4(t1 + 2);
3623  for (i = 1; i < w; ++i) {
3624  t0 = t1;
3625  t1 = 3 * in_near[i] + in_far[i];
3626  out[i * 2 - 1] = stbi__div16(3 * t0 + t1 + 8);
3627  out[i * 2] = stbi__div16(3 * t1 + t0 + 8);
3628  }
3629  out[w * 2 - 1] = stbi__div4(t1 + 2);
3630 
3631  STBI_NOTUSED(hs);
3632 
3633  return out;
3634 }
3635 
3636 #if defined(STBI_SSE2) || defined(STBI_NEON)
// SIMD variant of stbi__resample_row_hv_2 (SSE2 or NEON, chosen at compile
// time). Writes 2*w samples to out from the w samples of in_near/in_far:
// columns are blended vertically as 3*near + far, then neighbouring column
// sums are blended 3:1 horizontally and divided by 16 with rounding.
// Groups of 8 input columns go through the vector loop; the columns it
// leaves over, and the right edge, are finished by the scalar code after
// the loop. hs is ignored. Returns out.
3637 static stbi_uc *stbi__resample_row_hv_2_simd(stbi_uc *out, stbi_uc *in_near,
3638  stbi_uc *in_far, int w, int hs)
3639 {
3640  // need to generate 2x2 samples for every one in input
3641  int i = 0, t0, t1;
3642 
3643  if (w == 1) {
3644  out[0] = out[1] = stbi__div4(3 * in_near[0] + in_far[0] + 2);
3645  return out;
3646  }
3647 
3648  t1 = 3 * in_near[0] + in_far[0];
3649  // process groups of 8 pixels for as long as we can.
3650  // note we can't handle the last pixel in a row in this loop
3651  // because we need to handle the filter boundary conditions.
// the bound (w - 1) & ~7 guarantees i + 8 <= w - 1 inside the body, so the
// in_near[i + 8] / in_far[i + 8] reads below stay within the row.
// on the first iteration "prev" is t1, i.e. column 0's own sum, so output
// 0 works out to (4 * t1 + 8) >> 4, the same value as the (t1 + 2) >> 2
// edge case in the scalar version.
3652  for (; i < ((w - 1) & ~7); i += 8) {
3653 #if defined(STBI_SSE2)
3654  // load and perform the vertical filtering pass
3655  // this uses 3*x + y = 4*x + (y - x)
3656  __m128i zero = _mm_setzero_si128();
3657  __m128i farb = _mm_loadl_epi64((__m128i *)(in_far + i));
3658  __m128i nearb = _mm_loadl_epi64((__m128i *)(in_near + i));
3659  __m128i farw = _mm_unpacklo_epi8(farb, zero);
3660  __m128i nearw = _mm_unpacklo_epi8(nearb, zero);
3661  __m128i diff = _mm_sub_epi16(farw, nearw);
3662  __m128i nears = _mm_slli_epi16(nearw, 2);
3663  __m128i curr = _mm_add_epi16(nears, diff); // current row
3664 
3665  // horizontal filter works the same based on shifted vers of current
3666  // row. "prev" is current row shifted right by 1 pixel; we need to
3667  // insert the previous pixel value (from t1).
3668  // "next" is current row shifted left by 1 pixel, with first pixel
3669  // of next block of 8 pixels added in.
3670  __m128i prv0 = _mm_slli_si128(curr, 2);
3671  __m128i nxt0 = _mm_srli_si128(curr, 2);
3672  __m128i prev = _mm_insert_epi16(prv0, t1, 0);
3673  __m128i next =
3674  _mm_insert_epi16(nxt0, 3 * in_near[i + 8] + in_far[i + 8], 7);
3675 
3676  // horizontal filter, polyphase implementation since it's convenient:
3677  // even pixels = 3*cur + prev = cur*4 + (prev - cur)
3678  // odd pixels = 3*cur + next = cur*4 + (next - cur)
3679  // note the shared term.
3680  __m128i bias = _mm_set1_epi16(8);
3681  __m128i curs = _mm_slli_epi16(curr, 2);
3682  __m128i prvd = _mm_sub_epi16(prev, curr);
3683  __m128i nxtd = _mm_sub_epi16(next, curr);
3684  __m128i curb = _mm_add_epi16(curs, bias);
3685  __m128i even = _mm_add_epi16(prvd, curb);
3686  __m128i odd = _mm_add_epi16(nxtd, curb);
3687 
3688  // interleave even and odd pixels, then undo scaling.
3689  __m128i int0 = _mm_unpacklo_epi16(even, odd);
3690  __m128i int1 = _mm_unpackhi_epi16(even, odd);
3691  __m128i de0 = _mm_srli_epi16(int0, 4);
3692  __m128i de1 = _mm_srli_epi16(int1, 4);
3693 
3694  // pack and write output
3695  __m128i outv = _mm_packus_epi16(de0, de1);
3696  _mm_storeu_si128((__m128i *)(out + i * 2), outv);
3697 #elif defined(STBI_NEON)
3698  // load and perform the vertical filtering pass
3699  // this uses 3*x + y = 4*x + (y - x)
3700  uint8x8_t farb = vld1_u8(in_far + i);
3701  uint8x8_t nearb = vld1_u8(in_near + i);
3702  int16x8_t diff = vreinterpretq_s16_u16(vsubl_u8(farb, nearb));
3703  int16x8_t nears = vreinterpretq_s16_u16(vshll_n_u8(nearb, 2));
3704  int16x8_t curr = vaddq_s16(nears, diff); // current row
3705 
3706  // horizontal filter works the same based on shifted vers of current
3707  // row. "prev" is current row shifted right by 1 pixel; we need to
3708  // insert the previous pixel value (from t1).
3709  // "next" is current row shifted left by 1 pixel, with first pixel
3710  // of next block of 8 pixels added in.
3711  int16x8_t prv0 = vextq_s16(curr, curr, 7);
3712  int16x8_t nxt0 = vextq_s16(curr, curr, 1);
3713  int16x8_t prev = vsetq_lane_s16(t1, prv0, 0);
3714  int16x8_t next =
3715  vsetq_lane_s16(3 * in_near[i + 8] + in_far[i + 8], nxt0, 7);
3716 
3717  // horizontal filter, polyphase implementation since it's convenient:
3718  // even pixels = 3*cur + prev = cur*4 + (prev - cur)
3719  // odd pixels = 3*cur + next = cur*4 + (next - cur)
3720  // note the shared term.
3721  int16x8_t curs = vshlq_n_s16(curr, 2);
3722  int16x8_t prvd = vsubq_s16(prev, curr);
3723  int16x8_t nxtd = vsubq_s16(next, curr);
3724  int16x8_t even = vaddq_s16(curs, prvd);
3725  int16x8_t odd = vaddq_s16(curs, nxtd);
3726 
3727  // undo scaling and round, then store with even/odd phases interleaved
3728  uint8x8x2_t o;
3729  o.val[0] = vqrshrun_n_s16(even, 4);
3730  o.val[1] = vqrshrun_n_s16(odd, 4);
3731  vst2_u8(out + i * 2, o);
3732 #endif
3733 
3734  // "previous" value for next iter
3735  t1 = 3 * in_near[i + 7] + in_far[i + 7];
3736  }
3737 
// scalar tail. The vector loop wrote out[i*2 .. i*2+15] for each group but
// not the even output of the column it stopped at, so that one is produced
// first. If the loop never ran (i == 0) t0 equals t1 and this again gives
// the left-edge value.
3738  t0 = t1;
3739  t1 = 3 * in_near[i] + in_far[i];
3740  out[i * 2] = stbi__div16(3 * t1 + t0 + 8);
3741 
3742  for (++i; i < w; ++i) {
3743  t0 = t1;
3744  t1 = 3 * in_near[i] + in_far[i];
3745  out[i * 2 - 1] = stbi__div16(3 * t0 + t1 + 8);
3746  out[i * 2] = stbi__div16(3 * t1 + t0 + 8);
3747  }
// right edge: the last output uses the vertical blend alone
3748  out[w * 2 - 1] = stbi__div4(t1 + 2);
3749 
3750  STBI_NOTUSED(hs);
3751 
3752  return out;
3753 }
3754 #endif
3755 
3756 static stbi_uc *stbi__resample_row_generic(stbi_uc *out, stbi_uc *in_near,
3757  stbi_uc *in_far, int w, int hs)
3758 {
3759  // resample with nearest-neighbor
3760  int i, j;
3761  STBI_NOTUSED(in_far);
3762  for (i = 0; i < w; ++i)
3763  for (j = 0; j < hs; ++j)
3764  out[i * hs + j] = in_near[i];
3765  return out;
3766 }
3767 
3768 // this is a reduced-precision calculation of YCbCr-to-RGB introduced
3769 // to make sure the code produces the same results in both SIMD and scalar
3770 #define stbi__float2fixed(x) (((int)((x)*4096.0f + 0.5f)) << 8)
3771 static void stbi__YCbCr_to_RGB_row(stbi_uc *out, const stbi_uc *y,
3772  const stbi_uc *pcb, const stbi_uc *pcr,
3773  int count, int step)
3774 {
3775  int i;
3776  for (i = 0; i < count; ++i) {
3777  int y_fixed = (y[i] << 20) + (1 << 19); // rounding
3778  int r, g, b;
3779  int cr = pcr[i] - 128;
3780  int cb = pcb[i] - 128;
3781  r = y_fixed + cr * stbi__float2fixed(1.40200f);
3782  g = y_fixed + (cr * -stbi__float2fixed(0.71414f)) +
3783  ((cb * -stbi__float2fixed(0.34414f)) & 0xffff0000);
3784  b = y_fixed + cb * stbi__float2fixed(1.77200f);
3785  r >>= 20;
3786  g >>= 20;
3787  b >>= 20;
3788  if ((unsigned)r > 255) {
3789  if (r < 0)
3790  r = 0;
3791  else
3792  r = 255;
3793  }
3794  if ((unsigned)g > 255) {
3795  if (g < 0)
3796  g = 0;
3797  else
3798  g = 255;
3799  }
3800  if ((unsigned)b > 255) {
3801  if (b < 0)
3802  b = 0;
3803  else
3804  b = 255;
3805  }
3806  out[0] = (stbi_uc)r;
3807  out[1] = (stbi_uc)g;
3808  out[2] = (stbi_uc)b;
3809  out[3] = 255;
3810  out += step;
3811  }
3812 }
3813 
3814 #if defined(STBI_SSE2) || defined(STBI_NEON)
// SIMD-accelerated YCbCr -> RGB conversion for one row (SSE2 or NEON).
//   out:   destination; each pixel stores r, g, b, 255 and then out
//          advances by step bytes
//   y, pcb, pcr: luma / Cb / Cr rows, count samples each
//   count: number of pixels
//   step:  bytes between consecutive output pixels
// Only step == 4 takes the vector path, 8 pixels per iteration. Pixels left
// over after the vector loop, and every pixel when step != 4, go through
// the scalar loop at the bottom, which repeats the arithmetic of
// stbi__YCbCr_to_RGB_row.
3815 static void stbi__YCbCr_to_RGB_simd(stbi_uc *out, stbi_uc const *y,
3816  stbi_uc const *pcb, stbi_uc const *pcr,
3817  int count, int step)
3818 {
3819  int i = 0;
3820 
3821 #ifdef STBI_SSE2
3822  // step == 3 is pretty ugly on the final interleave, and i'm not convinced
3823  // it's useful in practice (you wouldn't use it for textures, for example).
3824  // so just accelerate step == 4 case.
3825  if (step == 4) {
3826  // this is a fairly straightforward implementation and not
3827  // super-optimized.
3828  __m128i signflip = _mm_set1_epi8(-0x80);
3829  __m128i cr_const0 = _mm_set1_epi16((short)(1.40200f * 4096.0f + 0.5f));
3830  __m128i cr_const1 = _mm_set1_epi16(-(short)(0.71414f * 4096.0f + 0.5f));
3831  __m128i cb_const0 = _mm_set1_epi16(-(short)(0.34414f * 4096.0f + 0.5f));
3832  __m128i cb_const1 = _mm_set1_epi16((short)(1.77200f * 4096.0f + 0.5f));
3833  __m128i y_bias = _mm_set1_epi8((char)(unsigned char)128);
3834  __m128i xw = _mm_set1_epi16(255); // alpha channel
3835 
3836  for (; i + 7 < count; i += 8) {
3837  // load
3838  __m128i y_bytes = _mm_loadl_epi64((__m128i *)(y + i));
3839  __m128i cr_bytes = _mm_loadl_epi64((__m128i *)(pcr + i));
3840  __m128i cb_bytes = _mm_loadl_epi64((__m128i *)(pcb + i));
3841  __m128i cr_biased = _mm_xor_si128(cr_bytes, signflip); // -128
3842  __m128i cb_biased = _mm_xor_si128(cb_bytes, signflip); // -128
3843 
3844  // unpack to short (and left-shift cr, cb by 8)
// y lands in the high byte with the constant 128 below it; after the
// shift right by 4 that is y*16 + 8, i.e. the rounding bias is built in
3845  __m128i yw = _mm_unpacklo_epi8(y_bias, y_bytes);
3846  __m128i crw = _mm_unpacklo_epi8(_mm_setzero_si128(), cr_biased);
3847  __m128i cbw = _mm_unpacklo_epi8(_mm_setzero_si128(), cb_biased);
3848 
3849  // color transform
3850  __m128i yws = _mm_srli_epi16(yw, 4);
3851  __m128i cr0 = _mm_mulhi_epi16(cr_const0, crw);
3852  __m128i cb0 = _mm_mulhi_epi16(cb_const0, cbw);
3853  __m128i cb1 = _mm_mulhi_epi16(cbw, cb_const1);
3854  __m128i cr1 = _mm_mulhi_epi16(crw, cr_const1);
3855  __m128i rws = _mm_add_epi16(cr0, yws);
3856  __m128i gwt = _mm_add_epi16(cb0, yws);
3857  __m128i bws = _mm_add_epi16(yws, cb1);
3858  __m128i gws = _mm_add_epi16(gwt, cr1);
3859 
3860  // descale
3861  __m128i rw = _mm_srai_epi16(rws, 4);
3862  __m128i bw = _mm_srai_epi16(bws, 4);
3863  __m128i gw = _mm_srai_epi16(gws, 4);
3864 
3865  // back to byte, set up for transpose
3866  __m128i brb = _mm_packus_epi16(rw, bw);
3867  __m128i gxb = _mm_packus_epi16(gw, xw);
3868 
3869  // transpose to interleave channels
3870  __m128i t0 = _mm_unpacklo_epi8(brb, gxb);
3871  __m128i t1 = _mm_unpackhi_epi8(brb, gxb);
3872  __m128i o0 = _mm_unpacklo_epi16(t0, t1);
3873  __m128i o1 = _mm_unpackhi_epi16(t0, t1);
3874 
3875  // store
// 8 pixels * 4 bytes = 32 bytes per iteration
3876  _mm_storeu_si128((__m128i *)(out + 0), o0);
3877  _mm_storeu_si128((__m128i *)(out + 16), o1);
3878  out += 32;
3879  }
3880  }
3881 #endif
3882 
3883 #ifdef STBI_NEON
3884  // in this version, step=3 support would be easy to add. but is there
3885  // demand?
3886  if (step == 4) {
3887  // this is a fairly straightforward implementation and not
3888  // super-optimized.
3889  uint8x8_t signflip = vdup_n_u8(0x80);
3890  int16x8_t cr_const0 = vdupq_n_s16((short)(1.40200f * 4096.0f + 0.5f));
3891  int16x8_t cr_const1 = vdupq_n_s16(-(short)(0.71414f * 4096.0f + 0.5f));
3892  int16x8_t cb_const0 = vdupq_n_s16(-(short)(0.34414f * 4096.0f + 0.5f));
3893  int16x8_t cb_const1 = vdupq_n_s16((short)(1.77200f * 4096.0f + 0.5f));
3894 
3895  for (; i + 7 < count; i += 8) {
3896  // load
3897  uint8x8_t y_bytes = vld1_u8(y + i);
3898  uint8x8_t cr_bytes = vld1_u8(pcr + i);
3899  uint8x8_t cb_bytes = vld1_u8(pcb + i);
3900  int8x8_t cr_biased =
3901  vreinterpret_s8_u8(vsub_u8(cr_bytes, signflip));
3902  int8x8_t cb_biased =
3903  vreinterpret_s8_u8(vsub_u8(cb_bytes, signflip));
3904 
3905  // expand to s16
3906  int16x8_t yws = vreinterpretq_s16_u16(vshll_n_u8(y_bytes, 4));
3907  int16x8_t crw = vshll_n_s8(cr_biased, 7);
3908  int16x8_t cbw = vshll_n_s8(cb_biased, 7);
3909 
3910  // color transform
3911  int16x8_t cr0 = vqdmulhq_s16(crw, cr_const0);
3912  int16x8_t cb0 = vqdmulhq_s16(cbw, cb_const0);
3913  int16x8_t cr1 = vqdmulhq_s16(crw, cr_const1);
3914  int16x8_t cb1 = vqdmulhq_s16(cbw, cb_const1);
3915  int16x8_t rws = vaddq_s16(yws, cr0);
3916  int16x8_t gws = vaddq_s16(vaddq_s16(yws, cb0), cr1);
3917  int16x8_t bws = vaddq_s16(yws, cb1);
3918 
3919  // undo scaling, round, convert to byte
3920  uint8x8x4_t o;
3921  o.val[0] = vqrshrun_n_s16(rws, 4);
3922  o.val[1] = vqrshrun_n_s16(gws, 4);
3923  o.val[2] = vqrshrun_n_s16(bws, 4);
3924  o.val[3] = vdup_n_u8(255);
3925 
3926  // store, interleaving r/g/b/a
3927  vst4_u8(out, o);
3928  out += 8 * 4;
3929  }
3930  }
3931 #endif
3932 
// scalar path: picks up at whatever index the vector loop stopped at
// (i is still 0 when step != 4).
// NOTE(review): out[3] is stored for every pixel regardless of step, so with
// a step below 4 the last pixel writes past its own RGB bytes -- presumably
// the caller's buffer has room for that; confirm at the call site.
3933  for (; i < count; ++i) {
3934  int y_fixed = (y[i] << 20) + (1 << 19); // rounding
3935  int r, g, b;
3936  int cr = pcr[i] - 128;
3937  int cb = pcb[i] - 128;
3938  r = y_fixed + cr * stbi__float2fixed(1.40200f);
3939  g = y_fixed + cr * -stbi__float2fixed(0.71414f) +
3940  ((cb * -stbi__float2fixed(0.34414f)) & 0xffff0000);
3941  b = y_fixed + cb * stbi__float2fixed(1.77200f);
3942  r >>= 20;
3943  g >>= 20;
3944  b >>= 20;
// clamp each channel to 0..255; the unsigned compare catches both
// negative and too-large values with a single test
3945  if ((unsigned)r > 255) {
3946  if (r < 0)
3947  r = 0;
3948  else
3949  r = 255;
3950  }
3951  if ((unsigned)g > 255) {
3952  if (g < 0)
3953  g = 0;
3954  else
3955  g = 255;
3956  }
3957  if ((unsigned)b > 255) {
3958  if (b < 0)
3959  b = 0;
3960  else
3961  b = 255;
3962  }
3963  out[0] = (stbi_uc)r;
3964  out[1] = (stbi_uc)g;
3965  out[2] = (stbi_uc)b;
3966  out[3] = 255;
3967  out += step;
3968  }
3969 }
3970 #endif
3971 
3972 // set up the kernels
3973 static void stbi__setup_jpeg(stbi__jpeg *j)
3974 {
3975  j->idct_block_kernel = stbi__idct_block;
3976  j->YCbCr_to_RGB_kernel = stbi__YCbCr_to_RGB_row;
3977  j->resample_row_hv_2_kernel = stbi__resample_row_hv_2;
3978 
3979 #ifdef STBI_SSE2
3980  if (stbi__sse2_available()) {
3981  j->idct_block_kernel = stbi__idct_simd;
3982  j->YCbCr_to_RGB_kernel = stbi__YCbCr_to_RGB_simd;
3983  j->resample_row_hv_2_kernel = stbi__resample_row_hv_2_simd;
3984  }
3985 #endif
3986 
3987 #ifdef STBI_NEON
3988  j->idct_block_kernel = stbi__idct_simd;
3989  j->YCbCr_to_RGB_kernel = stbi__YCbCr_to_RGB_simd;
3990  j->resample_row_hv_2_kernel = stbi__resample_row_hv_2_simd;
3991 #endif
3992 }
3993 
3994 // clean up the temporary component buffers
3995 static void stbi__cleanup_jpeg(stbi__jpeg *j)
3996 {
3997  stbi__free_jpeg_components(j, j->s->img_n, 0);
3998 }
3999 
4000 typedef struct {
4001  resample_row_func resample;
4002  stbi_uc *line0, *line1;
4003  int hs, vs; // expansion factor in each axis
4004  int w_lores; // horizontal pixels pre-expansion
4005  int ystep; // how far through vertical expansion we are
4006  int ypos; // which pre-expansion row we're on
4007 } stbi__resample;
4008 
4009 // fast 0..255 * 0..255 => 0..255 rounded multiplication
4010 static stbi_uc stbi__blinn_8x8(stbi_uc x, stbi_uc y)
4011 {
4012  unsigned int t = x * y + 128;
4013  return (stbi_uc)((t + (t >> 8)) >> 8);
4014 }
4015 
4016 static stbi_uc *load_jpeg_image(stbi__jpeg *z, int *out_x, int *out_y,
4017  int *comp, int req_comp)
4018 {
4019  int n, decode_n, is_rgb;
4020  z->s->img_n = 0; // make stbi__cleanup_jpeg safe
4021 
4022  // validate req_comp
4023  if (req_comp < 0 || req_comp > 4)
4024  return stbi__errpuc("bad req_comp", "Internal error");
4025 
4026  // load a jpeg image from whichever source, but leave in YCbCr format
4027  if (!stbi__decode_jpeg_image(z)) {
4028  stbi__cleanup_jpeg(z);
4029  return NULL;
4030  }
4031 
4032  // determine actual number of components to generate
4033  n = req_comp ? req_comp : z->s->img_n >= 3 ? 3 : 1;
4034 
4035  is_rgb = z->s->img_n == 3 &&
4036  (z->rgb == 3 || (z->app14_color_transform == 0 && !z->jfif));
4037 
4038  if (z->s->img_n == 3 && n < 3 && !is_rgb)
4039  decode_n = 1;
4040  else
4041  decode_n = z->s->img_n;
4042 
4043  // resample and color-convert
4044  {
4045  int k;
4046  unsigned int i, j;
4047  stbi_uc *output;
4048  stbi_uc *coutput[4];
4049 
4050  stbi__resample res_comp[4];
4051 
4052  for (k = 0; k < decode_n; ++k) {
4053  stbi__resample *r = &res_comp[k];
4054 
4055  // allocate line buffer big enough for upsampling off the edges
4056  // with upsample factor of 4
4057  z->img_comp[k].linebuf = (stbi_uc *)stbi__malloc(z->s->img_x + 3);
4058  if (!z->img_comp[k].linebuf) {
4059  stbi__cleanup_jpeg(z);
4060  return stbi__errpuc("outofmem", "Out of memory");
4061  }
4062 
4063  r->hs = z->img_h_max / z->img_comp[k].h;
4064  r->vs = z->img_v_max / z->img_comp[k].v;
4065  r->ystep = r->vs >> 1;
4066  r->w_lores = (z->s->img_x + r->hs - 1) / r->hs;
4067  r->ypos = 0;
4068  r->line0 = r->line1 = z->img_comp[k].data;
4069 
4070  if (r->hs == 1 && r->vs == 1)
4071  r->resample = resample_row_1;
4072  else if (r->hs == 1 && r->vs == 2)
4073  r->resample = stbi__resample_row_v_2;
4074  else if (r->hs == 2 && r->vs == 1)
4075  r->resample = stbi__resample_row_h_2;
4076  else if (r->hs == 2 && r->vs == 2)
4077  r->resample = z->resample_row_hv_2_kernel;
4078  else
4079  r->resample = stbi__resample_row_generic;
4080  }
4081 
4082  // can't error after this so, this is safe
4083  output = (stbi_uc *)stbi__malloc_mad3(n, z->s->img_x, z->s->img_y, 1);
4084  if (!output) {
4085  stbi__cleanup_jpeg(z);
4086  return stbi__errpuc("outofmem", "Out of memory");
4087  }
4088 
4089  // now go ahead and resample
4090  for (j = 0; j < z->s->img_y; ++j) {
4091  stbi_uc *out = output + n * z->s->img_x * j;
4092  for (k = 0; k < decode_n; ++k) {
4093  stbi__resample *r = &res_comp[k];
4094  int y_bot = r->ystep >= (r->vs >> 1);
4095  coutput[k] = r->resample(
4096  z->img_comp[k].linebuf, y_bot ? r->line1 : r->line0,
4097  y_bot ? r->line0 : r->line1, r->w_lores, r->hs);
4098  if (++r->ystep >= r->vs) {
4099  r->ystep = 0;
4100  r->line0 = r->line1;
4101  if (++r->ypos < z->img_comp[k].y)
4102  r->line1 += z->img_comp[k].w2;
4103  }
4104  }
4105  if (n >= 3) {
4106  stbi_uc *y = coutput[0];
4107  if (z->s->img_n == 3) {
4108  if (is_rgb) {
4109  for (i = 0; i < z->s->img_x; ++i) {
4110  out[0] = y[i];
4111  out[1] = coutput[1][i];
4112  out[2] = coutput[2][i];
4113  out[3] = 255;
4114  out += n;
4115  }
4116  } else {
4117  z->YCbCr_to_RGB_kernel(out, y, coutput[1], coutput[2],
4118  z->s->img_x, n);
4119  }
4120  } else if (z->s->img_n == 4) {
4121  if (z->app14_color_transform == 0) { // CMYK
4122  for (i = 0; i < z->s->img_x; ++i) {
4123  stbi_uc m = coutput[3][i];
4124  out[0] = stbi__blinn_8x8(coutput[0][i], m);
4125  out[1] = stbi__blinn_8x8(coutput[1][i], m);
4126  out[2] = stbi__blinn_8x8(coutput[2][i], m);
4127  out[3] = 255;
4128  out += n;
4129  }
4130  } else if (z->app14_color_transform == 2) { // YCCK
4131  z->YCbCr_to_RGB_kernel(out, y, coutput[1], coutput[2],
4132  z->s->img_x, n);
4133  for (i = 0; i < z->s->img_x; ++i) {
4134  stbi_uc m = coutput[3][i];
4135  out[0] = stbi__blinn_8x8(255 - out[0], m);
4136  out[1] = stbi__blinn_8x8(255 - out[1], m);
4137  out[2] = stbi__blinn_8x8(255 - out[2], m);
4138  out += n;
4139  }
4140  } else { // YCbCr + alpha? Ignore the fourth channel for
4141  // now
4142  z->YCbCr_to_RGB_kernel(out, y, coutput[1], coutput[2],
4143  z->s->img_x, n);
4144  }
4145  } else
4146  for (i = 0; i < z->s->img_x; ++i) {
4147  out[0] = out[1] = out[2] = y[i];
4148  out[3] = 255; // not used if n==3
4149  out += n;
4150  }
4151  } else {
4152  if (is_rgb) {
4153  if (n == 1)
4154  for (i = 0; i < z->s->img_x; ++i)
4155  *out++ = stbi__compute_y(
4156  coutput[0][i], coutput[1][i], coutput[2][i]);
4157  else {
4158  for (i = 0; i < z->s->img_x; ++i, out += 2) {
4159  out[0] = stbi__compute_y(
4160  coutput[0][i], coutput[1][i], coutput[2][i]);
4161  out[1] = 255;
4162  }
4163  }
4164  } else if (z->s->img_n == 4 && z->app14_color_transform == 0) {
4165  for (i = 0; i < z->s->img_x; ++i) {
4166  stbi_uc m = coutput[3][i];
4167  stbi_uc r = stbi__blinn_8x8(coutput[0][i], m);
4168  stbi_uc g = stbi__blinn_8x8(coutput[1][i], m);
4169  stbi_uc b = stbi__blinn_8x8(coutput[2][i], m);
4170  out[0] = stbi__compute_y(r, g, b);
4171  out[1] = 255;
4172  out += n;
4173  }
4174  } else if (z->s->img_n == 4 && z->app14_color_transform == 2) {
4175  for (i = 0; i < z->s->img_x; ++i) {
4176  out[0] =
4177  stbi__blinn_8x8(255 - coutput[0][i], coutput[3][i]);
4178  out[1] = 255;
4179  out += n;
4180  }
4181  } else {
4182  stbi_uc *y = coutput[0];
4183  if (n == 1)
4184  for (i = 0; i < z->s->img_x; ++i)
4185  out[i] = y[i];
4186  else
4187  for (i = 0; i < z->s->img_x; ++i)
4188  *out++ = y[i], *out++ = 255;
4189  }
4190  }
4191  }
4192  stbi__cleanup_jpeg(z);
4193  *out_x = z->s->img_x;
4194  *out_y = z->s->img_y;
4195  if (comp)
4196  *comp = z->s->img_n >= 3
4197  ? 3
4198  : 1; // report original components, not output
4199  return output;
4200  }
4201 }
4202 
4203 static void *stbi__jpeg_load(stbi__context *s, int *x, int *y, int *comp,
4204  int req_comp, stbi__result_info *ri)
4205 {
4206  unsigned char *result;
4207  stbi__jpeg *j = (stbi__jpeg *)stbi__malloc(sizeof(stbi__jpeg));
4208  STBI_NOTUSED(ri);
4209  j->s = s;
4210  stbi__setup_jpeg(j);
4211  result = load_jpeg_image(j, x, y, comp, req_comp);
4212  STBI_FREE(j);
4213  return result;
4214 }
4215 
4216 static int stbi__jpeg_test(stbi__context *s)
4217 {
4218  int r;
4219  stbi__jpeg *j = (stbi__jpeg *)stbi__malloc(sizeof(stbi__jpeg));
4220  j->s = s;
4221  stbi__setup_jpeg(j);
4222  r = stbi__decode_jpeg_header(j, STBI__SCAN_type);
4223  stbi__rewind(s);
4224  STBI_FREE(j);
4225  return r;
4226 }
4227 
4228 static int stbi__jpeg_info_raw(stbi__jpeg *j, int *x, int *y, int *comp)
4229 {
4230  if (!stbi__decode_jpeg_header(j, STBI__SCAN_header)) {
4231  stbi__rewind(j->s);
4232  return 0;
4233  }
4234  if (x)
4235  *x = j->s->img_x;
4236  if (y)
4237  *y = j->s->img_y;
4238  if (comp)
4239  *comp = j->s->img_n >= 3 ? 3 : 1;
4240  return 1;
4241 }
4242 
4243 static int stbi__jpeg_info(stbi__context *s, int *x, int *y, int *comp)
4244 {
4245  int result;
4246  stbi__jpeg *j = (stbi__jpeg *)(stbi__malloc(sizeof(stbi__jpeg)));
4247  j->s = s;
4248  result = stbi__jpeg_info_raw(j, x, y, comp);
4249  STBI_FREE(j);
4250  return result;
4251 }
4252 #endif
4253 
4254 // public domain zlib decode v0.2 Sean Barrett 2006-11-18
4255 // simple implementation
4256 // - all input must be provided in an upfront buffer
4257 // - all output is written to a single output buffer (can
4258 // malloc/realloc)
4259 // performance
4260 // - fast huffman
4261 
4262 #ifndef STBI_NO_ZLIB
4263 
4264 // fast-way is faster to check than jpeg huffman, but slow way is slower
4265 #define STBI__ZFAST_BITS 9 // accelerate all cases in default tables
4266 #define STBI__ZFAST_MASK ((1 << STBI__ZFAST_BITS) - 1)
4267 
4268 // zlib-style huffman encoding
4269 // (jpegs packs from left, zlib from right, so can't share code)
4270 typedef struct {
4271  stbi__uint16 fast[1 << STBI__ZFAST_BITS];
4272  stbi__uint16 firstcode[16];
4273  int maxcode[17];
4274  stbi__uint16 firstsymbol[16];
4275  stbi_uc size[288];
4276  stbi__uint16 value[288];
4277 } stbi__zhuffman;
4278 
4279 stbi_inline static int stbi__bitreverse16(int n)
4280 {
4281  n = ((n & 0xAAAA) >> 1) | ((n & 0x5555) << 1);
4282  n = ((n & 0xCCCC) >> 2) | ((n & 0x3333) << 2);
4283  n = ((n & 0xF0F0) >> 4) | ((n & 0x0F0F) << 4);
4284  n = ((n & 0xFF00) >> 8) | ((n & 0x00FF) << 8);
4285  return n;
4286 }
4287 
4288 stbi_inline static int stbi__bit_reverse(int v, int bits)
4289 {
4290  STBI_ASSERT(bits <= 16);
4291  // to bit reverse n bits, reverse 16 and shift
4292  // e.g. 11 bits, bit reverse and shift away 5
4293  return stbi__bitreverse16(v) >> (16 - bits);
4294 }
4295 
4296 static int stbi__zbuild_huffman(stbi__zhuffman *z, const stbi_uc *sizelist,
4297  int num)
4298 {
4299  int i, k = 0;
4300  int code, next_code[16], sizes[17];
4301 
4302  // DEFLATE spec for generating codes
4303  memset(sizes, 0, sizeof(sizes));
4304  memset(z->fast, 0, sizeof(z->fast));
4305  for (i = 0; i < num; ++i)
4306  ++sizes[sizelist[i]];
4307  sizes[0] = 0;
4308  for (i = 1; i < 16; ++i)
4309  if (sizes[i] > (1 << i))
4310  return stbi__err("bad sizes", "Corrupt PNG");
4311  code = 0;
4312  for (i = 1; i < 16; ++i) {
4313  next_code[i] = code;
4314  z->firstcode[i] = (stbi__uint16)code;
4315  z->firstsymbol[i] = (stbi__uint16)k;
4316  code = (code + sizes[i]);
4317  if (sizes[i])
4318  if (code - 1 >= (1 << i))
4319  return stbi__err("bad codelengths", "Corrupt PNG");
4320  z->maxcode[i] = code << (16 - i); // preshift for inner loop
4321  code <<= 1;
4322  k += sizes[i];
4323  }
4324  z->maxcode[16] = 0x10000; // sentinel
4325  for (i = 0; i < num; ++i) {
4326  int s = sizelist[i];
4327  if (s) {
4328  int c = next_code[s] - z->firstcode[s] + z->firstsymbol[s];
4329  stbi__uint16 fastv = (stbi__uint16)((s << 9) | i);
4330  z->size[c] = (stbi_uc)s;
4331  z->value[c] = (stbi__uint16)i;
4332  if (s <= STBI__ZFAST_BITS) {
4333  int j = stbi__bit_reverse(next_code[s], s);
4334  while (j < (1 << STBI__ZFAST_BITS)) {
4335  z->fast[j] = fastv;
4336  j += (1 << s);
4337  }
4338  }
4339  ++next_code[s];
4340  }
4341  }
4342  return 1;
4343 }
4344 
4345 // zlib-from-memory implementation for PNG reading
4346 // because PNG allows splitting the zlib stream arbitrarily,
4347 // and it's annoying structurally to have PNG call ZLIB call PNG,
4348 // we require PNG read all the IDATs and combine them into a single
4349 // memory buffer
4350 
4351 typedef struct {
4352  stbi_uc *zbuffer, *zbuffer_end;
4353  int num_bits;
4354  stbi__uint32 code_buffer;
4355 
4356  char *zout;
4357  char *zout_start;
4358  char *zout_end;
4359  int z_expandable;
4360 
4361  stbi__zhuffman z_length, z_distance;
4362 } stbi__zbuf;
4363 
4364 stbi_inline static stbi_uc stbi__zget8(stbi__zbuf *z)
4365 {
4366  if (z->zbuffer >= z->zbuffer_end)
4367  return 0;
4368  return *z->zbuffer++;
4369 }
4370 
4371 static void stbi__fill_bits(stbi__zbuf *z)
4372 {
4373  do {
4374  STBI_ASSERT(z->code_buffer < (1U << z->num_bits));
4375  z->code_buffer |= (unsigned int)stbi__zget8(z) << z->num_bits;
4376  z->num_bits += 8;
4377  } while (z->num_bits <= 24);
4378 }
4379 
4380 stbi_inline static unsigned int stbi__zreceive(stbi__zbuf *z, int n)
4381 {
4382  unsigned int k;
4383  if (z->num_bits < n)
4384  stbi__fill_bits(z);
4385  k = z->code_buffer & ((1 << n) - 1);
4386  z->code_buffer >>= n;
4387  z->num_bits -= n;
4388  return k;
4389 }
4390 
4391 static int stbi__zhuffman_decode_slowpath(stbi__zbuf *a, stbi__zhuffman *z)
4392 {
4393  int b, s, k;
4394  // not resolved by fast table, so compute it the slow way
4395  // use jpeg approach, which requires MSbits at top
4396  k = stbi__bit_reverse(a->code_buffer, 16);
4397  for (s = STBI__ZFAST_BITS + 1;; ++s)
4398  if (k < z->maxcode[s])
4399  break;
4400  if (s == 16)
4401  return -1; // invalid code!
4402  // code size is s, so:
4403  b = (k >> (16 - s)) - z->firstcode[s] + z->firstsymbol[s];
4404  STBI_ASSERT(z->size[b] == s);
4405  a->code_buffer >>= s;
4406  a->num_bits -= s;
4407  return z->value[b];
4408 }
4409 
4410 stbi_inline static int stbi__zhuffman_decode(stbi__zbuf *a, stbi__zhuffman *z)
4411 {
4412  int b, s;
4413  if (a->num_bits < 16)
4414  stbi__fill_bits(a);
4415  b = z->fast[a->code_buffer & STBI__ZFAST_MASK];
4416  if (b) {
4417  s = b >> 9;
4418  a->code_buffer >>= s;
4419  a->num_bits -= s;
4420  return b & 511;
4421  }
4422  return stbi__zhuffman_decode_slowpath(a, z);
4423 }
4424 
4425 static int stbi__zexpand(stbi__zbuf *z, char *zout,
4426  int n) // need to make room for n bytes
4427 {
4428  char *q;
4429  int cur, limit, old_limit;
4430  z->zout = zout;
4431  if (!z->z_expandable)
4432  return stbi__err("output buffer limit", "Corrupt PNG");
4433  cur = (int)(z->zout - z->zout_start);
4434  limit = old_limit = (int)(z->zout_end - z->zout_start);
4435  while (cur + n > limit)
4436  limit *= 2;
4437  q = (char *)STBI_REALLOC_SIZED(z->zout_start, old_limit, limit);
4438  STBI_NOTUSED(old_limit);
4439  if (q == NULL)
4440  return stbi__err("outofmem", "Out of memory");
4441  z->zout_start = q;
4442  z->zout = q + cur;
4443  z->zout_end = q + limit;
4444  return 1;
4445 }
4446 
// DEFLATE length/distance decoding tables. They are only ever read, so
// they are const-qualified.

// base match length for length symbols 257.. (index = symbol - 257)
static const int stbi__zlength_base[31] = {
    3,  4,  5,  6,  7,  8,  9,  10,  11,  13,  15,  17,  19,  23, 27, 31,
    35, 43, 51, 59, 67, 83, 99, 115, 131, 163, 195, 227, 258, 0,  0};

// number of extra bits that follow each length symbol
static const int stbi__zlength_extra[31] = {0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1,
                                            1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4,
                                            4, 4, 5, 5, 5, 5, 0, 0, 0};

// base distance for each distance symbol
static const int stbi__zdist_base[32] = {
    1,    2,    3,    4,    5,    7,     9,     13,    17,  25,   33,
    49,   65,   97,   129,  193,  257,   385,   513,   769, 1025, 1537,
    2049, 3073, 4097, 6145, 8193, 12289, 16385, 24577, 0,   0};

// number of extra bits that follow each distance symbol
// (the last two entries are implicitly zero)
static const int stbi__zdist_extra[32] = {0, 0, 0,  0,  1,  1,  2,  2,  3,  3,
                                          4, 4, 5,  5,  6,  6,  7,  7,  8,  8,
                                          9, 9, 10, 10, 11, 11, 12, 12, 13, 13};
4463 
4464 static int stbi__parse_huffman_block(stbi__zbuf *a)
4465 {
4466  char *zout = a->zout;
4467  for (;;) {
4468  int z = stbi__zhuffman_decode(a, &a->z_length);
4469  if (z < 256) {
4470  if (z < 0)
4471  return stbi__err("bad huffman code",
4472  "Corrupt PNG"); // error in huffman codes
4473  if (zout >= a->zout_end) {
4474  if (!stbi__zexpand(a, zout, 1))
4475  return 0;
4476  zout = a->zout;
4477  }
4478  *zout++ = (char)z;
4479  } else {
4480  stbi_uc *p;
4481  int len, dist;
4482  if (z == 256) {
4483  a->zout = zout;
4484  return 1;
4485  }
4486  z -= 257;
4487  len = stbi__zlength_base[z];
4488  if (stbi__zlength_extra[z])
4489  len += stbi__zreceive(a, stbi__zlength_extra[z]);
4490  z = stbi__zhuffman_decode(a, &a->z_distance);
4491  if (z < 0)
4492  return stbi__err("bad huffman code", "Corrupt PNG");
4493  dist = stbi__zdist_base[z];
4494  if (stbi__zdist_extra[z])
4495  dist += stbi__zreceive(a, stbi__zdist_extra[z]);
4496  if (zout - a->zout_start < dist)
4497  return stbi__err("bad dist", "Corrupt PNG");
4498  if (zout + len > a->zout_end) {
4499  if (!stbi__zexpand(a, zout, len))
4500  return 0;
4501  zout = a->zout;
4502  }
4503  p = (stbi_uc *)(zout - dist);
4504  if (dist == 1) { // run of one byte; common in images.
4505  stbi_uc v = *p;
4506  if (len) {
4507  do
4508  *zout++ = v;
4509  while (--len);
4510  }
4511  } else {
4512  if (len) {
4513  do
4514  *zout++ = *p++;
4515  while (--len);
4516  }
4517  }
4518  }
4519  }
4520 }
4521 
4522 static int stbi__compute_huffman_codes(stbi__zbuf *a)
4523 {
4524  static stbi_uc length_dezigzag[19] = {16, 17, 18, 0, 8, 7, 9, 6, 10, 5,
4525  11, 4, 12, 3, 13, 2, 14, 1, 15};
4526  stbi__zhuffman z_codelength;
4527  stbi_uc lencodes[286 + 32 + 137]; // padding for maximum single op
4528  stbi_uc codelength_sizes[19];
4529  int i, n;
4530 
4531  int hlit = stbi__zreceive(a, 5) + 257;
4532  int hdist = stbi__zreceive(a, 5) + 1;
4533  int hclen = stbi__zreceive(a, 4) + 4;
4534  int ntot = hlit + hdist;
4535 
4536  memset(codelength_sizes, 0, sizeof(codelength_sizes));
4537  for (i = 0; i < hclen; ++i) {
4538  int s = stbi__zreceive(a, 3);
4539  codelength_sizes[length_dezigzag[i]] = (stbi_uc)s;
4540  }
4541  if (!stbi__zbuild_huffman(&z_codelength, codelength_sizes, 19))
4542  return 0;
4543 
4544  n = 0;
4545  while (n < ntot) {
4546  int c = stbi__zhuffman_decode(a, &z_codelength);
4547  if (c < 0 || c >= 19)
4548  return stbi__err("bad codelengths", "Corrupt PNG");
4549  if (c < 16)
4550  lencodes[n++] = (stbi_uc)c;
4551  else {
4552  stbi_uc fill = 0;
4553  if (c == 16) {
4554  c = stbi__zreceive(a, 2) + 3;
4555  if (n == 0)
4556  return stbi__err("bad codelengths", "Corrupt PNG");
4557  fill = lencodes[n - 1];
4558  } else if (c == 17)
4559  c = stbi__zreceive(a, 3) + 3;
4560  else {
4561  STBI_ASSERT(c == 18);
4562  c = stbi__zreceive(a, 7) + 11;
4563  }
4564  if (ntot - n < c)
4565  return stbi__err("bad codelengths", "Corrupt PNG");
4566  memset(lencodes + n, fill, c);
4567  n += c;
4568  }
4569  }
4570  if (n != ntot)
4571  return stbi__err("bad codelengths", "Corrupt PNG");
4572  if (!stbi__zbuild_huffman(&a->z_length, lencodes, hlit))
4573  return 0;
4574  if (!stbi__zbuild_huffman(&a->z_distance, lencodes + hlit, hdist))
4575  return 0;
4576  return 1;
4577 }
4578 
4579 static int stbi__parse_uncompressed_block(stbi__zbuf *a)
4580 {
4581  stbi_uc header[4];
4582  int len, nlen, k;
4583  if (a->num_bits & 7)
4584  stbi__zreceive(a, a->num_bits & 7); // discard
4585  // drain the bit-packed data into header
4586  k = 0;
4587  while (a->num_bits > 0) {
4588  header[k++] =
4589  (stbi_uc)(a->code_buffer & 255); // suppress MSVC run-time check
4590  a->code_buffer >>= 8;
4591  a->num_bits -= 8;
4592  }
4593  STBI_ASSERT(a->num_bits == 0);
4594  // now fill header the normal way
4595  while (k < 4)
4596  header[k++] = stbi__zget8(a);
4597  len = header[1] * 256 + header[0];
4598  nlen = header[3] * 256 + header[2];
4599  if (nlen != (len ^ 0xffff))
4600  return stbi__err("zlib corrupt", "Corrupt PNG");
4601  if (a->zbuffer + len > a->zbuffer_end)
4602  return stbi__err("read past buffer", "Corrupt PNG");
4603  if (a->zout + len > a->zout_end)
4604  if (!stbi__zexpand(a, a->zout, len))
4605  return 0;
4606  memcpy(a->zout, a->zbuffer, len);
4607  a->zbuffer += len;
4608  a->zout += len;
4609  return 1;
4610 }
4611 
4612 static int stbi__parse_zlib_header(stbi__zbuf *a)
4613 {
4614  int cmf = stbi__zget8(a);
4615  int cm = cmf & 15;
4616  /* int cinfo = cmf >> 4; */
4617  int flg = stbi__zget8(a);
4618  if ((cmf * 256 + flg) % 31 != 0)
4619  return stbi__err("bad zlib header", "Corrupt PNG"); // zlib spec
4620  if (flg & 32)
4621  return stbi__err("no preset dict",
4622  "Corrupt PNG"); // preset dictionary not allowed in png
4623  if (cm != 8)
4624  return stbi__err("bad compression",
4625  "Corrupt PNG"); // DEFLATE required for png
4626  // window = 1 << (8 + cinfo)... but who cares, we fully buffer output
4627  return 1;
4628 }
4629 
4630 static const stbi_uc stbi__zdefault_length[288] = {
4631  8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
4632  8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
4633  8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
4634  8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
4635  8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
4636  8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
4637  9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
4638  9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
4639  9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
4640  9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
4641  9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 7, 7, 7, 7, 7, 7, 7, 7,
4642  7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 8, 8};
4643 static const stbi_uc stbi__zdefault_distance[32] = {
4644  5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
4645  5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5};
4646 /*
4647 Init algorithm:
4648 {
4649  int i; // use <= to match clearly with spec
4650  for (i=0; i <= 143; ++i) stbi__zdefault_length[i] = 8;
4651  for ( ; i <= 255; ++i) stbi__zdefault_length[i] = 9;
4652  for ( ; i <= 279; ++i) stbi__zdefault_length[i] = 7;
4653  for ( ; i <= 287; ++i) stbi__zdefault_length[i] = 8;
4654 
4655  for (i=0; i <= 31; ++i) stbi__zdefault_distance[i] = 5;
4656 }
4657 */
4658 
4659 static int stbi__parse_zlib(stbi__zbuf *a, int parse_header)
4660 {
4661  int final, type;
4662  if (parse_header)
4663  if (!stbi__parse_zlib_header(a))
4664  return 0;
4665  a->num_bits = 0;
4666  a->code_buffer = 0;
4667  do {
4668  final = stbi__zreceive(a, 1);
4669  type = stbi__zreceive(a, 2);
4670  if (type == 0) {
4671  if (!stbi__parse_uncompressed_block(a))
4672  return 0;
4673  } else if (type == 3) {
4674  return 0;
4675  } else {
4676  if (type == 1) {
4677  // use fixed code lengths
4678  if (!stbi__zbuild_huffman(&a->z_length, stbi__zdefault_length,
4679  288))
4680  return 0;
4681  if (!stbi__zbuild_huffman(&a->z_distance,
4682  stbi__zdefault_distance, 32))
4683  return 0;
4684  } else {
4685  if (!stbi__compute_huffman_codes(a))
4686  return 0;
4687  }
4688  if (!stbi__parse_huffman_block(a))
4689  return 0;
4690  }
4691  } while (!final);
4692  return 1;
4693 }
4694 
4695 static int stbi__do_zlib(stbi__zbuf *a, char *obuf, int olen, int exp,
4696  int parse_header)
4697 {
4698  a->zout_start = obuf;
4699  a->zout = obuf;
4700  a->zout_end = obuf + olen;
4701  a->z_expandable = exp;
4702 
4703  return stbi__parse_zlib(a, parse_header);
4704 }
4705 
4706 STBIDEF char *stbi_zlib_decode_malloc_guesssize(const char *buffer, int len,
4707  int initial_size, int *outlen)
4708 {
4709  stbi__zbuf a;
4710  char *p = (char *)stbi__malloc(initial_size);
4711  if (p == NULL)
4712  return NULL;
4713  a.zbuffer = (stbi_uc *)buffer;
4714  a.zbuffer_end = (stbi_uc *)buffer + len;
4715  if (stbi__do_zlib(&a, p, initial_size, 1, 1)) {
4716  if (outlen)
4717  *outlen = (int)(a.zout - a.zout_start);
4718  return a.zout_start;
4719  } else {
4720  STBI_FREE(a.zout_start);
4721  return NULL;
4722  }
4723 }
4724 
4725 STBIDEF char *stbi_zlib_decode_malloc(char const *buffer, int len, int *outlen)
4726 {
4727  return stbi_zlib_decode_malloc_guesssize(buffer, len, 16384, outlen);
4728 }
4729 
4730 STBIDEF char *stbi_zlib_decode_malloc_guesssize_headerflag(const char *buffer,
4731  int len,
4732  int initial_size,
4733  int *outlen,
4734  int parse_header)
4735 {
4736  stbi__zbuf a;
4737  char *p = (char *)stbi__malloc(initial_size);
4738  if (p == NULL)
4739  return NULL;
4740  a.zbuffer = (stbi_uc *)buffer;
4741  a.zbuffer_end = (stbi_uc *)buffer + len;
4742  if (stbi__do_zlib(&a, p, initial_size, 1, parse_header)) {
4743  if (outlen)
4744  *outlen = (int)(a.zout - a.zout_start);
4745  return a.zout_start;
4746  } else {
4747  STBI_FREE(a.zout_start);
4748  return NULL;
4749  }
4750 }
4751 
4752 STBIDEF int stbi_zlib_decode_buffer(char *obuffer, int olen,
4753  char const *ibuffer, int ilen)
4754 {
4755  stbi__zbuf a;
4756  a.zbuffer = (stbi_uc *)ibuffer;
4757  a.zbuffer_end = (stbi_uc *)ibuffer + ilen;
4758  if (stbi__do_zlib(&a, obuffer, olen, 0, 1))
4759  return (int)(a.zout - a.zout_start);
4760  else
4761  return -1;
4762 }
4763 
4764 STBIDEF char *stbi_zlib_decode_noheader_malloc(char const *buffer, int len,
4765  int *outlen)
4766 {
4767  stbi__zbuf a;
4768  char *p = (char *)stbi__malloc(16384);
4769  if (p == NULL)
4770  return NULL;
4771  a.zbuffer = (stbi_uc *)buffer;
4772  a.zbuffer_end = (stbi_uc *)buffer + len;
4773  if (stbi__do_zlib(&a, p, 16384, 1, 0)) {
4774  if (outlen)
4775  *outlen = (int)(a.zout - a.zout_start);
4776  return a.zout_start;
4777  } else {
4778  STBI_FREE(a.zout_start);
4779  return NULL;
4780  }
4781 }
4782 
4783 STBIDEF int stbi_zlib_decode_noheader_buffer(char *obuffer, int olen,
4784  const char *ibuffer, int ilen)
4785 {
4786  stbi__zbuf a;
4787  a.zbuffer = (stbi_uc *)ibuffer;
4788  a.zbuffer_end = (stbi_uc *)ibuffer + ilen;
4789  if (stbi__do_zlib(&a, obuffer, olen, 0, 0))
4790  return (int)(a.zout - a.zout_start);
4791  else
4792  return -1;
4793 }
4794 #endif
4795 
4796 // public domain "baseline" PNG decoder v0.10 Sean Barrett 2006-11-18
4797 // simple implementation
4798 // - only 8-bit samples
4799 // - no CRC checking
4800 // - allocates lots of intermediate memory
4801 // - avoids problem of streaming data between subsystems
4802 // - avoids explicit window management
4803 // performance
4804 // - uses stb_zlib, a PD zlib implementation with fast huffman decoding
4805 
4806 #ifndef STBI_NO_PNG
4807 typedef struct {
4808  stbi__uint32 length;
4809  stbi__uint32 type;
4810 } stbi__pngchunk;
4811 
4812 static stbi__pngchunk stbi__get_chunk_header(stbi__context *s)
4813 {
4814  stbi__pngchunk c;
4815  c.length = stbi__get32be(s);
4816  c.type = stbi__get32be(s);
4817  return c;
4818 }
4819 
4820 static int stbi__check_png_header(stbi__context *s)
4821 {
4822  static stbi_uc png_sig[8] = {137, 80, 78, 71, 13, 10, 26, 10};
4823  int i;
4824  for (i = 0; i < 8; ++i)
4825  if (stbi__get8(s) != png_sig[i])
4826  return stbi__err("bad png sig", "Not a PNG");
4827  return 1;
4828 }
4829 
4830 typedef struct {
4831  stbi__context *s;
4832  stbi_uc *idata, *expanded, *out;
4833  int depth;
4834 } stbi__png;
4835 
// Scanline filter identifiers. Values 0..4 are the filter bytes accepted
// from the data; the two *_first values are internal only.
enum {
    STBI__F_none = 0,
    STBI__F_sub = 1,
    STBI__F_up = 2,
    STBI__F_avg = 3,
    STBI__F_paeth = 4,
    // synthetic filters used for first scanline to avoid needing a dummy row
    // of 0s
    STBI__F_avg_first = 5,
    STBI__F_paeth_first = 6
};
4847 
4848 static stbi_uc first_row_filter[5] = {STBI__F_none, STBI__F_sub, STBI__F_none,
4849  STBI__F_avg_first, STBI__F_paeth_first};
4850 
// Paeth predictor: of a, b and c, return the one closest to a + b - c.
// Ties are resolved in the order a, then b, then c.
static int stbi__paeth(int a, int b, int c)
{
    const int estimate = a + b - c;
    const int dist_a = abs(estimate - a);
    const int dist_b = abs(estimate - b);
    const int dist_c = abs(estimate - c);
    int best = c;

    if (dist_b <= dist_c)
        best = b;
    if (dist_a <= dist_b && dist_a <= dist_c)
        best = a;
    return best;
}
4863 
4864 static stbi_uc stbi__depth_scale_table[9] = {0, 0xff, 0x55, 0, 0x11,
4865  0, 0, 0, 0x01};
4866 
4867 // create the png data from post-deflated data
4868 static int stbi__create_png_image_raw(stbi__png *a, stbi_uc *raw,
4869  stbi__uint32 raw_len, int out_n,
4870  stbi__uint32 x, stbi__uint32 y, int depth,
4871  int color)
4872 {
4873  int bytes = (depth == 16 ? 2 : 1);
4874  stbi__context *s = a->s;
4875  stbi__uint32 i, j, stride = x * out_n * bytes;
4876  stbi__uint32 img_len, img_width_bytes;
4877  int k;
4878  int img_n = s->img_n; // copy it into a local for later
4879 
4880  int output_bytes = out_n * bytes;
4881  int filter_bytes = img_n * bytes;
4882  int width = x;
4883 
4884  STBI_ASSERT(out_n == s->img_n || out_n == s->img_n + 1);
4885  a->out = (stbi_uc *)stbi__malloc_mad3(
4886  x, y, output_bytes, 0); // extra bytes to write off the end into
4887  if (!a->out)
4888  return stbi__err("outofmem", "Out of memory");
4889 
4890  img_width_bytes = (((img_n * x * depth) + 7) >> 3);
4891  img_len = (img_width_bytes + 1) * y;
4892  // we used to check for exact match between raw_len and img_len on
4893  // non-interlaced PNGs, but issue #276 reported a PNG in the wild that had
4894  // extra data at the end (all zeros), so just check for raw_len < img_len
4895  // always.
4896  if (raw_len < img_len)
4897  return stbi__err("not enough pixels", "Corrupt PNG");
4898 
4899  for (j = 0; j < y; ++j) {
4900  stbi_uc *cur = a->out + stride * j;
4901  stbi_uc *prior;
4902  int filter = *raw++;
4903 
4904  if (filter > 4)
4905  return stbi__err("invalid filter", "Corrupt PNG");
4906 
4907  if (depth < 8) {
4908  STBI_ASSERT(img_width_bytes <= x);
4909  cur += x * out_n - img_width_bytes; // store output to the rightmost
4910  // img_len bytes, so we can
4911  // decode in place
4912  filter_bytes = 1;
4913  width = img_width_bytes;
4914  }
4915  prior = cur - stride; // bugfix: need to compute this after 'cur +='
4916  // computation above
4917 
4918  // if first row, use special filter that doesn't sample previous row
4919  if (j == 0)
4920  filter = first_row_filter[filter];
4921 
4922  // handle first byte explicitly
4923  for (k = 0; k < filter_bytes; ++k) {
4924  switch (filter) {
4925  case STBI__F_none:
4926  cur[k] = raw[k];
4927  break;
4928  case STBI__F_sub:
4929  cur[k] = raw[k];
4930  break;
4931  case STBI__F_up:
4932  cur[k] = STBI__BYTECAST(raw[k] + prior[k]);
4933  break;
4934  case STBI__F_avg:
4935  cur[k] = STBI__BYTECAST(raw[k] + (prior[k] >> 1));
4936  break;
4937  case STBI__F_paeth:
4938  cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(0, prior[k], 0));
4939  break;
4940  case STBI__F_avg_first:
4941  cur[k] = raw[k];
4942  break;
4943  case STBI__F_paeth_first:
4944  cur[k] = raw[k];
4945  break;
4946  }
4947  }
4948 
4949  if (depth == 8) {
4950  if (img_n != out_n)
4951  cur[img_n] = 255; // first pixel
4952  raw += img_n;
4953  cur += out_n;
4954  prior += out_n;
4955  } else if (depth == 16) {
4956  if (img_n != out_n) {
4957  cur[filter_bytes] = 255; // first pixel top byte
4958  cur[filter_bytes + 1] = 255; // first pixel bottom byte
4959  }
4960  raw += filter_bytes;
4961  cur += output_bytes;
4962  prior += output_bytes;
4963  } else {
4964  raw += 1;
4965  cur += 1;
4966  prior += 1;
4967  }
4968 
4969  // this is a little gross, so that we don't switch per-pixel or
4970  // per-component
4971  if (depth < 8 || img_n == out_n) {
4972  int nk = (width - 1) * filter_bytes;
4973 #define STBI__CASE(f) \
4974  case f: \
4975  for (k = 0; k < nk; ++k)
4976  switch (filter) {
4977  // "none" filter turns into a memcpy here; make that explicit.
4978  case STBI__F_none:
4979  memcpy(cur, raw, nk);
4980  break;
4981  STBI__CASE(STBI__F_sub)
4982  {
4983  cur[k] = STBI__BYTECAST(raw[k] + cur[k - filter_bytes]);
4984  }
4985  break;
4986  STBI__CASE(STBI__F_up)
4987  {
4988  cur[k] = STBI__BYTECAST(raw[k] + prior[k]);
4989  }
4990  break;
4991  STBI__CASE(STBI__F_avg)
4992  {
4993  cur[k] = STBI__BYTECAST(
4994  raw[k] + ((prior[k] + cur[k - filter_bytes]) >> 1));
4995  }
4996  break;
4997  STBI__CASE(STBI__F_paeth)
4998  {
4999  cur[k] = STBI__BYTECAST(
5000  raw[k] + stbi__paeth(cur[k - filter_bytes], prior[k],
5001  prior[k - filter_bytes]));
5002  }
5003  break;
5004  STBI__CASE(STBI__F_avg_first)
5005  {
5006  cur[k] =
5007  STBI__BYTECAST(raw[k] + (cur[k - filter_bytes] >> 1));
5008  }
5009  break;
5010  STBI__CASE(STBI__F_paeth_first)
5011  {
5012  cur[k] = STBI__BYTECAST(
5013  raw[k] + stbi__paeth(cur[k - filter_bytes], 0, 0));
5014  }
5015  break;
5016  }
5017 #undef STBI__CASE
5018  raw += nk;
5019  } else {
5020  STBI_ASSERT(img_n + 1 == out_n);
5021 #define STBI__CASE(f) \
5022  case f: \
5023  for (i = x - 1; i >= 1; --i, cur[filter_bytes] = 255, \
5024  raw += filter_bytes, cur += output_bytes, prior += output_bytes) \
5025  for (k = 0; k < filter_bytes; ++k)
5026  switch (filter) {
5027  STBI__CASE(STBI__F_none) { cur[k] = raw[k]; }
5028  break;
5029  STBI__CASE(STBI__F_sub)
5030  {
5031  cur[k] = STBI__BYTECAST(raw[k] + cur[k - output_bytes]);
5032  }
5033  break;
5034  STBI__CASE(STBI__F_up)
5035  {
5036  cur[k] = STBI__BYTECAST(raw[k] + prior[k]);
5037  }
5038  break;
5039  STBI__CASE(STBI__F_avg)
5040  {
5041  cur[k] = STBI__BYTECAST(
5042  raw[k] + ((prior[k] + cur[k - output_bytes]) >> 1));
5043  }
5044  break;
5045  STBI__CASE(STBI__F_paeth)
5046  {
5047  cur[k] = STBI__BYTECAST(
5048  raw[k] + stbi__paeth(cur[k - output_bytes], prior[k],
5049  prior[k - output_bytes]));
5050  }
5051  break;
5052  STBI__CASE(STBI__F_avg_first)
5053  {
5054  cur[k] =
5055  STBI__BYTECAST(raw[k] + (cur[k - output_bytes] >> 1));
5056  }
5057  break;
5058  STBI__CASE(STBI__F_paeth_first)
5059  {
5060  cur[k] = STBI__BYTECAST(
5061  raw[k] + stbi__paeth(cur[k - output_bytes], 0, 0));
5062  }
5063  break;
5064  }
5065 #undef STBI__CASE
5066 
5067  // the loop above sets the high byte of the pixels' alpha, but for
5068  // 16 bit png files we also need the low byte set. we'll do that
5069  // here.
5070  if (depth == 16) {
5071  cur = a->out +
5072  stride * j; // start at the beginning of the row again
5073  for (i = 0; i < x; ++i, cur += output_bytes) {
5074  cur[filter_bytes + 1] = 255;
5075  }
5076  }
5077  }
5078  }
5079 
5080  // we make a separate pass to expand bits to pixels; for performance,
5081  // this could run two scanlines behind the above code, so it won't
5082  // intefere with filtering but will still be in the cache.
5083  if (depth < 8) {
5084  for (j = 0; j < y; ++j) {
5085  stbi_uc *cur = a->out + stride * j;
5086  stbi_uc *in = a->out + stride * j + x * out_n - img_width_bytes;
5087  // unpack 1/2/4-bit into a 8-bit buffer. allows us to keep the
5088  // common 8-bit path optimal at minimal cost for 1/2/4-bit png
5089  // guarante byte alignment, if width is not multiple of 8/4/2 we'll
5090  // decode dummy trailing data that will be skipped in the later loop
5091  stbi_uc scale = (color == 0)
5092  ? stbi__depth_scale_table[depth]
5093  : 1; // scale grayscale values to 0..255 range
5094 
5095  // note that the final byte might overshoot and write more data than
5096  // desired. we can allocate enough data that this never writes out
5097  // of memory, but it could also overwrite the next scanline. can it
5098  // overwrite non-empty data on the next scanline? yes, consider
5099  // 1-pixel-wide scanlines with 1-bit-per-pixel. so we need to
5100  // explicitly clamp the final ones
5101 
5102  if (depth == 4) {
5103  for (k = x * img_n; k >= 2; k -= 2, ++in) {
5104  *cur++ = scale * ((*in >> 4));
5105  *cur++ = scale * ((*in) & 0x0f);
5106  }
5107  if (k > 0)
5108  *cur++ = scale * ((*in >> 4));
5109  } else if (depth == 2) {
5110  for (k = x * img_n; k >= 4; k -= 4, ++in) {
5111  *cur++ = scale * ((*in >> 6));
5112  *cur++ = scale * ((*in >> 4) & 0x03);
5113  *cur++ = scale * ((*in >> 2) & 0x03);
5114  *cur++ = scale * ((*in) & 0x03);
5115  }
5116  if (k > 0)
5117  *cur++ = scale * ((*in >> 6));
5118  if (k > 1)
5119  *cur++ = scale * ((*in >> 4) & 0x03);
5120  if (k > 2)
5121  *cur++ = scale * ((*in >> 2) & 0x03);
5122  } else if (depth == 1) {
5123  for (k = x * img_n; k >= 8; k -= 8, ++in) {
5124  *cur++ = scale * ((*in >> 7));
5125  *cur++ = scale * ((*in >> 6) & 0x01);
5126  *cur++ = scale * ((*in >> 5) & 0x01);
5127  *cur++ = scale * ((*in >> 4) & 0x01);
5128  *cur++ = scale * ((*in >> 3) & 0x01);
5129  *cur++ = scale * ((*in >> 2) & 0x01);
5130  *cur++ = scale * ((*in >> 1) & 0x01);
5131  *cur++ = scale * ((*in) & 0x01);
5132  }
5133  if (k > 0)
5134  *cur++ = scale * ((*in >> 7));
5135  if (k > 1)
5136  *cur++ = scale * ((*in >> 6) & 0x01);
5137  if (k > 2)
5138  *cur++ = scale * ((*in >> 5) & 0x01);
5139  if (k > 3)
5140  *cur++ = scale * ((*in >> 4) & 0x01);
5141  if (k > 4)
5142  *cur++ = scale * ((*in >> 3) & 0x01);
5143  if (k > 5)
5144  *cur++ = scale * ((*in >> 2) & 0x01);
5145  if (k > 6)
5146  *cur++ = scale * ((*in >> 1) & 0x01);
5147  }
5148  if (img_n != out_n) {
5149  int q;
5150  // insert alpha = 255
5151  cur = a->out + stride * j;
5152  if (img_n == 1) {
5153  for (q = x - 1; q >= 0; --q) {
5154  cur[q * 2 + 1] = 255;
5155  cur[q * 2 + 0] = cur[q];
5156  }
5157  } else {
5158  STBI_ASSERT(img_n == 3);
5159  for (q = x - 1; q >= 0; --q) {
5160  cur[q * 4 + 3] = 255;
5161  cur[q * 4 + 2] = cur[q * 3 + 2];
5162  cur[q * 4 + 1] = cur[q * 3 + 1];
5163  cur[q * 4 + 0] = cur[q * 3 + 0];
5164  }
5165  }
5166  }
5167  }
5168  } else if (depth == 16) {
5169  // force the image data from big-endian to platform-native.
5170  // this is done in a separate pass due to the decoding relying
5171  // on the data being untouched, but could probably be done
5172  // per-line during decode if care is taken.
5173  stbi_uc *cur = a->out;
5174  stbi__uint16 *cur16 = (stbi__uint16 *)cur;
5175 
5176  for (i = 0; i < x * y * out_n; ++i, cur16++, cur += 2) {
5177  *cur16 = (cur[0] << 8) | cur[1];
5178  }
5179  }
5180 
5181  return 1;
5182 }
5183 
5184 static int stbi__create_png_image(stbi__png *a, stbi_uc *image_data,
5185  stbi__uint32 image_data_len, int out_n,
5186  int depth, int color, int interlaced)
5187 {
5188  int bytes = (depth == 16 ? 2 : 1);
5189  int out_bytes = out_n * bytes;
5190  stbi_uc *final;
5191  int p;
5192  if (!interlaced)
5193  return stbi__create_png_image_raw(a, image_data, image_data_len, out_n,
5194  a->s->img_x, a->s->img_y, depth,
5195  color);
5196 
5197  // de-interlacing
5198  final =
5199  (stbi_uc *)stbi__malloc_mad3(a->s->img_x, a->s->img_y, out_bytes, 0);
5200  for (p = 0; p < 7; ++p) {
5201  int xorig[] = {0, 4, 0, 2, 0, 1, 0};
5202  int yorig[] = {0, 0, 4, 0, 2, 0, 1};
5203  int xspc[] = {8, 8, 4, 4, 2, 2, 1};
5204  int yspc[] = {8, 8, 8, 4, 4, 2, 2};
5205  int i, j, x, y;
5206  // pass1_x[4] = 0, pass1_x[5] = 1, pass1_x[12] = 1
5207  x = (a->s->img_x - xorig[p] + xspc[p] - 1) / xspc[p];
5208  y = (a->s->img_y - yorig[p] + yspc[p] - 1) / yspc[p];
5209  if (x && y) {
5210  stbi__uint32 img_len =
5211  ((((a->s->img_n * x * depth) + 7) >> 3) + 1) * y;
5212  if (!stbi__create_png_image_raw(a, image_data, image_data_len,
5213  out_n, x, y, depth, color)) {
5214  STBI_FREE(final);
5215  return 0;
5216  }
5217  for (j = 0; j < y; ++j) {
5218  for (i = 0; i < x; ++i) {
5219  int out_y = j * yspc[p] + yorig[p];
5220  int out_x = i * xspc[p] + xorig[p];
5221  memcpy(final + out_y * a->s->img_x * out_bytes +
5222  out_x * out_bytes,
5223  a->out + (j * x + i) * out_bytes, out_bytes);
5224  }
5225  }
5226  STBI_FREE(a->out);
5227  image_data += img_len;
5228  image_data_len -= img_len;
5229  }
5230  }
5231  a->out = final;
5232 
5233  return 1;
5234 }
5235 
5236 static int stbi__compute_transparency(stbi__png *z, stbi_uc tc[3], int out_n)
5237 {
5238  stbi__context *s = z->s;
5239  stbi__uint32 i, pixel_count = s->img_x * s->img_y;
5240  stbi_uc *p = z->out;
5241 
5242  // compute color-based transparency, assuming we've
5243  // already got 255 as the alpha value in the output
5244  STBI_ASSERT(out_n == 2 || out_n == 4);
5245 
5246  if (out_n == 2) {
5247  for (i = 0; i < pixel_count; ++i) {
5248  p[1] = (p[0] == tc[0] ? 0 : 255);
5249  p += 2;
5250  }
5251  } else {
5252  for (i = 0; i < pixel_count; ++i) {
5253  if (p[0] == tc[0] && p[1] == tc[1] && p[2] == tc[2])
5254  p[3] = 0;
5255  p += 4;
5256  }
5257  }
5258  return 1;
5259 }
5260 
5261 static int stbi__compute_transparency16(stbi__png *z, stbi__uint16 tc[3],
5262  int out_n)
5263 {
5264  stbi__context *s = z->s;
5265  stbi__uint32 i, pixel_count = s->img_x * s->img_y;
5266  stbi__uint16 *p = (stbi__uint16 *)z->out;
5267 
5268  // compute color-based transparency, assuming we've
5269  // already got 65535 as the alpha value in the output
5270  STBI_ASSERT(out_n == 2 || out_n == 4);
5271 
5272  if (out_n == 2) {
5273  for (i = 0; i < pixel_count; ++i) {
5274  p[1] = (p[0] == tc[0] ? 0 : 65535);
5275  p += 2;
5276  }
5277  } else {
5278  for (i = 0; i < pixel_count; ++i) {
5279  if (p[0] == tc[0] && p[1] == tc[1] && p[2] == tc[2])
5280  p[3] = 0;
5281  p += 4;
5282  }
5283  }
5284  return 1;
5285 }
5286 
5287 static int stbi__expand_png_palette(stbi__png *a, stbi_uc *palette, int len,
5288  int pal_img_n)
5289 {
5290  stbi__uint32 i, pixel_count = a->s->img_x * a->s->img_y;
5291  stbi_uc *p, *temp_out, *orig = a->out;
5292 
5293  p = (stbi_uc *)stbi__malloc_mad2(pixel_count, pal_img_n, 0);
5294  if (p == NULL)
5295  return stbi__err("outofmem", "Out of memory");
5296 
5297  // between here and free(out) below, exitting would leak
5298  temp_out = p;
5299 
5300  if (pal_img_n == 3) {
5301  for (i = 0; i < pixel_count; ++i) {
5302  int n = orig[i] * 4;
5303  p[0] = palette[n];
5304  p[1] = palette[n + 1];
5305  p[2] = palette[n + 2];
5306  p += 3;
5307  }
5308  } else {
5309  for (i = 0; i < pixel_count; ++i) {
5310  int n = orig[i] * 4;
5311  p[0] = palette[n];
5312  p[1] = palette[n + 1];
5313  p[2] = palette[n + 2];
5314  p[3] = palette[n + 3];
5315  p += 4;
5316  }
5317  }
5318  STBI_FREE(a->out);
5319  a->out = temp_out;
5320 
5321  STBI_NOTUSED(len);
5322 
5323  return 1;
5324 }
5325 
// When nonzero, stbi__de_iphone() divides colour by alpha on 4-channel
// images while swapping BGR to RGB. Set by stbi_set_unpremultiply_on_load().
5326 static int stbi__unpremultiply_on_load = 0;
// When nonzero, PNGs that carry a CgBI chunk are run through
// stbi__de_iphone() after decoding. Set by stbi_convert_iphone_png_to_rgb().
5327 static int stbi__de_iphone_flag = 0;
5328 
5329 STBIDEF void
5330 stbi_set_unpremultiply_on_load(int flag_true_if_should_unpremultiply)
5331 {
5332  stbi__unpremultiply_on_load = flag_true_if_should_unpremultiply;
5333 }
5334 
5335 STBIDEF void stbi_convert_iphone_png_to_rgb(int flag_true_if_should_convert)
5336 {
5337  stbi__de_iphone_flag = flag_true_if_should_convert;
5338 }
5339 
5340 static void stbi__de_iphone(stbi__png *z)
5341 {
5342  stbi__context *s = z->s;
5343  stbi__uint32 i, pixel_count = s->img_x * s->img_y;
5344  stbi_uc *p = z->out;
5345 
5346  if (s->img_out_n == 3) { // convert bgr to rgb
5347  for (i = 0; i < pixel_count; ++i) {
5348  stbi_uc t = p[0];
5349  p[0] = p[2];
5350  p[2] = t;
5351  p += 3;
5352  }
5353  } else {
5354  STBI_ASSERT(s->img_out_n == 4);
5355  if (stbi__unpremultiply_on_load) {
5356  // convert bgr to rgb and unpremultiply
5357  for (i = 0; i < pixel_count; ++i) {
5358  stbi_uc a = p[3];
5359  stbi_uc t = p[0];
5360  if (a) {
5361  stbi_uc half = a / 2;
5362  p[0] = (p[2] * 255 + half) / a;
5363  p[1] = (p[1] * 255 + half) / a;
5364  p[2] = (t * 255 + half) / a;
5365  } else {
5366  p[0] = p[2];
5367  p[2] = t;
5368  }
5369  p += 4;
5370  }
5371  } else {
5372  // convert bgr to rgb
5373  for (i = 0; i < pixel_count; ++i) {
5374  stbi_uc t = p[0];
5375  p[0] = p[2];
5376  p[2] = t;
5377  p += 4;
5378  }
5379  }
5380  }
5381 }
5382 
// Packs four chunk-name bytes into one 32-bit tag, first byte in the most
// significant position. Each argument is converted to unsigned before it is
// shifted so that a byte value >= 0x80 in the top position cannot overflow a
// signed int (which would be undefined behaviour).
#define STBI__PNG_TYPE(a, b, c, d)                                \
    (((unsigned)(a) << 24) + ((unsigned)(b) << 16) +              \
     ((unsigned)(c) << 8) + (unsigned)(d))
5385 
5386 static int stbi__parse_png_file(stbi__png *z, int scan, int req_comp)
5387 {
5388  stbi_uc palette[1024], pal_img_n = 0;
5389  stbi_uc has_trans = 0, tc[3];
5390  stbi__uint16 tc16[3];
5391  stbi__uint32 ioff = 0, idata_limit = 0, i, pal_len = 0;
5392  int first = 1, k, interlace = 0, color = 0, is_iphone = 0;
5393  stbi__context *s = z->s;
5394 
5395  z->expanded = NULL;
5396  z->idata = NULL;
5397  z->out = NULL;
5398 
5399  if (!stbi__check_png_header(s))
5400  return 0;
5401 
5402  if (scan == STBI__SCAN_type)
5403  return 1;
5404 
5405  for (;;) {
5406  stbi__pngchunk c = stbi__get_chunk_header(s);
5407  switch (c.type) {
5408  case STBI__PNG_TYPE('C', 'g', 'B', 'I'):
5409  is_iphone = 1;
5410  stbi__skip(s, c.length);
5411  break;
5412  case STBI__PNG_TYPE('I', 'H', 'D', 'R'): {
5413  int comp, filter;
5414  if (!first)
5415  return stbi__err("multiple IHDR", "Corrupt PNG");
5416  first = 0;
5417  if (c.length != 13)
5418  return stbi__err("bad IHDR len", "Corrupt PNG");
5419  s->img_x = stbi__get32be(s);
5420  if (s->img_x > (1 << 24))
5421  return stbi__err("too large", "Very large image (corrupt?)");
5422  s->img_y = stbi__get32be(s);
5423  if (s->img_y > (1 << 24))
5424  return stbi__err("too large", "Very large image (corrupt?)");
5425  z->depth = stbi__get8(s);
5426  if (z->depth != 1 && z->depth != 2 && z->depth != 4 &&
5427  z->depth != 8 && z->depth != 16)
5428  return stbi__err("1/2/4/8/16-bit only",
5429  "PNG not supported: 1/2/4/8/16-bit only");
5430  color = stbi__get8(s);
5431  if (color > 6)
5432  return stbi__err("bad ctype", "Corrupt PNG");
5433  if (color == 3 && z->depth == 16)
5434  return stbi__err("bad ctype", "Corrupt PNG");
5435  if (color == 3)
5436  pal_img_n = 3;
5437  else if (color & 1)
5438  return stbi__err("bad ctype", "Corrupt PNG");
5439  comp = stbi__get8(s);
5440  if (comp)
5441  return stbi__err("bad comp method", "Corrupt PNG");
5442  filter = stbi__get8(s);
5443  if (filter)
5444  return stbi__err("bad filter method", "Corrupt PNG");
5445  interlace = stbi__get8(s);
5446  if (interlace > 1)
5447  return stbi__err("bad interlace method", "Corrupt PNG");
5448  if (!s->img_x || !s->img_y)
5449  return stbi__err("0-pixel image", "Corrupt PNG");
5450  if (!pal_img_n) {
5451  s->img_n = (color & 2 ? 3 : 1) + (color & 4 ? 1 : 0);
5452  if ((1 << 30) / s->img_x / s->img_n < s->img_y)
5453  return stbi__err("too large", "Image too large to decode");
5454  if (scan == STBI__SCAN_header)
5455  return 1;
5456  } else {
5457  // if paletted, then pal_n is our final components, and
5458  // img_n is # components to decompress/filter.
5459  s->img_n = 1;
5460  if ((1 << 30) / s->img_x / 4 < s->img_y)
5461  return stbi__err("too large", "Corrupt PNG");
5462  // if SCAN_header, have to scan to see if we have a tRNS
5463  }
5464  break;
5465  }
5466 
5467  case STBI__PNG_TYPE('P', 'L', 'T', 'E'): {
5468  if (first)
5469  return stbi__err("first not IHDR", "Corrupt PNG");
5470  if (c.length > 256 * 3)
5471  return stbi__err("invalid PLTE", "Corrupt PNG");
5472  pal_len = c.length / 3;
5473  if (pal_len * 3 != c.length)
5474  return stbi__err("invalid PLTE", "Corrupt PNG");
5475  for (i = 0; i < pal_len; ++i) {
5476  palette[i * 4 + 0] = stbi__get8(s);
5477  palette[i * 4 + 1] = stbi__get8(s);
5478  palette[i * 4 + 2] = stbi__get8(s);
5479  palette[i * 4 + 3] = 255;
5480  }
5481  break;
5482  }
5483 
5484  case STBI__PNG_TYPE('t', 'R', 'N', 'S'): {
5485  if (first)
5486  return stbi__err("first not IHDR", "Corrupt PNG");
5487  if (z->idata)
5488  return stbi__err("tRNS after IDAT", "Corrupt PNG");
5489  if (pal_img_n) {
5490  if (scan == STBI__SCAN_header) {
5491  s->img_n = 4;
5492  return 1;
5493  }
5494  if (pal_len == 0)
5495  return stbi__err("tRNS before PLTE", "Corrupt PNG");
5496  if (c.length > pal_len)
5497  return stbi__err("bad tRNS len", "Corrupt PNG");
5498  pal_img_n = 4;
5499  for (i = 0; i < c.length; ++i)
5500  palette[i * 4 + 3] = stbi__get8(s);
5501  } else {
5502  if (!(s->img_n & 1))
5503  return stbi__err("tRNS with alpha", "Corrupt PNG");
5504  if (c.length != (stbi__uint32)s->img_n * 2)
5505  return stbi__err("bad tRNS len", "Corrupt PNG");
5506  has_trans = 1;
5507  if (z->depth == 16) {
5508  for (k = 0; k < s->img_n; ++k)
5509  tc16[k] = (stbi__uint16)stbi__get16be(
5510  s); // copy the values as-is
5511  } else {
5512  for (k = 0; k < s->img_n; ++k)
5513  tc[k] = (stbi_uc)(stbi__get16be(s) & 255) *
5514  stbi__depth_scale_table[z->depth]; // non 8-bit
5515  // images
5516  // will be
5517  // larger
5518  }
5519  }
5520  break;
5521  }
5522 
5523  case STBI__PNG_TYPE('I', 'D', 'A', 'T'): {
5524  if (first)
5525  return stbi__err("first not IHDR", "Corrupt PNG");
5526  if (pal_img_n && !pal_len)
5527  return stbi__err("no PLTE", "Corrupt PNG");
5528  if (scan == STBI__SCAN_header) {
5529  s->img_n = pal_img_n;
5530  return 1;
5531  }
5532  if ((int)(ioff + c.length) < (int)ioff)
5533  return 0;
5534  if (ioff + c.length > idata_limit) {
5535  stbi__uint32 idata_limit_old = idata_limit;
5536  stbi_uc *p;
5537  if (idata_limit == 0)
5538  idata_limit = c.length > 4096 ? c.length : 4096;
5539  while (ioff + c.length > idata_limit)
5540  idata_limit *= 2;
5541  STBI_NOTUSED(idata_limit_old);
5542  p = (stbi_uc *)STBI_REALLOC_SIZED(z->idata, idata_limit_old,
5543  idata_limit);
5544  if (p == NULL)
5545  return stbi__err("outofmem", "Out of memory");
5546  z->idata = p;
5547  }
5548  if (!stbi__getn(s, z->idata + ioff, c.length))
5549  return stbi__err("outofdata", "Corrupt PNG");
5550  ioff += c.length;
5551  break;
5552  }
5553 
5554  case STBI__PNG_TYPE('I', 'E', 'N', 'D'): {
5555  stbi__uint32 raw_len, bpl;
5556  if (first)
5557  return stbi__err("first not IHDR", "Corrupt PNG");
5558  if (scan != STBI__SCAN_load)
5559  return 1;
5560  if (z->idata == NULL)
5561  return stbi__err("no IDAT", "Corrupt PNG");
5562  // initial guess for decoded data size to avoid unnecessary reallocs
5563  bpl =
5564  (s->img_x * z->depth + 7) / 8; // bytes per line, per component
5565  raw_len = bpl * s->img_y * s->img_n /* pixels */ +
5566  s->img_y /* filter mode per row */;
5567  z->expanded =
5569  (char *)z->idata, ioff, raw_len, (int *)&raw_len,
5570  !is_iphone);
5571  if (z->expanded == NULL)
5572  return 0; // zlib should set error
5573  STBI_FREE(z->idata);
5574  z->idata = NULL;
5575  if ((req_comp == s->img_n + 1 && req_comp != 3 && !pal_img_n) ||
5576  has_trans)
5577  s->img_out_n = s->img_n + 1;
5578  else
5579  s->img_out_n = s->img_n;
5580  if (!stbi__create_png_image(z, z->expanded, raw_len, s->img_out_n,
5581  z->depth, color, interlace))
5582  return 0;
5583  if (has_trans) {
5584  if (z->depth == 16) {
5585  if (!stbi__compute_transparency16(z, tc16, s->img_out_n))
5586  return 0;
5587  } else {
5588  if (!stbi__compute_transparency(z, tc, s->img_out_n))
5589  return 0;
5590  }
5591  }
5592  if (is_iphone && stbi__de_iphone_flag && s->img_out_n > 2)
5593  stbi__de_iphone(z);
5594  if (pal_img_n) {
5595  // pal_img_n == 3 or 4
5596  s->img_n = pal_img_n; // record the actual colors we had
5597  s->img_out_n = pal_img_n;
5598  if (req_comp >= 3)
5599  s->img_out_n = req_comp;
5600  if (!stbi__expand_png_palette(z, palette, pal_len,
5601  s->img_out_n))
5602  return 0;
5603  } else if (has_trans) {
5604  // non-paletted image with tRNS -> source image has (constant)
5605  // alpha
5606  ++s->img_n;
5607  }
5608  STBI_FREE(z->expanded);
5609  z->expanded = NULL;
5610  return 1;
5611  }
5612 
5613  default:
5614  // if critical, fail
5615  if (first)
5616  return stbi__err("first not IHDR", "Corrupt PNG");
5617  if ((c.type & (1 << 29)) == 0) {
5618 #ifndef STBI_NO_FAILURE_STRINGS
5619  // not threadsafe
5620  static char invalid_chunk[] = "XXXX PNG chunk not known";
5621  invalid_chunk[0] = STBI__BYTECAST(c.type >> 24);
5622  invalid_chunk[1] = STBI__BYTECAST(c.type >> 16);
5623  invalid_chunk[2] = STBI__BYTECAST(c.type >> 8);
5624  invalid_chunk[3] = STBI__BYTECAST(c.type >> 0);
5625 #endif
5626  return stbi__err(invalid_chunk,
5627  "PNG not supported: unknown PNG chunk type");
5628  }
5629  stbi__skip(s, c.length);
5630  break;
5631  }
5632  // end of PNG chunk, read and skip CRC
5633  stbi__get32be(s);
5634  }
5635 }
5636 
5637 static void *stbi__do_png(stbi__png *p, int *x, int *y, int *n, int req_comp,
5638  stbi__result_info *ri)
5639 {
5640  void *result = NULL;
5641  if (req_comp < 0 || req_comp > 4)
5642  return stbi__errpuc("bad req_comp", "Internal error");
5643  if (stbi__parse_png_file(p, STBI__SCAN_load, req_comp)) {
5644  if (p->depth < 8)
5645  ri->bits_per_channel = 8;
5646  else
5647  ri->bits_per_channel = p->depth;
5648  result = p->out;
5649  p->out = NULL;
5650  if (req_comp && req_comp != p->s->img_out_n) {
5651  if (ri->bits_per_channel == 8)
5652  result = stbi__convert_format((unsigned char *)result,
5653  p->s->img_out_n, req_comp,
5654  p->s->img_x, p->s->img_y);
5655  else
5656  result = stbi__convert_format16((stbi__uint16 *)result,
5657  p->s->img_out_n, req_comp,
5658  p->s->img_x, p->s->img_y);
5659  p->s->img_out_n = req_comp;
5660  if (result == NULL)
5661  return result;
5662  }
5663  *x = p->s->img_x;
5664  *y = p->s->img_y;
5665  if (n)
5666  *n = p->s->img_n;
5667  }
5668  STBI_FREE(p->out);
5669  p->out = NULL;
5670  STBI_FREE(p->expanded);
5671  p->expanded = NULL;
5672  STBI_FREE(p->idata);
5673  p->idata = NULL;
5674 
5675  return result;
5676 }
5677 
5678 static void *stbi__png_load(stbi__context *s, int *x, int *y, int *comp,
5679  int req_comp, stbi__result_info *ri)
5680 {
5681  stbi__png p;
5682  p.s = s;
5683  return stbi__do_png(&p, x, y, comp, req_comp, ri);
5684 }
5685 
5686 static int stbi__png_test(stbi__context *s)
5687 {
5688  int r;
5689  r = stbi__check_png_header(s);
5690  stbi__rewind(s);
5691  return r;
5692 }
5693 
5694 static int stbi__png_info_raw(stbi__png *p, int *x, int *y, int *comp)
5695 {
5696  if (!stbi__parse_png_file(p, STBI__SCAN_header, 0)) {
5697  stbi__rewind(p->s);
5698  return 0;
5699  }
5700  if (x)
5701  *x = p->s->img_x;
5702  if (y)
5703  *y = p->s->img_y;
5704  if (comp)
5705  *comp = p->s->img_n;
5706  return 1;
5707 }
5708 
5709 static int stbi__png_info(stbi__context *s, int *x, int *y, int *comp)
5710 {
5711  stbi__png p;
5712  p.s = s;
5713  return stbi__png_info_raw(&p, x, y, comp);
5714 }
5715 #endif
5716 
5717 // Microsoft/Windows BMP image
5718 
5719 #ifndef STBI_NO_BMP
5720 static int stbi__bmp_test_raw(stbi__context *s)
5721 {
5722  int r;
5723  int sz;
5724  if (stbi__get8(s) != 'B')
5725  return 0;
5726  if (stbi__get8(s) != 'M')
5727  return 0;
5728  stbi__get32le(s); // discard filesize
5729  stbi__get16le(s); // discard reserved
5730  stbi__get16le(s); // discard reserved
5731  stbi__get32le(s); // discard data offset
5732  sz = stbi__get32le(s);
5733  r = (sz == 12 || sz == 40 || sz == 56 || sz == 108 || sz == 124);
5734  return r;
5735 }
5736 
5737 static int stbi__bmp_test(stbi__context *s)
5738 {
5739  int r = stbi__bmp_test_raw(s);
5740  stbi__rewind(s);
5741  return r;
5742 }
5743 
// Index (0..31) of the most significant set bit of z, or -1 when z is 0.
// Found by binary search: at each step, if anything is set above the lower
// `width` bits, that width is added to the answer and z is shifted down.
static int stbi__high_bit(unsigned int z)
{
    static const int widths[5] = {16, 8, 4, 2, 1};
    int bit = 0;
    int step;

    if (z == 0)
        return -1;

    for (step = 0; step < 5; ++step) {
        if ((z >> widths[step]) != 0) {
            bit += widths[step];
            z >>= widths[step];
        }
    }
    return bit;
}
5762 
// Number of set bits in the low 32 bits of a, computed with parallel partial
// sums: 2-bit fields, then 4-bit, then 8-bit, then folded into the low byte.
static int stbi__bitcount(unsigned int a)
{
    unsigned int pairs = (a & 0x55555555) + ((a >> 1) & 0x55555555);
    unsigned int nibbles = (pairs & 0x33333333) + ((pairs >> 2) & 0x33333333);
    unsigned int bytes = (nibbles + (nibbles >> 4)) & 0x0f0f0f0f;
    unsigned int halves = bytes + (bytes >> 8);
    unsigned int total = halves + (halves >> 16);

    return total & 0xff;
}
5772 
// Converts a masked colour field to an 8-bit value.
//
//   v     : the pixel value already ANDed with the channel mask
//   shift : right-shift that puts the mask's top bit at bit 7; a negative
//           value means shift left by -shift instead
//   bits  : number of bits in the mask
//
// After shifting, the field's bit pattern is replicated downwards every
// `bits` positions so that an all-ones field of any width maps to 255.
//
// All arithmetic is done on an unsigned copy of v. A field that includes
// bit 31 arrives here as a negative int; shifting that right as a signed
// value would smear the sign bit and corrupt the replicated result.
// A non-positive `bits` returns the shifted value without replication
// (the replication loop would otherwise never terminate).
static int stbi__shiftsigned(int v, int shift, int bits)
{
    unsigned int field = (unsigned int)v;
    unsigned int result;
    int z;

    if (shift < 0)
        field <<= -shift;
    else
        field >>= shift;
    result = field;

    if (bits <= 0)
        return (int)result;

    for (z = bits; z < 8; z += bits)
        result += field >> z;

    return (int)result;
}
5791 
// Values collected by stbi__bmp_parse_header for use by the BMP loader.
typedef struct {
    int bpp;            // bits per pixel, as read from the header
    int offset;         // data-offset field read from the file header
    int hsz;            // size field of the info header (12/40/56/108/124)
    unsigned int mr;    // red channel bit mask
    unsigned int mg;    // green channel bit mask
    unsigned int mb;    // blue channel bit mask
    unsigned int ma;    // alpha channel bit mask
    unsigned int all_a; // OR of every alpha value seen while loading
} stbi__bmp_data;
5796 
5797 static void *stbi__bmp_parse_header(stbi__context *s, stbi__bmp_data *info)
5798 {
5799  int hsz;
5800  if (stbi__get8(s) != 'B' || stbi__get8(s) != 'M')
5801  return stbi__errpuc("not BMP", "Corrupt BMP");
5802  stbi__get32le(s); // discard filesize
5803  stbi__get16le(s); // discard reserved
5804  stbi__get16le(s); // discard reserved
5805  info->offset = stbi__get32le(s);
5806  info->hsz = hsz = stbi__get32le(s);
5807  info->mr = info->mg = info->mb = info->ma = 0;
5808 
5809  if (hsz != 12 && hsz != 40 && hsz != 56 && hsz != 108 && hsz != 124)
5810  return stbi__errpuc("unknown BMP", "BMP type not supported: unknown");
5811  if (hsz == 12) {
5812  s->img_x = stbi__get16le(s);
5813  s->img_y = stbi__get16le(s);
5814  } else {
5815  s->img_x = stbi__get32le(s);
5816  s->img_y = stbi__get32le(s);
5817  }
5818  if (stbi__get16le(s) != 1)
5819  return stbi__errpuc("bad BMP", "bad BMP");
5820  info->bpp = stbi__get16le(s);
5821  if (info->bpp == 1)
5822  return stbi__errpuc("monochrome", "BMP type not supported: 1-bit");
5823  if (hsz != 12) {
5824  int compress = stbi__get32le(s);
5825  if (compress == 1 || compress == 2)
5826  return stbi__errpuc("BMP RLE", "BMP type not supported: RLE");
5827  stbi__get32le(s); // discard sizeof
5828  stbi__get32le(s); // discard hres
5829  stbi__get32le(s); // discard vres
5830  stbi__get32le(s); // discard colorsused
5831  stbi__get32le(s); // discard max important
5832  if (hsz == 40 || hsz == 56) {
5833  if (hsz == 56) {
5834  stbi__get32le(s);
5835  stbi__get32le(s);
5836  stbi__get32le(s);
5837  stbi__get32le(s);
5838  }
5839  if (info->bpp == 16 || info->bpp == 32) {
5840  if (compress == 0) {
5841  if (info->bpp == 32) {
5842  info->mr = 0xffu << 16;
5843  info->mg = 0xffu << 8;
5844  info->mb = 0xffu << 0;
5845  info->ma = 0xffu << 24;
5846  info->all_a = 0; // if all_a is 0 at end, then we loaded
5847  // alpha channel but it was all 0
5848  } else {
5849  info->mr = 31u << 10;
5850  info->mg = 31u << 5;
5851  info->mb = 31u << 0;
5852  }
5853  } else if (compress == 3) {
5854  info->mr = stbi__get32le(s);
5855  info->mg = stbi__get32le(s);
5856  info->mb = stbi__get32le(s);
5857  // not documented, but generated by photoshop and handled by
5858  // mspaint
5859  if (info->mr == info->mg && info->mg == info->mb) {
5860  // ?!?!?
5861  return stbi__errpuc("bad BMP", "bad BMP");
5862  }
5863  } else
5864  return stbi__errpuc("bad BMP", "bad BMP");
5865  }
5866  } else {
5867  int i;
5868  if (hsz != 108 && hsz != 124)
5869  return stbi__errpuc("bad BMP", "bad BMP");
5870  info->mr = stbi__get32le(s);
5871  info->mg = stbi__get32le(s);
5872  info->mb = stbi__get32le(s);
5873  info->ma = stbi__get32le(s);
5874  stbi__get32le(s); // discard color space
5875  for (i = 0; i < 12; ++i)
5876  stbi__get32le(s); // discard color space parameters
5877  if (hsz == 124) {
5878  stbi__get32le(s); // discard rendering intent
5879  stbi__get32le(s); // discard offset of profile data
5880  stbi__get32le(s); // discard size of profile data
5881  stbi__get32le(s); // discard reserved
5882  }
5883  }
5884  }
5885  return (void *)1;
5886 }
5887 
5888 static void *stbi__bmp_load(stbi__context *s, int *x, int *y, int *comp,
5889  int req_comp, stbi__result_info *ri)
5890 {
5891  stbi_uc *out;
5892  unsigned int mr = 0, mg = 0, mb = 0, ma = 0, all_a;
5893  stbi_uc pal[256][4];
5894  int psize = 0, i, j, width;
5895  int flip_vertically, pad, target;
5896  stbi__bmp_data info;
5897  STBI_NOTUSED(ri);
5898 
5899  info.all_a = 255;
5900  if (stbi__bmp_parse_header(s, &info) == NULL)
5901  return NULL; // error code already set
5902 
5903  flip_vertically = ((int)s->img_y) > 0;
5904  s->img_y = abs((int)s->img_y);
5905 
5906  mr = info.mr;
5907  mg = info.mg;
5908  mb = info.mb;
5909  ma = info.ma;
5910  all_a = info.all_a;
5911 
5912  if (info.hsz == 12) {
5913  if (info.bpp < 24)
5914  psize = (info.offset - 14 - 24) / 3;
5915  } else {
5916  if (info.bpp < 16)
5917  psize = (info.offset - 14 - info.hsz) >> 2;
5918  }
5919 
5920  s->img_n = ma ? 4 : 3;
5921  if (req_comp && req_comp >= 3) // we can directly decode 3 or 4
5922  target = req_comp;
5923  else
5924  target = s->img_n; // if they want monochrome, we'll post-convert
5925 
5926  // sanity-check size
5927  if (!stbi__mad3sizes_valid(target, s->img_x, s->img_y, 0))
5928  return stbi__errpuc("too large", "Corrupt BMP");
5929 
5930  out = (stbi_uc *)stbi__malloc_mad3(target, s->img_x, s->img_y, 0);
5931  if (!out)
5932  return stbi__errpuc("outofmem", "Out of memory");
5933  if (info.bpp < 16) {
5934  int z = 0;
5935  if (psize == 0 || psize > 256) {
5936  STBI_FREE(out);
5937  return stbi__errpuc("invalid", "Corrupt BMP");
5938  }
5939  for (i = 0; i < psize; ++i) {
5940  pal[i][2] = stbi__get8(s);
5941  pal[i][1] = stbi__get8(s);
5942  pal[i][0] = stbi__get8(s);
5943  if (info.hsz != 12)
5944  stbi__get8(s);
5945  pal[i][3] = 255;
5946  }
5947  stbi__skip(s, info.offset - 14 - info.hsz -
5948  psize * (info.hsz == 12 ? 3 : 4));
5949  if (info.bpp == 4)
5950  width = (s->img_x + 1) >> 1;
5951  else if (info.bpp == 8)
5952  width = s->img_x;
5953  else {
5954  STBI_FREE(out);
5955  return stbi__errpuc("bad bpp", "Corrupt BMP");
5956  }
5957  pad = (-width) & 3;
5958  for (j = 0; j < (int)s->img_y; ++j) {
5959  for (i = 0; i < (int)s->img_x; i += 2) {
5960  int v = stbi__get8(s), v2 = 0;
5961  if (info.bpp == 4) {
5962  v2 = v & 15;
5963  v >>= 4;
5964  }
5965  out[z++] = pal[v][0];
5966  out[z++] = pal[v][1];
5967  out[z++] = pal[v][2];
5968  if (target == 4)
5969  out[z++] = 255;
5970  if (i + 1 == (int)s->img_x)
5971  break;
5972  v = (info.bpp == 8) ? stbi__get8(s) : v2;
5973  out[z++] = pal[v][0];
5974  out[z++] = pal[v][1];
5975  out[z++] = pal[v][2];
5976  if (target == 4)
5977  out[z++] = 255;
5978  }
5979  stbi__skip(s, pad);
5980  }
5981  } else {
5982  int rshift = 0, gshift = 0, bshift = 0, ashift = 0, rcount = 0,
5983  gcount = 0, bcount = 0, acount = 0;
5984  int z = 0;
5985  int easy = 0;
5986  stbi__skip(s, info.offset - 14 - info.hsz);
5987  if (info.bpp == 24)
5988  width = 3 * s->img_x;
5989  else if (info.bpp == 16)
5990  width = 2 * s->img_x;
5991  else /* bpp = 32 and pad = 0 */
5992  width = 0;
5993  pad = (-width) & 3;
5994  if (info.bpp == 24) {
5995  easy = 1;
5996  } else if (info.bpp == 32) {
5997  if (mb == 0xff && mg == 0xff00 && mr == 0x00ff0000 &&
5998  ma == 0xff000000)
5999  easy = 2;
6000  }
6001  if (!easy) {
6002  if (!mr || !mg || !mb) {
6003  STBI_FREE(out);
6004  return stbi__errpuc("bad masks", "Corrupt BMP");
6005  }
6006  // right shift amt to put high bit in position #7
6007  rshift = stbi__high_bit(mr) - 7;
6008  rcount = stbi__bitcount(mr);
6009  gshift = stbi__high_bit(mg) - 7;
6010  gcount = stbi__bitcount(mg);
6011  bshift = stbi__high_bit(mb) - 7;
6012  bcount = stbi__bitcount(mb);
6013  ashift = stbi__high_bit(ma) - 7;
6014  acount = stbi__bitcount(ma);
6015  }
6016  for (j = 0; j < (int)s->img_y; ++j) {
6017  if (easy) {
6018  for (i = 0; i < (int)s->img_x; ++i) {
6019  unsigned char a;
6020  out[z + 2] = stbi__get8(s);
6021  out[z + 1] = stbi__get8(s);
6022  out[z + 0] = stbi__get8(s);
6023  z += 3;
6024  a = (easy == 2 ? stbi__get8(s) : 255);
6025  all_a |= a;
6026  if (target == 4)
6027  out[z++] = a;
6028  }
6029  } else {
6030  int bpp = info.bpp;
6031  for (i = 0; i < (int)s->img_x; ++i) {
6032  stbi__uint32 v = (bpp == 16 ? (stbi__uint32)stbi__get16le(s)
6033  : stbi__get32le(s));
6034  int a;
6035  out[z++] = STBI__BYTECAST(
6036  stbi__shiftsigned(v & mr, rshift, rcount));
6037  out[z++] = STBI__BYTECAST(
6038  stbi__shiftsigned(v & mg, gshift, gcount));
6039  out[z++] = STBI__BYTECAST(
6040  stbi__shiftsigned(v & mb, bshift, bcount));
6041  a = (ma ? stbi__shiftsigned(v & ma, ashift, acount) : 255);
6042  all_a |= a;
6043  if (target == 4)
6044  out[z++] = STBI__BYTECAST(a);
6045  }
6046  }
6047  stbi__skip(s, pad);
6048  }
6049  }
6050 
6051  // if alpha channel is all 0s, replace with all 255s
6052  if (target == 4 && all_a == 0)
6053  for (i = 4 * s->img_x * s->img_y - 1; i >= 0; i -= 4)
6054  out[i] = 255;
6055 
6056  if (flip_vertically) {
6057  stbi_uc t;
6058  for (j = 0; j<(int)s->img_y>> 1; ++j) {
6059  stbi_uc *p1 = out + j * s->img_x * target;
6060  stbi_uc *p2 = out + (s->img_y - 1 - j) * s->img_x * target;
6061  for (i = 0; i < (int)s->img_x * target; ++i) {
6062  t = p1[i], p1[i] = p2[i], p2[i] = t;
6063  }
6064  }
6065  }
6066 
6067  if (req_comp && req_comp != target) {
6068  out = stbi__convert_format(out, target, req_comp, s->img_x, s->img_y);
6069  if (out == NULL)
6070  return out; // stbi__convert_format frees input on failure
6071  }
6072 
6073  *x = s->img_x;
6074  *y = s->img_y;
6075  if (comp)
6076  *comp = s->img_n;
6077  return out;
6078 }
6079 #endif
6080 
6081 // Targa Truevision - TGA
6082 // by Jonathan Dummer
6083 #ifndef STBI_NO_TGA
6084 // returns STBI_rgb or whatever, 0 on error
6085 static int stbi__tga_get_comp(int bits_per_pixel, int is_grey, int *is_rgb16)
6086 {
6087  // only RGB or RGBA (incl. 16bit) or grey allowed
6088  if (is_rgb16)
6089  *is_rgb16 = 0;
6090  switch (bits_per_pixel) {
6091  case 8:
6092  return STBI_grey;
6093  case 16:
6094  if (is_grey)
6095  return STBI_grey_alpha;
6096  // else: fall-through
6097  case 15:
6098  if (is_rgb16)
6099  *is_rgb16 = 1;
6100  return STBI_rgb;
6101  case 24: // fall-through
6102  case 32:
6103  return bits_per_pixel / 8;
6104  default:
6105  return 0;
6106  }
6107 }
6108 
6109 static int stbi__tga_info(stbi__context *s, int *x, int *y, int *comp)
6110 {
6111  int tga_w, tga_h, tga_comp, tga_image_type, tga_bits_per_pixel,
6112  tga_colormap_bpp;
6113  int sz, tga_colormap_type;
6114  stbi__get8(s); // discard Offset
6115  tga_colormap_type = stbi__get8(s); // colormap type
6116  if (tga_colormap_type > 1) {
6117  stbi__rewind(s);
6118  return 0; // only RGB or indexed allowed
6119  }
6120  tga_image_type = stbi__get8(s); // image type
6121  if (tga_colormap_type == 1) { // colormapped (paletted) image
6122  if (tga_image_type != 1 && tga_image_type != 9) {
6123  stbi__rewind(s);
6124  return 0;
6125  }
6126  stbi__skip(
6127  s, 4); // skip index of first colormap entry and number of entries
6128  sz = stbi__get8(s); // check bits per palette color entry
6129  if ((sz != 8) && (sz != 15) && (sz != 16) && (sz != 24) && (sz != 32)) {
6130  stbi__rewind(s);
6131  return 0;
6132  }
6133  stbi__skip(s, 4); // skip image x and y origin
6134  tga_colormap_bpp = sz;
6135  } else { // "normal" image w/o colormap - only RGB or grey allowed, +/- RLE
6136  if ((tga_image_type != 2) && (tga_image_type != 3) &&
6137  (tga_image_type != 10) && (tga_image_type != 11)) {
6138  stbi__rewind(s);
6139  return 0; // only RGB or grey allowed, +/- RLE
6140  }
6141  stbi__skip(s, 9); // skip colormap specification and image x/y origin
6142  tga_colormap_bpp = 0;
6143  }
6144  tga_w = stbi__get16le(s);
6145  if (tga_w < 1) {
6146  stbi__rewind(s);
6147  return 0; // test width
6148  }
6149  tga_h = stbi__get16le(s);
6150  if (tga_h < 1) {
6151  stbi__rewind(s);
6152  return 0; // test height
6153  }
6154  tga_bits_per_pixel = stbi__get8(s); // bits per pixel
6155  stbi__get8(s); // ignore alpha bits
6156  if (tga_colormap_bpp != 0) {
6157  if ((tga_bits_per_pixel != 8) && (tga_bits_per_pixel != 16)) {
6158  // when using a colormap, tga_bits_per_pixel is the size of the
6159  // indexes I don't think anything but 8 or 16bit indexes makes sense
6160  stbi__rewind(s);
6161  return 0;
6162  }
6163  tga_comp = stbi__tga_get_comp(tga_colormap_bpp, 0, NULL);
6164  } else {
6165  tga_comp = stbi__tga_get_comp(
6166  tga_bits_per_pixel, (tga_image_type == 3) || (tga_image_type == 11),
6167  NULL);
6168  }
6169  if (!tga_comp) {
6170  stbi__rewind(s);
6171  return 0;
6172  }
6173  if (x)
6174  *x = tga_w;
6175  if (y)
6176  *y = tga_h;
6177  if (comp)
6178  *comp = tga_comp;
6179  return 1; // seems to have passed everything
6180 }
6181 
6182 static int stbi__tga_test(stbi__context *s)
6183 {
6184  int res = 0;
6185  int sz, tga_color_type;
6186  stbi__get8(s); // discard Offset
6187  tga_color_type = stbi__get8(s); // color type
6188  if (tga_color_type > 1)
6189  goto errorEnd; // only RGB or indexed allowed
6190  sz = stbi__get8(s); // image type
6191  if (tga_color_type == 1) { // colormapped (paletted) image
6192  if (sz != 1 && sz != 9)
6193  goto errorEnd; // colortype 1 demands image type 1 or 9
6194  stbi__skip(
6195  s, 4); // skip index of first colormap entry and number of entries
6196  sz = stbi__get8(s); // check bits per palette color entry
6197  if ((sz != 8) && (sz != 15) && (sz != 16) && (sz != 24) && (sz != 32))
6198  goto errorEnd;
6199  stbi__skip(s, 4); // skip image x and y origin
6200  } else { // "normal" image w/o colormap
6201  if ((sz != 2) && (sz != 3) && (sz != 10) && (sz != 11))
6202  goto errorEnd; // only RGB or grey allowed, +/- RLE
6203  stbi__skip(s, 9); // skip colormap specification and image x/y origin
6204  }
6205  if (stbi__get16le(s) < 1)
6206  goto errorEnd; // test width
6207  if (stbi__get16le(s) < 1)
6208  goto errorEnd; // test height
6209  sz = stbi__get8(s); // bits per pixel
6210  if ((tga_color_type == 1) && (sz != 8) && (sz != 16))
6211  goto errorEnd; // for colormapped images, bpp is size of an index
6212  if ((sz != 8) && (sz != 15) && (sz != 16) && (sz != 24) && (sz != 32))
6213  goto errorEnd;
6214 
6215  res = 1; // if we got this far, everything's good and we can return 1
6216  // instead of 0
6217 
6218 errorEnd:
6219  stbi__rewind(s);
6220  return res;
6221 }
6222 
6223 // read 16bit value and convert to 24bit RGB
6224 static void stbi__tga_read_rgb16(stbi__context *s, stbi_uc *out)
6225 {
6226  stbi__uint16 px = (stbi__uint16)stbi__get16le(s);
6227  stbi__uint16 fiveBitMask = 31;
6228  // we have 3 channels with 5bits each
6229  int r = (px >> 10) & fiveBitMask;
6230  int g = (px >> 5) & fiveBitMask;
6231  int b = px & fiveBitMask;
6232  // Note that this saves the data in RGB(A) order, so it doesn't need to be
6233  // swapped later
6234  out[0] = (stbi_uc)((r * 255) / 31);
6235  out[1] = (stbi_uc)((g * 255) / 31);
6236  out[2] = (stbi_uc)((b * 255) / 31);
6237 
6238  // some people claim that the most significant bit might be used for alpha
6239  // (possibly if an alpha-bit is set in the "image descriptor byte")
6240  // but that only made 16bit test images completely translucent..
6241  // so let's treat all 15 and 16bit TGAs as RGB with no alpha.
6242 }
6243 
6244 static void *stbi__tga_load(stbi__context *s, int *x, int *y, int *comp,
6245  int req_comp, stbi__result_info *ri)
6246 {
6247  // read in the TGA header stuff
6248  int tga_offset = stbi__get8(s);
6249  int tga_indexed = stbi__get8(s);
6250  int tga_image_type = stbi__get8(s);
6251  int tga_is_RLE = 0;
6252  int tga_palette_start = stbi__get16le(s);
6253  int tga_palette_len = stbi__get16le(s);
6254  int tga_palette_bits = stbi__get8(s);
6255  int tga_x_origin = stbi__get16le(s);
6256  int tga_y_origin = stbi__get16le(s);
6257  int tga_width = stbi__get16le(s);
6258  int tga_height = stbi__get16le(s);
6259  int tga_bits_per_pixel = stbi__get8(s);
6260  int tga_comp, tga_rgb16 = 0;
6261  int tga_inverted = stbi__get8(s);
6262  // int tga_alpha_bits = tga_inverted & 15; // the 4 lowest bits - unused
6263  // (useless?)
6264  // image data
6265  unsigned char *tga_data;
6266  unsigned char *tga_palette = NULL;
6267  int i, j;
6268  unsigned char raw_data[4] = {0};
6269  int RLE_count = 0;
6270  int RLE_repeating = 0;
6271  int read_next_pixel = 1;
6272  STBI_NOTUSED(ri);
6273 
6274  // do a tiny bit of precessing
6275  if (tga_image_type >= 8) {
6276  tga_image_type -= 8;
6277  tga_is_RLE = 1;
6278  }
6279  tga_inverted = 1 - ((tga_inverted >> 5) & 1);
6280 
6281  // If I'm paletted, then I'll use the number of bits from the palette
6282  if (tga_indexed)
6283  tga_comp = stbi__tga_get_comp(tga_palette_bits, 0, &tga_rgb16);
6284  else
6285  tga_comp = stbi__tga_get_comp(tga_bits_per_pixel, (tga_image_type == 3),
6286  &tga_rgb16);
6287 
6288  if (!tga_comp) // shouldn't really happen, stbi__tga_test() should have
6289  // ensured basic consistency
6290  return stbi__errpuc("bad format", "Can't find out TGA pixelformat");
6291 
6292  // tga info
6293  *x = tga_width;
6294  *y = tga_height;
6295  if (comp)
6296  *comp = tga_comp;
6297 
6298  if (!stbi__mad3sizes_valid(tga_width, tga_height, tga_comp, 0))
6299  return stbi__errpuc("too large", "Corrupt TGA");
6300 
6301  tga_data =
6302  (unsigned char *)stbi__malloc_mad3(tga_width, tga_height, tga_comp, 0);
6303  if (!tga_data)
6304  return stbi__errpuc("outofmem", "Out of memory");
6305 
6306  // skip to the data's starting position (offset usually = 0)
6307  stbi__skip(s, tga_offset);
6308 
6309  if (!tga_indexed && !tga_is_RLE && !tga_rgb16) {
6310  for (i = 0; i < tga_height; ++i) {
6311  int row = tga_inverted ? tga_height - i - 1 : i;
6312  stbi_uc *tga_row = tga_data + row * tga_width * tga_comp;
6313  stbi__getn(s, tga_row, tga_width * tga_comp);
6314  }
6315  } else {
6316  // do I need to load a palette?
6317  if (tga_indexed) {
6318  // any data to skip? (offset usually = 0)
6319  stbi__skip(s, tga_palette_start);
6320  // load the palette
6321  tga_palette = (unsigned char *)stbi__malloc_mad2(tga_palette_len,
6322  tga_comp, 0);
6323  if (!tga_palette) {
6324  STBI_FREE(tga_data);
6325  return stbi__errpuc("outofmem", "Out of memory");
6326  }
6327  if (tga_rgb16) {
6328  stbi_uc *pal_entry = tga_palette;
6329  STBI_ASSERT(tga_comp == STBI_rgb);
6330  for (i = 0; i < tga_palette_len; ++i) {
6331  stbi__tga_read_rgb16(s, pal_entry);
6332  pal_entry += tga_comp;
6333  }
6334  } else if (!stbi__getn(s, tga_palette,
6335  tga_palette_len * tga_comp)) {
6336  STBI_FREE(tga_data);
6337  STBI_FREE(tga_palette);
6338  return stbi__errpuc("bad palette", "Corrupt TGA");
6339  }
6340  }
6341  // load the data
6342  for (i = 0; i < tga_width * tga_height; ++i) {
6343  // if I'm in RLE mode, do I need to get a RLE stbi__pngchunk?
6344  if (tga_is_RLE) {
6345  if (RLE_count == 0) {
6346  // yep, get the next byte as a RLE command
6347  int RLE_cmd = stbi__get8(s);
6348  RLE_count = 1 + (RLE_cmd & 127);
6349  RLE_repeating = RLE_cmd >> 7;
6350  read_next_pixel = 1;
6351  } else if (!RLE_repeating) {
6352  read_next_pixel = 1;
6353  }
6354  } else {
6355  read_next_pixel = 1;
6356  }
6357  // OK, if I need to read a pixel, do it now
6358  if (read_next_pixel) {
6359  // load however much data we did have
6360  if (tga_indexed) {
6361  // read in index, then perform the lookup
6362  int pal_idx = (tga_bits_per_pixel == 8) ? stbi__get8(s)
6363  : stbi__get16le(s);
6364  if (pal_idx >= tga_palette_len) {
6365  // invalid index
6366  pal_idx = 0;
6367  }
6368  pal_idx *= tga_comp;
6369  for (j = 0; j < tga_comp; ++j) {
6370  raw_data[j] = tga_palette[pal_idx + j];
6371  }
6372  } else if (tga_rgb16) {
6373  STBI_ASSERT(tga_comp == STBI_rgb);
6374  stbi__tga_read_rgb16(s, raw_data);
6375  } else {
6376  // read in the data raw
6377  for (j = 0; j < tga_comp; ++j) {
6378  raw_data[j] = stbi__get8(s);
6379  }
6380  }
6381  // clear the reading flag for the next pixel
6382  read_next_pixel = 0;
6383  } // end of reading a pixel
6384 
6385  // copy data
6386  for (j = 0; j < tga_comp; ++j)
6387  tga_data[i * tga_comp + j] = raw_data[j];
6388 
6389  // in case we're in RLE mode, keep counting down
6390  --RLE_count;
6391  }
6392  // do I need to invert the image?
6393  if (tga_inverted) {
6394  for (j = 0; j * 2 < tga_height; ++j) {
6395  int index1 = j * tga_width * tga_comp;
6396  int index2 = (tga_height - 1 - j) * tga_width * tga_comp;
6397  for (i = tga_width * tga_comp; i > 0; --i) {
6398  unsigned char temp = tga_data[index1];
6399  tga_data[index1] = tga_data[index2];
6400  tga_data[index2] = temp;
6401  ++index1;
6402  ++index2;
6403  }
6404  }
6405  }
6406  // clear my palette, if I had one
6407  if (tga_palette != NULL) {
6408  STBI_FREE(tga_palette);
6409  }
6410  }
6411 
6412  // swap RGB - if the source data was RGB16, it already is in the right order
6413  if (tga_comp >= 3 && !tga_rgb16) {
6414  unsigned char *tga_pixel = tga_data;
6415  for (i = 0; i < tga_width * tga_height; ++i) {
6416  unsigned char temp = tga_pixel[0];
6417  tga_pixel[0] = tga_pixel[2];
6418  tga_pixel[2] = temp;
6419  tga_pixel += tga_comp;
6420  }
6421  }
6422 
6423  // convert to target component count
6424  if (req_comp && req_comp != tga_comp)
6425  tga_data = stbi__convert_format(tga_data, tga_comp, req_comp, tga_width,
6426  tga_height);
6427 
6428  // the things I do to get rid of an error message, and yet keep
6429  // Microsoft's C compilers happy... [8^(
6430  tga_palette_start = tga_palette_len = tga_palette_bits = tga_x_origin =
6431  tga_y_origin = 0;
6432  // OK, done
6433  return tga_data;
6434 }
6435 #endif
6436 
6437 // *************************************************************************************************
6438 // Photoshop PSD loader -- PD by Thatcher Ulrich, integration by Nicolas
6439 // Schulz, tweaked by STB
6440 
6441 #ifndef STBI_NO_PSD
6442 static int stbi__psd_test(stbi__context *s)
6443 {
6444  int r = (stbi__get32be(s) == 0x38425053);
6445  stbi__rewind(s);
6446  return r;
6447 }
6448 
6449 static int stbi__psd_decode_rle(stbi__context *s, stbi_uc *p, int pixelCount)
6450 {
6451  int count, nleft, len;
6452 
6453  count = 0;
6454  while ((nleft = pixelCount - count) > 0) {
6455  len = stbi__get8(s);
6456  if (len == 128) {
6457  // No-op.
6458  } else if (len < 128) {
6459  // Copy next len+1 bytes literally.
6460  len++;
6461  if (len > nleft)
6462  return 0; // corrupt data
6463  count += len;
6464  while (len) {
6465  *p = stbi__get8(s);
6466  p += 4;
6467  len--;
6468  }
6469  } else if (len > 128) {
6470  stbi_uc val;
6471  // Next -len+1 bytes in the dest are replicated from next source
6472  // byte. (Interpret len as a negative 8-bit int.)
6473  len = 257 - len;
6474  if (len > nleft)
6475  return 0; // corrupt data
6476  val = stbi__get8(s);
6477  count += len;
6478  while (len) {
6479  *p = val;
6480  p += 4;
6481  len--;
6482  }
6483  }
6484  }
6485 
6486  return 1;
6487 }
6488 
6489 static void *stbi__psd_load(stbi__context *s, int *x, int *y, int *comp,
6490  int req_comp, stbi__result_info *ri, int bpc)
6491 {
6492  int pixelCount;
6493  int channelCount, compression;
6494  int channel, i;
6495  int bitdepth;
6496  int w, h;
6497  stbi_uc *out;
6498  STBI_NOTUSED(ri);
6499 
6500  // Check identifier
6501  if (stbi__get32be(s) != 0x38425053) // "8BPS"
6502  return stbi__errpuc("not PSD", "Corrupt PSD image");
6503 
6504  // Check file type version.
6505  if (stbi__get16be(s) != 1)
6506  return stbi__errpuc("wrong version",
6507  "Unsupported version of PSD image");
6508 
6509  // Skip 6 reserved bytes.
6510  stbi__skip(s, 6);
6511 
6512  // Read the number of channels (R, G, B, A, etc).
6513  channelCount = stbi__get16be(s);
6514  if (channelCount < 0 || channelCount > 16)
6515  return stbi__errpuc("wrong channel count",
6516  "Unsupported number of channels in PSD image");
6517 
6518  // Read the rows and columns of the image.
6519  h = stbi__get32be(s);
6520  w = stbi__get32be(s);
6521 
6522  // Make sure the depth is 8 bits.
6523  bitdepth = stbi__get16be(s);
6524  if (bitdepth != 8 && bitdepth != 16)
6525  return stbi__errpuc("unsupported bit depth",
6526  "PSD bit depth is not 8 or 16 bit");
6527 
6528  // Make sure the color mode is RGB.
6529  // Valid options are:
6530  // 0: Bitmap
6531  // 1: Grayscale
6532  // 2: Indexed color
6533  // 3: RGB color
6534  // 4: CMYK color
6535  // 7: Multichannel
6536  // 8: Duotone
6537  // 9: Lab color
6538  if (stbi__get16be(s) != 3)
6539  return stbi__errpuc("wrong color format",
6540  "PSD is not in RGB color format");
6541 
6542  // Skip the Mode Data. (It's the palette for indexed color; other info for
6543  // other modes.)
6544  stbi__skip(s, stbi__get32be(s));
6545 
6546  // Skip the image resources. (resolution, pen tool paths, etc)
6547  stbi__skip(s, stbi__get32be(s));
6548 
6549  // Skip the reserved data.
6550  stbi__skip(s, stbi__get32be(s));
6551 
6552  // Find out if the data is compressed.
6553  // Known values:
6554  // 0: no compression
6555  // 1: RLE compressed
6556  compression = stbi__get16be(s);
6557  if (compression > 1)
6558  return stbi__errpuc("bad compression",
6559  "PSD has an unknown compression format");
6560 
6561  // Check size
6562  if (!stbi__mad3sizes_valid(4, w, h, 0))
6563  return stbi__errpuc("too large", "Corrupt PSD");
6564 
6565  // Create the destination image.
6566 
6567  if (!compression && bitdepth == 16 && bpc == 16) {
6568  out = (stbi_uc *)stbi__malloc_mad3(8, w, h, 0);
6569  ri->bits_per_channel = 16;
6570  } else
6571  out = (stbi_uc *)stbi__malloc(4 * w * h);
6572 
6573  if (!out)
6574  return stbi__errpuc("outofmem", "Out of memory");
6575  pixelCount = w * h;
6576 
6577  // Initialize the data to zero.
6578  // memset( out, 0, pixelCount * 4 );
6579 
6580  // Finally, the image data.
6581  if (compression) {
6582  // RLE as used by .PSD and .TIFF
6583  // Loop until you get the number of unpacked bytes you are expecting:
6584  // Read the next source byte into n.
6585  // If n is between 0 and 127 inclusive, copy the next n+1 bytes
6586  // literally. Else if n is between -127 and -1 inclusive, copy the
6587  // next byte -n+1 times. Else if n is 128, noop.
6588  // Endloop
6589 
6590  // The RLE-compressed data is preceeded by a 2-byte data count for each
6591  // row in the data, which we're going to just skip.
6592  stbi__skip(s, h * channelCount * 2);
6593 
6594  // Read the RLE data by channel.
6595  for (channel = 0; channel < 4; channel++) {
6596  stbi_uc *p;
6597 
6598  p = out + channel;
6599  if (channel >= channelCount) {
6600  // Fill this channel with default data.
6601  for (i = 0; i < pixelCount; i++, p += 4)
6602  *p = (channel == 3 ? 255 : 0);
6603  } else {
6604  // Read the RLE data.
6605  if (!stbi__psd_decode_rle(s, p, pixelCount)) {
6606  STBI_FREE(out);
6607  return stbi__errpuc("corrupt", "bad RLE data");
6608  }
6609  }
6610  }
6611 
6612  } else {
6613  // We're at the raw image data. It's each channel in order (Red, Green,
6614  // Blue, Alpha, ...) where each channel consists of an 8-bit (or 16-bit)
6615  // value for each pixel in the image.
6616 
6617  // Read the data by channel.
6618  for (channel = 0; channel < 4; channel++) {
6619  if (channel >= channelCount) {
6620  // Fill this channel with default data.
6621  if (bitdepth == 16 && bpc == 16) {
6622  stbi__uint16 *q = ((stbi__uint16 *)out) + channel;
6623  stbi__uint16 val = channel == 3 ? 65535 : 0;
6624  for (i = 0; i < pixelCount; i++, q += 4)
6625  *q = val;
6626  } else {
6627  stbi_uc *p = out + channel;
6628  stbi_uc val = channel == 3 ? 255 : 0;
6629  for (i = 0; i < pixelCount; i++, p += 4)
6630  *p = val;
6631  }
6632  } else {
6633  if (ri->bits_per_channel == 16) { // output bpc
6634  stbi__uint16 *q = ((stbi__uint16 *)out) + channel;
6635  for (i = 0; i < pixelCount; i++, q += 4)
6636  *q = (stbi__uint16)stbi__get16be(s);
6637  } else {
6638  stbi_uc *p = out + channel;
6639  if (bitdepth == 16) { // input bpc
6640  for (i = 0; i < pixelCount; i++, p += 4)
6641  *p = (stbi_uc)(stbi__get16be(s) >> 8);
6642  } else {
6643  for (i = 0; i < pixelCount; i++, p += 4)
6644  *p = stbi__get8(s);
6645  }
6646  }
6647  }
6648  }
6649  }
6650 
6651  // remove weird white matte from PSD
6652  if (channelCount >= 4) {
6653  if (ri->bits_per_channel == 16) {
6654  for (i = 0; i < w * h; ++i) {
6655  stbi__uint16 *pixel = (stbi__uint16 *)out + 4 * i;
6656  if (pixel[3] != 0 && pixel[3] != 65535) {
6657  float a = pixel[3] / 65535.0f;
6658  float ra = 1.0f / a;
6659  float inv_a = 65535.0f * (1 - ra);
6660  pixel[0] = (stbi__uint16)(pixel[0] * ra + inv_a);
6661  pixel[1] = (stbi__uint16)(pixel[1] * ra + inv_a);
6662  pixel[2] = (stbi__uint16)(pixel[2] * ra + inv_a);
6663  }
6664  }
6665  } else {
6666  for (i = 0; i < w * h; ++i) {
6667  unsigned char *pixel = out + 4 * i;
6668  if (pixel[3] != 0 && pixel[3] != 255) {
6669  float a = pixel[3] / 255.0f;
6670  float ra = 1.0f / a;
6671  float inv_a = 255.0f * (1 - ra);
6672  pixel[0] = (unsigned char)(pixel[0] * ra + inv_a);
6673  pixel[1] = (unsigned char)(pixel[1] * ra + inv_a);
6674  pixel[2] = (unsigned char)(pixel[2] * ra + inv_a);
6675  }
6676  }
6677  }
6678  }
6679 
6680  // convert to desired output format
6681  if (req_comp && req_comp != 4) {
6682  if (ri->bits_per_channel == 16)
6683  out = (stbi_uc *)stbi__convert_format16((stbi__uint16 *)out, 4,
6684  req_comp, w, h);
6685  else
6686  out = stbi__convert_format(out, 4, req_comp, w, h);
6687  if (out == NULL)
6688  return out; // stbi__convert_format frees input on failure
6689  }
6690 
6691  if (comp)
6692  *comp = 4;
6693  *y = h;
6694  *x = w;
6695 
6696  return out;
6697 }
6698 #endif
6699 
6700 // *************************************************************************************************
6701 // Softimage PIC loader
6702 // by Tom Seddon
6703 //
6704 // See http://softimage.wiki.softimage.com/index.php/INFO:_PIC_file_format
6705 // See http://ozviz.wasp.uwa.edu.au/~pbourke/dataformats/softimagepic/
6706 
6707 #ifndef STBI_NO_PIC
6708 static int stbi__pic_is4(stbi__context *s, const char *str)
6709 {
6710  int i;
6711  for (i = 0; i < 4; ++i)
6712  if (stbi__get8(s) != (stbi_uc)str[i])
6713  return 0;
6714 
6715  return 1;
6716 }
6717 
6718 static int stbi__pic_test_core(stbi__context *s)
6719 {
6720  int i;
6721 
6722  if (!stbi__pic_is4(s, "\x53\x80\xF6\x34"))
6723  return 0;
6724 
6725  for (i = 0; i < 84; ++i)
6726  stbi__get8(s);
6727 
6728  if (!stbi__pic_is4(s, "PICT"))
6729  return 0;
6730 
6731  return 1;
6732 }
6733 
6734 typedef struct {
6735  stbi_uc size, type, channel;
6736 } stbi__pic_packet;
6737 
6738 static stbi_uc *stbi__readval(stbi__context *s, int channel, stbi_uc *dest)
6739 {
6740  int mask = 0x80, i;
6741 
6742  for (i = 0; i < 4; ++i, mask >>= 1) {
6743  if (channel & mask) {
6744  if (stbi__at_eof(s))
6745  return stbi__errpuc("bad file", "PIC file too short");
6746  dest[i] = stbi__get8(s);
6747  }
6748  }
6749 
6750  return dest;
6751 }
6752 
6753 static void stbi__copyval(int channel, stbi_uc *dest, const stbi_uc *src)
6754 {
6755  int mask = 0x80, i;
6756 
6757  for (i = 0; i < 4; ++i, mask >>= 1)
6758  if (channel & mask)
6759  dest[i] = src[i];
6760 }
6761 
6762 static stbi_uc *stbi__pic_load_core(stbi__context *s, int width, int height,
6763  int *comp, stbi_uc *result)
6764 {
6765  int act_comp = 0, num_packets = 0, y, chained;
6766  stbi__pic_packet packets[10];
6767 
6768  // this will (should...) cater for even some bizarre stuff like having data
6769  // for the same channel in multiple packets.
6770  do {
6771  stbi__pic_packet *packet;
6772 
6773  if (num_packets == sizeof(packets) / sizeof(packets[0]))
6774  return stbi__errpuc("bad format", "too many packets");
6775 
6776  packet = &packets[num_packets++];
6777 
6778  chained = stbi__get8(s);
6779  packet->size = stbi__get8(s);
6780  packet->type = stbi__get8(s);
6781  packet->channel = stbi__get8(s);
6782 
6783  act_comp |= packet->channel;
6784 
6785  if (stbi__at_eof(s))
6786  return stbi__errpuc("bad file", "file too short (reading packets)");
6787  if (packet->size != 8)
6788  return stbi__errpuc("bad format", "packet isn't 8bpp");
6789  } while (chained);
6790 
6791  *comp = (act_comp & 0x10 ? 4 : 3); // has alpha channel?
6792 
6793  for (y = 0; y < height; ++y) {
6794  int packet_idx;
6795 
6796  for (packet_idx = 0; packet_idx < num_packets; ++packet_idx) {
6797  stbi__pic_packet *packet = &packets[packet_idx];
6798  stbi_uc *dest = result + y * width * 4;
6799 
6800  switch (packet->type) {
6801  default:
6802  return stbi__errpuc("bad format",
6803  "packet has bad compression type");
6804 
6805  case 0: { // uncompressed
6806  int x;
6807 
6808  for (x = 0; x < width; ++x, dest += 4)
6809  if (!stbi__readval(s, packet->channel, dest))
6810  return 0;
6811  break;
6812  }
6813 
6814  case 1: // Pure RLE
6815  {
6816  int left = width, i;
6817 
6818  while (left > 0) {
6819  stbi_uc count, value[4];
6820 
6821  count = stbi__get8(s);
6822  if (stbi__at_eof(s))
6823  return stbi__errpuc("bad file",
6824  "file too short (pure read count)");
6825 
6826  if (count > left)
6827  count = (stbi_uc)left;
6828 
6829  if (!stbi__readval(s, packet->channel, value))
6830  return 0;
6831 
6832  for (i = 0; i < count; ++i, dest += 4)
6833  stbi__copyval(packet->channel, dest, value);
6834  left -= count;
6835  }
6836  } break;
6837 
6838  case 2: { // Mixed RLE
6839  int left = width;
6840  while (left > 0) {
6841  int count = stbi__get8(s), i;
6842  if (stbi__at_eof(s))
6843  return stbi__errpuc(
6844  "bad file", "file too short (mixed read count)");
6845 
6846  if (count >= 128) { // Repeated
6847  stbi_uc value[4];
6848 
6849  if (count == 128)
6850  count = stbi__get16be(s);
6851  else
6852  count -= 127;
6853  if (count > left)
6854  return stbi__errpuc("bad file", "scanline overrun");
6855 
6856  if (!stbi__readval(s, packet->channel, value))
6857  return 0;
6858 
6859  for (i = 0; i < count; ++i, dest += 4)
6860  stbi__copyval(packet->channel, dest, value);
6861  } else { // Raw
6862  ++count;
6863  if (count > left)
6864  return stbi__errpuc("bad file", "scanline overrun");
6865 
6866  for (i = 0; i < count; ++i, dest += 4)
6867  if (!stbi__readval(s, packet->channel, dest))
6868  return 0;
6869  }
6870  left -= count;
6871  }
6872  break;
6873  }
6874  }
6875  }
6876  }
6877 
6878  return result;
6879 }
6880 
6881 static void *stbi__pic_load(stbi__context *s, int *px, int *py, int *comp,
6882  int req_comp, stbi__result_info *ri)
6883 {
6884  stbi_uc *result;
6885  int i, x, y, internal_comp;
6886  STBI_NOTUSED(ri);
6887 
6888  if (!comp)
6889  comp = &internal_comp;
6890 
6891  for (i = 0; i < 92; ++i)
6892  stbi__get8(s);
6893 
6894  x = stbi__get16be(s);
6895  y = stbi__get16be(s);
6896  if (stbi__at_eof(s))
6897  return stbi__errpuc("bad file", "file too short (pic header)");
6898  if (!stbi__mad3sizes_valid(x, y, 4, 0))
6899  return stbi__errpuc("too large", "PIC image too large to decode");
6900 
6901  stbi__get32be(s); // skip `ratio'
6902  stbi__get16be(s); // skip `fields'
6903  stbi__get16be(s); // skip `pad'
6904 
6905  // intermediate buffer is RGBA
6906  result = (stbi_uc *)stbi__malloc_mad3(x, y, 4, 0);
6907  memset(result, 0xff, x * y * 4);
6908 
6909  if (!stbi__pic_load_core(s, x, y, comp, result)) {
6910  STBI_FREE(result);
6911  result = 0;
6912  }
6913  *px = x;
6914  *py = y;
6915  if (req_comp == 0)
6916  req_comp = *comp;
6917  result = stbi__convert_format(result, 4, req_comp, x, y);
6918 
6919  return result;
6920 }
6921 
6922 static int stbi__pic_test(stbi__context *s)
6923 {
6924  int r = stbi__pic_test_core(s);
6925  stbi__rewind(s);
6926  return r;
6927 }
6928 #endif
6929 
6930 // *************************************************************************************************
6931 // GIF loader -- public domain by Jean-Marc Lienher -- simplified/shrunk by
6932 // stb
6933 
6934 #ifndef STBI_NO_GIF
6935 typedef struct {
6936  stbi__int16 prefix;
6937  stbi_uc first;
6938  stbi_uc suffix;
6939 } stbi__gif_lzw;
6940 
6941 typedef struct {
6942  int w, h;
6943  stbi_uc *out, *old_out; // output buffer (always 4 components)
6944  int flags, bgindex, ratio, transparent, eflags, delay;
6945  stbi_uc pal[256][4];
6946  stbi_uc lpal[256][4];
6947  stbi__gif_lzw codes[4096];
6948  stbi_uc *color_table;
6949  int parse, step;
6950  int lflags;
6951  int start_x, start_y;
6952  int max_x, max_y;
6953  int cur_x, cur_y;
6954  int line_size;
6955 } stbi__gif;
6956 
6957 static int stbi__gif_test_raw(stbi__context *s)
6958 {
6959  int sz;
6960  if (stbi__get8(s) != 'G' || stbi__get8(s) != 'I' || stbi__get8(s) != 'F' ||
6961  stbi__get8(s) != '8')
6962  return 0;
6963  sz = stbi__get8(s);
6964  if (sz != '9' && sz != '7')
6965  return 0;
6966  if (stbi__get8(s) != 'a')
6967  return 0;
6968  return 1;
6969 }
6970 
6971 static int stbi__gif_test(stbi__context *s)
6972 {
6973  int r = stbi__gif_test_raw(s);
6974  stbi__rewind(s);
6975  return r;
6976 }
6977 
6978 static void stbi__gif_parse_colortable(stbi__context *s, stbi_uc pal[256][4],
6979  int num_entries, int transp)
6980 {
6981  int i;
6982  for (i = 0; i < num_entries; ++i) {
6983  pal[i][2] = stbi__get8(s);
6984  pal[i][1] = stbi__get8(s);
6985  pal[i][0] = stbi__get8(s);
6986  pal[i][3] = transp == i ? 0 : 255;
6987  }
6988 }
6989 
6990 static int stbi__gif_header(stbi__context *s, stbi__gif *g, int *comp,
6991  int is_info)
6992 {
6993  stbi_uc version;
6994  if (stbi__get8(s) != 'G' || stbi__get8(s) != 'I' || stbi__get8(s) != 'F' ||
6995  stbi__get8(s) != '8')
6996  return stbi__err("not GIF", "Corrupt GIF");
6997 
6998  version = stbi__get8(s);
6999  if (version != '7' && version != '9')
7000  return stbi__err("not GIF", "Corrupt GIF");
7001  if (stbi__get8(s) != 'a')
7002  return stbi__err("not GIF", "Corrupt GIF");
7003 
7004  stbi__g_failure_reason = "";
7005  g->w = stbi__get16le(s);
7006  g->h = stbi__get16le(s);
7007  g->flags = stbi__get8(s);
7008  g->bgindex = stbi__get8(s);
7009  g->ratio = stbi__get8(s);
7010  g->transparent = -1;
7011 
7012  if (comp != 0)
7013  *comp = 4; // can't actually tell whether it's 3 or 4 until we parse the
7014  // comments
7015 
7016  if (is_info)
7017  return 1;
7018 
7019  if (g->flags & 0x80)
7020  stbi__gif_parse_colortable(s, g->pal, 2 << (g->flags & 7), -1);
7021 
7022  return 1;
7023 }
7024 
7025 static int stbi__gif_info_raw(stbi__context *s, int *x, int *y, int *comp)
7026 {
7027  stbi__gif *g = (stbi__gif *)stbi__malloc(sizeof(stbi__gif));
7028  if (!stbi__gif_header(s, g, comp, 1)) {
7029  STBI_FREE(g);
7030  stbi__rewind(s);
7031  return 0;
7032  }
7033  if (x)
7034  *x = g->w;
7035  if (y)
7036  *y = g->h;
7037  STBI_FREE(g);
7038  return 1;
7039 }
7040 
7041 static void stbi__out_gif_code(stbi__gif *g, stbi__uint16 code)
7042 {
7043  stbi_uc *p, *c;
7044 
7045  // recurse to decode the prefixes, since the linked-list is backwards,
7046  // and working backwards through an interleaved image would be nasty
7047  if (g->codes[code].prefix >= 0)
7048  stbi__out_gif_code(g, g->codes[code].prefix);
7049 
7050  if (g->cur_y >= g->max_y)
7051  return;
7052 
7053  p = &g->out[g->cur_x + g->cur_y];
7054  c = &g->color_table[g->codes[code].suffix * 4];
7055 
7056  if (c[3] >= 128) {
7057  p[0] = c[2];
7058  p[1] = c[1];
7059  p[2] = c[0];
7060  p[3] = c[3];
7061  }
7062  g->cur_x += 4;
7063 
7064  if (g->cur_x >= g->max_x) {
7065  g->cur_x = g->start_x;
7066  g->cur_y += g->step;
7067 
7068  while (g->cur_y >= g->max_y && g->parse > 0) {
7069  g->step = (1 << g->parse) * g->line_size;
7070  g->cur_y = g->start_y + (g->step >> 1);
7071  --g->parse;
7072  }
7073  }
7074 }
7075 
// Decode the LZW-compressed data sub-blocks of one GIF raster read from `s`,
// passing every decoded code to stbi__out_gif_code so its pixels are written
// into g->out.
//
// Returns g->out when the data ends (a zero-length sub-block or the
// end-of-stream code), NULL when the leading code-size byte is larger than
// 12, and the result of stbi__errpuc when the code stream is malformed.
7076 static stbi_uc *stbi__process_gif_raster(stbi__context *s, stbi__gif *g)
7077 {
7078  stbi_uc lzw_cs;
7079  stbi__int32 len, init_code;
7080  stbi__uint32 first;
7081  stbi__int32 codesize, codemask, avail, oldcode, bits, valid_bits, clear;
7082  stbi__gif_lzw *p;
7083 
      // first byte of the raster: the minimum LZW code size
7084  lzw_cs = stbi__get8(s);
7085  if (lzw_cs > 12)
7086  return NULL;
      // `clear` is the clear code; clear + 1 is the end-of-stream code
7087  clear = 1 << lzw_cs;
      // `first` stays set until a clear code has been seen; a data code
      // arriving before that is rejected with "no clear code"
7088  first = 1;
7089  codesize = lzw_cs + 1;
7090  codemask = (1 << codesize) - 1;
      // `bits` is the bit accumulator and `valid_bits` the number of unread
      // bits it holds; new bytes are added above the bits already present
7091  bits = 0;
7092  valid_bits = 0;
      // root codes: each one stands for itself and has no prefix
7093  for (init_code = 0; init_code < clear; init_code++) {
7094  g->codes[init_code].prefix = -1;
7095  g->codes[init_code].first = (stbi_uc)init_code;
7096  g->codes[init_code].suffix = (stbi_uc)init_code;
7097  }
7098 
7099  // support no starting clear code
      // `avail` is the next free table slot, just past the two special codes;
      // `oldcode` is the previously decoded code, or -1 when there is none
7100  avail = clear + 2;
7101  oldcode = -1;
7102 
      // `len` counts the bytes left in the current data sub-block
7103  len = 0;
7104  for (;;) {
7105  if (valid_bits < codesize) {
      // not enough bits for a whole code: pull in one more byte
7106  if (len == 0) {
7107  len = stbi__get8(s); // start new block
      // a zero-length sub-block ends the raster data
7108  if (len == 0)
7109  return g->out;
7110  }
7111  --len;
7112  bits |= (stbi__int32)stbi__get8(s) << valid_bits;
7113  valid_bits += 8;
7114  } else {
      // take the next code from the low end of the accumulator
7115  stbi__int32 code = bits & codemask;
7116  bits >>= codesize;
7117  valid_bits -= codesize;
7118  // @OPTIMIZE: is there some way we can accelerate the non-clear
7119  // path?
7120  if (code == clear) { // clear code
      // restart with the initial code size and an empty dynamic table
7121  codesize = lzw_cs + 1;
7122  codemask = (1 << codesize) - 1;
7123  avail = clear + 2;
7124  oldcode = -1;
7125  first = 0;
7126  } else if (code == clear + 1) { // end of stream code
      // skip what is left of this sub-block, then every following
      // sub-block up to and including the zero-length terminator
7127  stbi__skip(s, len);
7128  while ((len = stbi__get8(s)) > 0)
7129  stbi__skip(s, len);
7130  return g->out;
7131  } else if (code <= avail) {
7132  if (first)
7133  return stbi__errpuc("no clear code", "Corrupt GIF");
7134 
7135  if (oldcode >= 0) {
      // add a table entry: the previous code's sequence extended by one byte
7136  p = &g->codes[avail++];
7137  if (avail > 4096)
7138  return stbi__errpuc("too many codes", "Corrupt GIF");
7139  p->prefix = (stbi__int16)oldcode;
7140  p->first = g->codes[oldcode].first;
7141  p->suffix =
7142  (code == avail) ? p->first : g->codes[code].first;
7143  } else if (code == avail)
      // with no previous code, a code equal to the next free slot
      // cannot be resolved
7144  return stbi__errpuc("illegal code in raster",
7145  "Corrupt GIF");
7146 
7147  stbi__out_gif_code(g, (stbi__uint16)code);
7148 
      // widen the code size once avail reaches a multiple of the current
      // code range, as long as avail is still at most 0x0FFF
7149  if ((avail & codemask) == 0 && avail <= 0x0FFF) {
7150  codesize++;
7151  codemask = (1 << codesize) - 1;
7152  }
7153 
7154  oldcode = code;
7155  } else {
      // code is beyond the next free slot
7156  return stbi__errpuc("illegal code in raster", "Corrupt GIF");
7157  }
7158  }
7159  }
7160 }
7161 
7162 static void stbi__fill_gif_background(stbi__gif *g, int x0, int y0, int x1,
7163  int y1)
7164 {
7165  int x, y;
7166  stbi_uc *c = g->pal[g->bgindex];
7167  for (y = y0; y < y1; y += 4 * g->w) {
7168  for (x = x0; x < x1; x += 4) {
7169  stbi_uc *p = &g->out[y + x];
7170  p[0] = c[2];
7171  p[1] = c[1];
7172  p[2] = c[0];
7173  p[3] = 0;
7174  }
7175  }
7176 }
7177 
7178 // this function is designed to support animated gifs, although stb_image
7179 // doesn't support it
7180 static stbi_uc *stbi__gif_load_next(stbi__context *s, stbi__gif *g, int *comp,
7181  int req_comp)
7182 {
7183  int i;
7184  stbi_uc *prev_out = 0;
7185 
7186  if (g->out == 0 && !stbi__gif_header(s, g, comp, 0))
7187  return 0; // stbi__g_failure_reason set by stbi__gif_header
7188 
7189  if (!stbi__mad3sizes_valid(g->w, g->h, 4, 0))
7190  return stbi__errpuc("too large", "GIF too large");
7191 
7192  prev_out = g->out;
7193  g->out = (stbi_uc *)stbi__malloc_mad3(4, g->w, g->h, 0);
7194  if (g->out == 0)
7195  return stbi__errpuc("outofmem", "Out of memory");
7196 
7197  switch ((g->eflags & 0x1C) >> 2) {
7198  case 0: // unspecified (also always used on 1st frame)
7199  stbi__fill_gif_background(g, 0, 0, 4 * g->w, 4 * g->w * g->h);
7200  break;
7201  case 1: // do not dispose
7202  if (prev_out)
7203  memcpy(g->out, prev_out, 4 * g->w * g->h);
7204  g->old_out = prev_out;
7205  break;
7206  case 2: // dispose to background
7207  if (prev_out)
7208  memcpy(g->out, prev_out, 4 * g->w * g->h);
7209  stbi__fill_gif_background(g, g->start_x, g->start_y, g->max_x,
7210  g->max_y);
7211  break;
7212  case 3: // dispose to previous
7213  if (g->old_out) {
7214  for (i = g->start_y; i < g->max_y; i += 4 * g->w)
7215  memcpy(&g->out[i + g->start_x], &g->old_out[i + g->start_x],
7216  g->max_x - g->start_x);
7217  }
7218  break;
7219  }
7220 
7221  for (;;) {
7222  switch (stbi__get8(s)) {
7223  case 0x2C: /* Image Descriptor */
7224  {
7225  int prev_trans = -1;
7226  stbi__int32 x, y, w, h;
7227  stbi_uc *o;
7228 
7229  x = stbi__get16le(s);
7230  y = stbi__get16le(s);
7231  w = stbi__get16le(s);
7232  h = stbi__get16le(s);
7233  if (((x + w) > (g->w)) || ((y + h) > (g->h)))
7234  return stbi__errpuc("bad Image Descriptor", "Corrupt GIF");
7235 
7236  g->line_size = g->w * 4;
7237  g->start_x = x * 4;
7238  g->start_y = y * g->line_size;
7239  g->max_x = g->start_x + w * 4;
7240  g->max_y = g->start_y + h * g->line_size;
7241  g->cur_x = g->start_x;
7242  g->cur_y = g->start_y;
7243 
7244  g->lflags = stbi__get8(s);
7245 
7246  if (g->lflags & 0x40) {
7247  g->step = 8 * g->line_size; // first interlaced spacing
7248  g->parse = 3;
7249  } else {
7250  g->step = g->line_size;
7251  g->parse = 0;
7252  }
7253 
7254  if (g->lflags & 0x80) {
7255  stbi__gif_parse_colortable(s, g->lpal, 2 << (g->lflags & 7),
7256  g->eflags & 0x01 ? g->transparent
7257  : -1);
7258  g->color_table = (stbi_uc *)g->lpal;
7259  } else if (g->flags & 0x80) {
7260  if (g->transparent >= 0 && (g->eflags & 0x01)) {
7261  prev_trans = g->pal[g->transparent][3];
7262  g->pal[g->transparent][3] = 0;
7263  }
7264  g->color_table = (stbi_uc *)g->pal;
7265  } else
7266  return stbi__errpuc("missing color table", "Corrupt GIF");
7267 
7268  o = stbi__process_gif_raster(s, g);
7269  if (o == NULL)
7270  return NULL;
7271 
7272  if (prev_trans != -1)
7273  g->pal[g->transparent][3] = (stbi_uc)prev_trans;
7274 
7275  return o;
7276  }
7277 
7278  case 0x21: // Comment Extension.
7279  {
7280  int len;
7281  if (stbi__get8(s) == 0xF9) { // Graphic Control Extension.
7282  len = stbi__get8(s);
7283  if (len == 4) {
7284  g->eflags = stbi__get8(s);
7285  g->delay = stbi__get16le(s);
7286  g->transparent = stbi__get8(s);
7287  } else {
7288  stbi__skip(s, len);
7289  break;
7290  }
7291  }
7292  while ((len = stbi__get8(s)) != 0)
7293  stbi__skip(s, len);
7294  break;
7295  }
7296 
7297  case 0x3B: // gif stream termination code
7298  return (stbi_uc *)s; // using '1' causes warning on some compilers
7299 
7300  default:
7301  return stbi__errpuc("unknown code", "Corrupt GIF");
7302  }
7303  }
7304 
7305  STBI_NOTUSED(req_comp);
7306 }
7307 
7308 static void *stbi__gif_load(stbi__context *s, int *x, int *y, int *comp,
7309  int req_comp, stbi__result_info *ri)
7310 {
7311  stbi_uc *u = 0;
7312  stbi__gif *g = (stbi__gif *)stbi__malloc(sizeof(stbi__gif));
7313  memset(g, 0, sizeof(*g));
7314  STBI_NOTUSED(ri);
7315 
7316  u = stbi__gif_load_next(s, g, comp, req_comp);
7317  if (u == (stbi_uc *)s)
7318  u = 0; // end of animated gif marker
7319  if (u) {
7320  *x = g->w;
7321  *y = g->h;
7322  if (req_comp && req_comp != 4)
7323  u = stbi__convert_format(u, 4, req_comp, g->w, g->h);
7324  } else if (g->out)
7325  STBI_FREE(g->out);
7326  STBI_FREE(g);
7327  return u;
7328 }
7329 
7330 static int stbi__gif_info(stbi__context *s, int *x, int *y, int *comp)
7331 {
7332  return stbi__gif_info_raw(s, x, y, comp);
7333 }
7334 #endif
7335 
7336 // *************************************************************************************************
7337 // Radiance RGBE HDR loader
7338 // originally by Nicolas Schulz
7339 #ifndef STBI_NO_HDR
7340 static int stbi__hdr_test_core(stbi__context *s, const char *signature)
7341 {
7342  int i;
7343  for (i = 0; signature[i]; ++i)
7344  if (stbi__get8(s) != signature[i])
7345  return 0;
7346  stbi__rewind(s);
7347  return 1;
7348 }
7349 
7350 static int stbi__hdr_test(stbi__context *s)
7351 {
7352  int r = stbi__hdr_test_core(s, "#?RADIANCE\n");
7353  stbi__rewind(s);
7354  if (!r) {
7355  r = stbi__hdr_test_core(s, "#?RGBE\n");
7356  stbi__rewind(s);
7357  }
7358  return r;
7359 }
7360 
7361 #define STBI__HDR_BUFLEN 1024
7362 static char *stbi__hdr_gettoken(stbi__context *z, char *buffer)
7363 {
7364  int len = 0;
7365  char c = '\0';
7366 
7367  c = (char)stbi__get8(z);
7368 
7369  while (!stbi__at_eof(z) && c != '\n') {
7370  buffer[len++] = c;
7371  if (len == STBI__HDR_BUFLEN - 1) {
7372  // flush to end of line
7373  while (!stbi__at_eof(z) && stbi__get8(z) != '\n')
7374  ;
7375  break;
7376  }
7377  c = (char)stbi__get8(z);
7378  }
7379 
7380  buffer[len] = 0;
7381  return buffer;
7382 }
7383 
7384 static void stbi__hdr_convert(float *output, stbi_uc *input, int req_comp)
7385 {
7386  if (input[3] != 0) {
7387  float f1;
7388  // Exponent
7389  f1 = (float)ldexp(1.0f, input[3] - (int)(128 + 8));
7390  if (req_comp <= 2)
7391  output[0] = (input[0] + input[1] + input[2]) * f1 / 3;
7392  else {
7393  output[0] = input[0] * f1;
7394  output[1] = input[1] * f1;
7395  output[2] = input[2] * f1;
7396  }
7397  if (req_comp == 2)
7398  output[1] = 1;
7399  if (req_comp == 4)
7400  output[3] = 1;
7401  } else {
7402  switch (req_comp) {
7403  case 4:
7404  output[3] = 1; /* fallthrough */
7405  case 3:
7406  output[0] = output[1] = output[2] = 0;
7407  break;
7408  case 2:
7409  output[1] = 1; /* fallthrough */
7410  case 1:
7411  output[0] = 0;
7412  break;
7413  }
7414  }
7415 }
7416 
7417 static float *stbi__hdr_load(stbi__context *s, int *x, int *y, int *comp,
7418  int req_comp, stbi__result_info *ri)
7419 {
7420  char buffer[STBI__HDR_BUFLEN];
7421  char *token;
7422  int valid = 0;
7423  int width, height;
7424  stbi_uc *scanline;
7425  float *hdr_data;
7426  int len;
7427  unsigned char count, value;
7428  int i, j, k, c1, c2, z;
7429  const char *headerToken;
7430  STBI_NOTUSED(ri);
7431 
7432  // Check identifier
7433  headerToken = stbi__hdr_gettoken(s, buffer);
7434  if (strcmp(headerToken, "#?RADIANCE") != 0 &&
7435  strcmp(headerToken, "#?RGBE") != 0)
7436  return stbi__errpf("not HDR", "Corrupt HDR image");
7437 
7438  // Parse header
7439  for (;;) {
7440  token = stbi__hdr_gettoken(s, buffer);
7441  if (token[0] == 0)
7442  break;
7443  if (strcmp(token, "FORMAT=32-bit_rle_rgbe") == 0)
7444  valid = 1;
7445  }
7446 
7447  if (!valid)
7448  return stbi__errpf("unsupported format", "Unsupported HDR format");
7449 
7450  // Parse width and height
7451  // can't use sscanf() if we're not using stdio!
7452  token = stbi__hdr_gettoken(s, buffer);
7453  if (strncmp(token, "-Y ", 3))
7454  return stbi__errpf("unsupported data layout", "Unsupported HDR format");
7455  token += 3;
7456  height = (int)strtol(token, &token, 10);
7457  while (*token == ' ')
7458  ++token;
7459  if (strncmp(token, "+X ", 3))
7460  return stbi__errpf("unsupported data layout", "Unsupported HDR format");
7461  token += 3;
7462  width = (int)strtol(token, NULL, 10);
7463 
7464  *x = width;
7465  *y = height;
7466 
7467  if (comp)
7468  *comp = 3;
7469  if (req_comp == 0)
7470  req_comp = 3;
7471 
7472  if (!stbi__mad4sizes_valid(width, height, req_comp, sizeof(float), 0))
7473  return stbi__errpf("too large", "HDR image is too large");
7474 
7475  // Read data
7476  hdr_data =
7477  (float *)stbi__malloc_mad4(width, height, req_comp, sizeof(float), 0);
7478  if (!hdr_data)
7479  return stbi__errpf("outofmem", "Out of memory");
7480 
7481  // Load image data
7482  // image data is stored as some number of sca
7483  if (width < 8 || width >= 32768) {
7484  // Read flat data
7485  for (j = 0; j < height; ++j) {
7486  for (i = 0; i < width; ++i) {
7487  stbi_uc rgbe[4];
7488  main_decode_loop:
7489  stbi__getn(s, rgbe, 4);
7490  stbi__hdr_convert(hdr_data + j * width * req_comp +
7491  i * req_comp,
7492  rgbe, req_comp);
7493  }
7494  }
7495  } else {
7496  // Read RLE-encoded data
7497  scanline = NULL;
7498 
7499  for (j = 0; j < height; ++j) {
7500  c1 = stbi__get8(s);
7501  c2 = stbi__get8(s);
7502  len = stbi__get8(s);
7503  if (c1 != 2 || c2 != 2 || (len & 0x80)) {
7504  // not run-length encoded, so we have to actually use THIS data
7505  // as a decoded pixel (note this can't be a valid pixel--one of
7506  // RGB must be >= 128)
7507  stbi_uc rgbe[4];
7508  rgbe[0] = (stbi_uc)c1;
7509  rgbe[1] = (stbi_uc)c2;
7510  rgbe[2] = (stbi_uc)len;
7511  rgbe[3] = (stbi_uc)stbi__get8(s);
7512  stbi__hdr_convert(hdr_data, rgbe, req_comp);
7513  i = 1;
7514  j = 0;
7515  STBI_FREE(scanline);
7516  goto main_decode_loop; // yes, this makes no sense
7517  }
7518  len <<= 8;
7519  len |= stbi__get8(s);
7520  if (len != width) {
7521  STBI_FREE(hdr_data);
7522  STBI_FREE(scanline);
7523  return stbi__errpf("invalid decoded scanline length",
7524  "corrupt HDR");
7525  }
7526  if (scanline == NULL) {
7527  scanline = (stbi_uc *)stbi__malloc_mad2(width, 4, 0);
7528  if (!scanline) {
7529  STBI_FREE(hdr_data);
7530  return stbi__errpf("outofmem", "Out of memory");
7531  }
7532  }
7533 
7534  for (k = 0; k < 4; ++k) {
7535  int nleft;
7536  i = 0;
7537  while ((nleft = width - i) > 0) {
7538  count = stbi__get8(s);
7539  if (count > 128) {
7540  // Run
7541  value = stbi__get8(s);
7542  count -= 128;
7543  if (count > nleft) {
7544  STBI_FREE(hdr_data);
7545  STBI_FREE(scanline);
7546  return stbi__errpf("corrupt",
7547  "bad RLE data in HDR");
7548  }
7549  for (z = 0; z < count; ++z)
7550  scanline[i++ * 4 + k] = value;
7551  } else {
7552  // Dump
7553  if (count > nleft) {
7554  STBI_FREE(hdr_data);
7555  STBI_FREE(scanline);
7556  return stbi__errpf("corrupt",
7557  "bad RLE data in HDR");
7558  }
7559  for (z = 0; z < count; ++z)
7560  scanline[i++ * 4 + k] = stbi__get8(s);
7561  }
7562  }
7563  }
7564  for (i = 0; i < width; ++i)
7565  stbi__hdr_convert(hdr_data + (j * width + i) * req_comp,
7566  scanline + i * 4, req_comp);
7567  }
7568  if (scanline)
7569  STBI_FREE(scanline);
7570  }
7571 
7572  return hdr_data;
7573 }
7574 
7575 static int stbi__hdr_info(stbi__context *s, int *x, int *y, int *comp)
7576 {
7577  char buffer[STBI__HDR_BUFLEN];
7578  char *token;
7579  int valid = 0;
7580  int dummy;
7581 
7582  if (!x)
7583  x = &dummy;
7584  if (!y)
7585  y = &dummy;
7586  if (!comp)
7587  comp = &dummy;
7588 
7589  if (stbi__hdr_test(s) == 0) {
7590  stbi__rewind(s);
7591  return 0;
7592  }
7593 
7594  for (;;) {
7595  token = stbi__hdr_gettoken(s, buffer);
7596  if (token[0] == 0)
7597  break;
7598  if (strcmp(token, "FORMAT=32-bit_rle_rgbe") == 0)
7599  valid = 1;
7600  }
7601 
7602  if (!valid) {
7603  stbi__rewind(s);
7604  return 0;
7605  }
7606  token = stbi__hdr_gettoken(s, buffer);
7607  if (strncmp(token, "-Y ", 3)) {
7608  stbi__rewind(s);
7609  return 0;
7610  }
7611  token += 3;
7612  *y = (int)strtol(token, &token, 10);
7613  while (*token == ' ')
7614  ++token;
7615  if (strncmp(token, "+X ", 3)) {
7616  stbi__rewind(s);
7617  return 0;
7618  }
7619  token += 3;
7620  *x = (int)strtol(token, NULL, 10);
7621  *comp = 3;
7622  return 1;
7623 }
7624 #endif // STBI_NO_HDR
7625 
7626 #ifndef STBI_NO_BMP
7627 static int stbi__bmp_info(stbi__context *s, int *x, int *y, int *comp)
7628 {
7629  void *p;
7630  stbi__bmp_data info;
7631 
7632  info.all_a = 255;
7633  p = stbi__bmp_parse_header(s, &info);
7634  stbi__rewind(s);
7635  if (p == NULL)
7636  return 0;
7637  if (x)
7638  *x = s->img_x;
7639  if (y)
7640  *y = s->img_y;
7641  if (comp)
7642  *comp = info.ma ? 4 : 3;
7643  return 1;
7644 }
7645 #endif
7646 
7647 #ifndef STBI_NO_PSD
7648 static int stbi__psd_info(stbi__context *s, int *x, int *y, int *comp)
7649 {
7650  int channelCount, dummy;
7651  if (!x)
7652  x = &dummy;
7653  if (!y)
7654  y = &dummy;
7655  if (!comp)
7656  comp = &dummy;
7657  if (stbi__get32be(s) != 0x38425053) {
7658  stbi__rewind(s);
7659  return 0;
7660  }
7661  if (stbi__get16be(s) != 1) {
7662  stbi__rewind(s);
7663  return 0;
7664  }
7665  stbi__skip(s, 6);
7666  channelCount = stbi__get16be(s);
7667  if (channelCount < 0 || channelCount > 16) {
7668  stbi__rewind(s);
7669  return 0;
7670  }
7671  *y = stbi__get32be(s);
7672  *x = stbi__get32be(s);
7673  if (stbi__get16be(s) != 8) {
7674  stbi__rewind(s);
7675  return 0;
7676  }
7677  if (stbi__get16be(s) != 3) {
7678  stbi__rewind(s);
7679  return 0;
7680  }
7681  *comp = 4;
7682  return 1;
7683 }
7684 #endif
7685 
7686 #ifndef STBI_NO_PIC
7687 static int stbi__pic_info(stbi__context *s, int *x, int *y, int *comp)
7688 {
7689  int act_comp = 0, num_packets = 0, chained, dummy;
7690  stbi__pic_packet packets[10];
7691 
7692  if (!x)
7693  x = &dummy;
7694  if (!y)
7695  y = &dummy;
7696  if (!comp)
7697  comp = &dummy;
7698 
7699  if (!stbi__pic_is4(s, "\x53\x80\xF6\x34")) {
7700  stbi__rewind(s);
7701  return 0;
7702  }
7703 
7704  stbi__skip(s, 88);
7705 
7706  *x = stbi__get16be(s);
7707  *y = stbi__get16be(s);
7708  if (stbi__at_eof(s)) {
7709  stbi__rewind(s);
7710  return 0;
7711  }
7712  if ((*x) != 0 && (1 << 28) / (*x) < (*y)) {
7713  stbi__rewind(s);
7714  return 0;
7715  }
7716 
7717  stbi__skip(s, 8);
7718 
7719  do {
7720  stbi__pic_packet *packet;
7721 
7722  if (num_packets == sizeof(packets) / sizeof(packets[0]))
7723  return 0;
7724 
7725  packet = &packets[num_packets++];
7726  chained = stbi__get8(s);
7727  packet->size = stbi__get8(s);
7728  packet->type = stbi__get8(s);
7729  packet->channel = stbi__get8(s);
7730  act_comp |= packet->channel;
7731 
7732  if (stbi__at_eof(s)) {
7733  stbi__rewind(s);
7734  return 0;
7735  }
7736  if (packet->size != 8) {
7737  stbi__rewind(s);
7738  return 0;
7739  }
7740  } while (chained);
7741 
7742  *comp = (act_comp & 0x10 ? 4 : 3);
7743 
7744  return 1;
7745 }
7746 #endif
7747 
7748 // *************************************************************************************************
7749 // Portable Gray Map and Portable Pixel Map loader
7750 // by Ken Miller
7751 //
7752 // PGM: http://netpbm.sourceforge.net/doc/pgm.html
7753 // PPM: http://netpbm.sourceforge.net/doc/ppm.html
7754 //
7755 // Known limitations:
7756  // Header comments ('#' through end of line) are skipped and discarded
7757 // Does not support ASCII image data (formats P2 and P3)
7758 // Does not support 16-bit-per-channel
7759 
7760 #ifndef STBI_NO_PNM
7761 
7762 static int stbi__pnm_test(stbi__context *s)
7763 {
7764  char p, t;
7765  p = (char)stbi__get8(s);
7766  t = (char)stbi__get8(s);
7767  if (p != 'P' || (t != '5' && t != '6')) {
7768  stbi__rewind(s);
7769  return 0;
7770  }
7771  return 1;
7772 }
7773 
7774 static void *stbi__pnm_load(stbi__context *s, int *x, int *y, int *comp,
7775  int req_comp, stbi__result_info *ri)
7776 {
7777  stbi_uc *out;
7778  STBI_NOTUSED(ri);
7779 
7780  if (!stbi__pnm_info(s, (int *)&s->img_x, (int *)&s->img_y,
7781  (int *)&s->img_n))
7782  return 0;
7783 
7784  *x = s->img_x;
7785  *y = s->img_y;
7786  if (comp)
7787  *comp = s->img_n;
7788 
7789  if (!stbi__mad3sizes_valid(s->img_n, s->img_x, s->img_y, 0))
7790  return stbi__errpuc("too large", "PNM too large");
7791 
7792  out = (stbi_uc *)stbi__malloc_mad3(s->img_n, s->img_x, s->img_y, 0);
7793  if (!out)
7794  return stbi__errpuc("outofmem", "Out of memory");
7795  stbi__getn(s, out, s->img_n * s->img_x * s->img_y);
7796 
7797  if (req_comp && req_comp != s->img_n) {
7798  out = stbi__convert_format(out, s->img_n, req_comp, s->img_x, s->img_y);
7799  if (out == NULL)
7800  return out; // stbi__convert_format frees input on failure
7801  }
7802  return out;
7803 }
7804 
// Returns 1 for the six ASCII whitespace characters (space, \t, \n, \v,
// \f, \r) and 0 for anything else.
static int stbi__pnm_isspace(char c)
{
    switch (c) {
    case ' ':
    case '\t':
    case '\n':
    case '\v':
    case '\f':
    case '\r':
        return 1;
    default:
        return 0;
    }
}
7810 
7811 static void stbi__pnm_skip_whitespace(stbi__context *s, char *c)
7812 {
7813  for (;;) {
7814  while (!stbi__at_eof(s) && stbi__pnm_isspace(*c))
7815  *c = (char)stbi__get8(s);
7816 
7817  if (stbi__at_eof(s) || *c != '#')
7818  break;
7819 
7820  while (!stbi__at_eof(s) && *c != '\n' && *c != '\r')
7821  *c = (char)stbi__get8(s);
7822  }
7823 }
7824 
// Returns 1 when c is an ASCII decimal digit, 0 otherwise.
static int stbi__pnm_isdigit(char c)
{
    if (c < '0')
        return 0;
    return c <= '9';
}
7829 
7830 static int stbi__pnm_getinteger(stbi__context *s, char *c)
7831 {
7832  int value = 0;
7833 
7834  while (!stbi__at_eof(s) && stbi__pnm_isdigit(*c)) {
7835  value = value * 10 + (*c - '0');
7836  *c = (char)stbi__get8(s);
7837  }
7838 
7839  return value;
7840 }
7841 
7842 static int stbi__pnm_info(stbi__context *s, int *x, int *y, int *comp)
7843 {
7844  int maxv, dummy;
7845  char c, p, t;
7846 
7847  if (!x)
7848  x = &dummy;
7849  if (!y)
7850  y = &dummy;
7851  if (!comp)
7852  comp = &dummy;
7853 
7854  stbi__rewind(s);
7855 
7856  // Get identifier
7857  p = (char)stbi__get8(s);
7858  t = (char)stbi__get8(s);
7859  if (p != 'P' || (t != '5' && t != '6')) {
7860  stbi__rewind(s);
7861  return 0;
7862  }
7863 
7864  *comp =
7865  (t == '6') ? 3 : 1; // '5' is 1-component .pgm; '6' is 3-component .ppm
7866 
7867  c = (char)stbi__get8(s);
7868  stbi__pnm_skip_whitespace(s, &c);
7869 
7870  *x = stbi__pnm_getinteger(s, &c); // read width
7871  stbi__pnm_skip_whitespace(s, &c);
7872 
7873  *y = stbi__pnm_getinteger(s, &c); // read height
7874  stbi__pnm_skip_whitespace(s, &c);
7875 
7876  maxv = stbi__pnm_getinteger(s, &c); // read max value
7877 
7878  if (maxv > 255)
7879  return stbi__err("max value > 255", "PPM image not 8-bit");
7880  else
7881  return 1;
7882 }
7883 #endif
7884 
7885 static int stbi__info_main(stbi__context *s, int *x, int *y, int *comp)
7886 {
7887 #ifndef STBI_NO_JPEG
7888  if (stbi__jpeg_info(s, x, y, comp))
7889  return 1;
7890 #endif
7891 
7892 #ifndef STBI_NO_PNG
7893  if (stbi__png_info(s, x, y, comp))
7894  return 1;
7895 #endif
7896 
7897 #ifndef STBI_NO_GIF
7898  if (stbi__gif_info(s, x, y, comp))
7899  return 1;
7900 #endif
7901 
7902 #ifndef STBI_NO_BMP
7903  if (stbi__bmp_info(s, x, y, comp))
7904  return 1;
7905 #endif
7906 
7907 #ifndef STBI_NO_PSD
7908  if (stbi__psd_info(s, x, y, comp))
7909  return 1;
7910 #endif
7911 
7912 #ifndef STBI_NO_PIC
7913  if (stbi__pic_info(s, x, y, comp))
7914  return 1;
7915 #endif
7916 
7917 #ifndef STBI_NO_PNM
7918  if (stbi__pnm_info(s, x, y, comp))
7919  return 1;
7920 #endif
7921 
7922 #ifndef STBI_NO_HDR
7923  if (stbi__hdr_info(s, x, y, comp))
7924  return 1;
7925 #endif
7926 
7927 // test tga last because it's a crappy test!
7928 #ifndef STBI_NO_TGA
7929  if (stbi__tga_info(s, x, y, comp))
7930  return 1;
7931 #endif
7932  return stbi__err("unknown image type",
7933  "Image not of any known type, or corrupt");
7934 }
7935 
7936 #ifndef STBI_NO_STDIO
7937 STBIDEF int stbi_info(char const *filename, int *x, int *y, int *comp)
7938 {
7939  FILE *f = stbi__fopen(filename, "rb");
7940  int result;
7941  if (!f)
7942  return stbi__err("can't fopen", "Unable to open file");
7943  result = stbi_info_from_file(f, x, y, comp);
7944  fclose(f);
7945  return result;
7946 }
7947 
7948 STBIDEF int stbi_info_from_file(FILE *f, int *x, int *y, int *comp)
7949 {
7950  int r;
7951  stbi__context s;
7952  long pos = ftell(f);
7953  stbi__start_file(&s, f);
7954  r = stbi__info_main(&s, x, y, comp);
7955  fseek(f, pos, SEEK_SET);
7956  return r;
7957 }
7958 #endif // !STBI_NO_STDIO
7959 
7960 STBIDEF int stbi_info_from_memory(stbi_uc const *buffer, int len, int *x,
7961  int *y, int *comp)
7962 {
7963  stbi__context s;
7964  stbi__start_mem(&s, buffer, len);
7965  return stbi__info_main(&s, x, y, comp);
7966 }
7967 
7968 STBIDEF int stbi_info_from_callbacks(stbi_io_callbacks const *c, void *user,
7969  int *x, int *y, int *comp)
7970 {
7971  stbi__context s;
7972  stbi__start_callbacks(&s, (stbi_io_callbacks *)c, user);
7973  return stbi__info_main(&s, x, y, comp);
7974 }
7975 
7976 #endif // STB_IMAGE_IMPLEMENTATION
7977 
7978 /*
7979  revision history:
7980  2.16 (2017-07-23) all functions have 16-bit variants;
7981  STBI_NO_STDIO works again;
7982  compilation fixes;
7983  fix rounding in unpremultiply;
7984  optimize vertical flip;
7985  disable raw_len validation;
7986  documentation fixes
7987  2.15 (2017-03-18) fix png-1,2,4 bug; now all Imagenet JPGs decode;
7988  warning fixes; disable run-time SSE detection on gcc;
7989  uniform handling of optional "return" values;
7990  thread-safe initialization of zlib tables
7991  2.14 (2017-03-03) remove deprecated STBI_JPEG_OLD; fixes for Imagenet
7992  JPGs 2.13 (2016-11-29) add 16-bit API, only supported for PNG right now 2.12
7993  (2016-04-02) fix typo in 2.11 PSD fix that caused crashes 2.11 (2016-04-02)
7994  allocate large structures on the stack remove white matting for transparent
7995  PSD fix reported channel count for PNG & BMP re-enable SSE2 in non-gcc 64-bit
7996  support RGB-formatted JPEG
7997  read 16-bit PNGs (only as 8-bit)
7998  2.10 (2016-01-22) avoid warning introduced in 2.09 by STBI_REALLOC_SIZED
7999  2.09 (2016-01-16) allow comments in PNM files
8000  16-bit-per-pixel TGA (not bit-per-component)
8001  info() for TGA could break due to .hdr handling
8002  info() for BMP now shares code instead of sloppy parse
8003  can use STBI_REALLOC_SIZED if allocator doesn't support
8004  realloc code cleanup 2.08 (2015-09-13) fix to 2.07 cleanup, reading RGB PSD
8005  as RGBA 2.07 (2015-09-13) fix compiler warnings partial animated GIF support
8006  limited 16-bpc PSD support
8007  #ifdef unused functions
8008  bug with < 92 byte PIC,PNM,HDR,TGA
8009  2.06 (2015-04-19) fix bug where PSD returns wrong '*comp' value
8010  2.05 (2015-04-19) fix bug in progressive JPEG handling, fix warning
8011  2.04 (2015-04-15) try to re-enable SIMD on MinGW 64-bit
8012  2.03 (2015-04-12) extra corruption checking (mmozeiko)
8013  stbi_set_flip_vertically_on_load (nguillemot)
8014  fix NEON support; fix mingw support
8015  2.02 (2015-01-19) fix incorrect assert, fix warning
8016  2.01 (2015-01-17) fix various warnings; suppress SIMD on gcc 32-bit
8017  without -msse2 2.00b (2014-12-25) fix STBI_MALLOC in progressive JPEG 2.00
8018  (2014-12-25) optimize JPG, including x86 SSE2 & NEON SIMD (ryg) progressive
8019  JPEG (stb) PGM/PPM support (Ken Miller) STBI_MALLOC,STBI_REALLOC,STBI_FREE
8020  GIF bugfix -- seemingly never worked
8021  STBI_NO_*, STBI_ONLY_*
8022  1.48 (2014-12-14) fix incorrectly-named assert()
8023  1.47 (2014-12-14) 1/2/4-bit PNG support, both direct and paletted (Omar
8024  Cornut & stb) optimize PNG (ryg) fix bug in interlaced PNG with
8025  user-specified channel count (stb) 1.46 (2014-08-26) fix broken tRNS chunk
8026  (colorkey-style transparency) in non-paletted PNG 1.45 (2014-08-16) fix
8027  MSVC-ARM internal compiler error by wrapping malloc 1.44 (2014-08-07)
8028  various warning fixes from Ronny Chevalier
8029  1.43 (2014-07-15)
8030  fix MSVC-only compiler problem in code changed in 1.42
8031  1.42 (2014-07-09)
8032  don't define _CRT_SECURE_NO_WARNINGS (affects user code)
8033  fixes to stbi__cleanup_jpeg path
8034  added STBI_ASSERT to avoid requiring assert.h
8035  1.41 (2014-06-25)
8036  fix search&replace from 1.36 that messed up comments/error
8037  messages 1.40 (2014-06-22) fix gcc struct-initialization warning 1.39
8038  (2014-06-15) fix to TGA optimization when req_comp != number of components in
8039  TGA; fix to GIF loading because BMP wasn't rewinding (whoops, no GIFs in my
8040  test suite) add support for BMP version 5 (more ignored fields) 1.38
8041  (2014-06-06) suppress MSVC warnings on integer casts truncating values fix
8042  accidental rename of 'skip' field of I/O 1.37 (2014-06-04) remove duplicate
8043  typedef 1.36 (2014-06-03) convert to header file single-file library if
8044  de-iphone isn't set, load iphone images color-swapped instead of returning
8045  NULL 1.35 (2014-05-27) various warnings fix broken STBI_SIMD path fix bug
8046  where stbi_load_from_file no longer left file pointer in correct place fix
8047  broken non-easy path for 32-bit BMP (possibly never used) TGA optimization by
8048  Arseny Kapoulkine 1.34 (unknown) use STBI_NOTUSED in
8049  stbi__resample_row_generic(), fix one more leak in tga failure case 1.33
8050  (2011-07-14) make stbi_is_hdr work in STBI_NO_HDR (as specified), minor
8051  compiler-friendly improvements 1.32 (2011-07-13) support for "info" function
8052  for all supported filetypes (SpartanJ) 1.31 (2011-06-20) a few more leak
8053  fixes, bug in PNG handling (SpartanJ) 1.30 (2011-06-11) added ability to
8054  load files via callbacks to accommodate custom input streams (Ben Wenger)
8055  removed deprecated format-specific test/load functions
8056  removed support for installable file formats (stbi_loader) --
8057  would have been broken for IO callbacks anyway error cases in bmp and tga
8058  give messages and don't leak (Raymond Barbiero, grisha) fix inefficiency in
8059  decoding 32-bit BMP (David Woo) 1.29 (2010-08-16) various warning fixes from
8060  Aurelien Pocheville 1.28 (2010-08-01) fix bug in GIF palette transparency
8061  (SpartanJ) 1.27 (2010-08-01) cast-to-stbi_uc to fix warnings 1.26
8062  (2010-07-24) fix bug in file buffering for PNG reported by SpartanJ 1.25
8063  (2010-07-17) refix trans_data warning (Won Chun) 1.24 (2010-07-12) perf
8064  improvements reading from files on platforms with lock-heavy fgetc() minor
8065  perf improvements for jpeg deprecated type-specific functions so we'll get
8066  feedback if they're needed attempt to fix trans_data warning (Won Chun) 1.23
8067  fixed bug in iPhone support 1.22 (2010-07-10) removed image *writing*
8068  support stbi_info support from Jetro Lauha GIF support from Jean-Marc Lienher
8069  iPhone PNG-extensions from James Brown
8070  warning-fixes from Nicolas Schulz and Janez Zemva (i.e.
8071  Janez Žemva) 1.21 fix use of 'stbi_uc' in header (reported by jon
8072  blow) 1.20 added support for Softimage PIC, by Tom Seddon 1.19 bug in
8073  interlaced PNG corruption check (found by ryg) 1.18 (2008-08-02) fix a
8074  threading bug (local mutable static) 1.17 support interlaced PNG 1.16
8075  major bugfix - stbi__convert_format converted one too many pixels 1.15
8076  initialize some fields for thread safety 1.14 fix threadsafe conversion
8077  bug header-file-only version (#define STBI_HEADER_FILE_ONLY before including)
8078  1.13 threadsafe
8079  1.12 const qualifiers in the API
8080  1.11 Support installable IDCT, colorspace conversion routines
8081  1.10 Fixes for 64-bit (don't use "unsigned long")
8082  optimized upsampling by Fabian "ryg" Giesen
8083  1.09 Fix format-conversion for PSD code (bad global variables!)
8084  1.08 Thatcher Ulrich's PSD code integrated by Nicolas Schulz
8085  1.07 attempt to fix C++ warning/errors again
8086  1.06 attempt to fix C++ warning/errors again
8087  1.05 fix TGA loading to return correct *comp and use good luminance
8088  calc 1.04 default float alpha is 1, not 255; use 'void *' for
8089  stbi_image_free 1.03 bugfixes to STBI_NO_STDIO, STBI_NO_HDR 1.02
8090  support for (subset of) HDR files, float interface for preferred access to
8091  them 1.01 fix bug: possible bug in handling right-side up bmps... not sure
8092  fix bug: the stbi__bmp_load() and stbi__tga_load() functions
8093  didn't work at all 1.00 interface to zlib that skips zlib header 0.99
8094  correct handling of alpha in palette 0.98 TGA loader by lonesock;
8095  dynamically add loaders (untested) 0.97 jpeg errors on too large a file;
8096  also catch another malloc failure 0.96 fix detection of invalid v value -
8097  particleman@mollyrocket forum 0.95 during header scan, seek to markers in
8098  case of padding 0.94 STBI_NO_STDIO to disable stdio usage; rename all
8099  #defines the same 0.93 handle jpegtran output; verbose errors 0.92 read
8100  4,8,16,24,32-bit BMP files of several formats 0.91 output 24-bit
8101  Windows 3.0 BMP files 0.90 fix a few more warnings; bump version number to
8102  approach 1.0 0.61 bugfixes due to Marc LeBlanc, Christopher Lloyd 0.60
8103  fix compiling as c++ 0.59 fix warnings: merge Dave Moore's -Wall fixes
8104  0.58 fix bug: zlib uncompressed mode len/nlen was wrong endian
8105  0.57 fix bug: jpg last huffman symbol before marker was >9 bits but
8106  less than 16 available 0.56 fix bug: zlib uncompressed mode len vs. nlen
8107  0.55 fix bug: restart_interval not initialized to 0
8108  0.54 allow NULL for 'int *comp'
8109  0.53 fix bug in png 3->4; speedup png decoding
8110  0.52 png handles req_comp=3,4 directly; minor cleanup; jpeg comments
8111  0.51 obey req_comp requests, 1-component jpegs return as 1-component,
8112  on 'test' only check type, not whether we support this variant
8113  0.50 (2006-11-19)
8114  first released version
8115 */
8116 
8117 /*
8118 ------------------------------------------------------------------------------
8119 This software is available under 2 licenses -- choose whichever you prefer.
8120 ------------------------------------------------------------------------------
8121 ALTERNATIVE A - MIT License
8122 Copyright (c) 2017 Sean Barrett
8123 Permission is hereby granted, free of charge, to any person obtaining a copy of
8124 this software and associated documentation files (the "Software"), to deal in
8125 the Software without restriction, including without limitation the rights to
8126 use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
8127 of the Software, and to permit persons to whom the Software is furnished to do
8128 so, subject to the following conditions:
8129 The above copyright notice and this permission notice shall be included in all
8130 copies or substantial portions of the Software.
8131 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
8132 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
8133 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
8134 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
8135 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
8136 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
8137 SOFTWARE.
8138 ------------------------------------------------------------------------------
8139 ALTERNATIVE B - Public Domain (www.unlicense.org)
8140 This is free and unencumbered software released into the public domain.
8141 Anyone is free to copy, modify, publish, use, compile, sell, or distribute this
8142 software, either in source code form or as a compiled binary, for any purpose,
8143 commercial or non-commercial, and by any means.
8144 In jurisdictions that recognize copyright laws, the author or authors of this
8145 software dedicate any and all copyright interest in the software to the public
8146 domain. We make this dedication for the benefit of the public at large and to
8147 the detriment of our heirs and successors. We intend this dedication to be an
8148 overt act of relinquishment in perpetuity of all present and future rights to
8149 this software under copyright law.
8150 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
8151 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
8152 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
8153 AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
8154 ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
8155 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
8156 ------------------------------------------------------------------------------
8157 */
#define STBIDEF
Definition: stb_image.h:330
STBIDEF int stbi_info_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp)
STBIDEF void stbi_set_unpremultiply_on_load(int flag_true_if_should_unpremultiply)
Definition: stb_image.h:315
STBIDEF int stbi_is_hdr_from_callbacks(stbi_io_callbacks const *clbk, void *user)
STBIDEF const char * stbi_failure_reason(void)
STBIDEF stbi_uc * stbi_load_from_callbacks(stbi_io_callbacks const *clbk, void *user, int *x, int *y, int *channels_in_file, int desired_channels)
STBIDEF void stbi_hdr_to_ldr_scale(float scale)
STBIDEF void stbi_set_flip_vertically_on_load(int flag_true_if_should_flip)
unsigned short stbi_us
Definition: stb_image.h:321
STBIDEF float * stbi_loadf(char const *filename, int *x, int *y, int *channels_in_file, int desired_channels)
STBIDEF stbi_us * stbi_load_16_from_callbacks(stbi_io_callbacks const *clbk, void *user, int *x, int *y, int *channels_in_file, int desired_channels)
STBIDEF void stbi_ldr_to_hdr_gamma(float gamma)
Definition: stb_image.h:316
STBIDEF stbi_us * stbi_load_16(char const *filename, int *x, int *y, int *channels_in_file, int desired_channels)
Definition: stb_image.h:342
STBIDEF void stbi_hdr_to_ldr_gamma(float gamma)
STBIDEF int stbi_info_from_file(FILE *f, int *x, int *y, int *comp)
STBIDEF void stbi_ldr_to_hdr_scale(float scale)
STBIDEF int stbi_is_hdr_from_memory(stbi_uc const *buffer, int len)
STBIDEF int stbi_is_hdr_from_file(FILE *f)
STBIDEF int stbi_zlib_decode_noheader_buffer(char *obuffer, int olen, const char *ibuffer, int ilen)
STBIDEF stbi_uc * stbi_load_from_file(FILE *f, int *x, int *y, int *channels_in_file, int desired_channels)
Definition: stb_image.h:317
STBIDEF float * stbi_loadf_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *channels_in_file, int desired_channels)
STBIDEF int stbi_info_from_callbacks(stbi_io_callbacks const *clbk, void *user, int *x, int *y, int *comp)
Definition: stb_image.h:314
STBIDEF int stbi_info(char const *filename, int *x, int *y, int *comp)
STBIDEF int stbi_zlib_decode_buffer(char *obuffer, int olen, const char *ibuffer, int ilen)
STBIDEF stbi_us * stbi_load_16_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *channels_in_file, int desired_channels)
STBIDEF stbi_uc * stbi_load(char const *filename, int *x, int *y, int *channels_in_file, int desired_channels)
STBIDEF int stbi_is_hdr(char const *filename)
unsigned char stbi_uc
Definition: stb_image.h:320
STBIDEF void stbi_image_free(void *retval_from_stbi_load)
STBIDEF float * stbi_loadf_from_callbacks(stbi_io_callbacks const *clbk, void *user, int *x, int *y, int *channels_in_file, int desired_channels)
STBIDEF stbi_uc * stbi_load_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *channels_in_file, int desired_channels)
STBIDEF char * stbi_zlib_decode_noheader_malloc(const char *buffer, int len, int *outlen)
STBIDEF char * stbi_zlib_decode_malloc_guesssize_headerflag(const char *buffer, int len, int initial_size, int *outlen, int parse_header)
STBIDEF char * stbi_zlib_decode_malloc(const char *buffer, int len, int *outlen)
Definition: stb_image.h:312
STBIDEF float * stbi_loadf_from_file(FILE *f, int *x, int *y, int *channels_in_file, int desired_channels)
STBIDEF stbi_us * stbi_load_from_file_16(FILE *f, int *x, int *y, int *channels_in_file, int desired_channels)
STBIDEF void stbi_convert_iphone_png_to_rgb(int flag_true_if_should_convert)
STBIDEF char * stbi_zlib_decode_malloc_guesssize(const char *buffer, int len, int initial_size, int *outlen)