From e8fbf39f886b2b5dbf5e14f07cd1dbefe8d48bf4 Mon Sep 17 00:00:00 2001
From: Juan Linietsky <reduzio@gmail.com>
Date: Sun, 3 Jan 2016 17:14:28 -0300
Subject: [PATCH] -Replaced tinyjpeg with jpgd (public domain), fixes progressive
 encoded jpgs and speeds up. Closes #2040 -Removed support of loading BitMap
 as image, now it must be loaded as a pnm, also closes #2040

---
 core/ustring.cpp                              |   10 +-
 core/ustring.h                                |    2 +-
 drivers/SCsub                                 |    4 +-
 drivers/{jpg => jpegd}/SCsub                  |    5 +-
 drivers/jpegd/image_loader_jpegd.cpp          |  108 +
 .../image_loader_jpegd.h}                     |    0
 drivers/jpegd/jpgd.cpp                        | 3172 +++++++++++++++++
 drivers/jpegd/jpgd.h                          |  319 ++
 drivers/jpg/image_loader_jpg.cpp              |   93 -
 drivers/jpg/jidctflt.c                        |  286 --
 drivers/jpg/loadjpeg.c                        |  341 --
 drivers/jpg/tinyjpeg-internal.h               |  162 -
 drivers/jpg/tinyjpeg.c                        | 2202 ------------
 drivers/jpg/tinyjpeg.h                        |   74 -
 drivers/pnm/SCsub                             |   10 +
 drivers/pnm/bitmap_loader_pnm.cpp             |  232 ++
 drivers/pnm/bitmap_loader_pnm.h               |   33 +
 drivers/register_driver_types.cpp             |   10 +-
 scene/register_scene_types.cpp                |    5 +-
 scene/resources/bit_mask.cpp                  |   54 -
 scene/resources/bit_mask.h                    |   11 -
 tools/editor/editor_node.cpp                  |    1 +
 .../editor/plugins/editor_preview_plugins.cpp |   76 +
 tools/editor/plugins/editor_preview_plugins.h |   11 +
 24 files changed, 3985 insertions(+), 3236 deletions(-)
 rename drivers/{jpg => jpegd}/SCsub (65%)
 create mode 100644 drivers/jpegd/image_loader_jpegd.cpp
 rename drivers/{jpg/image_loader_jpg.h => jpegd/image_loader_jpegd.h} (100%)
 create mode 100644 drivers/jpegd/jpgd.cpp
 create mode 100644 drivers/jpegd/jpgd.h
 delete mode 100644 drivers/jpg/image_loader_jpg.cpp
 delete mode 100644 drivers/jpg/jidctflt.c
 delete mode 100644 drivers/jpg/loadjpeg.c
 delete mode 100644 drivers/jpg/tinyjpeg-internal.h
 delete mode 100644 drivers/jpg/tinyjpeg.c
 delete mode 100644 drivers/jpg/tinyjpeg.h
 create mode 100644 drivers/pnm/SCsub
 create mode 100644 drivers/pnm/bitmap_loader_pnm.cpp
 create mode 100644 drivers/pnm/bitmap_loader_pnm.h

diff --git a/core/ustring.cpp b/core/ustring.cpp
index c93fb80ca8e..21c0d78fdba 100644
--- a/core/ustring.cpp
+++ b/core/ustring.cpp
@@ -1636,12 +1636,16 @@ int64_t String::to_int64() const {
 	return integer*sign;
 }
 
-int String::to_int(const char* p_str) {
+int String::to_int(const char* p_str,int p_len) {
 
 
 	int to=0;
-	while(p_str[to]!=0 && p_str[to]!='.')
-		to++;
+	if (p_len>=0)
+		to=p_len;
+	else {
+		while(p_str[to]!=0 && p_str[to]!='.')
+			to++;
+	}
 
 
 	int integer=0;
diff --git a/core/ustring.h b/core/ustring.h
index 4c76b8e8636..2b967d368a0 100644
--- a/core/ustring.h
+++ b/core/ustring.h
@@ -144,7 +144,7 @@ public:
 	int to_int() const;
 
 	int64_t to_int64() const;
-	static int to_int(const char* p_str);
+	static int to_int(const char* p_str, int p_len=-1);
 	static double to_double(const char* p_str);
 	static double to_double(const CharType* p_str, const CharType **r_end=NULL);
 	static int64_t to_int(const CharType* p_str,int p_len=-1);
diff --git a/drivers/SCsub b/drivers/SCsub
index e52d6538e51..a00d7fc3f9f 100644
--- a/drivers/SCsub
+++ b/drivers/SCsub
@@ -12,11 +12,13 @@ SConscript('windows/SCsub');
 SConscript('gles2/SCsub');
 SConscript('gl_context/SCsub');
 SConscript('openssl/SCsub');
+SConscript('pnm/SCsub');
 
 if (env["png"]=="yes"):
 	SConscript("png/SCsub");
 if (env["jpg"]=="yes"):
-	SConscript("jpg/SCsub");
+	# The old jpg/ (tinyjpeg) driver is deleted by this change; jpegd/ (jpgd) replaces it.
+	SConscript("jpegd/SCsub");
 if (env["webp"]=="yes"):
 	SConscript("webp/SCsub");
 SConscript("dds/SCsub");
diff --git a/drivers/jpg/SCsub b/drivers/jpegd/SCsub
similarity index 65%
rename from drivers/jpg/SCsub
rename to drivers/jpegd/SCsub
index df91b10a025..dfdb19402ee 100644
--- a/drivers/jpg/SCsub
+++ b/drivers/jpegd/SCsub
@@ -2,9 +2,8 @@ Import('env')
 
 
 jpg_sources = [
-	"jpg/tinyjpeg.c",
-	"jpg/jidctflt.c",
-	"jpg/image_loader_jpg.cpp"
+	"jpegd/jpgd.cpp",
+	"jpegd/image_loader_jpegd.cpp"
 	]
 
 env.drivers_sources+=jpg_sources
diff --git a/drivers/jpegd/image_loader_jpegd.cpp b/drivers/jpegd/image_loader_jpegd.cpp
new file mode 100644
index 00000000000..4805cf4d5d9
--- /dev/null
+++ b/drivers/jpegd/image_loader_jpegd.cpp
@@ -0,0 +1,108 @@
+/*************************************************/
+/*  image_loader_jpegd.cpp                       */
+/*************************************************/
+/*            This file is part of:              */
+/*                GODOT ENGINE                   */
+/*************************************************/
+/*       Source code within this file is:        */
+/*  (c) 2007-2016 Juan Linietsky, Ariel Manzur   */
+/*             All Rights Reserved.              */
+/*************************************************/
+
+#include "image_loader_jpegd.h"
+
+#include "print_string.h"
+#include "os/os.h"
+#include "jpgd.h"
+#include <string.h>
+
+
+Error ImageLoaderJPG::load_image(Image *p_image,FileAccess *f) {
+	// Reads the whole file behind f into memory, closes f, decodes it with jpgd one
+	// scanline at a time and stores the pixels in p_image. Returns OK on success.
+	DVector<uint8_t> src_image;
+	int src_image_len = f->get_len();
+	ERR_FAIL_COND_V(src_image_len == 0, ERR_FILE_CORRUPT);
+	src_image.resize(src_image_len);
+
+	DVector<uint8_t>::Write w = src_image.write();
+
+	f->get_buffer(&w[0],src_image_len);
+
+	f->close();
+
+	// The decoder reads straight from the in-memory copy; w is only reset after the
+	// decode loop, so the pointer handed to the stream stays valid while decoding.
+	jpgd::jpeg_decoder_mem_stream mem_stream(w.ptr(),src_image_len);
+
+	jpgd::jpeg_decoder decoder(&mem_stream);
+
+	if (decoder.get_error_code() != jpgd::JPGD_SUCCESS) {
+		return ERR_CANT_OPEN;
+	}
+
+	const int image_width = decoder.get_width();
+	const int image_height = decoder.get_height();
+	int comps = decoder.get_num_components();
+	if (comps==3)
+		comps=4; // 3-component rows are copied below as 4 bytes per pixel and published as FORMAT_RGBA (presumably jpgd's output layout - confirm in jpgd.h)
+
+	if (decoder.begin_decoding() != jpgd::JPGD_SUCCESS)
+		return ERR_FILE_CORRUPT;
+
+	const int dst_bpl = image_width * comps; // bytes per destination row
+
+	DVector<uint8_t> data;
+
+	data.resize(dst_bpl * image_height);
+
+	DVector<uint8_t>::Write dw = data.write();
+
+	jpgd::uint8 *pImage_data = (jpgd::uint8*)dw.ptr();
+
+	for (int y = 0; y < image_height; y++)
+	{
+		const jpgd::uint8* pScan_line;
+		jpgd::uint scan_line_len; // qualified like the other jpgd typedefs used here; a bare "uint" is not a standard C++ type
+		if (decoder.decode((const void**)&pScan_line, &scan_line_len) != jpgd::JPGD_SUCCESS)
+		{
+			return ERR_FILE_CORRUPT;
+		}
+
+		jpgd::uint8 *pDst = pImage_data + y * dst_bpl;
+		memcpy(pDst, pScan_line, dst_bpl);
+
+
+	}
+
+
+	//all good
+
+	Image::Format fmt;
+	if (comps==1)
+		fmt=Image::FORMAT_GRAYSCALE;
+	else
+		fmt=Image::FORMAT_RGBA;
+	// Reset both Write accessors (presumably ending write access) before handing data to the image.
+	dw = DVector<uint8_t>::Write();
+	w = DVector<uint8_t>::Write();
+
+	p_image->create(image_width,image_height,0,fmt,data);
+
+	return OK;
+
+}
+
+void ImageLoaderJPG::get_recognized_extensions(List<String> *p_extensions) const {
+	// Appends the file extensions handled by this loader to p_extensions.
+	p_extensions->push_back("jpg");
+	p_extensions->push_back("jpeg");
+}
+
+
+ImageLoaderJPG::ImageLoaderJPG() {
+
+	// Intentionally empty: the constructor body performs no work.
+}
+
+
diff --git a/drivers/jpg/image_loader_jpg.h b/drivers/jpegd/image_loader_jpegd.h
similarity index 100%
rename from drivers/jpg/image_loader_jpg.h
rename to drivers/jpegd/image_loader_jpegd.h
diff --git a/drivers/jpegd/jpgd.cpp b/drivers/jpegd/jpgd.cpp
new file mode 100644
index 00000000000..fad9a37a9a1
--- /dev/null
+++ b/drivers/jpegd/jpgd.cpp
@@ -0,0 +1,3172 @@
+// jpgd.cpp - C++ class for JPEG decompression.
+// Public domain, Rich Geldreich <richgel99@gmail.com>
+// Alex Evans: Linear memory allocator (taken from jpge.h).
+// v1.04, May. 19, 2012: Code tweaks to fix VS2008 static code analysis warnings (all looked harmless)
+//
+// Supports progressive and baseline sequential JPEG image files, and the most common chroma subsampling factors: Y, H1V1, H2V1, H1V2, and H2V2.
+//
+// Chroma upsampling quality: H2V2 is upsampled in the frequency domain, H2V1 and H1V2 are upsampled using point sampling.
+// Chroma upsampling reference: "Fast Scheme for Image Size Change in the Compressed Domain"
+// http://vision.ai.uiuc.edu/~dugad/research/dct/index.html
+
+#include "jpgd.h"
+#include <string.h>
+
+#include <assert.h>
+#define JPGD_ASSERT(x) assert(x)
+
+#ifdef _MSC_VER
+#pragma warning (disable : 4611) // warning C4611: interaction between '_setjmp' and C++ object destruction is non-portable
+#endif
+
+// Set to 1 to enable freq. domain chroma upsampling on images using H2V2 subsampling (0=faster nearest neighbor sampling).
+// This is slower, but results in higher quality on images with highly saturated colors.
+#define JPGD_SUPPORT_FREQ_DOMAIN_UPSAMPLING 1
+
+#define JPGD_TRUE (1)
+#define JPGD_FALSE (0)
+
+#define JPGD_MAX(a,b) (((a)>(b)) ? (a) : (b))
+#define JPGD_MIN(a,b) (((a)<(b)) ? (a) : (b))
+
+namespace jpgd {
+
+static inline void *jpgd_malloc(size_t nSize) { return malloc(nSize); } // Forwards to malloc(); returns whatever malloc() returns.
+static inline void jpgd_free(void *p) { free(p); } // Forwards to free().
+
+// DCT coefficients are stored in this sequence.
+static int g_ZAG[64] = {  0,1,8,16,9,2,3,10,17,24,32,25,18,11,4,5,12,19,26,33,40,48,41,34,27,20,13,6,7,14,21,28,35,42,49,56,57,50,43,36,29,22,15,23,30,37,44,51,58,59,52,45,38,31,39,46,53,60,61,54,47,55,62,63 };
+
+enum JPEG_MARKER
+{
+  M_SOF0  = 0xC0, M_SOF1  = 0xC1, M_SOF2  = 0xC2, M_SOF3  = 0xC3, M_SOF5  = 0xC5, M_SOF6  = 0xC6, M_SOF7  = 0xC7, M_JPG   = 0xC8,
+  M_SOF9  = 0xC9, M_SOF10 = 0xCA, M_SOF11 = 0xCB, M_SOF13 = 0xCD, M_SOF14 = 0xCE, M_SOF15 = 0xCF, M_DHT   = 0xC4, M_DAC   = 0xCC,
+  M_RST0  = 0xD0, M_RST1  = 0xD1, M_RST2  = 0xD2, M_RST3  = 0xD3, M_RST4  = 0xD4, M_RST5  = 0xD5, M_RST6  = 0xD6, M_RST7  = 0xD7,
+  M_SOI   = 0xD8, M_EOI   = 0xD9, M_SOS   = 0xDA, M_DQT   = 0xDB, M_DNL   = 0xDC, M_DRI   = 0xDD, M_DHP   = 0xDE, M_EXP   = 0xDF,
+  M_APP0  = 0xE0, M_APP15 = 0xEF, M_JPG0  = 0xF0, M_JPG13 = 0xFD, M_COM   = 0xFE, M_TEM   = 0x01, M_ERROR = 0x100, RST0   = 0xD0
+};
+
+enum JPEG_SUBSAMPLING { JPGD_GRAYSCALE = 0, JPGD_YH1V1, JPGD_YH2V1, JPGD_YH1V2, JPGD_YH2V2 };
+
+#define CONST_BITS  13
+#define PASS1_BITS  2
+#define SCALEDONE ((int32)1)
+
+#define FIX_0_298631336  ((int32)2446)        /* FIX(0.298631336) */
+#define FIX_0_390180644  ((int32)3196)        /* FIX(0.390180644) */
+#define FIX_0_541196100  ((int32)4433)        /* FIX(0.541196100) */
+#define FIX_0_765366865  ((int32)6270)        /* FIX(0.765366865) */
+#define FIX_0_899976223  ((int32)7373)        /* FIX(0.899976223) */
+#define FIX_1_175875602  ((int32)9633)        /* FIX(1.175875602) */
+#define FIX_1_501321110  ((int32)12299)       /* FIX(1.501321110) */
+#define FIX_1_847759065  ((int32)15137)       /* FIX(1.847759065) */
+#define FIX_1_961570560  ((int32)16069)       /* FIX(1.961570560) */
+#define FIX_2_053119869  ((int32)16819)       /* FIX(2.053119869) */
+#define FIX_2_562915447  ((int32)20995)       /* FIX(2.562915447) */
+#define FIX_3_072711026  ((int32)25172)       /* FIX(3.072711026) */
+
+#define DESCALE(x,n)  (((x) + (SCALEDONE << ((n)-1))) >> (n))
+#define DESCALE_ZEROSHIFT(x,n)  (((x) + (128 << (n)) + (SCALEDONE << ((n)-1))) >> (n))
+
+#define MULTIPLY(var, cnst)  ((var) * (cnst))
+// Clamps an int to 0..255 (negative -> 0, above 255 -> 255). The argument is parenthesized at every use so that expression arguments such as CLAMP(a - b) expand correctly.
+#define CLAMP(i) ((static_cast<uint>(i) > 255) ? (((~(i)) >> 31) & 0xFF) : (i))
+
+// 1D IDCT over one row of 8 coefficients; NONZERO_COLS is the number of leading columns that may be non-zero, so the compiler can build a fast path per value.
+template <int NONZERO_COLS>
+struct Row
+{
+  static void idct(int* pTemp, const jpgd_block_t* pSrc)
+  {
+    // ACCESS_COL() will be optimized at compile time to either an array access, or 0.
+    #define ACCESS_COL(x) (((x) < NONZERO_COLS) ? (int)pSrc[x] : 0)
+    // Terms built from the even-indexed coefficients (0, 2, 4, 6).
+    const int z2 = ACCESS_COL(2), z3 = ACCESS_COL(6);
+
+    const int z1 = MULTIPLY(z2 + z3, FIX_0_541196100);
+    const int tmp2 = z1 + MULTIPLY(z3, - FIX_1_847759065);
+    const int tmp3 = z1 + MULTIPLY(z2, FIX_0_765366865);
+
+    const int tmp0 = (ACCESS_COL(0) + ACCESS_COL(4)) << CONST_BITS;
+    const int tmp1 = (ACCESS_COL(0) - ACCESS_COL(4)) << CONST_BITS;
+
+    const int tmp10 = tmp0 + tmp3, tmp13 = tmp0 - tmp3, tmp11 = tmp1 + tmp2, tmp12 = tmp1 - tmp2;
+    // Terms built from the odd-indexed coefficients (1, 3, 5, 7).
+    const int atmp0 = ACCESS_COL(7), atmp1 = ACCESS_COL(5), atmp2 = ACCESS_COL(3), atmp3 = ACCESS_COL(1);
+
+    const int bz1 = atmp0 + atmp3, bz2 = atmp1 + atmp2, bz3 = atmp0 + atmp2, bz4 = atmp1 + atmp3;
+    const int bz5 = MULTIPLY(bz3 + bz4, FIX_1_175875602);
+
+    const int az1 = MULTIPLY(bz1, - FIX_0_899976223);
+    const int az2 = MULTIPLY(bz2, - FIX_2_562915447);
+    const int az3 = MULTIPLY(bz3, - FIX_1_961570560) + bz5;
+    const int az4 = MULTIPLY(bz4, - FIX_0_390180644) + bz5;
+
+    const int btmp0 = MULTIPLY(atmp0, FIX_0_298631336) + az1 + az3;
+    const int btmp1 = MULTIPLY(atmp1, FIX_2_053119869) + az2 + az4;
+    const int btmp2 = MULTIPLY(atmp2, FIX_3_072711026) + az2 + az3;
+    const int btmp3 = MULTIPLY(atmp3, FIX_1_501321110) + az1 + az4;
+    // Combine both halves into the 8 outputs; DESCALE rounds and shifts right by CONST_BITS-PASS1_BITS.
+    pTemp[0] = DESCALE(tmp10 + btmp3, CONST_BITS-PASS1_BITS);
+    pTemp[7] = DESCALE(tmp10 - btmp3, CONST_BITS-PASS1_BITS);
+    pTemp[1] = DESCALE(tmp11 + btmp2, CONST_BITS-PASS1_BITS);
+    pTemp[6] = DESCALE(tmp11 - btmp2, CONST_BITS-PASS1_BITS);
+    pTemp[2] = DESCALE(tmp12 + btmp1, CONST_BITS-PASS1_BITS);
+    pTemp[5] = DESCALE(tmp12 - btmp1, CONST_BITS-PASS1_BITS);
+    pTemp[3] = DESCALE(tmp13 + btmp0, CONST_BITS-PASS1_BITS);
+    pTemp[4] = DESCALE(tmp13 - btmp0, CONST_BITS-PASS1_BITS);
+  }
+};
+
+template <>
+struct Row<0> // Specialization for a row with no non-zero columns: idct() writes nothing to pTemp.
+{
+  static void idct(int* pTemp, const jpgd_block_t* pSrc)
+  {
+#ifdef _MSC_VER
+    pTemp; pSrc; // references the parameters, presumably to silence MSVC unused-parameter warnings
+#endif
+  }
+};
+
+// Row pass specialization for rows whose only possibly non-zero coefficient is column 0.
+template <>
+struct Row<1>
+{
+  // Writes the scaled column-0 value of pSrc into all eight entries of pTemp.
+  static void idct(int* pTemp, const jpgd_block_t* pSrc)
+  {
+    // Scale the single input value up by PASS1_BITS.
+    const int scaled_dc = (pSrc[0] << PASS1_BITS);
+
+    // Each of the eight outputs of this row receives the same value.
+    for (int col = 0; col < 8; col++)
+    {
+      pTemp[col] = scaled_dc;
+    }
+  }
+};
+
+// 1D IDCT down one column of the intermediate buffer (stride 8); NONZERO_ROWS is the number of leading rows that may be non-zero, so the compiler can build a fast path per value.
+template <int NONZERO_ROWS>
+struct Col
+{
+  static void idct(uint8* pDst_ptr, const int* pTemp)
+  {
+    // ACCESS_ROW() will be optimized at compile time to either an array access, or 0.
+    #define ACCESS_ROW(x) (((x) < NONZERO_ROWS) ? pTemp[x * 8] : 0)
+    // Terms built from the even-indexed rows (0, 2, 4, 6).
+    const int z2 = ACCESS_ROW(2);
+    const int z3 = ACCESS_ROW(6);
+
+    const int z1 = MULTIPLY(z2 + z3, FIX_0_541196100);
+    const int tmp2 = z1 + MULTIPLY(z3, - FIX_1_847759065);
+    const int tmp3 = z1 + MULTIPLY(z2, FIX_0_765366865);
+
+    const int tmp0 = (ACCESS_ROW(0) + ACCESS_ROW(4)) << CONST_BITS;
+    const int tmp1 = (ACCESS_ROW(0) - ACCESS_ROW(4)) << CONST_BITS;
+
+    const int tmp10 = tmp0 + tmp3, tmp13 = tmp0 - tmp3, tmp11 = tmp1 + tmp2, tmp12 = tmp1 - tmp2;
+    // Terms built from the odd-indexed rows (1, 3, 5, 7).
+    const int atmp0 = ACCESS_ROW(7), atmp1 = ACCESS_ROW(5), atmp2 = ACCESS_ROW(3), atmp3 = ACCESS_ROW(1);
+
+    const int bz1 = atmp0 + atmp3, bz2 = atmp1 + atmp2, bz3 = atmp0 + atmp2, bz4 = atmp1 + atmp3;
+    const int bz5 = MULTIPLY(bz3 + bz4, FIX_1_175875602);
+
+    const int az1 = MULTIPLY(bz1, - FIX_0_899976223);
+    const int az2 = MULTIPLY(bz2, - FIX_2_562915447);
+    const int az3 = MULTIPLY(bz3, - FIX_1_961570560) + bz5;
+    const int az4 = MULTIPLY(bz4, - FIX_0_390180644) + bz5;
+
+    const int btmp0 = MULTIPLY(atmp0, FIX_0_298631336) + az1 + az3;
+    const int btmp1 = MULTIPLY(atmp1, FIX_2_053119869) + az2 + az4;
+    const int btmp2 = MULTIPLY(atmp2, FIX_3_072711026) + az2 + az3;
+    const int btmp3 = MULTIPLY(atmp3, FIX_1_501321110) + az1 + az4;
+    // Each output: DESCALE_ZEROSHIFT adds the 128 offset and rounds while shifting, then CLAMP limits it to a byte; outputs are 8 bytes apart.
+    int i = DESCALE_ZEROSHIFT(tmp10 + btmp3, CONST_BITS+PASS1_BITS+3);
+    pDst_ptr[8*0] = (uint8)CLAMP(i);
+
+    i = DESCALE_ZEROSHIFT(tmp10 - btmp3, CONST_BITS+PASS1_BITS+3);
+    pDst_ptr[8*7] = (uint8)CLAMP(i);
+
+    i = DESCALE_ZEROSHIFT(tmp11 + btmp2, CONST_BITS+PASS1_BITS+3);
+    pDst_ptr[8*1] = (uint8)CLAMP(i);
+
+    i = DESCALE_ZEROSHIFT(tmp11 - btmp2, CONST_BITS+PASS1_BITS+3);
+    pDst_ptr[8*6] = (uint8)CLAMP(i);
+
+    i = DESCALE_ZEROSHIFT(tmp12 + btmp1, CONST_BITS+PASS1_BITS+3);
+    pDst_ptr[8*2] = (uint8)CLAMP(i);
+
+    i = DESCALE_ZEROSHIFT(tmp12 - btmp1, CONST_BITS+PASS1_BITS+3);
+    pDst_ptr[8*5] = (uint8)CLAMP(i);
+
+    i = DESCALE_ZEROSHIFT(tmp13 + btmp0, CONST_BITS+PASS1_BITS+3);
+    pDst_ptr[8*3] = (uint8)CLAMP(i);
+
+    i = DESCALE_ZEROSHIFT(tmp13 - btmp0, CONST_BITS+PASS1_BITS+3);
+    pDst_ptr[8*4] = (uint8)CLAMP(i);
+  }
+};
+
+// Column pass specialization for when only the first row of pTemp may be non-zero.
+template <>
+struct Col<1>
+{
+  // Fills one output column (entries 8 bytes apart) with the clamped, offset value of pTemp[0].
+  static void idct(uint8* pDst_ptr, const int* pTemp)
+  {
+    // Descale with the 128 offset from DESCALE_ZEROSHIFT, then clamp to a byte.
+    int shifted_dc = DESCALE_ZEROSHIFT(pTemp[0], PASS1_BITS+3);
+    const uint8 pixel = (uint8)CLAMP(shifted_dc);
+
+    for (int row = 0; row < 8; row++)
+    {
+      pDst_ptr[row*8] = pixel;
+    }
+  }
+};
+
+static const uint8 s_idct_row_table[] =
+{
+  1,0,0,0,0,0,0,0, 2,0,0,0,0,0,0,0, 2,1,0,0,0,0,0,0, 2,1,1,0,0,0,0,0, 2,2,1,0,0,0,0,0, 3,2,1,0,0,0,0,0, 4,2,1,0,0,0,0,0, 4,3,1,0,0,0,0,0,
+  4,3,2,0,0,0,0,0, 4,3,2,1,0,0,0,0, 4,3,2,1,1,0,0,0, 4,3,2,2,1,0,0,0, 4,3,3,2,1,0,0,0, 4,4,3,2,1,0,0,0, 5,4,3,2,1,0,0,0, 6,4,3,2,1,0,0,0,
+  6,5,3,2,1,0,0,0, 6,5,4,2,1,0,0,0, 6,5,4,3,1,0,0,0, 6,5,4,3,2,0,0,0, 6,5,4,3,2,1,0,0, 6,5,4,3,2,1,1,0, 6,5,4,3,2,2,1,0, 6,5,4,3,3,2,1,0,
+  6,5,4,4,3,2,1,0, 6,5,5,4,3,2,1,0, 6,6,5,4,3,2,1,0, 7,6,5,4,3,2,1,0, 8,6,5,4,3,2,1,0, 8,7,5,4,3,2,1,0, 8,7,6,4,3,2,1,0, 8,7,6,5,3,2,1,0,
+  8,7,6,5,4,2,1,0, 8,7,6,5,4,3,1,0, 8,7,6,5,4,3,2,0, 8,7,6,5,4,3,2,1, 8,7,6,5,4,3,2,2, 8,7,6,5,4,3,3,2, 8,7,6,5,4,4,3,2, 8,7,6,5,5,4,3,2,
+  8,7,6,6,5,4,3,2, 8,7,7,6,5,4,3,2, 8,8,7,6,5,4,3,2, 8,8,8,6,5,4,3,2, 8,8,8,7,5,4,3,2, 8,8,8,7,6,4,3,2, 8,8,8,7,6,5,3,2, 8,8,8,7,6,5,4,2,
+  8,8,8,7,6,5,4,3, 8,8,8,7,6,5,4,4, 8,8,8,7,6,5,5,4, 8,8,8,7,6,6,5,4, 8,8,8,7,7,6,5,4, 8,8,8,8,7,6,5,4, 8,8,8,8,8,6,5,4, 8,8,8,8,8,7,5,4,
+  8,8,8,8,8,7,6,4, 8,8,8,8,8,7,6,5, 8,8,8,8,8,7,6,6, 8,8,8,8,8,7,7,6, 8,8,8,8,8,8,7,6, 8,8,8,8,8,8,8,6, 8,8,8,8,8,8,8,7, 8,8,8,8,8,8,8,8,
+};
+
+static const uint8 s_idct_col_table[] = { 1, 1, 2, 3, 3, 3, 3, 3, 3, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8 };
+
+// 8x8 inverse DCT: reads 64 coefficients from pSrc_ptr and writes 64 bytes (8 rows of 8) to pDst_ptr.
+// block_max_zag must be in 1..64 (asserted); block_max_zag - 1 indexes the tables that pick the fast paths.
+void idct(const jpgd_block_t* pSrc_ptr, uint8* pDst_ptr, int block_max_zag)
+{
+  JPGD_ASSERT(block_max_zag >= 1);
+  JPGD_ASSERT(block_max_zag <= 64);
+
+  if (block_max_zag <= 1)
+  {
+    // Only pSrc_ptr[0] is used here: the whole block becomes one flat, offset and clamped value.
+    int k = ((pSrc_ptr[0] + 4) >> 3) + 128;
+    k = CLAMP(k);
+
+    // Fill all 64 output bytes in one call. This replaces int-sized stores through a cast uint8*,
+    // which depended on pDst_ptr being suitably aligned for int and on int being exactly 4 bytes.
+    memset(pDst_ptr, k, 64);
+    return;
+  }
+
+  int temp[64];
+
+  const jpgd_block_t* pSrc = pSrc_ptr;
+  int* pTemp = temp;
+
+  // Row pass: each of the 8 rows uses the Row<N> variant selected by its table entry.
+  const uint8* pRow_tab = &s_idct_row_table[(block_max_zag - 1) * 8];
+  int i;
+  for (i = 8; i > 0; i--, pRow_tab++)
+  {
+    switch (*pRow_tab)
+    {
+      case 0: Row<0>::idct(pTemp, pSrc); break;
+      case 1: Row<1>::idct(pTemp, pSrc); break;
+      case 2: Row<2>::idct(pTemp, pSrc); break;
+      case 3: Row<3>::idct(pTemp, pSrc); break;
+      case 4: Row<4>::idct(pTemp, pSrc); break;
+      case 5: Row<5>::idct(pTemp, pSrc); break;
+      case 6: Row<6>::idct(pTemp, pSrc); break;
+      case 7: Row<7>::idct(pTemp, pSrc); break;
+      case 8: Row<8>::idct(pTemp, pSrc); break;
+    }
+
+    pSrc += 8;
+    pTemp += 8;
+  }
+
+  pTemp = temp;
+
+  // Column pass: all 8 columns use the same Col<N> variant, chosen once from the column table.
+  const int nonzero_rows = s_idct_col_table[block_max_zag - 1];
+  for (i = 8; i > 0; i--)
+  {
+    switch (nonzero_rows)
+    {
+      case 1: Col<1>::idct(pDst_ptr, pTemp); break;
+      case 2: Col<2>::idct(pDst_ptr, pTemp); break;
+      case 3: Col<3>::idct(pDst_ptr, pTemp); break;
+      case 4: Col<4>::idct(pDst_ptr, pTemp); break;
+      case 5: Col<5>::idct(pDst_ptr, pTemp); break;
+      case 6: Col<6>::idct(pDst_ptr, pTemp); break;
+      case 7: Col<7>::idct(pDst_ptr, pTemp); break;
+      case 8: Col<8>::idct(pDst_ptr, pTemp); break;
+    }
+
+    pTemp++;
+    pDst_ptr++;
+  }
+}
+
+// Inverse DCT that uses only the top-left 4x4 coefficients of the block at pSrc_ptr;
+// columns and rows 4..7 are treated as zero by the Row<4>/Col<4> variants.
+// Writes a full 8x8 block of bytes (row stride 8) to pDst_ptr.
+void idct_4x4(const jpgd_block_t* pSrc_ptr, uint8* pDst_ptr)
+{
+  // Scratch for the row pass; only its first four rows are written and later read.
+  int temp[64];
+
+  // Row pass over coefficient rows 0..3.
+  for (int row = 0; row < 4; row++)
+  {
+    Row<4>::idct(temp + row * 8, pSrc_ptr + row * 8);
+  }
+
+  // Column pass: one call per output column, each reading the
+  // four intermediate rows produced above.
+  for (int col = 0; col < 8; col++)
+  {
+    Col<4>::idct(pDst_ptr + col, temp + col);
+  }
+}
+
+// Retrieve one character from the input stream, refilling the buffer via prep_in_buffer() when it is empty. Once no more input is available, successive calls alternate between 0xFF and 0xD9.
+inline uint jpeg_decoder::get_char()
+{
+  // Any bytes remaining in buffer?
+  if (!m_in_buf_left)
+  {
+    // Try to get more bytes.
+    prep_in_buffer();
+    // Still nothing to get?
+    if (!m_in_buf_left)
+    {
+      // Pad the end of the stream with 0xFF 0xD9 (EOI marker)
+      int t = m_tem_flag;
+      m_tem_flag ^= 1; // toggle so the next padded read returns the other byte of the pair
+      if (t)
+        return 0xD9;
+      else
+        return 0xFF;
+    }
+  }
+  // Normal case: consume one buffered byte.
+  uint c = *m_pIn_buf_ofs++;
+  m_in_buf_left--;
+
+  return c;
+}
+
+// Same as previous method, except can indicate if the character is a pad character or not: *pPadding_flag is set to true for a padding byte and to false for a byte read from the buffer.
+inline uint jpeg_decoder::get_char(bool *pPadding_flag)
+{
+  if (!m_in_buf_left)
+  {
+    prep_in_buffer(); // try to refill the input buffer
+    if (!m_in_buf_left)
+    {
+      *pPadding_flag = true;
+      int t = m_tem_flag;
+      m_tem_flag ^= 1; // toggle so consecutive padded reads alternate between 0xFF and 0xD9
+      if (t)
+        return 0xD9;
+      else
+        return 0xFF;
+    }
+  }
+
+  *pPadding_flag = false;
+  // Normal case: consume one buffered byte.
+  uint c = *m_pIn_buf_ofs++;
+  m_in_buf_left--;
+
+  return c;
+}
+
+// Inserts a previously retrieved character back into the input buffer.
+inline void jpeg_decoder::stuff_char(uint8 q)
+{
+  *(--m_pIn_buf_ofs) = q; // step the read pointer back one byte and store q there; NOTE(review): assumes there is room before the current position - confirm against the buffer layout in jpgd.h
+  m_in_buf_left++;
+}
+
+// Retrieves one character from the input stream, but does not read past markers. Will continue to return 0xFF when a marker is encountered.
+inline uint8 jpeg_decoder::get_octet()
+{
+  bool padding_flag;
+  int c = get_char(&padding_flag);
+
+  if (c == 0xFF)
+  {
+    if (padding_flag)
+      return 0xFF; // padding byte from get_char(): return it without reading further
+    // Read the byte that follows the 0xFF to decide how to treat the pair.
+    c = get_char(&padding_flag);
+    if (padding_flag)
+    {
+      stuff_char(0xFF); // put the padding byte back
+      return 0xFF;
+    }
+
+    if (c == 0x00)
+      return 0xFF; // the pair 0xFF 0x00 yields a single data byte 0xFF
+    else
+    {
+      // Any other pair: push both bytes back (second byte first) so the next read sees the same two bytes again.
+      stuff_char(static_cast<uint8>(c));
+      stuff_char(0xFF);
+      return 0xFF;
+    }
+  }
+
+  return static_cast<uint8>(c);
+}
+
+// Retrieves a variable number of bits from the input stream. Does not recognize markers. Returns 0 when num_bits is 0.
+inline uint jpeg_decoder::get_bits(int num_bits)
+{
+  if (!num_bits)
+    return 0;
+  // The result is taken from the top of m_bit_buf (right shift by 32 - num_bits).
+  uint i = m_bit_buf >> (32 - num_bits);
+  // Consume the bits; when the remaining count drops to zero or below, read two more bytes.
+  if ((m_bits_left -= num_bits) <= 0)
+  {
+    m_bit_buf <<= (num_bits += m_bits_left); // num_bits becomes the count that was left before this call; shift by that amount
+
+    uint c1 = get_char();
+    uint c2 = get_char();
+    m_bit_buf = (m_bit_buf & 0xFFFF0000) | (c1 << 8) | c2; // place the two new bytes in the low 16 bits
+
+    m_bit_buf <<= -m_bits_left; // shift by the rest of the requested bits (m_bits_left is <= 0 here)
+
+    m_bits_left += 16;
+
+    JPGD_ASSERT(m_bits_left >= 0);
+  }
+  else
+    m_bit_buf <<= num_bits;
+
+  return i;
+}
+
+// Retrieves a variable number of bits from the input stream. Markers will not be read into the input bit buffer. Instead, an infinite number of all 1's will be returned when a marker is encountered.
+inline uint jpeg_decoder::get_bits_no_markers(int num_bits)
+{
+  if (!num_bits)
+    return 0;
+  // The result is taken from the top of m_bit_buf (right shift by 32 - num_bits).
+  uint i = m_bit_buf >> (32 - num_bits);
+
+  if ((m_bits_left -= num_bits) <= 0)
+  {
+    m_bit_buf <<= (num_bits += m_bits_left); // num_bits becomes the count that was left before this call; shift by that amount
+    // Careful path when fewer than two bytes are buffered or either of the next two bytes is 0xFF: read through get_octet().
+    if ((m_in_buf_left < 2) || (m_pIn_buf_ofs[0] == 0xFF) || (m_pIn_buf_ofs[1] == 0xFF))
+    {
+      uint c1 = get_octet();
+      uint c2 = get_octet();
+      m_bit_buf |= (c1 << 8) | c2;
+    }
+    else
+    {
+      m_bit_buf |= ((uint)m_pIn_buf_ofs[0] << 8) | m_pIn_buf_ofs[1]; // direct path: take the next two buffered bytes
+      m_in_buf_left -= 2;
+      m_pIn_buf_ofs += 2;
+    }
+
+    m_bit_buf <<= -m_bits_left; // shift by the rest of the requested bits (m_bits_left is <= 0 here)
+
+    m_bits_left += 16;
+
+    JPGD_ASSERT(m_bits_left >= 0);
+  }
+  else
+    m_bit_buf <<= num_bits;
+
+  return i;
+}
+
+// Decodes a Huffman encoded symbol using the tables in pH, consumes its bits from the bit buffer and returns the symbol.
+inline int jpeg_decoder::huff_decode(huff_tables *pH)
+{
+  int symbol;
+
+  // Check first 8-bits: do we have a complete symbol?
+  if ((symbol = pH->look_up[m_bit_buf >> 24]) < 0)
+  {
+    // Decode more bits, use a tree traversal to find symbol.
+    int ofs = 23;
+    do
+    {
+      symbol = pH->tree[-(int)(symbol + ((m_bit_buf >> ofs) & 1))]; // a negative entry is followed: the next bit of m_bit_buf selects one of two adjacent tree slots
+      ofs--;
+    } while (symbol < 0);
+
+    get_bits_no_markers(8 + (23 - ofs)); // consume the 8 looked-up bits plus one bit per tree step taken
+  }
+  else
+    get_bits_no_markers(pH->code_size[symbol]);
+
+  return symbol;
+}
+
+// Decodes a Huffman encoded symbol and also fetches the extra bits that follow it: returns the symbol and stores the extra bits in extra_bits.
+inline int jpeg_decoder::huff_decode(huff_tables *pH, int& extra_bits)
+{
+  int symbol;
+
+  // Check first 8-bits: do we have a complete symbol?
+  if ((symbol = pH->look_up2[m_bit_buf >> 24]) < 0)
+  {
+    // Use a tree traversal to find symbol.
+    int ofs = 23;
+    do
+    {
+      symbol = pH->tree[-(int)(symbol + ((m_bit_buf >> ofs) & 1))];
+      ofs--;
+    } while (symbol < 0);
+
+    get_bits_no_markers(8 + (23 - ofs)); // consume the 8 looked-up bits plus one bit per tree step taken
+
+    extra_bits = get_bits_no_markers(symbol & 0xF); // the low 4 bits of the symbol give the number of extra bits to read
+  }
+  else
+  {
+    JPGD_ASSERT(((symbol >> 8) & 31) == pH->code_size[symbol & 255] + ((symbol & 0x8000) ? (symbol & 15) : 0));
+    // look_up2 entry layout as used below: bits 0-7 symbol, bits 8-12 a bit count, bit 15 "extra bits included", bits 16 and up the extra bits.
+    if (symbol & 0x8000)
+    {
+      get_bits_no_markers((symbol >> 8) & 31); // consume code and extra bits in one call
+      extra_bits = symbol >> 16;
+    }
+    else
+    {
+      int code_size = (symbol >> 8) & 31;
+      int num_extra_bits = symbol & 0xF;
+      int bits = code_size + num_extra_bits;
+      if (bits <= (m_bits_left + 16))
+        extra_bits = get_bits_no_markers(bits) & ((1 << num_extra_bits) - 1); // single read; keep only the low num_extra_bits bits
+      else
+      {
+        get_bits_no_markers(code_size);
+        extra_bits = get_bits_no_markers(num_extra_bits);
+      }
+    }
+
+    symbol &= 0xFF;
+  }
+
+  return symbol;
+}
+
+// Tables and macro used to fully decode the DPCM differences: JPGD_HUFF_EXTEND(x, s) yields x + s_extend_offset[s] when x < s_extend_test[s], and x otherwise.
+static const int s_extend_test[16] = { 0, 0x0001, 0x0002, 0x0004, 0x0008, 0x0010, 0x0020, 0x0040, 0x0080, 0x0100, 0x0200, 0x0400, 0x0800, 0x1000, 0x2000, 0x4000 };
+static const int s_extend_offset[16] = { 0, -1, -3, -7, -15, -31, -63, -127, -255, -511, -1023, -2047, -4095, -8191, -16383, -32767 }; // entry n is -(2^n) + 1, spelled as literals because left-shifting -1 is undefined behavior before C++20
+static const int s_extend_mask[] = { 0, (1<<0), (1<<1), (1<<2), (1<<3), (1<<4), (1<<5), (1<<6), (1<<7), (1<<8), (1<<9), (1<<10), (1<<11), (1<<12), (1<<13), (1<<14), (1<<15), (1<<16) };
+// The logical AND's in this macro are to shut up static code analysis (aren't really necessary - couldn't find another way to do this). Both arguments are parenthesized at every use.
+#define JPGD_HUFF_EXTEND(x, s) (((x) < s_extend_test[(s) & 15]) ? ((x) + s_extend_offset[(s) & 15]) : (x))
+
+// Saturates i to the range 0..255: negative inputs give 0, inputs above 255 give 255.
+inline uint8 jpeg_decoder::clamp(int i)
+{
+  // One unsigned comparison detects both "negative" and "above 255".
+  const bool in_range = (static_cast<uint>(i) <= 255);
+  // For out-of-range i, ~i is negative only when i > 255, so its shifted sign selects 0xFF or 0.
+  return static_cast<uint8>(in_range ? i : (((~i) >> 31) & 0xFF));
+}
+
+namespace DCT_Upsample
+{
+  // 4x4 matrix of ints with element-wise add/subtract operators and helpers
+  // that store a sum or difference into an 8-wide coefficient block.
+  struct Matrix44
+  {
+    typedef int Element_Type;
+    enum { NUM_ROWS = 4, NUM_COLS = 4 };
+
+    // Element storage, indexed as v[row][column].
+    Element_Type v[NUM_ROWS][NUM_COLS];
+
+    inline int rows() const { return NUM_ROWS; }
+    inline int cols() const { return NUM_COLS; }
+
+    // Unchecked element access, const and mutable.
+    inline const Element_Type & at(int r, int c) const { return v[r][c]; }
+    inline       Element_Type & at(int r, int c)       { return v[r][c]; }
+
+    // Default construction leaves every element uninitialized.
+    inline Matrix44() { }
+
+    // Adds a to this matrix element by element.
+    inline Matrix44& operator += (const Matrix44& a)
+    {
+      for (int r = 0; r < NUM_ROWS; r++)
+      {
+        for (int c = 0; c < NUM_COLS; c++)
+          v[r][c] += a.v[r][c];
+      }
+      return *this;
+    }
+
+    // Subtracts a from this matrix element by element.
+    inline Matrix44& operator -= (const Matrix44& a)
+    {
+      for (int r = 0; r < NUM_ROWS; r++)
+      {
+        for (int c = 0; c < NUM_COLS; c++)
+          v[r][c] -= a.v[r][c];
+      }
+      return *this;
+    }
+
+    // Returns the element-wise sum a + b.
+    friend inline Matrix44 operator + (const Matrix44& a, const Matrix44& b)
+    {
+      Matrix44 sum;
+      for (int r = 0; r < NUM_ROWS; r++)
+      {
+        for (int c = 0; c < NUM_COLS; c++)
+          sum.v[r][c] = a.v[r][c] + b.v[r][c];
+      }
+      return sum;
+    }
+
+    // Returns the element-wise difference a - b.
+    friend inline Matrix44 operator - (const Matrix44& a, const Matrix44& b)
+    {
+      Matrix44 diff;
+      for (int r = 0; r < NUM_ROWS; r++)
+      {
+        for (int c = 0; c < NUM_COLS; c++)
+          diff.v[r][c] = a.v[r][c] - b.v[r][c];
+      }
+      return diff;
+    }
+
+    // Stores a + b into pDst with rows and columns swapped:
+    // element (r, c) of the sum is written to pDst[c*8 + r].
+    static inline void add_and_store(jpgd_block_t* pDst, const Matrix44& a, const Matrix44& b)
+    {
+      for (int r = 0; r < NUM_ROWS; r++)
+      {
+        for (int c = 0; c < NUM_COLS; c++)
+          pDst[c*8 + r] = static_cast<jpgd_block_t>(a.v[r][c] + b.v[r][c]);
+      }
+    }
+
+    // Stores a - b into pDst using the same swapped layout as add_and_store().
+    static inline void sub_and_store(jpgd_block_t* pDst, const Matrix44& a, const Matrix44& b)
+    {
+      for (int r = 0; r < NUM_ROWS; r++)
+      {
+        for (int c = 0; c < NUM_COLS; c++)
+          pDst[c*8 + r] = static_cast<jpgd_block_t>(a.v[r][c] - b.v[r][c]);
+      }
+    }
+  };
+
+  const int FRACT_BITS = 10;
+  const int SCALE = 1 << FRACT_BITS;
+
+  typedef int Temp_Type;
+  #define D(i) (((i) + (SCALE >> 1)) >> FRACT_BITS)
+  #define F(i) ((int)((i) * SCALE + .5f))
+
+  // Any decent C++ compiler will optimize this at compile time to a 0, or an array access.
+  #define AT(c, r) ((((c)>=NUM_COLS)||((r)>=NUM_ROWS)) ? 0 : pSrc[(c)+(r)*8])
+
+  // NUM_ROWS/NUM_COLS = # of non-zero rows/cols in input matrix
+  template<int NUM_ROWS, int NUM_COLS>
+  struct P_Q
+  {
+    static void calc(Matrix44& P, Matrix44& Q, const jpgd_block_t* pSrc)
+    {
+      // 4x8 = 4x8 times 8x8, matrix 0 is constant
+      const Temp_Type X000 = AT(0, 0);
+      const Temp_Type X001 = AT(0, 1);
+      const Temp_Type X002 = AT(0, 2);
+      const Temp_Type X003 = AT(0, 3);
+      const Temp_Type X004 = AT(0, 4);
+      const Temp_Type X005 = AT(0, 5);
+      const Temp_Type X006 = AT(0, 6);
+      const Temp_Type X007 = AT(0, 7);
+      const Temp_Type X010 = D(F(0.415735f) * AT(1, 0) + F(0.791065f) * AT(3, 0) + F(-0.352443f) * AT(5, 0) + F(0.277785f) * AT(7, 0));
+      const Temp_Type X011 = D(F(0.415735f) * AT(1, 1) + F(0.791065f) * AT(3, 1) + F(-0.352443f) * AT(5, 1) + F(0.277785f) * AT(7, 1));
+      const Temp_Type X012 = D(F(0.415735f) * AT(1, 2) + F(0.791065f) * AT(3, 2) + F(-0.352443f) * AT(5, 2) + F(0.277785f) * AT(7, 2));
+      const Temp_Type X013 = D(F(0.415735f) * AT(1, 3) + F(0.791065f) * AT(3, 3) + F(-0.352443f) * AT(5, 3) + F(0.277785f) * AT(7, 3));
+      const Temp_Type X014 = D(F(0.415735f) * AT(1, 4) + F(0.791065f) * AT(3, 4) + F(-0.352443f) * AT(5, 4) + F(0.277785f) * AT(7, 4));
+      const Temp_Type X015 = D(F(0.415735f) * AT(1, 5) + F(0.791065f) * AT(3, 5) + F(-0.352443f) * AT(5, 5) + F(0.277785f) * AT(7, 5));
+      const Temp_Type X016 = D(F(0.415735f) * AT(1, 6) + F(0.791065f) * AT(3, 6) + F(-0.352443f) * AT(5, 6) + F(0.277785f) * AT(7, 6));
+      const Temp_Type X017 = D(F(0.415735f) * AT(1, 7) + F(0.791065f) * AT(3, 7) + F(-0.352443f) * AT(5, 7) + F(0.277785f) * AT(7, 7));
+      const Temp_Type X020 = AT(4, 0);
+      const Temp_Type X021 = AT(4, 1);
+      const Temp_Type X022 = AT(4, 2);
+      const Temp_Type X023 = AT(4, 3);
+      const Temp_Type X024 = AT(4, 4);
+      const Temp_Type X025 = AT(4, 5);
+      const Temp_Type X026 = AT(4, 6);
+      const Temp_Type X027 = AT(4, 7);
+      const Temp_Type X030 = D(F(0.022887f) * AT(1, 0) + F(-0.097545f) * AT(3, 0) + F(0.490393f) * AT(5, 0) + F(0.865723f) * AT(7, 0));
+      const Temp_Type X031 = D(F(0.022887f) * AT(1, 1) + F(-0.097545f) * AT(3, 1) + F(0.490393f) * AT(5, 1) + F(0.865723f) * AT(7, 1));
+      const Temp_Type X032 = D(F(0.022887f) * AT(1, 2) + F(-0.097545f) * AT(3, 2) + F(0.490393f) * AT(5, 2) + F(0.865723f) * AT(7, 2));
+      const Temp_Type X033 = D(F(0.022887f) * AT(1, 3) + F(-0.097545f) * AT(3, 3) + F(0.490393f) * AT(5, 3) + F(0.865723f) * AT(7, 3));
+      const Temp_Type X034 = D(F(0.022887f) * AT(1, 4) + F(-0.097545f) * AT(3, 4) + F(0.490393f) * AT(5, 4) + F(0.865723f) * AT(7, 4));
+      const Temp_Type X035 = D(F(0.022887f) * AT(1, 5) + F(-0.097545f) * AT(3, 5) + F(0.490393f) * AT(5, 5) + F(0.865723f) * AT(7, 5));
+      const Temp_Type X036 = D(F(0.022887f) * AT(1, 6) + F(-0.097545f) * AT(3, 6) + F(0.490393f) * AT(5, 6) + F(0.865723f) * AT(7, 6));
+      const Temp_Type X037 = D(F(0.022887f) * AT(1, 7) + F(-0.097545f) * AT(3, 7) + F(0.490393f) * AT(5, 7) + F(0.865723f) * AT(7, 7));
+
+      // 4x4 = 4x8 times 8x4, matrix 1 is constant
+      P.at(0, 0) = X000;
+      P.at(0, 1) = D(X001 * F(0.415735f) + X003 * F(0.791065f) + X005 * F(-0.352443f) + X007 * F(0.277785f));
+      P.at(0, 2) = X004;
+      P.at(0, 3) = D(X001 * F(0.022887f) + X003 * F(-0.097545f) + X005 * F(0.490393f) + X007 * F(0.865723f));
+      P.at(1, 0) = X010;
+      P.at(1, 1) = D(X011 * F(0.415735f) + X013 * F(0.791065f) + X015 * F(-0.352443f) + X017 * F(0.277785f));
+      P.at(1, 2) = X014;
+      P.at(1, 3) = D(X011 * F(0.022887f) + X013 * F(-0.097545f) + X015 * F(0.490393f) + X017 * F(0.865723f));
+      P.at(2, 0) = X020;
+      P.at(2, 1) = D(X021 * F(0.415735f) + X023 * F(0.791065f) + X025 * F(-0.352443f) + X027 * F(0.277785f));
+      P.at(2, 2) = X024;
+      P.at(2, 3) = D(X021 * F(0.022887f) + X023 * F(-0.097545f) + X025 * F(0.490393f) + X027 * F(0.865723f));
+      P.at(3, 0) = X030;
+      P.at(3, 1) = D(X031 * F(0.415735f) + X033 * F(0.791065f) + X035 * F(-0.352443f) + X037 * F(0.277785f));
+      P.at(3, 2) = X034;
+      P.at(3, 3) = D(X031 * F(0.022887f) + X033 * F(-0.097545f) + X035 * F(0.490393f) + X037 * F(0.865723f));
+      // 40 muls 24 adds
+
+      // 4x4 = 4x8 times 8x4, matrix 1 is constant
+      Q.at(0, 0) = D(X001 * F(0.906127f) + X003 * F(-0.318190f) + X005 * F(0.212608f) + X007 * F(-0.180240f));
+      Q.at(0, 1) = X002;
+      Q.at(0, 2) = D(X001 * F(-0.074658f) + X003 * F(0.513280f) + X005 * F(0.768178f) + X007 * F(-0.375330f));
+      Q.at(0, 3) = X006;
+      Q.at(1, 0) = D(X011 * F(0.906127f) + X013 * F(-0.318190f) + X015 * F(0.212608f) + X017 * F(-0.180240f));
+      Q.at(1, 1) = X012;
+      Q.at(1, 2) = D(X011 * F(-0.074658f) + X013 * F(0.513280f) + X015 * F(0.768178f) + X017 * F(-0.375330f));
+      Q.at(1, 3) = X016;
+      Q.at(2, 0) = D(X021 * F(0.906127f) + X023 * F(-0.318190f) + X025 * F(0.212608f) + X027 * F(-0.180240f));
+      Q.at(2, 1) = X022;
+      Q.at(2, 2) = D(X021 * F(-0.074658f) + X023 * F(0.513280f) + X025 * F(0.768178f) + X027 * F(-0.375330f));
+      Q.at(2, 3) = X026;
+      Q.at(3, 0) = D(X031 * F(0.906127f) + X033 * F(-0.318190f) + X035 * F(0.212608f) + X037 * F(-0.180240f));
+      Q.at(3, 1) = X032;
+      Q.at(3, 2) = D(X031 * F(-0.074658f) + X033 * F(0.513280f) + X035 * F(0.768178f) + X037 * F(-0.375330f));
+      Q.at(3, 3) = X036;
+      // 40 muls 24 adds
+    }
+  };
+
+  // Builds the two 4x4 matrices R and S from one block of coefficients that is
+  // read exclusively through the AT() macro.
+  // NUM_ROWS / NUM_COLS are not referenced directly in this body; presumably the
+  // AT() macro (defined outside this view) uses them to treat coefficients
+  // outside the populated region as zero -- TODO confirm against its definition.
+  template<int NUM_ROWS, int NUM_COLS>
+  struct R_S
+  {
+    // R, S: outputs, every element is overwritten.
+    // pSrc: not named in this body; presumably consumed by AT() -- TODO confirm.
+    static void calc(Matrix44& R, Matrix44& S, const jpgd_block_t* pSrc)
+    {
+      // 4x8 = 4x8 times 8x8, matrix 0 is constant
+      // Intermediate row 0 (X100..X107): weighted sum of AT(1,c), AT(3,c), AT(5,c), AT(7,c).
+      const Temp_Type X100 = D(F(0.906127f) * AT(1, 0) + F(-0.318190f) * AT(3, 0) + F(0.212608f) * AT(5, 0) + F(-0.180240f) * AT(7, 0));
+      const Temp_Type X101 = D(F(0.906127f) * AT(1, 1) + F(-0.318190f) * AT(3, 1) + F(0.212608f) * AT(5, 1) + F(-0.180240f) * AT(7, 1));
+      const Temp_Type X102 = D(F(0.906127f) * AT(1, 2) + F(-0.318190f) * AT(3, 2) + F(0.212608f) * AT(5, 2) + F(-0.180240f) * AT(7, 2));
+      const Temp_Type X103 = D(F(0.906127f) * AT(1, 3) + F(-0.318190f) * AT(3, 3) + F(0.212608f) * AT(5, 3) + F(-0.180240f) * AT(7, 3));
+      const Temp_Type X104 = D(F(0.906127f) * AT(1, 4) + F(-0.318190f) * AT(3, 4) + F(0.212608f) * AT(5, 4) + F(-0.180240f) * AT(7, 4));
+      const Temp_Type X105 = D(F(0.906127f) * AT(1, 5) + F(-0.318190f) * AT(3, 5) + F(0.212608f) * AT(5, 5) + F(-0.180240f) * AT(7, 5));
+      const Temp_Type X106 = D(F(0.906127f) * AT(1, 6) + F(-0.318190f) * AT(3, 6) + F(0.212608f) * AT(5, 6) + F(-0.180240f) * AT(7, 6));
+      const Temp_Type X107 = D(F(0.906127f) * AT(1, 7) + F(-0.318190f) * AT(3, 7) + F(0.212608f) * AT(5, 7) + F(-0.180240f) * AT(7, 7));
+      // Intermediate row 1 (X110..X117): straight copy of AT(2, c).
+      const Temp_Type X110 = AT(2, 0);
+      const Temp_Type X111 = AT(2, 1);
+      const Temp_Type X112 = AT(2, 2);
+      const Temp_Type X113 = AT(2, 3);
+      const Temp_Type X114 = AT(2, 4);
+      const Temp_Type X115 = AT(2, 5);
+      const Temp_Type X116 = AT(2, 6);
+      const Temp_Type X117 = AT(2, 7);
+      // Intermediate row 2 (X120..X127): second weighted sum of the same four AT() rows.
+      const Temp_Type X120 = D(F(-0.074658f) * AT(1, 0) + F(0.513280f) * AT(3, 0) + F(0.768178f) * AT(5, 0) + F(-0.375330f) * AT(7, 0));
+      const Temp_Type X121 = D(F(-0.074658f) * AT(1, 1) + F(0.513280f) * AT(3, 1) + F(0.768178f) * AT(5, 1) + F(-0.375330f) * AT(7, 1));
+      const Temp_Type X122 = D(F(-0.074658f) * AT(1, 2) + F(0.513280f) * AT(3, 2) + F(0.768178f) * AT(5, 2) + F(-0.375330f) * AT(7, 2));
+      const Temp_Type X123 = D(F(-0.074658f) * AT(1, 3) + F(0.513280f) * AT(3, 3) + F(0.768178f) * AT(5, 3) + F(-0.375330f) * AT(7, 3));
+      const Temp_Type X124 = D(F(-0.074658f) * AT(1, 4) + F(0.513280f) * AT(3, 4) + F(0.768178f) * AT(5, 4) + F(-0.375330f) * AT(7, 4));
+      const Temp_Type X125 = D(F(-0.074658f) * AT(1, 5) + F(0.513280f) * AT(3, 5) + F(0.768178f) * AT(5, 5) + F(-0.375330f) * AT(7, 5));
+      const Temp_Type X126 = D(F(-0.074658f) * AT(1, 6) + F(0.513280f) * AT(3, 6) + F(0.768178f) * AT(5, 6) + F(-0.375330f) * AT(7, 6));
+      const Temp_Type X127 = D(F(-0.074658f) * AT(1, 7) + F(0.513280f) * AT(3, 7) + F(0.768178f) * AT(5, 7) + F(-0.375330f) * AT(7, 7));
+      // Intermediate row 3 (X130..X137): straight copy of AT(6, c).
+      const Temp_Type X130 = AT(6, 0);
+      const Temp_Type X131 = AT(6, 1);
+      const Temp_Type X132 = AT(6, 2);
+      const Temp_Type X133 = AT(6, 3);
+      const Temp_Type X134 = AT(6, 4);
+      const Temp_Type X135 = AT(6, 5);
+      const Temp_Type X136 = AT(6, 6);
+      const Temp_Type X137 = AT(6, 7);
+      // 80 muls 48 adds
+
+      // 4x4 = 4x8 times 8x4, matrix 1 is constant
+      // R: columns 0 and 2 copy intermediate columns 0 and 4; columns 1 and 3 are
+      // weighted sums of intermediate columns 1, 3, 5 and 7.
+      R.at(0, 0) = X100;
+      R.at(0, 1) = D(X101 * F(0.415735f) + X103 * F(0.791065f) + X105 * F(-0.352443f) + X107 * F(0.277785f));
+      R.at(0, 2) = X104;
+      R.at(0, 3) = D(X101 * F(0.022887f) + X103 * F(-0.097545f) + X105 * F(0.490393f) + X107 * F(0.865723f));
+      R.at(1, 0) = X110;
+      R.at(1, 1) = D(X111 * F(0.415735f) + X113 * F(0.791065f) + X115 * F(-0.352443f) + X117 * F(0.277785f));
+      R.at(1, 2) = X114;
+      R.at(1, 3) = D(X111 * F(0.022887f) + X113 * F(-0.097545f) + X115 * F(0.490393f) + X117 * F(0.865723f));
+      R.at(2, 0) = X120;
+      R.at(2, 1) = D(X121 * F(0.415735f) + X123 * F(0.791065f) + X125 * F(-0.352443f) + X127 * F(0.277785f));
+      R.at(2, 2) = X124;
+      R.at(2, 3) = D(X121 * F(0.022887f) + X123 * F(-0.097545f) + X125 * F(0.490393f) + X127 * F(0.865723f));
+      R.at(3, 0) = X130;
+      R.at(3, 1) = D(X131 * F(0.415735f) + X133 * F(0.791065f) + X135 * F(-0.352443f) + X137 * F(0.277785f));
+      R.at(3, 2) = X134;
+      R.at(3, 3) = D(X131 * F(0.022887f) + X133 * F(-0.097545f) + X135 * F(0.490393f) + X137 * F(0.865723f));
+      // 40 muls 24 adds
+      // 4x4 = 4x8 times 8x4, matrix 1 is constant
+      // S: columns 1 and 3 copy intermediate columns 2 and 6; columns 0 and 2 are
+      // weighted sums of intermediate columns 1, 3, 5 and 7.
+      S.at(0, 0) = D(X101 * F(0.906127f) + X103 * F(-0.318190f) + X105 * F(0.212608f) + X107 * F(-0.180240f));
+      S.at(0, 1) = X102;
+      S.at(0, 2) = D(X101 * F(-0.074658f) + X103 * F(0.513280f) + X105 * F(0.768178f) + X107 * F(-0.375330f));
+      S.at(0, 3) = X106;
+      S.at(1, 0) = D(X111 * F(0.906127f) + X113 * F(-0.318190f) + X115 * F(0.212608f) + X117 * F(-0.180240f));
+      S.at(1, 1) = X112;
+      S.at(1, 2) = D(X111 * F(-0.074658f) + X113 * F(0.513280f) + X115 * F(0.768178f) + X117 * F(-0.375330f));
+      S.at(1, 3) = X116;
+      S.at(2, 0) = D(X121 * F(0.906127f) + X123 * F(-0.318190f) + X125 * F(0.212608f) + X127 * F(-0.180240f));
+      S.at(2, 1) = X122;
+      S.at(2, 2) = D(X121 * F(-0.074658f) + X123 * F(0.513280f) + X125 * F(0.768178f) + X127 * F(-0.375330f));
+      S.at(2, 3) = X126;
+      S.at(3, 0) = D(X131 * F(0.906127f) + X133 * F(-0.318190f) + X135 * F(0.212608f) + X137 * F(-0.180240f));
+      S.at(3, 1) = X132;
+      S.at(3, 2) = D(X131 * F(-0.074658f) + X133 * F(0.513280f) + X135 * F(0.768178f) + X137 * F(-0.375330f));
+      S.at(3, 3) = X136;
+      // 40 muls 24 adds
+    }
+  };
+} // end namespace DCT_Upsample
+
+// Releases every block in the decoder's memory-block list and forgets the
+// input stream pointer. Safe to call when the list is already empty.
+void jpeg_decoder::free_all_blocks()
+{
+  m_pStream = NULL;
+
+  mem_block *pCur = m_pMem_blocks;
+  while (pCur)
+  {
+    // Remember the successor before the current node is released.
+    mem_block *pFollowing = pCur->m_pNext;
+    jpgd_free(pCur);
+    pCur = pFollowing;
+  }
+
+  m_pMem_blocks = NULL;
+}
+
+// Central error exit: records the status, frees every pooled block, then
+// longjmp()s to m_jmp_state. It never returns to its caller.
+// It could easily be changed to use C++ exceptions.
+// status: stored in m_error_code and also passed as the longjmp value.
+// NOTE(review): longjmp does not run destructors of automatic objects in the
+// frames it skips; keep objects with non-trivial destructors out of any code
+// path that can reach this function.
+JPGD_NORETURN void jpeg_decoder::stop_decoding(jpgd_status status)
+{
+  m_error_code = status;
+  // Also clears m_pStream (see free_all_blocks).
+  free_all_blocks();
+  longjmp(m_jmp_state, status);
+}
+
+// Pool allocator: hands out nSize bytes (rounded up to a multiple of 4, minimum 4)
+// from the first existing block with enough room, or from a freshly malloc'ed
+// block pushed onto the front of m_pMem_blocks.
+//   nSize - requested size in bytes.
+//   zero  - when true the returned bytes are cleared.
+// Returns a pointer into a pool block; individual allocations are never freed,
+// the whole pool is released by free_all_blocks().
+// Does not return on failure: calls stop_decoding(JPGD_NOTENOUGHMEM).
+void *jpeg_decoder::alloc(size_t nSize, bool zero)
+{
+  // Reject requests so large that the rounding and header arithmetic below
+  // would wrap around and silently under-allocate.
+  const size_t max_request = ~(size_t)0 - sizeof(mem_block) - 2047 - 3;
+  if (nSize > max_request)
+    stop_decoding(JPGD_NOTENOUGHMEM);
+
+  nSize = (JPGD_MAX(nSize, 1) + 3) & ~(size_t)3;
+  char *rv = NULL;
+  for (mem_block *b = m_pMem_blocks; b; b = b->m_pNext)
+  {
+    if ((b->m_used_count + nSize) <= b->m_size)
+    {
+      rv = b->m_data + b->m_used_count;
+      b->m_used_count += nSize;
+      break;
+    }
+  }
+  if (!rv)
+  {
+    // New block: at least 32768 - 256 bytes, otherwise the request rounded up to 2 KB.
+    // Kept in size_t so a large request is not truncated through int.
+    const size_t min_capacity = 32768 - 256;
+    const size_t wanted = (nSize + 2047) & ~(size_t)2047;
+    const size_t capacity = JPGD_MAX(min_capacity, wanted);
+    mem_block *b = (mem_block*)jpgd_malloc(sizeof(mem_block) + capacity);
+    if (!b) { stop_decoding(JPGD_NOTENOUGHMEM); }
+    b->m_pNext = m_pMem_blocks; m_pMem_blocks = b;
+    b->m_used_count = nSize;
+    b->m_size = capacity;
+    rv = b->m_data;
+  }
+  if (zero) memset(rv, 0, nSize);
+  return rv;
+}
+
+// Fills n consecutive 2-byte slots starting at p with the value c, always
+// writing the low byte first and the high byte second.
+void jpeg_decoder::word_clear(void *p, uint16 c, uint n)
+{
+  const uint8 lo_byte = c & 0xFF;
+  const uint8 hi_byte = (c >> 8) & 0xFF;
+
+  uint8 *pOut = (uint8*)p;
+  for (uint remaining = n; remaining != 0; remaining--, pOut += 2)
+  {
+    pOut[0] = lo_byte;
+    pOut[1] = hi_byte;
+  }
+}
+
+// Refills the input buffer from the stream.
+// Keeps calling the stream's read() until either the buffer holds
+// JPGD_IN_BUF_SIZE bytes or read() reports an end-of-file condition.
+// A read() result of -1 aborts decoding with JPGD_STREAM_READ.
+// If end of file was already seen, the buffer is simply left empty.
+void jpeg_decoder::prep_in_buffer()
+{
+  m_pIn_buf_ofs = m_in_buf;
+  m_in_buf_left = 0;
+
+  if (!m_eof_flag)
+  {
+    for ( ; ; )
+    {
+      const int got = m_pStream->read(m_in_buf + m_in_buf_left, JPGD_IN_BUF_SIZE - m_in_buf_left, &m_eof_flag);
+      if (got == -1)
+        stop_decoding(JPGD_STREAM_READ);
+
+      m_in_buf_left += got;
+
+      if ((m_in_buf_left >= JPGD_IN_BUF_SIZE) || m_eof_flag)
+        break;
+    }
+
+    m_total_bytes_read += m_in_buf_left;
+
+    // Follow the valid data with 64 copies of the byte pair 0xFF, 0xD9 (the M_EOI marker)
+    // so that a decoder running past the end of an invalid stream keeps seeing EOI.
+    // (This dates way back to when this decompressor was written in C/asm, and the
+    // all-asm Huffman decoder did some fancy things to increase perf.)
+    word_clear(m_pIn_buf_ofs + m_in_buf_left, 0xD9FF, 64);
+  }
+}
+
+// Read a Huffman code table (DHT) marker segment.
+// One segment may carry several tables; each is a selector byte, 16 per-length
+// code counts and then the symbol values. The tables are copied into
+// m_huff_num[] / m_huff_val[], allocating the destination on first use.
+// Aborts via stop_decoding() on a short segment (JPGD_BAD_DHT_MARKER), more than
+// 255 symbols (JPGD_BAD_DHT_COUNTS) or an invalid selector (JPGD_BAD_DHT_INDEX).
+void jpeg_decoder::read_dht_marker()
+{
+  int i, index, count;
+  uint8 huff_num[17];
+  uint8 huff_val[256];
+
+  uint num_left = get_bits(16);
+
+  if (num_left < 2)
+    stop_decoding(JPGD_BAD_DHT_MARKER);
+
+  num_left -= 2;
+
+  while (num_left)
+  {
+    index = get_bits(8);
+
+    huff_num[0] = 0;
+
+    count = 0;
+
+    for (i = 1; i <= 16; i++)
+    {
+      huff_num[i] = static_cast<uint8>(get_bits(8));
+      count += huff_num[i];
+    }
+
+    if (count > 255)
+      stop_decoding(JPGD_BAD_DHT_COUNTS);
+
+    // Clear the whole scratch array first: all 256 bytes are copied out below,
+    // and entries past 'count' would otherwise be indeterminate stack contents.
+    memset(huff_val, 0, sizeof(huff_val));
+
+    for (i = 0; i < count; i++)
+      huff_val[i] = static_cast<uint8>(get_bits(8));
+
+    i = 1 + 16 + count;
+
+    if (num_left < (uint)i)
+      stop_decoding(JPGD_BAD_DHT_MARKER);
+
+    num_left -= i;
+
+    // Selector byte: high nibble = table class (0 or 1), low nibble = table id.
+    // Both halves are validated before they are folded into one slot number; without
+    // this, a class-0 id in the upper half of the range would silently land on a
+    // class-1 slot, and class values above 1 would be ignored.
+    const int table_class = index >> 4;
+    const int table_id = index & 0x0F;
+
+    if ((table_class > 1) || (table_id >= (JPGD_MAX_HUFF_TABLES >> 1)))
+      stop_decoding(JPGD_BAD_DHT_INDEX);
+
+    // Class-0 tables occupy the lower half of the slots, class-1 tables the upper half.
+    index = table_id + table_class * (JPGD_MAX_HUFF_TABLES >> 1);
+
+    if (index >= JPGD_MAX_HUFF_TABLES)
+      stop_decoding(JPGD_BAD_DHT_INDEX);
+
+    if (!m_huff_num[index])
+      m_huff_num[index] = (uint8 *)alloc(17);
+
+    if (!m_huff_val[index])
+      m_huff_val[index] = (uint8 *)alloc(256);
+
+    // NOTE(review): 'index' is already the folded slot number here, so bit 0x10 is
+    // never set and this always stores false; 'table_class != 0' looks like the
+    // intended value. Left as-is because the readers of m_huff_ac are outside this
+    // view -- confirm before changing.
+    m_huff_ac[index] = (index & 0x10) != 0;
+    memcpy(m_huff_num[index], huff_num, 17);
+    memcpy(m_huff_val[index], huff_val, 256);
+  }
+}
+
+// Read a quantization table (DQT) marker segment.
+// A segment may hold several tables. Each starts with a selector byte whose low
+// nibble picks the destination in m_quant[] and whose high nibble, when non-zero,
+// means every entry is stored as two bytes (high byte first) instead of one.
+void jpeg_decoder::read_dqt_marker()
+{
+  uint bytes_remaining = get_bits(16);
+
+  if (bytes_remaining < 2)
+    stop_decoding(JPGD_BAD_DQT_MARKER);
+
+  bytes_remaining -= 2;
+
+  while (bytes_remaining)
+  {
+    const int selector = get_bits(8);
+    const int two_byte_entries = selector >> 4;
+    const int table_num = selector & 0x0F;
+
+    if (table_num >= JPGD_MAX_QUANT_TABLES)
+      stop_decoding(JPGD_BAD_DQT_TABLE);
+
+    if (!m_quant[table_num])
+      m_quant[table_num] = (jpgd_quant_t *)alloc(64 * sizeof(jpgd_quant_t));
+
+    // Entries are stored in the order they arrive (zag order).
+    for (int zag = 0; zag < 64; zag++)
+    {
+      uint entry = get_bits(8);
+
+      if (two_byte_entries)
+        entry = (entry << 8) + get_bits(8);
+
+      m_quant[table_num][zag] = static_cast<jpgd_quant_t>(entry);
+    }
+
+    // Selector byte plus 64 entries of one or two bytes each.
+    const uint table_bytes = two_byte_entries ? (64 + 1 + 64) : (64 + 1);
+
+    if (bytes_remaining < table_bytes)
+      stop_decoding(JPGD_BAD_DQT_LENGTH);
+
+    bytes_remaining -= table_bytes;
+  }
+}
+
+// Read the start of frame (SOF) marker.
+// Fills in the image dimensions, the component count and, per component, its
+// identifier, horizontal/vertical sampling factors and quantization table number.
+// Aborts via stop_decoding() when the precision is not 8 bits, a dimension is
+// zero or above the JPGD_MAX_* limits, there are too many components, the
+// segment length does not match the component count, or a component names a
+// quantization table outside m_quant[].
+void jpeg_decoder::read_sof_marker()
+{
+  int i;
+  uint num_left;
+
+  num_left = get_bits(16);
+
+  if (get_bits(8) != 8)   /* precision: sorry, only 8-bit precision is supported right now */
+    stop_decoding(JPGD_BAD_PRECISION);
+
+  m_image_y_size = get_bits(16);
+
+  if ((m_image_y_size < 1) || (m_image_y_size > JPGD_MAX_HEIGHT))
+    stop_decoding(JPGD_BAD_HEIGHT);
+
+  m_image_x_size = get_bits(16);
+
+  if ((m_image_x_size < 1) || (m_image_x_size > JPGD_MAX_WIDTH))
+    stop_decoding(JPGD_BAD_WIDTH);
+
+  m_comps_in_frame = get_bits(8);
+
+  if (m_comps_in_frame > JPGD_MAX_COMPONENTS)
+    stop_decoding(JPGD_TOO_MANY_COMPONENTS);
+
+  // Segment length = 8 fixed bytes + 3 bytes per component.
+  if (num_left != (uint)(m_comps_in_frame * 3 + 8))
+    stop_decoding(JPGD_BAD_SOF_LENGTH);
+
+  for (i = 0; i < m_comps_in_frame; i++)
+  {
+    m_comp_ident[i]  = get_bits(8);
+    m_comp_h_samp[i] = get_bits(4);
+    m_comp_v_samp[i] = get_bits(4);
+    m_comp_quant[i]  = get_bits(8);
+
+    // m_comp_quant[] is later used directly as an index into m_quant[]
+    // (load_next_row / decode_next_row), so an out-of-range table number read
+    // from the file must be rejected here.
+    if (m_comp_quant[i] >= JPGD_MAX_QUANT_TABLES)
+      stop_decoding(JPGD_DECODE_ERROR);
+  }
+}
+
+// Skips over a marker segment the decoder does not interpret: reads the 16-bit
+// segment length, then discards the remaining (length - 2) bytes one at a time.
+// A length below 2 aborts with JPGD_BAD_VARIABLE_MARKER.
+void jpeg_decoder::skip_variable_marker()
+{
+  const uint segment_len = get_bits(16);
+
+  if (segment_len < 2)
+    stop_decoding(JPGD_BAD_VARIABLE_MARKER);
+
+  for (uint todo = segment_len - 2; todo != 0; todo--)
+    get_bits(8);
+}
+
+// Read a define restart interval (DRI) marker: the segment length must be
+// exactly 4, and the following 16-bit value becomes m_restart_interval.
+void jpeg_decoder::read_dri_marker()
+{
+  const uint segment_len = get_bits(16);
+
+  if (segment_len != 4)
+    stop_decoding(JPGD_BAD_DRI_LENGTH);
+
+  m_restart_interval = get_bits(16);
+}
+
+// Read a start of scan (SOS) marker.
+// Records which frame components take part in the scan (m_comp_list), which
+// Huffman tables each of them uses (m_comp_dc_tab / m_comp_ac_tab) and the
+// spectral-selection / successive-approximation parameters.
+// Aborts via stop_decoding() on an inconsistent length or component count, on a
+// component id that was not declared in the frame header, or on a Huffman table
+// selector outside the range of tables the decoder stores.
+void jpeg_decoder::read_sos_marker()
+{
+  uint num_left;
+  int i, ci, n, c, cc;
+
+  num_left = get_bits(16);
+
+  n = get_bits(8);
+
+  m_comps_in_scan = n;
+
+  num_left -= 3;
+
+  // What is left must be exactly 2 bytes per component plus 3 trailing parameter bytes.
+  // (If the length was below 3 the subtraction wraps and this comparison still fails.)
+  if ( (num_left != (uint)(n * 2 + 3)) || (n < 1) || (n > JPGD_MAX_COMPS_IN_SCAN) )
+    stop_decoding(JPGD_BAD_SOS_LENGTH);
+
+  for (i = 0; i < n; i++)
+  {
+    cc = get_bits(8);
+    c = get_bits(8);
+    num_left -= 2;
+
+    // Map the component id from the stream to its position in the frame header.
+    for (ci = 0; ci < m_comps_in_frame; ci++)
+      if (cc == m_comp_ident[ci])
+        break;
+
+    if (ci >= m_comps_in_frame)
+      stop_decoding(JPGD_BAD_SOS_COMP_ID);
+
+    // High nibble selects the DC table, low nibble the AC table. Each is a raw
+    // 4-bit value from the file, and both are later used to index m_pHuff_tabs[]
+    // (see decode_next_row), so they must be range-checked here. Each class owns
+    // half of the JPGD_MAX_HUFF_TABLES slots.
+    // No more specific status code is visible in this part of the file, so the
+    // generic SOS error is reused.
+    const int dc_tab = (c >> 4) & 15;
+    const int ac_tab = c & 15;
+
+    if ((dc_tab >= (JPGD_MAX_HUFF_TABLES >> 1)) || (ac_tab >= (JPGD_MAX_HUFF_TABLES >> 1)))
+      stop_decoding(JPGD_BAD_SOS_LENGTH);
+
+    m_comp_list[i]    = ci;
+    m_comp_dc_tab[ci] = dc_tab;
+    m_comp_ac_tab[ci] = ac_tab + (JPGD_MAX_HUFF_TABLES >> 1);
+  }
+
+  m_spectral_start  = get_bits(8);
+  m_spectral_end    = get_bits(8);
+  m_successive_high = get_bits(4);
+  m_successive_low  = get_bits(4);
+
+  // Non-progressive scans always cover the full coefficient range, whatever the file says.
+  if (!m_progressive_flag)
+  {
+    m_spectral_start = 0;
+    m_spectral_end = 63;
+  }
+
+  num_left -= 3;
+
+  while (num_left)                  /* read past whatever is num_left */
+  {
+    get_bits(8);
+    num_left--;
+  }
+}
+
+// Scans forward to the next marker and returns its code byte.
+// Bytes before the 0xFF prefix are discarded, runs of 0xFF are collapsed, and
+// an 0xFF followed by 0x00 is not treated as a marker (scanning continues).
+int jpeg_decoder::next_marker()
+{
+  uint code;
+  uint skipped = 0; // counts bytes consumed while hunting for 0xFF; not used further
+
+  for ( ; ; )
+  {
+    // Advance until a 0xFF byte has been consumed.
+    do
+    {
+      skipped++;
+      code = get_bits(8);
+    } while (code != 0xFF);
+
+    // Swallow any further 0xFF bytes; 'code' ends up as the first non-0xFF byte.
+    do
+    {
+      code = get_bits(8);
+    } while (code == 0xFF);
+
+    if (code != 0)
+      break;
+  }
+
+  // If more than one byte was skipped there were extra bytes before the marker (not good).
+
+  return code;
+}
+
+// Consumes marker segments until one is found that the caller has to act on.
+// Returns the marker code when an SOFx, SOI, EOI or SOS marker is encountered.
+// DHT, DQT and DRI segments are parsed on the way; DAC aborts (arithmetic coding
+// is not supported); JPG, RSTn and TEM abort as unexpected; every other marker
+// is skipped as a variable-length segment.
+int jpeg_decoder::process_markers()
+{
+  for ( ; ; )
+  {
+    const int marker = next_marker();
+
+    switch (marker)
+    {
+      // Handed back to the caller.
+      case M_SOI:
+      case M_EOI:
+      case M_SOS:
+      case M_SOF0:
+      case M_SOF1:
+      case M_SOF2:
+      case M_SOF3:
+      case M_SOF5:
+      case M_SOF6:
+      case M_SOF7:
+      case M_SOF9:
+      case M_SOF10:
+      case M_SOF11:
+      case M_SOF13:
+      case M_SOF14:
+      case M_SOF15:
+        return marker;
+
+      // Table and parameter segments parsed in place.
+      case M_DHT:
+        read_dht_marker();
+        break;
+
+      case M_DQT:
+        read_dqt_marker();
+        break;
+
+      case M_DRI:
+        read_dri_marker();
+        break;
+
+      // No arithmitic support - dumb patents!
+      case M_DAC:
+        stop_decoding(JPGD_NO_ARITHMITIC_SUPPORT);
+        break;
+
+      // Parameterless markers that must not appear here.
+      case M_JPG:
+      case M_TEM:
+      case M_RST0:
+      case M_RST1:
+      case M_RST2:
+      case M_RST3:
+      case M_RST4:
+      case M_RST5:
+      case M_RST6:
+      case M_RST7:
+        stop_decoding(JPGD_UNEXPECTED_MARKER);
+        break;
+
+      // Must be DNL, DHP, EXP, APPn (including the APP0/JFIF header), JPGn, COM, or RESn.
+      default:
+        skip_variable_marker();
+        break;
+    }
+  }
+}
+
+// Finds the start of image (SOI) marker.
+// A file that begins with 0xFF, SOI is accepted immediately. Otherwise the
+// search is deliberately limited (to roughly the first 4 KB) to avoid false
+// positives, and the byte following the SOI that was found must be 0xFF.
+// Aborts with JPGD_NOT_JPEG when no acceptable SOI is found.
+void jpeg_decoder::locate_soi_marker()
+{
+  uint prev = get_bits(8);
+  uint cur = get_bits(8);
+
+  /* ok if it's a normal JPEG file without a special header */
+  if ((prev == 0xFF) && (cur == M_SOI))
+    return;
+
+  uint scan_budget = 4096; //512;
+  bool found = false;
+
+  while (!found)
+  {
+    scan_budget--;
+    if (scan_budget == 0)
+      stop_decoding(JPGD_NOT_JPEG);
+
+    prev = cur;
+    cur = get_bits(8);
+
+    if (prev != 0xFF)
+      continue;
+
+    if (cur == M_SOI)
+      found = true;
+    else if (cur == M_EOI) // get_bits will keep returning M_EOI if we read past the end
+      stop_decoding(JPGD_NOT_JPEG);
+  }
+
+  // Peek at the byte after the marker (top 8 bits of the bit buffer): if it's not 0xFF,
+  // it can't be the start of the next marker, so the file is bad.
+  if (((m_bit_buf >> 24) & 0xFF) != 0xFF)
+    stop_decoding(JPGD_NOT_JPEG);
+}
+
+// Finds the SOI marker, processes the segments that follow and reads the frame
+// header. Only SOF0 (baseline DCT), SOF1 (extended sequential DCT) and SOF2 are
+// accepted; SOF2 additionally sets m_progressive_flag. SOF9 (arithmetic coding)
+// and every other marker abort decoding.
+void jpeg_decoder::locate_sof_marker()
+{
+  locate_soi_marker();
+
+  const int marker = process_markers();
+
+  if (marker == M_SOF9)  /* Arithmitic coding */
+  {
+    stop_decoding(JPGD_NO_ARITHMITIC_SUPPORT);
+  }
+  else if ((marker != M_SOF0) && (marker != M_SOF1) && (marker != M_SOF2))
+  {
+    stop_decoding(JPGD_UNSUPPORTED_MARKER);
+  }
+  else
+  {
+    if (marker == M_SOF2)
+      m_progressive_flag = JPGD_TRUE;
+
+    read_sof_marker();
+  }
+}
+
+// Processes segments up to the next start of scan (SOS) marker and reads it.
+// Returns JPGD_TRUE when a scan header was read, JPGD_FALSE when EOI was hit
+// first. Any other marker aborts with JPGD_UNEXPECTED_MARKER.
+int jpeg_decoder::locate_sos_marker()
+{
+  switch (process_markers())
+  {
+    case M_EOI:
+      return JPGD_FALSE;
+    case M_SOS:
+      break;
+    default:
+      stop_decoding(JPGD_UNEXPECTED_MARKER);
+  }
+
+  read_sos_marker();
+
+  return JPGD_TRUE;
+}
+
+// Reset everything to default/uninitialized state, attach the input stream,
+// fill the input buffer and prime the bit buffer.
+//   pStream - stream the decoder will read from; stored in m_pStream.
+// Note that m_pMem_blocks is simply set to NULL here; blocks from an earlier
+// use are not freed by this function.
+void jpeg_decoder::init(jpeg_decoder_stream *pStream)
+{
+  m_pMem_blocks = NULL;
+  m_error_code = JPGD_SUCCESS;
+  m_ready_flag = false;
+  m_image_x_size = m_image_y_size = 0;
+  m_pStream = pStream;
+  m_progressive_flag = JPGD_FALSE;
+
+  // Huffman and quantization table slots: all empty.
+  memset(m_huff_ac, 0, sizeof(m_huff_ac));
+  memset(m_huff_num, 0, sizeof(m_huff_num));
+  memset(m_huff_val, 0, sizeof(m_huff_val));
+  memset(m_quant, 0, sizeof(m_quant));
+
+  m_scan_type = 0;
+  m_comps_in_frame = 0;
+
+  // Per-component frame information.
+  memset(m_comp_h_samp, 0, sizeof(m_comp_h_samp));
+  memset(m_comp_v_samp, 0, sizeof(m_comp_v_samp));
+  memset(m_comp_quant, 0, sizeof(m_comp_quant));
+  memset(m_comp_ident, 0, sizeof(m_comp_ident));
+  memset(m_comp_h_blocks, 0, sizeof(m_comp_h_blocks));
+  memset(m_comp_v_blocks, 0, sizeof(m_comp_v_blocks));
+
+  // Per-scan information.
+  m_comps_in_scan = 0;
+  memset(m_comp_list, 0, sizeof(m_comp_list));
+  memset(m_comp_dc_tab, 0, sizeof(m_comp_dc_tab));
+  memset(m_comp_ac_tab, 0, sizeof(m_comp_ac_tab));
+
+  m_spectral_start = 0;
+  m_spectral_end = 0;
+  m_successive_low = 0;
+  m_successive_high = 0;
+  m_max_mcu_x_size = 0;
+  m_max_mcu_y_size = 0;
+  m_blocks_per_mcu = 0;
+  m_max_blocks_per_row = 0;
+  m_mcus_per_row = 0;
+  m_mcus_per_col = 0;
+  m_expanded_blocks_per_component = 0;
+  m_expanded_blocks_per_mcu = 0;
+  m_expanded_blocks_per_row = 0;
+  m_freq_domain_chroma_upsample = false;
+
+  memset(m_mcu_org, 0, sizeof(m_mcu_org));
+
+  m_total_lines_left = 0;
+  m_mcu_lines_left = 0;
+  m_real_dest_bytes_per_scan_line = 0;
+  m_dest_bytes_per_scan_line = 0;
+  m_dest_bytes_per_pixel = 0;
+
+  memset(m_pHuff_tabs, 0, sizeof(m_pHuff_tabs));
+
+  memset(m_dc_coeffs, 0, sizeof(m_dc_coeffs));
+  memset(m_ac_coeffs, 0, sizeof(m_ac_coeffs));
+  memset(m_block_y_mcu, 0, sizeof(m_block_y_mcu));
+
+  m_eob_run = 0;
+
+  // Input buffer state: empty, positioned at the start, no end of file seen yet.
+  m_pIn_buf_ofs = m_in_buf;
+  m_in_buf_left = 0;
+  m_eof_flag = false;
+  m_tem_flag = 0;
+
+  memset(m_in_buf_pad_start, 0, sizeof(m_in_buf_pad_start));
+  memset(m_in_buf, 0, sizeof(m_in_buf));
+  memset(m_in_buf_pad_end, 0, sizeof(m_in_buf_pad_end));
+
+  m_restart_interval = 0;
+  m_restarts_left    = 0;
+  m_next_restart_num = 0;
+
+  m_max_mcus_per_row = 0;
+  m_max_blocks_per_mcu = 0;
+  m_max_mcus_per_col = 0;
+
+  memset(m_last_dc_val, 0, sizeof(m_last_dc_val));
+  m_pMCU_coefficients = NULL;
+  m_pSample_buf = NULL;
+
+  m_total_bytes_read = 0;
+
+  m_pScan_line_0 = NULL;
+  m_pScan_line_1 = NULL;
+
+  // Ready the input buffer.
+  prep_in_buffer();
+
+  // Prime the bit buffer.
+  m_bits_left = 16;
+  m_bit_buf = 0;
+
+  get_bits(16);
+  get_bits(16);
+
+  // Until a block has been decoded, assume every coefficient position may be in use.
+  for (int i = 0; i < JPGD_MAX_BLOCKS_PER_MCU; i++)
+    m_mcu_block_max_zag[i] = 64;
+}
+
+// Fixed-point helpers used when building the colour-conversion tables below.
+// SCALEBITS: number of fractional bits.
+#define SCALEBITS 16
+// ONE_HALF: 0.5 in that fixed-point format (added before a right shift to round).
+#define ONE_HALF  ((int) 1 << (SCALEBITS-1))
+// FIX(x): converts the constant x to fixed point, rounding to nearest.
+#define FIX(x)    ((int) ((x) * (1L<<SCALEBITS) + 0.5f))
+
+// Builds the four 256-entry tables used for fast YCbCr to RGB conversion.
+// Each table is indexed by a chroma sample (0..255), which is first centred
+// around zero by subtracting 128.
+// m_crr and m_cbb hold finished, rounded integer offsets; m_crg and m_cbg are
+// left in SCALEBITS fixed point, with the rounding term folded into m_cbg only.
+void jpeg_decoder::create_look_ups()
+{
+  for (int sample = 0; sample < 256; sample++)
+  {
+    const int centered = sample - 128;
+
+    m_crr[sample] = ( FIX(1.40200f)  * centered + ONE_HALF) >> SCALEBITS;
+    m_cbb[sample] = ( FIX(1.77200f)  * centered + ONE_HALF) >> SCALEBITS;
+    m_crg[sample] = (-FIX(0.71414f)) * centered;
+    m_cbg[sample] = (-FIX(0.34414f)) * centered + ONE_HALF;
+  }
+}
+
+// This method throws back into the stream any bytes that were read
+// into the bit buffer during initial marker scanning, then re-primes the
+// bit buffer using get_bits_no_markers().
+void jpeg_decoder::fix_in_buffer()
+{
+  // In case any 0xFF's were pulled into the buffer during marker scanning.
+  // The bit buffer must be byte aligned at this point.
+  JPGD_ASSERT((m_bits_left & 7) == 0);
+
+  // The two low bytes are pushed back only when m_bits_left says so...
+  if (m_bits_left == 16)
+    stuff_char( (uint8)(m_bit_buf & 0xFF));
+
+  if (m_bits_left >= 8)
+    stuff_char( (uint8)((m_bit_buf >> 8) & 0xFF));
+
+  // ...the two high bytes are always pushed back.
+  stuff_char((uint8)((m_bit_buf >> 16) & 0xFF));
+  stuff_char((uint8)((m_bit_buf >> 24) & 0xFF));
+
+  // Refill the bit buffer, same priming sequence as in init() but via the no-markers reader.
+  m_bits_left = 16;
+  get_bits_no_markers(16);
+  get_bits_no_markers(16);
+}
+
+// Runs the inverse DCT on every block of the current MCU.
+// Input blocks are read from m_pMCU_coefficients (64 coefficients each); the
+// 64-byte results are written to m_pSample_buf at the slot selected by mcu_row.
+void jpeg_decoder::transform_mcu(int mcu_row)
+{
+  jpgd_block_t* pCoeffs = m_pMCU_coefficients;
+  uint8* pSamples = m_pSample_buf + mcu_row * m_blocks_per_mcu * 64;
+
+  for (int blk = 0; blk < m_blocks_per_mcu; blk++, pCoeffs += 64, pSamples += 64)
+    idct(pCoeffs, pSamples, m_mcu_block_max_zag[blk]);
+}
+
+// Lookup used by transform_mcu_expand(): indexed by (max_zag - 1), i.e. the
+// position of the last coefficient that may be in use in a block.
+// Each entry packs two values as (first * 16 + second); transform_mcu_expand()
+// switches on it to pick the matching P_Q<first, second> / R_S<first, second>
+// specialisation.
+static const uint8 s_max_rc[64] =
+{
+  17, 18, 34, 50, 50, 51, 52, 52, 52, 68, 84, 84, 84, 84, 85, 86, 86, 86, 86, 86,
+  102, 118, 118, 118, 118, 118, 118, 119, 120, 120, 120, 120, 120, 120, 120, 136,
+  136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136,
+  136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136
+};
+
+// Inverse-transforms the current MCU with chroma upsampling.
+// The first m_expanded_blocks_per_component blocks go through the regular idct().
+// Each of the next two blocks is expanded into four 8x8 output blocks: the
+// matrices P, Q, R, S are computed from its coefficients, combined, and each
+// combination is passed through idct_4x4().
+// Output goes to m_pSample_buf at the slot selected by mcu_row.
+void jpeg_decoder::transform_mcu_expand(int mcu_row)
+{
+  jpgd_block_t* pSrc_ptr = m_pMCU_coefficients;
+  uint8* pDst_ptr = m_pSample_buf + mcu_row * m_expanded_blocks_per_mcu * 64;
+
+  // Y IDCT
+	int mcu_block;
+  for (mcu_block = 0; mcu_block < m_expanded_blocks_per_component; mcu_block++)
+  {
+    idct(pSrc_ptr, pDst_ptr, m_mcu_block_max_zag[mcu_block]);
+    pSrc_ptr += 64;
+    pDst_ptr += 64;
+  }
+
+  // Chroma IDCT, with upsampling
+	jpgd_block_t temp_block[64];
+
+  // mcu_block keeps counting from where the loop above stopped, so the two
+  // iterations below handle the two blocks that follow the Y blocks.
+  for (int i = 0; i < 2; i++)
+  {
+    DCT_Upsample::Matrix44 P, Q, R, S;
+
+    JPGD_ASSERT(m_mcu_block_max_zag[mcu_block] >= 1);
+    JPGD_ASSERT(m_mcu_block_max_zag[mcu_block] <= 64);
+
+    int max_zag = m_mcu_block_max_zag[mcu_block++] - 1; 
+    if (max_zag <= 0) max_zag = 0; // should never happen, only here to shut up static analysis
+    // Pick the specialisation that matches how far the block's coefficients extend;
+    // the case labels decode the packed s_max_rc value as (first * 16 + second).
+    switch (s_max_rc[max_zag])
+    {
+    case 1*16+1:
+      DCT_Upsample::P_Q<1, 1>::calc(P, Q, pSrc_ptr);
+      DCT_Upsample::R_S<1, 1>::calc(R, S, pSrc_ptr);
+      break;
+    case 1*16+2:
+      DCT_Upsample::P_Q<1, 2>::calc(P, Q, pSrc_ptr);
+      DCT_Upsample::R_S<1, 2>::calc(R, S, pSrc_ptr);
+      break;
+    case 2*16+2:
+      DCT_Upsample::P_Q<2, 2>::calc(P, Q, pSrc_ptr);
+      DCT_Upsample::R_S<2, 2>::calc(R, S, pSrc_ptr);
+      break;
+    case 3*16+2:
+      DCT_Upsample::P_Q<3, 2>::calc(P, Q, pSrc_ptr);
+      DCT_Upsample::R_S<3, 2>::calc(R, S, pSrc_ptr);
+      break;
+    case 3*16+3:
+      DCT_Upsample::P_Q<3, 3>::calc(P, Q, pSrc_ptr);
+      DCT_Upsample::R_S<3, 3>::calc(R, S, pSrc_ptr);
+      break;
+    case 3*16+4:
+      DCT_Upsample::P_Q<3, 4>::calc(P, Q, pSrc_ptr);
+      DCT_Upsample::R_S<3, 4>::calc(R, S, pSrc_ptr);
+      break;
+    case 4*16+4:
+      DCT_Upsample::P_Q<4, 4>::calc(P, Q, pSrc_ptr);
+      DCT_Upsample::R_S<4, 4>::calc(R, S, pSrc_ptr);
+      break;
+    case 5*16+4:
+      DCT_Upsample::P_Q<5, 4>::calc(P, Q, pSrc_ptr);
+      DCT_Upsample::R_S<5, 4>::calc(R, S, pSrc_ptr);
+      break;
+    case 5*16+5:
+      DCT_Upsample::P_Q<5, 5>::calc(P, Q, pSrc_ptr);
+      DCT_Upsample::R_S<5, 5>::calc(R, S, pSrc_ptr);
+      break;
+    case 5*16+6:
+      DCT_Upsample::P_Q<5, 6>::calc(P, Q, pSrc_ptr);
+      DCT_Upsample::R_S<5, 6>::calc(R, S, pSrc_ptr);
+      break;
+    case 6*16+6:
+      DCT_Upsample::P_Q<6, 6>::calc(P, Q, pSrc_ptr);
+      DCT_Upsample::R_S<6, 6>::calc(R, S, pSrc_ptr);
+      break;
+    case 7*16+6:
+      DCT_Upsample::P_Q<7, 6>::calc(P, Q, pSrc_ptr);
+      DCT_Upsample::R_S<7, 6>::calc(R, S, pSrc_ptr);
+      break;
+    case 7*16+7:
+      DCT_Upsample::P_Q<7, 7>::calc(P, Q, pSrc_ptr);
+      DCT_Upsample::R_S<7, 7>::calc(R, S, pSrc_ptr);
+      break;
+    case 7*16+8:
+      DCT_Upsample::P_Q<7, 8>::calc(P, Q, pSrc_ptr);
+      DCT_Upsample::R_S<7, 8>::calc(R, S, pSrc_ptr);
+      break;
+    case 8*16+8:
+      DCT_Upsample::P_Q<8, 8>::calc(P, Q, pSrc_ptr);
+      DCT_Upsample::R_S<8, 8>::calc(R, S, pSrc_ptr);
+      break;
+    default:
+      // NOTE(review): if JPGD_ASSERT compiles to nothing, execution continues with
+      // P, Q, R and S in whatever state their default construction left them.
+      JPGD_ASSERT(false);
+    }
+
+    // a = P + Q, b = P - Q (computed in place in P), c = R + S, d = R - S (in place in R).
+    DCT_Upsample::Matrix44 a(P + Q); P -= Q;
+    DCT_Upsample::Matrix44& b = P;
+    DCT_Upsample::Matrix44 c(R + S); R -= S;
+    DCT_Upsample::Matrix44& d = R;
+
+    // Four output blocks per source block: a+c, a-c, b+d, b-d.
+    DCT_Upsample::Matrix44::add_and_store(temp_block, a, c);
+    idct_4x4(temp_block, pDst_ptr);
+    pDst_ptr += 64;
+
+    DCT_Upsample::Matrix44::sub_and_store(temp_block, a, c);
+    idct_4x4(temp_block, pDst_ptr);
+    pDst_ptr += 64;
+
+    DCT_Upsample::Matrix44::add_and_store(temp_block, b, d);
+    idct_4x4(temp_block, pDst_ptr);
+    pDst_ptr += 64;
+
+    DCT_Upsample::Matrix44::sub_and_store(temp_block, b, d);
+    idct_4x4(temp_block, pDst_ptr);
+    pDst_ptr += 64;
+
+    pSrc_ptr += 64;
+  }
+}
+
+// Loads and dequantizes the next row of (already decoded) coefficients.
+// Progressive images only.
+// For every MCU in the row, each block's DC value and 63 AC values are copied
+// from the coefficient buffers into m_pMCU_coefficients, m_mcu_block_max_zag is
+// updated, non-zero coefficients are multiplied by their quantizer, and the MCU
+// is inverse-transformed. Afterwards m_block_y_mcu is advanced to the next row.
+void jpeg_decoder::load_next_row()
+{
+  int i;
+  jpgd_block_t *p;
+  jpgd_quant_t *q;
+  // row_block is incremented below but never read in this function.
+  int mcu_row, mcu_block, row_block = 0;
+  int component_num, component_id;
+  // Per-component horizontal block position within the current row.
+  int block_x_mcu[JPGD_MAX_COMPONENTS];
+
+  memset(block_x_mcu, 0, JPGD_MAX_COMPONENTS * sizeof(int));
+
+  for (mcu_row = 0; mcu_row < m_mcus_per_row; mcu_row++)
+  {
+    // Block offsets inside the current MCU, for components with more than one block per MCU.
+    int block_x_mcu_ofs = 0, block_y_mcu_ofs = 0;
+
+    for (mcu_block = 0; mcu_block < m_blocks_per_mcu; mcu_block++)
+    {
+      component_id = m_mcu_org[mcu_block];
+      q = m_quant[m_comp_quant[component_id]];
+
+      p = m_pMCU_coefficients + 64 * mcu_block;
+
+      // DC and AC coefficients live in separate buffers; element 0 comes from the
+      // DC buffer, elements 1..63 from the AC buffer.
+      jpgd_block_t* pAC = coeff_buf_getp(m_ac_coeffs[component_id], block_x_mcu[component_id] + block_x_mcu_ofs, m_block_y_mcu[component_id] + block_y_mcu_ofs);
+      jpgd_block_t* pDC = coeff_buf_getp(m_dc_coeffs[component_id], block_x_mcu[component_id] + block_x_mcu_ofs, m_block_y_mcu[component_id] + block_y_mcu_ofs);
+      p[0] = pDC[0];
+      memcpy(&p[1], &pAC[1], 63 * sizeof(jpgd_block_t));
+
+      // Find the last non-zero coefficient in zag order (i stays 0 if only p[0] can be set).
+      for (i = 63; i > 0; i--)
+        if (p[g_ZAG[i]])
+          break;
+
+      m_mcu_block_max_zag[mcu_block] = i + 1;
+
+      // Dequantize from that position down to 0; zero coefficients are skipped.
+      for ( ; i >= 0; i--)
+				if (p[g_ZAG[i]])
+					p[g_ZAG[i]] = static_cast<jpgd_block_t>(p[g_ZAG[i]] * q[i]);
+
+      row_block++;
+
+      if (m_comps_in_scan == 1)
+        block_x_mcu[component_id]++;
+      else
+      {
+        // Walk the component's blocks inside the MCU: across first, then down;
+        // once all of them are done, move the component's x position to the next MCU.
+        if (++block_x_mcu_ofs == m_comp_h_samp[component_id])
+        {
+          block_x_mcu_ofs = 0;
+
+          if (++block_y_mcu_ofs == m_comp_v_samp[component_id])
+          {
+            block_y_mcu_ofs = 0;
+
+            block_x_mcu[component_id] += m_comp_h_samp[component_id];
+          }
+        }
+      }
+    }
+
+    if (m_freq_domain_chroma_upsample)
+      transform_mcu_expand(mcu_row);
+    else
+      transform_mcu(mcu_row);
+  }
+
+  // Advance the vertical block position of every component in the scan.
+  if (m_comps_in_scan == 1)
+    m_block_y_mcu[m_comp_list[0]]++;
+  else
+  {
+    for (component_num = 0; component_num < m_comps_in_scan; component_num++)
+    {
+      component_id = m_comp_list[component_num];
+
+      m_block_y_mcu[component_id] += m_comp_v_samp[component_id];
+    }
+  }
+}
+
+// Restart interval processing.
+// Looks for the expected RSTn marker within a bounded number of bytes, then
+// resets the DC predictors, the EOB run and the restart counters, and re-primes
+// the bit buffer. Aborts with JPGD_BAD_RESTART_MARKER if the marker is not
+// found or is not the one expected next.
+void jpeg_decoder::process_restart()
+{
+  // Let's scan a little bit to find the marker, but not _too_ far.
+  // 1536 is a "fudge factor" that determines how much to scan; the budget is
+  // shared between the two searches below.
+  int budget = 1536;
+  int marker = 0;
+
+  // Align to a byte boundary
+  // FIXME: Is this really necessary? get_bits_no_markers() never reads in markers!
+  //get_bits_no_markers(m_bits_left & 7);
+
+  // First search: consume bytes up to and including the first 0xFF.
+  while ((budget > 0) && (get_char() != 0xFF))
+    budget--;
+
+  if (budget == 0)
+    stop_decoding(JPGD_BAD_RESTART_MARKER);
+
+  // Second search: skip any further 0xFF bytes; the first other byte is the marker code.
+  while (budget > 0)
+  {
+    marker = get_char();
+    if (marker != 0xFF)
+      break;
+    budget--;
+  }
+
+  if (budget == 0)
+    stop_decoding(JPGD_BAD_RESTART_MARKER);
+
+  // Is it the expected marker? If not, something bad happened.
+  if (marker != (m_next_restart_num + M_RST0))
+    stop_decoding(JPGD_BAD_RESTART_MARKER);
+
+  // Reset each component's DC prediction values.
+  memset(&m_last_dc_val, 0, m_comps_in_frame * sizeof(uint));
+
+  m_eob_run = 0;
+  m_restarts_left = m_restart_interval;
+
+  // Restart markers cycle through eight codes.
+  m_next_restart_num = (m_next_restart_num + 1) & 7;
+
+  // Get the bit buffer going again...
+  m_bits_left = 16;
+  get_bits_no_markers(16);
+  get_bits_no_markers(16);
+}
+
+// Scales a decoded AC coefficient c by its quantizer value q.
+static inline int dequantize_ac(int c, int q)
+{
+  return c * q;
+}
+
+// Decodes and dequantizes the next row of coefficients: for every MCU in the row, Huffman-decodes each block into m_pMCU_coefficients and then transforms the MCU.
+void jpeg_decoder::decode_next_row()
+{
+  int row_block = 0; // counts blocks handled in this row; only incremented below
+
+  for (int mcu_row = 0; mcu_row < m_mcus_per_row; mcu_row++)
+  {
+    if ((m_restart_interval) && (m_restarts_left == 0)) // restart interval elapsed: resynchronize before this MCU
+      process_restart();
+
+    jpgd_block_t* p = m_pMCU_coefficients;
+    for (int mcu_block = 0; mcu_block < m_blocks_per_mcu; mcu_block++, p += 64) // p advances by one 64-coefficient block per iteration
+    {
+      int component_id = m_mcu_org[mcu_block];
+      jpgd_quant_t* q = m_quant[m_comp_quant[component_id]];
+
+      int r, s;
+      s = huff_decode(m_pHuff_tabs[m_comp_dc_tab[component_id]], r);
+      s = JPGD_HUFF_EXTEND(r, s);
+
+      m_last_dc_val[component_id] = (s += m_last_dc_val[component_id]); // DC is coded as a difference from this component's previous DC value
+
+      p[0] = static_cast<jpgd_block_t>(s * q[0]);
+
+      int prev_num_set = m_mcu_block_max_zag[mcu_block]; // value of k stored the last time this block slot was decoded; positions below it may hold stale data
+
+      huff_tables *pH = m_pHuff_tabs[m_comp_ac_tab[component_id]];
+
+      int k;
+      for (k = 1; k < 64; k++)
+      {
+        int extra_bits;
+        s = huff_decode(pH, extra_bits);
+
+        r = s >> 4; // high nibble: number of zero coefficients to skip
+        s &= 15; // low nibble: non-zero means a coefficient follows
+
+        if (s)
+        {
+          if (r)
+          {
+            if ((k + r) > 63)
+              stop_decoding(JPGD_DECODE_ERROR);
+
+            if (k < prev_num_set)
+            {
+              int n = JPGD_MIN(r, prev_num_set - k); // only clear skipped positions that were written previously
+              int kt = k;
+              while (n--)
+                p[g_ZAG[kt++]] = 0;
+            }
+
+            k += r;
+          }
+          
+          s = JPGD_HUFF_EXTEND(extra_bits, s);
+
+          JPGD_ASSERT(k < 64);
+
+          p[g_ZAG[k]] = static_cast<jpgd_block_t>(dequantize_ac(s, q[k])); //s * q[k];
+        }
+        else
+        {
+          if (r == 15) // run of 16 zero coefficients
+          {
+            if ((k + 16) > 64)
+              stop_decoding(JPGD_DECODE_ERROR);
+
+            if (k < prev_num_set)
+            {
+              int n = JPGD_MIN(16, prev_num_set - k);
+              int kt = k;
+              while (n--)
+              {
+                JPGD_ASSERT(kt <= 63);
+                p[g_ZAG[kt++]] = 0;
+              }
+            }
+
+            k += 16 - 1; // - 1 because the loop counter is k
+            JPGD_ASSERT(p[g_ZAG[k]] == 0);
+          }
+          else
+            break; // s == 0 and r != 15: no more coefficients are decoded for this block
+        }
+      }
+
+      if (k < prev_num_set) // clear whatever the previous use of this slot left beyond position k
+      {
+        int kt = k;
+        while (kt < prev_num_set)
+          p[g_ZAG[kt++]] = 0;
+      }
+
+      m_mcu_block_max_zag[mcu_block] = k; // remembered for the next decode into this block slot
+
+      row_block++;
+    }
+
+    if (m_freq_domain_chroma_upsample)
+      transform_mcu_expand(mcu_row);
+    else
+      transform_mcu(mcu_row);
+
+    m_restarts_left--;
+  }
+}
+
+// Converts one line of YCbCr H1V1 samples (1x1:1:1, 3 blocks per MCU) into 4-byte RGB pixels (4th byte = 255) in m_pScan_line_0.
+void jpeg_decoder::H1V1Convert()
+{
+  const int line_in_mcu = m_max_mcu_y_size - m_mcu_lines_left;
+  const uint8 *pSrc = m_pSample_buf + line_in_mcu * 8;
+  uint8 *pDst = m_pScan_line_0;
+
+  // Each MCU occupies three consecutive 64-byte blocks: Y, then Cb, then Cr.
+  for (int mcus_left = m_max_mcus_per_row; mcus_left > 0; --mcus_left, pSrc += 64 * 3)
+  {
+    const uint8 *pY = pSrc;
+    const uint8 *pCb = pSrc + 64;
+    const uint8 *pCr = pSrc + 128;
+
+    for (int x = 0; x < 8; ++x, pDst += 4)
+    {
+      const int luma = pY[x];
+      const int cb = pCb[x];
+      const int cr = pCr[x];
+      pDst[0] = clamp(luma + m_crr[cr]);
+      pDst[1] = clamp(luma + ((m_crg[cr] + m_cbg[cb]) >> 16));
+      pDst[2] = clamp(luma + m_cbb[cb]);
+      pDst[3] = 255;
+    }
+  }
+}
+
+// YCbCr H2V1 (2x1:1:1, 4 m_blocks per MCU) to RGB: writes one scan line into m_pScan_line_0 as 4-byte pixels (4th byte = 255).
+void jpeg_decoder::H2V1Convert()
+{
+  int row = m_max_mcu_y_size - m_mcu_lines_left;
+  uint8 *d0 = m_pScan_line_0;
+  uint8 *y = m_pSample_buf + row * 8;
+  uint8 *c = m_pSample_buf + 2*64 + row * 8; // chroma follows the two 64-byte luma blocks; Cr is read 64 bytes after Cb
+
+  for (int i = m_max_mcus_per_row; i > 0; i--)
+  {
+    for (int l = 0; l < 2; l++) // one pass per luma block of the MCU
+    {
+      for (int j = 0; j < 4; j++) // each chroma sample is shared by two horizontally adjacent luma samples
+      {
+        int cb = c[0];
+        int cr = c[64];
+
+        int rc = m_crr[cr];
+        int gc = ((m_crg[cr] + m_cbg[cb]) >> 16);
+        int bc = m_cbb[cb];
+
+        int yy = y[j<<1];
+        d0[0] = clamp(yy+rc);
+        d0[1] = clamp(yy+gc);
+        d0[2] = clamp(yy+bc);
+        d0[3] = 255;
+
+        yy = y[(j<<1)+1];
+        d0[4] = clamp(yy+rc);
+        d0[5] = clamp(yy+gc);
+        d0[6] = clamp(yy+bc);
+        d0[7] = 255;
+
+        d0 += 8;
+
+        c++;
+      }
+      y += 64;
+    }
+
+    y += 64*4 - 64*2; // y already moved past two blocks; step to the next MCU (4 blocks per MCU)
+    c += 64*4 - 8; // c already moved by 8 samples; step to the same row of the next MCU
+  }
+}
+
+// YCbCr H1V2 (1x2:1:1, 4 m_blocks per MCU) to RGB: fills two scan lines (m_pScan_line_0 and m_pScan_line_1) per call.
+void jpeg_decoder::H1V2Convert()
+{
+  int row = m_max_mcu_y_size - m_mcu_lines_left;
+  uint8 *d0 = m_pScan_line_0;
+  uint8 *d1 = m_pScan_line_1;
+  uint8 *y;
+  uint8 *c;
+
+  if (row < 8) // rows 0-7 come from the first luma block, rows 8-15 from the second
+    y = m_pSample_buf + row * 8;
+  else
+    y = m_pSample_buf + 64*1 + (row & 7) * 8;
+
+  c = m_pSample_buf + 64*2 + (row >> 1) * 8; // one chroma row serves two luma rows
+
+  for (int i = m_max_mcus_per_row; i > 0; i--)
+  {
+    for (int j = 0; j < 8; j++)
+    {
+      int cb = c[0+j];
+      int cr = c[64+j];
+
+      int rc = m_crr[cr];
+      int gc = ((m_crg[cr] + m_cbg[cb]) >> 16);
+      int bc = m_cbb[cb];
+
+      int yy = y[j];
+      d0[0] = clamp(yy+rc);
+      d0[1] = clamp(yy+gc);
+      d0[2] = clamp(yy+bc);
+      d0[3] = 255;
+
+      yy = y[8+j]; // luma sample directly below, written to the second scan line
+      d1[0] = clamp(yy+rc);
+      d1[1] = clamp(yy+gc);
+      d1[2] = clamp(yy+bc);
+      d1[3] = 255;
+
+      d0 += 4;
+      d1 += 4;
+    }
+
+    y += 64*4;
+    c += 64*4;
+  }
+}
+
+// YCbCr H2V2 (2x2:1:1, 6 m_blocks per MCU) to RGB: fills two scan lines (m_pScan_line_0 and m_pScan_line_1) per call.
+void jpeg_decoder::H2V2Convert()
+{
+	int row = m_max_mcu_y_size - m_mcu_lines_left;
+	uint8 *d0 = m_pScan_line_0;
+	uint8 *d1 = m_pScan_line_1;
+	uint8 *y;
+	uint8 *c;
+
+	if (row < 8) // rows 0-7 start in the first luma block, rows 8-15 in the third
+		y = m_pSample_buf + row * 8;
+	else
+		y = m_pSample_buf + 64*2 + (row & 7) * 8;
+
+	c = m_pSample_buf + 64*4 + (row >> 1) * 8; // chroma follows the four luma blocks; one chroma row serves two luma rows
+
+	for (int i = m_max_mcus_per_row; i > 0; i--)
+	{
+		for (int l = 0; l < 2; l++) // left luma block, then right luma block
+		{
+			for (int j = 0; j < 8; j += 2) // each chroma sample covers a 2x2 group of luma samples
+			{
+				int cb = c[0];
+				int cr = c[64];
+
+				int rc = m_crr[cr];
+				int gc = ((m_crg[cr] + m_cbg[cb]) >> 16);
+				int bc = m_cbb[cb];
+
+				int yy = y[j];
+				d0[0] = clamp(yy+rc);
+				d0[1] = clamp(yy+gc);
+				d0[2] = clamp(yy+bc);
+				d0[3] = 255;
+
+				yy = y[j+1];
+				d0[4] = clamp(yy+rc);
+				d0[5] = clamp(yy+gc);
+				d0[6] = clamp(yy+bc);
+				d0[7] = 255;
+
+				yy = y[j+8];
+				d1[0] = clamp(yy+rc);
+				d1[1] = clamp(yy+gc);
+				d1[2] = clamp(yy+bc);
+				d1[3] = 255;
+
+				yy = y[j+8+1];
+				d1[4] = clamp(yy+rc);
+				d1[5] = clamp(yy+gc);
+				d1[6] = clamp(yy+bc);
+				d1[7] = 255;
+
+				d0 += 8;
+				d1 += 8;
+
+				c++;
+			}
+			y += 64;
+		}
+
+		y += 64*6 - 64*2; // y already moved past two blocks; step to the next MCU (6 blocks per MCU)
+		c += 64*6 - 8; // c already moved by 8 samples; step to the same row of the next MCU
+	}
+}
+
+// Y (1 block per MCU) to 8-bit grayscale: copies the current 8-sample row of every MCU into m_pScan_line_0.
+void jpeg_decoder::gray_convert()
+{
+  int row = m_max_mcu_y_size - m_mcu_lines_left;
+  uint8 *d = m_pScan_line_0;
+  uint8 *s = m_pSample_buf + row * 8;
+
+  for (int i = m_max_mcus_per_row; i > 0; i--)
+  {
+    // memcpy instead of uint-punned stores: no aliasing/alignment assumptions and no reliance on sizeof(uint) == 4.
+    memcpy(d, s, 8);
+
+    s += 64;
+    d += 8;
+  }
+}
+
+void jpeg_decoder::expanded_convert()
+{ // Converts one line of the expanded sample buffer (used by decode() when m_freq_domain_chroma_upsample is set) to 4-byte RGB pixels.
+  int row = m_max_mcu_y_size - m_mcu_lines_left;
+
+  uint8* Py = m_pSample_buf + (row / 8) * 64 * m_comp_h_samp[0] + (row & 7) * 8;
+
+  uint8* d = m_pScan_line_0;
+
+  for (int i = m_max_mcus_per_row; i > 0; i--)
+  {
+    for (int k = 0; k < m_max_mcu_x_size; k += 8) // one 8-pixel-wide block column at a time
+    {
+      const int Y_ofs = k * 8;
+      const int Cb_ofs = Y_ofs + 64 * m_expanded_blocks_per_component; // Cb blocks follow all Y blocks of the MCU
+      const int Cr_ofs = Y_ofs + 64 * m_expanded_blocks_per_component * 2; // Cr blocks follow the Cb blocks
+      for (int j = 0; j < 8; j++)
+      {
+        int y = Py[Y_ofs + j];
+        int cb = Py[Cb_ofs + j];
+        int cr = Py[Cr_ofs + j];
+
+        d[0] = clamp(y + m_crr[cr]);
+        d[1] = clamp(y + ((m_crg[cr] + m_cbg[cb]) >> 16));
+        d[2] = clamp(y + m_cbb[cb]);
+        d[3] = 255;
+
+        d += 4;
+      }
+    }
+
+    Py += 64 * m_expanded_blocks_per_mcu;
+  }
+}
+
+// Find end of image (EOI) marker, so we can return to the user the exact size of the input stream.
+void jpeg_decoder::find_eoi()
+{
+  if (!m_progressive_flag) // the marker search below is only done for non-progressive images
+  {
+    // Attempt to read the EOI marker.
+    //get_bits_no_markers(m_bits_left & 7);
+
+    // Prime the bit buffer
+    m_bits_left = 16;
+    get_bits(16);
+    get_bits(16);
+
+    // The next marker _should_ be EOI
+    process_markers();
+  }
+
+  m_total_bytes_read -= m_in_buf_left; // bytes still sitting unread in the input buffer are not counted as read
+}
+
+int jpeg_decoder::decode(const void** pScan_line, uint* pScan_line_len)
+{ // Produces the next scan line: returns JPGD_SUCCESS with *pScan_line / *pScan_line_len set, JPGD_DONE when no lines remain, or JPGD_FAILED.
+  if ((m_error_code) || (!m_ready_flag))
+    return JPGD_FAILED;
+
+  if (m_total_lines_left == 0)
+    return JPGD_DONE;
+
+  if (m_mcu_lines_left == 0) // the current MCU row is used up: decode the next one
+  {
+    if (setjmp(m_jmp_state)) // NOTE(review): presumably the landing point for stop_decoding()'s longjmp - confirm
+      return JPGD_FAILED;
+
+    if (m_progressive_flag)
+      load_next_row();
+    else
+      decode_next_row();
+
+    // Find the EOI marker if that was the last row.
+    if (m_total_lines_left <= m_max_mcu_y_size)
+      find_eoi();
+
+    m_mcu_lines_left = m_max_mcu_y_size;
+  }
+
+  if (m_freq_domain_chroma_upsample)
+  {
+    expanded_convert();
+    *pScan_line = m_pScan_line_0;
+  }
+  else
+  {
+    switch (m_scan_type)
+    {
+      case JPGD_YH2V2:
+      {
+        if ((m_mcu_lines_left & 1) == 0) // H2V2Convert() fills both line buffers, so it runs on every other call
+        {
+          H2V2Convert();
+          *pScan_line = m_pScan_line_0;
+        }
+        else
+          *pScan_line = m_pScan_line_1; // second line produced by the previous call's conversion
+
+        break;
+      }
+      case JPGD_YH2V1:
+      {
+        H2V1Convert();
+        *pScan_line = m_pScan_line_0;
+        break;
+      }
+      case JPGD_YH1V2:
+      {
+        if ((m_mcu_lines_left & 1) == 0) // H1V2Convert() fills both line buffers, so it runs on every other call
+        {
+          H1V2Convert();
+          *pScan_line = m_pScan_line_0;
+        }
+        else
+          *pScan_line = m_pScan_line_1;
+
+        break;
+      }
+      case JPGD_YH1V1:
+      {
+        H1V1Convert();
+        *pScan_line = m_pScan_line_0;
+        break;
+      }
+      case JPGD_GRAYSCALE:
+      {
+        gray_convert();
+        *pScan_line = m_pScan_line_0;
+
+        break;
+      }
+    }
+  }
+
+  *pScan_line_len = m_real_dest_bytes_per_scan_line;
+
+  m_mcu_lines_left--;
+  m_total_lines_left--;
+
+  return JPGD_SUCCESS;
+}
+
+// Creates the tables needed for efficient Huffman decoding: fills pH from m_huff_num[index] (codes per length) and m_huff_val[index] (symbols).
+void jpeg_decoder::make_huff_table(int index, huff_tables *pH)
+{
+  int p, i, l, si;
+  uint8 huffsize[257];
+  uint huffcode[257];
+  uint code;
+  uint subtree;
+  int code_size;
+  int lastp;
+  int nextfreeentry;
+  int currententry;
+
+  pH->ac_table = m_huff_ac[index] != 0;
+
+  p = 0;
+
+  for (l = 1; l <= 16; l++) // expand the per-length counts into one code length per symbol
+  {
+    for (i = 1; i <= m_huff_num[index][l]; i++)
+      huffsize[p++] = static_cast<uint8>(l);
+  }
+
+  huffsize[p] = 0; // zero length terminates the list
+
+  lastp = p;
+
+  code = 0;
+  si = huffsize[0];
+  p = 0;
+
+  while (huffsize[p]) // assign consecutive code values within each length, doubling when the length grows
+  {
+    while (huffsize[p] == si)
+    {
+      huffcode[p++] = code;
+      code++;
+    }
+
+    code <<= 1;
+    si++;
+  }
+
+  memset(pH->look_up, 0, sizeof(pH->look_up));
+  memset(pH->look_up2, 0, sizeof(pH->look_up2));
+  memset(pH->tree, 0, sizeof(pH->tree));
+  memset(pH->code_size, 0, sizeof(pH->code_size));
+
+  nextfreeentry = -1; // tree nodes are identified by negative numbers, handed out two at a time
+
+  p = 0;
+
+  while (p < lastp)
+  {
+    i = m_huff_val[index][p];
+    code = huffcode[p];
+    code_size = huffsize[p];
+
+    pH->code_size[i] = static_cast<uint8>(code_size);
+
+    if (code_size <= 8) // short code: fill every 8-bit look-up slot that starts with this code
+    {
+      code <<= (8 - code_size);
+
+      for (l = 1 << (8 - code_size); l > 0; l--)
+      {
+        JPGD_ASSERT(i < 256);
+
+        pH->look_up[code] = i;
+
+        bool has_extrabits = false;
+				int extra_bits = 0;
+        int num_extra_bits = i & 15;
+
+        int bits_to_fetch = code_size;
+        if (num_extra_bits)
+        {
+          int total_codesize = code_size + num_extra_bits;
+          if (total_codesize <= 8) // code plus its extra bits fit in the 8-bit index, so the extra bits can be stored in look_up2
+          {
+            has_extrabits = true;
+            extra_bits = ((1 << num_extra_bits) - 1) & (code >> (8 - total_codesize));
+            JPGD_ASSERT(extra_bits <= 0x7FFF);
+            bits_to_fetch += num_extra_bits;
+          }
+        }
+
+        if (!has_extrabits)
+          pH->look_up2[code] = i | (bits_to_fetch << 8);
+        else
+          pH->look_up2[code] = i | 0x8000 | (extra_bits << 16) | (bits_to_fetch << 8); // 0x8000 flags that the extra bits are embedded
+
+        code++;
+      }
+    }
+    else // long code: the first 8 bits select a subtree, the remaining bits are walked through pH->tree
+    {
+      subtree = (code >> (code_size - 8)) & 0xFF;
+
+      currententry = pH->look_up[subtree];
+
+      if (currententry == 0)
+      {
+        pH->look_up[subtree] = currententry = nextfreeentry;
+        pH->look_up2[subtree] = currententry = nextfreeentry;
+
+        nextfreeentry -= 2;
+      }
+
+      code <<= (16 - (code_size - 8));
+
+      for (l = code_size; l > 9; l--)
+      {
+        if ((code & 0x8000) == 0)
+          currententry--;
+
+        if (pH->tree[-currententry - 1] == 0) // NOTE(review): tree index is not range-checked here; confirm table parsing keeps it inside pH->tree
+        {
+          pH->tree[-currententry - 1] = nextfreeentry;
+
+          currententry = nextfreeentry;
+
+          nextfreeentry -= 2;
+        }
+        else
+          currententry = pH->tree[-currententry - 1];
+
+        code <<= 1;
+      }
+
+      if ((code & 0x8000) == 0)
+        currententry--;
+
+      pH->tree[-currententry - 1] = i; // leaf: store the symbol
+    }
+
+    p++;
+  }
+}
+
+// Stops decoding unless every component in the current scan refers to a quantization table that is present.
+void jpeg_decoder::check_quant_tables()
+{
+  for (int scan_comp = 0; scan_comp < m_comps_in_scan; ++scan_comp)
+    if (!m_quant[m_comp_quant[m_comp_list[scan_comp]]])
+      stop_decoding(JPGD_UNDEFINED_QUANT_TABLE);
+}
+
+// Checks that each scan component has the DC/AC Huffman tables it needs, then builds decode tables for every defined table.
+void jpeg_decoder::check_huff_tables()
+{
+  for (int scan_comp = 0; scan_comp < m_comps_in_scan; scan_comp++)
+  {
+    const int comp_id = m_comp_list[scan_comp];
+    if ((m_spectral_start == 0) && !m_huff_num[m_comp_dc_tab[comp_id]])
+      stop_decoding(JPGD_UNDEFINED_HUFF_TABLE);
+    if ((m_spectral_end > 0) && !m_huff_num[m_comp_ac_tab[comp_id]])
+      stop_decoding(JPGD_UNDEFINED_HUFF_TABLE);
+  }
+
+  for (int table = 0; table < JPGD_MAX_HUFF_TABLES; table++)
+  {
+    if (!m_huff_num[table])
+      continue;
+    if (!m_pHuff_tabs[table])
+      m_pHuff_tabs[table] = (huff_tables *)alloc(sizeof(huff_tables));
+    make_huff_table(table, m_pHuff_tabs[table]);
+  }
+}
+
+// Determines the component order inside each MCU.
+// Also calcs how many MCU's are on each row, etc.
+void jpeg_decoder::calc_mcu_block_order()
+{
+  int component_num, component_id;
+  int max_h_samp = 0, max_v_samp = 0;
+
+  for (component_id = 0; component_id < m_comps_in_frame; component_id++) // find the largest sampling factors in the frame
+  {
+    if (m_comp_h_samp[component_id] > max_h_samp)
+      max_h_samp = m_comp_h_samp[component_id];
+
+    if (m_comp_v_samp[component_id] > max_v_samp)
+      max_v_samp = m_comp_v_samp[component_id];
+  }
+
+  for (component_id = 0; component_id < m_comps_in_frame; component_id++) // per-component size in 8x8 blocks, rounding up twice
+  {
+    m_comp_h_blocks[component_id] = ((((m_image_x_size * m_comp_h_samp[component_id]) + (max_h_samp - 1)) / max_h_samp) + 7) / 8;
+    m_comp_v_blocks[component_id] = ((((m_image_y_size * m_comp_v_samp[component_id]) + (max_v_samp - 1)) / max_v_samp) + 7) / 8;
+  }
+
+  if (m_comps_in_scan == 1) // single-component scan: one block per MCU, so the MCU grid is the component's block grid
+  {
+    m_mcus_per_row = m_comp_h_blocks[m_comp_list[0]];
+    m_mcus_per_col = m_comp_v_blocks[m_comp_list[0]];
+  }
+  else
+  {
+    m_mcus_per_row = (((m_image_x_size + 7) / 8) + (max_h_samp - 1)) / max_h_samp;
+    m_mcus_per_col = (((m_image_y_size + 7) / 8) + (max_v_samp - 1)) / max_v_samp;
+  }
+
+  if (m_comps_in_scan == 1)
+  {
+    m_mcu_org[0] = m_comp_list[0];
+
+    m_blocks_per_mcu = 1;
+  }
+  else
+  {
+    m_blocks_per_mcu = 0;
+
+    for (component_num = 0; component_num < m_comps_in_scan; component_num++)
+    {
+      int num_blocks;
+
+      component_id = m_comp_list[component_num];
+
+      num_blocks = m_comp_h_samp[component_id] * m_comp_v_samp[component_id];
+
+      while (num_blocks--) // m_mcu_org maps each block position within the MCU to its component
+        m_mcu_org[m_blocks_per_mcu++] = component_id;
+    }
+  }
+}
+
+// Starts a new scan. Returns JPGD_FALSE when locate_sos_marker() reports no further scan, JPGD_TRUE once the scan is set up.
+int jpeg_decoder::init_scan()
+{
+  if (!locate_sos_marker())
+    return JPGD_FALSE;
+
+  calc_mcu_block_order();
+
+  check_huff_tables();
+
+  check_quant_tables();
+
+  memset(m_last_dc_val, 0, m_comps_in_frame * sizeof(uint)); // DC prediction starts from zero in every scan
+
+  m_eob_run = 0;
+
+  if (m_restart_interval)
+  {
+    m_restarts_left = m_restart_interval;
+    m_next_restart_num = 0;
+  }
+
+  fix_in_buffer();
+
+  return JPGD_TRUE;
+}
+
+// Starts a frame. Determines if the number of components or sampling factors
+// are supported, then sizes and allocates the per-frame working buffers.
+void jpeg_decoder::init_frame()
+{
+  int i;
+
+  if (m_comps_in_frame == 1)
+  {
+    if ((m_comp_h_samp[0] != 1) || (m_comp_v_samp[0] != 1))
+      stop_decoding(JPGD_UNSUPPORTED_SAMP_FACTORS);
+
+    m_scan_type = JPGD_GRAYSCALE;
+    m_max_blocks_per_mcu = 1;
+    m_max_mcu_x_size = 8;
+    m_max_mcu_y_size = 8;
+  }
+  else if (m_comps_in_frame == 3)
+  {
+    if ( ((m_comp_h_samp[1] != 1) || (m_comp_v_samp[1] != 1)) ||
+         ((m_comp_h_samp[2] != 1) || (m_comp_v_samp[2] != 1)) )
+      stop_decoding(JPGD_UNSUPPORTED_SAMP_FACTORS); // both chroma components must be sampled 1x1
+
+    if ((m_comp_h_samp[0] == 1) && (m_comp_v_samp[0] == 1))
+    {
+      m_scan_type = JPGD_YH1V1;
+
+      m_max_blocks_per_mcu = 3;
+      m_max_mcu_x_size = 8;
+      m_max_mcu_y_size = 8;
+    }
+    else if ((m_comp_h_samp[0] == 2) && (m_comp_v_samp[0] == 1))
+    {
+      m_scan_type = JPGD_YH2V1;
+      m_max_blocks_per_mcu = 4;
+      m_max_mcu_x_size = 16;
+      m_max_mcu_y_size = 8;
+    }
+    else if ((m_comp_h_samp[0] == 1) && (m_comp_v_samp[0] == 2))
+    {
+      m_scan_type = JPGD_YH1V2;
+      m_max_blocks_per_mcu = 4;
+      m_max_mcu_x_size = 8;
+      m_max_mcu_y_size = 16;
+    }
+    else if ((m_comp_h_samp[0] == 2) && (m_comp_v_samp[0] == 2))
+    {
+      m_scan_type = JPGD_YH2V2;
+      m_max_blocks_per_mcu = 6;
+      m_max_mcu_x_size = 16;
+      m_max_mcu_y_size = 16;
+    }
+    else
+      stop_decoding(JPGD_UNSUPPORTED_SAMP_FACTORS);
+  }
+  else
+    stop_decoding(JPGD_UNSUPPORTED_COLORSPACE);
+
+  m_max_mcus_per_row = (m_image_x_size + (m_max_mcu_x_size - 1)) / m_max_mcu_x_size;
+  m_max_mcus_per_col = (m_image_y_size + (m_max_mcu_y_size - 1)) / m_max_mcu_y_size;
+
+  // These values are for the *destination* pixels: after conversion.
+  if (m_scan_type == JPGD_GRAYSCALE)
+    m_dest_bytes_per_pixel = 1;
+  else
+    m_dest_bytes_per_pixel = 4;
+
+  // Round the width up to a multiple of 16 pixels; ~15 (unlike a 0xFFF0 mask) does not also truncate the width to 16 bits.
+  m_dest_bytes_per_scan_line = ((m_image_x_size + 15) & ~15) * m_dest_bytes_per_pixel;
+
+  m_real_dest_bytes_per_scan_line = (m_image_x_size * m_dest_bytes_per_pixel);
+
+  // Initialize two scan line buffers.
+  m_pScan_line_0 = (uint8 *)alloc(m_dest_bytes_per_scan_line, true);
+  if ((m_scan_type == JPGD_YH1V2) || (m_scan_type == JPGD_YH2V2)) // only these scan types write a second line per conversion
+    m_pScan_line_1 = (uint8 *)alloc(m_dest_bytes_per_scan_line, true);
+
+  m_max_blocks_per_row = m_max_mcus_per_row * m_max_blocks_per_mcu;
+
+  // Should never happen
+  if (m_max_blocks_per_row > JPGD_MAX_BLOCKS_PER_ROW)
+    stop_decoding(JPGD_ASSERTION_ERROR);
+
+  // Allocate the coefficient buffer, enough for one MCU
+  m_pMCU_coefficients = (jpgd_block_t*)alloc(m_max_blocks_per_mcu * 64 * sizeof(jpgd_block_t));
+
+  for (i = 0; i < m_max_blocks_per_mcu; i++)
+    m_mcu_block_max_zag[i] = 64; // treat every coefficient as possibly stale the first time a block is decoded
+
+  m_expanded_blocks_per_component = m_comp_h_samp[0] * m_comp_v_samp[0];
+  m_expanded_blocks_per_mcu = m_expanded_blocks_per_component * m_comps_in_frame;
+  m_expanded_blocks_per_row = m_max_mcus_per_row * m_expanded_blocks_per_mcu;
+  // Freq. domain chroma upsampling is only supported for H2V2 subsampling factor (the most common one I've seen).
+  m_freq_domain_chroma_upsample = false;
+#if JPGD_SUPPORT_FREQ_DOMAIN_UPSAMPLING
+  m_freq_domain_chroma_upsample = (m_expanded_blocks_per_mcu == 4*3);
+#endif
+
+  if (m_freq_domain_chroma_upsample)
+    m_pSample_buf = (uint8 *)alloc(m_expanded_blocks_per_row * 64);
+  else
+    m_pSample_buf = (uint8 *)alloc(m_max_blocks_per_row * 64);
+
+  m_total_lines_left = m_image_y_size;
+
+  m_mcu_lines_left = 0; // forces decode() to decode an MCU row on its first call
+
+  create_look_ups();
+}
+
+// Historical note: the coeff_buf methods used to keep coefficients in a
+// "virtual" file (EMS, XMS, or disk) behind a cache. Today the whole buffer
+// simply lives in RAM. This one allocates a zero-filled grid of
+// block_num_x * block_num_y blocks, each block_len_x * block_len_y coefficients.
+jpeg_decoder::coeff_buf* jpeg_decoder::coeff_buf_open(int block_num_x, int block_num_y, int block_len_x, int block_len_y)
+{
+  coeff_buf* pBuf = (coeff_buf*)alloc(sizeof(coeff_buf));
+
+  pBuf->block_len_x = block_len_x;
+  pBuf->block_len_y = block_len_y;
+  pBuf->block_num_x = block_num_x;
+  pBuf->block_num_y = block_num_y;
+  pBuf->block_size = (block_len_x * block_len_y) * sizeof(jpgd_block_t);
+  pBuf->pData = (uint8 *)alloc(pBuf->block_size * block_num_x * block_num_y, true);
+  return pBuf;
+}
+
+inline jpgd_block_t *jpeg_decoder::coeff_buf_getp(coeff_buf *cb, int block_x, int block_y)
+{
+  JPGD_ASSERT((block_y < cb->block_num_y) && (block_x < cb->block_num_x)); // upper-bound check on both coordinates
+  return (jpgd_block_t *)(cb->pData + (block_y * cb->block_num_x + block_x) * cb->block_size); // blocks are laid out row by row
+}
+
+// The following methods decode the various types of m_blocks encountered
+// in progressively encoded images. This one handles the first DC scan of a block.
+void jpeg_decoder::decode_block_dc_first(jpeg_decoder *pD, int component_id, int block_x, int block_y)
+{
+  int s, r;
+  jpgd_block_t *p = pD->coeff_buf_getp(pD->m_dc_coeffs[component_id], block_x, block_y);
+
+  if ((s = pD->huff_decode(pD->m_pHuff_tabs[pD->m_comp_dc_tab[component_id]])) != 0)
+  {
+    r = pD->get_bits_no_markers(s);
+    s = JPGD_HUFF_EXTEND(r, s);
+  }
+
+  pD->m_last_dc_val[component_id] = (s += pD->m_last_dc_val[component_id]); // DC is coded as a difference from the previous DC value
+
+  // Scale by 2^m_successive_low with a multiply: s may be negative, and left-shifting a negative int is undefined before C++20.
+  p[0] = static_cast<jpgd_block_t>(s * (1 << pD->m_successive_low));
+}
+
+void jpeg_decoder::decode_block_dc_refine(jpeg_decoder *pD, int component_id, int block_x, int block_y)
+{
+  // DC refinement: one bit per block; when set, OR in the bit selected by m_successive_low.
+  if (!pD->get_bits_no_markers(1))
+    return;
+
+  jpgd_block_t *pCoeffs = pD->coeff_buf_getp(pD->m_dc_coeffs[component_id], block_x, block_y);
+  pCoeffs[0] |= (1 << pD->m_successive_low);
+}
+
+void jpeg_decoder::decode_block_ac_first(jpeg_decoder *pD, int component_id, int block_x, int block_y)
+{ // First AC scan of a block in a progressive image: decodes coefficients m_spectral_start..m_spectral_end.
+  int k, s, r;
+
+  if (pD->m_eob_run) // still inside a run of blocks with no further coefficients in this band
+  {
+    pD->m_eob_run--;
+    return;
+  }
+
+  jpgd_block_t *p = pD->coeff_buf_getp(pD->m_ac_coeffs[component_id], block_x, block_y);
+
+  for (k = pD->m_spectral_start; k <= pD->m_spectral_end; k++)
+  {
+    s = pD->huff_decode(pD->m_pHuff_tabs[pD->m_comp_ac_tab[component_id]]);
+
+    r = s >> 4; // high nibble: zero run length (or EOB run exponent when s == 0)
+    s &= 15; // low nibble: number of bits in the coefficient that follows
+
+    if (s)
+    {
+      if ((k += r) > 63)
+        pD->stop_decoding(JPGD_DECODE_ERROR);
+
+      r = pD->get_bits_no_markers(s);
+      s = JPGD_HUFF_EXTEND(r, s);
+
+      // Multiply instead of shifting: s may be negative, and left-shifting a negative int is undefined before C++20.
+      p[g_ZAG[k]] = static_cast<jpgd_block_t>(s * (1 << pD->m_successive_low));
+    }
+    else
+    {
+      if (r == 15) // skip 16 zero coefficients (15 here plus the loop increment)
+      {
+        if ((k += 15) > 63)
+          pD->stop_decoding(JPGD_DECODE_ERROR);
+      }
+      else
+      {
+        pD->m_eob_run = 1 << r;
+
+        if (r)
+          pD->m_eob_run += pD->get_bits_no_markers(r);
+
+        pD->m_eob_run--; // the current block counts as the first block of the run
+        break;
+      }
+    }
+  }
+}
+
+void jpeg_decoder::decode_block_ac_refine(jpeg_decoder *pD, int component_id, int block_x, int block_y)
+{ // AC refinement scan of a block in a progressive image: adds one more bit of precision to coefficients m_spectral_start..m_spectral_end.
+  int s, k, r;
+  int p1 = 1 << pD->m_successive_low; // +1 in the bit position being coded
+  int m1 = -p1; // -1 in the bit position being coded; written as -p1 because (-1) << n is undefined before C++20
+  jpgd_block_t *p = pD->coeff_buf_getp(pD->m_ac_coeffs[component_id], block_x, block_y);
+
+  JPGD_ASSERT(pD->m_spectral_end <= 63);
+
+  k = pD->m_spectral_start;
+
+  if (pD->m_eob_run == 0)
+  {
+    for ( ; k <= pD->m_spectral_end; k++)
+    {
+      s = pD->huff_decode(pD->m_pHuff_tabs[pD->m_comp_ac_tab[component_id]]);
+
+      r = s >> 4;
+      s &= 15;
+
+      if (s)
+      {
+        if (s != 1) // a newly non-zero coefficient can only have magnitude 1 in a refinement scan
+          pD->stop_decoding(JPGD_DECODE_ERROR);
+
+        if (pD->get_bits_no_markers(1))
+          s = p1;
+        else
+          s = m1;
+      }
+      else
+      {
+        if (r != 15)
+        {
+          pD->m_eob_run = 1 << r;
+
+          if (r)
+            pD->m_eob_run += pD->get_bits_no_markers(r);
+
+          break;
+        }
+      }
+
+      do // walk forward: refine already non-zero coefficients, and count r zero-valued ones to skip
+      {
+        jpgd_block_t *this_coef = p + g_ZAG[k & 63];
+
+        if (*this_coef != 0)
+        {
+          if (pD->get_bits_no_markers(1))
+          {
+            if ((*this_coef & p1) == 0)
+            {
+              if (*this_coef >= 0)
+                *this_coef = static_cast<jpgd_block_t>(*this_coef + p1);
+              else
+                *this_coef = static_cast<jpgd_block_t>(*this_coef + m1);
+            }
+          }
+        }
+        else
+        {
+          if (--r < 0)
+            break;
+        }
+
+        k++;
+
+      } while (k <= pD->m_spectral_end);
+
+      if ((s) && (k < 64))
+      {
+        p[g_ZAG[k]] = static_cast<jpgd_block_t>(s);
+      }
+    }
+  }
+
+  if (pD->m_eob_run > 0) // in an end-of-band run: only already non-zero coefficients receive correction bits
+  {
+    for ( ; k <= pD->m_spectral_end; k++)
+    {
+      jpgd_block_t *this_coef = p + g_ZAG[k & 63]; // logical AND to shut up static code analysis
+
+      if (*this_coef != 0)
+      {
+        if (pD->get_bits_no_markers(1))
+        {
+          if ((*this_coef & p1) == 0)
+          {
+            if (*this_coef >= 0)
+              *this_coef = static_cast<jpgd_block_t>(*this_coef + p1);
+            else
+              *this_coef = static_cast<jpgd_block_t>(*this_coef + m1);
+          }
+        }
+      }
+    }
+
+    pD->m_eob_run--;
+  }
+}
+
+// Decode a scan in a progressively encoded image: calls decode_block_func once for every block of every MCU in the scan.
+void jpeg_decoder::decode_scan(pDecode_block_func decode_block_func)
+{
+  int mcu_row, mcu_col, mcu_block;
+  int block_x_mcu[JPGD_MAX_COMPONENTS], block_y_mcu[JPGD_MAX_COMPONENTS]; // per-component block coordinates of the current MCU (local; no longer shadows the m_block_y_mcu member)
+
+  memset(block_y_mcu, 0, sizeof(block_y_mcu));
+
+  for (mcu_col = 0; mcu_col < m_mcus_per_col; mcu_col++)
+  {
+    int component_num, component_id;
+
+    memset(block_x_mcu, 0, sizeof(block_x_mcu));
+
+    for (mcu_row = 0; mcu_row < m_mcus_per_row; mcu_row++)
+    {
+      int block_x_mcu_ofs = 0, block_y_mcu_ofs = 0;
+
+      if ((m_restart_interval) && (m_restarts_left == 0))
+        process_restart();
+
+      for (mcu_block = 0; mcu_block < m_blocks_per_mcu; mcu_block++)
+      {
+        component_id = m_mcu_org[mcu_block];
+
+        decode_block_func(this, component_id, block_x_mcu[component_id] + block_x_mcu_ofs, block_y_mcu[component_id] + block_y_mcu_ofs);
+
+        if (m_comps_in_scan == 1)
+          block_x_mcu[component_id]++;
+        else
+        {
+          if (++block_x_mcu_ofs == m_comp_h_samp[component_id]) // finished one row of this component's blocks inside the MCU
+          {
+            block_x_mcu_ofs = 0;
+
+            if (++block_y_mcu_ofs == m_comp_v_samp[component_id]) // finished all of this component's blocks inside the MCU
+            {
+              block_y_mcu_ofs = 0;
+              block_x_mcu[component_id] += m_comp_h_samp[component_id];
+            }
+          }
+        }
+      }
+
+      m_restarts_left--;
+    }
+
+    if (m_comps_in_scan == 1)
+      block_y_mcu[m_comp_list[0]]++;
+    else
+    {
+      for (component_num = 0; component_num < m_comps_in_scan; component_num++)
+      {
+        component_id = m_comp_list[component_num];
+        block_y_mcu[component_id] += m_comp_v_samp[component_id];
+      }
+    }
+  }
+}
+
+// Decode a progressively encoded image: allocates the coefficient buffers, then decodes every scan into them.
+void jpeg_decoder::init_progressive()
+{
+  int i;
+
+  if (m_comps_in_frame == 4)
+    stop_decoding(JPGD_UNSUPPORTED_COLORSPACE);
+
+  // Allocate the coefficient buffers.
+  for (i = 0; i < m_comps_in_frame; i++)
+  {
+    m_dc_coeffs[i] = coeff_buf_open(m_max_mcus_per_row * m_comp_h_samp[i], m_max_mcus_per_col * m_comp_v_samp[i], 1, 1);
+    m_ac_coeffs[i] = coeff_buf_open(m_max_mcus_per_row * m_comp_h_samp[i], m_max_mcus_per_col * m_comp_v_samp[i], 8, 8);
+  }
+
+  for ( ; ; ) // one iteration per scan, until init_scan() finds no further scan
+  {
+    int dc_only_scan, refinement_scan;
+    pDecode_block_func decode_block_func;
+
+    if (!init_scan())
+      break;
+
+    dc_only_scan = (m_spectral_start == 0);
+    refinement_scan = (m_successive_high != 0);
+
+    if ((m_spectral_start > m_spectral_end) || (m_spectral_end > 63))
+      stop_decoding(JPGD_BAD_SOS_SPECTRAL);
+
+    if (dc_only_scan)
+    {
+      if (m_spectral_end) // a scan that starts at coefficient 0 must not include any AC coefficients
+        stop_decoding(JPGD_BAD_SOS_SPECTRAL);
+    }
+    else if (m_comps_in_scan != 1)  /* AC scans can only contain one component */
+      stop_decoding(JPGD_BAD_SOS_SPECTRAL);
+
+    if ((refinement_scan) && (m_successive_low != m_successive_high - 1)) // each refinement scan must add exactly one bit
+      stop_decoding(JPGD_BAD_SOS_SUCCESSIVE);
+
+    if (dc_only_scan)
+    {
+      if (refinement_scan)
+        decode_block_func = decode_block_dc_refine;
+      else
+        decode_block_func = decode_block_dc_first;
+    }
+    else
+    {
+      if (refinement_scan)
+        decode_block_func = decode_block_ac_refine;
+      else
+        decode_block_func = decode_block_ac_first;
+    }
+
+    decode_scan(decode_block_func);
+
+    m_bits_left = 16; // re-prime the bit buffer before looking for the next scan
+    get_bits(16);
+    get_bits(16);
+  }
+
+  m_comps_in_scan = m_comps_in_frame; // from here on, treat the image as one scan containing every frame component
+
+  for (i = 0; i < m_comps_in_frame; i++)
+    m_comp_list[i] = i;
+
+  calc_mcu_block_order();
+}
+
+void jpeg_decoder::init_sequential()
+{
+  const bool scan_found = (init_scan() != 0); // init_scan() yields a false value when no scan could be started
+  if (!scan_found) stop_decoding(JPGD_UNEXPECTED_MARKER);
+}
+
+void jpeg_decoder::decode_start()
+{
+  // Set up frame-level state, then start the single scan (sequential) or decode all scans (progressive).
+  init_frame();
+
+  if (!m_progressive_flag)
+    init_sequential();
+  else
+    init_progressive();
+}
+
+void jpeg_decoder::decode_init(jpeg_decoder_stream *pStream)
+{ // Called from the constructor: initializes the decoder for pStream, then looks for the frame (SOF) marker.
+  init(pStream);
+  locate_sof_marker();
+}
+
+jpeg_decoder::jpeg_decoder(jpeg_decoder_stream *pStream)
+{ // Construction never reports failure directly; decompress_jpeg_image_from_stream() checks get_error_code() afterwards.
+  if (setjmp(m_jmp_state)) // NOTE(review): presumably the landing point for stop_decoding()'s longjmp - confirm
+    return;
+  decode_init(pStream);
+}
+
+int jpeg_decoder::begin_decoding()
+{ // Prepares the decoder for decode() calls. Returns JPGD_SUCCESS (also when already prepared) or JPGD_FAILED.
+  if (m_ready_flag)
+    return JPGD_SUCCESS;
+
+  if (m_error_code) // an earlier failure is sticky
+    return JPGD_FAILED;
+
+  if (setjmp(m_jmp_state)) // NOTE(review): presumably the landing point for stop_decoding()'s longjmp - confirm
+    return JPGD_FAILED;
+
+  decode_start();
+
+  m_ready_flag = true;
+
+  return JPGD_SUCCESS;
+}
+
+jpeg_decoder::~jpeg_decoder()
+{ // NOTE(review): presumably releases every buffer obtained through alloc() - confirm against free_all_blocks().
+  free_all_blocks();
+}
+
+jpeg_decoder_file_stream::jpeg_decoder_file_stream()
+  : m_pFile(NULL),        // no file attached until open() succeeds
+    m_eof_flag(false),
+    m_error_flag(false)
+{
+}
+
+void jpeg_decoder_file_stream::close()
+{
+  // Detach the FILE handle (if any), clear the status flags, then close the handle.
+  FILE *pOpen_file = m_pFile;
+  m_pFile = NULL;
+  m_eof_flag = false;
+  m_error_flag = false;
+
+  if (pOpen_file != NULL)
+    fclose(pOpen_file);
+}
+
+jpeg_decoder_file_stream::~jpeg_decoder_file_stream()
+{ // Closes the file if one is still open.
+  close();
+}
+
+bool jpeg_decoder_file_stream::open(const char *Pfilename)
+{ // Opens Pfilename for binary reading, closing any previously opened file first. Returns true on success.
+  close();
+
+  m_eof_flag = false;
+  m_error_flag = false;
+
+#if defined(_MSC_VER)
+  m_pFile = NULL;
+  fopen_s(&m_pFile, Pfilename, "rb");
+#else
+  m_pFile = fopen(Pfilename, "rb");
+#endif
+  return m_pFile != NULL;
+}
+
+int jpeg_decoder_file_stream::read(uint8 *pBuf, int max_bytes_to_read, bool *pEOF_flag)
+{ // Reads up to max_bytes_to_read bytes into pBuf. Returns the number of bytes read, or -1 if no file is open or a read error occurred.
+  if (!m_pFile)
+    return -1;
+
+  if (m_eof_flag) // end of file was already reached by an earlier call
+  {
+    *pEOF_flag = true;
+    return 0;
+  }
+
+  if (m_error_flag) // an earlier read error is sticky
+    return -1;
+
+  int bytes_read = static_cast<int>(fread(pBuf, 1, max_bytes_to_read, m_pFile));
+  if (bytes_read < max_bytes_to_read) // short read: either an error or end of file
+  {
+    if (ferror(m_pFile))
+    {
+      m_error_flag = true;
+      return -1;
+    }
+
+    m_eof_flag = true;
+    *pEOF_flag = true; // note: *pEOF_flag is only ever set to true here, never reset to false
+  }
+
+  return bytes_read;
+}
+
+bool jpeg_decoder_mem_stream::open(const uint8 *pSrc_data, uint size)
+{
+  close(); // reset the stream before attaching the new buffer
+  m_size = size;
+  m_ofs = 0; // reading starts at the beginning of the buffer
+  m_pSrc_data = pSrc_data;
+  return true; // this function has no failure path
+}
+
+int jpeg_decoder_mem_stream::read(uint8 *pBuf, int max_bytes_to_read, bool *pEOF_flag)
+{
+  // Copies up to max_bytes_to_read bytes from the attached buffer; returns the count copied, or -1 if no buffer is attached.
+  *pEOF_flag = false;
+  if (m_pSrc_data == NULL)
+    return -1;
+
+  const uint available = m_size - m_ofs;
+  const bool truncated = static_cast<uint>(max_bytes_to_read) > available;
+  const int bytes_to_copy = truncated ? static_cast<int>(available) : max_bytes_to_read;
+
+  // EOF is reported only when the request asked for more than what was left.
+  if (truncated)
+    *pEOF_flag = true;
+
+  memcpy(pBuf, m_pSrc_data + m_ofs, bytes_to_copy);
+  m_ofs += bytes_to_copy;
+  return bytes_to_copy;
+}
+
+unsigned char *decompress_jpeg_image_from_stream(jpeg_decoder_stream *pStream, int *width, int *height, int *actual_comps, int req_comps)
+{ // Decodes a whole image from pStream into a jpgd_malloc'd buffer of width*height*req_comps bytes; returns NULL on any failure.
+  if (!actual_comps)
+    return NULL;
+  *actual_comps = 0;
+
+  if ((!pStream) || (!width) || (!height) || (!req_comps))
+    return NULL;
+
+  if ((req_comps != 1) && (req_comps != 3) && (req_comps != 4))
+    return NULL;
+
+  jpeg_decoder decoder(pStream);
+  if (decoder.get_error_code() != JPGD_SUCCESS)
+    return NULL;
+
+  const int image_width = decoder.get_width(), image_height = decoder.get_height();
+  *width = image_width; // the output parameters are filled in before decoding starts, so they are set even if decoding fails later
+  *height = image_height;
+  *actual_comps = decoder.get_num_components();
+
+  if (decoder.begin_decoding() != JPGD_SUCCESS)
+    return NULL;
+
+  const int dst_bpl = image_width * req_comps; // destination bytes per line
+
+  uint8 *pImage_data = (uint8*)jpgd_malloc(dst_bpl * image_height);
+  if (!pImage_data)
+    return NULL;
+
+  for (int y = 0; y < image_height; y++)
+  {
+    const uint8* pScan_line;
+    uint scan_line_len;
+    if (decoder.decode((const void**)&pScan_line, &scan_line_len) != JPGD_SUCCESS)
+    {
+      jpgd_free(pImage_data);
+      return NULL;
+    }
+
+    uint8 *pDst = pImage_data + y * dst_bpl;
+
+    if (((req_comps == 1) && (decoder.get_num_components() == 1)) || ((req_comps == 4) && (decoder.get_num_components() == 3))) // requested layout matches the decoder's 1- or 4-byte pixels
+      memcpy(pDst, pScan_line, dst_bpl);
+    else if (decoder.get_num_components() == 1) // grayscale source expanded to 3 or 4 components
+    {
+      if (req_comps == 3)
+      {
+        for (int x = 0; x < image_width; x++)
+        {
+          uint8 luma = pScan_line[x];
+          pDst[0] = luma;
+          pDst[1] = luma;
+          pDst[2] = luma;
+          pDst += 3;
+        }
+      }
+      else
+      {
+        for (int x = 0; x < image_width; x++)
+        {
+          uint8 luma = pScan_line[x];
+          pDst[0] = luma;
+          pDst[1] = luma;
+          pDst[2] = luma;
+          pDst[3] = 255;
+          pDst += 4;
+        }
+      }
+    }
+    else if (decoder.get_num_components() == 3) // color source (4 bytes per decoded pixel) reduced to 1 or 3 components
+    {
+      if (req_comps == 1)
+      {
+        const int YR = 19595, YG = 38470, YB = 7471; // 16.16 fixed-point luma weights; they sum to 65536
+        for (int x = 0; x < image_width; x++)
+        {
+          int r = pScan_line[x*4+0];
+          int g = pScan_line[x*4+1];
+          int b = pScan_line[x*4+2];
+          *pDst++ = static_cast<uint8>((r * YR + g * YG + b * YB + 32768) >> 16);
+        }
+      }
+      else
+      {
+        for (int x = 0; x < image_width; x++) // drop the fourth byte of every decoded pixel
+        {
+          pDst[0] = pScan_line[x*4+0];
+          pDst[1] = pScan_line[x*4+1];
+          pDst[2] = pScan_line[x*4+2];
+          pDst += 3;
+        }
+      }
+    }
+  }
+
+  return pImage_data;
+}
+
+unsigned char *decompress_jpeg_image_from_memory(const unsigned char *pSrc_data, int src_data_size, int *width, int *height, int *actual_comps, int req_comps)
+{
+  jpeg_decoder_mem_stream src(pSrc_data, src_data_size); // stream over the caller's buffer
+  return decompress_jpeg_image_from_stream(&src, width, height, actual_comps, req_comps);
+}
+
+unsigned char *decompress_jpeg_image_from_file(const char *pSrc_filename, int *width, int *height, int *actual_comps, int req_comps)
+{
+  jpeg_decoder_file_stream src;
+
+  // Decode only if the file could be opened; otherwise report failure with NULL.
+  return src.open(pSrc_filename) ? decompress_jpeg_image_from_stream(&src, width, height, actual_comps, req_comps) : NULL;
+}
+
+} // namespace jpgd
\ No newline at end of file
diff --git a/drivers/jpegd/jpgd.h b/drivers/jpegd/jpgd.h
new file mode 100644
index 00000000000..150b9a0b26f
--- /dev/null
+++ b/drivers/jpegd/jpgd.h
@@ -0,0 +1,319 @@
+// jpgd.h - C++ class for JPEG decompression.
+// Public domain, Rich Geldreich <richgel99@gmail.com>
+#ifndef JPEG_DECODER_H
+#define JPEG_DECODER_H
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <setjmp.h>
+
+#ifdef _MSC_VER
+  #define JPGD_NORETURN __declspec(noreturn) 
+#elif defined(__GNUC__)
+  #define JPGD_NORETURN __attribute__ ((noreturn))
+#else
+  #define JPGD_NORETURN
+#endif
+
+namespace jpgd
+{
+  typedef unsigned char  uint8;
+  typedef   signed short int16;
+  typedef unsigned short uint16;
+  typedef unsigned int   uint;
+  typedef   signed int   int32; // NOTE(review): int16/uint16/int32 assume a 16-bit short and a 32-bit int; nothing in this header enforces those widths
+
+  // Loads a JPEG image from a memory buffer or a file.
+  // req_comps can be 1 (grayscale), 3 (RGB), or 4 (RGBA).
+  // On return, width/height will be set to the image's dimensions, and actual_comps will be set to either 1 (grayscale) or 3 (RGB).
+  // Notes: For more control over where and how the source data is read, see the decompress_jpeg_image_from_stream() function below, or call the jpeg_decoder class directly.
+  // Requesting an 8 or 32bpp image is currently a little faster than 24bpp because the jpeg_decoder class itself currently always unpacks to either 8 or 32bpp.
+  unsigned char *decompress_jpeg_image_from_memory(const unsigned char *pSrc_data, int src_data_size, int *width, int *height, int *actual_comps, int req_comps);
+  unsigned char *decompress_jpeg_image_from_file(const char *pSrc_filename, int *width, int *height, int *actual_comps, int req_comps);
+
+  // Success/failure error codes. Specific failure codes start at JPGD_BAD_DHT_COUNTS (-256); each following enumerator is one greater than the previous.
+  enum jpgd_status
+  {
+    JPGD_SUCCESS = 0, JPGD_FAILED = -1, JPGD_DONE = 1,
+    JPGD_BAD_DHT_COUNTS = -256, JPGD_BAD_DHT_INDEX, JPGD_BAD_DHT_MARKER, JPGD_BAD_DQT_MARKER, JPGD_BAD_DQT_TABLE, 
+    JPGD_BAD_PRECISION, JPGD_BAD_HEIGHT, JPGD_BAD_WIDTH, JPGD_TOO_MANY_COMPONENTS, 
+    JPGD_BAD_SOF_LENGTH, JPGD_BAD_VARIABLE_MARKER, JPGD_BAD_DRI_LENGTH, JPGD_BAD_SOS_LENGTH,
+    JPGD_BAD_SOS_COMP_ID, JPGD_W_EXTRA_BYTES_BEFORE_MARKER, JPGD_NO_ARITHMITIC_SUPPORT, JPGD_UNEXPECTED_MARKER,
+    JPGD_NOT_JPEG, JPGD_UNSUPPORTED_MARKER, JPGD_BAD_DQT_LENGTH, JPGD_TOO_MANY_BLOCKS,
+    JPGD_UNDEFINED_QUANT_TABLE, JPGD_UNDEFINED_HUFF_TABLE, JPGD_NOT_SINGLE_SCAN, JPGD_UNSUPPORTED_COLORSPACE,
+    JPGD_UNSUPPORTED_SAMP_FACTORS, JPGD_DECODE_ERROR, JPGD_BAD_RESTART_MARKER, JPGD_ASSERTION_ERROR,
+    JPGD_BAD_SOS_SPECTRAL, JPGD_BAD_SOS_SUCCESSIVE, JPGD_STREAM_READ, JPGD_NOTENOUGHMEM
+  };
+    
+  // Input stream interface.
+  // Derive from this class to read input data from sources other than files or memory. Set *pEOF_flag to true in read() when no more data is available.
+  // The decoder is rather greedy: it will keep on calling this method until its internal input buffer is full, or until the EOF flag is set.
+  // If the input stream contains data after the JPEG stream's EOI (end of image) marker it will probably be pulled into the internal buffer.
+  // Call jpeg_decoder::get_total_bytes_read() to determine the actual size of the JPEG stream after successful decoding.
+  class jpeg_decoder_stream
+  {
+  public:
+    jpeg_decoder_stream() { }
+    virtual ~jpeg_decoder_stream() { }
+
+    // The read() method is called when the internal input buffer is empty.
+    // Parameters:
+    // pBuf - input buffer
+    // max_bytes_to_read - maximum bytes that can be written to pBuf
+    // pEOF_flag - set this to true if at end of stream (no more bytes remaining)
+    // Returns -1 on error, otherwise return the number of bytes actually written to the buffer (which may be 0).
+    // Notes: This method will be called in a loop until you set *pEOF_flag to true or the internal buffer is full.
+    virtual int read(uint8 *pBuf, int max_bytes_to_read, bool *pEOF_flag) = 0;
+  };
+
+  // stdio FILE stream class.
+  class jpeg_decoder_file_stream : public jpeg_decoder_stream
+  {
+    jpeg_decoder_file_stream(const jpeg_decoder_file_stream &); // copy construction is private: outside code cannot copy the stream
+    jpeg_decoder_file_stream &operator =(const jpeg_decoder_file_stream &); // copy assignment is private for the same reason
+
+    FILE *m_pFile;
+    bool m_eof_flag, m_error_flag;
+
+  public:
+    jpeg_decoder_file_stream();
+    virtual ~jpeg_decoder_file_stream();
+    
+    bool open(const char *Pfilename); // returns a success flag; decompress_jpeg_image_from_file() treats false as failure
+    void close();
+
+    virtual int read(uint8 *pBuf, int max_bytes_to_read, bool *pEOF_flag); // implements jpeg_decoder_stream::read()
+  };
+
+  // Memory stream class. The constructors store only the pointer and size they are given (no copy), so the source buffer presumably must stay valid while the stream is read.
+  class jpeg_decoder_mem_stream : public jpeg_decoder_stream
+  {
+    const uint8 *m_pSrc_data;
+    uint m_ofs, m_size; // m_ofs starts at 0; presumably the current read offset into m_pSrc_data — confirm in read()
+
+  public:
+    jpeg_decoder_mem_stream() : m_pSrc_data(NULL), m_ofs(0), m_size(0) { }
+    jpeg_decoder_mem_stream(const uint8 *pSrc_data, uint size) : m_pSrc_data(pSrc_data), m_ofs(0), m_size(size) { }
+
+    virtual ~jpeg_decoder_mem_stream() { }
+
+    bool open(const uint8 *pSrc_data, uint size);
+    void close() { m_pSrc_data = NULL; m_ofs = 0; m_size = 0; } // resets to the empty state; does not free the buffer
+    
+    virtual int read(uint8 *pBuf, int max_bytes_to_read, bool *pEOF_flag); // implements jpeg_decoder_stream::read()
+  };
+
+  // Loads JPEG file from a jpeg_decoder_stream.
+  unsigned char *decompress_jpeg_image_from_stream(jpeg_decoder_stream *pStream, int *width, int *height, int *actual_comps, int req_comps);
+
+  enum // Decoder constants: input buffer size plus the maximum table, component, block and image-dimension counts.
+  { 
+    JPGD_IN_BUF_SIZE = 8192, JPGD_MAX_BLOCKS_PER_MCU = 10, JPGD_MAX_HUFF_TABLES = 8, JPGD_MAX_QUANT_TABLES = 4, 
+    JPGD_MAX_COMPONENTS = 4, JPGD_MAX_COMPS_IN_SCAN = 4, JPGD_MAX_BLOCKS_PER_ROW = 8192, JPGD_MAX_HEIGHT = 16384, JPGD_MAX_WIDTH = 16384 
+  };
+          
+  typedef int16 jpgd_quant_t;
+  typedef int16 jpgd_block_t;
+
+  class jpeg_decoder
+  {
+  public:
+    // Call get_error_code() after constructing to determine if the stream is valid or not. You may call the get_width(), get_height(), etc.
+    // methods after the constructor is called. You may then either destruct the object, or begin decoding the image by calling begin_decoding(), then decode() on each scanline.
+    jpeg_decoder(jpeg_decoder_stream *pStream);
+
+    ~jpeg_decoder();
+
+    // Call this method after constructing the object to begin decompression.
+    // If JPGD_SUCCESS is returned you may then call decode() on each scanline.
+    int begin_decoding();
+
+    // Returns the next scan line.
+    // For grayscale images, pScan_line will point to a buffer containing 8-bit pixels (get_bytes_per_pixel() will return 1). 
+    // Otherwise, it will always point to a buffer containing 32-bit RGBA pixels (A will always be 255, and get_bytes_per_pixel() will return 4).
+    // Returns JPGD_SUCCESS if a scan line has been returned.
+    // Returns JPGD_DONE if all scan lines have been returned.
+    // Returns JPGD_FAILED if an error occurred. Call get_error_code() for more info.
+    int decode(const void** pScan_line, uint* pScan_line_len);
+    
+    inline jpgd_status get_error_code() const { return m_error_code; }
+
+    inline int get_width() const { return m_image_x_size; }
+    inline int get_height() const { return m_image_y_size; }
+
+    inline int get_num_components() const { return m_comps_in_frame; }
+
+    inline int get_bytes_per_pixel() const { return m_dest_bytes_per_pixel; }
+    inline int get_bytes_per_scan_line() const { return m_image_x_size * get_bytes_per_pixel(); }
+
+    // Returns the total number of bytes actually consumed by the decoder (which should equal the actual size of the JPEG file).
+    inline int get_total_bytes_read() const { return m_total_bytes_read; }
+    
+  private:
+    jpeg_decoder(const jpeg_decoder &); // copy construction is private: outside code cannot copy the decoder
+    jpeg_decoder &operator =(const jpeg_decoder &); // copy assignment is private for the same reason
+
+    typedef void (*pDecode_block_func)(jpeg_decoder *, int, int, int);
+
+    struct huff_tables
+    {
+      bool ac_table;
+      uint  look_up[256];
+      uint  look_up2[256];
+      uint8 code_size[256];
+      uint  tree[512];
+    };
+
+    struct coeff_buf
+    {
+      uint8 *pData;
+      int block_num_x, block_num_y;
+      int block_len_x, block_len_y;
+      int block_size;
+    };
+
+    struct mem_block
+    {
+      mem_block *m_pNext;
+      size_t m_used_count;
+      size_t m_size;
+      char m_data[1]; // NOTE(review): looks like a variable-length payload that extends past the struct — confirm against alloc()
+    };
+
+    jmp_buf m_jmp_state; // NOTE(review): presumably the setjmp() context that stop_decoding() longjmp()s to — confirm in jpgd.cpp
+    mem_block *m_pMem_blocks;
+    int m_image_x_size;
+    int m_image_y_size;
+    jpeg_decoder_stream *m_pStream;
+    int m_progressive_flag;
+    uint8 m_huff_ac[JPGD_MAX_HUFF_TABLES];
+    uint8* m_huff_num[JPGD_MAX_HUFF_TABLES];      // pointer to number of Huffman codes per bit size
+    uint8* m_huff_val[JPGD_MAX_HUFF_TABLES];      // pointer to Huffman codes per bit size
+    jpgd_quant_t* m_quant[JPGD_MAX_QUANT_TABLES]; // pointer to quantization tables
+    int m_scan_type;                              // Gray, Yh1v1, Yh1v2, Yh2v1, Yh2v2 (CMYK111, CMYK4114 no longer supported)
+    int m_comps_in_frame;                         // # of components in frame
+    int m_comp_h_samp[JPGD_MAX_COMPONENTS];       // component's horizontal sampling factor
+    int m_comp_v_samp[JPGD_MAX_COMPONENTS];       // component's vertical sampling factor
+    int m_comp_quant[JPGD_MAX_COMPONENTS];        // component's quantization table selector
+    int m_comp_ident[JPGD_MAX_COMPONENTS];        // component's ID
+    int m_comp_h_blocks[JPGD_MAX_COMPONENTS];
+    int m_comp_v_blocks[JPGD_MAX_COMPONENTS];
+    int m_comps_in_scan;                          // # of components in scan
+    int m_comp_list[JPGD_MAX_COMPS_IN_SCAN];      // components in this scan
+    int m_comp_dc_tab[JPGD_MAX_COMPONENTS];       // component's DC Huffman coding table selector
+    int m_comp_ac_tab[JPGD_MAX_COMPONENTS];       // component's AC Huffman coding table selector
+    int m_spectral_start;                         // spectral selection start
+    int m_spectral_end;                           // spectral selection end
+    int m_successive_low;                         // successive approximation low
+    int m_successive_high;                        // successive approximation high
+    int m_max_mcu_x_size;                         // MCU's max. X size in pixels
+    int m_max_mcu_y_size;                         // MCU's max. Y size in pixels
+    int m_blocks_per_mcu;
+    int m_max_blocks_per_row;
+    int m_mcus_per_row, m_mcus_per_col;
+    int m_mcu_org[JPGD_MAX_BLOCKS_PER_MCU];
+    int m_total_lines_left;                       // total # lines left in image
+    int m_mcu_lines_left;                         // total # lines left in this MCU
+    int m_real_dest_bytes_per_scan_line;
+    int m_dest_bytes_per_scan_line;               // rounded up
+    int m_dest_bytes_per_pixel;                   // 4 (RGBA, A always 255) or 1 (Y); see decode()
+    huff_tables* m_pHuff_tabs[JPGD_MAX_HUFF_TABLES];
+    coeff_buf* m_dc_coeffs[JPGD_MAX_COMPONENTS];
+    coeff_buf* m_ac_coeffs[JPGD_MAX_COMPONENTS];
+    int m_eob_run;
+    int m_block_y_mcu[JPGD_MAX_COMPONENTS];
+    uint8* m_pIn_buf_ofs;
+    int m_in_buf_left;
+    int m_tem_flag;
+    bool m_eof_flag;
+    uint8 m_in_buf_pad_start[128];
+    uint8 m_in_buf[JPGD_IN_BUF_SIZE + 128];
+    uint8 m_in_buf_pad_end[128];
+    int m_bits_left;
+    uint m_bit_buf;
+    int m_restart_interval;
+    int m_restarts_left;
+    int m_next_restart_num;
+    int m_max_mcus_per_row;
+    int m_max_blocks_per_mcu;
+    int m_expanded_blocks_per_mcu;
+    int m_expanded_blocks_per_row;
+    int m_expanded_blocks_per_component;
+    bool  m_freq_domain_chroma_upsample;
+    int m_max_mcus_per_col;
+    uint m_last_dc_val[JPGD_MAX_COMPONENTS];
+    jpgd_block_t* m_pMCU_coefficients;
+    int m_mcu_block_max_zag[JPGD_MAX_BLOCKS_PER_MCU];
+    uint8* m_pSample_buf;
+    int m_crr[256];
+    int m_cbb[256];
+    int m_crg[256];
+    int m_cbg[256];
+    uint8* m_pScan_line_0;
+    uint8* m_pScan_line_1;
+    jpgd_status m_error_code;
+    bool m_ready_flag;
+    int m_total_bytes_read;
+
+    void free_all_blocks();
+    JPGD_NORETURN void stop_decoding(jpgd_status status); // marked noreturn on compilers where JPGD_NORETURN expands to an attribute
+    void *alloc(size_t n, bool zero = false);
+    void word_clear(void *p, uint16 c, uint n);
+    void prep_in_buffer();
+    void read_dht_marker();
+    void read_dqt_marker();
+    void read_sof_marker();
+    void skip_variable_marker();
+    void read_dri_marker();
+    void read_sos_marker();
+    int next_marker();
+    int process_markers();
+    void locate_soi_marker();
+    void locate_sof_marker();
+    int locate_sos_marker();
+    void init(jpeg_decoder_stream * pStream);
+    void create_look_ups();
+    void fix_in_buffer();
+    void transform_mcu(int mcu_row);
+    void transform_mcu_expand(int mcu_row);
+    coeff_buf* coeff_buf_open(int block_num_x, int block_num_y, int block_len_x, int block_len_y);
+    inline jpgd_block_t *coeff_buf_getp(coeff_buf *cb, int block_x, int block_y);
+    void load_next_row();
+    void decode_next_row();
+    void make_huff_table(int index, huff_tables *pH);
+    void check_quant_tables();
+    void check_huff_tables();
+    void calc_mcu_block_order();
+    int init_scan();
+    void init_frame();
+    void process_restart();
+    void decode_scan(pDecode_block_func decode_block_func);
+    void init_progressive();
+    void init_sequential();
+    void decode_start();
+    void decode_init(jpeg_decoder_stream * pStream);
+    void H2V2Convert();
+    void H2V1Convert();
+    void H1V2Convert();
+    void H1V1Convert();
+    void gray_convert();
+    void expanded_convert();
+    void find_eoi();
+    inline uint get_char();
+    inline uint get_char(bool *pPadding_flag);
+    inline void stuff_char(uint8 q);
+    inline uint8 get_octet();
+    inline uint get_bits(int num_bits);
+    inline uint get_bits_no_markers(int numbits);
+    inline int huff_decode(huff_tables *pH);
+    inline int huff_decode(huff_tables *pH, int& extrabits);
+    static inline uint8 clamp(int i);
+    static void decode_block_dc_first(jpeg_decoder *pD, int component_id, int block_x, int block_y);
+    static void decode_block_dc_refine(jpeg_decoder *pD, int component_id, int block_x, int block_y);
+    static void decode_block_ac_first(jpeg_decoder *pD, int component_id, int block_x, int block_y);
+    static void decode_block_ac_refine(jpeg_decoder *pD, int component_id, int block_x, int block_y);
+  };
+  
+} // namespace jpgd
+
+#endif // JPEG_DECODER_H
diff --git a/drivers/jpg/image_loader_jpg.cpp b/drivers/jpg/image_loader_jpg.cpp
deleted file mode 100644
index 62b587f61ca..00000000000
--- a/drivers/jpg/image_loader_jpg.cpp
+++ /dev/null
@@ -1,93 +0,0 @@
-/*************************************************/
-/*  image_loader_jpg.cpp                         */
-/*************************************************/
-/*            This file is part of:              */
-/*                GODOT ENGINE                   */
-/*************************************************/
-/*       Source code within this file is:        */
-/*  (c) 2007-2016 Juan Linietsky, Ariel Manzur   */
-/*             All Rights Reserved.              */
-/*************************************************/
-
-#include "image_loader_jpg.h"
-
-#include "print_string.h"
-#include "os/os.h"
-#include "drivers/jpg/tinyjpeg.h"
-
-
-static void* _tinyjpg_alloc(unsigned int amount) {
-
-	return memalloc(amount);
-}
-
-static void _tinyjpg_free(void *ptr) {
-
-	memfree(ptr);
-}
-
-Error ImageLoaderJPG::load_image(Image *p_image,FileAccess *f) {
-
-
-	DVector<uint8_t> src_image;
-	int src_image_len = f->get_len();
-	ERR_FAIL_COND_V(src_image_len == 0, ERR_FILE_CORRUPT);
-	src_image.resize(src_image_len);
-
-	DVector<uint8_t>::Write w = src_image.write();
-
-	f->get_buffer(&w[0],src_image_len);
-
-	f->close();
-
-	jdec_private* jdec=tinyjpeg_init(_tinyjpg_alloc,_tinyjpg_free);
-	ERR_FAIL_COND_V(!jdec,ERR_UNAVAILABLE);
-
-	int ret = tinyjpeg_parse_header(jdec,&w[0],src_image_len);
-
-	if (ret!=0) {
-		tinyjpeg_free(jdec);
-	}
-
-	ERR_FAIL_COND_V(ret!=0,ERR_FILE_CORRUPT);
-
-	unsigned int width,height;
-
-
-	tinyjpeg_get_size(jdec,&width,&height);
-
-
-
-	DVector<uint8_t> imgdata;
-	imgdata.resize(width*height*3);
-	DVector<uint8_t>::Write imgdataw = imgdata.write();
-
-
-	unsigned char *components[1]={&imgdataw[0]};
-	tinyjpeg_set_components(jdec,components,1);
-	tinyjpeg_decode(jdec,TINYJPEG_FMT_RGB24);
-	imgdataw = DVector<uint8_t>::Write();
-
-	Image dst_image(width,height,0,Image::FORMAT_RGB,imgdata);
-
-	tinyjpeg_free(jdec);
-
-	*p_image=dst_image;
-
-	return OK;
-
-}
-
-void ImageLoaderJPG::get_recognized_extensions(List<String> *p_extensions) const {
-	
-	p_extensions->push_back("jpg");
-	p_extensions->push_back("jpeg");
-}
-
-
-ImageLoaderJPG::ImageLoaderJPG() {
-
-
-}
-
-
diff --git a/drivers/jpg/jidctflt.c b/drivers/jpg/jidctflt.c
deleted file mode 100644
index 40a9eab83e4..00000000000
--- a/drivers/jpg/jidctflt.c
+++ /dev/null
@@ -1,286 +0,0 @@
-/*
- * jidctflt.c
- *
- * Copyright (C) 1994-1998, Thomas G. Lane.
- * This file is part of the Independent JPEG Group's software.
- *
- * The authors make NO WARRANTY or representation, either express or implied,
- * with respect to this software, its quality, accuracy, merchantability, or 
- * fitness for a particular purpose.  This software is provided "AS IS", and you,
- * its user, assume the entire risk as to its quality and accuracy.
- *
- * This software is copyright (C) 1991-1998, Thomas G. Lane.
- * All Rights Reserved except as specified below.
- *
- * Permission is hereby granted to use, copy, modify, and distribute this
- * software (or portions thereof) for any purpose, without fee, subject to these
- * conditions:
- * (1) If any part of the source code for this software is distributed, then this
- * README file must be included, with this copyright and no-warranty notice
- * unaltered; and any additions, deletions, or changes to the original files
- * must be clearly indicated in accompanying documentation.
- * (2) If only executable code is distributed, then the accompanying
- * documentation must state that "this software is based in part on the work of
- * the Independent JPEG Group".
- * (3) Permission for use of this software is granted only if the user accepts
- * full responsibility for any undesirable consequences; the authors accept
- * NO LIABILITY for damages of any kind.
- * 
- * These conditions apply to any software derived from or based on the IJG code,
- * not just to the unmodified library.  If you use our work, you ought to
- * acknowledge us.
- * 
- * Permission is NOT granted for the use of any IJG author's name or company name
- * in advertising or publicity relating to this software or products derived from
- * it.  This software may be referred to only as "the Independent JPEG Group's
- * software".
- * 
- * We specifically permit and encourage the use of this software as the basis of
- * commercial products, provided that all warranty or liability claims are
- * assumed by the product vendor.
- *
- *
- * This file contains a floating-point implementation of the
- * inverse DCT (Discrete Cosine Transform).  In the IJG code, this routine
- * must also perform dequantization of the input coefficients.
- *
- * This implementation should be more accurate than either of the integer
- * IDCT implementations.  However, it may not give the same results on all
- * machines because of differences in roundoff behavior.  Speed will depend
- * on the hardware's floating point capacity.
- *
- * A 2-D IDCT can be done by 1-D IDCT on each column followed by 1-D IDCT
- * on each row (or vice versa, but it's more convenient to emit a row at
- * a time).  Direct algorithms are also available, but they are much more
- * complex and seem not to be any faster when reduced to code.
- *
- * This implementation is based on Arai, Agui, and Nakajima's algorithm for
- * scaled DCT.  Their original paper (Trans. IEICE E-71(11):1095) is in
- * Japanese, but the algorithm is described in the Pennebaker & Mitchell
- * JPEG textbook (see REFERENCES section in file README).  The following code
- * is based directly on figure 4-8 in P&M.
- * While an 8-point DCT cannot be done in less than 11 multiplies, it is
- * possible to arrange the computation so that many of the multiplies are
- * simple scalings of the final outputs.  These multiplies can then be
- * folded into the multiplications or divisions by the JPEG quantization
- * table entries.  The AA&N method leaves only 5 multiplies and 29 adds
- * to be done in the DCT itself.
- * The primary disadvantage of this method is that with a fixed-point
- * implementation, accuracy is lost due to imprecise representation of the
- * scaled quantization values.  However, that problem does not arise if
- * we use floating point arithmetic.
- */
-
-#include "tinyjpeg-internal.h"
-
-#define FAST_FLOAT float
-#define DCTSIZE	   8
-#define DCTSIZE2   (DCTSIZE*DCTSIZE)
-
-#define DEQUANTIZE(coef,quantval)  (((FAST_FLOAT) (coef)) * (quantval))
-
-#if 0 && defined(__GNUC__) && (defined(__i686__))
-// || defined(__x86_64__))
-
-static inline unsigned char descale_and_clamp(int x, int shift)
-{
-  __asm__ (
-      "add %3,%1\n"
-      "\tsar %2,%1\n"
-      "\tsub $-128,%1\n"
-      "\tcmovl %5,%1\n"	/* Use the sub to compare to 0 */
-      "\tcmpl %4,%1\n" 
-      "\tcmovg %4,%1\n"
-      : "=r"(x) 
-      : "0"(x), "Ir"(shift), "ir"(1UL<<(shift-1)), "r" (0xff), "r" (0)
-      );
-  return x;
-}
-
-#else
-static __inline unsigned char descale_and_clamp(int x, int shift)
-{
-  x += (1UL<<(shift-1));
-  if (x<0)
-    x = (x >> shift) | ((~(0UL)) << (32-(shift)));
-  else
-    x >>= shift;
-  x += 128;
-  if (x>255)
-    return 255;
-  else if (x<0)
-    return 0;
-  else 
-    return x;
-}
-#endif
-
-/*
- * Perform dequantization and inverse DCT on one block of coefficients.
- */
-
-void
-tinyjpeg_idct_float (struct component *compptr, uint8_t *output_buf, int stride)
-{
-  FAST_FLOAT tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
-  FAST_FLOAT tmp10, tmp11, tmp12, tmp13;
-  FAST_FLOAT z5, z10, z11, z12, z13;
-  int16_t *inptr;
-  FAST_FLOAT *quantptr;
-  FAST_FLOAT *wsptr;
-  uint8_t *outptr;
-  int ctr;
-  FAST_FLOAT workspace[DCTSIZE2]; /* buffers data between passes */
-
-  /* Pass 1: process columns from input, store into work array. */
-
-  inptr = compptr->DCT;
-  quantptr = compptr->Q_table;
-  wsptr = workspace;
-  for (ctr = DCTSIZE; ctr > 0; ctr--) {
-    /* Due to quantization, we will usually find that many of the input
-     * coefficients are zero, especially the AC terms.  We can exploit this
-     * by short-circuiting the IDCT calculation for any column in which all
-     * the AC terms are zero.  In that case each output is equal to the
-     * DC coefficient (with scale factor as needed).
-     * With typical images and quantization tables, half or more of the
-     * column DCT calculations can be simplified this way.
-     */
-    
-    if (inptr[DCTSIZE*1] == 0 && inptr[DCTSIZE*2] == 0 &&
-	inptr[DCTSIZE*3] == 0 && inptr[DCTSIZE*4] == 0 &&
-	inptr[DCTSIZE*5] == 0 && inptr[DCTSIZE*6] == 0 &&
-	inptr[DCTSIZE*7] == 0) {
-      /* AC terms all zero */
-      FAST_FLOAT dcval = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
-      
-      wsptr[DCTSIZE*0] = dcval;
-      wsptr[DCTSIZE*1] = dcval;
-      wsptr[DCTSIZE*2] = dcval;
-      wsptr[DCTSIZE*3] = dcval;
-      wsptr[DCTSIZE*4] = dcval;
-      wsptr[DCTSIZE*5] = dcval;
-      wsptr[DCTSIZE*6] = dcval;
-      wsptr[DCTSIZE*7] = dcval;
-      
-      inptr++;			/* advance pointers to next column */
-      quantptr++;
-      wsptr++;
-      continue;
-    }
-    
-    /* Even part */
-
-    tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
-    tmp1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
-    tmp2 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
-    tmp3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
-
-    tmp10 = tmp0 + tmp2;	/* phase 3 */
-    tmp11 = tmp0 - tmp2;
-
-    tmp13 = tmp1 + tmp3;	/* phases 5-3 */
-    tmp12 = (tmp1 - tmp3) * ((FAST_FLOAT) 1.414213562) - tmp13; /* 2*c4 */
-
-    tmp0 = tmp10 + tmp13;	/* phase 2 */
-    tmp3 = tmp10 - tmp13;
-    tmp1 = tmp11 + tmp12;
-    tmp2 = tmp11 - tmp12;
-    
-    /* Odd part */
-
-    tmp4 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
-    tmp5 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
-    tmp6 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
-    tmp7 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
-
-    z13 = tmp6 + tmp5;		/* phase 6 */
-    z10 = tmp6 - tmp5;
-    z11 = tmp4 + tmp7;
-    z12 = tmp4 - tmp7;
-
-    tmp7 = z11 + z13;		/* phase 5 */
-    tmp11 = (z11 - z13) * ((FAST_FLOAT) 1.414213562); /* 2*c4 */
-
-    z5 = (z10 + z12) * ((FAST_FLOAT) 1.847759065); /* 2*c2 */
-    tmp10 = ((FAST_FLOAT) 1.082392200) * z12 - z5; /* 2*(c2-c6) */
-    tmp12 = ((FAST_FLOAT) -2.613125930) * z10 + z5; /* -2*(c2+c6) */
-
-    tmp6 = tmp12 - tmp7;	/* phase 2 */
-    tmp5 = tmp11 - tmp6;
-    tmp4 = tmp10 + tmp5;
-
-    wsptr[DCTSIZE*0] = tmp0 + tmp7;
-    wsptr[DCTSIZE*7] = tmp0 - tmp7;
-    wsptr[DCTSIZE*1] = tmp1 + tmp6;
-    wsptr[DCTSIZE*6] = tmp1 - tmp6;
-    wsptr[DCTSIZE*2] = tmp2 + tmp5;
-    wsptr[DCTSIZE*5] = tmp2 - tmp5;
-    wsptr[DCTSIZE*4] = tmp3 + tmp4;
-    wsptr[DCTSIZE*3] = tmp3 - tmp4;
-
-    inptr++;			/* advance pointers to next column */
-    quantptr++;
-    wsptr++;
-  }
-  
-  /* Pass 2: process rows from work array, store into output array. */
-  /* Note that we must descale the results by a factor of 8 == 2**3. */
-
-  wsptr = workspace;
-  outptr = output_buf;
-  for (ctr = 0; ctr < DCTSIZE; ctr++) {
-    /* Rows of zeroes can be exploited in the same way as we did with columns.
-     * However, the column calculation has created many nonzero AC terms, so
-     * the simplification applies less often (typically 5% to 10% of the time).
-     * And testing floats for zero is relatively expensive, so we don't bother.
-     */
-    
-    /* Even part */
-
-    tmp10 = wsptr[0] + wsptr[4];
-    tmp11 = wsptr[0] - wsptr[4];
-
-    tmp13 = wsptr[2] + wsptr[6];
-    tmp12 = (wsptr[2] - wsptr[6]) * ((FAST_FLOAT) 1.414213562) - tmp13;
-
-    tmp0 = tmp10 + tmp13;
-    tmp3 = tmp10 - tmp13;
-    tmp1 = tmp11 + tmp12;
-    tmp2 = tmp11 - tmp12;
-
-    /* Odd part */
-
-    z13 = wsptr[5] + wsptr[3];
-    z10 = wsptr[5] - wsptr[3];
-    z11 = wsptr[1] + wsptr[7];
-    z12 = wsptr[1] - wsptr[7];
-
-    tmp7 = z11 + z13;
-    tmp11 = (z11 - z13) * ((FAST_FLOAT) 1.414213562);
-
-    z5 = (z10 + z12) * ((FAST_FLOAT) 1.847759065); /* 2*c2 */
-    tmp10 = ((FAST_FLOAT) 1.082392200) * z12 - z5; /* 2*(c2-c6) */
-    tmp12 = ((FAST_FLOAT) -2.613125930) * z10 + z5; /* -2*(c2+c6) */
-
-    tmp6 = tmp12 - tmp7;
-    tmp5 = tmp11 - tmp6;
-    tmp4 = tmp10 + tmp5;
-
-    /* Final output stage: scale down by a factor of 8 and range-limit */
-
-    outptr[0] = descale_and_clamp((int)(tmp0 + tmp7), 3);
-    outptr[7] = descale_and_clamp((int)(tmp0 - tmp7), 3);
-    outptr[1] = descale_and_clamp((int)(tmp1 + tmp6), 3);
-    outptr[6] = descale_and_clamp((int)(tmp1 - tmp6), 3);
-    outptr[2] = descale_and_clamp((int)(tmp2 + tmp5), 3);
-    outptr[5] = descale_and_clamp((int)(tmp2 - tmp5), 3);
-    outptr[4] = descale_and_clamp((int)(tmp3 + tmp4), 3);
-    outptr[3] = descale_and_clamp((int)(tmp3 - tmp4), 3);
-
-    
-    wsptr += DCTSIZE;		/* advance pointer to next row */
-    outptr += stride;
-  }
-}
-
diff --git a/drivers/jpg/loadjpeg.c b/drivers/jpg/loadjpeg.c
deleted file mode 100644
index 82072d42726..00000000000
--- a/drivers/jpg/loadjpeg.c
+++ /dev/null
@@ -1,341 +0,0 @@
-/*
- * Small jpeg decoder library - testing application
- *
- * Copyright (c) 2006, Luc Saillard <luc@saillard.org>
- * All rights reserved.
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- * 
- * - Redistributions of source code must retain the above copyright notice,
- *  this list of conditions and the following disclaimer.
- *
- * - Redistributions in binary form must reproduce the above copyright notice,
- *  this list of conditions and the following disclaimer in the documentation
- *  and/or other materials provided with the distribution.
- *
- * - Neither the name of the author nor the names of its contributors may be
- *  used to endorse or promote products derived from this software without
- *  specific prior written permission.
- * 
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- *
- */
-
-#include "tinyjpeg.h"
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <time.h>
-
-#define snprintf(buf, size, fmt, ...) sprintf(buf, fmt, __VA_ARGS__)
-
-static void exitmessage(const char *message) __attribute__((noreturn));
-static void exitmessage(const char *message)
-{
-  printf("%s\n", message);
-  exit(0);
-}
-
-static int filesize(FILE *fp)
-{
-  long pos;
-  fseek(fp, 0, SEEK_END);
-  pos = ftell(fp);
-  fseek(fp, 0, SEEK_SET);
-  return pos;
-}
-
-/**
- * Save a buffer in 24bits Targa format 
- * (BGR byte order)
- */
-static void write_tga(const char *filename, int output_format, int width, int height, unsigned char **components)
-{
-  unsigned char targaheader[18];
-  FILE *F;
-  char temp[1024];
-  unsigned int bufferlen = width * height * 3;
-  unsigned char *rgb_data = components[0];
-
-  sprintf(temp, sizeof(temp), filename);
-
-  memset(targaheader,0,sizeof(targaheader));
-
-  targaheader[12] = (unsigned char) (width & 0xFF);
-  targaheader[13] = (unsigned char) (width >> 8);
-  targaheader[14] = (unsigned char) (height & 0xFF);
-  targaheader[15] = (unsigned char) (height >> 8);
-  targaheader[17] = 0x20;    /* Top-down, non-interlaced */
-  targaheader[2]  = 2;       /* image type = uncompressed RGB */
-  targaheader[16] = 24;
-
-  if (output_format == TINYJPEG_FMT_RGB24)
-   {
-     unsigned char *data = rgb_data + bufferlen - 3;
-     do
-      { 
-	unsigned char c = data[0];
-	data[0] = data[2];
-	data[2] = c;
-	data-=3;
-      }
-     while (data > rgb_data);
-   }
-
-  F = fopen(temp, "wb");
-  fwrite(targaheader, sizeof(targaheader), 1, F);
-  fwrite(rgb_data, 1, bufferlen, F);
-  fclose(F);
-}
-
-/**
- * Save a buffer in three files (.Y, .U, .V) useable by yuvsplittoppm
- */
-static void write_yuv(const char *filename, int width, int height, unsigned char **components)
-{
-  FILE *F;
-  char temp[1024];
-
-  snprintf(temp, 1024, "%s.Y", filename);
-  F = fopen(temp, "wb");
-  fwrite(components[0], width, height, F);
-  fclose(F);
-  snprintf(temp, 1024, "%s.U", filename);
-  F = fopen(temp, "wb");
-  fwrite(components[1], width*height/4, 1, F);
-  fclose(F);
-  snprintf(temp, 1024, "%s.V", filename);
-  F = fopen(temp, "wb");
-  fwrite(components[2], width*height/4, 1, F);
-  fclose(F);
-}
-
-/**
- * Save a buffer in grey image (pgm format)
- */
-static void write_pgm(const char *filename, int width, int height, unsigned char **components)
-{
-  FILE *F;
-  char temp[1024];
-
-  snprintf(temp, 1024, "%s", filename);
-  F = fopen(temp, "wb");
-  fprintf(F, "P5\n%d %d\n255\n", width, height);
-  fwrite(components[0], width, height, F);
-  fclose(F);
-}
-
-/**
- * Load one jpeg image, and try to decompress 1000 times, and save the result.
- * This is mainly used for benchmarking the decoder, or to test if between each
- * called of the library the DCT is corrected reset (a bug was found).
- */
-int load_multiple_times(const char *filename, const char *outfilename, int output_format)
-{
-  FILE *fp;
-  int count, length_of_file;
-  unsigned int width, height;
-  unsigned char *buf;
-  struct jdec_private *jdec;
-  unsigned char *components[4];
-
-  jdec = tinyjpeg_init();
-  count = 0;
-
-  /* Load the Jpeg into memory */
-  fp = fopen(filename, "rb");
-  if (fp == NULL)
-    exitmessage("Cannot open filename\n");
-  length_of_file = filesize(fp);
-  buf = (unsigned char *)malloc(length_of_file + 4);
-  fread(buf, length_of_file, 1, fp);
-  fclose(fp);
-
-  while (count<1000)
-   {
-     if (tinyjpeg_parse_header(jdec, buf, length_of_file)<0)
-       exitmessage(tinyjpeg_get_errorstring(jdec));
-
-     tinyjpeg_decode(jdec, output_format);
-
-     count++;
-   }
-
-  /* 
-   * Get address for each plane (not only max 3 planes is supported), and
-   * depending of the output mode, only some components will be filled 
-   * RGB: 1 plane, YUV420P: 3 planes, GREY: 1 plane
-   */
-  tinyjpeg_get_components(jdec, components);
-  tinyjpeg_get_size(jdec, &width, &height);
-
-  /* Save it */
-  switch (output_format)
-   {
-    case TINYJPEG_FMT_RGB24:
-    case TINYJPEG_FMT_BGR24:
-      write_tga(outfilename, output_format, width, height, components);
-      break;
-    case TINYJPEG_FMT_YUV420P:
-      write_yuv(outfilename, width, height, components);
-      break;
-    case TINYJPEG_FMT_GREY:
-      write_pgm(outfilename, width, height, components);
-      break;
-   }
-
-  free(buf);
-  tinyjpeg_free(jdec);
-  return 0;
-}
-
-/**
- * Load one jpeg image, and decompress it, and save the result.
- */
-int convert_one_image(const char *infilename, const char *outfilename, int output_format)
-{
-  FILE *fp;
-  unsigned int length_of_file;
-  unsigned int width, height;
-  unsigned char *buf;
-  struct jdec_private *jdec;
-  unsigned char *components[3];
-
-  /* Load the Jpeg into memory */
-  fp = fopen(infilename, "rb");
-  if (fp == NULL)
-    exitmessage("Cannot open filename\n");
-  length_of_file = filesize(fp);
-  buf = (unsigned char *)malloc(length_of_file + 4);
-  if (buf == NULL)
-    exitmessage("Not enough memory for loading file\n");
-  fread(buf, length_of_file, 1, fp);
-  fclose(fp);
-
-  /* Decompress it */
-  jdec = tinyjpeg_init();
-  if (jdec == NULL)
-    exitmessage("Not enough memory to alloc the structure need for decompressing\n");
-
-  if (tinyjpeg_parse_header(jdec, buf, length_of_file)<0)
-    exitmessage(tinyjpeg_get_errorstring(jdec));
-
-  /* Get the size of the image */
-  tinyjpeg_get_size(jdec, &width, &height);
-
-  printf("Decoding JPEG image...\n");
-  if (tinyjpeg_decode(jdec, output_format) < 0)
-    exitmessage(tinyjpeg_get_errorstring(jdec));
-
-  /* 
-   * Get address for each plane (not only max 3 planes is supported), and
-   * depending of the output mode, only some components will be filled 
-   * RGB: 1 plane, YUV420P: 3 planes, GREY: 1 plane
-   */
-  tinyjpeg_get_components(jdec, components);
-
-  /* Save it */
-  switch (output_format)
-   {
-    case TINYJPEG_FMT_RGB24:
-    case TINYJPEG_FMT_BGR24:
-      write_tga(outfilename, output_format, width, height, components);
-      break;
-    case TINYJPEG_FMT_YUV420P:
-      write_yuv(outfilename, width, height, components);
-      break;
-    case TINYJPEG_FMT_GREY:
-      write_pgm(outfilename, width, height, components);
-      break;
-   }
-
-  /* Only called this if the buffers were allocated by tinyjpeg_decode() */
-  tinyjpeg_free(jdec);
-  /* else called just free(jdec); */
-
-  free(buf);
-  return 0;
-}
-
-static void usage(void)
-{
-    fprintf(stderr, "Usage: loadjpeg [options] <input_filename.jpeg> <format> <output_filename>\n");
-    fprintf(stderr, "options:\n");
-    fprintf(stderr, "  --benchmark - Convert 1000 times the same image\n");
-    fprintf(stderr, "format:\n");
-    fprintf(stderr, "  yuv420p - output 3 files .Y,.U,.V\n");
-    fprintf(stderr, "  rgb24   - output a .tga image\n");
-    fprintf(stderr, "  bgr24   - output a .tga image\n");
-    fprintf(stderr, "  gray    - output a .pgm image\n");
-    exit(1);
-}
-
-/**
- * main
- *
- */
-int main(int argc, char *argv[])
-{
-  int output_format = TINYJPEG_FMT_YUV420P;
-  char *output_filename, *input_filename;
-  clock_t start_time, finish_time;
-  unsigned int duration;
-  int current_argument;
-  int benchmark_mode = 0;
-
-  if (argc < 3)
-    usage();
-
-  current_argument = 1;
-  while (1)
-   {
-     if (strcmp(argv[current_argument], "--benchmark")==0)
-       benchmark_mode = 1;
-     else
-       break;
-     current_argument++;
-   }
-
-  if (argc < current_argument+2)
-    usage();
-
-  input_filename = argv[current_argument];
-  if (strcmp(argv[current_argument+1],"yuv420p")==0)
-    output_format = TINYJPEG_FMT_YUV420P;
-  else if (strcmp(argv[current_argument+1],"rgb24")==0)
-    output_format = TINYJPEG_FMT_RGB24;
-  else if (strcmp(argv[current_argument+1],"bgr24")==0)
-    output_format = TINYJPEG_FMT_BGR24;
-  else if (strcmp(argv[current_argument+1],"grey")==0)
-    output_format = TINYJPEG_FMT_GREY;
-  else
-    exitmessage("Bad format: need to be one of yuv420p, rgb24, bgr24, grey\n");
-  output_filename = argv[current_argument+2];
-
-  start_time = clock();
-
-  if (benchmark_mode)
-    load_multiple_times(input_filename, output_filename, output_format);
-  else
-    convert_one_image(input_filename, output_filename, output_format);
-
-  finish_time = clock();
-  duration = finish_time - start_time;
-  printf("Decoding finished in %u ticks\n", duration);
-
-  return 0;
-}
-
-
-
-
diff --git a/drivers/jpg/tinyjpeg-internal.h b/drivers/jpg/tinyjpeg-internal.h
deleted file mode 100644
index b2d5fe42aaa..00000000000
--- a/drivers/jpg/tinyjpeg-internal.h
+++ /dev/null
@@ -1,162 +0,0 @@
-/*
- * Small jpeg decoder library (Internal header)
- *
- * Copyright (c) 2006, Luc Saillard <luc@saillard.org>
- * All rights reserved.
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- * 
- * - Redistributions of source code must retain the above copyright notice,
- *  this list of conditions and the following disclaimer.
- *
- * - Redistributions in binary form must reproduce the above copyright notice,
- *  this list of conditions and the following disclaimer in the documentation
- *  and/or other materials provided with the distribution.
- *
- * - Neither the name of the author nor the names of its contributors may be
- *  used to endorse or promote products derived from this software without
- *  specific prior written permission.
- * 
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- *
- */
-
-
-#ifndef __TINYJPEG_INTERNAL_H_
-#define __TINYJPEG_INTERNAL_H_
-
-#ifdef _MSC_VER
-
-typedef signed __int8		int8_t;
-typedef unsigned __int8		uint8_t;
-typedef signed __int16		int16_t;
-typedef unsigned __int16	uint16_t;
-typedef signed __int32		int32_t;
-typedef unsigned __int32	uint32_t;
-typedef signed __int64		int64_t;
-typedef unsigned __int64	uint64_t;
-
-#else
-
-#ifdef NO_STDINT_H
-typedef unsigned char   uint8_t;
-typedef signed char     int8_t;
-typedef unsigned short  uint16_t;
-typedef signed short    int16_t;
-typedef unsigned int    uint32_t;
-typedef signed int      int32_t;
-typedef long long	int64_t;
-typedef unsigned long long int64_t;
-#else
-#include <stdint.h>
-#endif
-#endif
-
-
-#include <setjmp.h>
-
-#define SANITY_CHECK 1
-
-struct jdec_private;
-
-#define HUFFMAN_BITS_SIZE  256
-#define HUFFMAN_HASH_NBITS 9
-#define HUFFMAN_HASH_SIZE  (1UL<<HUFFMAN_HASH_NBITS)
-#define HUFFMAN_HASH_MASK  (HUFFMAN_HASH_SIZE-1)
-
-#define HUFFMAN_TABLES	   4
-#define COMPONENTS	   3
-#define JPEG_MAX_WIDTH	   4096
-#define JPEG_MAX_HEIGHT	   4096
-
-struct huffman_table
-{
-  /* Fast look up table, using HUFFMAN_HASH_NBITS bits we can have directly the symbol,
-   * if the symbol is <0, then we need to look into the tree table */
-  short int lookup[HUFFMAN_HASH_SIZE];
-  /* code size: give the number of bits of a symbol is encoded */
-  unsigned char code_size[HUFFMAN_HASH_SIZE];
-  /* some place to store value that is not encoded in the lookup table 
-   * FIXME: Calculate if 256 value is enough to store all values
-   */
-  uint16_t slowtable[16-HUFFMAN_HASH_NBITS][256];
-};
-
-struct component 
-{
-  unsigned int Hfactor;
-  unsigned int Vfactor;
-  float *Q_table;		/* Pointer to the quantisation table to use */
-  struct huffman_table *AC_table;
-  struct huffman_table *DC_table;
-  short int previous_DC;	/* Previous DC coefficient */
-  short int DCT[64];		/* DCT coef */
-#if SANITY_CHECK
-  unsigned int cid;
-#endif
-};
-
-
-typedef void (*decode_MCU_fct) (struct jdec_private *priv);
-typedef void (*convert_colorspace_fct) (struct jdec_private *priv);
-
-struct jdec_private
-{
-  void *(*allocate_mem)(unsigned int amount);
-  void (*free_mem)(void *mem);
-
-  /* Public variables */
-  uint8_t *components[COMPONENTS];
-  unsigned int width, height;	/* Size of the image */
-  unsigned int flags;
-
-  /* Private variables */
-  const unsigned char *stream_begin, *stream_end;
-  unsigned int stream_length;
-
-  const unsigned char *stream;	/* Pointer to the current stream */
-  unsigned int reservoir, nbits_in_reservoir;
-
-  struct component component_infos[COMPONENTS];
-  float Q_tables[COMPONENTS][64];		/* quantization tables */
-  struct huffman_table HTDC[HUFFMAN_TABLES];	/* DC huffman tables   */
-  struct huffman_table HTAC[HUFFMAN_TABLES];	/* AC huffman tables   */
-  int default_huffman_table_initialized;
-  int restart_interval;
-  int restarts_to_go;				/* MCUs left in this restart interval */
-  int last_rst_marker_seen;			/* Rst marker is incremented each time */
-
-  /* Temp space used after the IDCT to store each components */
-  uint8_t Y[64*4], Cr[64], Cb[64];
-
-  jmp_buf jump_state;
-  /* Internal Pointer use for colorspace conversion, do not modify it !!! */
-  uint8_t *plane[COMPONENTS];
-
-  uint8_t decomp_block[16][16*3];
-
-};
-
-#if defined(__GNUC__) && (__GNUC__ > 3) && defined(__OPTIMIZE__)
-#define __likely(x)       __builtin_expect(!!(x), 1)
-#define __unlikely(x)     __builtin_expect(!!(x), 0)
-#else
-#define __likely(x)       (x)
-#define __unlikely(x)     (x)
-#endif
-
-#define IDCT tinyjpeg_idct_float
-void tinyjpeg_idct_float (struct component *compptr, uint8_t *output_buf, int stride);
-
-#endif
-
diff --git a/drivers/jpg/tinyjpeg.c b/drivers/jpg/tinyjpeg.c
deleted file mode 100644
index 8e3c934ce09..00000000000
--- a/drivers/jpg/tinyjpeg.c
+++ /dev/null
@@ -1,2202 +0,0 @@
-/*
- * Small jpeg decoder library
- *
- * Copyright (c) 2006, Luc Saillard <luc@saillard.org>
- * All rights reserved.
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- * 
- * - Redistributions of source code must retain the above copyright notice,
- *  this list of conditions and the following disclaimer.
- *
- * - Redistributions in binary form must reproduce the above copyright notice,
- *  this list of conditions and the following disclaimer in the documentation
- *  and/or other materials provided with the distribution.
- *
- * - Neither the name of the author nor the names of its contributors may be
- *  used to endorse or promote products derived from this software without
- *  specific prior written permission.
- * 
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- *
- */
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-
-#include <errno.h>
-
-#include "tinyjpeg.h"
-#include "tinyjpeg-internal.h"
-
-enum std_markers {
-   DQT  = 0xDB, /* Define Quantization Table */
-   SOF  = 0xC0, /* Start of Frame (size information) */
-   DHT  = 0xC4, /* Huffman Table */
-   SOI  = 0xD8, /* Start of Image */
-   SOS  = 0xDA, /* Start of Scan */
-   RST  = 0xD0, /* Reset Marker d0 -> .. */
-   RST7 = 0xD7, /* Reset Marker .. -> d7 */
-   EOI  = 0xD9, /* End of Image */
-   DRI  = 0xDD, /* Define Restart Interval */
-   APP0 = 0xE0,
-};
-
-#define cY	0
-#define cCb	1
-#define cCr	2
-
-#define BLACK_Y 0
-#define BLACK_U 127
-#define BLACK_V 127
-
-#if DEBUG
-#define trace(fmt, args...) do { \
-   fprintf(stderr, fmt, ## args); \
-   fflush(stderr); \
-} while(0)
-#else
-#define trace(fmt, ...) do { } while (0)
-#endif
-
-#define error(fmt, ...) do { \
-   sprintf(error_string, fmt, ## __VA_ARGS__); \
-   return -1; \
-} while(0)
-
-
-#if 0
-static char *print_bits(unsigned int value, char *bitstr)
-{
-  int i, j;
-  i=31;
-  while (i>0)
-   {
-     if (value & (1UL<<i))
-       break;
-     i--;
-   }
-  j=0;
-  while (i>=0)
-   {
-     bitstr[j++] = (value & (1UL<<i))?'1':'0';
-     i--;
-   }
-  bitstr[j] = 0;
-  return bitstr;
-}
-
-static void print_next_16bytes(int offset, const unsigned char *stream)
-{
-  trace("%4.4x: %2.2x %2.2x %2.2x %2.2x %2.2x %2.2x %2.2x %2.2x %2.2x %2.2x %2.2x %2.2x %2.2x %2.2x %2.2x %2.2x\n",
-	offset,
-	stream[0], stream[1], stream[2], stream[3], 
-	stream[4], stream[5], stream[6], stream[7],
-	stream[8], stream[9], stream[10], stream[11], 
-	stream[12], stream[13], stream[14], stream[15]);
-}
-
-#endif
-
-/* Global variable to return the last error found while deconding */
-static char error_string[256];
-
-static const unsigned char zigzag[64] = 
-{
-   0,  1,  5,  6, 14, 15, 27, 28,
-   2,  4,  7, 13, 16, 26, 29, 42,
-   3,  8, 12, 17, 25, 30, 41, 43,
-   9, 11, 18, 24, 31, 40, 44, 53,
-  10, 19, 23, 32, 39, 45, 52, 54,
-  20, 22, 33, 38, 46, 51, 55, 60,
-  21, 34, 37, 47, 50, 56, 59, 61,
-  35, 36, 48, 49, 57, 58, 62, 63
-};
-
-/* Set up the standard Huffman tables (cf. JPEG standard section K.3) */
-/* IMPORTANT: these are only valid for 8-bit data precision! */
-static const unsigned char bits_dc_luminance[17] =
-{ 
-  0, 0, 1, 5, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0 
-};
-static const unsigned char val_dc_luminance[] =
-{
-  0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11 
-};
-  
-static const unsigned char bits_dc_chrominance[17] =
-{
-  0, 0, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0 
-};
-static const unsigned char val_dc_chrominance[] = 
-{
-  0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11 
-};
-  
-static const unsigned char bits_ac_luminance[17] =
-{
-  0, 0, 2, 1, 3, 3, 2, 4, 3, 5, 5, 4, 4, 0, 0, 1, 0x7d 
-};
-static const unsigned char val_ac_luminance[] =
-{
-  0x01, 0x02, 0x03, 0x00, 0x04, 0x11, 0x05, 0x12,
-  0x21, 0x31, 0x41, 0x06, 0x13, 0x51, 0x61, 0x07,
-  0x22, 0x71, 0x14, 0x32, 0x81, 0x91, 0xa1, 0x08,
-  0x23, 0x42, 0xb1, 0xc1, 0x15, 0x52, 0xd1, 0xf0,
-  0x24, 0x33, 0x62, 0x72, 0x82, 0x09, 0x0a, 0x16,
-  0x17, 0x18, 0x19, 0x1a, 0x25, 0x26, 0x27, 0x28,
-  0x29, 0x2a, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39,
-  0x3a, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49,
-  0x4a, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59,
-  0x5a, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69,
-  0x6a, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79,
-  0x7a, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89,
-  0x8a, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98,
-  0x99, 0x9a, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7,
-  0xa8, 0xa9, 0xaa, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6,
-  0xb7, 0xb8, 0xb9, 0xba, 0xc2, 0xc3, 0xc4, 0xc5,
-  0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xd2, 0xd3, 0xd4,
-  0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xe1, 0xe2,
-  0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9, 0xea,
-  0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8,
-  0xf9, 0xfa
-};
-
-static const unsigned char bits_ac_chrominance[17] =
-{ 
-  0, 0, 2, 1, 2, 4, 4, 3, 4, 7, 5, 4, 4, 0, 1, 2, 0x77 
-};
-
-static const unsigned char val_ac_chrominance[] =
-{
-  0x00, 0x01, 0x02, 0x03, 0x11, 0x04, 0x05, 0x21,
-  0x31, 0x06, 0x12, 0x41, 0x51, 0x07, 0x61, 0x71,
-  0x13, 0x22, 0x32, 0x81, 0x08, 0x14, 0x42, 0x91,
-  0xa1, 0xb1, 0xc1, 0x09, 0x23, 0x33, 0x52, 0xf0,
-  0x15, 0x62, 0x72, 0xd1, 0x0a, 0x16, 0x24, 0x34,
-  0xe1, 0x25, 0xf1, 0x17, 0x18, 0x19, 0x1a, 0x26,
-  0x27, 0x28, 0x29, 0x2a, 0x35, 0x36, 0x37, 0x38,
-  0x39, 0x3a, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48,
-  0x49, 0x4a, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58,
-  0x59, 0x5a, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68,
-  0x69, 0x6a, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
-  0x79, 0x7a, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
-  0x88, 0x89, 0x8a, 0x92, 0x93, 0x94, 0x95, 0x96,
-  0x97, 0x98, 0x99, 0x9a, 0xa2, 0xa3, 0xa4, 0xa5,
-  0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xb2, 0xb3, 0xb4,
-  0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, 0xc2, 0xc3,
-  0xc4, 0xc5, 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xd2,
-  0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda,
-  0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9,
-  0xea, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8,
-  0xf9, 0xfa
-};
-
-
-/*
- * 4 functions to manage the stream
- *
- *  fill_nbits: put at least nbits in the reservoir of bits.
- *              But convert any 0xff,0x00 into 0xff
- *  get_nbits: read nbits from the stream, and put it in result,
- *             bits is removed from the stream and the reservoir is filled
- *             automaticaly. The result is signed according to the number of
- *             bits.
- *  look_nbits: read nbits from the stream without marking as read.
- *  skip_nbits: read nbits from the stream but do not return the result.
- * 
- * stream: current pointer in the jpeg data (read bytes per bytes)
- * nbits_in_reservoir: number of bits filled into the reservoir
- * reservoir: register that contains bits information. Only nbits_in_reservoir
- *            is valid.
- *                          nbits_in_reservoir
- *                        <--    17 bits    -->
- *            Ex: 0000 0000 1010 0000 1111 0000   <== reservoir
- *                        ^
- *                        bit 1
- *            To get two bits from this example
- *                 result = (reservoir >> 15) & 3
- *
- */
-#define fill_nbits(reservoir,nbits_in_reservoir,stream,nbits_wanted) do { \
-   while (nbits_in_reservoir<nbits_wanted) \
-    { \
-      unsigned char c; \
-      if (stream >= priv->stream_end) \
-        longjmp(priv->jump_state, -EIO); \
-      c = *stream++; \
-      reservoir <<= 8; \
-      if (c == 0xff && *stream == 0x00) \
-        stream++; \
-      reservoir |= c; \
-      nbits_in_reservoir+=8; \
-    } \
-}  while(0);
-
-/* Signed version !!!! */
-#define get_nbits(reservoir,nbits_in_reservoir,stream,nbits_wanted,result) do { \
-   fill_nbits(reservoir,nbits_in_reservoir,stream,(nbits_wanted)); \
-   result = ((reservoir)>>(nbits_in_reservoir-(nbits_wanted))); \
-   nbits_in_reservoir -= (nbits_wanted);  \
-   reservoir &= ((1U<<nbits_in_reservoir)-1); \
-   if ((unsigned int)result < (1UL<<((nbits_wanted)-1))) \
-       result += (0xFFFFFFFFUL<<(nbits_wanted))+1; \
-}  while(0);
-
-#define look_nbits(reservoir,nbits_in_reservoir,stream,nbits_wanted,result) do { \
-   fill_nbits(reservoir,nbits_in_reservoir,stream,(nbits_wanted)); \
-   result = ((reservoir)>>(nbits_in_reservoir-(nbits_wanted))); \
-}  while(0);
-
-/* To speed up the decoding, we assume that the reservoir have enough bit 
- * slow version:
- * #define skip_nbits(reservoir,nbits_in_reservoir,stream,nbits_wanted) do { \
- *   fill_nbits(reservoir,nbits_in_reservoir,stream,(nbits_wanted)); \
- *   nbits_in_reservoir -= (nbits_wanted); \
- *   reservoir &= ((1U<<nbits_in_reservoir)-1); \
- * }  while(0);
- */
-#define skip_nbits(reservoir,nbits_in_reservoir,stream,nbits_wanted) do { \
-   nbits_in_reservoir -= (nbits_wanted); \
-   reservoir &= ((1U<<nbits_in_reservoir)-1); \
-}  while(0);
-
-
-#define be16_to_cpu(x) (((x)[0]<<8)|(x)[1])
-
-static void resync(struct jdec_private *priv);
-
-/**
- * Get the next (valid) huffman code in the stream.
- *
- * To speedup the procedure, we look HUFFMAN_HASH_NBITS bits and the code is
- * lower than HUFFMAN_HASH_NBITS we have automaticaly the length of the code
- * and the value by using two lookup table.
- * Else if the value is not found, just search (linear) into an array for each
- * bits is the code is present.
- *
- * If the code is not present for any reason, -1 is return.
- */
-static int get_next_huffman_code(struct jdec_private *priv, struct huffman_table *huffman_table)
-{
-  int value, hcode;
-  unsigned int extra_nbits, nbits;
-  uint16_t *slowtable;
-
-  look_nbits(priv->reservoir, priv->nbits_in_reservoir, priv->stream, HUFFMAN_HASH_NBITS, hcode);
-  value = huffman_table->lookup[hcode];
-  if (__likely(value >= 0))
-  { 
-     unsigned int code_size = huffman_table->code_size[value];
-     skip_nbits(priv->reservoir, priv->nbits_in_reservoir, priv->stream, code_size);
-     return value;
-  }
-
-  /* Decode more bits each time ... */
-  for (extra_nbits=0; extra_nbits<16-HUFFMAN_HASH_NBITS; extra_nbits++)
-   {
-     nbits = HUFFMAN_HASH_NBITS + 1 + extra_nbits;
-
-     look_nbits(priv->reservoir, priv->nbits_in_reservoir, priv->stream, nbits, hcode);
-     slowtable = huffman_table->slowtable[extra_nbits];
-     /* Search if the code is in this array */
-     while (slowtable[0]) {
-	if (slowtable[0] == hcode) {
-	   skip_nbits(priv->reservoir, priv->nbits_in_reservoir, priv->stream, nbits);
-	   return slowtable[1];
-	}
-	slowtable+=2;
-     }
-   }
-  return 0;
-}
-
-
-
-
-/**
- *
- * Decode a single block that contains the DCT coefficients.
- * The table coefficients is already dezigzaged at the end of the operation.
- *
- */
-static void process_Huffman_data_unit(struct jdec_private *priv, int component)
-{
-  unsigned char j;
-  unsigned int huff_code;
-  unsigned char size_val, count_0;
-
-  struct component *c = &priv->component_infos[component];
-  short int DCT[64];
-
-
-  /* Initialize the DCT coef table */
-  memset(DCT, 0, sizeof(DCT));
-
-  /* DC coefficient decoding */
-  huff_code = get_next_huffman_code(priv, c->DC_table);
-  //trace("+ %x\n", huff_code);
-  if (huff_code) {
-     get_nbits(priv->reservoir, priv->nbits_in_reservoir, priv->stream, huff_code, DCT[0]);
-     DCT[0] += c->previous_DC;
-     c->previous_DC = DCT[0];
-  } else {
-     DCT[0] = c->previous_DC;
-  }
-
-  /* AC coefficient decoding */
-  j = 1;
-  while (j<64)
-   {
-     huff_code = get_next_huffman_code(priv, c->AC_table);
-     //trace("- %x\n", huff_code);
-
-     size_val = huff_code & 0xF;
-     count_0 = huff_code >> 4;
-
-     if (size_val == 0)
-      { /* RLE */
-	if (count_0 == 0)
-	  break;	/* EOB found, go out */
-	else if (count_0 == 0xF)
-	  j += 16;	/* skip 16 zeros */
-      }
-     else
-      {
-	j += count_0;	/* skip count_0 zeroes */
-	if (__unlikely(j >= 64))
-	 {
-	   sprintf(error_string, "Bad huffman data (buffer overflow)");
-	   break;
-	 }
-	get_nbits(priv->reservoir, priv->nbits_in_reservoir, priv->stream, size_val, DCT[j]);
-	j++;
-      }
-   }
-
-  for (j = 0; j < 64; j++)
-    c->DCT[j] = DCT[zigzag[j]];
-}
-
-/*
- * Takes two array of bits, and build the huffman table for size, and code
- * 
- * lookup will return the symbol if the code is less or equal than HUFFMAN_HASH_NBITS.
- * code_size will be used to known how many bits this symbol is encoded.
- * slowtable will be used when the first lookup didn't give the result.
- */
-static void build_huffman_table(const unsigned char *bits, const unsigned char *vals, struct huffman_table *table)
-{
-  unsigned int i, j, code, code_size, val, nbits;
-  unsigned char huffsize[HUFFMAN_BITS_SIZE+1], *hz;
-  unsigned int huffcode[HUFFMAN_BITS_SIZE+1], *hc;
-  int next_free_entry;
-
-  /*
-   * Build a temp array 
-   *   huffsize[X] => numbers of bits to write vals[X]
-   */
-  hz = huffsize;
-  for (i=1; i<=16; i++)
-   {
-     for (j=1; j<=bits[i]; j++)
-       *hz++ = i;
-   }
-  *hz = 0;
-
-  memset(table->lookup, 0xff, sizeof(table->lookup));
-  for (i=0; i<(16-HUFFMAN_HASH_NBITS); i++)
-    table->slowtable[i][0] = 0;
-
-  /* Build a temp array
-   *   huffcode[X] => code used to write vals[X]
-   */
-  code = 0;
-  hc = huffcode;
-  hz = huffsize;
-  nbits = *hz;
-  while (*hz)
-   {
-     while (*hz == nbits)
-      {
-	*hc++ = code++;
-	hz++;
-      }
-     code <<= 1;
-     nbits++;
-   }
-
-  /*
-   * Build the lookup table, and the slowtable if needed.
-   */
-  next_free_entry = -1;
-  for (i=0; huffsize[i]; i++)
-   {
-     val = vals[i];
-     code = huffcode[i];
-     code_size = huffsize[i];
-
-     trace("val=%2.2x code=%8.8x codesize=%2.2d\n", val, code, code_size);
-
-     table->code_size[val] = code_size;
-     if (code_size <= HUFFMAN_HASH_NBITS)
-      {
-	/*
-	 * Good: val can be put in the lookup table, so fill all value of this
-	 * column with value val 
-	 */
-	int repeat = 1UL<<(HUFFMAN_HASH_NBITS - code_size);
-	code <<= HUFFMAN_HASH_NBITS - code_size;
-	while ( repeat-- )
-	  table->lookup[code++] = val;
-
-      }
-     else
-      {
-	/* Perhaps sorting the array will be an optimization */
-	uint16_t *slowtable = table->slowtable[code_size-HUFFMAN_HASH_NBITS-1];
-	while(slowtable[0])
-	  slowtable+=2;
-	slowtable[0] = code;
-	slowtable[1] = val;
-	slowtable[2] = 0;
-	/* TODO: NEED TO CHECK FOR AN OVERFLOW OF THE TABLE */
-      }
-
-   }
-}
-
-static void build_default_huffman_tables(struct jdec_private *priv)
-{
-  if (   (priv->flags & TINYJPEG_FLAGS_MJPEG_TABLE) 
-      && priv->default_huffman_table_initialized)
-    return;
-
-  build_huffman_table(bits_dc_luminance, val_dc_luminance, &priv->HTDC[0]);
-  build_huffman_table(bits_ac_luminance, val_ac_luminance, &priv->HTAC[0]);
-
-  build_huffman_table(bits_dc_chrominance, val_dc_chrominance, &priv->HTDC[1]);
-  build_huffman_table(bits_ac_chrominance, val_ac_chrominance, &priv->HTAC[1]);
-
-  priv->default_huffman_table_initialized = 1;
-}
-
-
-
-/*******************************************************************************
- *
- * Colorspace conversion routine
- *
- *
- * Note:
- * YCbCr is defined per CCIR 601-1, except that Cb and Cr are
- * normalized to the range 0..MAXJSAMPLE rather than -0.5 .. 0.5.
- * The conversion equations to be implemented are therefore
- *      R = Y                + 1.40200 * Cr
- *      G = Y - 0.34414 * Cb - 0.71414 * Cr
- *      B = Y + 1.77200 * Cb
- * 
- ******************************************************************************/
-
-static unsigned char clamp(int i)
-{
-  if (i<0)
-    return 0;
-  else if (i>255)
-    return 255;
-  else
-    return i;
-}   
-
-
-/**
- *  YCrCb -> YUV420P (1x1)
- *  .---.
- *  | 1 |
- *  `---'
- */
-static void YCrCB_to_YUV420P_1x1(struct jdec_private *priv)
-{
-  const unsigned char *s, *y;
-  unsigned char *p;
-  int i,j;
-
-  p = priv->plane[0];
-  y = priv->Y;
-  for (i=0; i<8; i++)
-   {
-     memcpy(p, y, 8);
-     p+=priv->width;
-     y+=8;
-   }
-
-  p = priv->plane[1];
-  s = priv->Cb;
-  for (i=0; i<8; i+=2)
-   {
-     for (j=0; j<8; j+=2, s+=2)
-       *p++ = *s;
-     s += 8; /* Skip one line */
-     p += priv->width/2 - 4;
-   }
-
-  p = priv->plane[2];
-  s = priv->Cr;
-  for (i=0; i<8; i+=2)
-   {
-     for (j=0; j<8; j+=2, s+=2)
-       *p++ = *s;
-     s += 8; /* Skip one line */
-     p += priv->width/2 - 4;
-   }
-}
-
-/**
- *  YCrCb -> YUV420P (2x1)
- *  .-------.
- *  | 1 | 2 |
- *  `-------'
- */
-static void YCrCB_to_YUV420P_2x1(struct jdec_private *priv)
-{
-  unsigned char *p;
-  const unsigned char *s, *y1;
-  unsigned int i;
-
-  p = priv->plane[0];
-  y1 = priv->Y;
-  for (i=0; i<8; i++)
-   {
-     memcpy(p, y1, 16);
-     p += priv->width;
-     y1 += 16;
-   }
-
-  p = priv->plane[1];
-  s = priv->Cb;
-  for (i=0; i<8; i+=2)
-   {
-     memcpy(p, s, 8);
-     s += 16; /* Skip one line */
-     p += priv->width/2;
-   }
-
-  p = priv->plane[2];
-  s = priv->Cr;
-  for (i=0; i<8; i+=2)
-   {
-     memcpy(p, s, 8);
-     s += 16; /* Skip one line */
-     p += priv->width/2;
-   }
-}
-
-
-/**
- *  YCrCb -> YUV420P (1x2)
- *  .---.
- *  | 1 |
- *  |---|
- *  | 2 |
- *  `---'
- */
-static void YCrCB_to_YUV420P_1x2(struct jdec_private *priv)
-{
-  const unsigned char *s, *y;
-  unsigned char *p;
-  int i,j;
-
-  p = priv->plane[0];
-  y = priv->Y;
-  for (i=0; i<16; i++)
-   {
-     memcpy(p, y, 8);
-     p+=priv->width;
-     y+=8;
-   }
-
-  p = priv->plane[1];
-  s = priv->Cb;
-  for (i=0; i<8; i++)
-   {
-     for (j=0; j<8; j+=2, s+=2)
-       *p++ = *s;
-     p += priv->width/2 - 4;
-   }
-
-  p = priv->plane[2];
-  s = priv->Cr;
-  for (i=0; i<8; i++)
-   {
-     for (j=0; j<8; j+=2, s+=2)
-       *p++ = *s;
-     p += priv->width/2 - 4;
-   }
-}
-
-/**
- *  YCrCb -> YUV420P (2x2)
- *  .-------.
- *  | 1 | 2 |
- *  |---+---|
- *  | 3 | 4 |
- *  `-------'
- */
-static void YCrCB_to_YUV420P_2x2(struct jdec_private *priv)
-{
-  unsigned char *p;
-  const unsigned char *s, *y1;
-  unsigned int i;
-
-  p = priv->plane[0];
-  y1 = priv->Y;
-  for (i=0; i<16; i++)
-   {
-     memcpy(p, y1, 16);
-     p += priv->width;
-     y1 += 16;
-   }
-
-  p = priv->plane[1];
-  s = priv->Cb;
-  for (i=0; i<8; i++)
-   {
-     memcpy(p, s, 8);
-     s += 8;
-     p += priv->width/2;
-   }
-
-  p = priv->plane[2];
-  s = priv->Cr;
-  for (i=0; i<8; i++)
-   {
-     memcpy(p, s, 8);
-     s += 8;
-     p += priv->width/2;
-   }
-}
-
-/**
- *  YCrCb -> RGB24 (1x1)
- *  .---.
- *  | 1 |
- *  `---'
- */
-static void YCrCB_to_RGB24_1x1(struct jdec_private *priv)
-{
-  const unsigned char *Y, *Cb, *Cr;
-  unsigned char *p;
-  int i,j;
-  int offset_to_next_row;
-
-#define SCALEBITS       10
-#define ONE_HALF        (1UL << (SCALEBITS-1))
-#define FIX(x)          ((int)((x) * (1UL<<SCALEBITS) + 0.5))
-
-  p = priv->plane[0];
-  Y = priv->Y;
-  Cb = priv->Cb;
-  Cr = priv->Cr;
-  offset_to_next_row = priv->width*3 - 8*3;
-  for (i=0; i<8; i++) {
-
-    for (j=0; j<8; j++) {
-
-       int y, cb, cr;
-       int add_r, add_g, add_b;
-       int r, g , b;
-
-       y  = (*Y++) << SCALEBITS;
-       cb = *Cb++ - 128;
-       cr = *Cr++ - 128;
-       add_r = FIX(1.40200) * cr + ONE_HALF;
-       add_g = - FIX(0.34414) * cb - FIX(0.71414) * cr + ONE_HALF;
-       add_b = FIX(1.77200) * cb + ONE_HALF;
-
-
-       r = (y + add_r) >> SCALEBITS;
-       g = (y + add_g) >> SCALEBITS;
-       b = (y + add_b) >> SCALEBITS;
-       priv->decomp_block[i][j*3+0]=clamp(r);
-       priv->decomp_block[i][j*3+1]=clamp(g);
-       priv->decomp_block[i][j*3+2]=clamp(b);
-
-    }
-
-//    p += offset_to_next_row;
-  }
-
-#undef SCALEBITS
-#undef ONE_HALF
-#undef FIX
-
-}
-
-/**
- *  YCrCb -> BGR24 (1x1)
- *  .---.
- *  | 1 |
- *  `---'
- */
-static void YCrCB_to_BGR24_1x1(struct jdec_private *priv)
-{
-  const unsigned char *Y, *Cb, *Cr;
-  unsigned char *p;
-  int i,j;
-  int offset_to_next_row;
-
-#define SCALEBITS       10
-#define ONE_HALF        (1UL << (SCALEBITS-1))
-#define FIX(x)          ((int)((x) * (1UL<<SCALEBITS) + 0.5))
-
-  p = priv->plane[0];
-  Y = priv->Y;
-  Cb = priv->Cb;
-  Cr = priv->Cr;
-  offset_to_next_row = priv->width*3 - 8*3;
-  for (i=0; i<8; i++) {
-
-    for (j=0; j<8; j++) {
-
-       int y, cb, cr;
-       int add_r, add_g, add_b;
-       int r, g , b;
-
-       y  = (*Y++) << SCALEBITS;
-       cb = *Cb++ - 128;
-       cr = *Cr++ - 128;
-       add_r = FIX(1.40200) * cr + ONE_HALF;
-       add_g = - FIX(0.34414) * cb - FIX(0.71414) * cr + ONE_HALF;
-       add_b = FIX(1.77200) * cb + ONE_HALF;
-
-       b = (y + add_b) >> SCALEBITS;
-       *p++ = clamp(b);
-       g = (y + add_g) >> SCALEBITS;
-       *p++ = clamp(g);
-       r = (y + add_r) >> SCALEBITS;
-       *p++ = clamp(r);
-
-    }
-
-    p += offset_to_next_row;
-  }
-
-#undef SCALEBITS
-#undef ONE_HALF
-#undef FIX
-
-}
-
-
-/**
- *  YCrCb -> RGB24 (2x1)
- *  .-------.
- *  | 1 | 2 |
- *  `-------'
- */
-static void YCrCB_to_RGB24_2x1(struct jdec_private *priv)
-{
-  const unsigned char *Y, *Cb, *Cr;
-  unsigned char *p;
-  int i,j;
-  int offset_to_next_row;
-
-#define SCALEBITS       10
-#define ONE_HALF        (1UL << (SCALEBITS-1))
-#define FIX(x)          ((int)((x) * (1UL<<SCALEBITS) + 0.5))
-
-  p = priv->plane[0];
-  Y = priv->Y;
-  Cb = priv->Cb;
-  Cr = priv->Cr;
-  offset_to_next_row = priv->width*3 - 16*3;
-  for (i=0; i<8; i++) {
-
-    for (j=0; j<8; j++) {
-
-       int y, cb, cr;
-       int add_r, add_g, add_b;
-       int r, g , b;
-
-       y  = (*Y++) << SCALEBITS;
-       cb = *Cb++ - 128;
-       cr = *Cr++ - 128;
-       add_r = FIX(1.40200) * cr + ONE_HALF;
-       add_g = - FIX(0.34414) * cb - FIX(0.71414) * cr + ONE_HALF;
-       add_b = FIX(1.77200) * cb + ONE_HALF;
-
-       r = (y + add_r) >> SCALEBITS;
-       g = (y + add_g) >> SCALEBITS;
-       b = (y + add_b) >> SCALEBITS;
-
-       priv->decomp_block[i][j*6+0]=clamp(r);
-       priv->decomp_block[i][j*6+1]=clamp(g);
-       priv->decomp_block[i][j*6+2]=clamp(b);
-
-       y  = (*Y++) << SCALEBITS;
-
-       r = (y + add_r) >> SCALEBITS;
-       g = (y + add_g) >> SCALEBITS;
-       b = (y + add_b) >> SCALEBITS;
-
-       priv->decomp_block[i][j*6+3]=clamp(r);
-       priv->decomp_block[i][j*6+4]=clamp(g);
-       priv->decomp_block[i][j*6+5]=clamp(b);
-
-    }
-
-    p += offset_to_next_row;
-  }
-
-#undef SCALEBITS
-#undef ONE_HALF
-#undef FIX
-
-}
-
-/*
- *  YCrCb -> BGR24 (2x1)
- *  .-------.
- *  | 1 | 2 |
- *  `-------'
- */
-static void YCrCB_to_BGR24_2x1(struct jdec_private *priv)
-{
-  const unsigned char *Y, *Cb, *Cr;
-  unsigned char *p;
-  int i,j;
-  int offset_to_next_row;
-
-#define SCALEBITS       10
-#define ONE_HALF        (1UL << (SCALEBITS-1))
-#define FIX(x)          ((int)((x) * (1UL<<SCALEBITS) + 0.5))
-
-  p = priv->plane[0];
-  Y = priv->Y;
-  Cb = priv->Cb;
-  Cr = priv->Cr;
-  offset_to_next_row = priv->width*3 - 16*3;
-  for (i=0; i<8; i++) {
-
-    for (j=0; j<8; j++) {
-
-       int y, cb, cr;
-       int add_r, add_g, add_b;
-       int r, g , b;
-
-       cb = *Cb++ - 128;
-       cr = *Cr++ - 128;
-       add_r = FIX(1.40200) * cr + ONE_HALF;
-       add_g = - FIX(0.34414) * cb - FIX(0.71414) * cr + ONE_HALF;
-       add_b = FIX(1.77200) * cb + ONE_HALF;
-
-       y  = (*Y++) << SCALEBITS;
-       b = (y + add_b) >> SCALEBITS;
-       *p++ = clamp(b);
-       g = (y + add_g) >> SCALEBITS;
-       *p++ = clamp(g);
-       r = (y + add_r) >> SCALEBITS;
-       *p++ = clamp(r);
-
-       y  = (*Y++) << SCALEBITS;
-       b = (y + add_b) >> SCALEBITS;
-       *p++ = clamp(b);
-       g = (y + add_g) >> SCALEBITS;
-       *p++ = clamp(g);
-       r = (y + add_r) >> SCALEBITS;
-       *p++ = clamp(r);
-
-    }
-
-    p += offset_to_next_row;
-  }
-
-#undef SCALEBITS
-#undef ONE_HALF
-#undef FIX
-
-}
-
-/**
- *  YCrCb -> RGB24 (1x2)
- *  .---.
- *  | 1 |
- *  |---|
- *  | 2 |
- *  `---'
- */
-static void YCrCB_to_RGB24_1x2(struct jdec_private *priv)
-{
-  const unsigned char *Y, *Cb, *Cr;
-  unsigned char *p, *p2;
-  int i,j;
-  int offset_to_next_row;
-
-#define SCALEBITS       10
-#define ONE_HALF        (1UL << (SCALEBITS-1))
-#define FIX(x)          ((int)((x) * (1UL<<SCALEBITS) + 0.5))
-
-  p = priv->plane[0];
-  p2 = priv->plane[0] + priv->width*3;
-  Y = priv->Y;
-  Cb = priv->Cb;
-  Cr = priv->Cr;
-  offset_to_next_row = 2*priv->width*3 - 8*3;
-  for (i=0; i<8; i++) {
-
-    for (j=0; j<8; j++) {
-
-       int y, cb, cr;
-       int add_r, add_g, add_b;
-       int r, g , b;
-
-       cb = *Cb++ - 128;
-       cr = *Cr++ - 128;
-       add_r = FIX(1.40200) * cr + ONE_HALF;
-       add_g = - FIX(0.34414) * cb - FIX(0.71414) * cr + ONE_HALF;
-       add_b = FIX(1.77200) * cb + ONE_HALF;
-
-       y  = (*Y++) << SCALEBITS;
-       r = (y + add_r) >> SCALEBITS;
-       g = (y + add_g) >> SCALEBITS;
-       b = (y + add_b) >> SCALEBITS;
-
-       priv->decomp_block[i*2][j*3+0]=clamp(r);
-       priv->decomp_block[i*2][j*3+1]=clamp(g);
-       priv->decomp_block[i*2][j*3+2]=clamp(b);
-
-       y  = (Y[8-1]) << SCALEBITS;
-       r = (y + add_r) >> SCALEBITS;
-       g = (y + add_g) >> SCALEBITS;
-       b = (y + add_b) >> SCALEBITS;
-
-       priv->decomp_block[i*2+1][j*3+0]=clamp(r);
-       priv->decomp_block[i*2+1][j*3+1]=clamp(g);
-       priv->decomp_block[i*2+1][j*3+2]=clamp(b);
-
-    }
-    Y += 8;
-    p += offset_to_next_row;
-    p2 += offset_to_next_row;
-  }
-
-#undef SCALEBITS
-#undef ONE_HALF
-#undef FIX
-
-}
-
-/*
- *  YCrCb -> BGR24 (1x2)
- *  .---.
- *  | 1 |
- *  |---|
- *  | 2 |
- *  `---'
- */
-static void YCrCB_to_BGR24_1x2(struct jdec_private *priv)
-{
-  const unsigned char *Y, *Cb, *Cr;
-  unsigned char *p, *p2;
-  int i,j;
-  int offset_to_next_row;
-
-#define SCALEBITS       10
-#define ONE_HALF        (1UL << (SCALEBITS-1))
-#define FIX(x)          ((int)((x) * (1UL<<SCALEBITS) + 0.5))
-
-  p = priv->plane[0];
-  p2 = priv->plane[0] + priv->width*3;
-  Y = priv->Y;
-  Cb = priv->Cb;
-  Cr = priv->Cr;
-  offset_to_next_row = 2*priv->width*3 - 8*3;
-  for (i=0; i<8; i++) {
-
-    for (j=0; j<8; j++) {
-
-       int y, cb, cr;
-       int add_r, add_g, add_b;
-       int r, g , b;
-
-       cb = *Cb++ - 128;
-       cr = *Cr++ - 128;
-       add_r = FIX(1.40200) * cr + ONE_HALF;
-       add_g = - FIX(0.34414) * cb - FIX(0.71414) * cr + ONE_HALF;
-       add_b = FIX(1.77200) * cb + ONE_HALF;
-
-       y  = (*Y++) << SCALEBITS;
-       b = (y + add_b) >> SCALEBITS;
-       *p++ = clamp(b);
-       g = (y + add_g) >> SCALEBITS;
-       *p++ = clamp(g);
-       r = (y + add_r) >> SCALEBITS;
-       *p++ = clamp(r);
-
-       y  = (Y[8-1]) << SCALEBITS;
-       b = (y + add_b) >> SCALEBITS;
-       *p2++ = clamp(b);
-       g = (y + add_g) >> SCALEBITS;
-       *p2++ = clamp(g);
-       r = (y + add_r) >> SCALEBITS;
-       *p2++ = clamp(r);
-
-    }
-    Y += 8;
-    p += offset_to_next_row;
-    p2 += offset_to_next_row;
-  }
-
-#undef SCALEBITS
-#undef ONE_HALF
-#undef FIX
-
-}
-
-
-/**
- *  YCrCb -> RGB24 (2x2)
- *  .-------.
- *  | 1 | 2 |
- *  |---+---|
- *  | 3 | 4 |
- *  `-------'
- */
-static void YCrCB_to_RGB24_2x2(struct jdec_private *priv)
-{
-  const unsigned char *Y, *Cb, *Cr;
-  unsigned char *p, *p2;
-  int i,j;
-  int offset_to_next_row;
-
-#define SCALEBITS       10
-#define ONE_HALF        (1UL << (SCALEBITS-1))
-#define FIX(x)          ((int)((x) * (1UL<<SCALEBITS) + 0.5))
-
-  p = priv->plane[0];
-  p2 = priv->plane[0] + priv->width*3;
-  Y = priv->Y;
-  Cb = priv->Cb;
-  Cr = priv->Cr;
-  offset_to_next_row = (priv->width*3*2) - 16*3;
-  for (i=0; i<8; i++) {
-
-    for (j=0; j<8; j++) {
-
-       int y, cb, cr;
-       int add_r, add_g, add_b;
-       int r, g , b;
-
-       cb = *Cb++ - 128;
-       cr = *Cr++ - 128;
-       add_r = FIX(1.40200) * cr + ONE_HALF;
-       add_g = - FIX(0.34414) * cb - FIX(0.71414) * cr + ONE_HALF;
-       add_b = FIX(1.77200) * cb + ONE_HALF;
-
-       y  = (*Y++) << SCALEBITS;
-       r = (y + add_r) >> SCALEBITS;
-       g = (y + add_g) >> SCALEBITS;
-       b = (y + add_b) >> SCALEBITS;
-
-       priv->decomp_block[i*2][j*6+0]=clamp(r);
-       priv->decomp_block[i*2][j*6+1]=clamp(g);
-       priv->decomp_block[i*2][j*6+2]=clamp(b);
-
-       y  = (*Y++) << SCALEBITS;
-       r = (y + add_r) >> SCALEBITS;
-       g = (y + add_g) >> SCALEBITS;
-       b = (y + add_b) >> SCALEBITS;
-
-       priv->decomp_block[i*2][j*6+3]=clamp(r);
-       priv->decomp_block[i*2][j*6+4]=clamp(g);
-       priv->decomp_block[i*2][j*6+5]=clamp(b);
-
-       y  = (Y[16-2]) << SCALEBITS;
-       r = (y + add_r) >> SCALEBITS;
-       g = (y + add_g) >> SCALEBITS;
-       b = (y + add_b) >> SCALEBITS;
-
-       priv->decomp_block[i*2+1][j*6+0]=clamp(r);
-       priv->decomp_block[i*2+1][j*6+1]=clamp(g);
-       priv->decomp_block[i*2+1][j*6+2]=clamp(b);
-
-       y  = (Y[16-1]) << SCALEBITS;
-       r = (y + add_r) >> SCALEBITS;
-       g = (y + add_g) >> SCALEBITS;
-       b = (y + add_b) >> SCALEBITS;
-
-       priv->decomp_block[i*2+1][j*6+3]=clamp(r);
-       priv->decomp_block[i*2+1][j*6+4]=clamp(g);
-       priv->decomp_block[i*2+1][j*6+5]=clamp(b);
-
-    }
-    Y  += 16;
-    p  += offset_to_next_row;
-    p2 += offset_to_next_row;
-  }
-
-#undef SCALEBITS
-#undef ONE_HALF
-#undef FIX
-
-}
-
-
-/*
- *  YCrCb -> BGR24 (2x2)
- *  .-------.
- *  | 1 | 2 |
- *  |---+---|
- *  | 3 | 4 |
- *  `-------'
- */
-static void YCrCB_to_BGR24_2x2(struct jdec_private *priv)
-{
-  const unsigned char *Y, *Cb, *Cr;
-  unsigned char *p, *p2;
-  int i,j;
-  int offset_to_next_row;
-
-#define SCALEBITS       10
-#define ONE_HALF        (1UL << (SCALEBITS-1))
-#define FIX(x)          ((int)((x) * (1UL<<SCALEBITS) + 0.5))
-
-  p = priv->plane[0];
-  p2 = priv->plane[0] + priv->width*3;
-  Y = priv->Y;
-  Cb = priv->Cb;
-  Cr = priv->Cr;
-  offset_to_next_row = (priv->width*3*2) - 16*3;
-  for (i=0; i<8; i++) {
-
-    for (j=0; j<8; j++) {
-
-       int y, cb, cr;
-       int add_r, add_g, add_b;
-       int r, g , b;
-
-       cb = *Cb++ - 128;
-       cr = *Cr++ - 128;
-       add_r = FIX(1.40200) * cr + ONE_HALF;
-       add_g = - FIX(0.34414) * cb - FIX(0.71414) * cr + ONE_HALF;
-       add_b = FIX(1.77200) * cb + ONE_HALF;
-
-       y  = (*Y++) << SCALEBITS;
-       b = (y + add_b) >> SCALEBITS;
-       *p++ = clamp(b);
-       g = (y + add_g) >> SCALEBITS;
-       *p++ = clamp(g);
-       r = (y + add_r) >> SCALEBITS;
-       *p++ = clamp(r);
-
-       y  = (*Y++) << SCALEBITS;
-       b = (y + add_b) >> SCALEBITS;
-       *p++ = clamp(b);
-       g = (y + add_g) >> SCALEBITS;
-       *p++ = clamp(g);
-       r = (y + add_r) >> SCALEBITS;
-       *p++ = clamp(r);
-
-       y  = (Y[16-2]) << SCALEBITS;
-       b = (y + add_b) >> SCALEBITS;
-       *p2++ = clamp(b);
-       g = (y + add_g) >> SCALEBITS;
-       *p2++ = clamp(g);
-       r = (y + add_r) >> SCALEBITS;
-       *p2++ = clamp(r);
-
-       y  = (Y[16-1]) << SCALEBITS;
-       b = (y + add_b) >> SCALEBITS;
-       *p2++ = clamp(b);
-       g = (y + add_g) >> SCALEBITS;
-       *p2++ = clamp(g);
-       r = (y + add_r) >> SCALEBITS;
-       *p2++ = clamp(r);
-    }
-    Y  += 16;
-    p  += offset_to_next_row;
-    p2 += offset_to_next_row;
-  }
-
-#undef SCALEBITS
-#undef ONE_HALF
-#undef FIX
-
-}
-
-
-
-/**
- *  YCrCb -> Grey (1x1)
- *  .---.
- *  | 1 |
- *  `---'
- */
-static void YCrCB_to_Grey_1x1(struct jdec_private *priv)
-{
-  const unsigned char *y;
-  unsigned char *p;
-  unsigned int i;
-  int offset_to_next_row;
-
-  p = priv->plane[0];
-  y = priv->Y;
-  offset_to_next_row = priv->width;
-
-  for (i=0; i<8; i++) {
-     memcpy(p, y, 8);
-     y+=8;
-     p += offset_to_next_row;
-  }
-}
-
-/**
- *  YCrCb -> Grey (2x1)
- *  .-------.
- *  | 1 | 2 |
- *  `-------'
- */
-static void YCrCB_to_Grey_2x1(struct jdec_private *priv)
-{
-  const unsigned char *y;
-  unsigned char *p;
-  unsigned int i;
-
-  p = priv->plane[0];
-  y = priv->Y;
-
-  for (i=0; i<8; i++) {
-     memcpy(p, y, 16);
-     y += 16;
-     p += priv->width;
-  }
-}
-
-
-/**
- *  YCrCb -> Grey (1x2)
- *  .---.
- *  | 1 |
- *  |---|
- *  | 2 |
- *  `---'
- */
-static void YCrCB_to_Grey_1x2(struct jdec_private *priv)
-{
-  const unsigned char *y;
-  unsigned char *p;
-  unsigned int i;
-
-  p = priv->plane[0];
-  y = priv->Y;
-
-  for (i=0; i<16; i++) {
-     memcpy(p, y, 8);
-     y += 8;
-     p += priv->width;
-  }
-}
-
-/**
- *  YCrCb -> Grey (2x2)
- *  .-------.
- *  | 1 | 2 |
- *  |---+---|
- *  | 3 | 4 |
- *  `-------'
- */
-static void YCrCB_to_Grey_2x2(struct jdec_private *priv)
-{
-  const unsigned char *y;
-  unsigned char *p;
-  unsigned int i;
-
-  p = priv->plane[0];
-  y = priv->Y;
-
-  for (i=0; i<16; i++) {
-     memcpy(p, y, 16);
-     y += 16;
-     p += priv->width;
-  }
-}
-
-
-/*
- * Decode all the 3 components for 1x1 
- */
-static void decode_MCU_1x1_3planes(struct jdec_private *priv)
-{
-  // Y
-  process_Huffman_data_unit(priv, cY);
-  IDCT(&priv->component_infos[cY], priv->Y, 8);
-  
-  // Cb
-  process_Huffman_data_unit(priv, cCb);
-  IDCT(&priv->component_infos[cCb], priv->Cb, 8);
-
-  // Cr
-  process_Huffman_data_unit(priv, cCr);
-  IDCT(&priv->component_infos[cCr], priv->Cr, 8);
-}
-
-/*
- * Decode a 1x1 directly in 1 color
- */
-static void decode_MCU_1x1_1plane(struct jdec_private *priv)
-{
-  // Y
-  process_Huffman_data_unit(priv, cY);
-  IDCT(&priv->component_infos[cY], priv->Y, 8);
-  
-  // Cb
-  process_Huffman_data_unit(priv, cCb);
-  IDCT(&priv->component_infos[cCb], priv->Cb, 8);
-
-  // Cr
-  process_Huffman_data_unit(priv, cCr);
-  IDCT(&priv->component_infos[cCr], priv->Cr, 8);
-}
-
-
-/*
- * Decode a 2x1
- *  .-------.
- *  | 1 | 2 |
- *  `-------'
- */
-static void decode_MCU_2x1_3planes(struct jdec_private *priv)
-{
-  // Y
-  process_Huffman_data_unit(priv, cY);
-  IDCT(&priv->component_infos[cY], priv->Y, 16);
-  process_Huffman_data_unit(priv, cY);
-  IDCT(&priv->component_infos[cY], priv->Y+8, 16);
-
-  // Cb
-  process_Huffman_data_unit(priv, cCb);
-  IDCT(&priv->component_infos[cCb], priv->Cb, 8);
-
-  // Cr
-  process_Huffman_data_unit(priv, cCr);
-  IDCT(&priv->component_infos[cCr], priv->Cr, 8);
-}
-
-/*
- * Decode a 2x1
- *  .-------.
- *  | 1 | 2 |
- *  `-------'
- */
-static void decode_MCU_2x1_1plane(struct jdec_private *priv)
-{
-  // Y
-  process_Huffman_data_unit(priv, cY);
-  IDCT(&priv->component_infos[cY], priv->Y, 16);
-  process_Huffman_data_unit(priv, cY);
-  IDCT(&priv->component_infos[cY], priv->Y+8, 16);
-
-  // Cb
-  process_Huffman_data_unit(priv, cCb);
-
-  // Cr
-  process_Huffman_data_unit(priv, cCr);
-}
-
-
-/*
- * Decode a 2x2
- *  .-------.
- *  | 1 | 2 |
- *  |---+---|
- *  | 3 | 4 |
- *  `-------'
- */
-static void decode_MCU_2x2_3planes(struct jdec_private *priv)
-{
-  // Y
-  process_Huffman_data_unit(priv, cY);
-  IDCT(&priv->component_infos[cY], priv->Y, 16);
-  process_Huffman_data_unit(priv, cY);
-  IDCT(&priv->component_infos[cY], priv->Y+8, 16);
-  process_Huffman_data_unit(priv, cY);
-  IDCT(&priv->component_infos[cY], priv->Y+64*2, 16);
-  process_Huffman_data_unit(priv, cY);
-  IDCT(&priv->component_infos[cY], priv->Y+64*2+8, 16);
-
-  // Cb
-  process_Huffman_data_unit(priv, cCb);
-  IDCT(&priv->component_infos[cCb], priv->Cb, 8);
-
-  // Cr
-  process_Huffman_data_unit(priv, cCr);
-  IDCT(&priv->component_infos[cCr], priv->Cr, 8);
-}
-
-/*
- * Decode a 2x2 directly in GREY format (8bits)
- *  .-------.
- *  | 1 | 2 |
- *  |---+---|
- *  | 3 | 4 |
- *  `-------'
- */
-static void decode_MCU_2x2_1plane(struct jdec_private *priv)
-{
-  // Y
-  process_Huffman_data_unit(priv, cY);
-  IDCT(&priv->component_infos[cY], priv->Y, 16);
-  process_Huffman_data_unit(priv, cY);
-  IDCT(&priv->component_infos[cY], priv->Y+8, 16);
-  process_Huffman_data_unit(priv, cY);
-  IDCT(&priv->component_infos[cY], priv->Y+64*2, 16);
-  process_Huffman_data_unit(priv, cY);
-  IDCT(&priv->component_infos[cY], priv->Y+64*2+8, 16);
-
-  // Cb
-  process_Huffman_data_unit(priv, cCb);
-
-  // Cr
-  process_Huffman_data_unit(priv, cCr);
-}
-
-/*
- * Decode a 1x2 mcu
- *  .---.
- *  | 1 |
- *  |---|
- *  | 2 |
- *  `---'
- */
-static void decode_MCU_1x2_3planes(struct jdec_private *priv)
-{
-  // Y
-  process_Huffman_data_unit(priv, cY);
-  IDCT(&priv->component_infos[cY], priv->Y, 8);
-  process_Huffman_data_unit(priv, cY);
-  IDCT(&priv->component_infos[cY], priv->Y+64, 8);
-
-  // Cb
-  process_Huffman_data_unit(priv, cCb);
-  IDCT(&priv->component_infos[cCb], priv->Cb, 8);
-
-  // Cr
-  process_Huffman_data_unit(priv, cCr);
-  IDCT(&priv->component_infos[cCr], priv->Cr, 8);
-}
-
-/*
- * Decode a 1x2 mcu
- *  .---.
- *  | 1 |
- *  |---|
- *  | 2 |
- *  `---'
- */
-static void decode_MCU_1x2_1plane(struct jdec_private *priv)
-{
-  // Y
-  process_Huffman_data_unit(priv, cY);
-  IDCT(&priv->component_infos[cY], priv->Y, 8);
-  process_Huffman_data_unit(priv, cY);
-  IDCT(&priv->component_infos[cY], priv->Y+64, 8);
-
-  // Cb
-  process_Huffman_data_unit(priv, cCb);
-
-  // Cr
-  process_Huffman_data_unit(priv, cCr);
-}
-
-static void print_SOF(const unsigned char *stream)
-{
-  int width, height, nr_components, precision;
-#if DEBUG
-  const char *nr_components_to_string[] = {
-     "????",
-     "Grayscale",
-     "????",
-     "YCbCr",
-     "CYMK"
-  };
-#endif
-
-  precision = stream[2];
-  height = be16_to_cpu(stream+3);
-  width  = be16_to_cpu(stream+5);
-  nr_components = stream[7];
-
-  trace("> SOF marker\n");
-  trace("Size:%dx%d nr_components:%d (%s)  precision:%d\n", 
-      width, height,
-      nr_components, nr_components_to_string[nr_components],
-      precision);
-}
-
-/*******************************************************************************
- *
- * JPEG/JFIF Parsing functions
- *
- * Note: only a small subset of the jpeg file format is supported. No markers,
- * nor progressive stream is supported.
- *
- ******************************************************************************/
-
-static void build_quantization_table(float *qtable, const unsigned char *ref_table)
-{
-  /* Taken from libjpeg. Copyright Independent JPEG Group's LLM idct.
-   * For float AA&N IDCT method, divisors are equal to quantization
-   * coefficients scaled by scalefactor[row]*scalefactor[col], where
-   *   scalefactor[0] = 1
-   *   scalefactor[k] = cos(k*PI/16) * sqrt(2)    for k=1..7
-   * We apply a further scale factor of 8.
-   * What's actually stored is 1/divisor so that the inner loop can
-   * use a multiplication rather than a division.
-   */
-  int i, j;
-  static const double aanscalefactor[8] = {
-     1.0, 1.387039845, 1.306562965, 1.175875602,
-     1.0, 0.785694958, 0.541196100, 0.275899379
-  };
-  const unsigned char *zz = zigzag;
-
-  for (i=0; i<8; i++) {
-     for (j=0; j<8; j++) {
-       *qtable++ = ref_table[*zz++] * aanscalefactor[i] * aanscalefactor[j];
-     }
-   }
-
-}
-
-static int parse_DQT(struct jdec_private *priv, const unsigned char *stream)
-{
-  int qi;
-  float *table;
-  const unsigned char *dqt_block_end;
-
-  trace("> DQT marker\n");
-  dqt_block_end = stream + be16_to_cpu(stream);
-  stream += 2;	/* Skip length */
-
-  while (stream < dqt_block_end)
-   {
-     qi = *stream++;
-#if SANITY_CHECK
-     if (qi>>4)
-       error("16 bits quantization table is not supported\n");
-     if (qi>4)
-       error("No more 4 quantization table is supported (got %d)\n", qi);
-#endif
-     table = priv->Q_tables[qi];
-     build_quantization_table(table, stream);
-     stream += 64;
-   }
-  trace("< DQT marker\n");
-  return 0;
-}
-
-static int parse_SOF(struct jdec_private *priv, const unsigned char *stream)
-{
-  int i, width, height, nr_components, cid, sampling_factor;
-  int Q_table;
-  struct component *c;
-
-  trace("> SOF marker\n");
-  print_SOF(stream);
-
-  height = be16_to_cpu(stream+3);
-  width  = be16_to_cpu(stream+5);
-  nr_components = stream[7];
-#if SANITY_CHECK
-  if (stream[2] != 8)
-    error("Precision other than 8 is not supported\n");
-  if (width>JPEG_MAX_WIDTH || height>JPEG_MAX_HEIGHT)
-    error("Width and Height (%dx%d) seems suspicious\n", width, height);
-  if (nr_components != 3)
-    error("We only support YUV images\n");
-  //if (height%16)
-//    error("Height need to be a multiple of 16 (current height is %d)\n", height);
-//  if (width%16)
-  //  error("Width need to be a multiple of 16 (current Width is %d)\n", width);
-#endif
-  stream += 8;
-  for (i=0; i<nr_components; i++) {
-     cid = *stream++;
-     sampling_factor = *stream++;
-     Q_table = *stream++;
-     c = &priv->component_infos[i];
-#if SANITY_CHECK
-     c->cid = cid;
-     if (Q_table >= COMPONENTS)
-       error("Bad Quantization table index (got %d, max allowed %d)\n", Q_table, COMPONENTS-1);
-#endif
-     c->Vfactor = sampling_factor&0xf;
-     c->Hfactor = sampling_factor>>4;
-     c->Q_table = priv->Q_tables[Q_table];
-     trace("Component:%d  factor:%dx%d  Quantization table:%d\n",
-           cid, c->Hfactor, c->Hfactor, Q_table );
-
-  }
-  priv->width = width;
-  priv->height = height;
-
-  trace("< SOF marker\n");
-
-  return 0;
-}
-
-static int parse_SOS(struct jdec_private *priv, const unsigned char *stream)
-{
-  unsigned int i, cid, table;
-  unsigned int nr_components = stream[2];
-
-  trace("> SOS marker\n");
-
-#if SANITY_CHECK
-  if (nr_components != 3)
-    error("We only support YCbCr image\n");
-#endif
-
-  stream += 3;
-  for (i=0;i<nr_components;i++) {
-     cid = *stream++;
-     table = *stream++;
-#if SANITY_CHECK
-     if ((table&0xf)>=4)
-	error("We do not support more than 2 AC Huffman table\n");
-     if ((table>>4)>=4)
-	error("We do not support more than 2 DC Huffman table\n");
-     if (cid != priv->component_infos[i].cid)
-        error("SOS cid order (%d:%d) isn't compatible with the SOF marker (%d:%d)\n",
-	      i, cid, i, priv->component_infos[i].cid);
-     trace("ComponentId:%d  tableAC:%d tableDC:%d\n", cid, table&0xf, table>>4);
-#endif
-     priv->component_infos[i].AC_table = &priv->HTAC[table&0xf];
-     priv->component_infos[i].DC_table = &priv->HTDC[table>>4];
-  }
-  priv->stream = stream+3;
-  trace("< SOS marker\n");
-  return 0;
-}
-
-static int parse_DHT(struct jdec_private *priv, const unsigned char *stream)
-{
-  unsigned int count, i;
-  unsigned char huff_bits[17];
-  int length, index;
-
-  length = be16_to_cpu(stream) - 2;
-  stream += 2;	/* Skip length */
-
-  trace("> DHT marker (length=%d)\n", length);
-
-  while (length>0) {
-     index = *stream++;
-
-     /* We need to calculate the number of bytes 'vals' will takes */
-     huff_bits[0] = 0;
-     count = 0;
-     for (i=1; i<17; i++) {
-	huff_bits[i] = *stream++;
-	count += huff_bits[i];
-     }
-#if SANITY_CHECK
-     if (count >= HUFFMAN_BITS_SIZE)
-       error("No more than %d bytes is allowed to describe a huffman table", HUFFMAN_BITS_SIZE);
-     if ( (index &0xf) >= HUFFMAN_TABLES)
-       error("No more than %d Huffman tables is supported (got %d)\n", HUFFMAN_TABLES, index&0xf);
-     trace("Huffman table %s[%d] length=%d\n", (index&0xf0)?"AC":"DC", index&0xf, count);
-#endif
-
-     if (index & 0xf0 )
-       build_huffman_table(huff_bits, stream, &priv->HTAC[index&0xf]);
-     else
-       build_huffman_table(huff_bits, stream, &priv->HTDC[index&0xf]);
-
-     length -= 1;
-     length -= 16;
-     length -= count;
-     stream += count;
-  }
-  trace("< DHT marker\n");
-  return 0;
-}
-
-static int parse_DRI(struct jdec_private *priv, const unsigned char *stream)
-{
-  unsigned int length;
-
-  trace("> DRI marker\n");
-
-  length = be16_to_cpu(stream);
-
-#if SANITY_CHECK
-  if (length != 4)
-    error("Length of DRI marker need to be 4\n");
-#endif
-
-  priv->restart_interval = be16_to_cpu(stream+2);
-
-#if DEBUG
-  trace("Restart interval = %d\n", priv->restart_interval);
-#endif
-
-  trace("< DRI marker\n");
-
-  return 0;
-}
-
-
-
-static void resync(struct jdec_private *priv)
-{
-  int i;
-
-  /* Init DC coefficients */
-  for (i=0; i<COMPONENTS; i++)
-     priv->component_infos[i].previous_DC = 0;
-
-  priv->reservoir = 0;
-  priv->nbits_in_reservoir = 0;
-  if (priv->restart_interval > 0)
-    priv->restarts_to_go = priv->restart_interval;
-  else
-    priv->restarts_to_go = -1;
-}
-
-static int find_next_rst_marker(struct jdec_private *priv)
-{
-  int rst_marker_found = 0;
-  int marker;
-  const unsigned char *stream = priv->stream;
-
-  /* Parse marker */
-  while (!rst_marker_found)
-   {
-     while (*stream++ != 0xff)
-      {
-	if (stream >= priv->stream_end)
-	  error("EOF while search for a RST marker.");
-      }
-     /* Skip any padding ff byte (this is normal) */
-     while (*stream == 0xff)
-       stream++;
-
-     marker = *stream++;
-     if ((RST+priv->last_rst_marker_seen) == marker)
-       rst_marker_found = 1;
-     else if (marker >= RST && marker <= RST7)
-       error("Wrong Reset marker found, abording");
-     else if (marker == EOI)
-       return 0;
-   }
-  trace("RST Marker %d found at offset %d\n", priv->last_rst_marker_seen, stream - priv->stream_begin);
-
-  priv->stream = stream;
-  priv->last_rst_marker_seen++;
-  priv->last_rst_marker_seen &= 7;
-
-  return 0;
-}
-
-static int parse_JFIF(struct jdec_private *priv, const unsigned char *stream)
-{
-  int chuck_len;
-  int marker;
-  int sos_marker_found = 0;
-  int dht_marker_found = 0;
-  const unsigned char *next_chunck;
-
-  /* Parse marker */
-  while (!sos_marker_found)
-   {
-     if (*stream++ != 0xff)
-       goto bogus_jpeg_format;
-     /* Skip any padding ff byte (this is normal) */
-     while (*stream == 0xff)
-       stream++;
-
-     marker = *stream++;
-     chuck_len = be16_to_cpu(stream);
-     next_chunck = stream + chuck_len;
-     switch (marker)
-      {
-       case SOF:
-	 if (parse_SOF(priv, stream) < 0)
-	   return -1;
-	 break;
-       case DQT:
-	 if (parse_DQT(priv, stream) < 0)
-	   return -1;
-	 break;
-       case SOS:
-	 if (parse_SOS(priv, stream) < 0)
-	   return -1;
-	 sos_marker_found = 1;
-	 break;
-       case DHT:
-	 if (parse_DHT(priv, stream) < 0)
-	   return -1;
-	 dht_marker_found = 1;
-	 break;
-       case DRI:
-	 if (parse_DRI(priv, stream) < 0)
-	   return -1;
-	 break;
-       default:
-	 trace("> Unknown marker %2.2x\n", marker);
-	 break;
-      }
-
-     stream = next_chunck;
-   }
-
-  if (!dht_marker_found) {
-    trace("No Huffman table loaded, using the default one\n");
-    build_default_huffman_tables(priv);
-  }
-
-#ifdef SANITY_CHECK
-  if (   (priv->component_infos[cY].Hfactor < priv->component_infos[cCb].Hfactor)
-      || (priv->component_infos[cY].Hfactor < priv->component_infos[cCr].Hfactor))
-    error("Horizontal sampling factor for Y should be greater than horitontal sampling factor for Cb or Cr\n");
-  if (   (priv->component_infos[cY].Vfactor < priv->component_infos[cCb].Vfactor)
-      || (priv->component_infos[cY].Vfactor < priv->component_infos[cCr].Vfactor))
-    error("Vertical sampling factor for Y should be greater than vertical sampling factor for Cb or Cr\n");
-  if (   (priv->component_infos[cCb].Hfactor!=1) 
-      || (priv->component_infos[cCr].Hfactor!=1)
-      || (priv->component_infos[cCb].Vfactor!=1)
-      || (priv->component_infos[cCr].Vfactor!=1))
-    error("Sampling other than 1x1 for Cr and Cb is not supported");
-#endif
-
-  return 0;
-bogus_jpeg_format:
-  trace("Bogus jpeg format\n");
-  return -1;
-}
-
-/*******************************************************************************
- *
- * Functions exported of the library.
- *
- * Note: Some applications can access directly to internal pointer of the
- * structure. It's is not recommended, but if you have many images to
- * uncompress with the same parameters, some functions can be called to speedup
- * the decoding.
- *
- ******************************************************************************/
-
-/**
- * Allocate a new tinyjpeg decoder object.
- *
- * Before calling any other functions, an object need to be called.
- */
-struct jdec_private *tinyjpeg_init(void *(*allocate_mem)(unsigned int),void (*free_mem)(void *))
-{
-  struct jdec_private *priv;
-  unsigned int i;
- 
-  priv = (struct jdec_private *)allocate_mem(sizeof(struct jdec_private));
-  for(i=0;i<sizeof(struct jdec_private);i++) {
-	  char *pzero = (char*)priv;
-	  pzero[i]=0;
-  }
-  priv->allocate_mem=allocate_mem;
-  priv->free_mem=free_mem;
-  if (priv == NULL)
-    return NULL;
-  return priv;
-}
-
-/**
- * Free a tinyjpeg object.
- *
- * No others function can be called after this one.
- */
-void tinyjpeg_free(struct jdec_private *priv)
-{
-  int i;
-  for (i=0; i<COMPONENTS; i++) {
-     if (priv->components[i]) {
-     //  priv->free_mem(priv->components[i]);
-	priv->components[i] = NULL;
-    }
-  }
-  priv->free_mem(priv);
-}
-
-/**
- * Initialize the tinyjpeg object and prepare the decoding of the stream.
- *
- * Check if the jpeg can be decoded with this jpeg decoder.
- * Fill some table used for preprocessing.
- */
-int tinyjpeg_parse_header(struct jdec_private *priv, const unsigned char *buf, unsigned int size)
-{
-  int ret;
-
-  /* Identify the file */
-  if ((buf[0] != 0xFF) || (buf[1] != SOI))
-    error("Not a JPG file ?\n");
-
-  priv->stream_begin = buf+2;
-  priv->stream_length = size-2;
-  priv->stream_end = priv->stream_begin + priv->stream_length;
-
-  ret = parse_JFIF(priv, priv->stream_begin);
-
-  return ret;
-}
-
-static const decode_MCU_fct decode_mcu_3comp_table[4] = {
-   decode_MCU_1x1_3planes,
-   decode_MCU_1x2_3planes,
-   decode_MCU_2x1_3planes,
-   decode_MCU_2x2_3planes,
-};
-
-static const decode_MCU_fct decode_mcu_1comp_table[4] = {
-   decode_MCU_1x1_1plane,
-   decode_MCU_1x2_1plane,
-   decode_MCU_2x1_1plane,
-   decode_MCU_2x2_1plane,
-};
-
-static const convert_colorspace_fct convert_colorspace_yuv420p[4] = {
-   YCrCB_to_YUV420P_1x1,
-   YCrCB_to_YUV420P_1x2,
-   YCrCB_to_YUV420P_2x1,
-   YCrCB_to_YUV420P_2x2,
-};
-
-static const convert_colorspace_fct convert_colorspace_rgb24[4] = {
-   YCrCB_to_RGB24_1x1,
-   YCrCB_to_RGB24_1x2,
-   YCrCB_to_RGB24_2x1,
-   YCrCB_to_RGB24_2x2,
-};
-
-static const convert_colorspace_fct convert_colorspace_bgr24[4] = {
-   YCrCB_to_BGR24_1x1,
-   YCrCB_to_BGR24_1x2,
-   YCrCB_to_BGR24_2x1,
-   YCrCB_to_BGR24_2x2,
-};
-
-static const convert_colorspace_fct convert_colorspace_grey[4] = {
-   YCrCB_to_Grey_1x1,
-   YCrCB_to_Grey_1x2,
-   YCrCB_to_Grey_2x1,
-   YCrCB_to_Grey_2x2,
-};
-
-/**
- * Decode and convert the jpeg image into @pixfmt@ image
- *
- * Note: components will be automaticaly allocated if no memory is attached.
- */
-int tinyjpeg_decode(struct jdec_private *priv, int pixfmt)
-{
-  unsigned int x, y, xstride_by_mcu, ystride_by_mcu;
-  unsigned int bytes_per_blocklines[3], bytes_per_mcu[3];
-  decode_MCU_fct decode_MCU;
-  const decode_MCU_fct *decode_mcu_table;
-  const convert_colorspace_fct *colorspace_array_conv;
-  convert_colorspace_fct convert_to_pixfmt;
-
-  if (pixfmt!=TINYJPEG_FMT_RGB24)
-	  error("Only TINYJPEG_FMT_RGB24 is supported in this version");
-
-  if (setjmp(priv->jump_state))
-    return -1;
-
-  /* To keep gcc happy initialize some array */
-  bytes_per_mcu[1] = 0;
-  bytes_per_mcu[2] = 0;
-  bytes_per_blocklines[1] = 0;
-  bytes_per_blocklines[2] = 0;
-
-  decode_mcu_table = decode_mcu_3comp_table;
-  switch (pixfmt) {
-     case TINYJPEG_FMT_YUV420P:
-       colorspace_array_conv = convert_colorspace_yuv420p;
-       if (priv->components[0] == NULL)
-	 priv->components[0] = (uint8_t *)priv->allocate_mem(priv->width * priv->height);
-       if (priv->components[1] == NULL)
-	 priv->components[1] = (uint8_t *)priv->allocate_mem(priv->width * priv->height/4);
-       if (priv->components[2] == NULL)
-	 priv->components[2] = (uint8_t *)priv->allocate_mem(priv->width * priv->height/4);
-       bytes_per_blocklines[0] = priv->width;
-       bytes_per_blocklines[1] = priv->width/4;
-       bytes_per_blocklines[2] = priv->width/4;
-       bytes_per_mcu[0] = 8;
-       bytes_per_mcu[1] = 4;
-       bytes_per_mcu[2] = 4;
-       break;
-
-     case TINYJPEG_FMT_RGB24:
-       colorspace_array_conv = convert_colorspace_rgb24;
-       if (priv->components[0] == NULL)
-	 priv->components[0] = (uint8_t *)priv->allocate_mem(priv->width * priv->height * 3);
-       bytes_per_blocklines[0] = priv->width * 3;
-       bytes_per_mcu[0] = 3*8;
-       break;
-
-     case TINYJPEG_FMT_BGR24:
-       colorspace_array_conv = convert_colorspace_bgr24;
-       if (priv->components[0] == NULL)
-	 priv->components[0] = (uint8_t *)priv->allocate_mem(priv->width * priv->height * 3);
-       bytes_per_blocklines[0] = priv->width * 3;
-       bytes_per_mcu[0] = 3*8;
-       break;
-
-     case TINYJPEG_FMT_GREY:
-       decode_mcu_table = decode_mcu_1comp_table;
-       colorspace_array_conv = convert_colorspace_grey;
-       if (priv->components[0] == NULL)
-	 priv->components[0] = (uint8_t *)priv->allocate_mem(priv->width * priv->height);
-       bytes_per_blocklines[0] = priv->width;
-       bytes_per_mcu[0] = 8;
-       break;
-
-     default:
-       trace("Bad pixel format\n");
-       return -1;
-  }
-
-  xstride_by_mcu = ystride_by_mcu = 8;
-  if ((priv->component_infos[cY].Hfactor | priv->component_infos[cY].Vfactor) == 1) {
-     decode_MCU = decode_mcu_table[0];
-     convert_to_pixfmt = colorspace_array_conv[0];
-     trace("Use decode 1x1 sampling\n");
-  } else if (priv->component_infos[cY].Hfactor == 1) {
-     decode_MCU = decode_mcu_table[1];
-     convert_to_pixfmt = colorspace_array_conv[1];
-     ystride_by_mcu = 16;
-     trace("Use decode 1x2 sampling (not supported)\n");
-  } else if (priv->component_infos[cY].Vfactor == 2) {
-     decode_MCU = decode_mcu_table[3];
-     convert_to_pixfmt = colorspace_array_conv[3];
-     xstride_by_mcu = 16;
-     ystride_by_mcu = 16;
-     trace("Use decode 2x2 sampling\n");
-  } else {
-     decode_MCU = decode_mcu_table[2];
-     convert_to_pixfmt = colorspace_array_conv[2];
-     xstride_by_mcu = 16;
-     trace("Use decode 2x1 sampling\n");
-  }
-
-  resync(priv);
-
-  /* Don't forget to that block can be either 8 or 16 lines */
-  bytes_per_blocklines[0] *= ystride_by_mcu;
-  bytes_per_blocklines[1] *= ystride_by_mcu;
-  bytes_per_blocklines[2] *= ystride_by_mcu;
-
-  bytes_per_mcu[0] *= xstride_by_mcu/8;
-  bytes_per_mcu[1] *= xstride_by_mcu/8;
-  bytes_per_mcu[2] *= xstride_by_mcu/8;
-
-  /* Just the decode the image by macroblock (size is 8x8, 8x16, or 16x16) */
-
-
-
-  for (y=0; y < priv->height; y+=ystride_by_mcu)
-   {
-     //trace("Decoding row %d\n", y);
-   ///  priv->plane[0] = priv->components[0] + (y/ystride_by_mcu * bytes_per_blocklines[0]);
-   ///  priv->plane[1] = priv->components[1] + (y/ystride_by_mcu * bytes_per_blocklines[1]);
-   ///  priv->plane[2] = priv->components[2] + (y/ystride_by_mcu * bytes_per_blocklines[2]);
-     for (x=0; x < priv->width; x+=xstride_by_mcu)
-      {
-	     int i,copy_x,copy_y;
-	decode_MCU(priv);
-	convert_to_pixfmt(priv);
-	//priv->plane[0] += bytes_per_mcu[0];
-	//priv->plane[1] += bytes_per_mcu[1];
-	//priv->plane[2] += bytes_per_mcu[2];
-
-	copy_x=priv->width-x;
-	if (copy_x>xstride_by_mcu)
-		copy_x=xstride_by_mcu;
-
-	copy_y=priv->height-y;
-	if (copy_y>ystride_by_mcu)
-		copy_y=ystride_by_mcu;
-
-	for(i=0;i<copy_y;i++) {
-		unsigned char *dst = &priv->components[0][((y+i)*priv->width+x)*3];
-		memcpy(dst,&priv->decomp_block[i][0],copy_x*3);
-	}
-
-	if (priv->restarts_to_go>0)
-	 {
-	   priv->restarts_to_go--;
-	   if (priv->restarts_to_go == 0)
-	    {
-	      priv->stream -= (priv->nbits_in_reservoir/8);
-	      resync(priv);
-	      if (find_next_rst_marker(priv) < 0)
-		return -1;
-	    }
-	 }
-      }
-   }
-
-  trace("Input file size: %d\n", priv->stream_length+2);
-  trace("Input bytes actually read: %d\n", priv->stream - priv->stream_begin + 2);
-
-  return 0;
-}
-
-const char *tinyjpeg_get_errorstring(struct jdec_private *priv)
-{
-  /* FIXME: the error string must be store in the context */
-  priv = priv;
-  return error_string;
-}
-
-void tinyjpeg_get_size(struct jdec_private *priv, unsigned int *width, unsigned int *height)
-{
-  *width = priv->width;
-  *height = priv->height;
-}
-
-int tinyjpeg_get_components(struct jdec_private *priv, unsigned char **components)
-{
-  int i;
-  for (i=0; priv->components[i] && i<COMPONENTS; i++)
-    components[i] = priv->components[i];
-  return 0;
-}
-
-int tinyjpeg_set_components(struct jdec_private *priv, unsigned char **components, unsigned int ncomponents)
-{
-  unsigned int i;
-  if (ncomponents > COMPONENTS)
-    ncomponents = COMPONENTS;
-  for (i=0; i<ncomponents; i++)
-    priv->components[i] = components[i];
-  return 0;
-}
-
-int tinyjpeg_set_flags(struct jdec_private *priv, int flags)
-{
-  int oldflags = priv->flags;
-  priv->flags = flags;
-  return oldflags;
-}
-
diff --git a/drivers/jpg/tinyjpeg.h b/drivers/jpg/tinyjpeg.h
deleted file mode 100644
index fe4652360ba..00000000000
--- a/drivers/jpg/tinyjpeg.h
+++ /dev/null
@@ -1,74 +0,0 @@
-/*
- * Small jpeg decoder library (header file)
- *
- * Copyright (c) 2006, Luc Saillard <luc@saillard.org>
- * All rights reserved.
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- * 
- * - Redistributions of source code must retain the above copyright notice,
- *  this list of conditions and the following disclaimer.
- *
- * - Redistributions in binary form must reproduce the above copyright notice,
- *  this list of conditions and the following disclaimer in the documentation
- *  and/or other materials provided with the distribution.
- *
- * - Neither the name of the author nor the names of its contributors may be
- *  used to endorse or promote products derived from this software without
- *  specific prior written permission.
- * 
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- *
- */
-
-
-#ifndef __JPEGDEC_H__
-#define __JPEGDEC_H__
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-struct jdec_private;
-
-/* Flags that can be set by any applications */
-#define TINYJPEG_FLAGS_MJPEG_TABLE	(1<<1)
-
-/* Format accepted in outout */
-enum tinyjpeg_fmt {
-   TINYJPEG_FMT_GREY = 1,
-   TINYJPEG_FMT_BGR24,
-   TINYJPEG_FMT_RGB24,
-   TINYJPEG_FMT_YUV420P,
-};
-
-struct jdec_private *tinyjpeg_init(void *(*allocate_mem)(unsigned int),void (*free_mem)(void *));
-
-void tinyjpeg_free(struct jdec_private *priv);
-
-int tinyjpeg_parse_header(struct jdec_private *priv, const unsigned char *buf, unsigned int size);
-int tinyjpeg_decode(struct jdec_private *priv, int pixel_format);
-const char *tinyjpeg_get_errorstring(struct jdec_private *priv);
-void tinyjpeg_get_size(struct jdec_private *priv, unsigned int *width, unsigned int *height);
-int tinyjpeg_get_components(struct jdec_private *priv, unsigned char **components);
-int tinyjpeg_set_components(struct jdec_private *priv, unsigned char **components, unsigned int ncomponents);
-int tinyjpeg_set_flags(struct jdec_private *priv, int flags);
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif
-
-
-
diff --git a/drivers/pnm/SCsub b/drivers/pnm/SCsub
new file mode 100644
index 00000000000..28b35773a42
--- /dev/null
+++ b/drivers/pnm/SCsub
@@ -0,0 +1,10 @@
+Import('env')
+
+
+pnm_sources = [
+	"pnm/bitmap_loader_pnm.cpp"
+	]
+
+env.drivers_sources+=pnm_sources
+
+#env.add_source_files(env.drivers_sources, pnm_sources)
diff --git a/drivers/pnm/bitmap_loader_pnm.cpp b/drivers/pnm/bitmap_loader_pnm.cpp
new file mode 100644
index 00000000000..874e5977c1d
--- /dev/null
+++ b/drivers/pnm/bitmap_loader_pnm.cpp
@@ -0,0 +1,232 @@
+/*************************************************/
+/*  bitmap_loader_pnm.cpp                        */
+/*************************************************/
+/*            This file is part of:              */
+/*                GODOT ENGINE                   */
+/*************************************************/
+/*       Source code within this file is:        */
+/*  (c) 2007-2016 Juan Linietsky, Ariel Manzur   */
+/*             All Rights Reserved.              */
+/*************************************************/
+
+#include "bitmap_loader_pnm.h"
+#include "os/file_access.h"
+#include "scene/resources/bit_mask.h"
+
+
+static bool _get_token(FileAccessRef& f,uint8_t &saved,DVector<uint8_t>& r_token,bool p_binary=false,bool p_single_chunk=false) {
+	// Reads the next token from f into r_token; returns false if EOF is hit before any token byte, or inside a comment.
+	// 'saved' carries the delimiter byte consumed by a previous call so it is examined again first (0 means nothing pending).
+	int token_max = r_token.size();
+	DVector<uint8_t>::Write w;
+	if (token_max)
+		w=r_token.write();
+	int ofs=0;
+	bool lf=false;
+	// lf is set once a newline was seen (directly or at the end of a comment); it is only consulted in binary mode.
+
+	while(true) {
+		// Take the pending delimiter from the previous call before reading fresh data.
+		uint8_t b;
+		if (saved) {
+			b=saved;
+			saved=0;
+		} else {
+			b = f->get_8();
+		}
+		if (f->eof_reached()) {
+			if (ofs) {
+				w=DVector<uint8_t>::Write();
+				r_token.resize(ofs);
+				return true;
+			} else {
+				return false;
+			}
+		}
+		// Comments are only recognised at the start of a token, and never in binary mode.
+		if (!ofs && !p_binary && b=='#') {
+			//skip comment
+			while(b!='\n') {
+				if (f->eof_reached()) {
+					return false;
+				}
+
+				b = f->get_8();
+			}
+
+			lf=true;
+
+		} else if (b<=32 && !(p_binary && (ofs || lf))) {
+			// Bytes <= 32 count as whitespace, except in binary mode once data has started or a newline was seen.
+			if (b=='\n') {
+				lf=true;
+			}
+
+			// Whitespace ends a token in progress, unless p_single_chunk asks for everything up to EOF as one token.
+			if (ofs && !p_single_chunk) {
+				w=DVector<uint8_t>::Write();
+				r_token.resize(ofs);
+				saved=b;
+
+				return true;
+			}
+		} else {
+			// Payload byte: grow the buffer by doubling when it is full, then append.
+			bool resized=false;
+			while (ofs>=token_max) {
+				if (token_max)
+					token_max<<=1;
+				else
+					token_max=1;
+				resized=true;
+			}
+			if (resized) {
+				w=DVector<uint8_t>::Write();
+				r_token.resize(token_max);
+				w=r_token.write();
+			}
+			w[ofs++]=b;
+		}
+	}
+	// Not reached: the loop above only exits through a return.
+	return false;
+}
+
+static int _get_number_from_token(DVector<uint8_t>& r_token) {
+	// Converts the token's raw bytes to an int via String::to_int, passing the byte count explicitly.
+	int len = r_token.size();
+	DVector<uint8_t>::Read r = r_token.read();
+	return String::to_int((const char*)r.ptr(),len);
+
+}
+
+
+RES ResourceFormatPBM::load(const String &p_path,const String& p_original_path,Error *r_error) {
+	// Loads a PBM file ("P1" plain text or "P4" packed bits) into a BitMap; on failure sets *r_error and returns an empty RES.
+#define _RETURN(m_err)\
+{\
+	if (r_error)\
+		*r_error=m_err;\
+	ERR_FAIL_V(RES());\
+}
+
+
+	FileAccessRef f=FileAccess::open(p_path,FileAccess::READ);
+	uint8_t saved=0;
+	if (!f)
+		_RETURN(ERR_CANT_OPEN);
+
+	DVector<uint8_t> token;
+	// Magic number: exactly two bytes, 'P' followed by '1' (plain) or '4' (raw).
+	if (!_get_token(f,saved,token)) {
+		_RETURN(ERR_PARSE_ERROR);
+	}
+
+	if (token.size()!=2) {
+		_RETURN(ERR_FILE_CORRUPT);
+	}
+	if (token[0]!='P') {
+		_RETURN(ERR_FILE_CORRUPT);
+	}
+	if (token[1]!='1' && token[1]!='4') {
+		_RETURN(ERR_FILE_CORRUPT);
+	}
+
+	bool bits = token[1]=='4';
+	// Header fields: width, then height; both must be positive.
+	if (!_get_token(f,saved,token)) {
+		_RETURN(ERR_PARSE_ERROR);
+	}
+
+	int width = _get_number_from_token(token);
+	if (width<=0) {
+		_RETURN(ERR_FILE_CORRUPT);
+	}
+
+
+	if (!_get_token(f,saved,token)) {
+		_RETURN(ERR_PARSE_ERROR);
+	}
+
+	int height = _get_number_from_token(token);
+	if (height<=0) {
+		_RETURN(ERR_FILE_CORRUPT);
+	}
+
+
+	Ref<BitMap> bm;
+	bm.instance();
+	bm->create(Size2i(width,height));
+
+	if (!bits) {
+		// Plain format: the rest of the file is read as one token with the whitespace dropped, one character per pixel.
+		int required_bytes = width*height;
+		if (!_get_token(f,saved,token,false,true)) {
+			_RETURN(ERR_PARSE_ERROR);
+		}
+
+		if (token.size()<required_bytes) {
+			_RETURN(ERR_FILE_CORRUPT);
+		}
+
+		DVector<uint8_t>::Read r=token.read();
+
+		for(int i=0;i<height;i++) {
+			for(int j=0;j<width;j++) {
+				// A '0' character sets the mask bit; any other character clears it.
+
+				char num = r[i*width+j];
+				bm->set_bit(Point2i(j,i),num=='0');
+			}
+
+		}
+
+
+
+	} else {
+		//a single, entire token of bits!
+		if (!_get_token(f,saved,token,true)) {
+			_RETURN(ERR_PARSE_ERROR);
+		}
+		int row_bytes = (width+7)/8; // every row is padded up to a whole number of bytes
+		if (token.size()<row_bytes*height) {
+			_RETURN(ERR_FILE_CORRUPT);
+		}
+
+		DVector<uint8_t>::Read r=token.read();
+
+		for(int i=0;i<height;i++) {
+			for(int j=0;j<width;j++) {
+				// Index per row so the padding bits at the end of each row are skipped;
+				// within a byte the leftmost pixel is the most significant bit.
+				uint8_t byte = r[i*row_bytes+j/8];
+				bool bit = (byte>>(7-(j%8)))&1;
+
+				// Same polarity as the plain branch: a 0 sample sets the mask bit.
+				bm->set_bit(Point2i(j,i),!bit);
+
+			}
+
+		}
+
+	}
+	if (r_error)
+		*r_error=OK;
+	return bm;
+#undef _RETURN
+}
+
+void ResourceFormatPBM::get_recognized_extensions(List<String> *p_extensions) const {
+	p_extensions->push_back("pbm");
+}
+bool ResourceFormatPBM::handles_type(const String& p_type) const {
+	return p_type=="BitMap";
+}
+String ResourceFormatPBM::get_resource_type(const String &p_path) const {
+
+	if (p_path.extension().to_lower()=="pbm")
+		return "BitMap";
+	return "";
+}
+
+
diff --git a/drivers/pnm/bitmap_loader_pnm.h b/drivers/pnm/bitmap_loader_pnm.h
new file mode 100644
index 00000000000..6e6c8a59c8b
--- /dev/null
+++ b/drivers/pnm/bitmap_loader_pnm.h
@@ -0,0 +1,33 @@
+/*************************************************/
+/*  bitmap_loader_pnm.h                          */
+/*************************************************/
+/*            This file is part of:              */
+/*                GODOT ENGINE                   */
+/*************************************************/
+/*       Source code within this file is:        */
+/*  (c) 2007-2016 Juan Linietsky, Ariel Manzur   */
+/*             All Rights Reserved.              */
+/*************************************************/
+
+#ifndef BITMAP_LOADER_PNM_H
+#define BITMAP_LOADER_PNM_H
+
+#include "io/resource_loader.h"
+
+/**
+	@author Juan Linietsky <reduzio@gmail.com>
+*/
+class ResourceFormatPBM : public ResourceFormatLoader {
+	// Resource loader that turns ".pbm" files into BitMap resources.
+
+public:
+
+	virtual RES load(const String &p_path,const String& p_original_path="",Error *r_error=NULL);
+	virtual void get_recognized_extensions(List<String> *p_extensions) const;
+	virtual bool handles_type(const String& p_type) const;
+	virtual String get_resource_type(const String &p_path) const;
+};
+
+
+
+#endif
diff --git a/drivers/register_driver_types.cpp b/drivers/register_driver_types.cpp
index 3248177bab6..e7bbf28f016 100644
--- a/drivers/register_driver_types.cpp
+++ b/drivers/register_driver_types.cpp
@@ -14,11 +14,12 @@
 #include "png/image_loader_png.h"
 #include "webp/image_loader_webp.h"
 #include "png/resource_saver_png.h"
-#include "jpg/image_loader_jpg.h"
+#include "jpegd/image_loader_jpegd.h"
 #include "dds/texture_loader_dds.h"
 #include "pvr/texture_loader_pvr.h"
 #include "etc1/image_etc.h"
 #include "chibi/event_stream_chibi.h"
+#include "pnm/bitmap_loader_pnm.h"
 
 
 #ifdef TOOLS_ENABLED
@@ -112,6 +113,9 @@ static ResourceFormatLoaderAudioStreamMPC * mpc_stream_loader=NULL;
 #include "openssl/register_openssl.h"
 #endif
 
+
+static ResourceFormatPBM * pbm_loader=NULL;
+
 void register_core_driver_types() {
 
 #ifdef PNG_ENABLED
@@ -138,6 +142,9 @@ void register_core_driver_types() {
 	ImageLoader::add_image_format_loader( image_loader_jpg );
 #endif
 
+	pbm_loader = memnew( ResourceFormatPBM );
+	ResourceLoader::add_resource_format_loader(pbm_loader);
+
 	ObjectTypeDB::register_type<RegEx>();
 }
 
@@ -162,6 +169,7 @@ void unregister_core_driver_types() {
 		memdelete( image_loader_jpg );
 #endif
 
+	memdelete( pbm_loader );
 }
 
 
diff --git a/scene/register_scene_types.cpp b/scene/register_scene_types.cpp
index 84b65fc4d60..6c4fe1be791 100644
--- a/scene/register_scene_types.cpp
+++ b/scene/register_scene_types.cpp
@@ -221,7 +221,7 @@
 
 static ResourceFormatLoaderImage *resource_loader_image=NULL;
 static ResourceFormatLoaderWAV *resource_loader_wav=NULL;
-static ResourceFormatLoaderBitMap *resource_loader_bitmap=NULL;
+
 
 #ifdef TOOLS_ENABLED
 
@@ -249,8 +249,6 @@ void register_scene_types() {
 	resource_loader_wav = memnew( ResourceFormatLoaderWAV );
 	ResourceLoader::add_resource_format_loader( resource_loader_wav );
 
-	resource_loader_bitmap = memnew( ResourceFormatLoaderBitMap );
-	ResourceLoader::add_resource_format_loader( resource_loader_bitmap );
 
 #ifdef TOOLS_ENABLED
 
@@ -631,7 +629,6 @@ void unregister_scene_types() {
 	
 	memdelete( resource_loader_image );
 	memdelete( resource_loader_wav );
-	memdelete( resource_loader_bitmap );
 #ifdef TOOLS_ENABLED
 
 
diff --git a/scene/resources/bit_mask.cpp b/scene/resources/bit_mask.cpp
index 7cbc1450845..f5bfce3ef8e 100644
--- a/scene/resources/bit_mask.cpp
+++ b/scene/resources/bit_mask.cpp
@@ -204,57 +204,3 @@ BitMap::BitMap() {
 
 //////////////////////////////////////
 
-
-RES ResourceFormatLoaderBitMap::load(const String &p_path, const String& p_original_path, Error *r_error) {
-
-	if (r_error)
-		*r_error=ERR_FILE_CANT_OPEN;
-
-	BitMap* ptr = memnew(BitMap);
-	Ref<BitMap> bitmap( ptr );
-
-
-	Image image;
-
-	Error err = ImageLoader::load_image(p_path,&image);
-
-	ERR_EXPLAIN("Failed loading image for BitMap: "+p_path);
-	ERR_FAIL_COND_V(err, RES());
-
-	bitmap->create_from_image_alpha(image);
-	if (r_error)
-		*r_error=OK;
-
-	return bitmap;
-
-}
-
-bool ResourceFormatLoaderBitMap::handles_type(const String& p_type) const {
-
-	return (p_type=="BitMap");
-}
-
-void ResourceFormatLoaderBitMap::get_recognized_extensions(List<String> *p_extensions) const {
-
-	ImageLoader::get_recognized_extensions(p_extensions);
-}
-
-String ResourceFormatLoaderBitMap::get_resource_type(const String &p_path) const {
-
-	List<String> extensions;
-	ImageLoader::get_recognized_extensions(&extensions);
-	String ext=p_path.extension().to_lower();
-	for(List<String>::Element *E=extensions.front();E;E=E->next()) {
-		if (E->get()==ext)
-			return "BitMap";
-	}
-	return "";
-}
-
-
-ResourceFormatLoaderBitMap::ResourceFormatLoaderBitMap() {
-
-
-}
-
-
diff --git a/scene/resources/bit_mask.h b/scene/resources/bit_mask.h
index 66623f55c82..b245ea15421 100644
--- a/scene/resources/bit_mask.h
+++ b/scene/resources/bit_mask.h
@@ -62,16 +62,5 @@ public:
 	BitMap();
 };
 
-class ResourceFormatLoaderBitMap : public ResourceFormatLoader {
-
-public:
-
-	virtual RES load(const String &p_path,const String& p_original_path="",Error *r_error=NULL);
-	virtual void get_recognized_extensions(List<String> *p_extensions) const;
-	virtual bool handles_type(const String& p_type) const;
-	virtual String get_resource_type(const String &p_path) const;
-
-	ResourceFormatLoaderBitMap();
-};
 
 #endif // BIT_MASK_H
diff --git a/tools/editor/editor_node.cpp b/tools/editor/editor_node.cpp
index 9151e63df60..2d9decaaf95 100644
--- a/tools/editor/editor_node.cpp
+++ b/tools/editor/editor_node.cpp
@@ -5953,6 +5953,7 @@ EditorNode::EditorNode() {
 	resource_preview->add_preview_generator( Ref<EditorScriptPreviewPlugin>( memnew(EditorScriptPreviewPlugin )));
 	resource_preview->add_preview_generator( Ref<EditorSamplePreviewPlugin>( memnew(EditorSamplePreviewPlugin )));
 	resource_preview->add_preview_generator( Ref<EditorMeshPreviewPlugin>( memnew(EditorMeshPreviewPlugin )));
+	resource_preview->add_preview_generator( Ref<EditorBitmapPreviewPlugin>( memnew(EditorBitmapPreviewPlugin )));
 
 	circle_step_msec=OS::get_singleton()->get_ticks_msec();
 	circle_step_frame=OS::get_singleton()->get_frames_drawn();
diff --git a/tools/editor/plugins/editor_preview_plugins.cpp b/tools/editor/plugins/editor_preview_plugins.cpp
index 5f52d4c3e7f..a0ce294219e 100644
--- a/tools/editor/plugins/editor_preview_plugins.cpp
+++ b/tools/editor/plugins/editor_preview_plugins.cpp
@@ -6,6 +6,7 @@
 #include "scene/resources/material.h"
 #include "scene/resources/sample.h"
 #include "scene/resources/mesh.h"
+#include "scene/resources/bit_mask.h"
 
 bool EditorTexturePreviewPlugin::handles(const String& p_type) const {
 
@@ -56,6 +57,81 @@ Ref<Texture> EditorTexturePreviewPlugin::generate(const RES& p_from) {
 EditorTexturePreviewPlugin::EditorTexturePreviewPlugin() {
 
 
+}
+
+////////////////////////////////////////////////////////////////////////////
+
+bool EditorBitmapPreviewPlugin::handles(const String& p_type) const {
+	// Accepts any type name that ObjectTypeDB::is_type reports as being a "BitMap".
+	return ObjectTypeDB::is_type(p_type,"BitMap");
+}
+
+Ref<Texture> EditorBitmapPreviewPlugin::generate(const RES& p_from) {
+	// Builds a thumbnail texture for a BitMap: set bits are drawn white, clear bits black.
+	Ref<BitMap> bm =p_from;
+	// Return an empty texture when p_from is not a BitMap or the bitmap has no size.
+	if (bm.is_null() || bm->get_size()==Size2()) {
+		return Ref<Texture>();
+	}
+
+	DVector<uint8_t> data;
+
+	data.resize(bm->get_size().width*bm->get_size().height);
+
+	{
+		DVector<uint8_t>::Write w=data.write();
+		// One byte per pixel, row-major (index = y*width + x).
+		for(int i=0;i<bm->get_size().width;i++) {
+			for(int j=0;j<bm->get_size().height;j++) {
+				if (bm->get_bit(Point2i(i,j))) {
+					w[j*bm->get_size().width+i]=255;
+				} else {
+					w[j*bm->get_size().width+i]=0;
+
+				}
+			}
+
+		}
+	}
+
+
+	Image img(bm->get_size().width,bm->get_size().height,0,Image::FORMAT_GRAYSCALE,data);
+
+	int thumbnail_size = EditorSettings::get_singleton()->get("file_dialog/thumbnail_size");
+	if (img.is_compressed()) {
+		if (img.decompress()!=OK)
+			return Ref<Texture>();
+	} else if (img.get_format()!=Image::FORMAT_RGB && img.get_format()!=Image::FORMAT_RGBA) {
+		img.convert(Image::FORMAT_RGBA);
+	}
+	// Scale the longer side down to thumbnail_size, keeping the aspect ratio; smaller images keep their own size.
+	int width,height;
+	if (img.get_width() > thumbnail_size && img.get_width() >= img.get_height()) {
+
+		width=thumbnail_size;
+		height = img.get_height() * thumbnail_size / img.get_width();
+	} else if (img.get_height() > thumbnail_size &&  img.get_height() >= img.get_width()) {
+
+		height=thumbnail_size;
+		width = img.get_width() * thumbnail_size / img.get_height();
+	}  else {
+
+		width=img.get_width();
+		height=img.get_height();
+	}
+
+	img.resize(width,height);
+
+	Ref<ImageTexture> ptex = Ref<ImageTexture>( memnew( ImageTexture ));
+
+	ptex->create_from_image(img,0);
+	return ptex;
+
+}
+
+EditorBitmapPreviewPlugin::EditorBitmapPreviewPlugin() {
+
+
 }
 
 ///////////////////////////////////////////////////////////////////////////
diff --git a/tools/editor/plugins/editor_preview_plugins.h b/tools/editor/plugins/editor_preview_plugins.h
index 98071e2a0eb..b3bfda8045a 100644
--- a/tools/editor/plugins/editor_preview_plugins.h
+++ b/tools/editor/plugins/editor_preview_plugins.h
@@ -13,6 +13,17 @@ public:
 };
 
 
+class EditorBitmapPreviewPlugin : public EditorResourcePreviewGenerator {
+public:
+	// Preview generator that produces thumbnails for BitMap resources.
+	virtual bool handles(const String& p_type) const;
+	virtual Ref<Texture> generate(const RES& p_from);
+
+	EditorBitmapPreviewPlugin();
+};
+
+
+
 class EditorPackedScenePreviewPlugin : public EditorResourcePreviewGenerator {
 
 	Ref<Texture> _gen_from_imd(Ref<ResourceImportMetadata> p_imd);