initial commit

2 years ago · 6e82372485
67 changed files with 10813 additions and 0 deletions
--- a/.gitignore
+++ b/.gitignore
@ -0,0 +1 @@
 /target
--- a/Cargo.lock
+++ b/Cargo.lock
@ -0,0 +1,21 @@
 # This file is automatically @generated by Cargo.
 # It is not intended for manual editing.
 version = 3
 [[package]]
 name = "artspace"
 version = "0.1.0"
 dependencies = [
 "zune-jpeg",
 ]
 [[package]]
 name = "zune-core"
 version = "0.5.0-rc1"
 [[package]]
 name = "zune-jpeg"
 version = "0.5.0-rc1"
 dependencies = [
 "zune-core",
 ]
--- a/Cargo.toml
+++ b/Cargo.toml
@ -0,0 +1,7 @@
 [package]
 name = "artspace"
 version = "0.1.0"
 edition = "2021"
 [dependencies]
 zune-jpeg = {path = "third_party/zune-jpeg"}
--- a/README.md
+++ b/README.md
@ -0,0 +1,14 @@
 artspace
 ========
 Tabs or spaces, the eternal question. Why limit yourself to whitespace that doesn't even make a visual difference?
 Instead, turn that wasted whitespace into artspace!
 ## Usage
 ```
 cargo run --release -- test_image.jpg src\main.rs artspace.rs
 ```
 Note: Only compatible with languages with `/* this style of multi-line comment */`.
--- a/artspace.rs
+++ b/artspace.rs
@ -0,0 +1,132 @@
 /*⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⠗                 ⣼⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿             ⣀⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⠇           ⢀⣼⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⡿⠃          ⣀⣾⣿⣿⣿⣿⣿*/
 pub fn convert_bitmap_to_unicode(w: usize, h: usize, data: Vec<u8>) -> Vec<Vec<char>>/*⣾⣿⣿⣿⣿⣿⣿⣿⣿⣿⠷⠓          ⣈⣬⣿⣿⣿⣿⣿⣿⣿*/
 {/*⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⠓                ⢀⣬⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⠃             ⣸⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⡿⠁          ⣈⣾⣿⣿⣿⣿⣿⣿⣿⣿⡿⠳⠁          ⣈⣾⣿⣿⣿⣿⣿⣿⣿⣿⣿*/
 /*⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⡿⠃                ⣈⣾⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⠃             ⣀⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⠷          ⣈⣾⣿⣿⣿⣿⣿⣿⣿⣿⡷⠓          ⢀⣬⣾⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⡿*/
 /**/const CHARS: [&str; 4] = [/*⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⠃              ⣾⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⠓         ⣈⣾⣿⣿⣿⣿⣿⣿⣿⣿⠷⠁          ⣈⣮⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⡿⠓ */
 /*⣿⣿⣿⣿*/" ⠁⠂⠃⠄⠅⠆⠇⠈⠉⠊⠋⠌⠍⠎⠏⠐⠑⠒⠓⠔⠕⠖⠗⠘⠙⠚⠛⠜⠝⠞⠟⠠⠡⠢⠣⠤⠥⠦⠧⠨⠩⠪⠫⠬⠭⠮⠯⠰⠱⠲⠳⠴⠵⠶⠷⠸⠹⠺⠻⠼⠽⠾⠿",/* ⣈⣾⣿⣿⣿⣿⣿⣿⣿⡿⠓⠁         ⢀⣬⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⠷⠁  */
 /*⣿⣿⣿⣿*/"⡀⡁⡂⡃⡄⡅⡆⡇⡈⡉⡊⡋⡌⡍⡎⡏⡐⡑⡒⡓⡔⡕⡖⡗⡘⡙⡚⡛⡜⡝⡞⡟⡠⡡⡢⡣⡤⡥⡦⡧⡨⡩⡪⡫⡬⡭⡮⡯⡰⡱⡲⡳⡴⡵⡶⡷⡸⡹⡺⡻⡼⡽⡾⡿",/*⣾⣿⣿⣿⣿⣿⣿⣿⡿⠓          ⣈⣮⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⠷⠁    */
 /*⣿⣿⣿⣿*/"⢀⢁⢂⢃⢄⢅⢆⢇⢈⢉⢊⢋⢌⢍⢎⢏⢐⢑⢒⢓⢔⢕⢖⢗⢘⢙⢚⢛⢜⢝⢞⢟⢠⢡⢢⢣⢤⢥⢦⢧⢨⢩⢪⢫⢬⢭⢮⢯⢰⢱⢲⢳⢴⢵⢶⢷⢸⢹⢺⢻⢼⢽⢾⢿",/*⣿⣿⣿⣿⣿⣿⣿⠗          ⣈⣾⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⠷⠁      */
 /*⣿⣿⣿⣿*/"⣀⣁⣂⣃⣄⣅⣆⣇⣈⣉⣊⣋⣌⣍⣎⣏⣐⣑⣒⣓⣔⣕⣖⣗⣘⣙⣚⣛⣜⣝⣞⣟⣠⣡⣢⣣⣤⣥⣦⣧⣨⣩⣪⣫⣬⣭⣮⣯⣰⣱⣲⣳⣴⣵⣶⣷⣸⣹⣺⣻⣼⣽⣾⣿",/*⣿⣿⣿⣿⣿⠷⠁         ⣈⣾⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⡿⠳⠁        */
 /**/];/*            ⢀⣬⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣏                             ⣈⣾⣿⣿⣿⣿⣿⣿⣿⡿⠃         ⣈⣾⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⡿⠷⠑           */
 /**/let bitchars = CHARS.iter().flat_map(|t| t.chars()).collect::<Vec<_>>();/*⣿⠷         ⢀⣾⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⡿⠷⠑              */
 /*⣿⣿⣿⠿           ⢀⣾⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⡷⣷⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣎⢌                     ⣼⣿⣿⣿⣿⣿⣿⣿⡿⠁        ⢀⣬⣿⣿⣿⣿⣿⣿⣿⣿⣿⠷⠓⠁                ⢀*/
 /**/let px = |i: usize, j: usize| if i < w && j < h {data[j * w + i]} else {0};/*    ⢀⣬⣿⣿⣿⣿⣿⣿⣿⣿⡿⠓                  ⢀⣬⣿*/
 /*⣿⠿          ⣨⣿⣿⣿⣿⣿⣿⣿⡿⠓       ⠰⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣯                 ⣠⣿⣿⣿⣿⣿⣿⣿⠷        ⣀⣾⣿⣿⣿⣿⣿⣿⣿⣿⠗                   ⣬⣿⣿⣿*/
 /**/let mut output = vec![];/*  ⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⢏                ⣿⣿⣿⣿⣿⣿⣿⠿         ⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⠌                  ⣰⣿⣿⣿⣿*/
 /*⣿          ⣼⣿⣿⣿⣿⣿⣿⣿⠇          ⡰⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿               ⣠⣿⣿⣿⣿⣿⣿⣿⠏         ⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣯⠈                ⣀⣿⣿⣿⣿⣿*/
 /**/for j in (0..h).step_by(4)/* ⣷⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⠌              ⣸⣿⣿⣿⣿⣿⣿⣿⠏         ⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣎⠈             ⢀⣿⣿⣿⣿⣿⣿*/
 /**/{/*     ⣼⣿⣿⣿⣿⣿⣿⣿⠟            ⣰⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⠏              ⣿⣿⣿⣿⣿⣿⣿⣿⠇         ⠰⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣎⢌           ⣰⣿⣿⣿⣿⣿⣿*/
 /*    */let mut line = vec![];/*  ⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⠏             ⣰⣿⣿⣿⣿⣿⣿⣿⣿           ⠰⣷⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣯⣎⢌         ⣷⣿⣿⣿⣿⣿*/
 /*    */for i in (0..w).step_by(2)/*⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⠏            ⢀⣿⣿⣿⣿⣿⣿⣿⣿⣿⠏            ⠱⡳⡷⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣎⠈       ⠱⣷⣿⣿⣿*/
 /*    */{/*⣿⣿⣿⣿⣿⣿⣿⣿⠿              ⠰⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⠏           ⢀⣾⣿⣿⣿⣿⣿⣿⣿⣿⣿⠏                ⠑⠱⠳⡷⣷⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣎        ⠐⡳⣿*/
 /*        */let mut index = 0;/*   ⣳⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⠏          ⢀⣾⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⠏                     ⠐⠱⣷⣿⣿⣿⣿⣿⣿⣿⣿⣯         ⠐*/
 /*       ⣸*/index |= if px(i+0, j+0) < 128 {0} else {1 << 0};/*⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⠏                        ⠱⣷⣿⣿⣿⣿⣿⣿⠷          */
 /*      ⢀⣿*/index |= if px(i+0, j+1) < 128 {0} else {1 << 1};/*⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⠇                          ⣳⣿⣿⣿⠷⠓           */
 /*      ⣸⣿*/index |= if px(i+0, j+2) < 128 {0} else {1 << 2};/*⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⡿                ⢈⣌⣬⣮⣮⣮⣎⣌⣌⣌⣌⣾⣿⡿⠁             */
 /*     ⣀⣿⣿*/index |= if px(i+0, j+3) < 128 {0} else {1 << 3};/*⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⠃             ⢀⣬⣾⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⢌             */
 /*    ⣈⣿⣿⣿*/index |= if px(i+1, j+0) < 128 {0} else {1 << 4};/*⣿⣿⣿⣿⣿⣿⣿⣿⣿⠗             ⣈⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣯⣌⠈          */
 /*  ⢀⣼⣿⣿⣿⣿*/index |= if px(i+1, j+1) < 128 {0} else {1 << 5};/*⣿⣿⣿⣿⣿⣿⣿⡿⠃             ⣼⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣎⢌        */
 /*⢌⢈⣿⣿⣿⣿⣿⣿*/index |= if px(i+1, j+2) < 128 {0} else {1 << 6};/*⣿⣿⣿⣿⣿⣿⠷              ⣼⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⡷⡷⡷⡷⣿⣿⣿⣿⣿⣯⣎⢈    ⣴*/
 /*⣿⣿⣿⣿⣿⣿⣿⣿*/index |= if px(i+1, j+3) < 128 {0} else {1 << 7};/*⣿⣿⣿⣿⡷⠃              ⣨⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⡿⠓      ⠑⡳⣿⣿⣿⣿⣿⣿⣮⣮⣮⣿*/
 /*⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⠿                                ⠐⡳⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⠷⠑               ⣀⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⠿⠁         ⠰⣿⣿⣿⣿⣿⣿⣿⣿⣿*/
 /*⣿⣿⣿⣿⣿⣿⣿⣿*/line.push(bitchars[index]);/*        ⠱⡷⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⡷⠳⠑                ⣈⣬⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⠷            ⣿⣿⣿⣿⣿⣿⣿⣿⣿*/
 /*⣿⣿⣿⣿*/}/*⠟             ⣾⣿⣿⣿⣿⣿⣿⣿⣿⣎⠈               ⠐⠱⡷⣷⣿⡷⡷⠳⠑⠁                ⢀⣬⣾⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⡿⠃            ⢀⣿⣿⣿⣿⣿⣿⣿⣿⣿*/
 /*⣿⣿⣿⣿*/output.push(line);/*⣿⣿⣿⣿⣿⣿⣿⣿⣎⢌                                    ⢀⣌⣾⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⠗              ⣰⣿⣿⣿⣿⣿⣿⣿⣿⣿*/
 /**/}/*⣿⣿⣿⣿             ⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣮⣌⢈⠈                             ⣈⣾⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⡿⠳⠁              ⢀⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿*/
 /*⣿⣿⣿⣿⣿⣿⣿⣿⣿            ⣀⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣯⣮⣌⢈                       ⣈⣾⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⡿⠷⠑                ⣀⣾⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿*/
 /**/output/*           ⣰⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣯⢌                 ⠈⢀⣬⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⠷⠑                 ⢀⣬⣿⣿⣿⣿⣿⣿⣿⣿⡿⠷⠓⣳*/
 }/*⣿⣿⣿⣿⣿⣿⣿⣿⠏           ⣰⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⠷⠓⠑⠑⠑⡳⣿⣿⣿⣿⣿⣿⣿⣿⣿⣯⠈            ⢀⡈⣚⣾⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⡷⠓                  ⢈⣬⣿⣿⣿⣿⣿⣿⣿⣿⣿⠓    */
 /*⣿⣿⣿⣿⣿⣿⣿⣿⣿⣏           ⣰⣿⣿⣿⣿⣿⣿⣿⣿⣿⠃      ⠐⡳⣿⣿⣿⣿⣿⣿⣿⣿⣿⣎⠈         ⢈⣽⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⡿⠳⠁                 ⢈⣬⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⡿⠁     */
 /*⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⠌          ⣰⣿⣿⣿⣿⣿⣿⣿⣿⠟         ⠐⡳⣿⣿⣿⣿⣿⣿⣿⣿⣿⣎⣌⢈⢈⢈⣌⣬⣮⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⠷⠓                 ⢀⣈⣾⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⡿⠁      */
 fn read_jpeg_to_bitmap(file: &str) -> (usize, usize, Vec<u8>)/*⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⠷⠁                 ⢀⣬⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⠿        */
 {/*⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⠌          ⣷⣿⣿⣿⣿⣿⣿⠿               ⠱⣷⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⠷⠁                  ⣈⣾⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⡿⠁        */
 /**/use zune_jpeg::zune_core::{colorspace::ColorSpace, options::DecoderOptions, bytestream::ZCursor};/*⣿⣿⣿⣿⣿⠁         */
 /**/let data = std::fs::read(file).unwrap();/*    ⣳⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⠿⠁                  ⢀⣾⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⠏          */
 /**/let options = DecoderOptions::default().jpeg_set_out_colorspace(ColorSpace::Luma);/*⣬⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⡿           */
 /**/let mut decoder = zune_jpeg::JpegDecoder::new_with_options(ZCursor::new(&data), options);/*⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⡿⠁           */
 /**/let pixels = decoder.decode().unwrap();/*      ⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⠿⠁                  ⣬⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⠁            */
 /**/let (w, h) = decoder.dimensions().unwrap();/*  ⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⡿                  ⢀⣾⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⠗             */
 /**/(w, h, pixels)/*⣯⣌⣌⣬⣾⣿⣿⣿⠎  ⢬⣭⣿⣿⣿⣿⣿⣿⣿⣿⣿         ⣳⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⠏                  ⣰⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⠏              */
 }/*        ⠰⣷⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⠷⠁ ⣐⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿         ⣰⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿                  ⠰⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⠏              */
 /*⣯⠌        ⠐⣷⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⡿⠁   ⣰⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⠿         ⠰⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⠈                  ⣳⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⠏              */
 /*⣿⣿⠌        ⠐⣷⣿⣿⣿⣿⣿⣿⣿⣿⣿⠿      ⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⠏          ⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣏                  ⠰⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣯              */
 fn main()/*   ⠰⣿⣿⣿⣿⣿⣿⣿⣿⣿⠁      ⡰⣿⣿⣿⣿⣿⣿⣿⣿⣿           ⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⠌                  ⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿              */
 {/*⣿⣿⠏         ⣱⣿⣿⣿⣿⣿⣿⣿⠿        ⣳⣿⣿⣿⣿⣿⣿⣿⠿          ⢀⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⠌                 ⣹⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣏             */
 /**/match std::env::args().collect::<Vec<_>>().as_slice() {/*⣿⣿⣿⣿⣿⣿⣿⠌                ⣰⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⢎            */
 /*⣿⣿⣿⣿*/[_, bitmap_file, source_file, output_file] =>/*⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⢌               ⣾⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⢎           */
 /*⣿⣿⣿⣿*/{/*     ⠰⣿⣿⣿⣿⣿⣿⣿⣏                        ⣨⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣎⠈             ⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣯⣎⠈        */
 /*⣿⣿⣿⣿⣿   */let (w, h, pixels) = read_jpeg_to_bitmap(&bitmap_file);/*⣿⣿⣯⢌           ⣰⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣯⣎⣌⢌⢈⢈⢈⢈*/
 /*⣿⣿⣿⣿⣿⠏         ⡰⣿⣿⣿⣿⣿⣿⣿⣿⣯⢌                   ⣼⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⠷⠑⠑⠑⠱⣷⣿⣿⣿⣿⣿⣿⣿⣯⢌       ⢀⣀⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿*/
 /*⣿⣿⣿⣿⣿⣿  */let char_bitmap = convert_bitmap_to_unicode(w, h, pixels);/*⣿⣿⣿⣯⣎⢈   ⢀⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⡷⠳⠳⠑⠑⠑⠑⠑⠑⠳⡳⡷⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿*/
 /*⣿⣿⣿⣿⣿⣿⢏         ⠐⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣎               ⣰⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿         ⠱⣷⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⡷⠓              ⠑⠳⣷⣿⣿⣿⣿⣿⣿⣿*/
 /*⣿⣿⣿⣿⣿⣿⣿⠌*/let source = std::fs::read_to_string(source_file).unwrap();/*⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⠷⠁                   ⠐⠳⣷⣿⣿⣿⣿*/
 /*⣿⣿⣿⣿⣿⣿⣿⣯          ⡱⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣎             ⡰⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⢏           ⡱⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⠓                        ⠐⠱⠳⡷*/
 /*⣿⣿⣿⣿⣿⣿⣿⣿*/let max_width = 120;/*⣏             ⠰⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿            ⠰⣷⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⠃                             */
 /*⣿⣿⣿⣿⣿⣿⣿⣿⣿           ⠰⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣯⠈            ⠰⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⠏            ⠐⡳⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⡿⠁                              */
 /*⣿⣿⣿⣿⣿⣿⣿⣿*/let mut modified_lines = vec![];/*    ⠐⡳⣿⣿⣿⣿⣿⣿⣿⣿⠁              ⠱⡳⣿⣿⣿⣿⣿⣿⣿⡿⠓                ⢈⢈⢈⢈⢈⢈⢈⠈        */
 /*⣿⣿⣿⣿⣿⣿⣿⣿*/let mut buffer = Vec::with_capacity(1024);/*⣿⣿⣿⠿                  ⠑⠱⠳⠳⠓⠁            ⢈⣌⣬⣾⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣮⣌⠈    */
 /*⣿⣿⣿⣿⣿⣿⣿⣿*/let mut row = 0;/*⣿⣿⣿⣿⣿⣿⣿⣿⣎                ⠐⠑⠑⠁                                 ⢀⣈⣮⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣮⢌⠈ */
 /*⣿⣿⣿⣿⣿⣿⣿⣿⣿⢏           ⣸⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣏⠈                       ⢀⢈⣌⣮⣮⣎⣌⢈                  ⣈⣮⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣮*/
 /*⣿⣿⣿⣿⣿⣿⣿⣿*/for line in source.lines()/*⣿⢎                     ⣬⣿⣿⣿⣿⣿⣿⣿⣿⣿⣎⠈            ⢀⣬⣾⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿*/
 /*⣿⣿⣿⣿⣿⣿⣿⣿*/{/*    ⢀⣜⣾⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣯⢌                  ⣼⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣮⢌       ⢀⣈⣮⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⡷⠷⠳⠳⠳⠳⠳⠳⠳⡷⡷⣿⣿⣿⣿⣿⣿⣿⣿⣿*/
 /*⣿⣿⡷⡷⡷⡷⣿⣿⣿⣿⣿⣿*/buffer.clear();/*⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣯⢌               ⣸⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣮⣮⣌⣬⣮⣾⣿⣿⣿⣿⣿⣿⣿⣿⣿⡿⠷⠓⠁             ⠐⠱⠳⡷⣿⣿⣿⣿*/
 /*⠓     ⠐⣷⣿⣿⣿⣿*/buffer.extend(line.chars());/*⣯⢌⠈           ⣨⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⠷⠑                      ⠑⠱⠳*/
 /*        ⡳⣿⣿⣿*/if buffer.len() < max_width {/*⣿⣿⣮⢌⠈       ⣨⣿⣿⣿⣿⣿⣿⣿⣿⣿⡷⠳⠳⠑⠑⠳⡷⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⠷⠁                           */
 /*         ⠱⣷⣿⣿⣿⣿⣿*/let needed = max_width - buffer.len();/*⣿⣿⣿⣿⣿⣿⣿⠷⠁       ⠐⠳⣷⣿⣿⣿⣿⣿⣿⣿⡿⠳⠁                             */
 /*            ⠑⠑⠑ */buffer.extend(core::iter::repeat(' ').take(needed));/*     ⠐⠱⠳⠳⠳⠳⠑         ⢈⣈⣌⣌⣬⣮⣎⣌⣌⢈             */
 /*            */}/*                        ⠱⡷⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⠗                           ⣈⣮⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣯⣎⢌          */
 /* ⢈⣌⣬⣎⢌⠈             ⢈⢈⢈⢈⢈⢈⢈⠈               ⠐⠱⡷⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⠃                          ⣈⣾⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣮⢌⠈      */
 /*⣿⣿⣿⣿⣿⣿⣿⠌    */let mut i = 0;/*⣮⣎⣌⢈             ⠱⡳⣷⣿⣿⣿⣿⣿⣿⣿⣿⣿⠿⠁        ⢈⣌⣌⣌⠈             ⣬⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣮⣌⢈⢈⢈*/
 /*⣿⣿⣿⣿⣿⣿⣿⣿⢎   */for j in 0..buffer.len()/*          ⠐⠱⡳⣷⣿⣿⣿⡿⠓        ⣈⣾⣿⣿⣿⣿⣿⣯⢌         ⢀⣾⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿*/
 /*⣿⣿⣿⣿⣿⣿⣿⣿⣿⣯⣌⢈*/{/*⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣯⣌⠈              ⠁          ⣬⣿⣿⣿⣿⣿⣿⣿⣿⣿⣎⢈     ⢀⣬⣿⣿⣿⣿⣿⣿⣿⡿⠷⠳⠓⠑⠑⠑⠑⠑⠑⠱⡳⣷⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿*/
 /*⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿*/if !buffer[j].is_whitespace()/*               ⢀⣾⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣯⣎⣌⣌⣾⣿⣿⣿⣿⣿⣿⣿⠷⠁             ⡳⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿*/
 /*⠳⠓⠑⠑⣷⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿*/{/*         ⠐⠱⠳⡷⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣎⢌                ⣨⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⠷⠁                ⠰⣷⣿⣿⣿⣿⣿⣿⣿⣿*/
 /*     ⣳⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⠟  */if j - i > 3/*⡷⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣮⣌⢈          ⣈⣾⣿⣿⣿⣿⣿⡷⡷⣷⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⠷⠁                   ⠐⡳⣿⣿⣿⣿⣿⣿⣿*/
 /*     ⣰⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⠏  */{/*            ⠐⡳⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣯⣎⢌⠈   ⢈⣬⣾⣿⣿⣿⣿⠷⠑    ⣱⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⠷⠁                       ⠐⣳⣿⣿⣿⣿⣿*/
 /*     ⣰⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣯      */let bmp_row = &char_bitmap[row];/*⣿⣿⣿⡿⠃       ⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⡿⠁                           ⡱⣿⣿⣿⣿*/
 /*     ⣼⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣏     */for k in i..j {/*⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⠿⠁        ⣿⣿⣿⣿⣿⣿⣿⣿⣿⠿                              ⠰⣷⣿⣿*/
 /*    ⣨⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣏        */buffer[k] = bmp_row[k % bmp_row.len()];/*⣰⣿⣿⣿⣿⣿⣿⣿⣿⠗                                ⠐⣷⣿*/
 /*   ⣬⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣯⠌  */}/*               ⠰⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⠇         ⣿⣿⣿⣿⣿⣿⣿⣿⠿         ⣈⣌⢈                      ⠐⡳*/
 /*⢀⣌⣾⣿⣿⣿⣿⠷⠑ ⠐⡳⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⢎ */buffer[i  ] = '/';/*⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⡿         ⣰⣿⣿⣿⣿⣿⣿⣿⡿        ⣀⣾⣿⣿⣿⣯⠈                      */
 /*⣿⣿⣿⣿⣿⠿⠁     ⣱⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣎*/buffer[i+1] = '*';/*⣳⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⠏         ⣿⣿⣿⣿⣿⣿⣿⣿⠃       ⣀⣿⣿⣿⣿⣿⣿⣿⠈                     */
 /*⣿⣿⣿⣿⠗        ⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿*/buffer[j-1] = '/';/*⡰⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⠏        ⣠⣿⣿⣿⣿⣿⣿⣿⠿       ⣀⣿⣿⣿⣿⣿⣿⣿⣿⣏                     */
 /*⣿⣿⣿⠿         ⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿*/buffer[j-2] = '*';/* ⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⠏        ⣰⣿⣿⣿⣿⣿⣿⣿⠇       ⣸⣿⣿⣿⣿⣿⣿⣿⣿⣿⠎                    */
 /*⣿⣿⡿          ⣿⣿⣿⣿⣿⣿⣿*/}/*                      ⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⠏        ⣸⣿⣿⣿⣿⣿⣿⣿        ⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⠏                    */
 /*⣿⣿⠃          ⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿                     ⣀⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⠏        ⣿⣿⣿⣿⣿⣿⣿⠿       ⣀⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⠏                    */
 /*⣿⠿           ⣿⣿⣿⣿⣿⣿⣿*/i = j + 1;/*            ⣰⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⠏        ⣰⣿⣿⣿⣿⣿⣿⠏       ⣰⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⠁                    */
 /*⣿⠃          ⣠⣿⣿⣿*/}/*⣿⣿⣿⣿                     ⣰⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⠏        ⣿⣿⣿⣿⣿⣿⣿⠏       ⣰⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿                     */
 /*⣿⠈          */}/*⣿⣿⣿⣿⣿⣿⣿⣿⣏⠈                   ⣰⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⠏        ⣳⣿⣿⣿⣿⣿⣿⠏       ⡰⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣌⢈                   */
 /*⣿⣯⠈          ⠐⣷⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣎⠈                  ⣳⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⠏        ⠰⣿⣿⣿⣿⣿⣿⣯⠈       ⠱⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣎⠈                */
 /*⣿⣿⣿⠌        */let j = buffer.len();/*           ⣷⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⠏         ⣷⣿⣿⣿⣿⣿⣿⣏        ⠰⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⢌               */
 /*⣿⣿⣿⣿⠈       */if j - i > 3/*⣿⣿⣿⣎⠈                ⣳⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⠏         ⣰⣿⣿⣿⣿⣿⣿⣿⢎        ⠰⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⠌              */
 /*⣿⣿⣿⣿⣏       */{/*⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣎⢈               ⠱⣷⣿⣿⣿⣿⣿⣿⣿⣿⠃         ⣰⣿⣿⣿⣿⣿⣿⣿⣿⠌        ⠐⣷⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⠯              */
 /*⣿⣿⣿⣿⣿⢎         ⣰*/let bmp_row = &char_bitmap[row];/*⠱⣷⣿⣿⣿⣿⣿⠓          ⣰⣿⣿⣿⣿⣿⣿⣿⣿⣿⠈         ⡳⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⠃         ⢈⣬⣮⣿⣿*/
 /*⣳⣿⣿⣿⣿⣿⣎⠈     ⢀⣀⣿*/for k in i..j {/*⣿⣿⣯⢌               ⠐⠳⠳⠳⠁           ⣰⣿⣿⣿⣿⣿⣿⣿⣿⣿⣯          ⠐⠳⣷⣿⣿⣿⣿⣿⠷⠁       ⢀⣬⣾⣿⣿⣿⣿⣿*/
 /* ⣿⣿⣿⣿⣿⣿⣿⣯⣌⢌⢈⣌⣬⣿⣿⣿⣿⣿⣿*/buffer[k] = bmp_row[k % bmp_row.len()];/*       ⣸⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣏             ⠑⠑⠁         ⣈⣾⣿⣿⣿⣿⣿⣿⣿*/
 /*⣀⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿*/}/*⠁       ⠑⠱⡷⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣎⠈                         ⣀⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿                        ⣨⣿⣿⣿⣿⣿⣿⣿⣿⣿*/
 /*⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿*/buffer[i  ] = '/';/*⣿⣿⣿⣿⣿⣿⣎⢈                      ⢀⣾⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⠈                     ⢈⣾⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿*/
 /*⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⡿*/buffer[i+1] = '*';/*⣿⣿⣿⣿⣿⣿⣿⣿⣯⢌                   ⣀⣾⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⠏                 ⢀⢈⣬⣾⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿*/
 /*⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⠓ */buffer[j-1] = '/';/*⡳⣿⣿⣿⣿⣿⣿⣿⣿⣿⣯⢌⠈              ⢀⣬⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣏              ⣈⣮⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿*/
 /*⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⠟  */buffer[j-2] = '*';/* ⠐⠱⡷⣿⣿⣿⣿⣿⣿⣿⣿⣿⣮⣎⣌⢈⢈       ⢀⣬⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⠌            ⣠⣿⣿⣿⣿⣿⣿⣿⣿⣿⠷⠑⠑⠱⣷⣿⣿⣿⣿⣿*/
 /* ⠱⣷⣿⣿⣿⣿⣿⣿⣿⣿⣿*/}/*                         ⠐⡳⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣮⣾⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣯⠌           ⣰⣿⣿⣿⣿⣿⣿⣿⡿⠁     ⣿⣿⣿⣿⣿*/
 /*   ⡳⣿⣿⣿⣿⣿⣿⣿⣿⣿⣏               ⢀⣈⣌⣌⣌⢈         ⠐⡳⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⡿⠓⠑⠑⠑⠑⠳⡷⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣎⠈         ⡰⣿⣿⣿⣿⣿⣿⣿⠃      ⣰⣿⣿⣿⣿*/
 /*    ⡱⣿⣿⣿⣿⣿⣿⣿*/modified_lines.push(buffer.iter().collect::<String>());/* ⠐⠱⣷⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣎⠈        ⣳⣿⣿⣿⣿⣿⣿       ⣰⣿⣿⣿⣿*/
 /*     ⣱⣿⣿⣿⣿⣿⣿*/row += 1;/*   ⣸⣿⣿⣿⣿⣿⣿⣿⣿⣯⠈         ⣳⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⡿⠃            ⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⢎       ⠐⣿⣿⣿⣿⣿⣿       ⣰⣿⣿⣿⣿*/
 /*     ⣿⣿⣿*/}/*⣿⣿            ⣀⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣏          ⣷⣿⣿⣿⣿⣿⣿⣿⣿⣿⡿⠓             ⢀⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⡿       ⢀⣿⣿⣿⣿⣿⠏       ⣰⣿⣿⣿⣿*/
 /*    ⣼⣿⣿⣿⣿⣿⣿⣿⣿⣿⠏           ⣀⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⠎         ⠰⣿⣿⣿⣿⣿⣿⣿⠷⠁             ⡈⣚⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⡿⠁       ⣰⣿⣿⣿⣿⣿⠁       ⠐⣿⣿⣿⣿*/
 /*   ⣀⣿⣿⣿⣿*/let new_source = modified_lines.join("\n");/*⠑              ⡀⣚⣾⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⡿⠓         ⣾⣿⣿⣿⣿⣿        ⣰⣿⣿⣿⣿*/
 /*  ⢀⣾⣿⣿⣿⣿⣿⣿⣿⣿⣿⠟           ⣸⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⠎                           ⢀⣌⣾⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⡿⠓          ⣸⣿⣿⣿⣿⣿⣿        ⣰⣿⣿⣿⣿*/
 /*  ⣼⣿⣿⣿⣿⣿*/println!("{new_source}");/*⣿⣿⣿⣯                        ⢀⣈⣾⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⡿⠓           ⣼⣿⣿⣿⣿⣿⣿⠿        ⣰⣿⣿⣿⣿*/
 /* ⣸⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⠟           ⣼⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⠌                     ⢈⣬⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⠷⠁           ⢀⣾⣿⣿⣿⣿⣿⣿⣿⠇        ⣰⣿⣿⣿⣿*/
 /*⣀⣿⣿⣿⣿⣿⣿⣿*/std::fs::write(output_file, new_source).unwrap();/*⣌⣾⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⡿⠳⠁            ⣈⣿⣿⣿⣿⣿⣿⣿⣿⡿         ⣰⣿⣿⣿⣿*/
 /*⣿⣿⣿⣿*/}/*⣿⣿⠟           ⣠⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⠎               ⢀⣬⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⡿⠓             ⢀⣼⣿⣿⣿⣿⣿⣿⣿⣿⡿⠁         ⣿⣿⣿⣿⣿*/
 /*⣿⣿⣿⣿*/[path] =>/*      ⣰⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣏              ⣨⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⠷⠑              ⣀⣾⣿⣿⣿⣿⣿⣿⣿⣿⡿⠁         ⣠⣿⣿⣿⣿⣿*/
 /*⣿⣿⣿⣿*/{/*⣿⠟            ⣰⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿             ⣼⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⠷⠁               ⣀⣿⣿⣿⣿⣿⣿⣿⡿⠷⠓           ⣼⣿⣿⣿⣿⣿*/
 /*⣿⣿⣿⣿⣿⣿⣿⣿*/println!("usage: {path} art.jpg input_src output_src");/*⣿⡿⠳⠁                 ⡰⣷⡷⡷⠷⠳⠑⠁             ⣨⣿⣿⣿⣿⣿⣿*/
 /*⣿⣿⣿⣿*/}/*⠇               ⠑⠑     ⠐⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⠌           ⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⡿⠓                                        ⣨⣿⣿⣿⣿⣿⣿⣿*/
 /*⣿⣿⣿⣿*/_ =>/*                     ⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⠏          ⣰⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⠷         ⢀⣌⣬⣮⣾⣿⣿⣿⣿⣿⠌                    ⢀⣼⣿⣿⣿⣿⣿⣿⣿⣿*/
 /*⣿⣿⣿⣿*/{/*                        ⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⠏          ⠰⣿⣿⣿⣿⣿⣿⣿⣿⠷⠁       ⢀⣬⣾⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿                   ⢀⣾⣿⣿⣿⣿⣿⣿⣿⣿⣿*/
 /*⣿⣿⣿⣿*/}/*                        ⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⠏           ⣷⣿⣿⣿⣿⣿⠷⠁       ⢀⣬⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⢎                  ⣼⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿*/
 /**/}/*⣿⣿                         ⣠⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⠏            ⠱⠳⠳⠑         ⣬⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⢎                 ⠱⣷⣿⣿⣿⣿⣿⣿⣿⣿⣿*/
 /*⣿⣿⣿⣿⣿⣿⣿⠌                        ⣼⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⠌                        ⡳⡷⣷⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣏⠈                 ⠑⡳⣷⣿⣿⣿⣿⣿⣿*/
 }/*⣿⣿⣿⣿⣿⣿⣯⠈                     ⣈⣾⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣯⠈                ⢀⢈⢈⢈       ⠑⠱⡳⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⠌                   ⠑⠳⡷⣿⣿⣿*/
--- a/src/main.rs
+++ b/src/main.rs
@ -0,0 +1,132 @@
 /// Converts an 8-bit grayscale bitmap into rows of Unicode braille characters.
 ///
 /// `data` is indexed row-major as `data[y * w + x]`. Every output character
 /// covers a cell of 2x4 pixels; a pixel raises its dot when its value is at
 /// least 128. Pixels outside the `w` x `h` image, or past the end of `data`,
 /// count as dark, so partial cells on the right and bottom edges are padded.
 ///
 /// A cell with no raised dot is emitted as an ASCII space rather than the
 /// blank braille pattern U+2800.
 ///
 /// # Returns
 /// One `Vec<char>` per group of four pixel rows (`h` divided by 4, rounded
 /// up), each holding one character per pair of pixel columns (`w` divided by
 /// 2, rounded up).
 pub fn convert_bitmap_to_unicode(w: usize, h: usize, data: Vec<u8>) -> Vec<Vec<char>>
 {
    // (dx, dy, bit) for each of the eight dots. Unicode numbers braille dots
    //     1 4
    //     2 5
    //     3 6
    //     7 8
    // and dot `n` is bit `n - 1` of the offset from U+2800, so the bottom row
    // (dots 7 and 8) does NOT continue the column-major order of the rows above.
    const DOTS: [(usize, usize, u32); 8] = [
        (0, 0, 0x01),
        (0, 1, 0x02),
        (0, 2, 0x04),
        (1, 0, 0x08),
        (1, 1, 0x10),
        (1, 2, 0x20),
        (0, 3, 0x40),
        (1, 3, 0x80),
    ];
    const BRAILLE_BASE: u32 = 0x2800;
    const THRESHOLD: u8 = 128;

    // True when the pixel exists and is bright enough to raise a dot.
    let lit = |x: usize, y: usize| {
        x < w && y < h && matches!(data.get(y * w + x), Some(&v) if v >= THRESHOLD)
    };

    (0..h)
        .step_by(4)
        .map(|top| {
            (0..w)
                .step_by(2)
                .map(|left| {
                    let mask = DOTS
                        .iter()
                        .filter(|&&(dx, dy, _)| lit(left + dx, top + dy))
                        .fold(0u32, |acc, &(_, _, bit)| acc | bit);
                    if mask == 0 {
                        ' '
                    } else {
                        char::from_u32(BRAILLE_BASE + mask)
                            .expect("U+2801..=U+28FF are valid scalar values")
                    }
                })
                .collect()
        })
        .collect()
 }
 /// Reads the JPEG at `file` and decodes it with `ColorSpace::Luma` output.
 ///
 /// # Returns
 /// `(width, height, pixels)` exactly as reported by the decoder.
 /// NOTE(review): the caller indexes `pixels` as one byte per pixel in
 /// row-major order; that matches a luma-only decode but is not checked here.
 ///
 /// # Panics
 /// Panics with a message naming `file` when the file cannot be read, when
 /// decoding fails, or when the decoder reports no dimensions.
 fn read_jpeg_to_bitmap(file: &str) -> (usize, usize, Vec<u8>)
 {
    use zune_jpeg::zune_core::{colorspace::ColorSpace, options::DecoderOptions, bytestream::ZCursor};
    let data = std::fs::read(file)
        .unwrap_or_else(|err| panic!("failed to read image file `{file}`: {err}"));
    let options = DecoderOptions::default().jpeg_set_out_colorspace(ColorSpace::Luma);
    let mut decoder = zune_jpeg::JpegDecoder::new_with_options(ZCursor::new(&data), options);
    // Decode first: the dimensions are queried from the decoder afterwards.
    let pixels = decoder
        .decode()
        .unwrap_or_else(|err| panic!("failed to decode `{file}` as JPEG: {err:?}"));
    let (w, h) = decoder
        .dimensions()
        .unwrap_or_else(|| panic!("decoder reported no dimensions for `{file}`"));
    (w, h, pixels)
 }
 /// Replaces the whitespace run `buffer[start..end]` with a `/*...*/` comment
 /// whose interior is taken from `art_row`.
 ///
 /// The art character for column `k` is `art_row[k % art_row.len()]`, so the
 /// picture tiles horizontally. Runs shorter than four characters cannot hold
 /// both comment delimiters and are left alone, as is every run when `art_row`
 /// is empty.
 fn fill_gap_with_art(buffer: &mut [char], art_row: &[char], start: usize, end: usize)
 {
    if end < start + 4 || art_row.is_empty() {
        return;
    }
    let gap = &mut buffer[start..end];
    for (offset, slot) in gap.iter_mut().enumerate() {
        *slot = art_row[(start + offset) % art_row.len()];
    }
    // The delimiters overwrite the outermost art characters.
    let len = gap.len();
    gap[0] = '/';
    gap[1] = '*';
    gap[len - 2] = '*';
    gap[len - 1] = '/';
 }

 /// Command-line entry point: `artspace art.jpg input_src output_src`.
 ///
 /// Decodes the image into braille characters, pads every source line to at
 /// least 120 characters, and turns each whitespace run of four or more
 /// characters into a block comment filled with the matching slice of the
 /// image. The result is printed to stdout and written to `output_src`.
 ///
 /// Source line `n` uses image row `n` modulo the number of braille rows, so
 /// sources longer than the image repeat it vertically. Lines are joined with
 /// `\n`, so the output has no trailing newline.
 ///
 /// With any other number of arguments the usage line is printed.
 ///
 /// # Panics
 /// Panics when the source file cannot be read or the output cannot be written.
 fn main()
 {
    const MAX_WIDTH: usize = 120;

    let args = std::env::args().collect::<Vec<_>>();
    match args.as_slice() {
        [_, bitmap_file, source_file, output_file] =>
        {
            let (w, h, pixels) = read_jpeg_to_bitmap(bitmap_file);
            let char_bitmap = convert_bitmap_to_unicode(w, h, pixels);
            let source = std::fs::read_to_string(source_file).unwrap();

            // Stand-in row used when the image produced no braille rows at all.
            let no_art: Vec<char> = Vec::new();
            let mut modified_lines = Vec::new();
            let mut buffer: Vec<char> = Vec::with_capacity(1024);
            for (row, line) in source.lines().enumerate()
            {
                buffer.clear();
                buffer.extend(line.chars());
                if buffer.len() < MAX_WIDTH {
                    buffer.resize(MAX_WIDTH, ' ');
                }

                // Wrap vertically so long sources do not index past the image.
                let art_row = char_bitmap
                    .get(row % char_bitmap.len().max(1))
                    .unwrap_or(&no_art);

                // `gap_start` is the first column after the last non-whitespace
                // character seen so far.
                let mut gap_start = 0;
                for j in 0..buffer.len()
                {
                    if !buffer[j].is_whitespace()
                    {
                        fill_gap_with_art(&mut buffer, art_row, gap_start, j);
                        gap_start = j + 1;
                    }
                }
                // Trailing whitespace (including the padding) is a gap too.
                let line_end = buffer.len();
                fill_gap_with_art(&mut buffer, art_row, gap_start, line_end);

                modified_lines.push(buffer.iter().collect::<String>());
            }
            let new_source = modified_lines.join("\n");
            println!("{new_source}");
            std::fs::write(output_file, new_source).unwrap();
        }
        other =>
        {
            // Wrong argument count: explain the expected invocation instead of
            // exiting silently.
            let path = other.first().map(String::as_str).unwrap_or("artspace");
            println!("usage: {path} art.jpg input_src output_src");
        }
    }
 }
--- a/test_image.jpg
+++ b/test_image.jpg
--- a/third_party/zune-core/CHANGELOG.md
+++ b/third_party/zune-core/CHANGELOG.md
@ -0,0 +1,22 @@
 ## 0.2.14
 - Fixed building with no-std
 - Add `peek_at` and `pos` for writer
 - Make serde non default
 - Add option to make PNG add an alpha channel
 ## 0.2.12
 - Add endianness conversion
 - Hide exposed values for EncoderOptions
 - Add Float32 bit depth
 - Remove support for BitDepth 10 and 12
 - Add bit_size method
 ## 0.2.1
 Improve documentation on various parts
 ## 0.2.0
 Initial version
--- a/third_party/zune-core/Cargo.toml
+++ b/third_party/zune-core/Cargo.toml
@ -0,0 +1,22 @@
 [package]
 name = "zune-core"
 version = "0.5.0-rc1"
 edition = "2021"
 description = "Core utilities for image processing in the zune family of crates"
 exclude = ["tests/"]
 repository = "https://github.com/etemesi254/zune-image"
 keywords = ["image"]
 categories = ["multimedia::images", "multimedia::encoding"]
 license = "MIT OR Apache-2.0 OR Zlib"
 [features]
 # When present, we can use std facilities to detect
 # if a specific feature exists
 # Not enabled by default. Other zune crates can enable dep:zune-core/std by default.
 # But if we enable it here, they can't disable it anymore.
 # See: https://github.com/rust-lang/cargo/issues/8366
 std = []
 [dependencies]
 log = { version = "0.4.17", optional = true }
 serde = { version = "1.0.52", optional = true }
--- a/third_party/zune-core/LICENSE-APACHE
+++ b/third_party/zune-core/LICENSE-APACHE
@ -0,0 +1 @@
 ../../LICENSE-APACHE
--- a/third_party/zune-core/LICENSE-MIT
+++ b/third_party/zune-core/LICENSE-MIT
@ -0,0 +1 @@
 ../../LICENSE-MIT
--- a/third_party/zune-core/LICENSE-ZLIB
+++ b/third_party/zune-core/LICENSE-ZLIB
@ -0,0 +1 @@
 ../../LICENSE-ZLIB
--- a/third_party/zune-core/README.md
+++ b/third_party/zune-core/README.md
@ -0,0 +1,15 @@
 ## Zune core
 Core primitives necessary for image manipulations
 This crate contains a small set of primitives
 necessary for image manipulations which are shared among most of the `zune-` family
 of decoders and encoders.
 ### Items present
 Currently, it contains:
 - Colorspace definitions
 - Bit depth definitions.
 - Decoder and encoder options
--- a/third_party/zune-core/src/bit_depth.rs
+++ b/third_party/zune-core/src/bit_depth.rs
@ -0,0 +1,170 @@
 /*
 * Copyright (c) 2023.
 *
 * This software is free software;
 *
 * You can redistribute it or modify it under terms of the MIT, Apache License or Zlib license
 */
 //! Image bit depth, information and manipulations
 /// The image bit depth.
 ///
 /// The library successfully supports depths up to
 /// 16 bits, as the underlying storage is usually a `u16`.
 ///
 /// This allows us to comfortably support a wide variety of images
 /// e.g 10 bit av1, 16 bit png and ppm.
 ///
 /// The [`Default`] value is [`BitDepth::Unknown`].
 #[derive(Copy, Clone, Debug, Eq, PartialEq)]
 #[non_exhaustive]
 pub enum BitDepth {
    /// U8 bit depth.
    ///
    /// Images with such bit depth use [`u8`] to store
    /// pixels and use the whole range from 0-255.
    ///
    /// It is currently the smallest supported bit depth
    /// by the library.
    ///
    /// For images with bit depths lower than this, they will be scaled
    /// to this bit depth
    Eight,
    /// U16 bit depth
    ///
    /// Images with such bit depths use [`u16`] to store values and use the whole range
    /// i.e 0-65535
    ///
    /// Data is stored and processed in native endian.
    Sixteen,
    /// Floating point 32 bit data, range is 0.0 to 1.0
    ///
    /// Uses f32 to store data
    Float32,
    /// Bit depth information is unknown
    ///
    /// [`BitDepth::bit_type`] and [`BitDepth::size_of`] panic when called on
    /// this variant; [`BitDepth::max_value`] returns 0 for it.
    Unknown
 }
 /// The underlying bit representation of the image
 ///
 /// This represents the minimum rust type that
 /// can be used to represent image data, required
 /// by `Channel` struct in zune-image
 ///
 /// Use [`BitType::to_depth`] to obtain the corresponding [`BitDepth`].
 #[derive(Copy, Clone, Debug, Eq, PartialEq)]
 #[non_exhaustive]
 pub enum BitType {
    /// Images represented using a [`u8`] as their
    /// underlying pixel storage
    U8,
    /// Images represented using a [`u16`] as their
    /// underlying pixel storage.
    U16,
    /// Images represented using a [`f32`] as their
    /// underlying pixel storage
    F32
 }
 impl BitType {
    /// Return the equivalent of the image bit type's depth
    pub fn to_depth(self) -> BitDepth {
        match self {
            BitType::U8 => BitDepth::Eight,
            BitType::U16 => BitDepth::Sixteen,
            BitType::F32 => BitDepth::Float32
        }
    }
 }
 impl Default for BitDepth {
    fn default() -> Self {
        Self::Unknown
    }
 }
 impl BitDepth {
    /// Get the max value supported by the bit depth
    ///
    /// During conversion from one bit depth to another
    ///
    /// larger values should be clamped to this bit depth
    #[rustfmt::skip]
    #[allow(clippy::zero_prefixed_literal)]
    pub const fn max_value(self) -> u16
    {
        match self
        {
            Self::Eight => (1 << 08) - 1,
            Self::Sixteen => u16::MAX,
            Self::Float32 => 1,
            Self::Unknown => 0,
        }
    }
    /// Return the minimum number of bits that can be used to represent
    /// each pixel in the image
    ///
    /// All bit depths below 8 return a bit type of `BitType::U8`.
    ///  and all those above 8 and below 16 return a bit type of `BitType::SixTeen`
    ///
    /// # Returns
    /// An enum whose variants represent the minimum size for an unsigned integer
    /// which can store the image pixels without overflow
    ///
    /// # Example
    ///
    /// ```
    /// use zune_core::bit_depth::{BitDepth, BitType};
    /// assert_eq!(BitDepth::Eight.bit_type(),BitType::U8);
    ///
    /// assert_eq!(BitDepth::Sixteen.bit_type(),BitType::U16);
    /// ```
    ///
    /// See also [size_of](BitDepth::size_of)
    pub const fn bit_type(self) -> BitType {
        match self {
            Self::Eight => BitType::U8,
            Self::Sixteen => BitType::U16,
            Self::Float32 => BitType::F32,
            Self::Unknown => panic!("Unknown bit type")
        }
    }
    /// Get the number of bytes needed to store a specific bit depth
    ///
    ///  
    /// # Example
    /// For images less than or equal to 8 bits(1 byte), we can use a [`u8`] to store
    /// the pixels, and a size_of [`u8`] is 1
    ///
    /// For images greater than 8  bits and less than 16 bits(2 bytes), we can use a [`u16`] to
    /// store the pixels, a size_of [`u16`] is 2.
    /// ```
    /// use zune_core::bit_depth::BitDepth;
    /// let depth = BitDepth::Sixteen;
    /// // greater 12 bits is greater than 8 and less than 16
    /// assert_eq!(depth.size_of(),2);
    /// ```
    pub const fn size_of(self) -> usize {
        match self {
            Self::Eight => core::mem::size_of::<u8>(),
            Self::Sixteen => core::mem::size_of::<u16>(),
            Self::Float32 => core::mem::size_of::<f32>(),
            Self::Unknown => panic!("Unknown bit type")
        }
    }
    pub const fn bit_size(&self) -> usize {
        self.size_of() * 8
    }
 }
 /// Byte endianness of returned samples.
 ///
 /// This is useful when the decoder returns samples which span more
 /// than one byte yet the type returned is `&[u8]`
 ///
 /// This helps you interpret how those bytes should be reconstructed
 /// to a higher order type
 #[derive(Copy, Clone, Debug, Eq, PartialEq)]
 pub enum ByteEndian {
    /// Little Endian byte-order (least significant byte first)
    LE,
    /// Big Endian byte-order (most significant byte first)
    BE
 }
--- a/third_party/zune-core/src/bytestream.rs
+++ b/third_party/zune-core/src/bytestream.rs
@ -0,0 +1,27 @@
 /*
 * Copyright (c) 2023.
 *
 * This software is free software;
 *
 * You can redistribute it or modify it under terms of the MIT, Apache License or Zlib license
 */
 //! A simple implementation of a bytestream reader
 //! and writer.
 //!
 //! This module contains two main structs that help in
 //! byte reading and byte writing
 //!
 //! Useful for a lot of image readers and writers, it's put
 //! here to avoid code duplication
 pub use reader::ZReader;
 pub use traits::*;
 pub use writer::ZWriter;
 pub use crate::bytestream::reader::no_std_readers::*;
 //use crate::bytestream::reader::std_readers::*;
 pub use crate::bytestream::reader::ZByteIoError;
 mod reader;
 mod traits;
 mod writer;
--- a/third_party/zune-core/src/bytestream/reader.rs
+++ b/third_party/zune-core/src/bytestream/reader.rs
@ -0,0 +1,458 @@
 use alloc::string::String;
 use alloc::vec;
 use alloc::vec::Vec;
 use core::fmt::Formatter;
 pub(crate) mod no_std_readers;
 pub(crate) mod std_readers;
 use crate::bytestream::ZByteReaderTrait;
 /// Enumeration of possible methods to seek within an I/O object.
 ///
 /// It is analogous to the [SeekFrom](std::io::SeekFrom) in the std library but
 /// it's here to allow this to work in `no_std` crates.
 ///
 /// Each variant carries the offset in bytes relative to its reference point.
 #[derive(Copy, PartialEq, Eq, Clone, Debug)]
 pub enum ZSeekFrom {
    /// Sets the offset to the provided number of bytes, counted from the
    /// start of the object.
    Start(u64),
    /// Sets the offset to the size of this object plus the specified number of
    /// bytes.
    ///
    /// It is possible to seek beyond the end of an object, but it's an error to
    /// seek before byte 0.
    End(i64),
    /// Sets the offset to the current position plus the specified number of
    /// bytes.
    ///
    /// It is possible to seek beyond the end of an object, but it's an error to
    /// seek before byte 0.
    Current(i64)
 }
 impl ZSeekFrom {
    /// Translate this value into the equivalent [SeekFrom](std::io::SeekFrom)
    /// of the `std::io` library.
    ///
    /// Variants map one to one and the offset is carried over unchanged.
    ///
    /// This is only present when the `std` feature is enabled.
    #[cfg(feature = "std")]
    pub(crate) fn to_std_seek(self) -> std::io::SeekFrom {
        use std::io::SeekFrom;

        match self {
            Self::Start(offset) => SeekFrom::Start(offset),
            Self::End(delta) => SeekFrom::End(delta),
            Self::Current(delta) => SeekFrom::Current(delta)
        }
    }
 }
 /// Errors that can be produced by the byte readers and writers of this module.
 pub enum ZByteIoError {
    /// A standard library error
    /// Only available with the `std` feature
    #[cfg(feature = "std")]
    StdIoError(std::io::Error),
    /// An error converting from one integer type to another
    TryFromIntError(core::num::TryFromIntError),
    /// Not enough bytes to satisfy a read
    ///
    /// The fields are `(requested, read)`: the number of bytes that were asked for
    /// and the number of bytes that could actually be read.
    // requested, read
    NotEnoughBytes(usize, usize),
    /// The output buffer is too small to write the bytes
    ///
    /// The fields are `(bytes to write, buffer size)`, matching the order in which
    /// the `Debug` implementation prints them.
    NotEnoughBuffer(usize, usize),
    /// An error that fits no other variant, described by a static message
    Generic(&'static str),
    /// An error that occurred during a seek operation, described by a static message
    SeekError(&'static str),
    /// An error that occurred during a seek operation, described by an owned message
    SeekErrorOwned(String)
 }
 impl core::fmt::Debug for ZByteIoError {
    fn fmt(&self, f: &mut Formatter<'_>) -> core::fmt::Result {
        match self {
            #[cfg(feature = "std")]
            ZByteIoError::StdIoError(err) => {
                writeln!(f, "Underlying I/O error {}", err)
            }
            ZByteIoError::TryFromIntError(err) => {
                writeln!(f, "Cannot convert to int {}", err)
            }
            ZByteIoError::NotEnoughBytes(expected, found) => {
                writeln!(f, "Not enough bytes, expected {expected} but found {found}")
            }
            ZByteIoError::NotEnoughBuffer(expected, found) => {
                writeln!(
                    f,
                    "Not enough buffer to write {expected} bytes, buffer size is {found}"
                )
            }
            ZByteIoError::Generic(err) => {
                writeln!(f, "Generic I/O error: {err}")
            }
            ZByteIoError::SeekError(err) => {
                writeln!(f, "Seek error: {err}")
            }
            ZByteIoError::SeekErrorOwned(err) => {
                writeln!(f, "Seek error {err}")
            }
        }
    }
 }
 #[cfg(feature = "std")]
 impl From<std::io::Error> for ZByteIoError {
    /// Wrap a standard library I/O error, this is what allows `?` to be used
    /// on `std::io` results inside functions returning `ZByteIoError`.
    fn from(io_error: std::io::Error) -> Self {
        Self::StdIoError(io_error)
    }
 }
 impl From<core::num::TryFromIntError> for ZByteIoError {
    fn from(value: core::num::TryFromIntError) -> Self {
        ZByteIoError::TryFromIntError(value)
    }
 }
 impl From<&'static str> for ZByteIoError {
    fn from(value: &'static str) -> Self {
        ZByteIoError::Generic(value)
    }
 }
 /// The image reader wrapper
 ///
 /// This wraps anything that implements [ZByteReaderTrait] and
 /// extends the ability of the core trait methods by providing
 /// utilities like endian aware byte functions.
 ///
 /// This prevents each implementation from providing its own
 /// copy of those utilities.
 pub struct ZReader<T: ZByteReaderTrait> {
    // the wrapped byte source, reads and seeks are forwarded to it
    inner:       T,
    // scratch space backing the slice handed out by `peek_at`
    temp_buffer: Vec<u8>
 }
 impl<T: ZByteReaderTrait> ZReader<T> {
    /// Create a new reader from a source
    /// that implements the [ZByteReaderTrait]
    pub fn new(source: T) -> ZReader<T> {
        ZReader {
            inner:       source,
            temp_buffer: Vec::new()
        }
    }
    /// Destroy this reader returning
    /// the underlying source of the bytes
    /// from which we were decoding
    #[inline(always)]
    pub fn consume(self) -> T {
        self.inner
    }
    /// Skip ahead ignoring `num` bytes
    ///
    /// For more advanced seek methods see [Self::seek] that allows
    /// moving around via more advanced ways
    ///
    /// # Arguments
    ///  - num: The number of bytes to skip.
    ///
    /// # Returns
    ///  - `Ok(u64)`: The new position from the start of the stream.
    ///  - `Error` If something went wrong, this includes a `num` that is too
    ///    large to be expressed as a relative seek offset (`i64`).
    #[inline(always)]
    pub fn skip(&mut self, num: usize) -> Result<u64, ZByteIoError> {
        // A plain `as` cast would silently turn values above `i64::MAX`
        // into a backwards seek, so convert with a check instead.
        let offset = i64::try_from(num)?;
        self.inner.z_seek(ZSeekFrom::Current(offset))
    }
    /// Move back from current position to a previous
    /// position
    ///
    /// For more advanced seek methods see [Self::seek] that allows
    /// moving around via more advanced ways
    ///
    /// # Arguments
    /// - `num`: Positions to move before the current cursor
    ///
    /// # Returns
    ///  - `Ok(u64)`: The new position from the start of the stream.
    ///  - `Error` If something went wrong, this includes a `num` that is too
    ///    large to be expressed as a relative seek offset (`i64`).
    #[inline(always)]
    pub fn rewind(&mut self, num: usize) -> Result<u64, ZByteIoError> {
        let offset = i64::try_from(num)?;
        // `offset` is non-negative here, hence the negation cannot overflow
        self.inner.z_seek(ZSeekFrom::Current(-offset))
    }
    /// Move around a stream of bytes
    ///
    /// This is analogous to the [std::io::Seek] trait with the same ergonomics
    /// only implemented to allow use in a `no_std` environment
    ///
    /// # Arguments
    /// - `from`: The seek operation type.
    ///
    /// # Returns
    ///  - `Ok(u64)`: The new position from the start of the stream.
    ///  -  Error if something went wrong.
    #[inline(always)]
    pub fn seek(&mut self, from: ZSeekFrom) -> Result<u64, ZByteIoError> {
        self.inner.z_seek(from)
    }
    /// Read a single byte from the underlying stream
    ///
    /// If an error occurs, it will return `0` as default output
    /// hence it may be difficult to distinguish a `0` from the underlying source
    /// and a `0` from an error.
    /// For that there is [Self::read_u8_err]
    ///
    /// # Returns
    /// - The next byte on the stream.
    #[inline(always)]
    pub fn read_u8(&mut self) -> u8 {
        self.inner.read_byte_no_error()
    }
    /// Read a single byte returning an error if the read cannot be satisfied
    ///
    /// # Returns
    /// - `Ok(u8)`: The next byte
    /// - Error if the byte read could not be satisfied
    #[inline(always)]
    pub fn read_u8_err(&mut self) -> Result<u8, ZByteIoError> {
        let mut single = [0];
        self.inner.read_const_bytes(&mut single)?;
        let [byte] = single;
        Ok(byte)
    }
    /// Look ahead `position` bytes and return a reference
    /// to `num_bytes` from that position, or an error if the
    /// peek would be out of bounds.
    ///
    /// This doesn't increment the position, bytes would have to be discarded
    /// at a later point. The cursor is moved back to where it was both when
    /// the peek succeeds and when it fails.
    ///
    /// The returned slice borrows an internal scratch buffer and is only
    /// valid until the next call that mutates the reader.
    #[inline]
    pub fn peek_at(&mut self, position: usize, num_bytes: usize) -> Result<&[u8], ZByteIoError> {
        // short circuit for zero
        // important since implementations like File will
        // cause a syscall on skip
        if position != 0 {
            // skip position bytes from the current cursor
            self.skip(position)?;
        }
        self.temp_buffer.resize(num_bytes, 0);
        let peeked = self.inner.peek_exact_bytes(&mut self.temp_buffer[..]);
        // Undo the skip regardless of the outcome of the peek, otherwise a
        // failed peek would leave the cursor `position` bytes ahead.
        let restored = if position != 0 {
            self.rewind(position).map(|_| ())
        } else {
            Ok(())
        };
        // the peek failure is the root cause, report it before a rewind failure
        peeked?;
        restored?;
        Ok(&self.temp_buffer)
    }
    /// Read a fixed number of known bytes to a buffer and return the bytes or an error
    /// if it occurred.
    ///
    /// The size of the `N` value must be small enough to fit the stack space otherwise
    /// this will cause a stack overflow :)
    ///
    /// If you can ignore errors, you can use [Self::read_fixed_bytes_or_zero]
    ///
    /// # Returns
    ///  - `Ok([u8;N])`: The bytes read from the source
    ///  - An error if it occurred.
    #[inline(always)]
    pub fn read_fixed_bytes_or_error<const N: usize>(&mut self) -> Result<[u8; N], ZByteIoError> {
        let mut byte_store: [u8; N] = [0; N];
        self.inner.read_const_bytes(&mut byte_store)?;
        Ok(byte_store)
    }
    /// Read a fixed bytes to an array and if that is impossible, return an array containing
    /// zeros
    ///
    /// If you want to handle errors, use [Self::read_fixed_bytes_or_error]
    #[inline(always)]
    pub fn read_fixed_bytes_or_zero<const N: usize>(&mut self) -> [u8; N] {
        let mut byte_store: [u8; N] = [0; N];
        self.inner.read_const_bytes_no_error(&mut byte_store);
        byte_store
    }
    /// Move the cursor to a fixed position in the stream
    ///
    /// This will move the cursor to exactly `position` bytes from the start of the buffer
    ///
    /// # Arguments
    /// - `position`: The new position of the cursor, counted from the start.
    #[inline]
    pub fn set_position(&mut self, position: usize) -> Result<(), ZByteIoError> {
        // the new absolute position reported by the seek is of no interest here
        self.seek(ZSeekFrom::Start(position as u64)).map(|_| ())
    }
    /// Return true if the underlying buffer can no longer produce bytes
    ///
    /// This call may be expensive depending on the underlying buffer type, e.g if
    /// it's a file, we have to ask the os whether we have more contents, or in other words make a syscall.
    ///
    /// Use that wisely
    ///
    /// # Returns
    ///  - `Ok(bool)`: True if we are in `EOF`, false if we can produce more bytes
    ///  - Error if something went wrong
    #[inline(always)]
    pub fn eof(&mut self) -> Result<bool, ZByteIoError> {
        self.inner.is_eof()
    }
    /// Return the current position of the inner reader or an error
    /// if that occurred when reading.
    ///
    /// Like [eof](Self::eof), the perf characteristics may vary depending on underlying reader
    ///
    /// # Returns
    /// - `Ok(u64)`: The current position of the inner reader
    #[inline(always)]
    pub fn position(&mut self) -> Result<u64, ZByteIoError> {
        self.inner.z_position()
    }
    /// Read a fixed number of bytes from the underlying reader returning
    /// an error if that can't be satisfied
    ///
    /// Similar to [std::io::Read::read_exact]
    ///
    /// # Returns
    ///  - `Ok(())`: If the read was successful
    ///  - An error if the read was unsuccessful including failure to fill the whole bytes
    pub fn read_exact_bytes(&mut self, buf: &mut [u8]) -> Result<(), ZByteIoError> {
        self.inner.read_exact_bytes(buf)
    }
    /// Read some bytes from the inner reader, and return number of bytes read
    ///
    /// The implementation may not read bytes enough to fill the buffer
    ///
    /// Similar to [std::io::Read::read]
    ///
    /// # Returns
    /// - `Ok(usize)`: Number of bytes actually read to the buffer
    /// - An error if something went wrong
    pub fn read_bytes(&mut self, buf: &mut [u8]) -> Result<usize, ZByteIoError> {
        self.inner.read_bytes(buf)
    }
 }
 /// Byte order in which the private integer readers generated by
 /// `get_single_type!` assemble the bytes they have read.
 enum Mode {
    // Big endian
    BE,
    // Little Endian
    LE
 }
 // Generates the family of integer readers of `ZReader` for one integer type.
 //
 // Arguments, in order:
 //  1. name of the private reader that yields 0 when the read fails
 //  2. name of the private reader that reports a failed read as an error
 //  3. name of the public big endian reader returning a `Result`
 //  4. name of the public little endian reader returning a `Result`
 //  5. name of the public big endian reader that yields 0 on failure
 //  6. name of the public little endian reader that yields 0 on failure
 //  7. the integer type to read
 macro_rules! get_single_type {
    ($or_default:tt,$or_error:tt,$be_err:tt,$le_err:tt,$be:tt,$le:tt,$int_type:tt) => {
        impl<T:ZByteReaderTrait> ZReader<T>
        {
            #[inline(always)]
            fn $or_default(&mut self, mode: Mode) -> $int_type
            {
                // stays all zeros when the read cannot be satisfied
                let mut raw = [0; core::mem::size_of::<$int_type>()];
                self.inner.read_const_bytes_no_error(&mut raw);
                match mode {
                    Mode::LE => $int_type::from_le_bytes(raw),
                    Mode::BE => $int_type::from_be_bytes(raw)
                }
            }
            #[inline(always)]
            fn $or_error(&mut self, mode: Mode) -> Result<$int_type, ZByteIoError>
            {
                let mut raw = [0; core::mem::size_of::<$int_type>()];
                self.inner.read_const_bytes(&mut raw)?;
                Ok(match mode {
                    Mode::LE => $int_type::from_le_bytes(raw),
                    Mode::BE => $int_type::from_be_bytes(raw)
                })
            }
            #[doc=concat!("Read ",stringify!($int_type)," as a big endian integer")]
            #[doc=concat!("Returning an error if the underlying buffer cannot support a ",stringify!($int_type)," read.")]
            #[inline]
            pub fn $be_err(&mut self) -> Result<$int_type, ZByteIoError>
            {
                self.$or_error(Mode::BE)
            }
            #[doc=concat!("Read ",stringify!($int_type)," as a little endian integer")]
            #[doc=concat!("Returning an error if the underlying buffer cannot support a ",stringify!($int_type)," read.")]
            #[inline]
            pub fn $le_err(&mut self) -> Result<$int_type, ZByteIoError>
            {
                self.$or_error(Mode::LE)
            }
            #[doc=concat!("Read ",stringify!($int_type)," as a big endian integer")]
            #[doc=concat!("Returning 0 if the underlying  buffer does not have enough bytes for a ",stringify!($int_type)," read.")]
            #[inline(always)]
            pub fn $be(&mut self) -> $int_type
            {
                self.$or_default(Mode::BE)
            }
            #[doc=concat!("Read ",stringify!($int_type)," as a little endian integer")]
            #[doc=concat!("Returning 0 if the underlying buffer does not have enough bytes for a ",stringify!($int_type)," read.")]
            #[inline(always)]
            pub fn $le(&mut self) -> $int_type
            {
                self.$or_default(Mode::LE)
            }
        }
    };
 }
 // Each invocation adds six methods to `ZReader`: two private helpers
 // (the first two names) and four public readers (big/little endian, each in an
 // error returning `_err` flavour and a flavour that yields 0 on failure).

 // 16 bit readers
 get_single_type!(
    get_u16_inner_or_default,
    get_u16_inner_or_die,
    get_u16_be_err,
    get_u16_le_err,
    get_u16_be,
    get_u16_le,
    u16
 );
 // 32 bit readers
 get_single_type!(
    get_u32_inner_or_default,
    get_u32_inner_or_die,
    get_u32_be_err,
    get_u32_le_err,
    get_u32_be,
    get_u32_le,
    u32
 );
 // 64 bit readers
 get_single_type!(
    get_u64_inner_or_default,
    get_u64_inner_or_die,
    get_u64_be_err,
    get_u64_le_err,
    get_u64_be,
    get_u64_le,
    u64
 );
 #[cfg(feature = "std")]
 impl<T> std::io::Read for ZReader<T>
 where
    T: ZByteReaderTrait
 {
    /// Forward to [`ZReader::read_bytes`], an error is reported as an
    /// `std::io::Error` of kind `Other` carrying the debug text of the
    /// original error.
    fn read(&mut self, buf: &mut [u8]) -> std::io::Result<usize> {
        use std::io::ErrorKind;
        match self.read_bytes(buf) {
            Ok(count) => Ok(count),
            Err(e) => Err(std::io::Error::new(ErrorKind::Other, format!("{:?}", e)))
        }
    }
 }
--- a/third_party/zune-core/src/bytestream/reader/no_std_readers.rs
+++ b/third_party/zune-core/src/bytestream/reader/no_std_readers.rs
@ -0,0 +1,198 @@
 use crate::bytestream::reader::{ZByteIoError, ZSeekFrom};
 use crate::bytestream::ZByteReaderTrait;
 /// Wraps an in memory buffer providing it with a `Seek` method
 /// but works in `no_std` environments
 ///
 /// `std::io::Cursor` is available in std environments, but we also need support
 /// for `no_std` environments so this serves as a drop in replacement
 pub struct ZCursor<T: AsRef<[u8]>> {
    // the wrapped buffer, only ever accessed through `as_ref()`
    stream:   T,
    // offset of the next byte to read, it may point past the end of `stream`
    position: usize
 }
 impl<T: AsRef<[u8]>> ZCursor<T> {
    pub fn new(buffer: T) -> ZCursor<T> {
        ZCursor {
            stream:   buffer,
            position: 0
        }
    }
 }
 impl<T: AsRef<[u8]>> ZCursor<T> {
    /// Move forward `num` bytes from
    /// the current position.
    ///
    /// The new position may point past the internal buffer, all subsequent
    /// reads will either return an error or zero depending on the method called.
    ///
    /// The position saturates at `usize::MAX`. Wrapping around instead would
    /// make the cursor silently point back into data that was already consumed.
    #[inline]
    pub fn skip(&mut self, num: usize) {
        self.position = self.position.saturating_add(num);
    }
    /// Move back `num` bytes from the current position
    ///
    /// This saturates at zero, it can never be negative or wraparound
    /// when the value becomes too small
    #[inline]
    pub fn rewind(&mut self, num: usize) {
        self.position = self.position.saturating_sub(num);
    }
 }
 impl<T: AsRef<[u8]>> ZByteReaderTrait for ZCursor<T> {
    /// Return the byte under the cursor, or `0` when the cursor is at or past
    /// the end of the buffer.
    ///
    /// The position is advanced by one in both cases.
    #[inline(always)]
    fn read_byte_no_error(&mut self) -> u8 {
        let byte = self.stream.as_ref().get(self.position).copied().unwrap_or(0);
        // saturate so that a cursor parked at `usize::MAX` neither panics
        // (debug builds) nor wraps around to the start (release builds)
        self.position = self.position.saturating_add(1);
        byte
    }
    /// Fill `buf` completely or fail with `NotEnoughBytes(requested, read)`.
    ///
    /// On failure the position is restored to where it was before the call.
    #[inline(always)]
    fn read_exact_bytes(&mut self, buf: &mut [u8]) -> Result<(), ZByteIoError> {
        let requested = buf.len();
        let bytes_read = self.read_bytes(buf)?;
        if bytes_read != requested {
            // restore read to initial position it was in.
            self.rewind(bytes_read);
            // not all bytes were read; the variant stores (requested, read)
            return Err(ZByteIoError::NotEnoughBytes(requested, bytes_read));
        }
        Ok(())
    }
    /// Copy the next `N` bytes into `buf` and advance by `N`.
    ///
    /// When fewer than `N` bytes remain, `buf` and the position are left
    /// untouched and an error is returned.
    fn read_const_bytes<const N: usize>(&mut self, buf: &mut [u8; N]) -> Result<(), ZByteIoError> {
        let start = self.position;
        // `checked_add` covers a cursor that was moved close to `usize::MAX`,
        // `get` covers a range that runs past the end of the buffer
        if let Some(end) = start.checked_add(N) {
            if let Some(source) = self.stream.as_ref().get(start..end) {
                buf.copy_from_slice(source);
                self.position = end;
                return Ok(());
            }
        }
        Err(ZByteIoError::Generic("Cannot satisfy read"))
    }
    /// Same as `read_const_bytes` but a read that cannot be satisfied is
    /// silently ignored, `buf` and the position are left untouched then.
    fn read_const_bytes_no_error<const N: usize>(&mut self, buf: &mut [u8; N]) {
        let start = self.position;
        if let Some(end) = start.checked_add(N) {
            if let Some(source) = self.stream.as_ref().get(start..end) {
                buf.copy_from_slice(source);
                self.position = end;
            }
        }
    }
    /// Copy as many bytes as are available (at most `buf.len()`) into `buf`,
    /// advance past them and return how many were copied.
    #[inline(always)]
    fn read_bytes(&mut self, buf: &mut [u8]) -> Result<usize, ZByteIoError> {
        let len = self.peek_bytes(buf)?;
        self.skip(len);
        Ok(len)
    }
    /// Copy as many bytes as are available (at most `buf.len()`) into `buf`
    /// without moving the cursor and return how many were copied.
    ///
    /// A cursor that points past the end yields `Ok(0)`.
    #[inline(always)]
    fn peek_bytes(&mut self, buf: &mut [u8]) -> Result<usize, ZByteIoError> {
        let stream = self.stream.as_ref();
        let stream_end = stream.len();
        let start = core::cmp::min(self.position, stream_end);
        // Saturating: a wrapped sum could end up below `start` and produce an
        // invalid range. With saturation `start <= end <= stream_end` holds.
        let end = core::cmp::min(self.position.saturating_add(buf.len()), stream_end);
        let slice = &stream[start..end];
        buf[..slice.len()].copy_from_slice(slice);
        Ok(slice.len())
    }
    /// Fill `buf` completely without moving the cursor, or return an error
    /// when not enough bytes are available.
    #[inline(always)]
    fn peek_exact_bytes(&mut self, buf: &mut [u8]) -> Result<(), ZByteIoError> {
        // on failure `read_exact_bytes` has already restored the position
        self.read_exact_bytes(buf)?;
        self.rewind(buf.len());
        Ok(())
    }
    /// Move the cursor and return the new position from the start.
    ///
    /// Seeking past the end is allowed. An offset that does not fit the
    /// platform's pointer width is reported as an error instead of being truncated.
    #[inline(always)]
    fn z_seek(&mut self, from: ZSeekFrom) -> Result<u64, ZByteIoError> {
        let (base_pos, offset) = match from {
            ZSeekFrom::Start(n) => {
                self.position = usize::try_from(n)?;
                return Ok(n);
            }
            ZSeekFrom::End(n) => (self.stream.as_ref().len(), isize::try_from(n)?),
            ZSeekFrom::Current(n) => (self.position, isize::try_from(n)?)
        };
        match base_pos.checked_add_signed(offset) {
            Some(n) => {
                self.position = n;
                Ok(self.position as u64)
            }
            None => Err(ZByteIoError::SeekError("Negative seek"))
        }
    }
    /// True when the cursor is at or past the end of the buffer.
    #[inline(always)]
    fn is_eof(&mut self) -> Result<bool, ZByteIoError> {
        Ok(self.position >= self.stream.as_ref().len())
    }
    /// Current offset of the cursor from the start of the buffer.
    #[inline(always)]
    fn z_position(&mut self) -> Result<u64, ZByteIoError> {
        Ok(self.position as u64)
    }
    /// Append everything between the cursor and the end of the buffer to
    /// `sink`, advance past it and return the number of bytes appended.
    ///
    /// A cursor that points past the end has nothing left to read and
    /// yields `Ok(0)`, matching what `read_bytes` and `is_eof` report.
    fn read_remaining(&mut self, sink: &mut alloc::vec::Vec<u8>) -> Result<usize, ZByteIoError> {
        let stream = self.stream.as_ref();
        // clamp, seeking beyond the end is a legal state of the cursor
        let start = core::cmp::min(self.position, stream.len());
        let remaining = &stream[start..];
        sink.extend_from_slice(remaining);
        let count = remaining.len();
        self.skip(count);
        Ok(count)
    }
 }
 #[cfg(feature = "std")]
 impl<T: AsRef<[u8]>> std::io::Seek for ZCursor<T> {
    /// Move the cursor and return the new position from the start.
    ///
    /// Seeking past the end is allowed, seeking before byte 0 is an error.
    /// Offsets that do not fit the platform's pointer width are rejected with
    /// an `InvalidInput` error instead of being silently truncated.
    fn seek(&mut self, pos: std::io::SeekFrom) -> std::io::Result<u64> {
        use std::io::{Error, ErrorKind, SeekFrom};

        let (base_pos, offset) = match pos {
            SeekFrom::Start(n) => {
                self.position =
                    usize::try_from(n).map_err(|e| Error::new(ErrorKind::InvalidInput, e))?;
                return Ok(n);
            }
            SeekFrom::End(n) => (self.stream.as_ref().len(), n),
            SeekFrom::Current(n) => (self.position, n)
        };
        let offset = isize::try_from(offset).map_err(|e| Error::new(ErrorKind::InvalidInput, e))?;
        match base_pos.checked_add_signed(offset) {
            Some(n) => {
                self.position = n;
                Ok(self.position as u64)
            }
            None => Err(Error::new(ErrorKind::Other, "Negative seek"))
        }
    }
 }
 //
 // #[cfg(feature = "std")]
 // impl<T: AsRef<[u8]>> std::io::Read for ZCursor<T> {
 //     fn read(&mut self, buf: &mut [u8]) -> std::io::Result<usize> {
 //         self.read_bytes(buf).map_err(|x|{ std::io::Error::new()})
 //     }
 // }
 impl<T: AsRef<[u8]>> From<T> for ZCursor<T> {
    fn from(value: T) -> Self {
        ZCursor::new(value)
    }
 }
--- a/third_party/zune-core/src/bytestream/reader/std_readers.rs
+++ b/third_party/zune-core/src/bytestream/reader/std_readers.rs
@ -0,0 +1,100 @@
 #![cfg(feature = "std")]
 use std::io;
 use std::io::SeekFrom;
 use crate::bytestream::reader::{ZByteIoError, ZSeekFrom};
 use crate::bytestream::ZByteReaderTrait;
 // note (cae): If Rust ever stabilizes trait specialization, specialize this for Cursor
 impl<T: io::BufRead + io::Seek> ZByteReaderTrait for T {
    /// Read one byte, EOF and I/O errors are swallowed and reported as `0`.
    #[inline(always)]
    fn read_byte_no_error(&mut self) -> u8 {
        let mut buf = [0];
        // deliberately ignored: on failure `buf` keeps its zero
        let _ = self.read(&mut buf);
        buf[0]
    }
    /// Fill `buf` completely or fail.
    ///
    /// Hitting EOF early seeks back to where the read started and returns
    /// `NotEnoughBytes(requested, read)`. Reads interrupted by a signal
    /// (`ErrorKind::Interrupted`) are retried, like `std::io::Read::read_exact` does.
    #[inline(always)]
    fn read_exact_bytes(&mut self, buf: &mut [u8]) -> Result<(), ZByteIoError> {
        let mut bytes_read = 0;
        while bytes_read < buf.len() {
            match self.read(&mut buf[bytes_read..]) {
                Ok(0) => {
                    // if a read returns zero bytes read, it means it encountered an EOF so we seek
                    // back to where we started because some paths may aggressively read forward and
                    // ZCursor maintains the position.
                    // NB: (cae) [tag=perf] This adds a branch on every read, and will slow down every function
                    // resting on it. Sorry
                    let consumed = i64::try_from(bytes_read).map_err(ZByteIoError::from)?;
                    self.seek(SeekFrom::Current(-consumed))
                        .map_err(ZByteIoError::from)?;
                    // the variant stores (requested, read)
                    return Err(ZByteIoError::NotEnoughBytes(buf.len(), bytes_read));
                }
                Ok(bytes) => {
                    bytes_read += bytes;
                }
                // non-fatal by contract of `Read::read`, simply try again
                Err(e) if e.kind() == io::ErrorKind::Interrupted => {}
                Err(e) => return Err(ZByteIoError::from(e))
            }
        }
        Ok(())
    }
    /// Fixed size flavour of `read_exact_bytes`, no special casing is done here.
    #[inline]
    fn read_const_bytes<const N: usize>(&mut self, buf: &mut [u8; N]) -> Result<(), ZByteIoError> {
        self.read_exact_bytes(buf)
    }
    /// Like `read_const_bytes` but the outcome of the read is ignored.
    fn read_const_bytes_no_error<const N: usize>(&mut self, buf: &mut [u8; N]) {
        let _ = self.read_const_bytes(buf);
    }
    /// Single `read` call on the underlying reader, may fill `buf` only partially.
    #[inline(always)]
    fn read_bytes(&mut self, buf: &mut [u8]) -> Result<usize, ZByteIoError> {
        self.read(buf).map_err(ZByteIoError::from)
    }
    /// Read into `buf`, then seek back by the number of bytes that were read
    /// so that the position is unchanged.
    #[inline(always)]
    fn peek_bytes(&mut self, buf: &mut [u8]) -> Result<usize, ZByteIoError> {
        // first read bytes to the buffer
        let bytes_read = self.read_bytes(buf)?;
        let converted = -i64::try_from(bytes_read).map_err(ZByteIoError::from)?;
        self.seek(std::io::SeekFrom::Current(converted))
            .map_err(ZByteIoError::from)?;
        Ok(bytes_read)
    }
    /// Fill `buf` completely, then seek back by `buf.len()` so that the
    /// position is unchanged.
    #[inline(always)]
    fn peek_exact_bytes(&mut self, buf: &mut [u8]) -> Result<(), ZByteIoError> {
        // first read bytes to the buffer
        self.read_exact_bytes(buf)?;
        let converted = -i64::try_from(buf.len()).map_err(ZByteIoError::from)?;
        self.seek(std::io::SeekFrom::Current(converted))
            .map_err(ZByteIoError::from)?;
        Ok(())
    }
    /// Forward to `std::io::Seek::seek` after converting the seek request.
    #[inline(always)]
    fn z_seek(&mut self, from: ZSeekFrom) -> Result<u64, ZByteIoError> {
        self.seek(from.to_std_seek()).map_err(ZByteIoError::from)
    }
    /// EOF is detected by asking the `BufRead` for its buffer, an empty
    /// buffer after `fill_buf` means no more data.
    #[inline(always)]
    fn is_eof(&mut self) -> Result<bool, ZByteIoError> {
        self.fill_buf()
            .map(|b| b.is_empty())
            .map_err(ZByteIoError::from)
    }
    /// Forward to `std::io::Seek::stream_position`.
    #[inline(always)]
    fn z_position(&mut self) -> Result<u64, ZByteIoError> {
        self.stream_position().map_err(ZByteIoError::from)
    }
    /// Forward to `std::io::Read::read_to_end`.
    #[inline(always)]
    fn read_remaining(&mut self, sink: &mut Vec<u8>) -> Result<usize, ZByteIoError> {
        self.read_to_end(sink).map_err(ZByteIoError::from)
    }
 }
--- a/third_party/zune-core/src/bytestream/traits.rs
+++ b/third_party/zune-core/src/bytestream/traits.rs
@ -0,0 +1,146 @@
 /*
 * Copyright (c) 2023.
 *
 * This software is free software;
 *
 * You can redistribute it or modify it under terms of the MIT, Apache License or Zlib license
 */
 //! Traits for reading and writing images in zune
 //!
 //!
 //! This exposes the traits and implementations for readers
 //! and writers in the zune family of decoders and encoders.
 use crate::bytestream::reader::{ZByteIoError, ZSeekFrom};
 /// The de-facto Input trait implemented for readers.
 ///
 /// This provides the basic functions needed to quick and sometimes
 /// heap free I/O for the zune image decoders with easy support for extending it
 /// to multiple implementations.
 ///
 /// # Considerations
 ///
 /// If you have an in memory buffer, prefer [`ZCursor`](crate::bytestream::ZCursor) over [`Cursor`](std::io::Cursor).
 /// We implement this trait for two types, `ZCursor`, and any thing that implements `BufRead`+`Seek`, `Cursor` falls in the latter
 /// and since Rust doesn't have specialization for traits, we can only implement it once. This means functions like
 /// [`read_byte_no_error`](crate::bytestream::ZByteReaderTrait::read_byte_no_error) are slower than they should be for `Cursor`.
 ///
 pub trait ZByteReaderTrait {
    /// Read a single byte from the decoder and return
    /// `0` if we can't read the byte, e.g because of EOF
    ///
    /// The implementation should try to be as fast as possible as this is called
    /// from some hot loops where it may become the bottleneck
    fn read_byte_no_error(&mut self) -> u8;
    /// Read exact bytes required to fill `buf` or return an error if that isn't possible
    ///
    /// ## Arguments
    ///  - `buf`: Buffer to fill with bytes from the underlying reader
    ///  ## Errors
    /// In case of an error, the implementation should not increment the internal position
    fn read_exact_bytes(&mut self, buf: &mut [u8]) -> Result<(), ZByteIoError>;
    /// Read exact bytes required to fill `buf` or return an error if that isn't possible
    ///
    /// This is the same as [`read_exact_bytes`](Self::read_exact_bytes) but implemented as a separate
    /// method to allow some implementations to optimize it to cost fewer instructions
    ///
    /// ## Arguments
    ///  - `buf`: Buffer to fill with bytes from the underlying reader
    ///  ## Errors
    /// In case of an error, the implementation should not increment the internal position
    fn read_const_bytes<const N: usize>(&mut self, buf: &mut [u8; N]) -> Result<(), ZByteIoError>;
    /// Read exact bytes required to fill `buf` or ignore buf entirely if you can't fill it
    /// due to an error like the inability to fill the buffer completely
    /// ## Arguments
    ///  - `buf`: Buffer to fill with bytes from the underlying reader
    /// ## Errors
    /// In case of an error, the implementation should not increment the internal position
    fn read_const_bytes_no_error<const N: usize>(&mut self, buf: &mut [u8; N]);
    /// Read bytes into `buf` returning how many bytes you have read or an error if one occurred
    ///
    /// This doesn't guarantee that buf will be filled with bytes for such a guarantee see
    /// [`read_exact_bytes`](Self::read_exact_bytes)
    ///
    /// ## Arguments
    /// - `buf`: The buffer to fill with bytes
    ///
    /// ## Returns
    ///  - `Ok(usize)` - Actual bytes read into the buffer
    ///  - `Err()` - The error encountered when reading bytes for which we couldn't recover
    fn read_bytes(&mut self, buf: &mut [u8]) -> Result<usize, ZByteIoError>;
    /// Reads data into provided buffer but does not advance read position.
    ///
    /// Like [`read_bytes`](Self::read_bytes) this may fill `buf` only partially.
    ///
    /// ## Arguments
    /// - `buf`: The buffer to fill with bytes
    ///
    /// ## Returns
    ///  - `Ok(usize)` - Actual bytes copied into the buffer
    ///  - `Err()` - The error encountered when peeking
    fn peek_bytes(&mut self, buf: &mut [u8]) -> Result<usize, ZByteIoError>;
    /// Fill `buf` completely but do not advance the read position, or return
    /// an error if `buf` cannot be filled.
    ///
    /// This is the peeking counterpart of [`read_exact_bytes`](Self::read_exact_bytes)
    ///
    /// ## Arguments
    ///  - `buf`: Buffer to fill with bytes from the underlying reader
    fn peek_exact_bytes(&mut self, buf: &mut [u8]) -> Result<(), ZByteIoError>;
    /// Seek into a new position from the buffer
    ///
    /// This is similar to the [seek](std::io::Seek::seek) function in the [Seek](std::io::Seek) trait
    /// but implemented to work for no-std environments
    fn z_seek(&mut self, from: ZSeekFrom) -> Result<u64, ZByteIoError>;
    /// Report whether we are at the end of a stream.
    ///
    /// ## Warning
    /// This may cause an additional syscall e.g when we are reading from a file, we must query the file
    /// multiple times to check if we really are at the end of the file and the user didn't sneakily
    /// add more contents to it hence use it with care
    ///
    /// ## Returns
    /// - `Ok(bool)` - The answer to whether or not we are at end of file
    /// - `Err()` - The error that occurred when we queried the underlying reader if we were at EOF
    fn is_eof(&mut self) -> Result<bool, ZByteIoError>;
    /// Return the current position of the inner cursor.
    ///
    /// This can be used to check the advancement of the cursor
    fn z_position(&mut self) -> Result<u64, ZByteIoError>;
    /// Read all bytes remaining in this input to `sink` until we hit eof
    ///
    /// # Returns
    /// - `Ok(usize)` The actual number of bytes added to the sink
    /// - `Err()` An error that occurred when reading bytes
    fn read_remaining(&mut self, sink: &mut alloc::vec::Vec<u8>) -> Result<usize, ZByteIoError>;
 }
 /// The writer trait implemented for zune-image library of encoders
 ///
 /// Anything that implements this trait can be used as a sink
 /// for writing encoded images
 pub trait ZByteWriterTrait {
    /// Write some bytes into the sink returning number of bytes written or
    /// an error if something bad happened
    ///
    /// An implementation is free to write fewer bytes than are in `buf`, so the bytes
    /// cannot be guaranteed to be fully written
    fn write_bytes(&mut self, buf: &[u8]) -> Result<usize, ZByteIoError>;
    /// Write all bytes to the buffer or return an error if something occurred
    ///
    /// This will always write all bytes, if it can't fully write all bytes, it will
    /// error out
    fn write_all_bytes(&mut self, buf: &[u8]) -> Result<(), ZByteIoError>;
    /// Write a fixed number of bytes and error out if we can't write the bytes
    ///
    /// This is provided to allow for optimized writes where possible. (when the compiler can const fold them)
    fn write_const_bytes<const N: usize>(&mut self, buf: &[u8; N]) -> Result<(), ZByteIoError>;
    /// Ensure bytes are written to the sink.
    ///
    /// Implementations should treat this like linux `fsync`, and should implement
    /// whatever writer's implementation of fsync should look like
    ///
    /// After this, the encoder should be able to guarantee that all in-core data is synced with the
    /// storage device
    fn flush_bytes(&mut self) -> Result<(), ZByteIoError>;
    /// A hint to tell the implementation how big of a size we expect the image to be
    /// An implementation like in memory `Vec` can use this to reserve additional memory to
    /// prevent reallocation when encoding
    ///
    /// This is just a hint, akin to calling `Vec::reserve` and should be treated as such.
    /// If your implementation doesn't support such, e.g file or mutable slices, it's okay to return
    /// `Ok(())`
    fn reserve_capacity(&mut self, size: usize) -> Result<(), ZByteIoError>;
 }
--- a/third_party/zune-core/src/bytestream/writer.rs
+++ b/third_party/zune-core/src/bytestream/writer.rs
@ -0,0 +1,262 @@
 /*
 * Copyright (c) 2023.
 *
 * This software is free software;
 *
 * You can redistribute it or modify it under terms of the MIT, Apache License or Zlib license
 */
 use crate::bytestream::{ZByteIoError, ZByteWriterTrait};
 mod no_std_writer;
 mod std_writer;
 /// Byte order used when turning an integer into bytes
 enum Mode {
    /// Big endian
    BE,
    /// Little endian
    LE
 }
 /// Encapsulates a simple byte writer with
 /// support for endian-aware writes
 pub struct ZWriter<T: ZByteWriterTrait> {
    /// The underlying sink every write is forwarded to
    buffer:        T,
    /// Running total of the bytes written through this writer
    bytes_written: usize
 }
 impl<T: ZByteWriterTrait> ZWriter<T> {
    /// Hand `buf` to the underlying sink and report how many bytes it accepted
    ///
    /// # Arguments
    /// - `buf`: The bytes to be written to the bytestream
    ///
    /// # Returns
    /// - `Ok(usize)`: Number of bytes the sink reported as written.
    ///   The sink is allowed to report fewer than `buf.len()` bytes.
    /// - `Err(ZByteIoError)`: The error reported by the sink; the byte counter is not updated
    ///
    /// If you want to be sure that all bytes were written, see [`write_all`](Self::write_all)
    #[inline]
    pub fn write(&mut self, buf: &[u8]) -> Result<usize, ZByteIoError> {
        let accepted = self.buffer.write_bytes(buf)?;
        self.bytes_written += accepted;
        Ok(accepted)
    }
    /// Write all bytes from `buf` into the bytestream, returning
    /// an error if the sink could not take all of them
    ///
    /// # Arguments
    /// - `buf`: The bytes to be written into the bytestream
    ///
    /// # Returns
    /// - `Ok(())`: Indicates all bytes were written into the bytestream
    /// - `Err(ZByteIoError)`: In case the bytes could not be written
    ///   to the stream; the byte counter is not updated
    pub fn write_all(&mut self, buf: &[u8]) -> Result<(), ZByteIoError> {
        let len = buf.len();
        self.buffer.write_all_bytes(buf)?;
        self.bytes_written += len;
        Ok(())
    }
    /// Create a new bytestream writer around `data`
    ///
    /// Bytes are written from the start to the end and no assumptions
    /// are made about the nature of the underlying stream
    ///
    /// # Arguments
    /// - `data`: The sink that will receive the bytes
    pub fn new(data: T) -> ZWriter<T> {
        Self {
            buffer:        data,
            bytes_written: 0
        }
    }
    /// Write a single byte into the bytestream or error out
    /// if there is not enough space
    ///
    /// # Example
    /// ```
    /// use zune_core::bytestream::ZWriter;
    /// let mut buf = [0;10];
    /// let mut stream  =  ZWriter::new(&mut buf[..]);
    /// assert!(stream.write_u8_err(34).is_ok());
    /// ```
    /// No space
    /// ```
    /// use zune_core::bytestream::ZWriter;
    /// let mut no_space = [];
    /// let mut stream = ZWriter::new(&mut no_space[..]);
    /// assert!(stream.write_u8_err(32).is_err());
    /// ```
    ///
    #[inline]
    pub fn write_u8_err(&mut self, byte: u8) -> Result<(), ZByteIoError> {
        let single = [byte];
        self.write_const_bytes(&single)
    }
    /// Write a fixed, compile time known number of bytes to the sink
    ///
    /// This is provided since some implementations can optimize such writes by eliminating
    /// some redundant code.
    ///
    /// On success the byte counter grows by `N`.
    #[inline]
    pub fn write_const_bytes<const N: usize>(
        &mut self, byte: &[u8; N]
    ) -> Result<(), ZByteIoError> {
        self.buffer.write_const_bytes(byte)?;
        self.bytes_written += N;
        Ok(())
    }
    /// Write a single byte into the stream, silently ignoring
    /// any error reported by the sink (e.g. when it is full)
    ///
    #[inline]
    pub fn write_u8(&mut self, byte: u8) {
        // errors are deliberately discarded, see `write_u8_err` for the checked variant
        let _ = self.write_u8_err(byte);
    }
    /// Return the number of bytes written by this encoder
    ///
    /// The writer keeps a running count of the bytes it wrote and this method
    /// returns that value.
    ///
    /// # Returns
    ///  Number of bytes written
    pub fn bytes_written(&self) -> usize {
        self.bytes_written
    }
    /// Reserve some additional space to write.
    ///
    /// Some sinks like `Vec<u8>` allow reallocation and to prevent too much reallocation
    /// one can use this to reserve additional space to encode
    ///
    /// # Example
    ///
    /// ```
    /// use zune_core::bytestream::ZWriter;
    /// let space_needed = 10; // Assume the image will fit into 10 bytes
    /// let mut output = Vec::new();
    /// let mut sink = ZWriter::new(&mut output);
    /// // now reserve some space
    /// sink.reserve(space_needed).unwrap();
    /// // at this point, we can assume that ZWriter allocated space for output
    /// ```
    pub fn reserve(&mut self, additional: usize) -> Result<(), ZByteIoError> {
        self.buffer.reserve_capacity(additional)
    }
    /// Consume the writer and hand back the inner sink
    /// we were writing to.
    ///
    /// After this, the writer can no longer be used
    pub fn inner(self) -> T {
        let ZWriter { buffer, .. } = self;
        buffer
    }
    /// Borrow the inner sink immutably
    pub fn inner_ref(&self) -> &T {
        &self.buffer
    }
    /// Borrow the inner sink mutably
    pub fn inner_mut(&mut self) -> &mut T {
        &mut self.buffer
    }
 }
 // Generates the endian-aware integer writers of `ZWriter` for a single integer type.
 //
 // Arguments, in order:
 //  1. name of the private helper that reports errors
 //  2. name of the private helper that discards errors
 //  3. name of the public big endian writer that reports errors
 //  4. name of the public little endian writer that reports errors
 //  5. name of the public big endian writer that discards errors
 //  6. name of the public little endian writer that discards errors
 //  7. the integer type the generated methods accept
 macro_rules! write_single_type {
    (
        $inner_err:ident,
        $inner_silent:ident,
        $be_err:ident,
        $le_err:ident,
        $be:ident,
        $le:ident,
        $int_type:ty
    ) => {
        impl<T: ZByteWriterTrait> ZWriter<T> {
            #[inline(always)]
            fn $inner_err(&mut self, byte: $int_type, mode: Mode) -> Result<(), ZByteIoError> {
                // Pick the byte order depending on mode.
                // Every caller passes a literal `Mode`, so after inlining
                // the match is expected to disappear from the generated binary.
                let bytes = match mode {
                    Mode::BE => byte.to_be_bytes(),
                    Mode::LE => byte.to_le_bytes()
                };
                self.write_const_bytes(&bytes)
            }
            #[inline(always)]
            fn $inner_silent(&mut self, byte: $int_type, mode: Mode) {
                // Same as the checked helper, but the error is deliberately dropped
                let _ = self.$inner_err(byte, mode);
            }
            #[doc=concat!("Write ",stringify!($int_type)," as a big endian integer")]
            #[doc=concat!("Returning an error if the underlying buffer cannot support a ",stringify!($int_type)," write.")]
            #[inline]
            pub fn $be_err(&mut self, byte: $int_type) -> Result<(), ZByteIoError> {
                self.$inner_err(byte, Mode::BE)
            }
            #[doc=concat!("Write ",stringify!($int_type)," as a little endian integer")]
            #[doc=concat!("Returning an error if the underlying buffer cannot support a ",stringify!($int_type)," write.")]
            #[inline]
            pub fn $le_err(&mut self, byte: $int_type) -> Result<(), ZByteIoError> {
                self.$inner_err(byte, Mode::LE)
            }
            #[doc=concat!("Write ",stringify!($int_type)," as a big endian integer")]
            #[doc=concat!("Or don't write anything if the writer cannot support a ",stringify!($int_type)," write.")]
            #[inline]
            pub fn $be(&mut self, byte: $int_type) {
                self.$inner_silent(byte, Mode::BE)
            }
            #[doc=concat!("Write ",stringify!($int_type)," as a little endian integer")]
            #[doc=concat!("Or don't write anything if the writer cannot support a ",stringify!($int_type)," write.")]
            #[inline]
            pub fn $le(&mut self, byte: $int_type) {
                self.$inner_silent(byte, Mode::LE)
            }
        }
    };
 }
 // Generate the `u64` writers: `write_u64_{be,le}` and `write_u64_{be,le}_err`
 write_single_type!(
    write_u64_inner_or_die,
    write_u64_inner_or_none,
    write_u64_be_err,
    write_u64_le_err,
    write_u64_be,
    write_u64_le,
    u64
 );
 // Generate the `u32` writers: `write_u32_{be,le}` and `write_u32_{be,le}_err`
 write_single_type!(
    write_u32_inner_or_die,
    write_u32_inner_or_none,
    write_u32_be_err,
    write_u32_le_err,
    write_u32_be,
    write_u32_le,
    u32
 );
 // Generate the `u16` writers: `write_u16_{be,le}` and `write_u16_{be,le}_err`
 write_single_type!(
    write_u16_inner_or_die,
    write_u16_inner_or_none,
    write_u16_be_err,
    write_u16_le_err,
    write_u16_be,
    write_u16_le,
    u16
 );
--- a/third_party/zune-core/src/bytestream/writer/no_std_writer.rs
+++ b/third_party/zune-core/src/bytestream/writer/no_std_writer.rs
@ -0,0 +1,70 @@
 // We cannot use the impls below together with the std ones because we would re-implement the
 // same trait for `&mut [u8]`, which is already covered by the blanket `Write` impl, ending up with two separate implementations
 #![cfg(not(feature = "std"))]
 use crate::bytestream::{ZByteIoError, ZByteWriterTrait};
 impl ZByteWriterTrait for &mut [u8] {
    /// Copy as much of `buf` as fits into the front of the slice, then advance
    /// the slice past the copied bytes. Returns the number of bytes copied.
    fn write_bytes(&mut self, buf: &[u8]) -> Result<usize, ZByteIoError> {
        // mirrors the `Write` implementation of std for `&mut [u8]`
        let count = buf.len().min(self.len());
        let (head, tail) = core::mem::take(self).split_at_mut(count);
        head.copy_from_slice(&buf[..count]);
        *self = tail;
        Ok(count)
    }
    /// Copy all of `buf` into the front of the slice and advance past it, or
    /// return `NotEnoughBuffer` (leaving the slice untouched) when it does not fit.
    fn write_all_bytes(&mut self, buf: &[u8]) -> Result<(), ZByteIoError> {
        let available = self.len();
        if buf.len() > available {
            return Err(ZByteIoError::NotEnoughBuffer(available, buf.len()));
        }
        // the guard above ensures the whole of `buf` fits
        let (head, tail) = core::mem::take(self).split_at_mut(buf.len());
        head.copy_from_slice(buf);
        *self = tail;
        Ok(())
    }
    /// Same as `write_all_bytes`, for an array whose length `N` is known at compile time.
    fn write_const_bytes<const N: usize>(&mut self, buf: &[u8; N]) -> Result<(), ZByteIoError> {
        let available = self.len();
        if N > available {
            return Err(ZByteIoError::NotEnoughBuffer(available, N));
        }
        // the guard above ensures all `N` bytes fit
        let (head, tail) = core::mem::take(self).split_at_mut(N);
        head.copy_from_slice(&buf[..]);
        *self = tail;
        Ok(())
    }
    /// Nothing to flush for an in-memory slice.
    fn flush_bytes(&mut self) -> Result<(), ZByteIoError> {
        Ok(())
    }
    /// A slice cannot grow, so the hint is ignored.
    fn reserve_capacity(&mut self, _: usize) -> Result<(), ZByteIoError> {
        Ok(())
    }
 }
 impl ZByteWriterTrait for &mut alloc::vec::Vec<u8> {
    /// Append `buf` to the vector; the whole buffer is always taken.
    fn write_bytes(&mut self, buf: &[u8]) -> Result<usize, ZByteIoError> {
        let appended = buf.len();
        self.extend_from_slice(buf);
        Ok(appended)
    }
    /// Append `buf` to the vector.
    fn write_all_bytes(&mut self, buf: &[u8]) -> Result<(), ZByteIoError> {
        self.extend_from_slice(buf);
        Ok(())
    }
    /// Append the `N` bytes of `buf` to the vector.
    fn write_const_bytes<const N: usize>(&mut self, buf: &[u8; N]) -> Result<(), ZByteIoError> {
        self.extend_from_slice(&buf[..]);
        Ok(())
    }
    /// Nothing to flush for an in-memory vector.
    fn flush_bytes(&mut self) -> Result<(), ZByteIoError> {
        Ok(())
    }
    /// Forward the hint to `Vec::reserve`.
    fn reserve_capacity(&mut self, size: usize) -> Result<(), ZByteIoError> {
        self.reserve(size);
        Ok(())
    }
 }
--- a/third_party/zune-core/src/bytestream/writer/std_writer.rs
+++ b/third_party/zune-core/src/bytestream/writer/std_writer.rs
@ -0,0 +1,27 @@
 #![cfg(feature = "std")]
 use std::io::Write;
 use crate::bytestream::ZByteIoError;
 impl<T: Write> crate::bytestream::ZByteWriterTrait for T {
    /// Forward to `Write::write`, wrapping any I/O error in `ZByteIoError::StdIoError`.
    fn write_bytes(&mut self, buf: &[u8]) -> Result<usize, ZByteIoError> {
        Write::write(self, buf).map_err(ZByteIoError::StdIoError)
    }
    /// Forward to `Write::write_all`, wrapping any I/O error in `ZByteIoError::StdIoError`.
    fn write_all_bytes(&mut self, buf: &[u8]) -> Result<(), ZByteIoError> {
        Write::write_all(self, buf).map_err(ZByteIoError::StdIoError)
    }
    /// Write all `N` bytes of `buf`, using the same path as `write_all_bytes`.
    fn write_const_bytes<const N: usize>(&mut self, buf: &[u8; N]) -> Result<(), ZByteIoError> {
        Write::write_all(self, &buf[..]).map_err(ZByteIoError::StdIoError)
    }
    /// Forward to `Write::flush`, wrapping any I/O error in `ZByteIoError::StdIoError`.
    fn flush_bytes(&mut self) -> Result<(), ZByteIoError> {
        Write::flush(self).map_err(ZByteIoError::StdIoError)
    }
    /// The `Write` trait offers no way to reserve capacity, so the hint is ignored.
    fn reserve_capacity(&mut self, _: usize) -> Result<(), ZByteIoError> {
        Ok(())
    }
 }
--- a/third_party/zune-core/src/colorspace.rs
+++ b/third_party/zune-core/src/colorspace.rs
@ -0,0 +1,161 @@
 /*
 * Copyright (c) 2023.
 *
 * This software is free software;
 *
 * You can redistribute it or modify it under terms of the MIT, Apache License or Zlib license
 */
 //! Image Colorspace information and manipulation utilities.
 /// All possible image colorspaces
 ///
 /// Some of the colorspaces listed here are not supported yet.
 #[allow(clippy::upper_case_acronyms)]
 #[derive(Copy, Clone, Debug, Eq, PartialEq)]
 #[non_exhaustive]
 pub enum ColorSpace {
    /// Red, Green, Blue
    RGB,
    /// Red, Green, Blue, Alpha
    RGBA,
    /// YUV colorspace
    YCbCr,
    /// Grayscale colorspace
    Luma,
    /// Grayscale with alpha colorspace
    LumaA,
    /// Four component colorspace (see [`num_components`](Self::num_components));
    /// presumably YCbCr plus a black (K) channel -- TODO confirm
    YCCK,
    /// Cyan, Magenta, Yellow, Black
    CMYK,
    /// Blue, Green, Red
    BGR,
    /// Blue, Green, Red, Alpha
    BGRA,
    /// The colorspace is unknown
    Unknown,
    /// Alpha, Red, Green, Blue
    ARGB,
    /// Hue, Saturation, Lightness
    ///
    /// Conversion from RGB to HSL and back matches that of Python [colorsys](https://docs.python.org/3/library/colorsys.html) module
    /// Color type is expected to be in floating point
    HSL,
    /// Hue, Saturation, Value
    ///
    /// Conversion from RGB to HSV and back matches that of Python [colorsys](https://docs.python.org/3/library/colorsys.html) module
    /// Color type is expected to be in floating point
    HSV
 }
 impl ColorSpace {
    /// Number of color channels that make up one pixel in this colorspace
    ///
    /// E.g. RGB returns 3 since it contains R, G and B colors to make up a pixel.
    /// `Unknown` returns 0.
    pub const fn num_components(&self) -> usize {
        match self {
            Self::Unknown => 0,
            Self::Luma => 1,
            Self::LumaA => 2,
            Self::RGB | Self::BGR | Self::YCbCr | Self::HSL | Self::HSV => 3,
            Self::RGBA | Self::BGRA | Self::ARGB | Self::CMYK | Self::YCCK => 4
        }
    }
    /// Returns true if the colorspace carries an alpha channel
    pub const fn has_alpha(&self) -> bool {
        // exactly the colorspaces with a known alpha position have alpha
        self.alpha_position().is_some()
    }
    /// Returns true if the colorspace is grayscale, with or without alpha
    pub const fn is_grayscale(&self) -> bool {
        matches!(self, Self::Luma | Self::LumaA)
    }
    /// Returns the position of the alpha component within a pixel
    ///
    ///
    /// That is, for an array of color components say `[0,1,2,3]`, if the image has an alpha channel
    /// and is in RGBA format, this will return `Some(3)`, indicating alpha is found at index 3,
    /// but if the image is in `ARGB` format, it will return `Some(0)`, indicating alpha is found at
    /// index 0
    ///
    /// If an image doesn't have an alpha channel returns `None`
    ///
    pub const fn alpha_position(&self) -> Option<usize> {
        match self {
            Self::ARGB => Some(0),
            Self::LumaA => Some(1),
            Self::RGBA | Self::BGRA => Some(3),
            _ => None
        }
    }
 }
 /// Encapsulates all colorspaces supported by
 /// the library
 ///
 /// Every `ColorSpace` variant except `ColorSpace::Unknown` is listed here.
 pub static ALL_COLORSPACES: [ColorSpace; 12] = [
    ColorSpace::RGB,
    ColorSpace::RGBA,
    ColorSpace::LumaA,
    ColorSpace::Luma,
    ColorSpace::CMYK,
    ColorSpace::BGRA,
    ColorSpace::BGR,
    ColorSpace::YCCK,
    ColorSpace::YCbCr,
    ColorSpace::ARGB,
    ColorSpace::HSL,
    ColorSpace::HSV
 ];
 /// Color characteristics
 ///
 /// Gives more information about values in a certain
 /// colorspace
 #[allow(non_camel_case_types)]
 #[derive(Copy, Clone, Debug, PartialEq)]
 pub enum ColorCharacteristics {
    /// Normal default gamma setting
    ///
    /// The default gamma value is 2.2, but for
    /// decoders that allow specifying gamma values, e.g. PNG,
    /// the gamma value becomes the value specified by the decoder.
    ///
    /// NOTE(review): this variant carries no payload, so the gamma value
    /// itself is not stored here.
    sRGB,
    /// Linear transfer characteristics
    ///
    /// The image is in linear colorspace
    Linear
 }
 /// Represents a single channel color primary.
 ///
 /// This can be viewed as a 3D coordinate of the color primary
 /// for a given colorspace
 #[derive(Default, Debug, Copy, Clone)]
 pub struct SingleColorPrimary {
    /// First coordinate of the primary
    pub x: f64,
    /// Second coordinate of the primary
    pub y: f64,
    /// Third coordinate of the primary
    pub z: f64
 }
 /// A collection of red, green and blue color primaries placed
 /// in one struct for easy manipulation
 #[derive(Default, Debug, Copy, Clone)]
 pub struct ColorPrimaries {
    /// Red color primary
    pub red:   SingleColorPrimary,
    /// Green color primary
    pub green: SingleColorPrimary,
    /// Blue color primary
    pub blue:  SingleColorPrimary
 }
 /// Rendering intents indicate what one may want to do with colors outside of its gamut
 ///
 ///
 /// Further reading
 ///  - [IBM Rendering Intent](https://www.ibm.com/docs/en/i/7.5?topic=management-rendering-intents)
 ///  - [ColorGate Blog](https://blog.colorgate.com/en/rendering-intent-explained)
 #[derive(Eq, PartialEq, Clone, Copy, Debug)]
 pub enum RenderingIntent {
    AbsoluteColorimetric,
    Saturation,
    RelativeColorimetric,
    Perceptual
 }
--- a/third_party/zune-core/src/lib.rs
+++ b/third_party/zune-core/src/lib.rs
@ -0,0 +1,62 @@
 /*
 * Copyright (c) 2023.
 *
 * This software is free software; You can redistribute it or modify it under terms of the MIT, Apache License or Zlib license
 */
 //! Core routines shared by all libraries
 //!
 //! This crate provides a set of core routines shared
 //! by the decoders and encoders under `zune` umbrella
 //!
 //! It currently contains
 //!
 //! - A bytestream reader and writer with endian aware reads and writes
 //! - Colorspace and bit depth information shared by images
 //! - Image decoder and encoder options
 //! - A simple enum type to hold image decoding results.
 //!
 //! This library is `#[no_std]` with `alloc` feature needed for defining `Vec`
 //! which we need for storing decoded  bytes.
 //!
 //!
 //! # Features
 //!  - `no_std`: Enables `#[no_std]` compilation support.
 //!
 //!  - `serde`: Enables serializing of some of the data structures
 //!     present in the crate
 //!
 //!
 //! # Input/Output
 //!
 //! zune-image supports many different input and output devices. For input readers
 //! we can read anything that implements `BufRead` + `Seek` and provide an optimized routine for
 //! handling in memory buffers by using [`ZCursor`](crate::bytestream::ZCursor).
 //!
 //! For output, we support anything that implements the `Write` trait; this includes files, standard I/O streams,
 //! network sockets, etc.
 //!
 //! In a `no_std` environment, we can write to the in-memory buffers `&mut [u8]` and `&mut Vec<u8>`.
 //!
 //! If you have an in memory buffer, use [`ZCursor`](crate::bytestream::ZCursor),
 //! it's optimized for in memory buffers.
 //!
 //!  
 //!
 #![cfg_attr(not(feature = "std"), no_std)]
 #![macro_use]
 extern crate alloc;
 extern crate core;
 #[cfg(not(feature = "log"))]
 pub mod log;
 #[cfg(feature = "log")]
 pub use log;
 pub mod bit_depth;
 pub mod bytestream;
 pub mod colorspace;
 pub mod options;
 pub mod result;
 mod serde;
--- a/third_party/zune-core/src/log.rs
+++ b/third_party/zune-core/src/log.rs
@ -0,0 +1,74 @@
 /*
 * Copyright (c) 2023.
 *
 * This software is free software;
 *
 * You can redistribute it or modify it under terms of the MIT, Apache License or Zlib license
 */
 // #[macro_export] is required to make macros works across crates
 // but it always put the macro in the crate root.
 // #[doc(hidden)] + "pub use" is a workaround to namespace a macro.
 pub use crate::{
    __debug as debug, __error as error, __info as info, __log_enabled as log_enabled,
    __trace as trace, __warn as warn
 };
 /// Log severity levels used by the stand-in logging macros of this module.
 ///
 /// Discriminants start at 1 for `Error` and increase by one per variant,
 /// ending at `Trace`.
 #[repr(usize)]
 #[derive(Copy, Clone, Eq, PartialEq, Debug, Hash)]
 pub enum Level {
    /// Value 1
    Error = 1,
    /// Value 2
    Warn,
    /// Value 3
    Info,
    /// Value 4
    Debug,
    /// Value 5
    Trace
 }
 // Stand-in for `log_enabled!`: evaluates the level expression (so it is not
 // reported as unused) and always yields `false`.
 #[doc(hidden)]
 #[macro_export]
 macro_rules! __log_enabled {
    ($lvl:expr) => {{
        let _ = $lvl;
        false
    }};
 }
 // Stand-in for `error!`: accepts any non-empty token list and expands to an
 // empty block gated on the `std` feature. The `eprintln!` call is commented
 // out, so nothing is printed.
 #[doc(hidden)]
 #[macro_export]
 macro_rules! __error {
    ($($arg:tt)+) => {
        #[cfg(feature = "std")]
        {
            //eprintln!($($arg)+);
        }
    };
 }
 // Stand-in for `warn!`: accepts any non-empty token list and expands to an
 // empty block gated on the `std` feature. The `eprintln!` call is commented
 // out, so nothing is printed.
 #[doc(hidden)]
 #[macro_export]
 macro_rules! __warn {
    ($($arg:tt)+) => {
        #[cfg(feature = "std")]
        {
            //eprintln!($($arg)+);
        }
    };
 }
 // Stand-in for `info!`: accepts any non-empty token list and expands to nothing.
 #[doc(hidden)]
 #[macro_export]
 macro_rules! __info {
    ($($arg:tt)+) => {};
 }
 // Stand-in for `debug!`: accepts any non-empty token list and expands to nothing.
 #[doc(hidden)]
 #[macro_export]
 macro_rules! __debug {
    ($($arg:tt)+) => {};
 }
 // Stand-in for `trace!`: accepts any non-empty token list and expands to nothing.
 #[doc(hidden)]
 #[macro_export]
 macro_rules! __trace {
    ($($arg:tt)+) => {};
 }
--- a/third_party/zune-core/src/options.rs
+++ b/third_party/zune-core/src/options.rs
@ -0,0 +1,13 @@
 //! Decoder and Encoder Options
 //!
 //! This module exposes a struct for which all implemented
 //! decoders get shared options for decoding
 //!
 //! All supported options are put into one _Options to allow for global configurations
 //! options e.g the same  `DecoderOption` can be reused for all other decoders
 //!
 pub use decoder::DecoderOptions;
 pub use encoder::EncoderOptions;
 mod decoder;
 mod encoder;
--- a/third_party/zune-core/src/options/decoder.rs
+++ b/third_party/zune-core/src/options/decoder.rs
@ -0,0 +1,666 @@
 /*
 * Copyright (c) 2023.
 *
 * This software is free software;
 *
 * You can redistribute it or modify it under terms of the MIT, Apache License or Zlib license
 */
 //! Global Decoder options
 #![allow(clippy::zero_prefixed_literal)]
 use crate::bit_depth::ByteEndian;
 use crate::colorspace::ColorSpace;
 /// Decoder flags for a decoder that tolerates errors in its input
 ///
 /// Currently returns exactly what [`fast_options`] returns.
 fn decoder_error_tolerance_mode() -> DecoderFlags {
    // similar to fast options currently, so no need to write a new one
    fast_options()
 }
 /// Fast decoder options
 ///
 /// - Enables unsafe routines and every intrinsics flag (sse2, sse3, sse4.1, avx, avx2, neon)
 /// - Disables adler and crc confirmation as well as jpg non-conformance errors
 /// - Leaves png alpha insertion and 16 to 8 bit stripping off
 /// - Decodes animated png and jxl images
 fn fast_options() -> DecoderFlags {
    DecoderFlags {
        // checksum and conformance checks
        inflate_confirm_adler:        false,
        png_confirm_crc:              false,
        jpg_error_on_non_conformance: false,
        // unsafe code paths and platform intrinsics
        zune_use_unsafe:              true,
        zune_use_sse2:                true,
        zune_use_sse3:                true,
        zune_use_sse41:               true,
        zune_use_avx:                 true,
        zune_use_avx2:                true,
        zune_use_neon:                true,
        // png output transformations
        png_add_alpha_channel:        false,
        png_strip_16_bit_to_8_bit:    false,
        // animation
        png_decode_animated:          true,
        jxl_decode_animated:          true
    }
 }
 /// Command line options: error resilient and fast
 ///
 /// Features
 /// - Ignore CRC and Adler in png
 /// - Do not error out on non-conformance in jpg
 /// - Use unsafe paths and every intrinsics flag
 /// - Decode animated png and jxl images
 fn cmd_options() -> DecoderFlags {
    DecoderFlags {
        // checksum and conformance checks
        inflate_confirm_adler:        false,
        png_confirm_crc:              false,
        jpg_error_on_non_conformance: false,
        // unsafe code paths and platform intrinsics
        zune_use_unsafe:              true,
        zune_use_sse2:                true,
        zune_use_sse3:                true,
        zune_use_sse41:               true,
        zune_use_avx:                 true,
        zune_use_avx2:                true,
        zune_use_neon:                true,
        // png output transformations
        png_add_alpha_channel:        false,
        png_strip_16_bit_to_8_bit:    false,
        // animation
        png_decode_animated:          true,
        jxl_decode_animated:          true
    }
 }
 /// Decoder options that are flags
 ///
 /// NOTE: When you extend this, add true or false to
 /// all functions above that return a `DecoderFlags`
 #[derive(Copy, Debug, Clone, Default)]
 pub struct DecoderFlags {
    /// Whether the decoder should confirm and report adler mismatch
    inflate_confirm_adler:        bool,
    /// Whether the PNG decoder should confirm crc
    png_confirm_crc:              bool,
    /// Whether the jpeg decoder should error out on image non-conformance
    jpg_error_on_non_conformance: bool,
    /// Whether the decoder should use unsafe platform specific intrinsics
    ///
    /// Turning this off is meant to also turn off the platform specific intrinsics `(ZUNE_USE_{EXT})` values
    zune_use_unsafe:              bool,
    /// Whether we should use SSE2.
    ///
    /// This should be enabled for all x64 platforms but can be turned off if
    /// `ZUNE_USE_UNSAFE` is false
    zune_use_sse2:                bool,
    /// Whether we should use SSE3 instructions where possible.
    zune_use_sse3:                bool,
    /// Whether we should use sse4.1 instructions where possible.
    zune_use_sse41:               bool,
    /// Whether we should use avx instructions where possible.
    zune_use_avx:                 bool,
    /// Whether we should use avx2 instructions where possible.
    zune_use_avx2:                bool,
    /// Whether the png decoder should add alpha channel where possible.
    png_add_alpha_channel:        bool,
    /// Whether we should use neon instructions where possible.
    zune_use_neon:                bool,
    /// Whether the png decoder should strip 16 bit to 8 bit
    png_strip_16_bit_to_8_bit:    bool,
    /// Decode all frames of an animated png image
    png_decode_animated:          bool,
    /// Presumably the jxl counterpart of `png_decode_animated` -- TODO confirm
    jxl_decode_animated:          bool
 }
 /// Decoder options
 ///
 /// Not all options are respected by all decoders
 #[derive(Debug, Copy, Clone)]
 pub struct DecoderOptions {
    /// Maximum width for which decoders will
    /// not try to decode images larger than
    /// the specified width.
    ///
    /// - Default value: 16384
    /// - Respected by: `all decoders`
    max_width:      usize,
    /// Maximum height for which decoders will not
    /// try to decode images larger than the
    /// specified height
    ///
    /// - Default value: 16384
    /// - Respected by: `all decoders`
    max_height:     usize,
    /// Output colorspace
    ///
    /// The jpeg decoder allows conversion to a separate colorspace
    /// than the input.
    ///
    /// I.e. you can convert an RGB jpeg image to grayscale without
    /// first decoding it to RGB
    ///
    /// - Default value: `ColorSpace::RGB`
    /// - Respected by: `jpeg`
    out_colorspace: ColorSpace,
    /// Maximum number of scans allowed
    /// for progressive jpeg images
    ///
    /// Progressive jpegs have scans
    ///
    /// - Default value: 100
    /// - Respected by: `jpeg`
    max_scans:     usize,
    /// Maximum size for deflate.
    /// Respected by all decoders that use inflate/deflate
    deflate_limit: usize,
    /// Boolean flags that influence decoding
    flags:         DecoderFlags,
    /// The byte endian in which the returned bytes will be stored
    /// in case a single pixel spans more than a byte
    endianness:    ByteEndian
 }
 /// Initializers
 impl DecoderOptions {
    /// Create decoder options with most configurable
    /// options set to their safe counterparts
    ///
    /// This simply returns `DecoderOptions::default()`, as the defaults
    /// are meant to be the safe variants.
    ///
    /// Note, decoders running on this will be slower as it disables
    /// platform specific intrinsics
    pub fn new_safe() -> DecoderOptions {
        Self::default()
    }
    /// Create decoder options with the configurable options
    /// set to their fast counterparts
    ///
    /// Starts from the defaults and replaces the flags with the ones from
    /// `fast_options`, which enable platform specific code paths and the use of unsafe
    pub fn new_fast() -> DecoderOptions {
        Self::default().set_decoder_flags(fast_options())
    }
    /// Create decoder options with the following characteristics
    ///
    /// - Use unsafe paths.
    /// - Ignore checksumming errors, e.g. in png we do not confirm adler and crc in this mode
    /// - Enable fast intrinsics paths
    ///
    /// Starts from the defaults and replaces the flags with the ones from `cmd_options`
    pub fn new_cmd() -> DecoderOptions {
        Self::default().set_decoder_flags(cmd_options())
    }
 }
 /// Global options respected by all decoders
 impl DecoderOptions {
    /// Get maximum width configured for which the decoder
    /// should not try to decode images greater than this width
    pub const fn max_width(&self) -> usize {
        self.max_width
    }
    /// Get maximum height configured for which the decoder should
    /// not try to decode images greater than this height
    pub const fn max_height(&self) -> usize {
        self.max_height
    }
    /// Return true whether the decoder should be in strict mode
    /// And reject most errors
    pub fn strict_mode(&self) -> bool {
        self.flags.jpg_error_on_non_conformance
            | self.flags.png_confirm_crc
            | self.flags.inflate_confirm_adler
    }
    /// Return true if the decoder should use unsafe
    /// routines where possible
    pub const fn use_unsafe(&self) -> bool {
        self.flags.zune_use_unsafe
    }
    /// Set maximum width for which the decoder should not try
    /// decoding images greater than that width
    ///
    /// # Arguments
    ///
    /// * `width`:  The maximum width allowed
    ///
    /// returns: DecoderOptions
    pub fn set_max_width(mut self, width: usize) -> Self {
        self.max_width = width;
        self
    }
    /// Set maximum height for which the decoder should not try
    /// decoding images greater than that height
    /// # Arguments
    ///
    /// * `height`: The maximum height allowed
    ///
    /// returns: DecoderOptions
    ///
    pub fn set_max_height(mut self, height: usize) -> Self {
        self.max_height = height;
        self
    }
    /// Whether the routines can use unsafe platform specific
    /// intrinsics when necessary
    ///
    /// Platform intrinsics are implemented for operations which
    /// the compiler can't auto-vectorize, or we can do a marginably
    /// better job at it
    ///
    /// All decoders with unsafe routines respect it.
    ///
    /// Treat this with caution, disabling it will cause slowdowns but
    /// it's provided for mainly for debugging use.
    ///
    /// - Respected by: `png` and `jpeg`(decoders with unsafe routines)
    pub fn set_use_unsafe(mut self, yes: bool) -> Self {
        // first clear the flag
        self.flags.zune_use_unsafe = yes;
        self
    }
    fn set_decoder_flags(mut self, flags: DecoderFlags) -> Self {
        self.flags = flags;
        self
    }
    /// Set whether the decoder should be in standards conforming/
    /// strict mode
    ///
    /// This reduces the error tolerance level for the decoders and invalid
    /// samples will be rejected by the decoder
    ///
    /// # Arguments
    ///
    /// * `yes`:
    ///
    /// returns: DecoderOptions
    ///
    pub fn set_strict_mode(mut self, yes: bool) -> Self {
        self.flags.jpg_error_on_non_conformance = yes;
        self.flags.png_confirm_crc = yes;
        self.flags.inflate_confirm_adler = yes;
        self
    }
    /// Set the byte endian for which raw samples will be stored in
    /// in case a single pixel sample spans more than a byte.
    ///
    /// The default is usually native endian hence big endian values
    /// will be converted to little endian on little endian systems,
    ///
    /// and little endian values will be converted to big endian on big endian systems
    ///
    /// # Arguments
    ///
    /// * `endian`: The endianness to which to set the bytes to
    ///
    /// returns: DecoderOptions
    pub fn set_byte_endian(mut self, endian: ByteEndian) -> Self {
        self.endianness = endian;
        self
    }
    /// Get the byte endian for which samples that span more than one byte will
    /// be treated
    pub const fn byte_endian(&self) -> ByteEndian {
        self.endianness
    }
 }
 /// PNG specific options
 impl DecoderOptions {
    /// Return whether the inflate decoder should confirm
    /// adler checksums
    pub const fn inflate_get_confirm_adler(&self) -> bool {
        self.flags.inflate_confirm_adler
    }
    /// Set whether the inflate decoder should confirm
    /// adler checksums
    ///
    /// Consumes the options and returns the updated copy, so the
    /// return value must be used for the change to take effect.
    #[must_use]
    pub fn inflate_set_confirm_adler(mut self, yes: bool) -> Self {
        self.flags.inflate_confirm_adler = yes;
        self
    }
    /// Get the configured inflate limit, beyond which the decoder
    /// will not try to decompress further
    pub const fn inflate_get_limit(&self) -> usize {
        self.deflate_limit
    }
    /// Set the inflate limit; decompressors
    /// relying on inflate won't surpass this limit
    ///
    /// Consumes the options and returns the updated copy.
    #[must_use]
    pub fn inflate_set_limit(mut self, limit: usize) -> Self {
        self.deflate_limit = limit;
        self
    }
    /// Whether the inflate decoder should confirm
    /// crc 32 checksums
    pub const fn png_get_confirm_crc(&self) -> bool {
        self.flags.png_confirm_crc
    }
    /// Set whether the png decoder should confirm
    /// CRC 32 checksums
    #[must_use]
    pub fn png_set_confirm_crc(mut self, yes: bool) -> Self {
        self.flags.png_confirm_crc = yes;
        self
    }
    /// Set whether the png decoder should add an alpha channel to
    /// images where possible.
    ///
    /// For Luma images, it converts it to Luma+Alpha
    ///
    /// For RGB images it converts it to RGB+Alpha
    pub fn png_set_add_alpha_channel(mut self, yes: bool) -> Self {
        self.flags.png_add_alpha_channel = yes;
        self
    }
    /// Return true whether the png decoder should add an alpha
    /// channel to images where possible
    pub const fn png_get_add_alpha_channel(&self) -> bool {
        self.flags.png_add_alpha_channel
    }
    /// Whether the png decoder should reduce 16 bit images to 8 bit
    /// images implicitly.
    ///
    /// Equivalent to [png::Transformations::STRIP_16](https://docs.rs/png/latest/png/struct.Transformations.html#associatedconstant.STRIP_16)
    pub fn png_set_strip_to_8bit(mut self, yes: bool) -> Self {
        self.flags.png_strip_16_bit_to_8_bit = yes;
        self
    }
    /// Return a boolean indicating whether the png decoder should reduce
    /// 16 bit images to 8 bit images implicitly
    pub const fn png_get_strip_to_8bit(&self) -> bool {
        self.flags.png_strip_16_bit_to_8_bit
    }
    /// Return whether `zune-image` should decode animated images or
    /// whether we should just decode the first frame only
    pub const fn png_decode_animated(&self) -> bool {
        self.flags.png_decode_animated
    }
    /// Set  whether `zune-image` should decode animated images or
    /// whether we should just decode the first frame only
    pub const fn png_set_decode_animated(mut self, yes: bool) -> Self {
        self.flags.png_decode_animated = yes;
        self
    }
 }
 /// JPEG specific options
 impl DecoderOptions {
    /// Report the maximum number of scans the jpeg decoder
    /// should not go above for progressive images
    pub const fn jpeg_get_max_scans(&self) -> usize {
        self.max_scans
    }
    /// Configure the maximum number of scans the jpeg decoder should
    /// not exceed when reconstructing images.
    pub fn jpeg_set_max_scans(self, max_scans: usize) -> Self {
        Self { max_scans, ..self }
    }
    /// Report the output colorspace requested by the user, i.e. the one the
    /// image is expected to be reconstructed into.
    ///
    /// This may be different from the colorspace the decoder ends up
    /// producing, see [`Self::jpeg_set_out_colorspace`].
    pub const fn jpeg_get_out_colorspace(&self) -> ColorSpace {
        self.out_colorspace
    }
    /// Configure the colorspace the jpeg output is expected to be in
    ///
    /// This is mainly provided as is, we do not guarantee the decoder can convert to all colorspaces
    /// and the decoder can change it internally when it sees fit.
    #[must_use]
    pub fn jpeg_set_out_colorspace(self, colorspace: ColorSpace) -> Self {
        Self {
            out_colorspace: colorspace,
            ..self
        }
    }
 }
 /// Intrinsics support
 ///
 /// Each routine is compiled differently depending on the target platform.
 /// When compiled for a platform that does not support the instruction set
 /// in question (e.g. avx2 on Arm), the routine always returns `false`.
 ///
 /// NOTE(review): every routine combines its own flag with `zune_use_unsafe`
 /// using `|`, so clearing only the specific flag does not disable the code
 /// path while `zune_use_unsafe` is still set — confirm this is intended.
 impl DecoderOptions {
    /// Use SSE 2 code paths where possible
    ///
    /// Returns `false` when neither the sse2 flag nor the unsafe flag is set.
    /// Otherwise this checks for existence of SSE2 and returns
    /// `false` if it's not present
    // `unreachable_code` is allowed because, for some cfg combinations, a block
    // ending in an unconditional `return true` precedes the trailing `false`.
    #[allow(unreachable_code)]
    pub fn use_sse2(&self) -> bool {
        let opt = self.flags.zune_use_sse2 | self.flags.zune_use_unsafe;
        // the options say no
        if !opt {
            return false;
        }
        #[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
        {
            // where we can do a runtime check for the feature
            #[cfg(feature = "std")]
            {
                if is_x86_feature_detected!("sse2") {
                    return true;
                }
            }
            // where we can't do a runtime check for the feature,
            // check whether it was enabled at compile time
            #[cfg(all(not(feature = "std"), target_feature = "sse2"))]
            {
                return true;
            }
        }
        // everything failed, return false
        false
    }
    /// Use SSE 3 paths where possible
    ///
    /// Returns `false` when neither the sse3 flag nor the unsafe flag is set.
    /// Otherwise this checks for SSE3 support and returns `false` if
    /// it's not present
    // `unreachable_code` is allowed because, for some cfg combinations, a block
    // ending in an unconditional `return true` precedes the trailing `false`.
    #[allow(unreachable_code)]
    pub fn use_sse3(&self) -> bool {
        let opt = self.flags.zune_use_sse3 | self.flags.zune_use_unsafe;
        // the options say no
        if !opt {
            return false;
        }
        #[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
        {
            // where we can do a runtime check for the feature
            #[cfg(feature = "std")]
            {
                if is_x86_feature_detected!("sse3") {
                    return true;
                }
            }
            // where we can't do a runtime check for the feature,
            // check whether it was enabled at compile time
            #[cfg(all(not(feature = "std"), target_feature = "sse3"))]
            {
                return true;
            }
        }
        // everything failed, return false
        false
    }
    /// Use SSE4 paths where possible
    ///
    /// Returns `false` when neither the sse4.1 flag nor the unsafe flag is set.
    /// Otherwise this checks for sse 4.1 support and returns `false` if it
    /// is not present
    // `unreachable_code` is allowed because, for some cfg combinations, a block
    // ending in an unconditional `return true` precedes the trailing `false`.
    #[allow(unreachable_code)]
    pub fn use_sse41(&self) -> bool {
        let opt = self.flags.zune_use_sse41 | self.flags.zune_use_unsafe;
        // the options say no
        if !opt {
            return false;
        }
        #[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
        {
            // where we can do a runtime check for the feature
            #[cfg(feature = "std")]
            {
                if is_x86_feature_detected!("sse4.1") {
                    return true;
                }
            }
            // where we can't do a runtime check for the feature,
            // check whether it was enabled at compile time
            #[cfg(all(not(feature = "std"), target_feature = "sse4.1"))]
            {
                return true;
            }
        }
        // everything failed, return false
        false
    }
    /// Use AVX paths where possible
    ///
    /// Returns `false` when neither the avx flag nor the unsafe flag is set.
    /// Otherwise this checks for AVX support and returns `false` if it's
    /// not present
    // `unreachable_code` is allowed because, for some cfg combinations, a block
    // ending in an unconditional `return true` precedes the trailing `false`.
    #[allow(unreachable_code)]
    pub fn use_avx(&self) -> bool {
        let opt = self.flags.zune_use_avx | self.flags.zune_use_unsafe;
        // the options say no
        if !opt {
            return false;
        }
        #[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
        {
            // where we can do a runtime check for the feature
            #[cfg(feature = "std")]
            {
                if is_x86_feature_detected!("avx") {
                    return true;
                }
            }
            // where we can't do a runtime check for the feature,
            // check whether it was enabled at compile time
            #[cfg(all(not(feature = "std"), target_feature = "avx"))]
            {
                return true;
            }
        }
        // everything failed, return false
        false
    }
    /// Use avx2 paths where possible
    ///
    /// Returns `false` when neither the avx2 flag nor the unsafe flag is set.
    /// Otherwise this checks for AVX2 support and returns `false` if it's not
    /// present
    // `unreachable_code` is allowed because, for some cfg combinations, a block
    // ending in an unconditional `return true` precedes the trailing `false`.
    #[allow(unreachable_code)]
    pub fn use_avx2(&self) -> bool {
        let opt = self.flags.zune_use_avx2 | self.flags.zune_use_unsafe;
        // the options say no
        if !opt {
            return false;
        }
        #[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
        {
            // where we can do a runtime check for the feature
            #[cfg(feature = "std")]
            {
                if is_x86_feature_detected!("avx2") {
                    return true;
                }
            }
            // where we can't do a runtime check for the feature,
            // check whether it was enabled at compile time
            #[cfg(all(not(feature = "std"), target_feature = "avx2"))]
            {
                return true;
            }
        }
        // everything failed, return false
        false
    }
    /// Use NEON paths where possible
    ///
    /// Returns `false` when neither the neon flag nor the unsafe flag is set.
    /// Otherwise returns `true` when compiled for `aarch64` and `false` on
    /// every other target; no runtime feature detection is performed.
    // `unreachable_code` is allowed because on aarch64 the unconditional
    // `return true` precedes the trailing `false`.
    #[allow(unreachable_code)]
    pub fn use_neon(&self) -> bool {
        let opt = self.flags.zune_use_neon | self.flags.zune_use_unsafe;
        // the options say no
        if !opt {
            return false;
        }
        #[cfg(target_arch = "aarch64")]
        {
            // aarch64 implies neon on a compliant cpu
            // but for real prod should do something better here
            return true;
        }
        // everything failed, return false
        false
    }
 }
 /// JPEG_XL specific options
 impl DecoderOptions {
    /// Return whether `zune-image` should decode all frames of an animated
    /// image or just the first frame
    pub const fn jxl_decode_animated(&self) -> bool {
        self.flags.jxl_decode_animated
    }
    /// Set whether `zune-image` should decode all frames of an animated
    /// image or just the first frame
    ///
    /// Consumes the options and returns the updated copy.
    pub const fn jxl_set_decode_animated(mut self, yes: bool) -> Self {
        self.flags.jxl_decode_animated = yes;
        self
    }
 }
 impl Default for DecoderOptions {
    /// Create a default and sane option for decoders
    ///
    /// The following are the defaults
    ///
    /// - All decoders
    ///     - max_width: 16384 (`1 << 14`)
    ///     - max_height: 16384 (`1 << 14`)
    ///     - out_colorspace: RGB
    ///     - use_unsafe: Use unsafe intrinsics where possible.
    ///
    /// - JPEG
    ///     - max_scans: 100 (progressive images only, artificial cap to prevent a specific DOS)
    ///     - error_on_non_conformance: False (slightly corrupt images will be allowed)
    /// - DEFLATE
    ///     - deflate_limit: 1 GiB (`1 << 30`, will not continue decoding deflate archives larger than this)
    /// - PNG
    ///   - endianness: Big Endian (`ByteEndian::BE`), used when samples wider than one byte are stored as bytes
    ///   - confirm_crc: False (CRC will not be confirmed to be safe)
    ///   - strip_16_bit_to_8: False, 16 bit images are handled as 16 bit images
    ///   - add alpha: False, alpha channel is not added where it isn't present
    ///   - decode_animated: True: All frames in an animated image are decoded
    ///
    ///  - JXL
    ///    - decode_animated: True: All frames in an animated image are decoded
    ///
    /// NOTE(review): every boolean default listed above (use_unsafe,
    /// error_on_non_conformance, confirm_crc, strip_16_bit_to_8, add alpha,
    /// decode_animated) comes from `decoder_error_tolerance_mode()` rather than
    /// from this function — confirm them against that function.
    fn default() -> Self {
        Self {
            out_colorspace: ColorSpace::RGB,
            max_width:      1 << 14,
            max_height:     1 << 14,
            max_scans:      100,
            deflate_limit:  1 << 30,
            flags:          decoder_error_tolerance_mode(),
            endianness:     ByteEndian::BE
        }
    }
 }
--- a/third_party/zune-core/src/options/encoder.rs
+++ b/third_party/zune-core/src/options/encoder.rs
@ -0,0 +1,217 @@
 /*
 * Copyright (c) 2023.
 *
 * This software is free software;
 *
 * You can redistribute it or modify it under terms of the MIT, Apache License or Zlib license
 */
 use crate::bit_depth::BitDepth;
 use crate::colorspace::ColorSpace;
 /// Encoder options that are flags
 ///
 /// All flags are `false` in the derived `Default`.
 #[derive(Copy, Debug, Clone, Default)]
 struct EncoderFlags {
    /// Whether JPEG images should be encoded as progressive images
    jpeg_encode_progressive: bool,
    /// Whether JPEG images should use optimized huffman tables
    jpeg_optimize_huffman:   bool,
    /// Whether metadata should be stripped (i.e. not preserved) across
    /// image transformations
    image_strip_metadata:    bool
 }
 /// Options shared by some of the encoders in
 /// the `zune-` family of image crates
 #[derive(Debug, Copy, Clone)]
 pub struct EncoderOptions {
    /// Width of the image to be encoded
    width:       usize,
    /// Height of the image to be encoded
    height:      usize,
    /// Colorspace the image will be encoded in
    colorspace:  ColorSpace,
    /// Encoding quality; `set_quality` never stores a value above 100
    quality:     u8,
    /// Bit depth the image will be encoded in
    depth:       BitDepth,
    /// Number of threads allowed for multithreaded encoding where supported
    num_threads: u8,
    /// Encoder effort level
    /// (NOTE(review): its meaning is not documented in this file — confirm per encoder)
    effort:      u8,
    /// Boolean options, see `EncoderFlags`
    flags:       EncoderFlags
 }
 impl Default for EncoderOptions {
    /// Create encoder options with the following defaults
    ///
    /// - width: 0
    /// - height: 0 (note that `height()` panics while the height is zero)
    /// - colorspace: RGB
    /// - quality: 80
    /// - depth: eight bits
    /// - num_threads: 4
    /// - effort: 4
    /// - flags: all `false`
    fn default() -> Self {
        Self {
            width:       0,
            height:      0,
            colorspace:  ColorSpace::RGB,
            quality:     80,
            depth:       BitDepth::Eight,
            num_threads: 4,
            effort:      4,
            flags:       EncoderFlags::default()
        }
    }
 }
 impl EncoderOptions {
    /// Create new encode options
    ///
    /// Every option not listed below takes its value from
    /// `EncoderOptions::default()`.
    ///
    /// # Arguments
    ///
    /// * `width`: Image width
    /// * `height`: Image height
    /// * `colorspace`: Image colorspace
    /// * `depth`: Image depth
    ///
    /// returns: EncoderOptions
    ///
    pub fn new(
        width: usize, height: usize, colorspace: ColorSpace, depth: BitDepth
    ) -> EncoderOptions {
        EncoderOptions {
            width,
            height,
            colorspace,
            depth,
            ..Default::default()
        }
    }
    /// Get the width for which the image will be encoded in
    pub const fn width(&self) -> usize {
        self.width
    }
    /// Get height for which the image will be encoded in
    ///
    /// returns: usize
    ///
    /// # Panics
    /// If height is zero
    pub fn height(&self) -> usize {
        assert_ne!(self.height, 0);
        self.height
    }
    /// Get the depth for which the image will be encoded in
    pub const fn depth(&self) -> BitDepth {
        self.depth
    }
    /// Get the quality for which the image will be encoded with
    ///
    /// # Lossy
    /// - Higher quality means some images take longer to write and
    /// are big but they look good
    ///
    /// - Lower quality means small images and low quality.
    ///
    /// # Lossless
    /// - High quality indicates more time is spent in making the file
    /// smaller
    ///
    /// - Low quality indicates less time is spent in making the file
    /// smaller, so files are bigger
    pub const fn quality(&self) -> u8 {
        self.quality
    }
    /// Get the colorspace for which the image will be encoded in
    pub const fn colorspace(&self) -> ColorSpace {
        self.colorspace
    }
    /// Get the configured encoder effort
    ///
    /// NOTE(review): what the effort level means is not documented in this
    /// file — confirm with the encoders that read it.
    pub const fn effort(&self) -> u8 {
        self.effort
    }
    /// Set width for the image to be encoded
    pub fn set_width(mut self, width: usize) -> Self {
        self.width = width;
        self
    }
    /// Set height for the image to be encoded
    pub fn set_height(mut self, height: usize) -> Self {
        self.height = height;
        self
    }
    /// Set depth for the image to be encoded
    pub fn set_depth(mut self, depth: BitDepth) -> Self {
        self.depth = depth;
        self
    }
    /// Set quality of the image to be encoded
    ///
    /// Quality is clamped to `0..=100`
    ///
    /// Quality means different options depending on the encoder, see
    /// [quality](Self::quality)
    pub fn set_quality(mut self, quality: u8) -> Self {
        // `u8` cannot go below zero, so only the upper bound needs enforcing
        self.quality = quality.min(100);
        self
    }
    /// Set colorspace for the image to be encoded
    pub fn set_colorspace(mut self, colorspace: ColorSpace) -> Self {
        self.colorspace = colorspace;
        self
    }
    /// Set the number of threads allowed for multithreaded encoding
    /// where supported
    ///
    /// Zero means use a single thread
    pub fn set_num_threads(mut self, threads: u8) -> Self {
        self.num_threads = threads;
        self
    }
    /// Set the encoder effort
    ///
    /// The value is stored as is, no range check is performed.
    pub fn set_effort(mut self, effort: u8) -> Self {
        self.effort = effort;
        self
    }
    /// Return number of threads configured for multithreading
    /// where possible
    ///
    /// This is used for multi-threaded encoders,
    /// currently only jpeg-xl
    pub const fn num_threads(&self) -> u8 {
        self.num_threads
    }
    /// Set whether the encoder should remove metadata from the image
    ///
    /// When set to `true`, supported encoders will strip away metadata
    /// from the resulting image. If set to false, where supported, encoders
    /// will not remove metadata from images
    pub fn set_strip_metadata(mut self, yes: bool) -> Self {
        self.flags.image_strip_metadata = yes;
        self
    }
    /// Whether or not the encoder should remove metadata from the image
    ///
    /// The default value is false, and encoders that respect this try to preserve as much
    /// data as possible from one image to another
    ///
    /// Returns exactly the value last passed to
    /// [set_strip_metadata](Self::set_strip_metadata).
    pub const fn strip_metadata(&self) -> bool {
        // Report the flag as stored: negating it here would make the default
        // read as `true` and invert whatever the setter was given.
        self.flags.image_strip_metadata
    }
 }
 /// JPEG options
 impl EncoderOptions {
    /// Whether the jpeg encoder should encode the image in progressive mode
    ///
    /// Default is `false`.
    ///
    /// This may be used to create slightly smaller images at the cost of more processing
    /// time
    pub const fn jpeg_encode_progressive(&self) -> bool {
        self.flags.jpeg_encode_progressive
    }
    /// Whether the jpeg encoder should optimize huffman tables to create smaller files
    /// at the cost of processing time
    ///
    /// Default is `false`.
    pub const fn jpeg_optimized_huffman_tables(&self) -> bool {
        self.flags.jpeg_optimize_huffman
    }
    /// Set whether the jpeg encoder should encode the image in progressive mode
    ///
    /// Default is `false`
    ///
    /// This only affects the value reported by
    /// [jpeg_encode_progressive](Self::jpeg_encode_progressive); the huffman
    /// optimization flag is left untouched.
    pub fn set_jpeg_encode_progressive(mut self, yes: bool) -> Self {
        // Write the progressive flag itself, not the huffman optimization flag.
        self.flags.jpeg_encode_progressive = yes;
        self
    }
    /// Set whether the jpeg encoder should optimize huffman tables to create
    /// smaller files at the cost of processing time
    ///
    /// Default is `false`
    ///
    /// The value is reported back by
    /// [jpeg_optimized_huffman_tables](Self::jpeg_optimized_huffman_tables).
    pub fn set_jpeg_optimized_huffman_tables(mut self, yes: bool) -> Self {
        self.flags.jpeg_optimize_huffman = yes;
        self
    }
 }
--- a/third_party/zune-core/src/result.rs
+++ b/third_party/zune-core/src/result.rs
@ -0,0 +1,72 @@
 /*
 * Copyright (c) 2023.
 *
 * This software is free software;
 *
 * You can redistribute it or modify it under terms of the MIT, Apache License or Zlib license
 */
 //! Decoding results for images
 use alloc::vec::Vec;
 /// A simple enum that can hold decode
 /// results of most images
 ///
 /// Each variant owns the decoded samples as a vector of the
 /// corresponding primitive type.
 #[non_exhaustive]
 pub enum DecodingResult {
    /// Samples stored as `u8`
    U8(Vec<u8>),
    /// Samples stored as `u16`
    U16(Vec<u16>),
    /// Samples stored as `f32`
    F32(Vec<f32>)
 }
 impl DecodingResult {
    /// Consume the result and hand back the inner `Vec<u8>` when the
    /// `U8` variant is stored; every other variant yields `None`.
    ///
    /// Useful for de-sugaring the result of a decoding operation
    /// into raw bytes
    ///
    /// # Example
    /// ```
    /// use zune_core::result::DecodingResult;
    /// let data = DecodingResult::U8(vec![0;100]);
    /// // we know this won't fail because we created it with u8
    /// assert!(data.u8().is_some());
    ///
    /// let data = DecodingResult::U16(vec![0;100]);
    /// // it should now return nothing since the type is u16
    /// assert!(data.u8().is_none());
    ///
    /// ```
    pub fn u8(self) -> Option<Vec<u8>> {
        if let DecodingResult::U8(bytes) = self {
            Some(bytes)
        } else {
            None
        }
    }
    /// Consume the result and hand back the inner `Vec<u16>` when the
    /// `U16` variant is stored; every other variant yields `None`.
    ///
    /// Useful for de-sugaring the result of a decoding operation
    /// into raw 16 bit samples
    ///
    /// # Example
    /// ```
    /// use zune_core::result::DecodingResult;
    /// let data = DecodingResult::U8(vec![0;100]);
    /// // we know this will return nothing because we created it with u8
    /// assert!(data.u16().is_none());
    ///
    ///
    /// let data = DecodingResult::U16(vec![0;100]);
    /// // it should now return something since the type is u16
    /// assert!(data.u16().is_some());
    ///
    /// ```
    pub fn u16(self) -> Option<Vec<u16>> {
        if let DecodingResult::U16(samples) = self {
            Some(samples)
        } else {
            None
        }
    }
 }
--- a/third_party/zune-core/src/serde.rs
+++ b/third_party/zune-core/src/serde.rs
@ -0,0 +1,63 @@
 /*
 * Copyright (c) 2023.
 *
 * This software is free software;
 *
 * You can redistribute it or modify it under terms of the MIT, Apache License or Zlib license
 */
 #![cfg(feature = "serde")]
 //! Serde support for serializing
 //! crate datastructures
 //!
 //! Implements serialize for
 //!  - ColorSpace
 //!  - BitDepth
 //!  - ColorCharacteristics
 //!  - RenderingIntent
 use alloc::format;
 use serde::ser::*;
 use crate::bit_depth::BitDepth;
 use crate::colorspace::{ColorCharacteristics, ColorSpace, RenderingIntent};
 impl Serialize for ColorSpace {
    #[allow(clippy::uninlined_format_args)]
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: Serializer
    {
        // colorspace serialization is simply it's debug value
        serializer.serialize_str(&format!("{:?}", self))
    }
 }
 impl Serialize for BitDepth {
    #[allow(clippy::uninlined_format_args)]
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: Serializer
    {
        serializer.serialize_str(&format!("{:?}", self))
    }
 }
 impl Serialize for ColorCharacteristics {
    #[allow(clippy::uninlined_format_args)]
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: Serializer
    {
        serializer.serialize_str(&format!("{:?}", self))
    }
 }
 impl Serialize for RenderingIntent {
    #[allow(clippy::uninlined_format_args)]
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: Serializer
    {
        serializer.serialize_str(&format!("{:?}", self))
    }
 }
--- a/third_party/zune-jpeg/.gitignore
+++ b/third_party/zune-jpeg/.gitignore
@ -0,0 +1 @@
 /target
--- a/third_party/zune-jpeg/Benches.md
+++ b/third_party/zune-jpeg/Benches.md
@ -0,0 +1,79 @@
 # Benchmarks of popular jpeg libraries
 Here I compare how long it takes popular JPEG decoders to decode the below 7680*4320 image
 of (now defunct ?) [Cutefish OS](https://en.cutefishos.com/) default wallpaper.
 ![img](benches/images/speed_bench.jpg)
 ## About benchmarks
 Benchmarks are weird, especially IO & multi-threaded programs. This library uses both of the above hence performance may
 vary.
 For best results shut down your machine, go take coffee, think about life and how it came to be and why people should
 save the environment.
 Then power up your machine, if it's a laptop connect it to a power supply and if there is a setting for performance
 mode, tweak it.
 Then run.
 ## Benchmarks vs real world usage
 Real world usage may vary.
 Notice that I'm using a large image but probably most decoding will be small to medium images.
 To make the library thread safe, we do about 1.5-1.7x more allocations than libjpeg-turbo. Do note, however, that the
 allocations do not occur all at once; we allocate when needed and deallocate when no longer needed.
 Do note that if memory bandwidth is a limitation, this library is not for you.
 ## Reproducibility
 The benchmarks are carried out on my local machine with an AMD Ryzen 5 4500u
 The benchmarks are reproducible.
 To reproduce them
 1. Clone this repository
 2. Install rust(if you don't have it yet)
 3. `cd` into the directory.
 4. Run `cargo bench`
 ## Performance features of the three libraries
 | feature                      | image-rs/jpeg-decoder | libjpeg-turbo | zune-jpeg |
 |------------------------------|-----------------------|---------------|-----------|
 | multithreaded                | ✅                     | ❌             | ❌         |
 | platform specific intrinsics | ✅                     | ✅             | ✅         |
 - Image-rs/jpeg-decoder uses [rayon] under the hood but it's under a feature
  flag.
 - libjpeg-turbo uses hand-written asm for platform specific intrinsics, ported to
  the most common architectures out there but falls back to scalar
  code if it can't run in a platform.
 # Finally benchmarks
 [here]
 ## Notes
 Benchmarks are run at least once a week to catch regressions early and
 are uploaded to Github pages.
 Machine specs can be found on the other [landing page]
 Benchmarks may not reflect real world usage(threads, other I/O machine bottlenecks)
 [landing page]:https://etemesi254.github.io/posts/Zune-Benchmarks/
 [here]:https://etemesi254.github.io/assets/criterion/report/index.html
 [libjpeg-turbo]:https://github.com/libjpeg-turbo/libjpeg-turbo
 [jpeg-decoder]:https://github.com/image-rs/jpeg-decoder
 [rayon]:https://github.com/rayon-rs/rayon
--- a/third_party/zune-jpeg/Cargo.toml
+++ b/third_party/zune-jpeg/Cargo.toml
@ -0,0 +1,26 @@
 [package]
 name = "zune-jpeg"
 version = "0.5.0-rc1"
 authors = ["caleb <etemesicaleb@gmail.com>"]
 edition = "2021"
 repository = "https://github.com/etemesi254/zune-image/tree/dev/crates/zune-jpeg"
 license = "MIT OR Apache-2.0 OR Zlib"
 keywords = ["jpeg", "jpeg-decoder", "decoder"]
 categories = ["multimedia::images"]
 exclude = ["/benches/images/*", "/tests/*", "/.idea/*", "/.gradle/*", "/test-images/*", "fuzz/*"]
 description = "A fast, correct and safe jpeg decoder"
 # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
 [features]
 x86 = []
 neon = []
 std = ["zune-core/std"]
 log = ["zune-core/log"]
 default = ["x86", "neon", "std"]
 [dependencies]
 zune-core = { path = "../zune-core", version = "^0.5.0-rc1" }
 [dev-dependencies]
 zune-ppm = { path = "../zune-ppm" }
--- a/third_party/zune-jpeg/Changelog.md
+++ b/third_party/zune-jpeg/Changelog.md
@ -0,0 +1,64 @@
 ## Version 0.3.17
 - Fix no-std compilation
 ## Version 0.3.16
 - Add support for decoding to BGR and BGRA
 ## Version 0.3.14
 - Add ability to parse exif and ICC chunk.
 - Fix images with one component that were down-sampled.
 ### Version 0.3.13
 - Allow decoding into pre-allocated buffer
 - Clarify documentation
 ### Version 0.3.11
 - Add guards for SSE and AVX code paths(allows compiling for platforms that do not support it)
 ### Version 0.3.0
 - Overhaul to the whole decoder.
 - Single threaded version
 - Lightweight.
 ### Version 0.2.0
 - New `ZuneJpegOptions` struct, this is the now recommended way to set up decoding options for
  decoding
 - Deprecated previous options setting functions.
 - More code cleanups
 - Fixed new bugs discovered by fuzzing
 - Removed dependency on `num_cpu`
 ### Version 0.1.5
 - Allow user to set memory limits during decoding explicitly via `set_limits`
 - Fixed some bugs discovered by fuzzing
 - Correctly handle small images less than 16 pixels
 - Gracefully handle incorrectly sampled images.
 ### Version 0.1.4
 - Remove all `unsafe` instances except platform dependent intrinsics.
 - Numerous bug fixes identified by fuzzing.
 - Expose `ImageInfo` to the crate root.
 ### Version 0.1.3
 - Fix numerous panics found by fuzzing(thanks to @[Shnatsel] for the corpus)
 - Add new method `set_num_threads` that allows one to explicitly set the number of threads to use to decode the image.
 ### Version 0.1.2
 - Add more sub checks, contributed by @[5225225]
 - Privatize some modules.
 ### Version 0.1.1
 - Fix rgba/rgbx decoding when avx optimized functions were used
 - Initial support for fuzzing 
 - Remove `align_alloc` method which was unsound (Thanks to @[HeroicKatora] for pointing that out)
 [Shnatsel]:https://github.com/Shnatsel
 [HeroicKatora]:https://github.com/HeroicKatora
 [5225225]:https://github.com/5225225
--- a/third_party/zune-jpeg/LICENSE-APACHE
+++ b/third_party/zune-jpeg/LICENSE-APACHE
@ -0,0 +1 @@
 ../../LICENSE-APACHE
--- a/third_party/zune-jpeg/LICENSE-MIT
+++ b/third_party/zune-jpeg/LICENSE-MIT
@ -0,0 +1 @@
 ../../LICENSE-MIT
--- a/third_party/zune-jpeg/LICENSE-ZLIB
+++ b/third_party/zune-jpeg/LICENSE-ZLIB
@ -0,0 +1 @@
 ../../LICENSE-ZLIB
--- a/third_party/zune-jpeg/README.md
+++ b/third_party/zune-jpeg/README.md
@ -0,0 +1,104 @@
 # Zune-JPEG
 A fast, correct and safe jpeg decoder in pure Rust.
 ## Usage
 The library provides a simple-to-use API for jpeg decoding
 and an ability to add options to influence decoding.
 ### Example
 ```Rust
 // Import the library
 use zune_jpeg::JpegDecoder;
 use std::fs::read;
 fn main()->Result<(),DecoderErrors> {
    // load some jpeg data
    let data = read("cat.jpg").unwrap();
    // create a decoder
    let mut decoder = JpegDecoder::new(&data);
    // decode the file
    let pixels = decoder.decode()?;
    Ok(())
 }
 ```
 The decoder supports more manipulations via `DecoderOptions`,
 see additional documentation in the library.
 ## Goals
 The implementation aims to have the following goals achieved,
 in order of importance
 1. Safety - Do not segfault on errors or invalid input. Panics are okay, but
   should be fixed when reported. `unsafe` is only used for SIMD intrinsics,
   and can be turned off entirely both at compile time and at runtime.
 2. Speed - Get the data as quickly as possible, which means
    1. Platform intrinsics code where justifiable
    2. Carefully written platform independent code that allows the
       compiler to vectorize it.
    3. Regression tests.
    4. Watch the memory usage of the program
 3. Usability - Provide utility functions like different color conversions functions.
 ## Non-Goals
 - Bit identical results with libjpeg/libjpeg-turbo will never be an aim of this library.
  Jpeg is a lossy format with very few parts specified by the standard
  (i.e it doesn't give a reference upsampling and color conversion algorithm)
 ## Features
 - [x] A Pretty fast 8*8 integer IDCT.
 - [x] Fast Huffman Decoding
 - [x] Fast color convert functions.
 - [x] Support for extended colorspaces like GrayScale and RGBA
 - [X] Single-threaded decoding.
 - [X] Support for four component JPEGs, and esoteric color schemes like CMYK
 - [X] Support for `no_std`
 - [X] BGR/BGRA decoding support.
 ## Crate Features
 | feature | on  | Capabilities                                                                                |
 |---------|-----|---------------------------------------------------------------------------------------------|
 | `x86`   | yes | Enables `x86` specific instructions, specifically `avx` and `sse` for accelerated decoding. |
 | `std`   | yes | Enable linking to the `std` crate                                                           |
 Note that the `x86` features are automatically disabled on platforms that aren't x86 during compile
 time hence there is no need to disable them explicitly if you are targeting such a platform.
 ## Using in a `no_std` environment
 The crate can be used in a `no_std` environment with the `alloc` feature.
 But one is required to link to a working allocator for whatever environment the decoder
 will be running on
 ## Debug vs release
 The decoder heavily relies on platform specific intrinsics, namely AVX2 and SSE to gain speed-ups in decoding,
 but they [perform poorly](https://godbolt.org/z/vPq57z13b) in debug builds. To get reasonable performance even
 when compiling your program in debug mode, add this to your `Cargo.toml`:
 ```toml
 # `zune-jpeg` package will be always built with optimizations
 [profile.dev.package.zune-jpeg]
 opt-level = 3
 ```
 ## Benchmarks
 The library tries to be as fast as [libjpeg-turbo] while being as safe as possible.
 Platform specific intrinsics help speed up intensive operations, ensuring we can almost
 match [libjpeg-turbo] speeds; its speeds are always within ±10 ms of this library's.
 For more up-to-date benchmarks, see the online repo with
 benchmarks [here](https://etemesi254.github.io/assets/criterion/report/index.html)
 [libjpeg-turbo]:https://github.com/libjpeg-turbo/libjpeg-turbo/
 [image-rs/jpeg-decoder]:https://github.com/image-rs/jpeg-decoder/tree/master/src
--- a/third_party/zune-jpeg/fuzz/.gitignore
+++ b/third_party/zune-jpeg/fuzz/.gitignore
@ -0,0 +1,3 @@
 target
 corpus
 artifacts
--- a/third_party/zune-jpeg/fuzz/Cargo.toml
+++ b/third_party/zune-jpeg/fuzz/Cargo.toml
@ -0,0 +1,32 @@
 [package]
 name = "zune-jpeg-fuzz"
 version = "0.0.0"
 authors = ["Automatically generated"]
 publish = false
 edition = "2018"
 [package.metadata]
 cargo-fuzz = true
 [dependencies]
 libfuzzer-sys = "0.4"
 [dependencies.zune-jpeg]
 path = ".."
 features =  ["neon", "x86"]
 # Prevent this from interfering with workspaces
 [workspace]
 members = ["."]
 [[bin]]
 name = "decode_buffer"
 path = "fuzz_targets/decode_buffer.rs"
 test = false
 doc = false
 [[bin]]
 name = "fuzz_idct"
 path = "fuzz_targets/fuzz_idct.rs"
 test = false
 doc = false
--- a/third_party/zune-jpeg/fuzz/fuzz_targets/decode_buffer.rs
+++ b/third_party/zune-jpeg/fuzz/fuzz_targets/decode_buffer.rs
@ -0,0 +1,10 @@
 #![no_main]
 use libfuzzer_sys::fuzz_target;
 fuzz_target!(|data: &[u8]| {
    // Wrap the raw fuzzer bytes in an in-memory cursor and hand them to the decoder.
    let cursor = zune_jpeg::zune_core::bytestream::ZCursor::new(data);
    let mut jpeg = zune_jpeg::JpegDecoder::new(cursor);
    // The decode result (success or error) is deliberately discarded.
    let _ = jpeg.decode();
 });
--- a/third_party/zune-jpeg/fuzz/fuzz_targets/fuzz_idct.rs
+++ b/third_party/zune-jpeg/fuzz/fuzz_targets/fuzz_idct.rs
@ -0,0 +1,47 @@
 #![no_main]
 use libfuzzer_sys::fuzz_target;
 use zune_jpeg::idct::scalar::idct_int;
 fuzz_target!(|data: [i32; 64]| {
    // Keep every coefficient in a relatively sane range
    // to prevent overflows in the scalar implementation.
    const BOUND: i32 = 255;
    let mut scalar_input = data;
    for coeff in scalar_input.iter_mut()
    {
        *coeff = (*coeff).clamp(-BOUND, BOUND);
    }
    // The vector implementation gets its own copy of the clamped input.
    let mut vector_input = scalar_input;
    // These are way too big but it shouldn't matter:
    // scalar and vector should mutate the minimum needed.
    let mut output_scalar = [0i16; 64];
    let mut output_vector = [0i16; 64];
    // Only assigned inside the cfg-gated blocks below; when none of them is
    // compiled in, reading it further down is rejected by the compiler.
    let _vector_idct_ran: bool;
    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
    #[cfg(target_feature = "avx2")]
    {
        use zune_jpeg::idct::avx2::idct_avx2;
        idct_avx2(&mut vector_input, &mut output_vector, 8);
        _vector_idct_ran = true;
    }
    #[cfg(target_arch = "aarch64")]
    {
        use zune_jpeg::idct::neon::idct_neon;
        idct_neon(&mut vector_input, &mut output_vector, 8);
        _vector_idct_ran = true;
    }
    if !_vector_idct_ran
    {
        panic!("No vector IDCT ran!")
    }
    // Run the scalar reference and require bit-identical output.
    idct_int(&mut scalar_input, &mut output_scalar, 8);
    assert_eq!(output_scalar, output_vector, "IDCT and scalar do not match");
 });
--- a/third_party/zune-jpeg/src/bitstream.rs
+++ b/third_party/zune-jpeg/src/bitstream.rs
@ -0,0 +1,671 @@
 /*
 * Copyright (c) 2023.
 *
 * This software is free software;
 *
 * You can redistribute it or modify it under terms of the MIT, Apache License or Zlib license
 */
 #![allow(
    clippy::if_not_else,
    clippy::similar_names,
    clippy::inline_always,
    clippy::doc_markdown,
    clippy::cast_sign_loss,
    clippy::cast_possible_truncation
 )]
 //! This file exposes a single struct that can decode a huffman encoded
 //! Bitstream in a JPEG file
 //!
 //! This code is optimized for speed.
 //! It's meant to be super duper super fast, because everyone else depends on this being fast.
 //! It's (annoyingly) serial, hence we can't use parallel bitstreams (it's variable-length coding).
 //!
 //! Furthermore, on the case of refills, we have to do bytewise processing because the standard decided
 //! that we want to support markers in the middle of streams(seriously few people use RST markers).
 //!
 //! So we pull in all optimization steps:
 //! - use `inline[always]`? ✅ ,
 //! - pre-execute most common cases ✅,
 //! - add random comments ✅
 //! -  fast paths ✅.
 //!
 //! Speed-wise: It is probably the fastest JPEG BitStream decoder to ever sail the seven seas because of
 //! a couple of optimization tricks.
 //! 1. Fast refills from libjpeg-turbo
 //! 2. As few as possible branches in decoder fast paths.
 //! 3. Accelerated AC table decoding borrowed from stb_image.h written by Fabian Giesen (@rygorous),
 //! improved by me to handle more cases.
 //! 4. Safe and extensible routines(e.g. cool ways to eliminate bounds check)
 //! 5. No unsafe here
 //!
 //! Readability comes as a second priority(I tried with variable names this time, and we are wayy better than libjpeg).
 //!
 //! Anyway, if you are reading this it means you're cool and I hope you get whatever part of the code you are looking for
 //! (or learn something cool)
 //!
 //! Knock yourself out.
 use alloc::format;
 use alloc::string::ToString;
 use core::cmp::min;
 use zune_core::bytestream::{ZByteReaderTrait, ZReader};
 use crate::errors::DecodeErrors;
 use crate::huffman::{HuffmanTable, HUFF_LOOKAHEAD};
 use crate::marker::Marker;
 use crate::mcu::DCT_BLOCK;
 use crate::misc::UN_ZIGZAG;
 /// Finish decoding one Huffman symbol whose lookup-table entry is already in `$symbol`.
 ///
 /// Arguments (all single token trees):
 /// - `$stream`: the bit reader; must provide `peek_bits::<16>()` and `drop_bits(u8)`.
 /// - `$symbol`: a mutable `i32` binding. On entry it holds a lookup entry whose bits
 ///   above `HUFF_LOOKAHEAD` are the code length and whose low `HUFF_LOOKAHEAD` bits are
 ///   the value. On exit it holds the decoded value.
 /// - `$table`: the Huffman table; its `maxcode`, `offset` and `values` members are read
 ///   on the slow path.
 ///
 /// The consumed code bits are dropped from `$stream`.
 ///
 /// NOTE: this expands to code containing a `return Err(..)`, so it can only be used
 /// inside a function returning `Result<_, DecodeErrors>`.
 macro_rules! decode_huff {
    ($stream:tt,$symbol:tt,$table:tt) => {
        // split the lookup entry: high part is the code length, low part the value
        let mut code_length = $symbol >> HUFF_LOOKAHEAD;
        ($symbol) &= (1 << HUFF_LOOKAHEAD) - 1;
        if code_length > i32::from(HUFF_LOOKAHEAD)
        {
            // if the symbol cannot be resolved in the first HUFF_LOOKAHEAD bits,
            // we know it lies somewhere between HUFF_LOOKAHEAD and 16 bits since jpeg imposes 16 bit
            // limit, we can therefore look 16 bits ahead and try to resolve the symbol
            // starting from 1+HUFF_LOOKAHEAD bits.
            $symbol = ($stream).peek_bits::<16>() as i32;
            // (Credits to Sean T. Barrett stb library for this optimization)
            // maxcode is pre-shifted 16 bits long so that it has (16-code_length)
            // zeroes at the end hence we do not need to shift in the inner loop.
            while code_length < 17{
                if $symbol < $table.maxcode[code_length as usize]  {
                    break;
                }
                code_length += 1;
            }
            if code_length == 17{
                // symbol could not be decoded.
                //
                // We may think, lets fake zeroes, noo
                // panic, because Huffman codes are sensitive, probably everything
                // after this will be corrupt, so no need to continue.
                return Err(DecodeErrors::Format(format!("Bad Huffman Code 0x{:X}, corrupt JPEG",$symbol)))
            }
            // keep only the top `code_length` bits of the 16 peeked bits
            $symbol >>= (16-code_length);
            // map the code to its value; the index is masked to stay within 0..=255
            ($symbol) = i32::from(
                ($table).values
                    [(($symbol + ($table).offset[code_length as usize]) & 0xFF) as usize],
            );
        }
        // drop bits read
        ($stream).drop_bits(code_length as u8);
    };
 }
 /// A `BitStream` struct, a bit by bit reader with super powers
 ///
 /// It keeps two views of the buffered bits (`buffer` and `aligned_buffer`), the number of
 /// valid bits, any marker hit while refilling, and the state for progressive decoding.
 pub(crate) struct BitStream {
    /// A MSB type buffer that is used for some certain operations
    ///
    /// New bytes are appended at the low end during a refill.
    pub buffer:           u64,
    /// A TOP  aligned MSB type buffer that is used to accelerate some operations like
    /// peek_bits and get_bits.
    ///
    /// By top aligned, I mean the top bit (63) represents the top bit in the buffer.
    aligned_buffer:       u64,
    /// Number of valid bits currently held in the two buffers
    pub(crate) bits_left: u8,
    /// Did we find a marker(RST/EOF) during decoding?
    ///
    /// While this is `Some`, `refill` does not read more bytes.
    pub marker:           Option<Marker>,
    /// Progressive decoding: stored by the constructors and `update_progressive_params`
    pub successive_high: u8,
    /// Progressive decoding: used as a left-shift amount when scaling/refining coefficients
    pub successive_low:  u8,
    /// First coefficient index (in zig-zag order) handled by the progressive AC decoders
    spec_start:          u8,
    /// Last coefficient index (inclusive) handled by the progressive AC decoders
    spec_end:            u8,
    /// End-of-band run counter used by the progressive AC decoders
    pub eob_run:         i32,
    /// Incremented on each byte-wise refill read for which the reader reports end of file
    pub overread_by:     usize,
    /// True if we have seen end of image marker.
    /// Don't read anything after that.
    pub seen_eoi:        bool
 }
 impl BitStream {
    /// Create a new BitStream
    pub(crate) const fn new() -> BitStream {
        BitStream {
            buffer:          0,
            aligned_buffer:  0,
            bits_left:       0,
            marker:          None,
            successive_high: 0,
            successive_low:  0,
            spec_start:      0,
            spec_end:        0,
            eob_run:         0,
            overread_by:     0,
            seen_eoi:        false
        }
    }
    /// Create a new Bitstream for progressive decoding
    #[allow(clippy::redundant_field_names)]
    pub(crate) fn new_progressive(ah: u8, al: u8, spec_start: u8, spec_end: u8) -> BitStream {
        BitStream {
            buffer:          0,
            aligned_buffer:  0,
            bits_left:       0,
            marker:          None,
            successive_high: ah,
            successive_low:  al,
            spec_start:      spec_start,
            spec_end:        spec_end,
            eob_run:         0,
            overread_by:     0,
            seen_eoi:        false
        }
    }
    /// Refill the bit buffer by (a maximum of) 32 bits
    ///
    /// # Arguments
    ///  - `reader`:`&mut ZReader<T>`: A mutable reference to the underlying
    ///    byte source containing the JPEG stream
    ///
    /// This function will only refill if `self.bits_left` is less than 32, no marker
    /// is pending in `self.marker` and the end-of-image flag is not set.
    ///
    /// # Returns
    /// - `Ok(true)` if the refill completed or was not needed.
    /// - `Ok(false)` if a marker was encountered; it is stored in `self.marker`
    ///   and the `0xFF` byte that introduced it is not kept in the buffer.
    ///
    /// # Errors
    /// Returns an error if an unknown marker byte is found, or if the reader's
    /// `eof()`/`rewind()` calls fail.
    #[inline(always)] // to many call sites? ( perf improvement by 4%)
    fn refill<T>(&mut self, reader: &mut ZReader<T>) -> Result<bool, DecodeErrors>
    where
        T: ZByteReaderTrait
    {
        /// Macro version of a single byte refill.
        /// Arguments
        /// buffer-> our io buffer, because rust macros cannot get values from
        /// the surrounding environment bits_left-> number of bits left
        /// to full refill
        ///
        /// May `return Ok(false)` from `refill` when a marker is found.
        macro_rules! refill {
            ($buffer:expr,$byte:expr,$bits_left:expr) => {
                // read a byte from the stream
                $byte = u64::from(reader.read_u8());
                // count reads performed while the reader reports end of file
                self.overread_by += usize::from(reader.eof()?);
                // append to the buffer
                // JPEG is a MSB type buffer so that means we append this
                // to the lower end (0..8) of the buffer and push the rest bits above..
                $buffer = ($buffer << 8) | $byte;
                // Increment bits left
                $bits_left += 8;
                // Check for special case  of OxFF, to see if it's a stream or a marker
                if $byte == 0xff {
                    // read next byte
                    let mut next_byte = u64::from(reader.read_u8());
                    // Byte stuffing: 0xFF followed by 0x00 is a literal 0xFF data byte,
                    // in that case the 0x00 is simply skipped
                    if next_byte != 0x00 {
                        // skip any run of 0xFF bytes
                        while next_byte == 0xFF {
                            next_byte = u64::from(reader.read_u8());
                        }
                        if next_byte != 0x00 {
                            // Undo the byte append and return
                            $buffer >>= 8;
                            $bits_left -= 8;
                            // guard against a shift by 64 when no bits are left
                            if $bits_left != 0 {
                                self.aligned_buffer = $buffer << (64 - $bits_left);
                            }
                            self.marker =
                                Some(Marker::from_u8(next_byte as u8).ok_or_else(|| {
                                    DecodeErrors::Format(format!(
                                        "Unknown marker 0xFF{:X}",
                                        next_byte
                                    ))
                                })?);
                            return Ok(false);
                        }
                    }
                }
            };
        }
        // 32 bits is enough for a decode(16 bits) and receive_extend(max 16 bits)
        // If we have less than 32 bits we refill
        if self.bits_left < 32 && self.marker.is_none() && !self.seen_eoi {
            // we optimize for the case where we don't have 255 in the stream and have 4 bytes left
            // as it is the common case
            //
            // so we always read 4 bytes, if read_fixed_bytes errors out, the cursor is
            // guaranteed not to advance in case of failure (is this true), so
            // we revert the read later on (if we have 255), if this fails, we use the normal
            // byte at a time read
            if let Ok(bytes) = reader.read_fixed_bytes_or_error::<4>() {
                // we have 4 bytes to spare, read the 4 bytes into a temporary buffer
                // create buffer
                let msb_buf = u32::from_be_bytes(bytes);
                // check if we have 0xff
                if !has_byte(msb_buf, 255) {
                    self.bits_left += 32;
                    self.buffer <<= 32;
                    self.buffer |= u64::from(msb_buf);
                    self.aligned_buffer = self.buffer << (64 - self.bits_left);
                    return Ok(true);
                }
                // a 0xFF is present: undo the 4 byte read and take the slow path below
                reader.rewind(4)?;
            }
            // This serves two reasons,
            // 1: Make clippy shut up
            // 2: Favour register reuse
            let mut byte;
            // 4 refills, if all succeed the stream should contain enough bits to decode a
            // value
            refill!(self.buffer, byte, self.bits_left);
            refill!(self.buffer, byte, self.bits_left);
            refill!(self.buffer, byte, self.bits_left);
            refill!(self.buffer, byte, self.bits_left);
            // Construct an MSB buffer whose top bits are the bitstream we are currently holding.
            self.aligned_buffer = self.buffer << (64 - self.bits_left);
        }
        return Ok(true);
    }
    /// Decode the DC coefficient of a block and fold it into the running prediction.
    ///
    /// The Huffman-decoded symbol gives the number of extra bits to read; those bits
    /// are sign-extended with `huff_extend` and the resulting difference is added
    /// (wrapping) to `dc_prediction`, which holds the decoded DC value on return.
    ///
    /// Returns `Ok(true)` on success; refill and Huffman decoding errors are propagated.
    #[allow(
        clippy::cast_possible_truncation,
        clippy::cast_sign_loss,
        clippy::unwrap_used
    )]
    #[inline(always)]
    fn decode_dc<T>(
        &mut self, reader: &mut ZReader<T>, dc_table: &HuffmanTable, dc_prediction: &mut i32
    ) -> Result<bool, DecodeErrors>
    where
        T: ZByteReaderTrait
    {
        if self.bits_left < 32 {
            self.refill(reader)?;
        }
        // index the lookup table with the next HUFF_LOOKAHEAD bits of the stream
        let lookahead = self.peek_bits::<HUFF_LOOKAHEAD>();
        let mut category = dc_table.lookup[lookahead as usize];
        decode_huff!(self, category, dc_table);
        // a zero category means "no extra bits", i.e. a difference of zero
        let difference = if category == 0 {
            0
        } else {
            let raw_bits = self.get_bits(category as u8);
            huff_extend(raw_bits, category)
        };
        // Update DC prediction
        *dc_prediction = dc_prediction.wrapping_add(difference);
        Ok(true)
    }
    /// Decode a Minimum Code Unit(MCU) as quickly as possible
    ///
    /// # Arguments
    /// - reader: The bitstream from where we read more bits.
    /// - dc_table: The Huffman table used to decode the DC coefficient
    /// - ac_table: The Huffman table used to decode AC values
    /// - qt_table: Quantization table; every decoded coefficient is multiplied by the
    ///   entry at its (un-zigzagged) position before being stored.
    /// - block: A memory region where we will write out the decoded values
    /// - DC prediction: Last DC value for this component, updated in place
    ///
    /// # Errors
    /// Propagates errors from refilling the bit buffer and from Huffman decoding.
    ///
    /// # Panics
    /// Panics if `ac_table.ac_lookup` is `None`.
    #[allow(
        clippy::many_single_char_names,
        clippy::cast_possible_truncation,
        clippy::cast_sign_loss
    )]
    #[inline(never)]
    pub fn decode_mcu_block<T>(
        &mut self, reader: &mut ZReader<T>, dc_table: &HuffmanTable, ac_table: &HuffmanTable,
        qt_table: &[i32; DCT_BLOCK], block: &mut [i32; 64], dc_prediction: &mut i32
    ) -> Result<(), DecodeErrors>
    where
        T: ZByteReaderTrait
    {
        // Get fast AC table as a reference before we enter the hot path
        let ac_lookup = ac_table.ac_lookup.as_ref().unwrap();
        let (mut symbol, mut r, mut fast_ac);
        // Decode AC coefficients
        let mut pos: usize = 1;
        // decode DC, dc prediction will contain the value
        self.decode_dc(reader, dc_table, dc_prediction)?;
        // set dc to be the dequantized dc prediction.
        //
        // The prediction is accumulated with `wrapping_add` from stream data, so on a
        // corrupt stream it can be arbitrarily large; multiply with wrapping semantics
        // too so that such input cannot trigger an arithmetic-overflow panic in builds
        // with overflow checks enabled.
        block[0] = dc_prediction.wrapping_mul(qt_table[0]);
        while pos < 64 {
            self.refill(reader)?;
            symbol = self.peek_bits::<HUFF_LOOKAHEAD>();
            fast_ac = ac_lookup[symbol as usize];
            symbol = ac_table.lookup[symbol as usize];
            if fast_ac != 0 {
                //  FAST AC path: a single entry packs the value (bits 8 and up),
                //  the zero run (bits 4..8) and the number of bits to consume (bits 0..4)
                pos += ((fast_ac >> 4) & 15) as usize; // run
                let t_pos = UN_ZIGZAG[min(pos, 63)] & 63;
                block[t_pos] = i32::from(fast_ac >> 8) * (qt_table[t_pos]); // Value
                self.drop_bits((fast_ac & 15) as u8);
                pos += 1;
            } else {
                decode_huff!(self, symbol, ac_table);
                // high nibble: run of zeroes, low nibble: size of the coefficient in bits
                r = symbol >> 4;
                symbol &= 15;
                if symbol != 0 {
                    pos += r as usize;
                    r = self.get_bits(symbol as u8);
                    symbol = huff_extend(r, symbol);
                    let t_pos = UN_ZIGZAG[pos & 63] & 63;
                    block[t_pos] = symbol * qt_table[t_pos];
                    pos += 1;
                } else if r != 15 {
                    // size 0 with a run other than 15: end of block
                    return Ok(());
                } else {
                    // size 0 with run 15: skip 16 zero coefficients
                    pos += 16;
                }
            }
        }
        Ok(())
    }
    /// Return the next `LOOKAHEAD` bits of the stream without consuming them.
    ///
    /// The bits are taken from the top of `aligned_buffer`.
    #[inline(always)]
    #[allow(clippy::cast_possible_truncation)]
    const fn peek_bits<const LOOKAHEAD: u8>(&self) -> i32 {
        let discarded_low_bits = 64 - LOOKAHEAD;
        (self.aligned_buffer >> discarded_low_bits) as i32
    }
    /// Consume `n` bits: shift them out of the top of `aligned_buffer` and lower
    /// `bits_left` accordingly (saturating at zero). No availability check is made.
    #[inline]
    fn drop_bits(&mut self, n: u8) {
        self.aligned_buffer <<= n;
        self.bits_left = self.bits_left.saturating_sub(n);
    }
    /// Consume `n_bits` bits from the stream and return them as an integer.
    ///
    /// The top `n_bits` of `aligned_buffer` are rotated round to the low end,
    /// where they are masked out and returned.
    #[inline(always)]
    #[allow(clippy::cast_possible_truncation)]
    fn get_bits(&mut self, n_bits: u8) -> i32 {
        let low_mask = (1_u64 << n_bits) - 1;
        let rotated = self.aligned_buffer.rotate_left(u32::from(n_bits));
        self.aligned_buffer = rotated;
        self.bits_left = self.bits_left.wrapping_sub(n_bits);
        (rotated & low_mask) as i32
    }
    /// Decode the DC coefficient for the first pass of a progressive scan.
    ///
    /// Updates `dc_prediction` via `decode_dc`, then stores the prediction
    /// (truncated to `i16`) shifted left by `successive_low` into `block`.
    #[allow(clippy::cast_possible_truncation)]
    #[inline]
    pub(crate) fn decode_prog_dc_first<T>(
        &mut self, reader: &mut ZReader<T>, dc_table: &HuffmanTable, block: &mut i16,
        dc_prediction: &mut i32
    ) -> Result<(), DecodeErrors>
    where
        T: ZByteReaderTrait
    {
        self.decode_dc(reader, dc_table, dc_prediction)?;
        let dc_value = *dc_prediction as i16;
        let scale = 1_i16 << self.successive_low;
        *block = dc_value.wrapping_mul(scale);
        Ok(())
    }
    /// Refine a DC coefficient in a progressive refinement scan.
    ///
    /// Reads a single bit; when it is set, `1 << successive_low` is added
    /// (wrapping) to `block`.
    #[inline]
    pub(crate) fn decode_prog_dc_refine<T>(
        &mut self, reader: &mut ZReader<T>, block: &mut i16
    ) -> Result<(), DecodeErrors>
    where
        T: ZByteReaderTrait
    {
        // only go to the reader when the buffer is completely drained
        if self.bits_left == 0 {
            self.refill(reader)?;
        }
        let refinement_bit_set = self.get_bit() == 1;
        if refinement_bit_set {
            let increment = 1_i16 << self.successive_low;
            *block = block.wrapping_add(increment);
        }
        Ok(())
    }
    /// Consume and return the next bit of the stream (`0` or `1`).
    fn get_bit(&mut self) -> u8 {
        // the next bit lives in the top position of the aligned buffer
        let top_bit = u8::from((self.aligned_buffer >> 63) == 1);
        self.drop_bits(1);
        top_bit
    }
    /// Decode the AC coefficients of one block for the first pass of a progressive scan.
    ///
    /// Coefficients are decoded starting at index `spec_start` (zig-zag order) until the
    /// index passes `spec_end` or an end-of-band symbol is found. Each decoded value is
    /// multiplied (wrapping) by `1 << successive_low` before being written to `block`.
    ///
    /// When an end-of-band symbol is found, `self.eob_run` is set to the decoded run
    /// length minus one and decoding of this block stops.
    ///
    /// # Returns
    /// `Ok(true)` on success.
    ///
    /// # Errors
    /// Propagates errors from refilling the bit buffer and from Huffman decoding.
    ///
    /// # Panics
    /// Panics if `ac_table.ac_lookup` is `None`.
    pub(crate) fn decode_mcu_ac_first<T>(
        &mut self, reader: &mut ZReader<T>, ac_table: &HuffmanTable, block: &mut [i16; 64]
    ) -> Result<bool, DecodeErrors>
    where
        T: ZByteReaderTrait
    {
        let shift = self.successive_low;
        let fast_ac = ac_table.ac_lookup.as_ref().unwrap();
        let mut k = self.spec_start as usize;
        let (mut symbol, mut r, mut fac);
        // EOB runs are handled in mcu_prog.rs
        'block: loop {
            self.refill(reader)?;
            symbol = self.peek_bits::<HUFF_LOOKAHEAD>();
            fac = fast_ac[symbol as usize];
            symbol = ac_table.lookup[symbol as usize];
            if fac != 0 {
                // fast ac path: one entry packs the value (bits 8 and up), the zero
                // run (bits 4..8) and the number of bits to consume (bits 0..4)
                k += ((fac >> 4) & 15) as usize; // run
                block[UN_ZIGZAG[min(k, 63)] & 63] = (fac >> 8).wrapping_mul(1 << shift); // value
                self.drop_bits((fac & 15) as u8);
                k += 1;
            } else {
                decode_huff!(self, symbol, ac_table);
                // high nibble: run, low nibble: size of the coefficient in bits
                r = symbol >> 4;
                symbol &= 15;
                if symbol != 0 {
                    k += r as usize;
                    r = self.get_bits(symbol as u8);
                    symbol = huff_extend(r, symbol);
                    block[UN_ZIGZAG[k & 63] & 63] = (symbol as i16).wrapping_mul(1 << shift);
                    k += 1;
                } else {
                    if r != 15 {
                        // end of band: run length is 2^r plus the next r bits; one is
                        // subtracted here before leaving the loop
                        self.eob_run = 1 << r;
                        self.eob_run += self.get_bits(r as u8);
                        self.eob_run -= 1;
                        break;
                    }
                    // size 0 with run 15: skip 16 zero coefficients
                    k += 16;
                }
            }
            if k > self.spec_end as usize {
                break 'block;
            }
        }
        return Ok(true);
    }
    /// Refine the AC coefficients of one block in a progressive refinement scan.
    ///
    /// Works on coefficient indices `spec_start..=spec_end` (zig-zag order) with the
    /// correction magnitude `bit = 1 << successive_low`:
    ///
    /// - When no end-of-band run is active (`eob_run == 0`), symbols are decoded from
    ///   `table`. Each symbol either starts a new end-of-band run or introduces a new
    ///   coefficient of value `+bit`/`-bit`, placed after skipping `r` zero coefficients.
    ///   Already non-zero coefficients passed on the way each consume one correction bit.
    /// - When an end-of-band run is active (`eob_run > 0`), the remaining non-zero
    ///   coefficients each consume one correction bit and the run counter is decremented.
    ///
    /// A correction bit of 1 moves the coefficient away from zero by `bit`, unless that
    /// bit is already set in the coefficient. All corrections use wrapping arithmetic so
    /// that corrupt streams cannot cause an arithmetic-overflow panic.
    ///
    /// # Returns
    /// `Ok(true)` on success.
    ///
    /// # Errors
    /// Returns `DecodeErrors::HuffmanDecode` when a decoded symbol has a size other than
    /// 0 or 1, and propagates errors from refilling and Huffman decoding.
    #[allow(clippy::too_many_lines, clippy::op_ref)]
    pub(crate) fn decode_mcu_ac_refine<T>(
        &mut self, reader: &mut ZReader<T>, table: &HuffmanTable, block: &mut [i16; 64]
    ) -> Result<bool, DecodeErrors>
    where
        T: ZByteReaderTrait
    {
        let bit = (1 << self.successive_low) as i16;
        let mut k = self.spec_start;
        let (mut symbol, mut r);
        if self.eob_run == 0 {
            'no_eob: loop {
                // Decode a coefficient from the bit stream
                self.refill(reader)?;
                symbol = self.peek_bits::<HUFF_LOOKAHEAD>();
                symbol = table.lookup[symbol as usize];
                decode_huff!(self, symbol, table);
                // high nibble: run of zeroes to skip, low nibble: size
                r = symbol >> 4;
                symbol &= 15;
                if symbol == 0 {
                    if r != 15 {
                        // EOB run is 2^r + bits
                        self.eob_run = 1 << r;
                        self.eob_run += self.get_bits(r as u8);
                        // EOB runs are handled by the eob logic
                        break 'no_eob;
                    }
                } else {
                    if symbol != 1 {
                        return Err(DecodeErrors::HuffmanDecode(
                            "Bad Huffman code, corrupt JPEG?".to_string()
                        ));
                    }
                    // get sign bit
                    // We assume we have enough bits, which should be correct for sane images
                    // since we refill by 32 above
                    if self.get_bit() == 1 {
                        symbol = i32::from(bit);
                    } else {
                        symbol = i32::from(-bit);
                    }
                }
                // Advance over already nonzero coefficients  appending
                // correction bits to the non-zeroes.
                // A correction bit is 1 if the absolute value of the coefficient must be increased
                if k <= self.spec_end {
                    'advance_nonzero: loop {
                        let coefficient = &mut block[UN_ZIGZAG[k as usize & 63] & 63];
                        if *coefficient != 0 {
                            if self.get_bit() == 1 && (*coefficient & bit) == 0 {
                                // wrapping arithmetic, same as the EOB path below:
                                // coefficients come from untrusted data and may sit
                                // at the edge of the i16 range.
                                if *coefficient >= 0 {
                                    *coefficient = coefficient.wrapping_add(bit);
                                } else {
                                    *coefficient = coefficient.wrapping_sub(bit);
                                }
                            }
                            if self.bits_left < 1 {
                                self.refill(reader)?;
                            }
                        } else {
                            r -= 1;
                            if r < 0 {
                                // reached target zero coefficient.
                                break 'advance_nonzero;
                            }
                        };
                        if k == self.spec_end {
                            break 'advance_nonzero;
                        }
                        k += 1;
                    }
                }
                if symbol != 0 {
                    let pos = UN_ZIGZAG[k as usize & 63];
                    // output new non-zero coefficient.
                    block[pos & 63] = symbol as i16;
                }
                k += 1;
                if k > self.spec_end {
                    break 'no_eob;
                }
            }
        }
        if self.eob_run > 0 {
            // only run if block does not consists of purely zeroes
            if &block[1..] != &[0; 63] {
                self.refill(reader)?;
                while k <= self.spec_end {
                    let coefficient = &mut block[UN_ZIGZAG[k as usize & 63] & 63];
                    if *coefficient != 0 && self.get_bit() == 1 {
                        // check if we already modified it, if so do nothing, otherwise
                        // append the correction bit.
                        if (*coefficient & bit) == 0 {
                            if *coefficient >= 0 {
                                *coefficient = coefficient.wrapping_add(bit);
                            } else {
                                *coefficient = coefficient.wrapping_sub(bit);
                            }
                        }
                    }
                    if self.bits_left < 1 {
                        // refill at the last possible moment
                        self.refill(reader)?;
                    }
                    k += 1;
                }
            }
            // count a block completed in EOB run
            self.eob_run -= 1;
        }
        Ok(true)
    }
    /// Replace the progressive-decoding parameters of this stream.
    ///
    /// `ah`/`al` are stored as `successive_high`/`successive_low`;
    /// `spec_start`/`spec_end` give the coefficient range for the AC decoders.
    pub fn update_progressive_params(&mut self, ah: u8, al: u8, spec_start: u8, spec_end: u8) {
        // coefficient range
        self.spec_start = spec_start;
        self.spec_end = spec_end;
        // high / low shift parameters
        self.successive_low = al;
        self.successive_high = ah;
    }
    /// Clear the per-interval decoding state, as needed at a restart marker.
    ///
    /// Both bit buffers and the bit count are zeroed, the pending marker is
    /// forgotten and the end-of-band run is cleared. The progressive parameters,
    /// `overread_by` and `seen_eoi` are left untouched.
    #[cold]
    pub fn reset(&mut self) {
        // drop every buffered bit
        self.buffer = 0;
        self.aligned_buffer = 0;
        self.bits_left = 0;
        // forget the marker and any running EOB count
        self.eob_run = 0;
        self.marker = None;
    }
 }
 /// Branch-free equivalent of the JPEG `HUFF_EXTEND` procedure.
 ///
 /// `x` is the raw value read from `s` bits. Values with the top of those `s` bits
 /// set (`x >= 1 << (s - 1)`) are returned unchanged; smaller values are mapped to
 /// the negative range by adding `(-1 << s) + 1`.
 ///
 /// `s` must be at least 1.
 #[inline(always)]
 fn huff_extend(x: i32, s: i32) -> i32 {
    // smallest raw value that still denotes a non-negative number
    let threshold = 1 << (s - 1);
    // all ones when x < threshold, zero otherwise (arithmetic shift of the sign bit)
    let below_mask = (x - threshold) >> 31;
    // offset applied to the values in the lower half
    let negative_offset = (-1_i32 << s) + 1;
    x + (below_mask & negative_offset)
 }
 /// Returns `true` when at least one of the four bytes of `v` is zero.
 const fn has_zero(v: u32) -> bool {
    // Retrieved from Stanford bithacks
    // @ https://graphics.stanford.edu/~seander/bithacks.html#ZeroInWord
    const LOW_SEVEN: u32 = 0x7F7F_7F7F;
    // Adding 0x7F to the low seven bits of a byte carries into its high bit
    // exactly when those seven bits are non-zero.
    let carried = (v & LOW_SEVEN) + LOW_SEVEN;
    // After OR-ing in `v` and the low-seven mask, a byte is 0xFF unless it was zero.
    let nonzero_bytes = carried | v | LOW_SEVEN;
    nonzero_bytes != u32::MAX
 }
 /// Returns `true` when at least one of the four bytes of `b` equals `val`.
 const fn has_byte(b: u32, val: u8) -> bool {
    // Retrieved from Stanford bithacks
    // @ https://graphics.stanford.edu/~seander/bithacks.html#ValueInWord
    const EVERY_BYTE: u32 = 0x0101_0101;
    // Replicate `val` into all four bytes; XOR then zeroes exactly the
    // bytes of `b` that are equal to `val`.
    let replicated = EVERY_BYTE * (val as u32);
    has_zero(b ^ replicated)
 }
--- a/third_party/zune-jpeg/src/color_convert.rs
+++ b/third_party/zune-jpeg/src/color_convert.rs
@ -0,0 +1,89 @@
 /*
 * Copyright (c) 2023.
 *
 * This software is free software;
 *
 * You can redistribute it or modify it under terms of the MIT, Apache License or Zlib license
 */
 #![allow(
    clippy::many_single_char_names,
    clippy::similar_names,
    clippy::cast_possible_truncation,
    clippy::cast_sign_loss,
    clippy::cast_possible_wrap,
    clippy::too_many_arguments,
    clippy::doc_markdown
 )]
 //! Color space conversion routines
 //!
 //! This file exposes functions to convert one colorspace to another in a jpeg
 //! image
 //!
 //! Currently supported conversions are
 //!
 //! - `YCbCr` to `RGB,RGBA,GRAYSCALE,RGBX`.
 //!
 //!
 //! Hey there, if you're reading this it means you probably need something, so let me help you.
 //!
 //! There are 3 supported cpu extensions here.
 //! 1. Scalar
 //! 2. SSE
 //! 3. AVX
 //!
 //! There are two types of the color convert functions
 //!
 //! 1. Acts on 16 pixels.
 //! 2. Acts on 8 pixels.
 //!
 //! The reason for this is because when implementing the AVX part it occurred to me that we can actually
 //! do better and process 2 MCU's if we change IDCT return type to be `i16's`, since a lot of
 //! CPU's these days support AVX extensions, it becomes nice if we optimize for that path ,
 //! therefore AVX routines can process 16 pixels directly and SSE and Scalar just compensate.
 //!
 //! By compensating, I mean I wrote the 16 pixels version operating on the 8 pixel version twice.
 //!
 //! Therefore, if you're looking to optimize some routines, probably start there.
 pub use scalar::ycbcr_to_grayscale;
 use zune_core::colorspace::ColorSpace;
 use zune_core::options::DecoderOptions;
 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
 #[cfg(feature = "x86")]
 pub use crate::color_convert::avx::{ycbcr_to_rgb_avx2, ycbcr_to_rgba_avx2};
 use crate::decoder::ColorConvert16Ptr;
 mod avx;
 mod scalar;
 /// Pick the YCbCr conversion routine for the requested output colorspace.
 ///
 /// On x86/x86_64 builds with the `x86` feature, the AVX2 routines are preferred
 /// for `RGB` and `RGBA` when `options.use_avx2()` is true. Every other case uses
 /// the scalar routines, which additionally cover `BGR` and `BGRA`.
 ///
 /// Returns `None` when no routine exists for `type_need`.
 #[allow(unused_variables)]
 pub fn choose_ycbcr_to_rgb_convert_func(
    type_need: ColorSpace, options: &DecoderOptions
 ) -> Option<ColorConvert16Ptr> {
    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
    #[cfg(feature = "x86")]
    {
        use zune_core::log::debug;
        if options.use_avx2() {
            debug!("Using AVX optimised color conversion functions");
            // I believe avx2 means sse4 is also available
            let vectorized: Option<ColorConvert16Ptr> = match type_need {
                ColorSpace::RGB => Some(ycbcr_to_rgb_avx2),
                ColorSpace::RGBA => Some(ycbcr_to_rgba_avx2),
                // no AVX2 routine: use the scalar table below, which has more types
                _ => None
            };
            if vectorized.is_some() {
                return vectorized;
            }
        }
    }
    // when there is no x86 or we haven't returned by here, resort to scalar;
    // the const generic flag selects the BGR(A) channel order
    match type_need {
        ColorSpace::RGB => Some(scalar::ycbcr_to_rgb_inner_16_scalar::<false>),
        ColorSpace::BGR => Some(scalar::ycbcr_to_rgb_inner_16_scalar::<true>),
        ColorSpace::RGBA => Some(scalar::ycbcr_to_rgba_inner_16_scalar::<false>),
        ColorSpace::BGRA => Some(scalar::ycbcr_to_rgba_inner_16_scalar::<true>),
        _ => None
    }
 }
--- a/third_party/zune-jpeg/src/color_convert/avx.rs
+++ b/third_party/zune-jpeg/src/color_convert/avx.rs
@ -0,0 +1,350 @@
 /*
 * Copyright (c) 2023.
 *
 * This software is free software;
 *
 * You can redistribute it or modify it under terms of the MIT, Apache License or Zlib license
 */
 //! AVX color conversion routines
 //!
 //! Okay these codes are cool
 //!
 //! Herein lies super optimized codes to do color conversions.
 //!
 //!
 //! 1. The YCbCr to RGB use integer approximations and not the floating point equivalent.
 //! That means we may be +- 2 of pixels generated by libjpeg-turbo jpeg decoding
 //! (also libjpeg uses routines like `Y  =  0.29900 * R + 0.33700 * G + 0.11400 * B + 0.25000 * G`)
 //!
 //! Firstly, we use integers (fun fact: there is no part of this code base where we're dealing with
 //! floating points.., fun fact: the first fun fact wasn't even fun.)
 //!
 //! Secondly, we have cool clamping code, especially for rgba, where we don't need clamping and we
 //! spend our time cursing that Intel decided permute instructions to work like 2 128 bit vectors (the compiler optimizes
 //! it out to something cool).
 //!
 //! There isn't a lot here (not as fun as bitstream ) but I hope you find what you're looking for.
 //!
 //! O and ~~subscribe to my youtube channel~~
 #![cfg(any(target_arch = "x86", target_arch = "x86_64"))]
 #![cfg(feature = "x86")]
 #![allow(
    clippy::wildcard_imports,
    clippy::cast_possible_truncation,
    clippy::too_many_arguments,
    clippy::inline_always,
    clippy::doc_markdown,
    dead_code
 )]
 #[cfg(target_arch = "x86")]
 use core::arch::x86::*;
 #[cfg(target_arch = "x86_64")]
 use core::arch::x86_64::*;
 /// A 256-bit AVX register that can also be read as sixteen `i16` lanes.
 ///
 /// The conversion routines in this file store a result through `mm256` and
 /// later read individual lanes back through `array`.
 pub union YmmRegister {
     // both are 32 when using std::mem::size_of
     mm256: __m256i,
     // for avx color conversion
     array: [i16; 16]
 }
 //--------------------------------------------------------------------------------------------------
 // AVX conversion routines
 //--------------------------------------------------------------------------------------------------
 ///
 /// Convert 16 YCbCr samples to packed 8-bit RGB using AVX2 instructions.
 ///
 /// This is the safe entry point handed out as a colour-conversion function
 /// pointer; it forwards directly to the `#[target_feature]` implementation.
 ///
 ///  # Note
 ///**IT IS THE RESPONSIBILITY OF THE CALLER TO CALL THIS IN CPUS SUPPORTING
 /// AVX2 OTHERWISE THIS IS UB**
 ///
 /// *Peace*
 ///
 /// Inside this library the function is only selected when
 /// `options.use_avx2()` returns true (see `choose_ycbcr_to_rgb_convert_func`).
 ///
 /// # Arguments
 /// - `y`,`cb`,`cr`: 16 `i16` samples for each of the three channels
 /// - `out`: The output array where the converted pixels are stored
 /// - `offset`: The position in `out` where writing starts; advanced by 48
 ///   (16 pixels x 3 bytes) on return
 ///
 /// # Panics
 /// If `out` does not hold 48 bytes starting at `*offset`.
 #[inline(always)]
 pub fn ycbcr_to_rgb_avx2(
     y: &[i16; 16], cb: &[i16; 16], cr: &[i16; 16], out: &mut [u8], offset: &mut usize
 ) {
     // The real work lives in a separate `#[target_feature]` function so the
     // compiler may use AVX2 when generating the stores.
     // SAFETY: the callee requires AVX2; per the note above, callers must only
     // reach this function on CPUs that support it.
     unsafe {
         ycbcr_to_rgb_avx2_1(y, cb, cr, out, offset);
     }
 }
 /// AVX2 body of `ycbcr_to_rgb_avx2`: converts 16 pixels and stores them as packed RGB.
 ///
 /// Writes 48 bytes (`r, g, b` for each of the 16 pixels) into `out` starting
 /// at `*offset`, then advances `*offset` by 48.
 ///
 /// # Panics
 /// If `out` does not hold 48 bytes starting at `*offset`.
 ///
 /// # Safety
 /// The CPU executing this function must support AVX2.
 #[inline]
 #[target_feature(enable = "avx2")]
 #[target_feature(enable = "avx")]
 unsafe fn ycbcr_to_rgb_avx2_1(
     y: &[i16; 16], cb: &[i16; 16], cr: &[i16; 16], out: &mut [u8], offset: &mut usize
 ) {
     let start = *offset;
     // Fixed-size view of the destination; this is where a too-short buffer is rejected.
     let dest: &mut [u8; 48] = out
         .get_mut(start..start + 48)
         .expect("Slice to small cannot write")
         .try_into()
         .unwrap();
     let (red, green, blue) = ycbcr_to_rgb_baseline(y, cb, cr);
     // The baseline routine already clamped every lane to 0..=255, so the
     // narrowing casts below keep the value intact.
     for (lane, pixel) in dest.chunks_exact_mut(3).enumerate() {
         pixel[0] = red.array[lane] as u8;
         pixel[1] = green.array[lane] as u8;
         pixel[2] = blue.array[lane] as u8;
     }
     *offset += 48;
 }
 /// Clamping AVX2 kernel that turns 16 YCbCr samples into R, G and B lanes.
 ///
 /// The conversion uses 16-bit integer arithmetic instead of floats, so the
 /// results may differ slightly from a floating-point decoder such as
 /// libjpeg-turbo. Per lane it computes
 ///
 /// - `r = y + ((45 * (cr - 128)) >> 5)`
 /// - `g = y - ((11 * (cb - 128) + 23 * (cr - 128)) >> 5)`
 /// - `b = y + ((113 * (cb - 128)) >> 6)`
 ///
 /// and clamps each result to `0..=255`.
 ///
 /// # Returns
 /// The `(r, g, b)` registers, each holding 16 clamped `i16` lanes.
 ///
 /// # Safety
 /// The CPU executing this function must support AVX2.
 #[inline]
 #[target_feature(enable = "avx2")]
 #[target_feature(enable = "avx")]
 unsafe fn ycbcr_to_rgb_baseline(
     y: &[i16; 16], cb: &[i16; 16], cr: &[i16; 16]
 ) -> (YmmRegister, YmmRegister, YmmRegister) {
     // Integer formulation taken from
     // https://stackoverflow.com/questions/4041840/function-to-convert-ycbcr-to-rgb
     let luma = _mm256_loadu_si256(y.as_ptr().cast());
     // Chroma is stored with a +128 bias; remove it before scaling.
     let chroma_bias = _mm256_set1_epi16(128);
     let cb_centered = _mm256_sub_epi16(_mm256_loadu_si256(cb.as_ptr().cast()), chroma_bias);
     let cr_centered = _mm256_sub_epi16(_mm256_loadu_si256(cr.as_ptr().cast()), chroma_bias);
     // r = Y + 45 * Cr / 32
     let red_term =
         _mm256_srai_epi16::<5>(_mm256_mullo_epi16(_mm256_set1_epi16(45), cr_centered));
     let red = clamp_avx(_mm256_add_epi16(luma, red_term));
     // g = Y - (11 * Cb + 23 * Cr) / 32
     let green_sum = _mm256_add_epi16(
         _mm256_mullo_epi16(_mm256_set1_epi16(11), cb_centered),
         _mm256_mullo_epi16(_mm256_set1_epi16(23), cr_centered)
     );
     let green = clamp_avx(_mm256_sub_epi16(luma, _mm256_srai_epi16::<5>(green_sum)));
     // b = Y + 113 * Cb / 64
     let blue_term =
         _mm256_srai_epi16::<6>(_mm256_mullo_epi16(_mm256_set1_epi16(113), cb_centered));
     let blue = clamp_avx(_mm256_add_epi16(blue_term, luma));
     (
         YmmRegister { mm256: red },
         YmmRegister { mm256: green },
         YmmRegister { mm256: blue }
     )
 }
 /// Non-clamping AVX2 kernel that turns 16 YCbCr samples into R, G and B lanes.
 ///
 /// Per lane it computes, in wrapping 16-bit arithmetic,
 ///
 /// - `r = y + ((45 * (cr - 128)) >> 5)`
 /// - `g = y - ((11 * (cb - 128) + 23 * (cr - 128)) >> 5)`
 /// - `b = y + ((113 * (cb - 128)) >> 6)`
 ///
 /// The results are returned as-is; the RGBA routine relies on the saturating
 /// pack it performs afterwards to bring them into byte range.
 ///
 /// # Safety
 /// The CPU executing this function must support AVX2.
 #[inline]
 #[target_feature(enable = "avx2")]
 unsafe fn ycbcr_to_rgb_baseline_no_clamp(
     y: &[i16; 16], cb: &[i16; 16], cr: &[i16; 16]
 ) -> (__m256i, __m256i, __m256i) {
     // Integer formulation taken from
     // https://stackoverflow.com/questions/4041840/function-to-convert-ycbcr-to-rgb
     let luma = _mm256_loadu_si256(y.as_ptr().cast());
     // Chroma is stored with a +128 bias; remove it before scaling.
     let chroma_bias = _mm256_set1_epi16(128);
     let cb_centered = _mm256_sub_epi16(_mm256_loadu_si256(cb.as_ptr().cast()), chroma_bias);
     let cr_centered = _mm256_sub_epi16(_mm256_loadu_si256(cr.as_ptr().cast()), chroma_bias);
     // r = Y + 45 * Cr / 32
     let red_term =
         _mm256_srai_epi16::<5>(_mm256_mullo_epi16(_mm256_set1_epi16(45), cr_centered));
     let red = _mm256_add_epi16(luma, red_term);
     // g = Y - (11 * Cb + 23 * Cr) / 32
     let green_sum = _mm256_add_epi16(
         _mm256_mullo_epi16(_mm256_set1_epi16(11), cb_centered),
         _mm256_mullo_epi16(_mm256_set1_epi16(23), cr_centered)
     );
     let green = _mm256_sub_epi16(luma, _mm256_srai_epi16::<5>(green_sum));
     // b = Y + 113 * Cb / 64
     let blue_term =
         _mm256_srai_epi16::<6>(_mm256_mullo_epi16(_mm256_set1_epi16(113), cb_centered));
     let blue = _mm256_add_epi16(blue_term, luma);
     (red, green, blue)
 }
 /// Convert 16 YCbCr samples to packed 8-bit RGBA using AVX2 instructions.
 ///
 /// Safe entry point handed out as a colour-conversion function pointer; it
 /// forwards directly to the `#[target_feature]` implementation, which writes
 /// 64 bytes to `out` at `*offset` and advances `*offset` by 64.
 ///
 /// As with `ycbcr_to_rgb_avx2`, this must only be reached on CPUs that
 /// support AVX2.
 ///
 /// # Panics
 /// If `out` does not hold 64 bytes starting at `*offset`.
 #[inline(always)]
 pub fn ycbcr_to_rgba_avx2(
     y: &[i16; 16], cb: &[i16; 16], cr: &[i16; 16], out: &mut [u8], offset: &mut usize
 ) {
     // SAFETY: the callee requires AVX2; callers must only reach this function
     // on CPUs that support it.
     unsafe {
         ycbcr_to_rgba_unsafe(y, cb, cr, out, offset);
     }
 }
 /// AVX2 body of `ycbcr_to_rgba_avx2`: converts 16 pixels and stores them as
 /// packed `r, g, b, 255` bytes.
 ///
 /// Writes 64 bytes into `out` starting at `*offset`, then advances `*offset`
 /// by 64.
 ///
 /// # Panics
 /// If `out` does not hold 64 bytes starting at `*offset`.
 ///
 /// # Safety
 /// The CPU executing this function must support AVX2.
 #[inline]
 #[target_feature(enable = "avx2")]
 #[rustfmt::skip]
 unsafe fn ycbcr_to_rgba_unsafe(
     y: &[i16; 16], cb: &[i16; 16], cr: &[i16; 16],
     out: &mut [u8],
     offset: &mut usize,
 )
 {
     // Fixed-size view of the destination; a too-short buffer is rejected here.
     let tmp:& mut [u8; 64] = out.get_mut(*offset..*offset + 64).expect("Slice to small cannot write").try_into().unwrap();
     // Unclamped channels: the saturating pack below does the clamping.
     let (r, g, b) = ycbcr_to_rgb_baseline_no_clamp(y, cb, cr);
     // Narrow the i16 lanes to u8 with unsigned saturation (values below 0
     // become 0, values above 255 become 255). The pack works per 128-bit
     // half, so each half holds 8 bytes of the first operand followed by
     // 8 bytes of the second.
     let c = _mm256_packus_epi16(r, g); // [r0-7 g0-7 | r8-15 g8-15]
     // The alpha lanes are the constant 255, i.e. fully opaque.
     let d = _mm256_packus_epi16(b, _mm256_set1_epi16(255)); // [b0-7 a | b8-15 a]
     // First interleave: pair red with blue and green with alpha.
     let e = _mm256_unpacklo_epi8(c, d); // r,b pairs
     let f = _mm256_unpackhi_epi8(c, d); // g,a pairs
     // Second interleave: merging the pairs yields r,g,b,a byte order.
     let g = _mm256_unpacklo_epi8(e, f); // [pixels 0-3 | pixels 8-11]
     let h = _mm256_unpackhi_epi8(e, f); // [pixels 4-7 | pixels 12-15]
     // The per-half operations above left the pixels out of order across the
     // two registers; the permutes and blends below put the 128-bit halves
     // back so that `m` holds pixels 0-7 and `n` holds pixels 8-15.
     let i = _mm256_permute2x128_si256::<{ shuffle(3, 2, 1, 0) }>(g, h);
     let j = _mm256_permute2x128_si256::<{ shuffle(1, 2, 3, 0) }>(g, h);
     let k = _mm256_permute2x128_si256::<{ shuffle(3, 2, 0, 1) }>(g, h);
     let l = _mm256_permute2x128_si256::<{ shuffle(0, 3, 2, 1) }>(g, h);
     // Low 128 bits come from the first operand, high 128 bits from the second.
     let m = _mm256_blend_epi32::<0b1111_0000>(i, j);
     let n = _mm256_blend_epi32::<0b1111_0000>(k, l);
     // Store
     // Use streaming instructions to prevent polluting the cache?
     _mm256_storeu_si256(tmp.as_mut_ptr().cast(), m);
     _mm256_storeu_si256(tmp[32..].as_mut_ptr().cast(), n);
     *offset += 64;
 }
 /// Saturate every signed 16-bit lane of `reg` to the range `0..=255`
 /// (the valid range of an 8-bit colour channel).
 ///
 /// # Safety
 /// The CPU executing this function must support AVX2.
 #[inline]
 #[target_feature(enable = "avx2")]
 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
 unsafe fn clamp_avx(reg: __m256i) -> __m256i {
     let lower = _mm256_set1_epi16(0);
     let upper = _mm256_set1_epi16(255);
     // min(max(lane, 0), 255)
     _mm256_min_epi16(_mm256_max_epi16(reg, lower), upper)
 }
 /// Pack four selectors into one immediate: `z` lands in bits 6 and up, `y`
 /// at bit 4, `x` at bit 2 and `w` at bit 0 (the layout of C's `_MM_SHUFFLE`).
 ///
 /// The arguments are not masked, so each is expected to fit in two bits.
 #[inline]
 const fn shuffle(z: i32, y: i32, x: i32, w: i32) -> i32 {
     let upper = (z << 6) | (y << 4);
     let lower = (x << 2) | w;
     upper | lower
 }
--- a/third_party/zune-jpeg/src/color_convert/scalar.rs
+++ b/third_party/zune-jpeg/src/color_convert/scalar.rs
@ -0,0 +1,116 @@
 /*
 * Copyright (c) 2023.
 *
 * This software is free software;
 *
 * You can redistribute it or modify it under terms of the MIT, Apache License or Zlib license
 */
 use core::convert::TryInto;
 /// Saturate `a` to `0..=255` and narrow it to a byte.
 #[inline]
 #[allow(clippy::cast_possible_truncation, clippy::cast_sign_loss, dead_code)]
 fn clamp(a: i16) -> u8 {
     // After the two bounds are applied the value fits in a `u8`, so the cast is lossless.
     a.max(0).min(255) as u8
 }
 /// Convert 16 YCbCr samples to 8-bit RGBA or BGRA (scalar fallback).
 ///
 /// Writes `r, g, b, 255` per pixel when `BGRA` is false and `b, g, r, 255`
 /// when `BGRA` is true.
 ///
 /// The conversion uses the same 16-bit integer approximation as the AVX2
 /// path. Every intermediate step wraps on overflow, mirroring the SIMD
 /// instructions, so out-of-range samples produce the same bytes on both
 /// paths instead of panicking in builds with overflow checks enabled.
 ///
 /// # Arguments
 /// - `y`, `cb`, `cr`: 16 samples for each channel
 /// - `output`: destination buffer
 /// - `pos`: index in `output` where writing starts; advanced by 64 on return
 ///
 /// # Panics
 /// If `*pos` lies beyond the end of `output`, or fewer than 64 bytes are
 /// available from `*pos`.
 pub fn ycbcr_to_rgba_inner_16_scalar<const BGRA: bool>(
     y: &[i16; 16], cb: &[i16; 16], cr: &[i16; 16], output: &mut [u8], pos: &mut usize
 ) {
     let (_, output_position) = output.split_at_mut(*pos);
     // Convert into a slice with 64 elements for Rust to see we won't go out of bounds.
     let opt: &mut [u8; 64] = output_position
         .get_mut(0..64)
         .expect("Slice to small cannot write")
         .try_into()
         .unwrap();
     for ((&y, (&cb, &cr)), out) in y
         .iter()
         .zip(cb.iter().zip(cr.iter()))
         .zip(opt.chunks_exact_mut(4))
     {
         // Chroma is stored with a +128 bias; remove it before scaling.
         let cr = cr.wrapping_sub(128);
         let cb = cb.wrapping_sub(128);
         // r = Y + 45 * Cr / 32
         let r = y.wrapping_add(45_i16.wrapping_mul(cr) >> 5);
         // g = Y - (11 * Cb + 23 * Cr) / 32
         let g_term = 11_i16.wrapping_mul(cb).wrapping_add(23_i16.wrapping_mul(cr));
         let g = y.wrapping_sub(g_term >> 5);
         // b = Y + 113 * Cb / 64
         let b = y.wrapping_add(113_i16.wrapping_mul(cb) >> 6);
         if BGRA {
             out[0] = clamp(b);
             out[1] = clamp(g);
             out[2] = clamp(r);
         } else {
             out[0] = clamp(r);
             out[1] = clamp(g);
             out[2] = clamp(b);
         }
         // Fully opaque alpha.
         out[3] = 255;
     }
     *pos += 64;
 }
 /// Convert 16 YCbCr samples to 8-bit RGB or BGR (scalar fallback).
 ///
 /// Writes `r, g, b` per pixel when `BGRA` is false and `b, g, r` when `BGRA`
 /// is true.
 ///
 /// The conversion uses the same 16-bit integer approximation as the AVX2
 /// path. Every intermediate step wraps on overflow, mirroring the SIMD
 /// instructions, so out-of-range samples produce the same bytes on both
 /// paths instead of panicking in builds with overflow checks enabled.
 ///
 /// # Arguments
 /// - `y`, `cb`, `cr`: 16 samples for each channel
 /// - `output`: destination buffer
 /// - `pos`: index in `output` where writing starts; advanced by 48 on return
 ///
 /// # Panics
 /// If `*pos` lies beyond the end of `output`, or fewer than 48 bytes are
 /// available from `*pos`.
 pub fn ycbcr_to_rgb_inner_16_scalar<const BGRA: bool>(
     y: &[i16; 16], cb: &[i16; 16], cr: &[i16; 16], output: &mut [u8], pos: &mut usize
 ) {
     let (_, output_position) = output.split_at_mut(*pos);
     // Convert into a slice with 48 elements
     let opt: &mut [u8; 48] = output_position
         .get_mut(0..48)
         .expect("Slice to small cannot write")
         .try_into()
         .unwrap();
     for ((&y, (&cb, &cr)), out) in y
         .iter()
         .zip(cb.iter().zip(cr.iter()))
         .zip(opt.chunks_exact_mut(3))
     {
         // Chroma is stored with a +128 bias; remove it before scaling.
         let cr = cr.wrapping_sub(128);
         let cb = cb.wrapping_sub(128);
         // r = Y + 45 * Cr / 32
         let r = y.wrapping_add(45_i16.wrapping_mul(cr) >> 5);
         // g = Y - (11 * Cb + 23 * Cr) / 32
         let g_term = 11_i16.wrapping_mul(cb).wrapping_add(23_i16.wrapping_mul(cr));
         let g = y.wrapping_sub(g_term >> 5);
         // b = Y + 113 * Cb / 64
         let b = y.wrapping_add(113_i16.wrapping_mul(cb) >> 6);
         if BGRA {
             out[0] = clamp(b);
             out[1] = clamp(g);
             out[2] = clamp(r);
         } else {
             out[0] = clamp(r);
             out[1] = clamp(g);
             out[2] = clamp(b);
         }
     }
     // Increment pos
     *pos += 48;
 }
 /// Copy luma samples into an 8-bit grayscale buffer, dropping row padding.
 ///
 /// `y` is read as rows of `padded_width` samples and `output` is written as
 /// rows of `width` bytes; each output row receives the leading samples of
 /// the matching input row. Samples are narrowed with a plain `as u8` cast
 /// (no clamping). Trailing partial rows on either side are left untouched.
 ///
 /// # Panics
 /// If `width` or `padded_width` is zero.
 pub fn ycbcr_to_grayscale(y: &[i16], width: usize, padded_width: usize, output: &mut [u8]) {
     let rows_in = y.chunks_exact(padded_width);
     let rows_out = output.chunks_exact_mut(width);
     for (src_row, dst_row) in rows_in.zip(rows_out) {
         for (dst, &luma) in dst_row.iter_mut().zip(src_row) {
             *dst = luma as u8;
         }
     }
 }
--- a/third_party/zune-jpeg/src/components.rs
+++ b/third_party/zune-jpeg/src/components.rs
@ -0,0 +1,211 @@
 /*
 * Copyright (c) 2023.
 *
 * This software is free software;
 *
 * You can redistribute it or modify it under terms of the MIT, Apache License or Zlib license
 */
 //! This module exports a single struct to store information about
 //! JPEG image components
 //!
 //! The data is extracted from a SOF header.
 use alloc::vec::Vec;
 use alloc::{format, vec};
 use zune_core::log::trace;
 use crate::decoder::MAX_COMPONENTS;
 use crate::errors::DecodeErrors;
 use crate::upsampler::upsample_no_op;
 /// Signature of an up-sampling routine, called to expand a down-sampled
 /// component back to full resolution.
 ///
 /// The routine receives an input row, two further reference rows (`in_near`
 /// and `in_far`), a scratch buffer and the destination buffer.
 ///
 /// NOTE(review): the exact meaning of `in_near`/`in_far` and the required
 /// buffer lengths are defined by the upsampler implementations, not here —
 /// confirm against that module before relying on them.
 pub type UpSampler = fn(
     input: &[i16],
     in_near: &[i16],
     in_far: &[i16],
     scratch_space: &mut [i16],
     output: &mut [i16]
 );
 /// Per-component data gathered from the start-of-frame header, plus the
 /// working buffers used while decoding that component.
 #[derive(Clone)]
 pub(crate) struct Components {
     /// Which component this is (Y, Cb, Cr or Q), derived from its position
     /// in the frame header
     pub component_id: ComponentID,
     /// Vertical sampling factor (low nibble of the sampling byte)
     pub vertical_sample: usize,
     /// Horizontal sampling factor (high nibble of the sampling byte)
     pub horizontal_sample: usize,
     /// DC huffman table position
     pub dc_huff_table: usize,
     /// AC huffman table position for this element.
     pub ac_huff_table: usize,
     /// Quantization table number
     pub quantization_table_number: u8,
     /// Specifies quantization table to use with this component
     pub quantization_table: [i32; 64],
     /// dc prediction for the component
     pub dc_pred: i32,
     /// An up-sampling function, can be basic or SSE, depending
     /// on the platform
     pub up_sampler: UpSampler,
     /// How many samples do we need to advance to get to the next line?
     /// Starts out as `horizontal_sample` and is set properly later.
     pub width_stride: usize,
     /// Raw component identifier byte from the frame header (used for
     /// progressive images)
     pub id: u8,
     /// Whether we need to decode this image component.
     pub needed: bool,
     /// Upsample scanline
     pub raw_coeff: Vec<i16>,
     /// Upsample destination, stores a scanline worth of sub sampled data
     pub upsample_dest: Vec<i16>,
     /// previous row, used to handle MCU boundaries
     pub row_up: Vec<i16>,
     /// current row, used to handle MCU boundaries again
     pub row: Vec<i16>,
     /// Upsample destination for the first row; filled with 128 by
     /// `setup_upsample_scanline`
     pub first_row_upsample_dest: Vec<i16>,
     // NOTE(review): the next four fields look like position/size bookkeeping
     // for the IDCT output — confirm their meaning at the use sites.
     pub idct_pos: usize,
     pub x: usize,
     pub w2: usize,
     pub y: usize,
     /// Sub-sampling scheme of this component; scales the upsample buffers
     pub sample_ratio: SampleRatios,
     // a very annoying bug
     // (extra multiplier applied to the size of `upsample_dest`, 1 by default)
     pub fix_an_annoying_bug: usize
 }
 impl Components {
     /// Build a component description from its three-byte entry in the start of frame.
     ///
     /// # Arguments
     /// - `a`: `[identifier, sampling factors, quantization table number]`.
     ///   The sampling byte carries the horizontal factor in its high nibble
     ///   and the vertical factor in its low nibble.
     /// - `pos`: zero-based position of the component in the header. The
     ///   position, not the identifier byte, decides whether this is Y, Cb,
     ///   Cr or Q.
     ///
     /// # Errors
     /// Returns `DecodeErrors::Format` when `pos` is above 3, when the
     /// quantization table number is not below `MAX_COMPONENTS`, or when a
     /// sampling factor is not a power of two (which also rejects zero).
     #[inline]
     pub fn from(a: [u8; 3], pos: u8) -> Result<Components, DecodeErrors> {
         // The identifier byte is arbitrary and need not be ascending (see
         // tests/inputs/huge_sof_number), so the role comes from the position.
         let component_id = match pos {
             0 => ComponentID::Y,
             1 => ComponentID::Cb,
             2 => ComponentID::Cr,
             3 => ComponentID::Q,
             _ => {
                 return Err(DecodeErrors::Format(format!(
                     "Unknown component id found,{pos}, expected value between 1 and 4"
                 )))
             }
         };
         let [raw_id, sampling, quantization_table_number] = a;
         let horizontal_sample = usize::from(sampling >> 4);
         let vertical_sample = usize::from(sampling & 0x0f);
         // The table number indexes an array of MAX_COMPONENTS tables.
         if usize::from(quantization_table_number) >= MAX_COMPONENTS {
             return Err(DecodeErrors::Format(format!(
                 "Too large quantization number :{quantization_table_number}, expected value between 0 and {MAX_COMPONENTS}"
             )));
         }
         // Sampling factors that are not powers of two most likely mean a corrupt image.
         if !horizontal_sample.is_power_of_two() {
             return Err(DecodeErrors::Format(format!(
                 "Horizontal sample is not a power of two({horizontal_sample}) cannot decode"
             )));
         }
         if !vertical_sample.is_power_of_two() {
             return Err(DecodeErrors::Format(format!(
                 "Vertical sub-sample is not power of two({vertical_sample}) cannot decode"
             )));
         }
         trace!(
             "Component ID:{:?} \tHS:{} VS:{} QT:{}",
             component_id,
             horizontal_sample,
             vertical_sample,
             quantization_table_number
         );
         Ok(Components {
             // Values read from the header.
             component_id,
             id: raw_id,
             horizontal_sample,
             vertical_sample,
             quantization_table_number,
             // Table selections and contents are filled in later.
             dc_huff_table: 0,
             ac_huff_table: 0,
             quantization_table: [0; 64],
             dc_pred: 0,
             up_sampler: upsample_no_op,
             // Placeholder until the real stride is known.
             width_stride: horizontal_sample,
             needed: true,
             sample_ratio: SampleRatios::None,
             fix_an_annoying_bug: 1,
             // Working buffers start empty.
             raw_coeff: vec![],
             upsample_dest: vec![],
             first_row_upsample_dest: vec![],
             row_up: vec![],
             row: vec![],
             idct_pos: 0,
             x: 0,
             y: 0,
             w2: 0
         })
     }
     /// Allocate the buffers needed to upsample this component.
     ///
     /// Upsampling needs rows kept from one pass to the next, so this sizes
     /// `row`, `row_up`, `first_row_upsample_dest` (filled with 128) and
     /// `upsample_dest` from the component's stride, vertical sampling factor
     /// and sample ratio. Components that are not sub-sampled do not need
     /// these buffers, so it is only called when upsampling is required.
     ///
     /// # Requirements
     ///  - `width_stride` and `sample_ratio` are already set for the component.
     pub fn setup_upsample_scanline(&mut self) {
         let line_len = self.width_stride * self.vertical_sample;
         self.row = vec![0; line_len];
         self.row_up = vec![0; line_len];
         let ratio = self.sample_ratio.sample();
         self.first_row_upsample_dest =
             vec![128; self.vertical_sample * self.width_stride * ratio];
         self.upsample_dest =
             vec![0; self.width_stride * ratio * self.fix_an_annoying_bug * 8];
     }
 }
 /// Role of a component within the frame, assigned from the component's
 /// position in the start-of-frame header (first is `Y`, fourth is `Q`).
 #[derive(Copy, Debug, Clone, PartialEq, Eq)]
 pub enum ComponentID {
     /// Luminance channel
     Y,
     /// Blue chrominance
     Cb,
     /// Red chrominance
     Cr,
     /// Q or fourth component
     Q
 }
 /// Sub-sampling scheme of a component; `sample()` maps each variant to the
 /// factor used when sizing upsample buffers.
 // NOTE(review): variant meanings below are read off the names — confirm
 // against the code that assigns them.
 #[derive(Copy, Debug, Clone, PartialEq, Eq)]
 pub enum SampleRatios {
     /// Presumably sub-sampled both horizontally and vertically (factor 4)
     HV,
     /// Presumably sub-sampled vertically only (factor 2)
     V,
     /// Presumably sub-sampled horizontally only (factor 2)
     H,
     /// No sub-sampling (factor 1)
     None
 }
 impl SampleRatios {
    pub fn sample(self) -> usize {
        match self {
            SampleRatios::HV => 4,
            SampleRatios::V | SampleRatios::H => 2,
            SampleRatios::None => 1
        }
    }
 }
--- a/third_party/zune-jpeg/src/decoder.rs
+++ b/third_party/zune-jpeg/src/decoder.rs
@ -0,0 +1,910 @@
 /*
 * Copyright (c) 2023.
 *
 * This software is free software;
 *
 * You can redistribute it or modify it under terms of the MIT, Apache License or Zlib license
 */
 //! Main image logic.
 #![allow(clippy::doc_markdown)]
 use alloc::string::ToString;
 use alloc::vec::Vec;
 use alloc::{format, vec};
 use zune_core::bytestream::{ZByteReaderTrait, ZReader};
 use zune_core::colorspace::ColorSpace;
 use zune_core::log::{error, trace, warn};
 use zune_core::options::DecoderOptions;
 use crate::color_convert::choose_ycbcr_to_rgb_convert_func;
 use crate::components::{Components, SampleRatios};
 use crate::errors::{DecodeErrors, UnsupportedSchemes};
 use crate::headers::{
    parse_app1, parse_app14, parse_app2, parse_dqt, parse_huffman, parse_sos, parse_start_of_frame
 };
 use crate::huffman::HuffmanTable;
 use crate::idct::choose_idct_func;
 use crate::marker::Marker;
 use crate::misc::SOFMarkers;
 use crate::upsampler::{
    choose_horizontal_samp_function, choose_hv_samp_function, choose_v_samp_function,
    upsample_no_op
 };
 /// Maximum number of components in an image; also the number of
 /// quantization and Huffman table slots the decoder keeps.
 pub(crate) const MAX_COMPONENTS: usize = 4;
 /// Maximum image dimensions supported (`1 << 27` = 134,217,728).
 pub(crate) const MAX_DIMENSIONS: usize = 1 << 27;
 /// Color conversion function that can convert YCbCr colorspace to RGB(A/X) for
 /// 16 values
 ///
 /// The following are guarantees to the following functions
 ///
 /// 1. Each `&[i16; 16]` argument holds exactly 16 samples
 ///
 /// 2. The arrays are passed in the following order
 ///     `y,cb,cr`
 ///
 /// 3. `&mut [u8]` is zero initialized
 ///
 /// 4. `&mut usize` holds the position in the output slice where new values
 /// should be written
 ///
 /// The function should
 /// 1. Carry out color conversion
 /// 2. Update `&mut usize` with the new position
 pub type ColorConvert16Ptr = fn(&[i16; 16], &[i16; 16], &[i16; 16], &mut [u8], &mut usize);
 /// IDCT  function prototype
 ///
 /// This encapsulates a dequantize and IDCT function which will carry out the
 /// following functions
 ///
 /// Multiply each 64 element block of coefficients with its quantization table
 /// Carry out IDCT (type 3 dct) on each block of 64 values
 ///
 /// NOTE(review): the signature takes `&mut [i32; 64]`, `&mut [i16]` and a
 /// `usize`; presumably the coefficient block, the output samples and the
 /// output stride — confirm against the IDCT implementations.
 pub type IDCTPtr = fn(&mut [i32; 64], &mut [i16], usize);
 /// An encapsulation of an ICC chunk
 // NOTE(review): field meanings below are inferred from the names — confirm
 // against the APP2 parser that fills them in.
 pub(crate) struct ICCChunk {
     /// Presumably the sequence number of this chunk within the profile
     pub(crate) seq_no:      u8,
     /// Presumably the total number of chunks making up the profile
     pub(crate) num_markers: u8,
     /// Raw bytes carried by this chunk
     pub(crate) data:        Vec<u8>
 }
 /// A JPEG Decoder Instance.
 ///
 /// Holds the parsed header state, the decoding tables, the function pointers
 /// chosen for the current platform and options, and the input byte stream.
 #[allow(clippy::upper_case_acronyms, clippy::struct_excessive_bools)]
 pub struct JpegDecoder<T: ZByteReaderTrait> {
     /// Struct to hold image information from SOI
     pub(crate) info:              ImageInfo,
     ///  Quantization tables, will be set to none and the tables will
     /// be moved to `components` field
     pub(crate) qt_tables:         [Option<[i32; 64]>; MAX_COMPONENTS],
     /// DC Huffman Tables with a maximum of 4 tables for each  component
     pub(crate) dc_huffman_tables: [Option<HuffmanTable>; MAX_COMPONENTS],
     /// AC Huffman Tables with a maximum of 4 tables for each component
     pub(crate) ac_huffman_tables: [Option<HuffmanTable>; MAX_COMPONENTS],
     /// Image components, holds information like DC prediction and quantization
     /// tables of a component
     pub(crate) components:        Vec<Components>,
     /// maximum horizontal component of all channels in the image
     pub(crate) h_max:             usize,
     // maximum vertical component of all channels in the image
     pub(crate) v_max:             usize,
     /// MCU width (interleaved scans)
     pub(crate) mcu_width:         usize,
     /// MCU height (interleaved scans)
     pub(crate) mcu_height:        usize,
     /// Number of MCU's in the x plane
     pub(crate) mcu_x:             usize,
     /// Number of MCU's in the y plane
     pub(crate) mcu_y:             usize,
     /// Is the image interleaved?
     pub(crate) is_interleaved:    bool,
     /// Sub-sampling ratio of the image; starts as `SampleRatios::None`
     pub(crate) sub_sample_ratio:  SampleRatios,
     /// Image input colorspace, should be YCbCr for a sane image, might be
     /// grayscale too
     pub(crate) input_colorspace:  ColorSpace,
     // Progressive image details
     /// Is the image progressive?
     pub(crate) is_progressive:    bool,
     /// Start of spectral scan
     pub(crate) spec_start:       u8,
     /// End of spectral scan
     pub(crate) spec_end:         u8,
     /// Successive approximation bit position high
     pub(crate) succ_high:        u8,
     /// Successive approximation bit position low
     pub(crate) succ_low:         u8,
     /// Number of components.
     // NOTE(review): the field is named `num_scans` but documented as a
     // component count — confirm which one it holds.
     pub(crate) num_scans:        u8,
     // Function pointers, for pointy stuff.
     /// Dequantize and idct function
     // This is determined at runtime which function to run, statically it's
     // initialized to a platform independent one and during initialization
     // of this struct, we check if we can switch to a faster one which
     // depend on certain CPU extensions.
     pub(crate) idct_func: IDCTPtr,
     // Color convert function which acts on 16 YCbCr values
     pub(crate) color_convert_16: ColorConvert16Ptr,
     // NOTE(review): presumably the order in which components appear in a
     // scan — confirm at the use sites.
     pub(crate) z_order:          [usize; MAX_COMPONENTS],
     /// restart markers
     pub(crate) restart_interval: usize,
     // NOTE(review): initialised to 0x7fff_ffff; presumably the number of MCUs
     // left before the next restart marker — confirm.
     pub(crate) todo:             usize,
     // decoder options
     pub(crate) options:          DecoderOptions,
     // byte-stream
     pub(crate) stream:           ZReader<T>,
     // Indicate whether headers have been decoded
     pub(crate) headers_decoded:  bool,
     // NOTE(review): presumably set once a start-of-frame marker has been
     // parsed — confirm.
     pub(crate) seen_sof:         bool,
     // exif data, lifted from app2
     // NOTE(review): the decoder imports both `parse_app1` and `parse_app2`;
     // confirm which marker actually supplies the EXIF bytes.
     pub(crate) exif_data:        Option<Vec<u8>>,
     // ICC profile chunks collected so far
     pub(crate) icc_data: Vec<ICCChunk>,
     // NOTE(review): presumably marks the stream as Motion JPEG — confirm.
     pub(crate) is_mjpeg: bool,
     pub(crate) coeff:    usize // Solves some weird bug :)
 }
 impl<T> JpegDecoder<T>
 where
    T: ZByteReaderTrait
 {
    #[allow(clippy::redundant_field_names)]
    fn default(options: DecoderOptions, buffer: T) -> Self {
        let color_convert = choose_ycbcr_to_rgb_convert_func(ColorSpace::RGB, &options).unwrap();
        JpegDecoder {
            info:              ImageInfo::default(),
            qt_tables:         [None, None, None, None],
            dc_huffman_tables: [None, None, None, None],
            ac_huffman_tables: [None, None, None, None],
            components:        vec![],
            // Interleaved information
            h_max:             1,
            v_max:             1,
            mcu_height:        0,
            mcu_width:         0,
            mcu_x:             0,
            mcu_y:             0,
            is_interleaved:    false,
            sub_sample_ratio:  SampleRatios::None,
            is_progressive:    false,
            spec_start:        0,
            spec_end:          0,
            succ_high:         0,
            succ_low:          0,
            num_scans:         0,
            idct_func:         choose_idct_func(&options),
            color_convert_16:  color_convert,
            input_colorspace:  ColorSpace::YCbCr,
            z_order:           [0; MAX_COMPONENTS],
            restart_interval:  0,
            todo:              0x7fff_ffff,
            options:           options,
            stream:            ZReader::new(buffer),
            headers_decoded:   false,
            seen_sof:          false,
            exif_data:         None,
            icc_data:          vec![],
            is_mjpeg:          false,
            coeff:             1
        }
    }
    /// Decode a buffer already in memory
    ///
    /// The buffer should be a valid jpeg file, perhaps created by the command
    /// `std:::fs::read()` or a JPEG file downloaded from the internet.
    ///
    /// # Errors
    /// See DecodeErrors for an explanation
    pub fn decode(&mut self) -> Result<Vec<u8>, DecodeErrors> {
        self.decode_headers()?;
        let size = self.output_buffer_size().unwrap();
        let mut out = vec![0; size];
        self.decode_into(&mut out)?;
        Ok(out)
    }
    /// Create a new Decoder instance
    ///
    /// # Arguments
    ///  - `stream`: The raw bytes of a jpeg file.
    #[must_use]
    #[allow(clippy::new_without_default)]
    pub fn new(stream: T) -> JpegDecoder<T> {
        JpegDecoder::default(DecoderOptions::default(), stream)
    }
    /// Returns the image information
    ///
    /// This **must** be called after a subsequent call to [`decode`] or [`decode_headers`]
    /// it will return `None`
    ///
    /// # Returns
    /// - `Some(info)`: Image information,width, height, number of components
    /// - None: Indicates image headers haven't been decoded
    ///
    /// [`decode`]: JpegDecoder::decode
    /// [`decode_headers`]: JpegDecoder::decode_headers
    #[must_use]
    pub fn info(&self) -> Option<ImageInfo> {
        // we check for fails to that call by comparing what we have to the default, if
        // it's default we assume that the caller failed to uphold the
        // guarantees. We can be sure that an image cannot be the default since
        // its a hard panic in-case width or height are set to zero.
        if !self.headers_decoded {
            return None;
        }
        return Some(self.info.clone());
    }
    /// Return the number of bytes required to hold a decoded image frame
    /// decoded using the given input transformations
    ///
    /// # Returns
    ///  - `Some(usize)`: Minimum size for a buffer needed to decode the image
    ///  - `None`: Indicates the image was not decoded, or image dimensions would overflow a usize
    ///
    #[must_use]
    pub fn output_buffer_size(&self) -> Option<usize> {
        return if self.headers_decoded {
            Some(
                usize::from(self.width())
                    .checked_mul(usize::from(self.height()))?
                    .checked_mul(self.options.jpeg_get_out_colorspace().num_components())?
            )
        } else {
            None
        };
    }
     /// Get an immutable reference to the decoder options
     /// for the decoder instance
     ///
     /// The reference itself cannot be used to change anything; to modify
     /// options after creation but before decoding, derive new options from it
     /// and hand them back through `set_options`, as shown below.
     ///
     /// # Example
     /// ```no_run
     /// use zune_core::bytestream::ZCursor;
     /// use zune_jpeg::JpegDecoder;
     ///
     /// let mut decoder = JpegDecoder::new(ZCursor::new(&[]));
     /// // get current options
     /// let mut options = decoder.options();
     /// // modify it
     ///  let new_options = options.set_max_width(10);
     /// // set it back
     /// decoder.set_options(new_options);
     ///
     /// ```
     #[must_use]
     pub const fn options(&self) -> &DecoderOptions {
         &self.options
     }
    /// Return the input colorspace of the image
    ///
    /// This indicates the colorspace that is present in
    /// the image, but this may be different to the colorspace that
    /// the output will be transformed to
    ///
    /// # Returns
    /// -`Some(Colorspace)`: Input colorspace
    /// - None : Indicates the headers weren't decoded
    #[must_use]
    pub fn input_colorspace(&self) -> Option<ColorSpace> {
        return if self.headers_decoded { Some(self.input_colorspace) } else { None };
    }
     /// Set decoder options
     ///
     /// This can be used to set new options even after initialization
     /// but before decoding.
     ///
     /// This does not bear any significance after decoding an image
     ///
     /// Only the stored options are replaced here; nothing else on the decoder
     /// is recomputed by this call.
     ///
     /// # Arguments
     /// - `options`: New decoder options
     ///
     /// # Example
     /// Set maximum jpeg progressive passes to be 4
     ///
     /// ```no_run
     /// use zune_core::bytestream::ZCursor;
     /// use zune_jpeg::JpegDecoder;
     /// let mut decoder =JpegDecoder::new(ZCursor::new(&[]));
     /// // this works also because DecoderOptions implements `Copy`
     /// let options = decoder.options().jpeg_set_max_scans(4);
     /// // set the new options
     /// decoder.set_options(options);
     /// // now decode
     /// decoder.decode().unwrap();
     /// ```
     pub fn set_options(&mut self, options: DecoderOptions) {
         self.options = options;
     }
    /// Decode JPEG headers
    ///
    /// Checks the two start-of-image magic bytes and then scans the stream
    /// for markers, handing each recognised one to `parse_marker_inner`,
    /// until the Start of Scan marker has been parsed. At that point
    /// `headers_decoded` is set; later calls return immediately.
    ///
    /// # Supported Headers
    ///  - APP(0)
    ///  - SOF(0)
    ///  - DQT -> Quantization tables
    ///  - DHT -> Huffman tables
    ///  - SOS -> Start of Scan
    /// # Unsupported Headers
    ///  - SOF(n) -> JPEG images which are not baseline/progressive
    ///  - DAC -> Images using Arithmetic tables
    ///  - JPG(n)
    ///
    /// # Errors
    ///  - `DecodeErrors::IllegalMagicBytes` if the stream does not start with `0xFFD8`
    ///  - `DecodeErrors::FormatStatic` in strict mode when extra bytes sit between headers
    ///  - `DecodeErrors::Format` when an unknown marker declares a length below 2
    ///  - any error propagated from reading the stream or from `parse_marker_inner`
    fn decode_headers_internal(&mut self) -> Result<(), DecodeErrors> {
        if self.headers_decoded {
            trace!("Headers decoded!");
            return Ok(());
        }
        // match output colorspace here
        // we know this will only be called once per image
        // so makes sense
        // We only care for ycbcr to rgb/rgba here
        // in case one is using another colorspace.
        // May god help you
        let out_colorspace = self.options.jpeg_get_out_colorspace();
        if matches!(
            out_colorspace,
            ColorSpace::BGR | ColorSpace::BGRA | ColorSpace::RGB | ColorSpace::RGBA
        ) {
            // NOTE(review): the `unwrap` panics if no conversion function is
            // returned; presumably one always exists for these four
            // colorspaces -- confirm against `choose_ycbcr_to_rgb_convert_func`
            self.color_convert_16 = choose_ycbcr_to_rgb_convert_func(
                self.options.jpeg_get_out_colorspace(),
                &self.options
            )
            .unwrap();
        }
        // First two bytes should be jpeg soi marker
        let magic_bytes = self.stream.get_u16_be_err()?;
        // previous byte seen by the scan loop, used to spot the 0xFF marker prefix
        let mut last_byte = 0;
        // loop iterations since the last marker was handled
        let mut bytes_before_marker = 0;
        if magic_bytes != 0xffd8 {
            return Err(DecodeErrors::IllegalMagicBytes(magic_bytes));
        }
        loop {
            // read a byte
            let mut m = self.stream.read_u8_err()?;
            // AND OF COURSE some images will have fill bytes in their marker
            // bitstreams because why not.
            //
            // I am disappointed as a man.
            if (m == 0xFF || m == 0) && last_byte == 0xFF {
                // This handles the edge case where
                // images have markers with fill bytes(0xFF)
                // or byte stuffing (0)
                // I.e 0xFF 0xFF 0xDA
                // and
                // 0xFF 0 0xDA
                // The run of 0xFF / 0 bytes is skipped here.
                //
                // NOTE: `last_byte` is overwritten with every skipped byte,
                // so when the run ends in 0 (e.g. 0xFF 0 0xDA) `last_byte`
                // is 0 afterwards and the byte that follows is NOT treated
                // as a marker below; only runs ending in 0xFF
                // (e.g. 0xFF 0xFF 0xDA) yield a marker.
                while m == 0xFF || m == 0x0 {
                    last_byte = m;
                    m = self.stream.read_u8_err()?;
                }
            }
            // Last byte should be 0xFF to confirm existence of a marker since markers look
            // like 0xFF(some marker data)
            if last_byte == 0xFF {
                let marker = Marker::from_u8(m);
                if let Some(n) = marker {
                    // more than three iterations since the previous marker
                    // means stray bytes were found between two headers
                    if bytes_before_marker > 3 {
                        if self.options.strict_mode()
                        /*No reason to use this*/
                        {
                            return Err(DecodeErrors::FormatStatic(
                                "[strict-mode]: Extra bytes between headers"
                            ));
                        }
                        error!(
                            "Extra bytes {} before marker 0xFF{:X}",
                            bytes_before_marker - 3,
                            m
                        );
                    }
                    bytes_before_marker = 0;
                    self.parse_marker_inner(n)?;
                    // headers end at the start of scan, what follows is image data
                    if n == Marker::SOS {
                        self.headers_decoded = true;
                        trace!("Input colorspace {:?}", self.input_colorspace);
                        return Ok(());
                    }
                } else {
                    // unknown marker: read its length field and skip the payload
                    bytes_before_marker = 0;
                    warn!("Marker 0xFF{:X} not known", m);
                    let length = self.stream.get_u16_be_err()?;
                    // the length field counts its own two bytes
                    if length < 2 {
                        return Err(DecodeErrors::Format(format!(
                            "Found a marker with invalid length : {length}"
                        )));
                    }
                    warn!("Skipping {} bytes", length - 2);
                    self.stream.skip((length - 2) as usize)?;
                }
            }
            last_byte = m;
            bytes_before_marker += 1;
        }
    }
    #[allow(clippy::too_many_lines)]
    pub(crate) fn parse_marker_inner(&mut self, m: Marker) -> Result<(), DecodeErrors> {
        match m {
            Marker::SOF(0..=2) => {
                let marker = {
                    // choose marker
                    if m == Marker::SOF(0) || m == Marker::SOF(1) {
                        SOFMarkers::BaselineDct
                    } else {
                        self.is_progressive = true;
                        SOFMarkers::ProgressiveDctHuffman
                    }
                };
                trace!("Image encoding scheme =`{:?}`", marker);
                // get components
                parse_start_of_frame(marker, self)?;
            }
            // Start of Frame Segments not supported
            Marker::SOF(v) => {
                let feature = UnsupportedSchemes::from_int(v);
                if let Some(feature) = feature {
                    return Err(DecodeErrors::Unsupported(feature));
                }
                return Err(DecodeErrors::Format("Unsupported image format".to_string()));
            }
            //APP(0) segment
            Marker::APP(0) => {
                let mut length = self.stream.get_u16_be_err()?;
                if length < 2 {
                    return Err(DecodeErrors::Format(format!(
                        "Found a marker with invalid length:{length}\n"
                    )));
                }
                // skip for now
                if length > 5 {
                    let mut buffer = [0u8; 5];
                    self.stream.read_exact_bytes(&mut buffer)?;
                    if &buffer == b"AVI1\0" {
                        self.is_mjpeg = true;
                    }
                    length -= 5;
                }
                self.stream.skip(length.saturating_sub(2) as usize)?;
                //parse_app(buf, m, &mut self.info)?;
            }
            Marker::APP(1) => {
                parse_app1(self)?;
            }
            Marker::APP(2) => {
                parse_app2(self)?;
            }
            // Quantization tables
            Marker::DQT => {
                parse_dqt(self)?;
            }
            // Huffman tables
            Marker::DHT => {
                parse_huffman(self)?;
            }
            // Start of Scan Data
            Marker::SOS => {
                parse_sos(self)?;
                // break after reading the start of scan.
                // what follows is the image data
                return Ok(());
            }
            Marker::EOI => return Err(DecodeErrors::FormatStatic("Premature End of image")),
            Marker::DAC | Marker::DNL => {
                return Err(DecodeErrors::Format(format!(
                    "Parsing of the following header `{m:?}` is not supported,\
                                cannot continue"
                )));
            }
            Marker::DRI => {
                trace!("DRI marker present");
                if self.stream.get_u16_be_err()? != 4 {
                    return Err(DecodeErrors::Format(
                        "Bad DRI length, Corrupt JPEG".to_string()
                    ));
                }
                self.restart_interval = usize::from(self.stream.get_u16_be_err()?);
                self.todo = self.restart_interval;
            }
            Marker::APP(14) => {
                parse_app14(self)?;
            }
            _ => {
                warn!(
                    "Capabilities for processing marker \"{:?}\" not implemented",
                    m
                );
                let length = self.stream.get_u16_be_err()?;
                if length < 2 {
                    return Err(DecodeErrors::Format(format!(
                        "Found a marker with invalid length:{length}\n"
                    )));
                }
                warn!("Skipping {} bytes", length - 2);
                self.stream.skip((length - 2) as usize)?;
            }
        }
        Ok(())
    }
    /// Assemble the embedded ICC profile, if the image carries a valid one
    ///
    /// The profile is collected while parsing headers, so calling
    /// [`decode_headers`] is enough; the image itself need not be decoded
    ///
    /// # Returns
    /// - `Some(Vec<u8>)`: The raw ICC profile, with its chunks joined in
    ///   sequence-number order
    /// - `None`: The headers weren't decoded, there is no ICC profile, or
    ///   the stored chunks are inconsistent (mismatched counts, zero,
    ///   duplicate or missing sequence numbers)
    ///
    /// [`decode_headers`]:Self::decode_headers
    #[must_use]
    pub fn icc_profile(&self) -> Option<Vec<u8>> {
        if !self.headers_decoded {
            return None;
        }
        let chunk_count = self.icc_data.len();
        if chunk_count == 0 || chunk_count >= 255 {
            return None;
        }
        // slot `i` holds the chunk carrying sequence number `i`
        let mut slots: [Option<&ICCChunk>; 256] = [None; 256];
        for chunk in &self.icc_data {
            // every chunk must agree on how many chunks there are
            if usize::from(chunk.num_markers) != chunk_count {
                return None;
            }
            if chunk.seq_no == 0 {
                warn!("Zero sequence number in ICC, corrupt ICC chunk");
                return None;
            }
            let slot = &mut slots[usize::from(chunk.seq_no)];
            if slot.is_some() {
                // the same sequence number was seen twice
                warn!("Duplicate sequence number in ICC, corrupt chunk");
                return None;
            }
            *slot = Some(chunk);
        }
        // stitch the chunks together in sequence order
        let mut data = Vec::with_capacity(1000);
        for slot in &slots[1..=chunk_count] {
            match slot {
                Some(ch) => data.extend_from_slice(&ch.data),
                None => {
                    warn!("Missing icc sequence number, corrupt ICC chunk ");
                    return None;
                }
            }
        }
        Some(data)
    }
    /// Borrow the raw exif data stored for the file
    ///
    /// The data starts at the TIFF header
    ///
    /// # Returns
    /// - `Some(data)`: The raw exif data, if present in the image
    /// - `None`: Either of the following
    ///
    ///    1. The image doesn't have exif data
    ///    2. The image headers haven't been decoded
    #[must_use]
    pub fn exif(&self) -> Option<&Vec<u8>> {
        self.exif_data.as_ref()
    }
    /// Report the colorspace the image pixels will be decoded into
    ///
    /// # Note
    /// The value is only meaningful once headers are decoded, since
    /// markers such as Adobe APP14 may dictate a different colorspace
    /// than the one requested.
    ///
    /// Calling `decode_headers` is sufficient to learn the output
    /// colorspace; when called after `decode` it is the colorspace the
    /// output is currently in
    ///
    /// Not every input->output colorspace mapping is supported, but all
    /// input colorspaces can map to RGB, so that is a safe choice
    ///
    /// # Returns
    /// - `Some(ColorSpace)`: If headers have been decoded, the colorspace
    ///   the output array will be in
    /// - `None`: The headers have not been decoded
    #[must_use]
    pub fn output_colorspace(&self) -> Option<ColorSpace> {
        if !self.headers_decoded {
            return None;
        }
        Some(self.options.jpeg_get_out_colorspace())
    }
    /// Decode into a pre-allocated buffer
    ///
    /// Headers are decoded first if that has not happened yet.
    ///
    /// It is an error if the buffer size is smaller than
    /// [`output_buffer_size()`](Self::output_buffer_size)
    ///
    /// If the buffer is bigger than expected, we ignore the end padding bytes
    ///
    /// # Errors
    /// - Any error from decoding the headers or the image data
    /// - `DecodeErrors::FormatStatic` if the required output size cannot be
    ///   computed (for instance because it overflows `usize`)
    /// - `DecodeErrors::TooSmallOutput` if `out` is smaller than required
    ///
    /// # Example
    ///
    /// - Read  headers and then alloc a buffer big enough to hold the image
    ///
    /// ```no_run
    /// use zune_core::bytestream::ZCursor;
    /// use zune_jpeg::JpegDecoder;
    /// let mut decoder = JpegDecoder::new(ZCursor::new(&[]));
    /// // before we get output, we must decode the headers to get width
    /// // height, and input colorspace
    /// decoder.decode_headers().unwrap();
    ///
    /// let mut out = vec![0;decoder.output_buffer_size().unwrap()];
    /// // write into out
    /// decoder.decode_into(&mut out).unwrap();
    /// ```
    ///
    ///
    pub fn decode_into(&mut self, out: &mut [u8]) -> Result<(), DecodeErrors> {
        self.decode_headers_internal()?;
        // The size is built with checked multiplications and comes back as
        // `None` when they overflow; report that instead of panicking.
        let expected_size = self
            .output_buffer_size()
            .ok_or(DecodeErrors::FormatStatic(
                "Output buffer size cannot be computed, image dimensions are too large"
            ))?;
        if out.len() < expected_size {
            // too small of a size
            return Err(DecodeErrors::TooSmallOutput(expected_size, out.len()));
        }
        // ensure we don't touch anyone else's scratch space:
        // `out` is at least `expected_size` long here, so trim it to exactly that
        let out = &mut out[..expected_size];
        if self.is_progressive {
            self.decode_mcu_ycbcr_progressive(out)
        } else {
            self.decode_mcu_ycbcr_baseline(out)
        }
    }
    /// Decode only the headers of a jpeg image
    ///
    /// Use this to obtain information such as the image width and
    /// height without decoding the full image
    ///
    /// # Examples
    /// ```no_run
    /// use zune_core::bytestream::ZCursor;
    /// use zune_jpeg::{JpegDecoder};
    ///
    /// let img_data = std::fs::read("a_valid.jpeg").unwrap();
    /// let mut decoder = JpegDecoder::new(ZCursor::new(&img_data));
    /// decoder.decode_headers().unwrap();
    ///
    /// println!("Total decoder dimensions are : {:?} pixels",decoder.dimensions());
    /// println!("Number of components in the image are {}", decoder.info().unwrap().components);
    /// ```
    /// # Errors
    /// See the `DecodeErrors` enum for the errors header decoding can produce
    pub fn decode_headers(&mut self) -> Result<(), DecodeErrors> {
        self.decode_headers_internal()
    }
    /// Create a new decoder with the specified options to be used for decoding
    /// an image
    ///
    /// # Arguments
    /// - `buf`: The input buffer from where we will pull in compressed jpeg bytes from
    /// - `options`: Options specific to this decoder instance
    #[must_use]
    pub fn new_with_options(buf: T, options: DecoderOptions) -> JpegDecoder<T> {
        JpegDecoder::default(options, buf)
    }
    /// Pick and install up-sampling routines for a down sampled image
    ///
    /// Records the image-wide sampling ratio and then, for every
    /// component, its own ratio and the function used to up-sample it.
    ///
    /// # Errors
    /// `DecodeErrors::Format` if a sampling ratio is not one of
    /// 1x1, 2x1, 1x2 or 2x2
    pub(crate) fn set_upsampling(&mut self) -> Result<(), DecodeErrors> {
        // no sampling in either direction, nothing to set up
        if self.h_max == 1 && self.v_max == 1 {
            return Ok(());
        }
        // image-wide ratio, taken from the maximum sampling factors
        self.sub_sample_ratio = match (self.h_max, self.v_max) {
            (1, 1) => SampleRatios::None,
            (1, 2) => SampleRatios::V,
            (2, 1) => SampleRatios::H,
            (2, 2) => SampleRatios::HV,
            _ => {
                return Err(DecodeErrors::Format(
                    "Unknown down-sampling method, cannot continue".to_string()
                ))
            }
        };
        for component in &mut self.components {
            // how far this component must be scaled to reach the maximum
            let horizontal_scale = self.h_max / component.horizontal_sample;
            let vertical_scale = self.v_max / component.vertical_sample;
            let up_sampler = match (horizontal_scale, vertical_scale) {
                (1, 1) => {
                    component.sample_ratio = SampleRatios::None;
                    upsample_no_op
                }
                (2, 1) => {
                    component.sample_ratio = SampleRatios::H;
                    choose_horizontal_samp_function(self.options.use_unsafe())
                }
                (1, 2) => {
                    component.sample_ratio = SampleRatios::V;
                    choose_v_samp_function(self.options.use_unsafe())
                }
                (2, 2) => {
                    component.sample_ratio = SampleRatios::HV;
                    choose_hv_samp_function(self.options.use_unsafe())
                }
                _ => {
                    return Err(DecodeErrors::Format(
                        "Unknown down-sampling method, cannot continue".to_string()
                    ))
                }
            };
            component.setup_upsample_scanline();
            component.up_sampler = up_sampler;
        }
        Ok(())
    }
    #[must_use]
    /// Get the width of the image as a u16
    ///
    /// Returns the `width` field of the stored image info as is.
    /// NOTE(review): presumably between 1 and 65535 once headers have been
    /// decoded; nothing here enforces a non-zero value -- confirm against
    /// the start of frame parser
    pub(crate) fn width(&self) -> u16 {
        self.info.width
    }
    /// Get the height of the image as a u16
    ///
    /// Returns the `height` field of the stored image info as is.
    /// NOTE(review): presumably between 1 and 65535 once headers have been
    /// decoded; nothing here enforces a non-zero value -- confirm against
    /// the start of frame parser
    #[must_use]
    pub(crate) fn height(&self) -> u16 {
        self.info.height
    }
    /// Report the image dimensions as a `(width, height)` tuple
    ///
    /// # Returns
    /// - `Some((width, height))`: Image dimensions
    /// - `None`: The image headers haven't been decoded
    #[must_use]
    pub const fn dimensions(&self) -> Option<(usize, usize)> {
        if !self.headers_decoded {
            return None;
        }
        // widening u16 -> usize, `as` is used because this is a const fn
        let width = self.info.width as usize;
        let height = self.info.height as usize;
        Some((width, height))
    }
 }
 /// A struct representing Image Information
 ///
 /// Holds the basic facts about an image that the decoder records
 /// through the crate-internal setters on this type
 #[derive(Default, Clone, Eq, PartialEq)]
 #[allow(clippy::module_name_repetitions)]
 pub struct ImageInfo {
    /// Width of the image
    pub width:         u16,
    /// Height of image
    pub height:        u16,
    /// Pixel density
    pub pixel_density: u8,
    /// Start of frame marker describing the encoding scheme
    pub sof:           SOFMarkers,
    /// Horizontal density (x-density)
    pub x_density:     u16,
    /// Vertical density (y-density)
    pub y_density:     u16,
    /// Number of components
    pub components:    u8
 }
 // Crate-internal setters, one per field that is filled in after construction.
 impl ImageInfo {
    /// Set width of the image
    ///
    /// Found in the start of frame
    pub(crate) fn set_width(&mut self, width: u16) {
        self.width = width;
    }
    /// Set height of the image
    ///
    /// Found in the start of frame
    pub(crate) fn set_height(&mut self, height: u16) {
        self.height = height;
    }
    /// Set the image pixel density
    ///
    /// Stored in the `pixel_density` field.
    /// NOTE(review): documented as found in the start of frame -- confirm
    /// against the caller
    pub(crate) fn set_density(&mut self, density: u8) {
        self.pixel_density = density;
    }
    /// Set image Start of frame marker
    ///
    /// Found in the start of frame header
    pub(crate) fn set_sof_marker(&mut self, marker: SOFMarkers) {
        self.sof = marker;
    }
    /// Set image x-density
    ///
    /// Found in the APP(0) marker
    // currently has no caller, hence the lint exemption
    #[allow(dead_code)]
    pub(crate) fn set_x(&mut self, sample: u16) {
        self.x_density = sample;
    }
    /// Set image y-density
    ///
    /// Found in the APP(0) marker
    // currently has no caller, hence the lint exemption
    #[allow(dead_code)]
    pub(crate) fn set_y(&mut self, sample: u16) {
        self.y_density = sample;
    }
 }
--- a/third_party/zune-jpeg/src/errors.rs
+++ b/third_party/zune-jpeg/src/errors.rs
@ -0,0 +1,167 @@
 /*
 * Copyright (c) 2023.
 *
 * This software is free software;
 *
 * You can redistribute it or modify it under terms of the MIT, Apache License or Zlib license
 */
 //! Contains most common errors that may be encountered in decoding a Decoder
 //! image
 use alloc::string::String;
 use core::fmt::{Debug, Display, Formatter};
 use zune_core::bytestream::ZByteIoError;
 use crate::misc::{
    START_OF_FRAME_EXT_AR, START_OF_FRAME_EXT_SEQ, START_OF_FRAME_LOS_SEQ,
    START_OF_FRAME_LOS_SEQ_AR, START_OF_FRAME_PROG_DCT_AR
 };
 /// Common Decode errors
 ///
 /// Every fallible decoder routine reports its failure through one of
 /// these variants
 #[allow(clippy::module_name_repetitions)]
 pub enum DecodeErrors {
    /// Any other thing we do not know
    Format(String),
    /// Any other thing we do not know but we
    /// don't need to allocate space on the heap
    FormatStatic(&'static str),
    /// Illegal Magic Bytes, carries the two bytes that were found
    IllegalMagicBytes(u16),
    /// Problems with the Huffman tables in a JPEG file
    HuffmanDecode(String),
    /// Image width or height is zero
    ZeroError,
    /// Quantization table (DQT) segment error
    DqtError(String),
    /// Start of scan errors
    SosError(String),
    /// Start of frame errors
    SofError(String),
    /// The image uses an encoding scheme that is not supported
    Unsupported(UnsupportedSchemes),
    /// MCU errors
    MCUError(String),
    /// Exhausted data
    ExhaustedData,
    /// Large image dimensions(Corrupted data)?, carries the offending dimension
    LargeDimensions(usize),
    /// Too small output buffer, carries the expected size followed by
    /// the size that was provided
    TooSmallOutput(usize, usize),
    /// I/O error raised by the underlying byte stream
    IoErrors(ZByteIoError)
 }
 // `Error` is named through `std`, so this impl only exists when the
 // `std` feature is enabled; the default trait methods are used as is.
 #[cfg(feature = "std")]
 impl std::error::Error for DecodeErrors {}
 /// Wrap a static message in the non-allocating `FormatStatic` variant
 impl From<&'static str> for DecodeErrors {
    fn from(data: &'static str) -> Self {
        Self::FormatStatic(data)
    }
 }
 /// Wrap a byte stream I/O error in the `IoErrors` variant, which lets
 /// `?` be used on stream reads inside the decoder
 impl From<ZByteIoError> for DecodeErrors {
    fn from(data: ZByteIoError) -> Self {
        Self::IoErrors(data)
    }
 }
 /// Human readable description of each error
 ///
 /// `Format` and `FormatStatic` print their message using its `Debug`
 /// form (quoted); the remaining variants print a fixed sentence
 /// followed by their payload, where they have one
 impl Debug for DecodeErrors {
    fn fmt(&self, f: &mut Formatter<'_>) -> core::fmt::Result {
        match self {
            Self::Format(message) => write!(f, "{message:?}"),
            Self::FormatStatic(message) => write!(f, "{message:?}"),
            Self::IllegalMagicBytes(bytes) => {
                write!(f, "Error parsing image. Illegal start bytes:{bytes:X}")
            }
            Self::HuffmanDecode(reason) => {
                write!(f, "Error decoding huffman values: {reason}")
            }
            Self::ZeroError => {
                write!(f, "Image width or height is set to zero, cannot continue")
            }
            Self::DqtError(reason) => {
                write!(f, "Error parsing DQT segment. Reason:{reason}")
            }
            Self::SosError(reason) => {
                write!(f, "Error parsing SOS Segment. Reason:{reason}")
            }
            Self::SofError(reason) => {
                write!(f, "Error parsing SOF segment. Reason:{reason}")
            }
            Self::Unsupported(image_type) => write!(f, "{image_type:?}"),
            Self::MCUError(reason) => {
                write!(f, "Error in decoding MCU. Reason {reason}")
            }
            Self::ExhaustedData => write!(f, "Exhausted data in the image"),
            Self::LargeDimensions(dimensions) => write!(
                f,
                "Too large dimensions {dimensions},library supports up to {}",
                crate::decoder::MAX_DIMENSIONS
            ),
            Self::TooSmallOutput(expected, found) => write!(
                f,
                "Too small output, expected buffer with at least {expected} bytes but got one with {found} bytes"
            ),
            Self::IoErrors(error) => write!(f, "I/O errors {error:?}")
        }
    }
 }
 impl Display for DecodeErrors {
    fn fmt(&self, f: &mut Formatter<'_>) -> core::fmt::Result {
        write!(f, "{self:?}")
    }
 }
 /// Image encoding schemes the library recognises but cannot decode yet.
 #[derive(Eq, PartialEq, Copy, Clone)]
 pub enum UnsupportedSchemes {
    /// Extended sequential DCT, Huffman coding
    ExtendedSequentialHuffman,
    /// Lossless (sequential), Huffman coding
    LosslessHuffman,
    /// Extended sequential DCT, arithmetic coding
    ExtendedSequentialDctArithmetic,
    /// Progressive DCT, arithmetic coding
    ProgressiveDctArithmetic,
    /// Lossless (sequential), arithmetic coding
    LosslessArithmetic
 }
 /// Prints a full sentence explaining which scheme cannot be decoded
 impl Debug for UnsupportedSchemes {
    fn fmt(&self, f: &mut Formatter<'_>) -> core::fmt::Result {
        // every message is a fixed sentence, so pick it first and emit it once
        let message = match *self {
            Self::ExtendedSequentialHuffman => {
                "The library cannot yet decode images encoded using Extended Sequential Huffman  encoding scheme yet."
            }
            Self::LosslessHuffman => {
                "The library cannot yet decode images encoded with Lossless Huffman encoding scheme"
            }
            Self::ExtendedSequentialDctArithmetic => {
                "The library cannot yet decode Images Encoded with Extended Sequential DCT Arithmetic scheme"
            }
            Self::ProgressiveDctArithmetic => {
                "The library cannot yet decode images encoded with Progressive DCT Arithmetic scheme"
            }
            Self::LosslessArithmetic => {
                "The library cannot yet decode images encoded with Lossless Arithmetic encoding scheme"
            }
        };
        f.write_str(message)
    }
 }
 impl UnsupportedSchemes {
    /// Look up the unsupported scheme a marker code stands for
    ///
    /// `int` is combined with a leading `0xFF` byte into a two byte
    /// marker, which is then compared with the known start of frame
    /// marker constants
    ///
    /// # Returns
    /// `Some(UnsupportedSchemes)` if the marker refers to a specific scheme,
    /// otherwise returns `None`
    #[must_use]
    pub fn from_int(int: u8) -> Option<UnsupportedSchemes> {
        // big-endian pair [0xFF, int], i.e. 0xFF in the high byte
        let marker = 0xFF00 | u16::from(int);
        let scheme = match marker {
            START_OF_FRAME_PROG_DCT_AR => Self::ProgressiveDctArithmetic,
            START_OF_FRAME_LOS_SEQ => Self::LosslessHuffman,
            START_OF_FRAME_LOS_SEQ_AR => Self::LosslessArithmetic,
            START_OF_FRAME_EXT_SEQ => Self::ExtendedSequentialHuffman,
            START_OF_FRAME_EXT_AR => Self::ExtendedSequentialDctArithmetic,
            _ => return None
        };
        Some(scheme)
    }
 }
--- a/third_party/zune-jpeg/src/headers.rs
+++ b/third_party/zune-jpeg/src/headers.rs
@ -0,0 +1,544 @@
 /*
 * Copyright (c) 2023.
 *
 * This software is free software;
 *
 * You can redistribute it or modify it under terms of the MIT, Apache License or Zlib license
 */
 //! Decode JPEG markers/segments
 //!
 //! This file deals with decoding header information in a jpeg file
 //!
 use alloc::format;
 use alloc::string::ToString;
 use alloc::vec::Vec;
 use zune_core::bytestream::ZByteReaderTrait;
 use zune_core::colorspace::ColorSpace;
 use zune_core::log::{debug, error, trace, warn};
 use crate::components::Components;
 use crate::decoder::{ICCChunk, JpegDecoder, MAX_COMPONENTS};
 use crate::errors::DecodeErrors;
 use crate::huffman::HuffmanTable;
 use crate::misc::{SOFMarkers, UN_ZIGZAG};
 /// Parse a Define Huffman Table (DHT) segment.
 ///
 /// See **B.2.4.2 Huffman table-specification syntax**.
 ///
 /// One segment may carry several tables. Each table is validated, turned into
 /// a [`HuffmanTable`] and stored in the DC or AC slot selected by its header byte.
 ///
 /// # Errors
 /// Fails when a read from the stream fails, when the segment length, table
 /// class or table index is invalid, when the symbol counts exceed what the
 /// segment can hold, or when [`HuffmanTable::new`] rejects the table.
 #[allow(clippy::similar_names, clippy::cast_sign_loss)]
 pub(crate) fn parse_huffman<T: ZByteReaderTrait>(
    decoder: &mut JpegDecoder<T>
 ) -> Result<(), DecodeErrors> {
    // The length field counts its own two bytes, so strip them up front
    let payload_length = decoder
        .stream
        .get_u16_be_err()?
        .checked_sub(2)
        .ok_or(DecodeErrors::FormatStatic("Invalid Huffman length in image"))?;
    let mut dht_length = i32::from(payload_length);

    // Each table needs at least one info byte and sixteen count bytes
    while dht_length > 16 {
        let ht_info = decoder.stream.read_u8_err()?;
        // upper four bits: table class, 0 for DC and 1 for AC
        let dc_or_ac = (ht_info >> 4) & 0xF;
        // lower four bits: destination slot of this table
        let index = (ht_info & 0xF) as usize;

        if index >= MAX_COMPONENTS {
            return Err(DecodeErrors::HuffmanDecode(format!(
                "Invalid DHT index {index}, expected between 0 and 3"
            )));
        }
        if dc_or_ac > 1 {
            return Err(DecodeErrors::HuffmanDecode(format!(
                "Invalid DHT position {dc_or_ac}, should be 0 or 1"
            )));
        }

        // number of codes for each code length 1..=16, element 0 stays zero
        let mut num_symbols = [0_u8; 17];
        decoder.stream.read_exact_bytes(&mut num_symbols[1..17])?;
        dht_length -= 1 + 16;

        let symbols_sum: i32 = num_symbols.iter().copied().map(i32::from).sum();
        // The sum of the number of symbols cannot be greater than 256;
        if symbols_sum > 256 {
            return Err(DecodeErrors::FormatStatic(
                "Encountered Huffman table with excessive length in DHT"
            ));
        }
        // nor can it exceed what is left of the segment
        if symbols_sum > dht_length {
            return Err(DecodeErrors::HuffmanDecode(format!(
                "Excessive Huffman table of length {symbols_sum} found when header length is {dht_length}"
            )));
        }
        dht_length -= symbols_sum;

        // Symbols listed in order of increasing code length
        let mut symbols = [0_u8; 256];
        decoder
            .stream
            .read_exact_bytes(&mut symbols[0..(symbols_sum as usize)])?;

        // Build the table once, then drop it into the matching slot
        let is_dc = dc_or_ac == 0;
        let table = HuffmanTable::new(&num_symbols, symbols, is_dc, decoder.is_progressive)?;
        let slot = if is_dc {
            &mut decoder.dc_huffman_tables[index]
        } else {
            &mut decoder.ac_huffman_tables[index]
        };
        *slot = Some(table);
    }

    // Leftover bytes cannot form another table
    if dht_length > 0 {
        return Err(DecodeErrors::FormatStatic("Bogus Huffman table definition"));
    }
    Ok(())
 }
 /// Parse a Define Quantization Table (DQT) segment.
 ///
 /// See **B.2.4.1 Quantization table-specification syntax**.
 ///
 /// Every table found in the segment is converted out of zig-zag order and
 /// stored in `img.qt_tables` at the position named by its info byte.
 ///
 /// # Errors
 /// Fails when a read from the stream fails, when the remaining length cannot
 /// hold the announced table, when the precision is neither 0 nor 1, or when
 /// the table position is out of range.
 #[allow(clippy::cast_possible_truncation, clippy::needless_range_loop)]
 pub(crate) fn parse_dqt<T: ZByteReaderTrait>(img: &mut JpegDecoder<T>) -> Result<(), DecodeErrors> {
    // Segment length without the two length bytes
    let mut qt_length = img
        .stream
        .get_u16_be_err()?
        .checked_sub(2)
        .ok_or(DecodeErrors::FormatStatic(
            "Invalid DQT length. Length should be greater than 2"
        ))?;

    // A single DQT header may have multiple QT's
    while qt_length > 0 {
        let qt_info = img.stream.read_u8_err()?;
        // upper four bits: 0 = 8 bit entries, 1 = 16 bit entries
        let precision = (qt_info >> 4) as usize;
        // lower four bits: destination slot
        let table_position = (qt_info & 0x0f) as usize;
        // bytes taken by the 64 entries at this precision
        let precision_value = 64 * (precision + 1);
        // entries plus the info byte read above
        let table_size = (precision_value + 1) as u16;

        if table_size > qt_length {
            return Err(DecodeErrors::DqtError(format!("Invalid QT table bytes left :{}. Too small to construct a valid qt table which should be {} long", qt_length, precision_value + 1)));
        }

        let dct_table = match precision {
            0 => {
                let mut qt_values = [0_u8; 64];
                img.stream.read_exact_bytes(&mut qt_values)?;
                // carry out un zig-zag here
                un_zig_zag(&qt_values)
            }
            1 => {
                // 16 bit quantization tables, stored big endian
                let mut qt_values = [0_u16; 64];
                for value in qt_values.iter_mut() {
                    *value = img.stream.get_u16_be_err()?;
                }
                un_zig_zag(&qt_values)
            }
            _ => {
                return Err(DecodeErrors::DqtError(format!(
                    "Expected QT precision value of either 0 or 1, found {precision:?}"
                )));
            }
        };
        // Account for the info byte and the entries just consumed
        qt_length -= table_size;

        if table_position >= MAX_COMPONENTS {
            return Err(DecodeErrors::DqtError(format!(
                "Too large table position for QT :{table_position}, expected between 0 and 3"
            )));
        }
        img.qt_tables[table_position] = Some(dct_table);
    }
    Ok(())
 }
 /// Parse a Start of Frame header, section `B.2.2 Frame header syntax`.
 ///
 /// Records precision, dimensions and the component list on the decoder and
 /// marks the frame header as seen.
 ///
 /// # Errors
 /// Fails when a frame header was already parsed, when a read from the stream
 /// fails, when the precision is not 8 bits, when a dimension is zero or above
 /// the configured limits, when the component count is zero or disagrees with
 /// the header length, or when a component description is rejected.
 pub(crate) fn parse_start_of_frame<T: ZByteReaderTrait>(
    sof: SOFMarkers, img: &mut JpegDecoder<T>
 ) -> Result<(), DecodeErrors> {
    // Only a single frame header is accepted
    if img.seen_sof {
        return Err(DecodeErrors::SofError(
            "Two Start of Frame Markers".to_string()
        ));
    }

    // Header length, checked once the component count is known
    let length = img.stream.get_u16_be_err()?;

    // Sample precision: usually 8, but can be 12 and 16; only 8 is supported
    let dt_precision = img.stream.read_u8_err()?;
    if dt_precision != 8 {
        return Err(DecodeErrors::SofError(format!(
            "The library can only parse 8-bit images, the image has {dt_precision} bits of precision"
        )));
    }
    img.info.set_density(dt_precision);

    // Dimensions are stored height first, then width
    let height = img.stream.get_u16_be_err()?;
    img.info.set_height(height);
    let width = img.stream.get_u16_be_err()?;
    img.info.set_width(width);
    trace!("Image width  :{}", width);
    trace!("Image height :{}", height);

    // Enforce the user configurable limits before doing anything else
    let width_limit = img.options.max_width();
    if usize::from(width) > width_limit {
        return Err(DecodeErrors::Format(format!("Image width {} greater than width limit {}. If use `set_limits` if you want to support huge images", width, img.options.max_width())));
    }
    let height_limit = img.options.max_height();
    if usize::from(height) > height_limit {
        return Err(DecodeErrors::Format(format!("Image height {} greater than height limit {}. If use `set_limits` if you want to support huge images", height, img.options.max_height())));
    }
    // An image without any pixels cannot be decoded
    if width == 0 || height == 0 {
        return Err(DecodeErrors::ZeroError);
    }

    // Number of components for the image.
    let num_components = img.stream.read_u8_err()?;
    if num_components == 0 {
        return Err(DecodeErrors::SofError(
            "Number of components cannot be zero.".to_string()
        ));
    }

    // 8 fixed bytes followed by 3 bytes per component
    let expected = 8 + 3 * u16::from(num_components);
    if length != expected {
        return Err(DecodeErrors::SofError(format!(
            "Length of start of frame differs from expected {expected},value is {length}"
        )));
    }
    trace!("Image components : {}", num_components);

    match num_components {
        1 => {
            // A single component means grayscale, both for input and output
            img.input_colorspace = ColorSpace::Luma;
            img.options = img.options.jpeg_set_out_colorspace(ColorSpace::Luma);
            debug!("Overriding default colorspace set to Luma");
        }
        4 if img.input_colorspace == ColorSpace::YCbCr => {
            trace!("Input image has 4 components, defaulting to CMYK colorspace");
            // https://entropymine.wordpress.com/2018/10/22/how-is-a-jpeg-images-color-type-determined/
            img.input_colorspace = ColorSpace::CMYK;
        }
        _ => {}
    }

    // set number of components
    img.info.components = num_components;

    // Each component is described by three bytes
    let mut parsed = Vec::with_capacity(usize::from(num_components));
    let mut raw = [0; 3];
    for pos in 0..num_components {
        img.stream.read_exact_bytes(&mut raw)?;
        parsed.push(Components::from(raw, pos)?);
    }

    img.seen_sof = true;
    img.info.set_sof_marker(sof);
    img.components = parsed;
    Ok(())
 }
 /// Parse a Start of Scan (SOS) header.
 ///
 /// Reads the scan header length, the number of components in the scan, the
 /// per-component Huffman table selectors and the spectral selection /
 /// successive approximation parameters, storing them on the decoder.
 ///
 /// # Errors
 /// Fails when a read from the stream fails, when the header length does not
 /// match the component count, when the component count is outside `1..=4`,
 /// when no frame components are known, when a component id is repeated or
 /// unknown, or when `Ss`, `Se`, `Ah` or `Al` are out of range.
 pub(crate) fn parse_sos<T: ZByteReaderTrait>(
    image: &mut JpegDecoder<T>
 ) -> Result<(), DecodeErrors> {
    // Scan header length
    let ls = image.stream.get_u16_be_err()?;
    // Number of image components in scan
    let ns = image.stream.read_u8_err()?;
    // Component ids seen so far in this scan, -1 marks an unused slot
    let mut seen = [-1; { MAX_COMPONENTS + 1 }];
    image.num_scans = ns;
    // 6 fixed bytes followed by 2 bytes per component
    if ls != 6 + 2 * u16::from(ns) {
        return Err(DecodeErrors::SosError(format!(
            "Bad SOS length {ls},corrupt jpeg"
        )));
    }
    // Check number of components; the message matches the accepted range.
    if !(1..5).contains(&ns) {
        return Err(DecodeErrors::SosError(format!(
            "Number of components in start of scan should be between 1 and 4. Found {ns}"
        )));
    }
    if image.info.components == 0 {
        return Err(DecodeErrors::FormatStatic(
            "Error decoding SOF Marker, Number of components cannot be zero."
        ));
    }
    // consume spec parameters
    for i in 0..ns {
        // Component selector, must name one of the frame components
        let id = image.stream.read_u8_err()?;
        if seen.contains(&i32::from(id)) {
            return Err(DecodeErrors::SofError(format!(
                "Duplicate ID {id} seen twice in the same component"
            )));
        }
        seen[usize::from(i)] = i32::from(id);
        // DC and AC huffman table position
        // top 4 bits contain dc huffman destination table
        // lower four bits contain ac huffman destination table
        let y = image.stream.read_u8_err()?;
        // Locate the frame component carrying this id. Only the first
        // `info.components` entries are considered, and the search never
        // indexes past the end of the component list.
        let component_count = usize::from(image.info.components);
        let found = image
            .components
            .iter()
            .take(component_count)
            .position(|component| component.id == id);
        let j = match found {
            Some(position) => position,
            None => {
                return Err(DecodeErrors::SofError(format!(
                    "Invalid component id {}, expected a value between 0 and {}",
                    id,
                    image.components.len()
                )));
            }
        };
        image.components[j].dc_huff_table = usize::from((y >> 4) & 0xF);
        image.components[j].ac_huff_table = usize::from(y & 0xF);
        // remember which frame component the i-th scan component refers to
        image.z_order[usize::from(i)] = j;
    }
    // Collect the component spec parameters
    // This is only needed for progressive images but I'll read
    // them in order to ensure they are correct according to the spec
    // Extract progressive information
    // https://www.w3.org/Graphics/JPEG/itu-t81.pdf
    // Page 42
    // Start of spectral / predictor selection. (between 0 and 63)
    image.spec_start = image.stream.read_u8_err()?;
    // End of spectral selection
    image.spec_end = image.stream.read_u8_err()?;
    let bit_approx = image.stream.read_u8_err()?;
    // successive approximation bit position high
    image.succ_high = bit_approx >> 4;
    if image.spec_end > 63 {
        return Err(DecodeErrors::SosError(format!(
            "Invalid Se parameter {}, range should be 0-63",
            image.spec_end
        )));
    }
    if image.spec_start > 63 {
        return Err(DecodeErrors::SosError(format!(
            "Invalid Ss parameter {}, range should be 0-63",
            image.spec_start
        )));
    }
    if image.succ_high > 13 {
        // Report the offending Ah value, not the not-yet-updated Al field
        return Err(DecodeErrors::SosError(format!(
            "Invalid Ah parameter {}, range should be 0-13",
            image.succ_high
        )));
    }
    // successive approximation bit position low
    image.succ_low = bit_approx & 0xF;
    if image.succ_low > 13 {
        return Err(DecodeErrors::SosError(format!(
            "Invalid Al parameter {}, range should be 0-13",
            image.succ_low
        )));
    }
    trace!(
        "Ss={}, Se={} Ah={} Al={}",
        image.spec_start,
        image.spec_end,
        image.succ_high,
        image.succ_low
    );
    Ok(())
 }
 /// Parse an Adobe APP14 segment.
 ///
 /// When the segment starts with the `Adobe` identifier, its colour transform
 /// byte selects the input colorspace. Any remaining bytes of the segment are
 /// skipped.
 ///
 /// # Errors
 /// Fails when the segment length is below 14, when stream access fails, when
 /// the transform byte is unknown, or, in strict mode, when the `Adobe`
 /// identifier is missing.
 pub(crate) fn parse_app14<T: ZByteReaderTrait>(
    decoder: &mut JpegDecoder<T>
 ) -> Result<(), DecodeErrors> {
    // Segment length, including the two length bytes themselves
    let segment_len = usize::from(decoder.stream.get_u16_be());
    if segment_len < 2 {
        return Err(DecodeErrors::FormatStatic("Too small APP14 length"));
    }
    if segment_len < 14 {
        return Err(DecodeErrors::FormatStatic(
            "Too short of a length for App14 segment"
        ));
    }

    let is_adobe = decoder.stream.peek_at(0, 5)? == b"Adobe";
    let remaining = if is_adobe {
        // move stream 6 bytes to remove adobe id
        decoder.stream.skip(6)?;
        // skip version, flags0 and flags1
        decoder.stream.skip(5)?;
        // get color transform
        let transform = decoder.stream.read_u8();
        // https://exiftool.org/TagNames/JPEG.html#Adobe
        decoder.input_colorspace = match transform {
            0 => ColorSpace::CMYK,
            1 => ColorSpace::YCbCr,
            2 => ColorSpace::YCCK,
            _ => {
                return Err(DecodeErrors::Format(format!(
                    "Unknown Adobe colorspace {transform}"
                )))
            }
        };
        // Already accounted for:
        // length (2) + adobe id (6) + version and flags (5) + transform (1)
        segment_len.saturating_sub(14)
    } else if decoder.options.strict_mode() {
        return Err(DecodeErrors::FormatStatic("Corrupt Adobe App14 segment"));
    } else {
        error!("Not a valid Adobe APP14 Segment");
        // only the length bytes were consumed
        segment_len.saturating_sub(2)
    };

    // Skip whatever is left of the segment, we do not need it
    decoder.stream.skip(remaining)?;
    Ok(())
 }
 /// Parse the APP1 segment
 ///
 /// When the payload starts with the `Exif\0\0` identifier, the bytes that
 /// follow it are copied into `decoder.exif_data`. The whole segment is
 /// skipped afterwards either way.
 ///
 /// # Errors
 /// Fails when the segment length is below 2 or when stream access fails.
 pub(crate) fn parse_app1<T: ZByteReaderTrait>(
    decoder: &mut JpegDecoder<T>
 ) -> Result<(), DecodeErrors> {
    let segment_len = usize::from(decoder.stream.get_u16_be());
    if segment_len < 2 {
        return Err(DecodeErrors::FormatStatic("Too small app1 length"));
    }
    // Payload size once the two length bytes are removed
    let mut remaining = segment_len - 2;

    let has_exif = remaining > 6 && decoder.stream.peek_at(0, 6)? == b"Exif\x00\x00";
    if has_exif {
        trace!("Exif segment present");
        // step over the identifier we just peeked at
        decoder.stream.skip(6)?;
        remaining -= 6;
        // copy the exif payload without consuming it, it is skipped below
        let exif_bytes = decoder.stream.peek_at(0, remaining)?.to_vec();
        decoder.exif_data = Some(exif_bytes);
    } else {
        warn!("Wrongly formatted exif tag");
    }

    decoder.stream.skip(remaining)?;
    Ok(())
 }
 /// Parse the APP2 segment
 ///
 /// When the payload starts with the `ICC_PROFILE\0` identifier, the sequence
 /// number, marker count and profile bytes are stored as a new [`ICCChunk`]
 /// in `decoder.icc_data`. The rest of the segment is skipped afterwards.
 ///
 /// # Errors
 /// Fails when the segment length is below 2 or when stream access fails.
 pub(crate) fn parse_app2<T: ZByteReaderTrait>(
    decoder: &mut JpegDecoder<T>
 ) -> Result<(), DecodeErrors> {
    let segment_len = usize::from(decoder.stream.get_u16_be());
    if segment_len < 2 {
        return Err(DecodeErrors::FormatStatic("Too small app2 segment"));
    }
    // Payload size once the two length bytes are removed
    let mut remaining = segment_len - 2;

    if remaining > 14 && decoder.stream.peek_at(0, 12)? == *b"ICC_PROFILE\0" {
        trace!("ICC Profile present");
        // step over the 12 byte identifier
        decoder.stream.skip(12)?;
        let seq_no = decoder.stream.read_u8();
        let num_markers = decoder.stream.read_u8();
        // identifier (12) plus the two bytes read above
        remaining -= 12 + 2;
        // copy the profile bytes without consuming them, they are skipped below
        let data = decoder.stream.peek_at(0, remaining)?.to_vec();
        decoder.icc_data.push(ICCChunk {
            seq_no,
            num_markers,
            data
        });
    }

    decoder.stream.skip(remaining)?;
    Ok(())
 }
 /// Reorder the first 64 zig-zag ordered values of `a` into the positions
 /// given by `UN_ZIGZAG`, widening every value to `i32`.
 ///
 /// # Panics
 /// Panics if `a` holds fewer than 64 values.
 fn un_zig_zag<T>(a: &[T]) -> [i32; 64]
 where
    T: Default + Copy,
    i32: core::convert::From<T>
 {
    let mut natural = [0_i32; 64];
    // Pair each of the 64 inputs with its destination index
    for (&position, &value) in UN_ZIGZAG.iter().zip(&a[..64]) {
        natural[position] = i32::from(value);
    }
    natural
 }
--- a/third_party/zune-jpeg/src/huffman.rs
+++ b/third_party/zune-jpeg/src/huffman.rs
@ -0,0 +1,254 @@
 /*
 * Copyright (c) 2023.
 *
 * This software is free software;
 *
 * You can redistribute it or modify it under terms of the MIT, Apache License or Zlib license
 */
 //! This file contains a single struct `HuffmanTable` that
 //! stores Huffman tables needed during `BitStream` decoding.
 #![allow(clippy::similar_names, clippy::module_name_repetitions)]
 use alloc::string::ToString;
 use crate::errors::DecodeErrors;
 /// Determines how many bits of lookahead we have for our bitstream decoder.
 ///
 /// The fast lookup tables of `HuffmanTable` hold `1 << HUFF_LOOKAHEAD`
 /// entries, one per possible lookahead bit pattern.
 pub const HUFF_LOOKAHEAD: u8 = 9;
 /// A struct which contains necessary tables for decoding a JPEG
 /// huffman encoded bitstream
 pub struct HuffmanTable {
    // element `[0]` of `maxcode` and `offset` is unused
    /// Upper bound for codes of length k: one more than the last code of
    /// that length, pre-shifted left by `16 - k` bits.
    ///
    /// Set to `-1` when no code has length k; element 17 holds a sentinel
    /// which ensures that decoding terminates.
    pub(crate) maxcode: [i32; 18],
    /// offset for codes of length k
    ///
    /// Index of the first symbol with code length k minus the first code of
    /// that length.
    /// Element 0 is unused
    pub(crate) offset:  [i32; 18],
    /// lookup table for fast decoding
    ///
    /// top  bits above HUFF_LOOKAHEAD contain the code length.
    ///
    /// Lower (8) bits contain the symbol in order of increasing code length.
    ///
    /// Entries not covered by a short enough code keep the "too long" marker,
    /// whose length field is `HUFF_LOOKAHEAD + 1`.
    pub(crate) lookup:  [i32; 1 << HUFF_LOOKAHEAD],
    /// A table which can be used to decode small AC coefficients and
    /// do an equivalent of receive_extend
    ///
    /// Only built for AC tables, `None` otherwise. A non-zero entry packs the
    /// coefficient value in the upper 8 bits, the run length in bits 4..=7
    /// and the total number of bits consumed in the lower 4 bits.
    pub(crate) ac_lookup: Option<[i16; 1 << HUFF_LOOKAHEAD]>,
    /// Symbols in order of increasing code length, as stored in a JPEG DHT
    /// marker
    pub(crate) values: [u8; 256]
 }
 impl HuffmanTable {
    /// Build a Huffman table from the contents of a DHT marker
    ///
    /// # Arguments
    /// - `codes`: number of codes for each code length, indexed by length
    ///   (entries 1..=16 are read as lengths)
    /// - `values`: symbols in order of increasing code length
    /// - `is_dc`: whether this is a DC table; DC tables get their symbols
    ///   range-checked, other tables get the fast AC lookup table
    /// - `is_progressive`: forwarded to the table builder, which currently
    ///   ignores it
    ///
    /// # Errors
    /// Returns `DecodeErrors::HuffmanDecode` when the code counts do not
    /// describe a legal Huffman code or when a DC table has a symbol above 15.
    pub fn new(
        codes: &[u8; 17], values: [u8; 256], is_dc: bool, is_progressive: bool
    ) -> Result<HuffmanTable, DecodeErrors> {
        // marker for lookup entries whose code is longer than HUFF_LOOKAHEAD bits
        let too_long_code = (i32::from(HUFF_LOOKAHEAD) + 1) << HUFF_LOOKAHEAD;
        let mut p = HuffmanTable {
            maxcode: [0; 18],
            offset: [0; 18],
            lookup: [too_long_code; 1 << HUFF_LOOKAHEAD],
            values,
            ac_lookup: None
        };
        p.make_derived_table(is_dc, is_progressive, codes)?;
        Ok(p)
    }
    /// Create a new huffman tables with values that aren't fixed
    /// used by fill_mjpeg_tables
    ///
    /// `values` is copied into a zero padded 256 entry buffer before being
    /// handed to [`HuffmanTable::new`].
    ///
    /// # Panics
    /// Panics if `values` holds more than 256 entries.
    pub fn new_unfilled(
        codes: &[u8; 17], values: &[u8], is_dc: bool, is_progressive: bool
    ) -> Result<HuffmanTable, DecodeErrors> {
        let mut buf = [0; 256];
        buf[..values.len()].copy_from_slice(values);
        HuffmanTable::new(codes, buf, is_dc, is_progressive)
    }
    /// Compute derived values for a Huffman table
    ///
    /// Fills `maxcode`, `offset`, `lookup` and, for non DC tables,
    /// `ac_lookup` from the code counts in `bits` and the symbols already
    /// stored in `self.values`.
    ///
    /// This routine performs some validation checks on the table
    ///
    /// # Errors
    /// Returns `DecodeErrors::HuffmanDecode` when the counts do not form a
    /// legal Huffman code or when a DC table has a symbol above 15.
    #[allow(
        clippy::cast_possible_truncation,
        clippy::cast_possible_wrap,
        clippy::cast_sign_loss,
        clippy::too_many_lines,
        clippy::needless_range_loop
    )]
    fn make_derived_table(
        &mut self, is_dc: bool, _is_progressive: bool, bits: &[u8; 17]
    ) -> Result<(), DecodeErrors> {
        // code length of each symbol, terminated by a zero entry
        let mut huff_size = [0; 257];
        // Huffman code assigned to each symbol
        let mut huff_code: [u32; 257] = [0; 257];
        // figure C.1 make table of Huffman code length for each symbol
        let mut p = 0;
        for l in 1..=16 {
            let mut i = i32::from(bits[l]);
            // the DHT parser checks for table overrun before calling this,
            // so it is not checked again here
            while i != 0 {
                huff_size[p] = l as u8;
                p += 1;
                i -= 1;
            }
        }
        huff_size[p] = 0;
        let num_symbols = p;
        // Generate the codes themselves
        // We also validate that the counts represent a legal Huffman code tree
        let mut code = 0;
        let mut si = i32::from(huff_size[0]);
        p = 0;
        while huff_size[p] != 0 {
            while i32::from(huff_size[p]) == si {
                huff_code[p] = code;
                code += 1;
                p += 1;
            }
            // maximum code of length si, pre-shifted by 16-k bits
            self.maxcode[si as usize] = (code << (16 - si)) as i32;
            // code is now 1 more than the last code used for code-length si; but
            // it must still fit in si bits, since no code is allowed to be all ones.
            if (code as i32) >= (1 << si) {
                return Err(DecodeErrors::HuffmanDecode("Bad Huffman Table".to_string()));
            }
            code <<= 1;
            si += 1;
        }
        // Figure F.15 generate decoding tables for bit-sequential decoding
        p = 0;
        for l in 0..=16 {
            if bits[l] == 0 {
                // -1 if no codes of this length
                self.maxcode[l] = -1;
            } else {
                // offset[l] = index of the first symbol with code length l
                // minus the first (minimum) code of length l
                self.offset[l] = (p as i32) - (huff_code[p]) as i32;
                p += usize::from(bits[l]);
            }
        }
        self.offset[17] = 0;
        // we ensure that decode terminates
        self.maxcode[17] = 0x000F_FFFF;
        /*
         * Compute lookahead tables to speed up decoding.
         * Every entry starts out as the "too long" marker
         * (set when the table was created in `new`),
         * then we iterate through the Huffman codes that are short enough and
         * fill in all the entries that correspond to bit sequences starting
         * with that code.
         */
        p = 0;
        for l in 1..=HUFF_LOOKAHEAD {
            for _ in 1..=i32::from(bits[usize::from(l)]) {
                // l -> Current code length,
                // p => Its index in huff_code and self.values
                // Generate left justified code followed by all possible bit sequences
                let mut look_bits = (huff_code[p] as usize) << (HUFF_LOOKAHEAD - l);
                for _ in 0..1 << (HUFF_LOOKAHEAD - l) {
                    self.lookup[look_bits] =
                        (i32::from(l) << HUFF_LOOKAHEAD) | i32::from(self.values[p]);
                    look_bits += 1;
                }
                p += 1;
            }
        }
        // build an ac table that does an equivalent of decode and receive_extend
        if !is_dc {
            // maps every lookahead bit pattern to the index of the symbol
            // whose code it starts with, 255 when there is no short enough code
            let mut fast = [255; 1 << HUFF_LOOKAHEAD];
            // Iterate over number of symbols
            for i in 0..num_symbols {
                // get code size for an item
                let s = huff_size[i];
                if s <= HUFF_LOOKAHEAD {
                    // if it's lower than what we need for our lookup table create the table
                    let c = (huff_code[i] << (HUFF_LOOKAHEAD - s)) as usize;
                    let m = (1 << (HUFF_LOOKAHEAD - s)) as usize;
                    for j in 0..m {
                        fast[c + j] = i as i16;
                    }
                }
            }
            // build a table that decodes both magnitude and value of small ACs in
            // one go.
            let mut fast_ac = [0; 1 << HUFF_LOOKAHEAD];
            for i in 0..(1 << HUFF_LOOKAHEAD) {
                let fast_v = fast[i];
                if fast_v < 255 {
                    // get symbol value from AC table
                    let rs = self.values[fast_v as usize];
                    // shift by 4 to get run length
                    let run = i16::from((rs >> 4) & 15);
                    // get magnitude bits stored at the lower 4 bits
                    let mag_bits = i16::from(rs & 15);
                    // length of the bit we've read
                    let len = i16::from(huff_size[fast_v as usize]);
                    // only usable when code and magnitude bits both fit in the lookahead
                    if mag_bits != 0 && (len + mag_bits) <= i16::from(HUFF_LOOKAHEAD) {
                        // magnitude code followed by receive_extend code
                        let mut k = (((i as i16) << len) & ((1 << HUFF_LOOKAHEAD) - 1))
                            >> (i16::from(HUFF_LOOKAHEAD) - mag_bits);
                        let m = 1 << (mag_bits - 1);
                        if k < m {
                            k += (!0_i16 << mag_bits) + 1;
                        };
                        // if result is small enough fit into fast ac table
                        if (-128..=127).contains(&k) {
                            // value in the upper byte, run in bits 4..=7,
                            // bits consumed in the lower 4 bits
                            fast_ac[i] = (k << 8) + (run << 4) + (len + mag_bits);
                        }
                    }
                }
            }
            self.ac_lookup = Some(fast_ac);
        }
        // Validate symbols as being reasonable
        // For AC tables, we make no check, but accept all byte values 0..255
        // For DC tables, we require symbols to be in range 0..15
        if is_dc {
            for i in 0..num_symbols {
                let sym = self.values[i];
                if sym > 15 {
                    return Err(DecodeErrors::HuffmanDecode("Bad Huffman Table".to_string()));
                }
            }
        }
        Ok(())
    }
 }
--- a/third_party/zune-jpeg/src/idct.rs
+++ b/third_party/zune-jpeg/src/idct.rs
@ -0,0 +1,147 @@
 /*
 * Copyright (c) 2023.
 *
 * This software is free software;
 *
 * You can redistribute it or modify it under terms of the MIT, Apache License or Zlib license
 */
 //! Routines for IDCT
 //!
 //! Essentially we provide 2 routines for IDCT, a scalar implementation and a not super optimized
 //! AVX2 one, i'll talk about them here.
 //!
 //! There are 2 reasons why we have the avx one
 //! 1. No one compiles with -C target-features=avx2 hence binaries won't probably take advantage(even
 //! if it exists).
 //! 2. AVX employs zero short circuit in a way the scalar code cannot employ it.
 //!     - AVX does this by checking for MCU's whose 63 AC coefficients are zero; if so, it writes
 //!        values directly, otherwise it goes the long way of calculating.
 //!     -   Although this can be trivially implemented in the scalar version, it generates code
 //!         I'm not happy with (a scalar version basically loops, and that is too many branches for me).
 //!         The avx one does a better job by using bitwise or's (`_mm256_or_si256`), which is orders of
 //!         magnitude faster than anything I could come up with
 //!
 //! The AVX code also has some cool transpose_u16 instructions which look so complicated to be cool
 //! (spoiler alert, i barely understand how it works, that's why I credited the owner).
 //!
 #![allow(
    clippy::excessive_precision,
    clippy::unreadable_literal,
    clippy::module_name_repetitions,
    unused_parens,
    clippy::wildcard_imports
 )]
 use zune_core::log::debug;
 use zune_core::options::DecoderOptions;
 use crate::decoder::IDCTPtr;
 use crate::idct::scalar::idct_int;
 #[cfg(feature = "x86")]
 pub mod avx2;
 #[cfg(feature = "neon")]
 pub mod neon;
 pub mod scalar;
 /// Pick the IDCT routine to use for the given decoder options.
 ///
 /// A vectorised routine is returned when it was compiled in for the current
 /// target and the options allow it, otherwise the scalar integer routine
 /// is used.
 #[allow(unused_variables)]
 pub fn choose_idct_func(options: &DecoderOptions) -> IDCTPtr {
    #[cfg(all(
        feature = "x86",
        any(target_arch = "x86", target_arch = "x86_64")
    ))]
    {
        if options.use_avx2() {
            debug!("Using vector integer IDCT");
            // AVX2 flavour
            return crate::idct::avx2::idct_avx2;
        }
    }
    #[cfg(all(feature = "neon", target_arch = "aarch64"))]
    {
        if options.use_neon() {
            debug!("Using vector integer IDCT");
            // NEON flavour
            return crate::idct::neon::idct_neon;
        }
    }
    // Nothing faster is available, fall back to the generic routine
    debug!("Using scalar integer IDCT");
    idct_int
 }
 #[cfg(test)]
 #[allow(unreachable_code, dead_code)]
 mod tests {
    use super::*;

    /// Run the platform specific IDCT and the scalar IDCT on copies of the
    /// same coefficients and require identical output.
    fn assert_matches_scalar(coefficients: [i32; 64]) {
        let stride = 8;
        let mut vector_input = coefficients;
        let mut scalar_input = coefficients;
        let mut output_scalar = [0_i16; 64];
        let mut output_vector = [0_i16; 64];
        idct_fnc()(&mut vector_input, &mut output_vector, stride);
        idct_int(&mut scalar_input, &mut output_scalar, stride);
        assert_eq!(output_scalar, output_vector, "IDCT and scalar do not match");
    }

    #[test]
    fn idct_test0() {
        assert_matches_scalar([10; 64]);
    }

    #[test]
    fn do_idct_test1() {
        assert_matches_scalar([14; 64]);
    }

    #[test]
    fn do_idct_test2() {
        // only the first and the last coefficient are set
        let mut coefficients = [0; 64];
        coefficients[0] = 255;
        coefficients[63] = -256;
        assert_matches_scalar(coefficients);
    }

    #[test]
    fn do_idct_zeros() {
        assert_matches_scalar([0; 64]);
    }

    /// The IDCT routine under test: a vector routine when one is compiled in
    /// for this target, the scalar one otherwise.
    fn idct_fnc() -> IDCTPtr {
        #[cfg(all(feature = "neon", target_arch = "aarch64"))]
        {
            return crate::idct::neon::idct_neon;
        }
        #[cfg(all(
            feature = "x86",
            any(target_arch = "x86", target_arch = "x86_64")
        ))]
        {
            return crate::idct::avx2::idct_avx2;
        }
        idct_int
    }
 }
--- a/third_party/zune-jpeg/src/idct/avx2.rs
+++ b/third_party/zune-jpeg/src/idct/avx2.rs
@ -0,0 +1,288 @@
 /*
 * Copyright (c) 2023.
 *
 * This software is free software;
 *
 * You can redistribute it or modify it under terms of the MIT, Apache License or Zlib license
 */
 #![cfg(any(target_arch = "x86", target_arch = "x86_64"))]
 //! AVX optimised IDCT.
 //!
 //! Okay not thaat optimised.
 //!
 //!
 //! # The implementation
 //! The implementation is neatly broken down into two operations.
 //!
 //! 1. Test for zeroes
 //! > There is a shortcut for the IDCT: when all AC values are zero, we can get the answer really quickly
 //!  by propagating 1/8th of the DC coefficient of the block to the whole block and level shifting.
 //!
 //! 2. If the above fails, we proceed to carry out the IDCT as a two-pass, one-dimensional algorithm.
 //! It does two whole scans, each carrying out a 1-D IDCT on all items.
 //! After each scan the data is transposed in register (thank you x86 SIMD powers), and after the
 //! first transpose the second pass is carried out.
 //!
 //! The code is not super optimized, it produces bit identical results with scalar code hence it's
 //! `mm256_add_epi16`
 //! and it also has the advantage of making this implementation easy to maintain.
 #![cfg(feature = "x86")]
 #![allow(dead_code)]
 #[cfg(target_arch = "x86")]
 use core::arch::x86::*;
 #[cfg(target_arch = "x86_64")]
 use core::arch::x86_64::*;
 use crate::unsafe_utils::{transpose, YmmRegister};
 const SCALE_BITS: i32 = 512 + 65536 + (128 << 17);
 /// AVX2 entry point for the 8x8 integer IDCT; forwards to [`idct_int_avx2_inner`].
 ///
 /// * `in_vector` - the 64 input coefficients of one block.
 /// * `out_vector` - destination samples; eight rows of eight values are written,
 ///   with consecutive rows starting `stride` elements apart.
 /// * `stride` - distance, in elements, between row starts in `out_vector`.
 ///
 /// SAFETY
 /// ------
 ///
 /// This is a safe `fn`, but it unconditionally runs code compiled for AVX2.
 /// It is the responsibility of the CALLER to ensure that this function is
 /// only called in contexts where the CPU supports it.
 ///
 /// # Panics
 /// Panics if any destination row `pos..pos + 8` lies outside `out_vector`.
 ///
 /// For documentation see module docs.
 pub fn idct_avx2(in_vector: &mut [i32; 64], out_vector: &mut [i16], stride: usize) {
    unsafe {
        // The real work lives in a separate function because that function
        // must be flagged with #[target_feature] so that the compiler is
        // allowed to emit AVX2 instructions for it; this wrapper only exposes
        // it behind a safe signature.
        idct_int_avx2_inner(in_vector, out_vector, stride);
    }
 }
 /// 8x8 integer IDCT implemented with AVX2 intrinsics.
 ///
 /// Loads the 64 `i32` coefficients of `in_vector` as eight 256-bit rows. If
 /// every AC coefficient is zero, the whole block is filled with the
 /// level-shifted, clamped DC value. Otherwise two one-dimensional passes are
 /// run, each followed by an in-register transpose, and the results are packed
 /// to `i16`, clamped to `0..=255` and written to `out_vector`, one row of
 /// eight values every `stride` elements.
 ///
 /// # Safety
 /// The CPU executing this function must support AVX2.
 ///
 /// # Panics
 /// Panics if any destination row `pos..pos + 8` is out of bounds for `out_vector`.
 #[target_feature(enable = "avx2")]
 #[allow(
    clippy::too_many_lines,
    clippy::cast_possible_truncation,
    clippy::similar_names,
    clippy::op_ref,
    unused_assignments,
    clippy::zero_prefixed_literal
 )]
 pub unsafe fn idct_int_avx2_inner(
    in_vector: &mut [i32; 64], out_vector: &mut [i16], stride: usize
 ) {
    // Write cursor into `out_vector`; advanced by `stride` after every row.
    let mut pos = 0;
    // load into registers
    //
    // The coefficients are already i32, so each unaligned 256-bit load picks
    // up one full row of eight values. Calculations stay in 32 bits and are
    // only narrowed to i16 once the IDCT is done.
    let rw0 = _mm256_loadu_si256(in_vector[00..].as_ptr().cast());
    let rw1 = _mm256_loadu_si256(in_vector[08..].as_ptr().cast());
    let rw2 = _mm256_loadu_si256(in_vector[16..].as_ptr().cast());
    let rw3 = _mm256_loadu_si256(in_vector[24..].as_ptr().cast());
    let rw4 = _mm256_loadu_si256(in_vector[32..].as_ptr().cast());
    let rw5 = _mm256_loadu_si256(in_vector[40..].as_ptr().cast());
    let rw6 = _mm256_loadu_si256(in_vector[48..].as_ptr().cast());
    let rw7 = _mm256_loadu_si256(in_vector[56..].as_ptr().cast());
    // Forward DCT and quantization may cause all the AC terms to be zero, for such
    // cases we can try to accelerate it
    // Whenever the array has 63 zeroes (everything but the DC term), its idct is
    // (arr[0]>>3) or (arr[0]/8) propagated to all the elements.
    // We first test to see if all AC elements are zero and if they are, we go the
    // short way.
    //
    // This reduces IDCT overhead from about 39% to 18 %, almost half
    // Do another load for the first row, we don't want to check DC value, because
    // we only care about AC terms
    // (elements 1..=8: the AC terms of row 0 plus the first element of row 1).
    let rw8 = _mm256_loadu_si256(in_vector[1..].as_ptr().cast());
    let zero = _mm256_setzero_si256();
    // Each movemask is -1 (all 32 bits set) only when every lane of the
    // compared vector equals zero, so the eight masks sum to -8 exactly when
    // all AC terms are zero. The 64-bit compares used for rows 3..=7 yield
    // the same all-ones mask in that case as the 32-bit ones.
    let mut non_zero = 0;
    non_zero += _mm256_movemask_epi8(_mm256_cmpeq_epi32(rw8, zero));
    non_zero += _mm256_movemask_epi8(_mm256_cmpeq_epi32(rw1, zero));
    non_zero += _mm256_movemask_epi8(_mm256_cmpeq_epi32(rw2, zero));
    non_zero += _mm256_movemask_epi8(_mm256_cmpeq_epi64(rw3, zero));
    non_zero += _mm256_movemask_epi8(_mm256_cmpeq_epi64(rw4, zero));
    non_zero += _mm256_movemask_epi8(_mm256_cmpeq_epi64(rw5, zero));
    non_zero += _mm256_movemask_epi8(_mm256_cmpeq_epi64(rw6, zero));
    non_zero += _mm256_movemask_epi8(_mm256_cmpeq_epi64(rw7, zero));
    if non_zero == -8 {
        // AC terms all zero: the idct of the block is coeff[0] / 8 + 128 (bias),
        // clamped to 0..=255 while still an i32 and only then narrowed to i16.
        let idct_value = _mm_set1_epi16(((in_vector[0] >> 3) + 128).clamp(0, 255) as i16);
        // Stores one 128-bit row (eight i16) at `$pos` (bounds-checked via
        // `get_mut(..).unwrap()`), then advances `$pos` to the next row.
        macro_rules! store {
            ($pos:tt,$value:tt) => {
                // store
                _mm_storeu_si128(
                    out_vector
                        .get_mut($pos..$pos + 8)
                        .unwrap()
                        .as_mut_ptr()
                        .cast(),
                    $value
                );
                $pos += stride;
            };
        }
        // Eight identical rows.
        store!(pos, idct_value);
        store!(pos, idct_value);
        store!(pos, idct_value);
        store!(pos, idct_value);
        store!(pos, idct_value);
        store!(pos, idct_value);
        store!(pos, idct_value);
        store!(pos, idct_value);
        return;
    }
    // General path: wrap the raw registers in `YmmRegister` so the arithmetic
    // operators used in `dct_pass!` below apply to them.
    let mut row0 = YmmRegister { mm256: rw0 };
    let mut row1 = YmmRegister { mm256: rw1 };
    let mut row2 = YmmRegister { mm256: rw2 };
    let mut row3 = YmmRegister { mm256: rw3 };
    let mut row4 = YmmRegister { mm256: rw4 };
    let mut row5 = YmmRegister { mm256: rw5 };
    let mut row6 = YmmRegister { mm256: rw6 };
    let mut row7 = YmmRegister { mm256: rw7 };
    // One 1-D IDCT pass over row0..row7, in place.
    // `$SCALE_BITS` is the bias added to the even part before the final shift
    // and `$scale` is the arithmetic right shift applied to each result.
    macro_rules! dct_pass {
        ($SCALE_BITS:tt,$scale:tt) => {
            // There are a lot of ways to do this
            // but to keep it simple(and beautiful), ill make a direct translation of the
            // scalar code to also make this code fully transparent(this version and the non
            // avx one should produce identical results.)
            // even part
            let p1 = (row2 + row6) * 2217;
            let mut t2 = p1 + row6 * -7567;
            let mut t3 = p1 + row2 * 3135;
            let mut t0 = YmmRegister {
                mm256: _mm256_slli_epi32((row0 + row4).mm256, 12)
            };
            let mut t1 = YmmRegister {
                mm256: _mm256_slli_epi32((row0 - row4).mm256, 12)
            };
            let x0 = t0 + t3 + $SCALE_BITS;
            let x3 = t0 - t3 + $SCALE_BITS;
            let x1 = t1 + t2 + $SCALE_BITS;
            let x2 = t1 - t2 + $SCALE_BITS;
            // odd part
            let p3 = row7 + row3;
            let p4 = row5 + row1;
            let p1 = row7 + row1;
            let p2 = row5 + row3;
            let p5 = (p3 + p4) * 4816;
            t0 = row7 * 1223;
            t1 = row5 * 8410;
            t2 = row3 * 12586;
            t3 = row1 * 6149;
            let p1 = p5 + p1 * -3685;
            let p2 = p5 + (p2 * -10497);
            let p3 = p3 * -8034;
            let p4 = p4 * -1597;
            t3 += p1 + p4;
            t2 += p2 + p3;
            t1 += p2 + p4;
            t0 += p1 + p3;
            // combine even and odd parts and scale back down
            row0.mm256 = _mm256_srai_epi32((x0 + t3).mm256, $scale);
            row1.mm256 = _mm256_srai_epi32((x1 + t2).mm256, $scale);
            row2.mm256 = _mm256_srai_epi32((x2 + t1).mm256, $scale);
            row3.mm256 = _mm256_srai_epi32((x3 + t0).mm256, $scale);
            row4.mm256 = _mm256_srai_epi32((x3 - t0).mm256, $scale);
            row5.mm256 = _mm256_srai_epi32((x2 - t1).mm256, $scale);
            row6.mm256 = _mm256_srai_epi32((x1 - t2).mm256, $scale);
            row7.mm256 = _mm256_srai_epi32((x0 - t3).mm256, $scale);
        };
    }
    // First pass (same bias 512 and shift 10 as the scalar code's first loop)
    dct_pass!(512, 10);
    transpose(
        &mut row0, &mut row1, &mut row2, &mut row3, &mut row4, &mut row5, &mut row6, &mut row7
    );
    // Second pass, with the rounding + level-shift bias and the final shift
    dct_pass!(SCALE_BITS, 17);
    transpose(
        &mut row0, &mut row1, &mut row2, &mut row3, &mut row4, &mut row5, &mut row6, &mut row7
    );
    // Pack i32 to i16's,
    // clamp them to be between 0-255
    // Undo shuffling
    // Store back to array
    //
    // `$x` and `$y` are two consecutive rows; `$index` is the write cursor,
    // advanced by `stride` after each of the two stored rows.
    macro_rules! permute_store {
        ($x:tt,$y:tt,$index:tt,$out:tt) => {
            // Saturating pack; works per 128-bit lane, so the two rows come
            // out interleaved in 64-bit chunks.
            let a = _mm256_packs_epi32($x, $y);
            // Clamp the values after packing, we can clamp more values at once
            let b = clamp_avx(a);
            // Undo the lane interleaving so the low half is row `$x` and the
            // high half is row `$y`
            let c = _mm256_permute4x64_epi64(b, shuffle(3, 1, 2, 0));
            // store first vector
            _mm_storeu_si128(
                ($out)
                    .get_mut($index..$index + 8)
                    .unwrap()
                    .as_mut_ptr()
                    .cast(),
                _mm256_extractf128_si256::<0>(c)
            );
            $index += stride;
            // second vector
            _mm_storeu_si128(
                ($out)
                    .get_mut($index..$index + 8)
                    .unwrap()
                    .as_mut_ptr()
                    .cast(),
                _mm256_extractf128_si256::<1>(c)
            );
            $index += stride;
        };
    }
    // Pack and write the values back to the array
    permute_store!((row0.mm256), (row1.mm256), pos, out_vector);
    permute_store!((row2.mm256), (row3.mm256), pos, out_vector);
    permute_store!((row4.mm256), (row5.mm256), pos, out_vector);
    permute_store!((row6.mm256), (row7.mm256), pos, out_vector);
 }
 /// Clamps each of the sixteen signed 16-bit lanes of `reg` to `0..=255`.
 ///
 /// # Safety
 /// The CPU must support AVX2.
 #[inline]
 #[target_feature(enable = "avx2")]
 unsafe fn clamp_avx(reg: __m256i) -> __m256i {
    let lower_bound = _mm256_set1_epi16(0);
    let upper_bound = _mm256_set1_epi16(255);
    // min(max(reg, 0), 255), lane by lane
    _mm256_min_epi16(_mm256_max_epi16(reg, lower_bound), upper_bound)
 }
 /// Builds a four-field, two-bits-per-field shuffle immediate.
 ///
 /// A copy of `_MM_SHUFFLE()` that doesn't require a nightly compiler:
 /// `w` lands in bits 0-1, `x` in bits 2-3, `y` in bits 4-5 and `z` in bits 6-7.
 #[inline]
 const fn shuffle(z: i32, y: i32, x: i32, w: i32) -> i32 {
    let low_pair = w | (x << 2);
    let high_pair = (y << 4) | (z << 6);
    low_pair | high_pair
 }
--- a/third_party/zune-jpeg/src/idct/neon.rs
+++ b/third_party/zune-jpeg/src/idct/neon.rs
@ -0,0 +1,296 @@
 /*
 * Copyright (c) 2023.
 *
 * This software is free software;
 *
 * You can redistribute it or modify it under terms of the MIT, Apache License or Zlib license
 */
 #![cfg(target_arch = "aarch64")]
 //! NEON optimised IDCT.
 //!
 //! Okay not thaat optimised.
 //!
 //!
 //! # The implementation
 //! The implementation is neatly broken down into two operations.
 //!
 //! 1. Test for zeroes
 //! > There is a shortcut method for idct  where when all AC values are zero, we can get the answer really quickly.
 //!  by scaling the 1/8th of the DCT coefficient of the block to the whole block and level shifting.
 //!
 //! 2. If the above fails, we proceed to carry out the IDCT as a two-pass, one-dimensional algorithm.
 //! It does two whole scans, each carrying out a 1-D IDCT on all items.
 //! After each scan the data is transposed in register, and after the
 //! first transpose the second pass is carried out.
 //!
 //! The code is not super optimized, it produces bit identical results with scalar code hence it's
 //! `mm256_add_epi16`
 //! and it also has the advantage of making this implementation easy to maintain.
 #![cfg(feature = "neon")]
 use core::arch::aarch64::*;
 use crate::unsafe_utils::{transpose, YmmRegister};
 const SCALE_BITS: i32 = 512 + 65536 + (128 << 17);
 /// NEON entry point for the 8x8 integer IDCT; forwards to [`idct_int_neon_inner`].
 ///
 /// * `in_vector` - the 64 input coefficients of one block.
 /// * `out_vector` - destination samples; eight rows of eight values are written,
 ///   with consecutive rows starting `stride` elements apart.
 /// * `stride` - distance, in elements, between row starts in `out_vector`.
 ///
 /// SAFETY
 /// ------
 ///
 /// This is a safe `fn`, but it unconditionally runs code compiled for NEON.
 /// It is the responsibility of the CALLER to ensure that this function is
 /// only called in contexts where the CPU supports it.
 ///
 /// # Panics
 /// Panics if any destination row `pos..pos + 8` lies outside `out_vector`.
 ///
 /// For documentation see module docs.
 pub fn idct_neon(in_vector: &mut [i32; 64], out_vector: &mut [i16], stride: usize) {
    unsafe {
        // The real work lives in a separate function because that function
        // must be flagged with #[target_feature] so that the compiler is
        // allowed to emit NEON instructions for it; this wrapper only exposes
        // it behind a safe signature.
        idct_int_neon_inner(in_vector, out_vector, stride);
    }
 }
 /// Narrows eight `i32` lanes (held as two 4-lane halves) into one `i16x8`
 /// vector, saturating values that do not fit in an `i16`.
 ///
 /// # Safety
 /// The CPU must support NEON.
 #[inline]
 #[target_feature(enable = "neon")]
 unsafe fn pack_16(a: int32x4x2_t) -> int16x8_t {
    let low_half = vqmovn_s32(a.0);
    let high_half = vqmovn_s32(a.1);
    vcombine_s16(low_half, high_half)
 }
 /// Narrows two rows of eight `i32` values into a pair of `i16x8` vectors
 /// (`a` becomes element `.0`, `b` becomes element `.1`) using [`pack_16`].
 ///
 /// # Safety
 /// The CPU must support NEON.
 #[inline]
 #[target_feature(enable = "neon")]
 unsafe fn condense_bottom_16(a: int32x4x2_t, b: int32x4x2_t) -> int16x8x2_t {
    let first_row = pack_16(a);
    let second_row = pack_16(b);
    int16x8x2_t(first_row, second_row)
 }
 /// 8x8 integer IDCT implemented with NEON intrinsics.
 ///
 /// Loads the 64 `i32` coefficients of `in_vector` as eight rows. If every AC
 /// coefficient is zero, the whole block is filled with the level-shifted,
 /// clamped DC value. Otherwise two one-dimensional passes are run, each
 /// followed by a transpose, and the results are narrowed to `i16`, clamped
 /// to `0..=255` and written to `out_vector`, one row of eight values every
 /// `stride` elements.
 ///
 /// # Safety
 /// The CPU executing this function must support NEON.
 ///
 /// # Panics
 /// Panics if any destination row `pos..pos + 8` is out of bounds for `out_vector`.
 #[target_feature(enable = "neon")]
 #[allow(
    clippy::too_many_lines,
    clippy::cast_possible_truncation,
    clippy::similar_names,
    clippy::op_ref,
    unused_assignments,
    clippy::zero_prefixed_literal
 )]
 pub unsafe fn idct_int_neon_inner(
    in_vector: &mut [i32; 64], out_vector: &mut [i16], stride: usize
 ) {
    // Write cursor into `out_vector`; advanced by `stride` after every row.
    let mut pos = 0;
    // load into registers
    //
    // The coefficients are already i32; each load starts at a row boundary
    // (offsets 0, 8, ..., 56). Calculations stay in 32 bits and are only
    // narrowed to i16 once the IDCT is done.
    let mut row0 = YmmRegister::load(in_vector[00..].as_ptr().cast());
    let mut row1 = YmmRegister::load(in_vector[08..].as_ptr().cast());
    let mut row2 = YmmRegister::load(in_vector[16..].as_ptr().cast());
    let mut row3 = YmmRegister::load(in_vector[24..].as_ptr().cast());
    let mut row4 = YmmRegister::load(in_vector[32..].as_ptr().cast());
    let mut row5 = YmmRegister::load(in_vector[40..].as_ptr().cast());
    let mut row6 = YmmRegister::load(in_vector[48..].as_ptr().cast());
    let mut row7 = YmmRegister::load(in_vector[56..].as_ptr().cast());
    // Forward DCT and quantization may cause all the AC terms to be zero, for such
    // cases we can try to accelerate it
    // Whenever the array has 63 zeroes (everything but the DC term), its idct is
    // (arr[0]>>3) or (arr[0]/8) propagated to all the elements.
    // We first test to see if all AC elements are zero and if they are, we go the
    // short way.
    //
    // This reduces IDCT overhead from about 39% to 18 %, almost half
    // Do another load for the first row, we don't want to check DC value, because
    // we only care about AC terms
    // TODO this should be a shift/shuffle, not a likely unaligned load
    let row8 = YmmRegister::load(in_vector[1..].as_ptr().cast());
    // OR everything except the DC term together (row8 stands in for row0);
    // the block is DC-only when `all_zero()` reports true for the result.
    let or_tree = (((row1 | row8) | (row2 | row3)) | ((row4 | row5) | (row6 | row7)));
    if or_tree.all_zero() {
        // AC terms all zero: the idct of the block is coeff[0] / 8 + 128 (bias),
        // clamped to 0..=255 while still an i32 and only then narrowed to i16.
        let clamped_16 = ((in_vector[0] >> 3) + 128).clamp(0, 255) as i16;
        let idct_value = vdupq_n_s16(clamped_16);
        // Stores one row of eight i16 at `$pos` (bounds-checked via
        // `get_mut(..).unwrap()`), then advances `$pos` to the next row.
        macro_rules! store {
            ($pos:tt,$value:tt) => {
                // store
                vst1q_s16(
                    out_vector
                        .get_mut($pos..$pos + 8)
                        .unwrap()
                        .as_mut_ptr()
                        .cast(),
                    $value
                );
                $pos += stride;
            };
        }
        // Eight identical rows.
        store!(pos, idct_value);
        store!(pos, idct_value);
        store!(pos, idct_value);
        store!(pos, idct_value);
        store!(pos, idct_value);
        store!(pos, idct_value);
        store!(pos, idct_value);
        store!(pos, idct_value);
        return;
    }
    // One 1-D IDCT pass over row0..row7, in place.
    // `$SCALE_BITS` is the bias added to the even part before the final shift
    // and `$scale` is the arithmetic right shift applied to each result.
    macro_rules! dct_pass {
        ($SCALE_BITS:tt,$scale:tt) => {
            // There are a lot of ways to do this
            // but to keep it simple(and beautiful), ill make a direct translation of the
            // scalar code to also make this code fully transparent(this version and the
            // scalar one should produce identical results.)
            // Compiler does a pretty good job of optimizing add + mul pairs
            // into multiply-accumulate pairs
            // even part
            let p1 = (row2 + row6) * 2217;
            let mut t2 = p1 + row6 * -7567;
            let mut t3 = p1 + row2 * 3135;
            let mut t0 = (row0 + row4).const_shl::<12>();
            let mut t1 = (row0 - row4).const_shl::<12>();
            let x0 = t0 + t3 + $SCALE_BITS;
            let x3 = t0 - t3 + $SCALE_BITS;
            let x1 = t1 + t2 + $SCALE_BITS;
            let x2 = t1 - t2 + $SCALE_BITS;
            // odd part
            let p3 = row7 + row3;
            let p4 = row5 + row1;
            let p1 = row7 + row1;
            let p2 = row5 + row3;
            let p5 = (p3 + p4) * 4816;
            t0 = row7 * 1223;
            t1 = row5 * 8410;
            t2 = row3 * 12586;
            t3 = row1 * 6149;
            let p1 = p5 + p1 * -3685;
            let p2 = p5 + (p2 * -10497);
            let p3 = p3 * -8034;
            let p4 = p4 * -1597;
            t3 += p1 + p4;
            t2 += p2 + p3;
            t1 += p2 + p4;
            t0 += p1 + p3;
            // combine even and odd parts and scale back down
            row0 = (x0 + t3).const_shra::<$scale>();
            row1 = (x1 + t2).const_shra::<$scale>();
            row2 = (x2 + t1).const_shra::<$scale>();
            row3 = (x3 + t0).const_shra::<$scale>();
            row4 = (x3 - t0).const_shra::<$scale>();
            row5 = (x2 - t1).const_shra::<$scale>();
            row6 = (x1 - t2).const_shra::<$scale>();
            row7 = (x0 - t3).const_shra::<$scale>();
        };
    }
    // First pass (same bias 512 and shift 10 as the scalar code's first loop)
    dct_pass!(512, 10);
    transpose(
        &mut row0, &mut row1, &mut row2, &mut row3, &mut row4, &mut row5, &mut row6, &mut row7
    );
    // Second pass, with the rounding + level-shift bias and the final shift
    dct_pass!(SCALE_BITS, 17);
    transpose(
        &mut row0, &mut row1, &mut row2, &mut row3, &mut row4, &mut row5, &mut row6, &mut row7
    );
    // Pack i32 to i16's,
    // clamp them to be between 0-255
    // Store back to array
    // This could potentially be reorganized to take advantage of the multi-register stores
    //
    // `$x` and `$y` are two consecutive rows; `$index` is the write cursor,
    // advanced by `stride` after each of the two stored rows.
    macro_rules! permute_store {
        ($x:tt,$y:tt,$index:tt,$out:tt) => {
            // Saturating narrow of both rows to i16
            let a = condense_bottom_16($x, $y);
            // Clamp the values after packing, we can clamp more values at once
            let b = clamp256_neon(a);
            // store first vector
            vst1q_s16(
                ($out)
                    .get_mut($index..$index + 8)
                    .unwrap()
                    .as_mut_ptr()
                    .cast(),
                b.0
            );
            $index += stride;
            // second vector
            vst1q_s16(
                ($out)
                    .get_mut($index..$index + 8)
                    .unwrap()
                    .as_mut_ptr()
                    .cast(),
                b.1
            );
            $index += stride;
        };
    }
    // Pack and write the values back to the array
    permute_store!((row0.mm256), (row1.mm256), pos, out_vector);
    permute_store!((row2.mm256), (row3.mm256), pos, out_vector);
    permute_store!((row4.mm256), (row5.mm256), pos, out_vector);
    permute_store!((row6.mm256), (row7.mm256), pos, out_vector);
 }
 /// Clamps each of the eight signed 16-bit lanes of `reg` to `0..=255`.
 ///
 /// # Safety
 /// The CPU must support NEON.
 #[inline]
 #[target_feature(enable = "neon")]
 unsafe fn clamp_neon(reg: int16x8_t) -> int16x8_t {
    let lower_bound = vdupq_n_s16(0);
    let upper_bound = vdupq_n_s16(255);
    // min(max(reg, 0), 255), lane by lane
    vminq_s16(vmaxq_s16(reg, lower_bound), upper_bound)
 }
 /// Clamps all sixteen `i16` lanes of a two-vector pair to `0..=255` by
 /// applying [`clamp_neon`] to each half.
 ///
 /// # Safety
 /// The CPU must support NEON.
 #[inline]
 #[target_feature(enable = "neon")]
 unsafe fn clamp256_neon(reg: int16x8x2_t) -> int16x8x2_t {
    let first_half = clamp_neon(reg.0);
    let second_half = clamp_neon(reg.1);
    int16x8x2_t(first_half, second_half)
 }
 #[cfg(test)]
 mod test {
    use super::*;
    /// `clamp256_neon` must saturate every lane of both halves to `0..=255`.
    #[test]
    fn test_neon_clamp_256() {
        let input: [i16; 16] = [-1, -2, -3, 4, 256, 257, 258, 240, -1, 290, 2, 3, 4, 5, 6, 7];
        let expected: [i16; 16] = [0, 0, 0, 4, 255, 255, 255, 240, 0, 255, 2, 3, 4, 5, 6, 7];
        let mut output: [i16; 16] = [0; 16];
        unsafe {
            // load both halves, clamp, and store them back for comparison
            let clamped = clamp256_neon(vld1q_s16_x2(input.as_ptr().cast()));
            vst1q_s16_x2(output.as_mut_ptr().cast(), clamped);
        }
        assert_eq!(output, expected);
    }
 }
--- a/third_party/zune-jpeg/src/idct/scalar.rs
+++ b/third_party/zune-jpeg/src/idct/scalar.rs
@ -0,0 +1,212 @@
 /*
 * Copyright (c) 2023.
 *
 * This software is free software;
 *
 * You can redistribute it or modify it under terms of the MIT, Apache License or Zlib license
 */
 //! Platform independent IDCT algorithm
 //!
 //! Not as fast as AVX one.
 const SCALE_BITS: i32 = 512 + 65536 + (128 << 17);
 /// Scalar (platform independent) 8x8 integer inverse DCT with level shift
 /// and clamping.
 ///
 /// * `in_vector` - the 64 coefficients of one block. It is used as scratch
 ///   space: on the general path it is overwritten with first-pass results.
 /// * `out_vector` - destination samples in `0..=255`; eight rows of eight
 ///   values are written, with consecutive rows starting `stride` elements apart.
 /// * `stride` - distance, in elements, between row starts in `out_vector`.
 ///
 /// All arithmetic wraps on overflow, so out-of-range coefficients (e.g. from
 /// a corrupt stream) give garbage samples instead of an overflow panic in
 /// builds with overflow checks, matching what the SIMD implementations do.
 ///
 /// # Panics
 /// Panics if any destination row `pos..pos + 8` is out of bounds for `out_vector`.
 #[allow(unused_assignments)]
 #[allow(
    clippy::too_many_lines,
    clippy::op_ref,
    clippy::cast_possible_truncation
 )]
 pub fn idct_int(in_vector: &mut [i32; 64], out_vector: &mut [i16], stride: usize) {
    // Write cursor into `out_vector` and read cursor into `in_vector`.
    let mut pos = 0;
    let mut i = 0;
    // Don't check for zeroes inside loop, lift it and check outside
    // we want to accelerate the case with 63 0 ac coeff
    if &in_vector[1..] == &[0_i32; 63] {
        // Every output sample is (DC / 8) + 128.
        // Clamp while the value is still an i32: narrowing to i16 first would
        // wrap large DC values and then clamp them to the wrong end of the
        // range (the SIMD implementations also clamp before narrowing).
        let coeff = [clamp((in_vector[0] >> 3) + 128); 8];
        // write the same row to all eight rows of the block
        for _ in 0..8 {
            // position of the current row
            let mcu_stride: &mut [i16; 8] = out_vector
                .get_mut(pos..pos + 8)
                .unwrap()
                .try_into()
                .unwrap();
            // copy coefficients
            mcu_stride.copy_from_slice(&coeff);
            // move to the next row
            pos += stride;
        }
    } else {
        // because the compiler fails to see that it can be auto_vectorised so i'll
        // leave it here check out [idct_int_slow, and idct_int_1D to get what i mean ] https://godbolt.org/z/8hqW9z9j9
        //
        // First pass: one 1-D IDCT down each of the eight columns.
        for ptr in 0..8 {
            // even part
            let p2 = in_vector[ptr + 16];
            let p3 = in_vector[ptr + 48];
            let p1 = p2.wrapping_add(p3).wrapping_mul(2217);
            let t2 = p1.wrapping_add(p3.wrapping_mul(-7567));
            let t3 = p1.wrapping_add(p2.wrapping_mul(3135));
            let p2 = in_vector[ptr];
            let p3 = in_vector[32 + ptr];
            let t0 = fsh(p2.wrapping_add(p3));
            let t1 = fsh(p2.wrapping_sub(p3));
            // 512 is the rounding term for the `>> 10` below
            let x0 = t0.wrapping_add(t3).wrapping_add(512);
            let x3 = t0.wrapping_sub(t3).wrapping_add(512);
            let x1 = t1.wrapping_add(t2).wrapping_add(512);
            let x2 = t1.wrapping_sub(t2).wrapping_add(512);
            // odd part
            let mut t0 = in_vector[ptr + 56];
            let mut t1 = in_vector[ptr + 40];
            let mut t2 = in_vector[ptr + 24];
            let mut t3 = in_vector[ptr + 8];
            let p3 = t0.wrapping_add(t2);
            let p4 = t1.wrapping_add(t3);
            let p1 = t0.wrapping_add(t3);
            let p2 = t1.wrapping_add(t2);
            let p5 = p3.wrapping_add(p4).wrapping_mul(4816);
            t0 = t0.wrapping_mul(1223);
            t1 = t1.wrapping_mul(8410);
            t2 = t2.wrapping_mul(12586);
            t3 = t3.wrapping_mul(6149);
            let p1 = p5.wrapping_add(p1.wrapping_mul(-3685));
            let p2 = p5.wrapping_add(p2.wrapping_mul(-10497));
            let p3 = p3.wrapping_mul(-8034);
            let p4 = p4.wrapping_mul(-1597);
            t3 = t3.wrapping_add(p1.wrapping_add(p4));
            t2 = t2.wrapping_add(p2.wrapping_add(p3));
            t1 = t1.wrapping_add(p2.wrapping_add(p4));
            t0 = t0.wrapping_add(p1.wrapping_add(p3));
            // constants scaled things up by 1<<12; let's bring them back
            // down, but keep 2 extra bits of precision
            in_vector[ptr] = x0.wrapping_add(t3) >> 10;
            in_vector[ptr + 8] = x1.wrapping_add(t2) >> 10;
            in_vector[ptr + 16] = x2.wrapping_add(t1) >> 10;
            in_vector[ptr + 24] = x3.wrapping_add(t0) >> 10;
            in_vector[ptr + 32] = x3.wrapping_sub(t0) >> 10;
            in_vector[ptr + 40] = x2.wrapping_sub(t1) >> 10;
            in_vector[ptr + 48] = x1.wrapping_sub(t2) >> 10;
            in_vector[ptr + 56] = x0.wrapping_sub(t3) >> 10;
        }
        // Loop-invariant fixed-point constant for the odd part of the second
        // pass (evaluates to 4816, the same constant the first pass uses).
        let odd_scale = f2f(1.175875602);
        // Second pass: one 1-D IDCT along each of the eight rows.
        // This is vectorised in architectures supporting SSE 4.1
        while i < 64 {
            // We won't try to short circuit here because it rarely works
            // Even part
            let p2 = in_vector[i + 2];
            let p3 = in_vector[i + 6];
            let p1 = p2.wrapping_add(p3).wrapping_mul(2217);
            let t2 = p1.wrapping_add(p3.wrapping_mul(-7567));
            let t3 = p1.wrapping_add(p2.wrapping_mul(3135));
            let p2 = in_vector[i];
            let p3 = in_vector[i + 4];
            let t0 = fsh(p2.wrapping_add(p3));
            let t1 = fsh(p2.wrapping_sub(p3));
            // constants scaled things up by 1<<12, plus we had 1<<2 from first
            // loop, plus horizontal and vertical each scale by sqrt(8) so together
            // we've got an extra 1<<3, so 1<<17 total we need to remove.
            // so we want to round that, which means adding 0.5 * 1<<17,
            // aka 65536. Also, we'll end up with -128 to 127 that we want
            // to encode as 0..255 by adding 128, so we'll add that before the shift
            let x0 = t0.wrapping_add(t3).wrapping_add(SCALE_BITS);
            let x3 = t0.wrapping_sub(t3).wrapping_add(SCALE_BITS);
            let x1 = t1.wrapping_add(t2).wrapping_add(SCALE_BITS);
            let x2 = t1.wrapping_sub(t2).wrapping_add(SCALE_BITS);
            // odd part
            let mut t0 = in_vector[i + 7];
            let mut t1 = in_vector[i + 5];
            let mut t2 = in_vector[i + 3];
            let mut t3 = in_vector[i + 1];
            let p3 = t0.wrapping_add(t2);
            let p4 = t1.wrapping_add(t3);
            let p1 = t0.wrapping_add(t3);
            let p2 = t1.wrapping_add(t2);
            let p5 = p3.wrapping_add(p4).wrapping_mul(odd_scale);
            t0 = t0.wrapping_mul(1223);
            t1 = t1.wrapping_mul(8410);
            t2 = t2.wrapping_mul(12586);
            t3 = t3.wrapping_mul(6149);
            let p1 = p5.wrapping_add(p1.wrapping_mul(-3685));
            let p2 = p5.wrapping_add(p2.wrapping_mul(-10497));
            let p3 = p3.wrapping_mul(-8034);
            let p4 = p4.wrapping_mul(-1597);
            t3 = t3.wrapping_add(p1.wrapping_add(p4));
            t2 = t2.wrapping_add(p2.wrapping_add(p3));
            t1 = t1.wrapping_add(p2.wrapping_add(p4));
            t0 = t0.wrapping_add(p1.wrapping_add(p3));
            // Fixed-size view of the destination row (panics if out of bounds)
            let out: &mut [i16; 8] = out_vector
                .get_mut(pos..pos + 8)
                .unwrap()
                .try_into()
                .unwrap();
            out[0] = clamp(x0.wrapping_add(t3) >> 17);
            out[1] = clamp(x1.wrapping_add(t2) >> 17);
            out[2] = clamp(x2.wrapping_add(t1) >> 17);
            out[3] = clamp(x3.wrapping_add(t0) >> 17);
            out[4] = clamp(x3.wrapping_sub(t0) >> 17);
            out[5] = clamp(x2.wrapping_sub(t1) >> 17);
            out[6] = clamp(x1.wrapping_sub(t2) >> 17);
            out[7] = clamp(x0.wrapping_sub(t3) >> 17);
            i += 8;
            pos += stride;
        }
    }
 }
 #[inline]
 #[allow(clippy::cast_possible_truncation)]
 /// Convert a float constant to fixed point with 12 fractional bits:
 /// scale by 4096, add 0.5 and truncate to `i32`.
 fn f2f(x: f32) -> i32 {
    let scaled = x * 4096.0;
    (scaled + 0.5) as i32
 }
 #[inline]
 /// Multiply a number by 4096 (shift left by the 12 fixed-point bits).
 fn fsh(x: i32) -> i32 {
    const FIXED_POINT_BITS: u32 = 12;
    x << FIXED_POINT_BITS
 }
 /// Saturate `a` to the range `0..=255` and narrow it to `i16`.
 #[inline]
 #[allow(clippy::cast_possible_truncation)]
 fn clamp(a: i32) -> i16 {
    // The value is within 0..=255 after saturation, so the cast cannot truncate.
    let saturated = a.max(0).min(255);
    saturated as i16
 }
--- a/third_party/zune-jpeg/src/lib.rs
+++ b/third_party/zune-jpeg/src/lib.rs
@ -0,0 +1,133 @@
 /*
 * Copyright (c) 2023.
 *
 * This software is free software;
 *
 * You can redistribute it or modify it under terms of the MIT, Apache License or Zlib license
 */
 //!This crate provides a library for decoding valid
 //! ITU-T Rec. T.851 (09/2005) ITU-T T.81 (JPEG-1) or JPEG images.
 //!
 //!
 //!
 //! # Features
 //!  - SSE and AVX accelerated functions to speed up certain decoding operations
 //!  - FAST and accurate 32 bit IDCT algorithm
 //!  - Fast color convert functions
 //!  - RGBA and RGBX (4-Channel) color conversion functions
 //!  - YCbCr to Luma(Grayscale) conversion.
 //!
 //! # Usage
 //! Add zune-jpeg to the dependencies in the project Cargo.toml
 //!
 //! ```toml
 //! [dependencies]
 //! zune_jpeg = "0.3"
 //! ```
 //! # Examples
 //!
 //! ## Decode a JPEG file with default arguments.
 //!```no_run
 //! use std::fs::read;
 //! use std::io::BufReader;
 //! use zune_jpeg::JpegDecoder;
 //! let file_contents = BufReader::new(std::fs::File::open("a_jpeg.file").unwrap());
 //! let mut decoder = JpegDecoder::new(file_contents);
 //! let mut pixels = decoder.decode().unwrap();
 //! ```
 //!
 //! ## Decode a JPEG file to RGBA format
 //!
 //! - Other (limited) supported formats are BGR and BGRA
 //!
 //!```no_run
 //! use zune_core::bytestream::ZCursor;
 //! use zune_core::colorspace::ColorSpace;
 //! use zune_core::options::DecoderOptions;
 //! use zune_jpeg::JpegDecoder;
 //!
 //! let mut options = DecoderOptions::default().jpeg_set_out_colorspace(ColorSpace::RGBA);
 //!
 //! let mut decoder = JpegDecoder::new_with_options(ZCursor::new(&[]),options);
 //! let pixels = decoder.decode().unwrap();
 //! ```
 //!
 //! ## Decode an image and get its width and height.
 //!```no_run
 //! use zune_core::bytestream::ZCursor;
 //! use zune_jpeg::JpegDecoder;
 //!
 //! let mut decoder = JpegDecoder::new(ZCursor::new(&[]));
 //! decoder.decode_headers().unwrap();
 //! let image_info = decoder.info().unwrap();
 //! println!("{},{}",image_info.width,image_info.height)
 //! ```
 //! # Crate features.
 //! This crate tries to be as minimal as possible while being extensible
 //! enough to handle the complexities arising from parsing different types
 //! of jpeg images.
 //!
 //! Safety is a top concern that is why we provide both static ways to disable unsafe code,
 //! disabling x86 feature, and dynamic ,by using [`DecoderOptions::set_use_unsafe(false)`],
 //! both of these disable platform specific optimizations, which reduce the speed of decompression.
 //!
 //! Please do note that careful consideration has been taken to ensure that the unsafe paths
 //! are only unsafe because they depend on platform specific intrinsics, hence no need to disable them
 //!
 //! The crate tries to decode as many images as possible, as a best effort, even those violating the standard
 //! , this means a lot of images may  get silent warnings and wrong output, but if you are sure you will be handling
 //! images that follow the spec, set `ZuneJpegOptions::set_strict` to true.
 //!
 //![`DecoderOptions::set_use_unsafe(false)`]:  https://docs.rs/zune-core/0.2.1/zune_core/options/struct.DecoderOptions.html#method.set_use_unsafe
 #![warn(
    clippy::correctness,
    clippy::perf,
    clippy::pedantic,
    clippy::inline_always,
    clippy::missing_errors_doc,
    clippy::panic
 )]
 #![allow(
    clippy::needless_return,
    clippy::similar_names,
    clippy::inline_always,
    clippy::similar_names,
    clippy::doc_markdown,
    clippy::module_name_repetitions,
    clippy::missing_panics_doc,
    clippy::missing_errors_doc
 )]
 // no_std compatibility
 #![deny(clippy::std_instead_of_alloc, clippy::alloc_instead_of_core)]
 #![cfg_attr(not(feature = "x86"), forbid(unsafe_code))]
 #![cfg_attr(not(feature = "std"), no_std)]
 #![macro_use]
 extern crate alloc;
 extern crate core;
 pub use zune_core;
 pub use crate::decoder::{ImageInfo, JpegDecoder};
 mod bitstream;
 mod color_convert;
 mod components;
 mod decoder;
 pub mod errors;
 mod headers;
 mod huffman;
 #[cfg(not(fuzzing))]
 mod idct;
 #[cfg(fuzzing)]
 pub mod idct;
 mod marker;
 mod mcu;
 mod mcu_prog;
 mod misc;
 mod unsafe_utils;
 mod unsafe_utils_avx2;
 mod unsafe_utils_neon;
 mod upsampler;
 mod worker;
--- a/third_party/zune-jpeg/src/marker.rs
+++ b/third_party/zune-jpeg/src/marker.rs
@ -0,0 +1,85 @@
 /*
 * Copyright (c) 2023.
 *
 * This software is free software;
 *
 * You can redistribute it or modify it under terms of the MIT, Apache License or Zlib license
 */
 #![allow(clippy::upper_case_acronyms)]
 /// Marker codes understood by the decoder.
 ///
 /// Variants that carry a `u8` store the marker's number within its family
 /// (for example `SOF(2)`, `RST(5)` or `APP(14)`). See [`Marker::from_u8`] for
 /// the byte values that map to each variant.
 #[derive(Clone, Copy, Debug, PartialEq, Eq)]
 pub enum Marker {
    /// Start Of Frame. The payload identifies the coding process:
    ///
    /// - SOF(0):  Baseline DCT (Huffman coding)
    /// - SOF(1):  Extended sequential DCT (Huffman coding)
    /// - SOF(2):  Progressive DCT (Huffman coding)
    /// - SOF(3):  Lossless (sequential) (Huffman coding)
    /// - SOF(5):  Differential sequential DCT (Huffman coding)
    /// - SOF(6):  Differential progressive DCT (Huffman coding)
    /// - SOF(7):  Differential lossless (sequential) (Huffman coding)
    /// - SOF(9):  Extended sequential DCT (arithmetic coding)
    /// - SOF(10): Progressive DCT (arithmetic coding)
    /// - SOF(11): Lossless (sequential) (arithmetic coding)
    /// - SOF(13): Differential sequential DCT (arithmetic coding)
    /// - SOF(14): Differential progressive DCT (arithmetic coding)
    /// - SOF(15): Differential lossless (sequential) (arithmetic coding)
    ///
    /// Only `SOF(0)`, `SOF(1)` and `SOF(2)` are ever produced by
    /// [`Marker::from_u8`].
    SOF(u8),
    /// Define Huffman table(s).
    DHT,
    /// Define arithmetic coding conditioning(s).
    DAC,
    /// Restart marker; the payload is the modulo 8 restart count `m` (0 to 7).
    RST(u8),
    /// Start of image.
    SOI,
    /// End of image.
    EOI,
    /// Start of scan.
    SOS,
    /// Define quantization table(s).
    DQT,
    /// Define number of lines.
    DNL,
    /// Define restart interval.
    DRI,
    /// Application segment; the payload is the segment number.
    APP(u8),
    /// Comment.
    COM
 }
 impl Marker {
    /// Translate a marker code byte into a [`Marker`].
    ///
    /// Contiguous families (`SOF`, `RST`, `APP`) are decoded by subtracting
    /// the family's base code from `n`.
    ///
    /// Returns `None` for every byte that is not listed below. That includes
    /// SOF codes other than 0-2 and APP codes other than 0, 1, 2 and 14.
    pub fn from_u8(n: u8) -> Option<Marker> {
        let marker = match n {
            // SOF0..=SOF2
            0xC0..=0xC2 => Marker::SOF(n - 0xC0),
            0xC4 => Marker::DHT,
            0xCC => Marker::DAC,
            // RST0..=RST7
            0xD0..=0xD7 => Marker::RST(n - 0xD0),
            0xD8 => Marker::SOI,
            0xD9 => Marker::EOI,
            0xDA => Marker::SOS,
            0xDB => Marker::DQT,
            0xDC => Marker::DNL,
            0xDD => Marker::DRI,
            // APP0..=APP2
            0xE0..=0xE2 => Marker::APP(n - 0xE0),
            0xEE => Marker::APP(14),
            0xFE => Marker::COM,
            _ => return None
        };
        Some(marker)
    }
 }
--- a/third_party/zune-jpeg/src/mcu.rs
+++ b/third_party/zune-jpeg/src/mcu.rs
@ -0,0 +1,504 @@
 /*
 * Copyright (c) 2023.
 *
 * This software is free software;
 *
 * You can redistribute it or modify it under terms of the MIT, Apache License or Zlib license
 */
 use alloc::{format, vec};
 use core::cmp::min;
 use zune_core::bytestream::ZByteReaderTrait;
 use zune_core::colorspace::ColorSpace;
 use zune_core::colorspace::ColorSpace::Luma;
 use zune_core::log::{error, trace, warn};
 use crate::bitstream::BitStream;
 use crate::components::SampleRatios;
 use crate::decoder::MAX_COMPONENTS;
 use crate::errors::DecodeErrors;
 use crate::marker::Marker;
 use crate::misc::{calculate_padded_width, setup_component_params};
 use crate::worker::{color_convert, upsample};
 use crate::JpegDecoder;
 /// Number of coefficients in a single DCT block.
 ///
 /// The decoders use it to size the scratch buffer that one block is decoded into.
 pub const DCT_BLOCK: usize = 64;
 impl<T: ZByteReaderTrait> JpegDecoder<T> {
    /// Verify that every component has both its DC and its AC Huffman table.
    ///
    /// For each component the table index stored on the component is looked up
    /// in `dc_huffman_tables` / `ac_huffman_tables`.
    ///
    /// # Errors
    /// Returns `DecodeErrors::HuffmanDecode` when an index is out of range or
    /// when the slot it points at holds no table. The DC table is checked
    /// before the AC table for each component.
    pub(crate) fn check_tables(&self) -> Result<(), DecodeErrors> {
        // validated once here so the hot path does not have to
        for component in &self.components {
            match self.dc_huffman_tables.get(component.dc_huff_table) {
                Some(Some(_)) => {}
                // index is valid but no table was stored there
                Some(None) => {
                    return Err(DecodeErrors::HuffmanDecode(format!(
                        "No DC table for component {:?}",
                        component.component_id
                    )));
                }
                // index is out of range
                None => {
                    return Err(DecodeErrors::HuffmanDecode(format!(
                        "No Huffman DC table for component {:?} ",
                        component.component_id
                    )));
                }
            }
            match self.ac_huffman_tables.get(component.ac_huff_table) {
                Some(Some(_)) => {}
                // index is valid but no table was stored there
                Some(None) => {
                    return Err(DecodeErrors::HuffmanDecode(format!(
                        "No AC table for component {:?}",
                        component.component_id
                    )));
                }
                // index is out of range
                None => {
                    return Err(DecodeErrors::HuffmanDecode(format!(
                        "No Huffman AC table for component {:?} ",
                        component.component_id
                    )));
                }
            }
        }
        Ok(())
    }
    /// Decode MCUs and carry out post processing.
    ///
    /// This is the main decoder loop for the library, the hot path. For every
    /// MCU row it calls `decode_mcu_width` to decode one row of MCUs into the
    /// per-component coefficient buffers, then `post_process` to turn that row
    /// into output pixels written to `pixels`.
    ///
    /// # Errors
    /// - Any error from component setup, the Huffman table check, upsampler
    ///   selection, MCU decoding or post processing is propagated.
    /// - `DecodeErrors::Format` if the input colorspace expects more components
    ///   than were found.
    /// - `DecodeErrors::FormatStatic("Premature end of buffer")` in strict mode
    ///   when the bit reader has over-read by more than 37 bytes; in non-strict
    ///   mode decoding stops early and `Ok(())` is returned.
    #[allow(
        clippy::similar_names,
        clippy::too_many_lines,
        clippy::cast_possible_truncation
    )]
    #[inline(never)]
    pub(crate) fn decode_mcu_ycbcr_baseline(
        &mut self, pixels: &mut [u8]
    ) -> Result<(), DecodeErrors> {
        setup_component_params(self)?;
        // check dc and AC tables
        self.check_tables()?;
        let (mut mcu_width, mut mcu_height);
        if self.is_interleaved {
            // set upsampling functions
            self.set_upsampling()?;
            mcu_width = self.mcu_x;
            mcu_height = self.mcu_y;
        } else {
            // For non-interleaved images ((1*1) subsampling) the number of
            // MCUs is the dimension (+7 to account for padding) divided by 8.
            //
            // Widen to usize *before* adding 7: doing the addition in the
            // narrower dimension type overflows for dimensions close to that
            // type's maximum.
            mcu_width = (usize::from(self.info.width) + 7) / 8;
            mcu_height = (usize::from(self.info.height) + 7) / 8;
        }
        if self.is_interleaved
            && self.input_colorspace.num_components() > 1
            && self.options.jpeg_get_out_colorspace().num_components() == 1
            && (self.sub_sample_ratio == SampleRatios::V
                || self.sub_sample_ratio == SampleRatios::HV)
        {
            // For a specific set of images, e.g interleaved,
            // when converting from YcbCr to grayscale, we need to
            // take into account mcu height since the MCU decoding needs to take
            // it into account for padding purposes and the post processor
            // parses two rows per mcu width.
            //
            // set coeff to be 2 to ensure that we increment two rows
            // for every mcu processed also
            mcu_height *= self.v_max;
            mcu_height /= self.h_max;
            self.coeff = 2;
        }
        if self.input_colorspace.num_components() > self.components.len() {
            let msg = format!(
                " Expected {} number of components but found {}",
                self.input_colorspace.num_components(),
                self.components.len()
            );
            return Err(DecodeErrors::Format(msg));
        }
        if self.input_colorspace == ColorSpace::Luma && self.is_interleaved {
            warn!("Grayscale image with down-sampled component, resetting component details");
            self.reset_params();
            // same widening-before-adding rule as above
            mcu_width = (usize::from(self.info.width) + 7) / 8;
            mcu_height = (usize::from(self.info.height) + 7) / 8;
        }
        let width = usize::from(self.info.width);
        let padded_width = calculate_padded_width(width, self.sub_sample_ratio);
        let mut stream = BitStream::new();
        // scratch space for the coefficients of the block being decoded
        let mut tmp = [0_i32; DCT_BLOCK];
        let comp_len = self.components.len();
        for (pos, comp) in self.components.iter_mut().enumerate() {
            // Allocate only needed components.
            //
            // For special colorspaces i.e YCCK and CMYK, just allocate all of the needed
            // components.
            if min(
                self.options.jpeg_get_out_colorspace().num_components() - 1,
                pos
            ) == pos
                || comp_len == 4
            // Special colorspace
            {
                // allocate enough space to hold a whole MCU width
                // this means we should take into account sampling ratios
                // `*8` is because each MCU spans 8 widths.
                let len = comp.width_stride * comp.vertical_sample * 8;
                comp.needed = true;
                comp.raw_coeff = vec![0; len];
            } else {
                comp.needed = false;
            }
        }
        let mut pixels_written = 0;
        // the upsampler scratch buffer is only sized for interleaved images;
        // otherwise it is empty
        let is_hv = usize::from(self.is_interleaved);
        let upsampler_scratch_size = is_hv * self.components[0].width_stride;
        let mut upsampler_scratch_space = vec![0; upsampler_scratch_size];
        for i in 0..mcu_height {
            // Report if we have no more bytes
            // This may generate false negatives since we over-read bytes
            // hence that why 37 is chosen(we assume if we over-read more than 37 bytes, we have a problem)
            if stream.overread_by > 37
            // favourite number :)
            {
                if self.options.strict_mode() {
                    return Err(DecodeErrors::FormatStatic("Premature end of buffer"));
                };
                error!("Premature end of buffer");
                break;
            }
            // decode a whole MCU width,
            // this takes into account interleaved components.
            self.decode_mcu_width(mcu_width, &mut tmp, &mut stream)?;
            // process that width up until it's impossible
            self.post_process(
                pixels,
                i,
                mcu_height,
                width,
                padded_width,
                &mut pixels_written,
                &mut upsampler_scratch_space
            )?;
        }
        // it may happen that some images don't have the whole buffer
        // so we can't panic in case of that
        // assert_eq!(pixels_written, pixels.len());
        trace!("Finished decoding image");
        Ok(())
    }
    /// Decode one row of `mcu_width` MCUs into the components' `raw_coeff` buffers.
    ///
    /// For every MCU, each component's blocks are entropy-decoded into `tmp`
    /// and, when the component is `needed`, passed through `idct_func` into the
    /// component's `raw_coeff` buffer. After each MCU any marker the bit reader
    /// picked up is handled.
    ///
    /// - `mcu_width`: number of MCUs in the row.
    /// - `tmp`: scratch buffer for the coefficients of one block; it is zeroed
    ///   before every block.
    /// - `stream`: bit reader state shared across rows.
    ///
    /// # Errors
    /// - Errors from block decoding, restart handling and marker parsing are
    ///   propagated.
    /// - `DecodeErrors::Format` in strict mode when a marker other than RST or
    ///   EOI is found in the entropy coded data.
    /// - `DecodeErrors::FormatStatic` when a component's Huffman table is
    ///   missing or a block would land outside the component's buffer. These
    ///   were previously panics.
    fn decode_mcu_width(
        &mut self, mcu_width: usize, tmp: &mut [i32; 64], stream: &mut BitStream
    ) -> Result<(), DecodeErrors> {
        for j in 0..mcu_width {
            // iterate over components
            for component in &mut self.components {
                // Tables are validated before decoding starts, but markers
                // parsed further down in this loop may have changed decoder
                // state since then, so a missing table is reported as an error
                // rather than unwrapped.
                let dc_table = self.dc_huffman_tables[component.dc_huff_table % MAX_COMPONENTS]
                    .as_ref()
                    .ok_or(DecodeErrors::FormatStatic(
                        "No DC Huffman table available for component"
                    ))?;
                let ac_table = self.ac_huffman_tables[component.ac_huff_table % MAX_COMPONENTS]
                    .as_ref()
                    .ok_or(DecodeErrors::FormatStatic(
                        "No AC Huffman table available for component"
                    ))?;
                let qt_table = &component.quantization_table;
                let channel = &mut component.raw_coeff;
                // If image is interleaved iterate over scan components,
                // otherwise if it's non-interleaved, these routines iterate in
                // trivial scanline order(Y,Cb,Cr)
                for v_samp in 0..component.vertical_sample {
                    for h_samp in 0..component.horizontal_sample {
                        // Fill the array with zeroes, decode_mcu_block expects
                        // a zero based array.
                        tmp.fill(0);
                        stream.decode_mcu_block(
                            &mut self.stream,
                            dc_table,
                            ac_table,
                            qt_table,
                            tmp,
                            &mut component.dc_pred
                        )?;
                        // blocks of components that are not needed are still
                        // decoded (to advance the stream) but never transformed
                        if component.needed {
                            let idct_position = {
                                // derived from stb and rewritten for my tastes
                                let c2 = v_samp * 8;
                                let c3 = ((j * component.horizontal_sample) + h_samp) * 8;
                                component.width_stride * c2 + c3
                            };
                            let idct_pos = channel.get_mut(idct_position..).ok_or(
                                DecodeErrors::FormatStatic(
                                    "Block position outside of component buffer"
                                )
                            )?;
                            //  call idct.
                            (self.idct_func)(tmp, idct_pos, component.width_stride);
                        }
                    }
                }
            }
            self.todo = self.todo.saturating_sub(1);
            // After all interleaved components, that's an MCU
            // handle stream markers
            //
            // In some corrupt images, it may occur that header markers occur in the stream.
            // The spec EXPLICITLY FORBIDS this, specifically, in
            // routine F.2.2.5  it says
            // `The only valid marker which may occur within the Huffman coded data is the RSTm marker.`
            //
            // But libjpeg-turbo allows it because of some weird reason. so I'll also
            // allow it because of some weird reason.
            if let Some(m) = stream.marker {
                if m == Marker::EOI {
                    // acknowledge and ignore EOI marker.
                    stream.marker.take();
                    trace!("Found EOI marker");
                    // Google Introduced the Ultra-HD image format which is basically
                    // stitching two images into one container.
                    // They basically separate two images via a EOI and SOI marker
                    // so let's just ensure if we ever see EOI, we never read past that
                    // ever.
                    // https://github.com/google/libultrahdr
                    stream.seen_eoi = true;
                } else if let Marker::RST(_) = m {
                    // only act on the restart marker once the MCU countdown
                    // has reached zero
                    if self.todo == 0 {
                        self.handle_rst(stream)?;
                    }
                } else {
                    if self.options.strict_mode() {
                        return Err(DecodeErrors::Format(format!(
                            "Marker {m:?} found where not expected"
                        )));
                    }
                    error!(
                        "Marker `{:?}` Found within Huffman Stream, possibly corrupt jpeg",
                        m
                    );
                    self.parse_marker_inner(m)?;
                }
            }
        }
        Ok(())
    }
    /// Handle a restart marker picked up by the bit reader.
    ///
    /// Always re-arms the MCU countdown (`todo`) from `restart_interval`, then
    /// looks at the marker held in `stream`:
    ///
    /// - `RST(_)`: the bit reader is reset and every component's DC prediction
    ///   goes back to zero.
    /// - `EOI` or no marker at all: nothing else happens.
    /// - anything else: `DecodeErrors::MCUError` is returned.
    ///
    /// This routine is shared with `mcu_prog`.
    #[cold]
    pub(crate) fn handle_rst(&mut self, stream: &mut BitStream) -> Result<(), DecodeErrors> {
        self.todo = self.restart_interval;
        match stream.marker {
            Some(Marker::RST(_)) => {
                // start the next interval from a clean bit reader
                stream.reset();
                // DC predictions are zeroed for all components
                for component in &mut self.components {
                    component.dc_pred = 0;
                }
                Ok(())
            }
            // EOI is passed over silently; no marker means there is nothing to do
            Some(Marker::EOI) | None => Ok(()),
            Some(marker) => Err(DecodeErrors::MCUError(format!(
                "Marker {marker:?} found in bitstream, possibly corrupt jpeg"
            )))
        }
    }
    #[allow(clippy::too_many_lines, clippy::too_many_arguments)]
    pub(crate) fn post_process(
        &mut self, pixels: &mut [u8], i: usize, mcu_height: usize, width: usize,
        padded_width: usize, pixels_written: &mut usize, upsampler_scratch_space: &mut [i16]
    ) -> Result<(), DecodeErrors> {
        let out_colorspace_components = self.options.jpeg_get_out_colorspace().num_components();
        let mut px = *pixels_written;
        // indicates whether image is vertically up-sampled
        let is_vertically_sampled = self
            .components
            .iter()
            .any(|c| c.sample_ratio == SampleRatios::HV || c.sample_ratio == SampleRatios::V);
        let mut comp_len = self.components.len();
        // If we are moving from YCbCr-> Luma, we do not allocate storage for other components, so we
        // will panic when we are trying to read samples, so for that case,
        // hardcode it so that we  don't panic when doing
        //   *samp = &samples[j][pos * padded_width..(pos + 1) * padded_width]
        if out_colorspace_components < comp_len && self.options.jpeg_get_out_colorspace() == Luma {
            comp_len = out_colorspace_components;
        }
        let mut color_conv_function =
            |num_iters: usize, samples: [&[i16]; 4]| -> Result<(), DecodeErrors> {
                for (pos, output) in pixels[px..]
                    .chunks_exact_mut(width * out_colorspace_components)
                    .take(num_iters)
                    .enumerate()
                {
                    let mut raw_samples: [&[i16]; 4] = [&[], &[], &[], &[]];
                    // iterate over each line, since color-convert needs only
                    // one line
                    for (j, samp) in raw_samples.iter_mut().enumerate().take(comp_len) {
                        *samp = &samples[j][pos * padded_width..(pos + 1) * padded_width];
                    }
                    color_convert(
                        &raw_samples,
                        self.color_convert_16,
                        self.input_colorspace,
                        self.options.jpeg_get_out_colorspace(),
                        output,
                        width,
                        padded_width
                    )?;
                    px += width * out_colorspace_components;
                }
                Ok(())
            };
        let comps = &mut self.components[..];
        if self.is_interleaved && self.options.jpeg_get_out_colorspace() != ColorSpace::Luma {
            {
                // duplicated so that we can check that samples match
                // Fixes bug https://github.com/etemesi254/zune-image/issues/151
                let mut samples: [&[i16]; 4] = [&[], &[], &[], &[]];
                for (samp, component) in samples.iter_mut().zip(comps.iter()) {
                    *samp = if component.sample_ratio == SampleRatios::None {
                        &component.raw_coeff
                    } else {
                        &component.upsample_dest
                    };
                }
            }
            for comp in comps.iter_mut() {
                upsample(comp, mcu_height, i, upsampler_scratch_space);
            }
            if is_vertically_sampled {
                if i > 0 {
                    // write the last line, it wasn't  up-sampled as we didn't have row_down
                    // yet
                    let mut samples: [&[i16]; 4] = [&[], &[], &[], &[]];
                    for (samp, component) in samples.iter_mut().zip(comps.iter()) {
                        *samp = &component.first_row_upsample_dest;
                    }
                    // ensure length matches for all samples
                    let first_len = samples[0].len();
                    for samp in samples.iter().take(comp_len) {
                        assert_eq!(first_len, samp.len());
                    }
                    let num_iters = self.coeff * self.v_max;
                    color_conv_function(num_iters, samples)?;
                }
                // After upsampling the last row, save  any row that can be used for
                // a later upsampling,
                //
                // E.g the Y sample is not sampled but we haven't finished upsampling the last row of
                // the previous mcu, since we don't have the down row, so save it
                for component in comps.iter_mut() {
                    // copy last row to be used for the  next color conversion
                    let size = component.vertical_sample
                        * component.width_stride
                        * component.sample_ratio.sample();
                    let last_bytes = component.raw_coeff.rchunks_exact_mut(size).next().unwrap();
                    component
                        .first_row_upsample_dest
                        .copy_from_slice(last_bytes);
                }
            }
            let mut samples: [&[i16]; 4] = [&[], &[], &[], &[]];
            for (samp, component) in samples.iter_mut().zip(comps.iter()) {
                *samp = if component.sample_ratio == SampleRatios::None {
                    &component.raw_coeff
                } else {
                    &component.upsample_dest
                };
            }
            // we either do 7 or 8 MCU's depending on the state, this only applies to
            // vertically sampled images
            //
            // for rows up until the last MCU, we do not upsample the last stride of the MCU
            // which means that the number of iterations should take that into account is one less the
            // up-sampled size
            //
            // For the last MCU, we upsample the last stride, meaning that if we hit the last MCU, we
            // should sample full raw coeffs
            let is_last_considered = is_vertically_sampled && (i != mcu_height.saturating_sub(1));
            let num_iters = (8 - usize::from(is_last_considered)) * self.coeff * self.v_max;
            color_conv_function(num_iters, samples)?;
        } else {
            let mut channels_ref: [&[i16]; MAX_COMPONENTS] = [&[]; MAX_COMPONENTS];
            self.components
                .iter()
                .enumerate()
                .for_each(|(pos, x)| channels_ref[pos] = &x.raw_coeff);
            color_conv_function(8 * self.coeff, channels_ref)?;
        }
        *pixels_written = px;
        Ok(())
    }
 }
 // #[cfg(test)]
 // mod tests {
 //     use zune_core::bytestream::ZCursor;
 //
 //     use crate::JpegDecoder;
 //
 //     #[test]
 //     fn im() {
 //         let image = std::fs::read("/home/caleb/Downloads/re.jpg").unwrap();
 //         JpegDecoder::new(ZCursor::new(&image)).decode().unwrap();
 //     }
 // }
--- a/third_party/zune-jpeg/src/mcu_prog.rs
+++ b/third_party/zune-jpeg/src/mcu_prog.rs
@ -0,0 +1,617 @@
 /*
 * Copyright (c) 2023.
 *
 * This software is free software;
 *
 * You can redistribute it or modify it under terms of the MIT, Apache License or Zlib license
 */
 //!Routines for progressive decoding
 /*
 This file is needlessly complicated,
 It is that way to ensure we don't burn memory anyhow
 Memory is a scarce resource in some environments, I would like this to be viable
 in such environments
 Half of the complexity comes from the jpeg spec, because progressive decoding,
 is one hell of a ride.
 */
 use alloc::string::ToString;
 use alloc::vec::Vec;
 use alloc::{format, vec};
 use core::cmp::min;
 use zune_core::bytestream::{ZByteReaderTrait, ZReader};
 use zune_core::colorspace::ColorSpace;
 use zune_core::log::{debug, error, warn};
 use crate::bitstream::BitStream;
 use crate::components::{ComponentID, SampleRatios};
 use crate::decoder::{JpegDecoder, MAX_COMPONENTS};
 use crate::errors::DecodeErrors;
 use crate::errors::DecodeErrors::Format;
 use crate::headers::{parse_huffman, parse_sos};
 use crate::marker::Marker;
 use crate::mcu::DCT_BLOCK;
 use crate::misc::{calculate_padded_width, setup_component_params};
 impl<T: ZByteReaderTrait> JpegDecoder<T> {
    /// Decode a progressive image.
    ///
    /// Allocates one coefficient buffer per input component, parses the first
    /// scan, then keeps reading markers until `EOI`:
    ///
    /// - `DHT` parses a new set of Huffman tables.
    /// - `SOS` parses the scan header and the entropy coded data of that scan.
    /// - Any other marker ends the loop.
    ///
    /// Whatever has been accumulated is then handed to
    /// `finish_progressive_decoding`, which writes into `pixels`.
    ///
    /// # Errors
    /// - Errors from setup, header parsing, scan decoding and the final
    ///   reconstruction are propagated.
    /// - `DecodeErrors::Format` if the input colorspace expects more
    ///   components than were found, or if the number of scans exceeds
    ///   `jpeg_get_max_scans()`.
    /// - `DecodeErrors::FormatStatic` if no marker follows the first scan.
    /// - A failure to read the next marker is returned in strict mode. In
    ///   non-strict mode it is logged and decoding finishes with the scans
    ///   read so far.
    #[allow(
        clippy::needless_range_loop,
        clippy::cast_sign_loss,
        clippy::redundant_else,
        clippy::too_many_lines
    )]
    #[inline(never)]
    pub(crate) fn decode_mcu_ycbcr_progressive(
        &mut self, pixels: &mut [u8]
    ) -> Result<(), DecodeErrors> {
        setup_component_params(self)?;
        let mut mcu_height;
        // memory location for decoded pixels for components
        let mut block: [Vec<i16>; MAX_COMPONENTS] = [vec![], vec![], vec![], vec![]];
        let mut mcu_width;
        // the first scan is parsed before the marker loop, so start at one
        let mut seen_scans = 1;
        if self.input_colorspace == ColorSpace::Luma && self.is_interleaved {
            warn!("Grayscale image with down-sampled component, resetting component details");
            self.reset_params();
        }
        if self.is_interleaved {
            // this helps us catch component errors.
            self.set_upsampling()?;
        }
        if self.is_interleaved {
            mcu_width = self.mcu_x;
            mcu_height = self.mcu_y;
        } else {
            mcu_width = (self.info.width as usize + 7) / 8;
            mcu_height = (self.info.height as usize + 7) / 8;
        }
        if self.is_interleaved
            && self.input_colorspace.num_components() > 1
            && self.options.jpeg_get_out_colorspace().num_components() == 1
            && (self.sub_sample_ratio == SampleRatios::V
                || self.sub_sample_ratio == SampleRatios::HV)
        {
            // For a specific set of images, e.g interleaved,
            // when converting from YcbCr to grayscale, we need to
            // take into account mcu height since the MCU decoding needs to take
            // it into account for padding purposes and the post processor
            // parses two rows per mcu width.
            //
            // set coeff to be 2 to ensure that we increment two rows
            // for every mcu processed also
            mcu_height *= self.v_max;
            mcu_height /= self.h_max;
            self.coeff = 2;
        }
        // from here on `mcu_width` counts coefficients (64 per block), not blocks
        mcu_width *= 64;
        if self.input_colorspace.num_components() > self.components.len() {
            let msg = format!(
                " Expected {} number of components but found {}",
                self.input_colorspace.num_components(),
                self.components.len()
            );
            return Err(DecodeErrors::Format(msg));
        }
        for i in 0..self.input_colorspace.num_components() {
            let comp = &self.components[i];
            let len = mcu_width * comp.vertical_sample * comp.horizontal_sample * mcu_height;
            block[i] = vec![0; len];
        }
        let mut stream = BitStream::new_progressive(
            self.succ_high,
            self.succ_low,
            self.spec_start,
            self.spec_end
        );
        // there are multiple scans in the stream, this should resolve the first scan
        self.parse_entropy_coded_data(&mut stream, &mut block)?;
        // extract marker
        let mut marker = stream
            .marker
            .take()
            .ok_or(DecodeErrors::FormatStatic("Marker missing where expected"))?;
        // if marker is EOI, we are done, otherwise continue scanning.
        //
        // In case we have a premature image, we print a warning or return
        // an error, depending on the strictness of the decoder, so there
        // is that logic to handle too
        'eoi: while marker != Marker::EOI {
            match marker {
                Marker::DHT => {
                    parse_huffman(self)?;
                }
                Marker::SOS => {
                    parse_sos(self)?;
                    stream.update_progressive_params(
                        self.succ_high,
                        self.succ_low,
                        self.spec_start,
                        self.spec_end
                    );
                    // after every SOS, marker, parse data for that scan.
                    self.parse_entropy_coded_data(&mut stream, &mut block)?;
                    // extract marker, might either indicate end of image or we continue
                    // scanning(hence the continue statement to determine).
                    match get_marker(&mut self.stream, &mut stream) {
                        Ok(marker_n) => {
                            marker = marker_n;
                            seen_scans += 1;
                            if seen_scans > self.options.jpeg_get_max_scans() {
                                return Err(DecodeErrors::Format(format!(
                                    "Too many scans, exceeded limit of {}",
                                    self.options.jpeg_get_max_scans()
                                )));
                            }
                            stream.reset();
                            continue 'eoi;
                        }
                        Err(msg) => {
                            if self.options.strict_mode() {
                                return Err(msg);
                            }
                            error!("{:?}", msg);
                            break 'eoi;
                        }
                    }
                }
                _ => {
                    break 'eoi;
                }
            }
            // only the DHT arm falls through to here
            match get_marker(&mut self.stream, &mut stream) {
                Ok(marker_n) => {
                    marker = marker_n;
                }
                Err(e) => {
                    if self.options.strict_mode() {
                        return Err(e);
                    }
                    error!("{}", e);
                    // `marker` still holds the DHT we just handled; looping
                    // again would re-parse a Huffman table from wherever the
                    // reader stopped. Stop scanning instead, as the SOS arm
                    // does, and finish with what has been decoded.
                    break 'eoi;
                }
            }
        }
        self.finish_progressive_decoding(&block, mcu_width, pixels)
    }
    #[allow(clippy::too_many_lines, clippy::cast_sign_loss)]
    fn parse_entropy_coded_data(
        &mut self, stream: &mut BitStream, buffer: &mut [Vec<i16>; MAX_COMPONENTS]
    ) -> Result<(), DecodeErrors> {
        stream.reset();
        self.components.iter_mut().for_each(|x| x.dc_pred = 0);
        if usize::from(self.num_scans) > self.input_colorspace.num_components() {
            return Err(Format(format!(
                "Number of scans {} cannot be greater than number of components, {}",
                self.num_scans,
                self.input_colorspace.num_components()
            )));
        }
        if self.num_scans == 1 {
            // Safety checks
            if self.spec_end != 0 && self.spec_start == 0 {
                return Err(DecodeErrors::FormatStatic(
                    "Can't merge DC and AC corrupt jpeg"
                ));
            }
            // non interleaved data, process one block at a time in trivial scanline order
            let k = self.z_order[0];
            if k >= self.components.len() {
                return Err(DecodeErrors::Format(format!(
                    "Cannot find component {k}, corrupt image"
                )));
            }
            let (mcu_width, mcu_height);
            if self.components[k].component_id == ComponentID::Y
                && (self.components[k].vertical_sample != 1
                    || self.components[k].horizontal_sample != 1)
                || !self.is_interleaved
            {
                // For Y channel  or non interleaved scans ,
                // mcu's is the image dimensions divided by 8
                mcu_width = ((self.info.width + 7) / 8) as usize;
                mcu_height = ((self.info.height + 7) / 8) as usize;
            } else {
                // For other channels, in an interleaved mcu, number of MCU's
                // are determined by some weird maths done in headers.rs->parse_sos()
                mcu_width = self.mcu_x;
                mcu_height = self.mcu_y;
            }
            for i in 0..mcu_height {
                for j in 0..mcu_width {
                    if self.spec_start != 0 && self.succ_high == 0 && stream.eob_run > 0 {
                        // handle EOB runs here.
                        stream.eob_run -= 1;
                        continue;
                    }
                    let start = 64 * (j + i * (self.components[k].width_stride / 8));
                    let data: &mut [i16; 64] = buffer
                        .get_mut(k)
                        .unwrap()
                        .get_mut(start..start + 64)
                        .unwrap()
                        .try_into()
                        .unwrap();
                    if self.spec_start == 0 {
                        let pos = self.components[k].dc_huff_table & (MAX_COMPONENTS - 1);
                        let dc_table = self
                            .dc_huffman_tables
                            .get(pos)
                            .ok_or(DecodeErrors::FormatStatic(
                                "No huffman table for DC component"
                            ))?
                            .as_ref()
                            .ok_or(DecodeErrors::FormatStatic(
                                "Huffman table at index  {} not initialized"
                            ))?;
                        let dc_pred = &mut self.components[k].dc_pred;
                        if self.succ_high == 0 {
                            // first scan for this mcu
                            stream.decode_prog_dc_first(
                                &mut self.stream,
                                dc_table,
                                &mut data[0],
                                dc_pred
                            )?;
                        } else {
                            // refining scans for this MCU
                            stream.decode_prog_dc_refine(&mut self.stream, &mut data[0])?;
                        }
                    } else {
                        let pos = self.components[k].ac_huff_table;
                        let ac_table = self
                            .ac_huffman_tables
                            .get(pos)
                            .ok_or_else(|| {
                                DecodeErrors::Format(format!(
                                    "No huffman table for component:{pos}"
                                ))
                            })?
                            .as_ref()
                            .ok_or_else(|| {
                                DecodeErrors::Format(format!(
                                    "Huffman table at index  {pos} not initialized"
                                ))
                            })?;
                        if self.succ_high == 0 {
                            debug_assert!(stream.eob_run == 0, "EOB run is not zero");
                            stream.decode_mcu_ac_first(&mut self.stream, ac_table, data)?;
                        } else {
                            // refinement scan
                            stream.decode_mcu_ac_refine(&mut self.stream, ac_table, data)?;
                        }
                    }
                    // + EOB and investigate effect.
                    self.todo -= 1;
                    if self.todo == 0 {
                        self.handle_rst(stream)?;
                    }
                }
            }
        } else {
            if self.spec_end != 0 {
                return Err(DecodeErrors::HuffmanDecode(
                    "Can't merge dc and AC corrupt jpeg".to_string()
                ));
            }
            // process scan n elements in order
            // Do the error checking with allocs here.
            // Make the one in the inner loop free of allocations.
            for k in 0..self.num_scans {
                let n = self.z_order[k as usize];
                if n >= self.components.len() {
                    return Err(DecodeErrors::Format(format!(
                        "Cannot find component {n}, corrupt image"
                    )));
                }
                let component = &mut self.components[n];
                let _ = self
                    .dc_huffman_tables
                    .get(component.dc_huff_table)
                    .ok_or_else(|| {
                        DecodeErrors::Format(format!(
                            "No huffman table for component:{}",
                            component.dc_huff_table
                        ))
                    })?
                    .as_ref()
                    .ok_or_else(|| {
                        DecodeErrors::Format(format!(
                            "Huffman table at index  {} not initialized",
                            component.dc_huff_table
                        ))
                    })?;
            }
            // Interleaved scan
            // Components shall not be interleaved in progressive mode, except for
            // the DC coefficients in the first scan for each component of a progressive frame.
            for i in 0..self.mcu_y {
                for j in 0..self.mcu_x {
                    // process scan n elements in order
                    for k in 0..self.num_scans {
                        let n = self.z_order[k as usize];
                        let component = &mut self.components[n];
                        let huff_table = self
                            .dc_huffman_tables
                            .get(component.dc_huff_table)
                            .ok_or(DecodeErrors::FormatStatic("No huffman table for component"))?
                            .as_ref()
                            .ok_or(DecodeErrors::FormatStatic(
                                "Huffman table at index not initialized"
                            ))?;
                        for v_samp in 0..component.vertical_sample {
                            for h_samp in 0..component.horizontal_sample {
                                let x2 = j * component.horizontal_sample + h_samp;
                                let y2 = i * component.vertical_sample + v_samp;
                                let position = 64 * (x2 + y2 * component.width_stride / 8);
                                let data = &mut buffer[n][position];
                                if self.succ_high == 0 {
                                    stream.decode_prog_dc_first(
                                        &mut self.stream,
                                        huff_table,
                                        data,
                                        &mut component.dc_pred
                                    )?;
                                } else {
                                    stream.decode_prog_dc_refine(&mut self.stream, data)?;
                                }
                            }
                        }
                    }
                    // We want wrapping subtraction here because it means
                    // we get a higher number in the case this underflows
                    self.todo = self.todo.wrapping_sub(1);
                    // after every scan that's a mcu, count down restart markers.
                    if self.todo == 0 {
                        self.handle_rst(stream)?;
                    }
                }
            }
        }
        return Ok(());
    }
    /// Convert the accumulated progressive coefficients into output pixels.
    ///
    /// For every MCU row, each needed component has its 8x8 coefficient blocks
    /// dequantized with the component's quantization table and passed through
    /// `self.idct_func` into the component's `raw_coeff` scratch buffer. The row is
    /// then handed to `self.post_process`.
    ///
    /// # Arguments
    /// - `block`: per-component coefficient buffers, 64 `i16` values per 8x8 block.
    /// - `_mcu_width`: unused.
    /// - `pixels`: output buffer, forwarded to `self.post_process`.
    ///
    /// # Errors
    /// Propagates any error returned by `self.post_process`.
    #[allow(clippy::too_many_lines)]
    #[allow(clippy::needless_range_loop, clippy::cast_sign_loss)]
    fn finish_progressive_decoding(
        &mut self, block: &[Vec<i16>; MAX_COMPONENTS], _mcu_width: usize, pixels: &mut [u8]
    ) -> Result<(), DecodeErrors> {
        // The same procedure is repeated for every MCU row, so the temporary
        // storage only needs to hold a single MCU row and is reused on every
        // iteration instead of being reallocated.
        //
        // The complete coefficient buffers (`block`) and the output buffer
        // (`pixels`) are already held in memory by the caller, so keeping the
        // scratch storage small matters.
        let mcu_height = if self.is_interleaved {
            self.mcu_y
        } else {
            // Non-interleaved images are processed one row of 8x8 blocks at a time:
            // the row count is the height rounded up to a multiple of 8, divided by 8.
            //
            // Widen to `usize` *before* adding 7. Doing the addition in the header's
            // narrower integer type overflows for heights within 7 of that type's
            // maximum (panic in debug builds, silently zero rows in release builds).
            (usize::from(self.info.height) + 7) / 8
        };
        // Up-sampling scratch space is only required for interleaved images;
        // for everything else its size collapses to zero.
        let is_hv = usize::from(self.is_interleaved);
        let upsampler_scratch_size = is_hv * self.components[0].width_stride;
        let width = usize::from(self.info.width);
        let padded_width = calculate_padded_width(width, self.sub_sample_ratio);
        let mut upsampler_scratch_space = vec![0; upsampler_scratch_size];
        // Holds one dequantized 8x8 block between dequantization and IDCT.
        let mut tmp = [0_i32; DCT_BLOCK];
        for (pos, comp) in self.components.iter_mut().enumerate() {
            // Allocate only needed components.
            //
            // A component is needed when its index is below the number of output
            // colorspace components (`min(n - 1, pos) == pos` is `pos <= n - 1`).
            // For the special colorspaces YCCK and CMYK every component is needed.
            if min(
                self.options.jpeg_get_out_colorspace().num_components() - 1,
                pos
            ) == pos
                || self.input_colorspace == ColorSpace::YCCK
                || self.input_colorspace == ColorSpace::CMYK
            {
                // Enough space for one whole MCU row of this component:
                // one stride per pixel row, 8 pixel rows per block row and
                // `vertical_sample` block rows per MCU row.
                let len = comp.width_stride * comp.vertical_sample * 8;
                comp.needed = true;
                comp.raw_coeff = vec![0; len];
            } else {
                comp.needed = false;
            }
        }
        let mut pixels_written = 0;
        // dequantize, idct and color convert.
        for i in 0..mcu_height {
            'component: for (position, component) in &mut self.components.iter_mut().enumerate() {
                if !component.needed {
                    continue 'component;
                }
                let qt_table = &component.quantization_table;
                // `step` is the number of coefficients this component contributes to
                // one MCU row: the component's buffer holds the whole channel, so
                // dividing its length by the number of MCU rows splits it evenly.
                let step = block[position].len() / mcu_height;
                // Coefficients belonging to MCU row `i`.
                let start = i * step;
                let slice = &block[position][start..start + step];
                let temp_channel = &mut component.raw_coeff;
                // Number of 8x8 blocks across one stride of this component.
                let mcu_x = component.width_stride / 8;
                // One pass per block row inside the MCU row.
                for k in 0..component.vertical_sample {
                    for j in 0..mcu_x {
                        // Each block row occupies `8 * width_stride` coefficients
                        // (`mcu_x` blocks of 64); skip the block rows already handled.
                        let width_stride = k * 8 * component.width_stride;
                        let start = j * 64 + width_stride;
                        // dequantize
                        for ((x, out), qt_val) in slice[start..start + 64]
                            .iter()
                            .zip(tmp.iter_mut())
                            .zip(qt_table.iter())
                        {
                            *out = i32::from(*x) * qt_val;
                        }
                        // Destination of this block inside the scratch channel;
                        // consecutive blocks sit 8 samples apart horizontally.
                        let sl = &mut temp_channel[component.idct_pos..];
                        component.idct_pos += 8;
                        // `tmp` now contains a dequantized block, so run the IDCT on it.
                        (self.idct_func)(&mut tmp, sl, component.width_stride);
                    }
                    // The inner loop advanced `idct_pos` by exactly one stride
                    // (`mcu_x * 8`). Each IDCT call is given `width_stride` as its
                    // output stride and (per the original note) writes 8 rows, so
                    // skip the remaining 7 strides to reach the next block row.
                    component.idct_pos += 7 * component.width_stride;
                }
                // Start from the top of the scratch buffer for the next MCU row.
                component.idct_pos = 0;
            }
            // Hand the finished MCU row to post-processing.
            self.post_process(
                pixels,
                i,
                mcu_height,
                width,
                padded_width,
                &mut pixels_written,
                &mut upsampler_scratch_space
            )?;
        }
        debug!("Finished decoding image");
        return Ok(());
    }
    /// Force the decoder to treat the image as a plain, non-subsampled
    /// single-component image.
    ///
    /// Grayscale files that declare a down-sampled `Y` component exist in the wild.
    /// Since `Y` is the only component, the sampling factors would throw off later
    /// calculations, so every sampling-related field is reset to what a
    /// non-sampled image would carry and the output colorspace is set to luma.
    pub(crate) fn reset_params(&mut self) {
        // Image-wide sampling state: nothing is sub-sampled or interleaved.
        self.h_max = 1;
        self.v_max = 1;
        self.is_interleaved = false;
        self.sub_sample_ratio = SampleRatios::None;
        // Output is a single luma channel.
        self.options = self.options.jpeg_set_out_colorspace(ColorSpace::Luma);

        // Stride of the only component: image width rounded up to a multiple of 8.
        let padded_stride = (((self.info.width as usize) + 7) / 8) * 8;
        let first = &mut self.components[0];
        first.horizontal_sample = 1;
        first.vertical_sample = 1;
        first.width_stride = padded_stride;
    }
 }
 /// Fetch the next marker.
 ///
 /// A marker already stored in `stream.marker` is consumed and returned first.
 /// Otherwise bytes are read from `reader` until a `0xFF` byte is followed by a
 /// non-zero, non-`0xFF` byte, which is then interpreted as the marker code.
 ///
 /// # Errors
 /// - `DecodeErrors::Format` when the marker code is not recognised.
 /// - `DecodeErrors::ExhaustedData` when the reader reports end of file before a
 ///   marker is found.
 /// - Any error raised by the reader itself.
 fn get_marker<T>(reader: &mut ZReader<T>, stream: &mut BitStream) -> Result<Marker, DecodeErrors>
 where
    T: ZByteReaderTrait
 {
    // A marker remembered by the bit-stream wins; taking it clears the slot.
    if let Some(pending) = stream.marker.take() {
        return Ok(pending);
    }
    loop {
        if reader.eof()? {
            return Err(DecodeErrors::ExhaustedData);
        }
        // Markers always start with 0xFF, anything else is skipped.
        if reader.read_u8_err()? != 0xFF {
            continue;
        }
        // Some images repeat 0xFF before the marker code; skip the repeats.
        let mut r = reader.read_u8_err()?;
        while r == 0xFF {
            r = reader.read_u8_err()?;
        }
        // 0xFF followed by zero is not a marker, keep scanning.
        if r == 0 {
            continue;
        }
        return Marker::from_u8(r)
            .ok_or_else(|| DecodeErrors::Format(format!("Unknown marker 0xFF{r:X}")));
    }
 }
--- a/third_party/zune-jpeg/src/misc.rs
+++ b/third_party/zune-jpeg/src/misc.rs
@ -0,0 +1,431 @@
 /*
 * Copyright (c) 2023.
 *
 * This software is free software;
 *
 * You can redistribute it or modify it under terms of the MIT, Apache License or Zlib license
 */
 //!Miscellaneous stuff
 #![allow(dead_code)]
 use alloc::format;
 use core::cmp::max;
 use core::fmt;
 use zune_core::bytestream::ZByteReaderTrait;
 use zune_core::colorspace::ColorSpace;
 use zune_core::log::trace;
 use crate::components::{ComponentID, SampleRatios};
 use crate::errors::DecodeErrors;
 use crate::huffman::HuffmanTable;
 use crate::JpegDecoder;
 /// Marker code for a baseline DCT frame (Huffman coding)
 pub const START_OF_FRAME_BASE: u16 = 0xffc0;
 /// Marker code for an extended sequential DCT frame, Huffman coding
 pub const START_OF_FRAME_EXT_SEQ: u16 = 0xffc1;
 /// Marker code for a progressive DCT frame, Huffman coding
 pub const START_OF_FRAME_PROG_DCT: u16 = 0xffc2;
 /// Marker code for a lossless (sequential) frame, Huffman coding
 pub const START_OF_FRAME_LOS_SEQ: u16 = 0xffc3;
 /// Marker code for an extended sequential DCT frame, arithmetic coding
 pub const START_OF_FRAME_EXT_AR: u16 = 0xffc9;
 /// Marker code for a progressive DCT frame, arithmetic coding
 pub const START_OF_FRAME_PROG_DCT_AR: u16 = 0xffca;
 /// Marker code for a lossless (sequential) frame, arithmetic coding
 pub const START_OF_FRAME_LOS_SEQ_AR: u16 = 0xffcb;
 /// Lookup table that undoes the zig-zag ordering of coefficients.
 ///
 /// Entry `i` is the natural (row-major) index inside an 8x8 block of the
 /// coefficient stored at zig-zag position `i`.
 ///
 /// The table carries 16 extra trailing entries, all `63`, so indices from
 /// 64 up to 79 still land on the last coefficient instead of running past
 /// the end of the table.
 #[rustfmt::skip]
 pub const UN_ZIGZAG: [usize; 64 + 16] = [
     0,  1,  8, 16,  9,  2,  3, 10,
    17, 24, 32, 25, 18, 11,  4,  5,
    12, 19, 26, 33, 40, 48, 41, 34,
    27, 20, 13,  6,  7, 14, 21, 28,
    35, 42, 49, 56, 57, 50, 43, 36,
    29, 22, 15, 23, 30, 37, 44, 51,
    58, 59, 52, 45, 38, 31, 39, 46,
    53, 60, 61, 54, 47, 55, 62, 63,
    // Padding: keeps slightly out-of-range positions inside the table
    63, 63, 63, 63, 63, 63, 63, 63,
    63, 63, 63, 63, 63, 63, 63, 63
 ];
 /// Wrapper that forces its contents onto a 16 byte boundary.
 ///
 /// The wrapped value is reachable through the public `.0` field.
 #[derive(Clone)]
 #[repr(align(16))]
 pub struct Aligned16<T: ?Sized>(pub T);
 /// The default is the wrapped type's default value, aligned.
 impl<T: Default> Default for Aligned16<T> {
    fn default() -> Self {
        Self(Default::default())
    }
 }
 /// Wrapper that forces its contents onto a 32 byte boundary.
 ///
 /// The wrapped value is reachable through the public `.0` field.
 #[derive(Clone)]
 #[repr(align(32))]
 pub struct Aligned32<T: ?Sized>(pub T);
 /// The default is the wrapped type's default value, aligned.
 impl<T: Default> Default for Aligned32<T> {
    fn default() -> Self {
        Self(Default::default())
    }
 }
 /// The different kinds of Start Of Frame (SOF) markers.
 ///
 /// The kind tells which encoding process a file uses: lossy (DCT) or
 /// lossless compression, sequential or progressive ordering, and Huffman or
 /// arithmetic entropy coding.
 #[allow(clippy::upper_case_acronyms)]
 #[derive(Clone, Copy, PartialEq, Eq)]
 pub enum SOFMarkers {
    /// Baseline DCT
    BaselineDct,
    /// SOF_1: extended sequential DCT, Huffman coding
    ExtendedSequentialHuffman,
    /// Progressive DCT, Huffman coding
    ProgressiveDctHuffman,
    /// Lossless (sequential), Huffman coding
    LosslessHuffman,
    /// Extended sequential DCT, arithmetic coding
    ExtendedSequentialDctArithmetic,
    /// Progressive DCT, arithmetic coding
    ProgressiveDctArithmetic,
    /// Lossless (sequential), arithmetic coding
    LosslessArithmetic
 }
 /// Baseline DCT is the default frame kind.
 impl Default for SOFMarkers {
    fn default() -> Self {
        SOFMarkers::BaselineDct
    }
 }
 impl SOFMarkers {
    /// Check if a certain marker is sequential DCT or not
    pub fn is_sequential_dct(self) -> bool {
        matches!(
            self,
            Self::BaselineDct
                | Self::ExtendedSequentialHuffman
                | Self::ExtendedSequentialDctArithmetic
        )
    }
    /// Check if a marker is a Lossles type or not
    pub fn is_lossless(self) -> bool {
        matches!(self, Self::LosslessHuffman | Self::LosslessArithmetic)
    }
    /// Check whether a marker is a progressive marker or not
    pub fn is_progressive(self) -> bool {
        matches!(
            self,
            Self::ProgressiveDctHuffman | Self::ProgressiveDctArithmetic
        )
    }
    /// Create a marker from an integer
    pub fn from_int(int: u16) -> Option<SOFMarkers> {
        match int {
            START_OF_FRAME_BASE => Some(Self::BaselineDct),
            START_OF_FRAME_PROG_DCT => Some(Self::ProgressiveDctHuffman),
            START_OF_FRAME_PROG_DCT_AR => Some(Self::ProgressiveDctArithmetic),
            START_OF_FRAME_LOS_SEQ => Some(Self::LosslessHuffman),
            START_OF_FRAME_LOS_SEQ_AR => Some(Self::LosslessArithmetic),
            START_OF_FRAME_EXT_SEQ => Some(Self::ExtendedSequentialHuffman),
            START_OF_FRAME_EXT_AR => Some(Self::ExtendedSequentialDctArithmetic),
            _ => None
        }
    }
 }
 /// Debug output is a human readable description of the frame kind.
 impl fmt::Debug for SOFMarkers {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        // Pick the description first, then emit it in one place.
        let description = match *self {
            Self::BaselineDct => "Baseline DCT",
            Self::ExtendedSequentialHuffman => "Extended sequential DCT, Huffman Coding",
            Self::ProgressiveDctHuffman => "Progressive DCT,Huffman Encoding",
            Self::LosslessHuffman => "Lossless (sequential) Huffman encoding",
            Self::ExtendedSequentialDctArithmetic => "Extended sequential DCT, arithmetic coding",
            Self::ProgressiveDctArithmetic => "Progressive DCT, arithmetic coding",
            Self::LosslessArithmetic => "Lossless (sequential) arithmetic coding"
        };
        f.write_str(description)
    }
 }
 /// Set up component parameters.
 ///
 /// Modifies the decoder and its components in place, filling in the values
 /// other parts of the decoder rely on:
 ///
 /// - the maximum sampling factors (`h_max`, `v_max`), the MCU dimensions and
 ///   the number of MCUs per row and column,
 /// - whether the image is interleaved,
 /// - per component: its dimensions (`x`, `y`), `w2`, its quantization table
 ///   and its `width_stride`,
 /// - the `fix_an_annoying_bug` marker for unusual sampling combinations,
 /// - the default Huffman tables when the image is flagged as MJPEG.
 ///
 /// # Errors
 /// Returns `DecodeErrors::DqtError` when a component refers to a quantization
 /// table that was never defined.
 pub(crate) fn setup_component_params<T: ZByteReaderTrait>(
    img: &mut JpegDecoder<T>
 ) -> Result<(), DecodeErrors> {
    let img_width = img.width();
    let img_height = img.height();
    // in case of adobe app14 being present, zero may indicate
    // either CMYK if components are 4 or RGB if components are 3,
    // see https://docs.oracle.com/javase/6/docs/api/javax/imageio/metadata/doc-files/jpeg_metadata.html
    // so since we may not know how many number of components
    // we have when decoding app14, we have to defer that check
    // until now.
    //
    // We know adobe app14 was present since it's the only one that can modify
    // input colorspace to be CMYK
    if img.components.len() == 3 && img.input_colorspace == ColorSpace::CMYK {
        img.input_colorspace = ColorSpace::RGB;
    }
    // NOTE(review): `h_max`/`v_max` are updated while iterating, so a component
    // only sees the maxima of the components up to and including itself. This
    // presumably relies on the component with the largest sampling factors
    // coming first; confirm against the header parser.
    for component in &mut img.components {
        // compute interleaved image info
        // h_max contains the maximum horizontal sampling factor
        img.h_max = max(img.h_max, component.horizontal_sample);
        // v_max contains the maximum vertical sampling factor
        img.v_max = max(img.v_max, component.vertical_sample);
        img.mcu_width = img.h_max * 8;
        img.mcu_height = img.v_max * 8;
        // Number of MCU's per width
        img.mcu_x = (usize::from(img.info.width) + img.mcu_width - 1) / img.mcu_width;
        // Number of MCU's per height
        img.mcu_y = (usize::from(img.info.height) + img.mcu_height - 1) / img.mcu_height;
        if img.h_max != 1 || img.v_max != 1 {
            // interleaved images have horizontal and vertical sampling factors
            // not equal to 1.
            img.is_interleaved = true;
        }
        // Extract quantization tables from the arrays into components
        let qt_table = *img.qt_tables[component.quantization_table_number as usize]
            .as_ref()
            .ok_or_else(|| {
                DecodeErrors::DqtError(format!(
                    "No quantization table for component {:?}",
                    component.component_id
                ))
            })?;
        // Component dimensions: ceil(image_dimension * sampling_factor / max_factor).
        let x = (usize::from(img_width) * component.horizontal_sample + img.h_max - 1) / img.h_max;
        // The height uses the *vertical* factors; the previous code reused the
        // horizontal ones here, giving wrong heights whenever the horizontal and
        // vertical sampling differ.
        let y = (usize::from(img_height) * component.vertical_sample + img.v_max - 1) / img.v_max;
        component.x = x;
        component.w2 = img.mcu_x * component.horizontal_sample * 8;
        // probably not needed. :)
        component.y = y;
        component.quantization_table = qt_table;
        // initially stride contains its horizontal sub-sampling
        component.width_stride *= img.mcu_x * 8;
    }
    {
        // Work-around for images with unusual sampling factors such as
        //
        // (2 2) None
        // (2 1) H
        // (2 1) H
        //
        // These images are not meant to exist, but they do occur in the wild,
        // so the decoder tries to cope with them.
        //
        // When the first Y component is sampled by 2 in either direction, every
        // non-Y component whose sampling factors are not (1 1) is tagged with
        // `fix_an_annoying_bug = 2`.
        //
        // If this wasn't present, self.upsample_dest would have the wrong length
        let mut handle_that_annoying_bug = false;
        if let Some(y_component) = img
            .components
            .iter()
            .find(|c| c.component_id == ComponentID::Y)
        {
            if y_component.horizontal_sample == 2 || y_component.vertical_sample == 2 {
                handle_that_annoying_bug = true;
            }
        }
        if handle_that_annoying_bug {
            for comp in &mut img.components {
                if (comp.component_id != ComponentID::Y)
                    && (comp.horizontal_sample != 1 || comp.vertical_sample != 1)
                {
                    comp.fix_an_annoying_bug = 2;
                }
            }
        }
    }
    if img.is_mjpeg {
        // Fill in the default Huffman tables for any slot the file left empty.
        fill_default_mjpeg_tables(
            img.is_progressive,
            &mut img.dc_huffman_tables,
            &mut img.ac_huffman_tables
        );
    }
    Ok(())
 }
 /// Calculate the padded width of an image.
 ///
 /// The width is rounded up to the next multiple of 8 when there is no
 /// horizontal sub-sampling (`None`, `V`) and to the next multiple of 16 when
 /// there is (`H`, `HV`).
 ///
 /// # Params
 /// - `actual_width`: Actual width of the image
 /// - `sub_sample`: Sub sampling ratio of the image
 ///
 /// # Returns
 /// The padded width, i.e. `actual_width` rounded up to a multiple of 8 or 16.
 pub fn calculate_padded_width(actual_width: usize, sub_sample: SampleRatios) -> usize {
    // Granularity the width has to be rounded up to.
    let chunk: usize = match sub_sample {
        // None+V sends one MCU row, widths come in chunks of 8
        SampleRatios::None | SampleRatios::V => 8,
        // H+HV: width can be expanded by up to 15 more bytes
        SampleRatios::H | SampleRatios::HV => 16
    };
    let rounding = chunk - 1;
    ((actual_width + rounding) / chunk) * chunk
 }
 // https://www.loc.gov/preservation/digital/formats/fdd/fdd000063.shtml
 // "Avery Lee, writing in the rec.video.desktop newsgroup in 2001, commented that "MJPEG, or at
 //  least the MJPEG in AVIs having the MJPG fourcc, is restricted JPEG with a fixed -- and
 //  *omitted* -- Huffman table. The JPEG must be YCbCr colorspace, it must be 4:2:2, and it must
 //  use basic Huffman encoding, not arithmetic or progressive.... You can indeed extract the
 //  MJPEG frames and decode them with a regular JPEG decoder, but you have to prepend the DHT
 //  segment to them, or else the decoder won't have any idea how to decompress the data.
 //  The exact table necessary is given in the OpenDML spec.""
 /// Fill the first two DC and AC Huffman table slots with the default tables
 /// used for MJPEG frames that ship without their own tables.
 ///
 /// Only slots that are currently `None` are written; tables that are already
 /// present are left untouched.
 ///
 /// # Params
 /// - `is_progressive`: forwarded to `HuffmanTable::new_unfilled` for every table
 /// - `dc_huffman_tables`: DC table slots, entries 0 and 1 may be filled
 /// - `ac_huffman_tables`: AC table slots, entries 0 and 1 may be filled
 ///
 /// # Panics
 /// Panics when either slice has fewer than two entries, or when
 /// `HuffmanTable::new_unfilled` fails for one of the built-in tables.
 pub fn fill_default_mjpeg_tables(
    is_progressive: bool, dc_huffman_tables: &mut [Option<HuffmanTable>],
    ac_huffman_tables: &mut [Option<HuffmanTable>]
 ) {
    // Section K.3.3
    trace!("Filling with default mjpeg tables");
    // Each table below is given as two arrays: 17 count values followed by
    // the symbol values.
    if dc_huffman_tables[0].is_none() {
        // Table K.3
        dc_huffman_tables[0] = Some(
            HuffmanTable::new_unfilled(
                &[
                    0x00, 0x00, 0x01, 0x05, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x00, 0x00, 0x00,
                    0x00, 0x00, 0x00, 0x00
                ],
                &[
                    0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B
                ],
                true,
                is_progressive
            )
            .unwrap()
        );
    }
    if dc_huffman_tables[1].is_none() {
        // Table K.4
        dc_huffman_tables[1] = Some(
            HuffmanTable::new_unfilled(
                &[
                    0x00, 0x00, 0x03, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x00,
                    0x00, 0x00, 0x00, 0x00
                ],
                &[
                    0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B
                ],
                true,
                is_progressive
            )
            .unwrap()
        );
    }
    if ac_huffman_tables[0].is_none() {
        // Table K.5
        ac_huffman_tables[0] = Some(
            HuffmanTable::new_unfilled(
                &[
                    0x00, 0x00, 0x02, 0x01, 0x03, 0x03, 0x02, 0x04, 0x03, 0x05, 0x05, 0x04, 0x04,
                    0x00, 0x00, 0x01, 0x7D
                ],
                &[
                    0x01, 0x02, 0x03, 0x00, 0x04, 0x11, 0x05, 0x12, 0x21, 0x31, 0x41, 0x06, 0x13,
                    0x51, 0x61, 0x07, 0x22, 0x71, 0x14, 0x32, 0x81, 0x91, 0xA1, 0x08, 0x23, 0x42,
                    0xB1, 0xC1, 0x15, 0x52, 0xD1, 0xF0, 0x24, 0x33, 0x62, 0x72, 0x82, 0x09, 0x0A,
                    0x16, 0x17, 0x18, 0x19, 0x1A, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2A, 0x34, 0x35,
                    0x36, 0x37, 0x38, 0x39, 0x3A, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4A,
                    0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5A, 0x63, 0x64, 0x65, 0x66, 0x67,
                    0x68, 0x69, 0x6A, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7A, 0x83, 0x84,
                    0x85, 0x86, 0x87, 0x88, 0x89, 0x8A, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98,
                    0x99, 0x9A, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, 0xA8, 0xA9, 0xAA, 0xB2, 0xB3,
                    0xB4, 0xB5, 0xB6, 0xB7, 0xB8, 0xB9, 0xBA, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7,
                    0xC8, 0xC9, 0xCA, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0xD7, 0xD8, 0xD9, 0xDA, 0xE1,
                    0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, 0xE8, 0xE9, 0xEA, 0xF1, 0xF2, 0xF3, 0xF4,
                    0xF5, 0xF6, 0xF7, 0xF8, 0xF9, 0xFA
                ],
                false,
                is_progressive
            )
            .unwrap()
        );
    }
    if ac_huffman_tables[1].is_none() {
        // Table K.6
        ac_huffman_tables[1] = Some(
            HuffmanTable::new_unfilled(
                &[
                    0x00, 0x00, 0x02, 0x01, 0x02, 0x04, 0x04, 0x03, 0x04, 0x07, 0x05, 0x04, 0x04,
                    0x00, 0x01, 0x02, 0x77
                ],
                &[
                    0x00, 0x01, 0x02, 0x03, 0x11, 0x04, 0x05, 0x21, 0x31, 0x06, 0x12, 0x41, 0x51,
                    0x07, 0x61, 0x71, 0x13, 0x22, 0x32, 0x81, 0x08, 0x14, 0x42, 0x91, 0xA1, 0xB1,
                    0xC1, 0x09, 0x23, 0x33, 0x52, 0xF0, 0x15, 0x62, 0x72, 0xD1, 0x0A, 0x16, 0x24,
                    0x34, 0xE1, 0x25, 0xF1, 0x17, 0x18, 0x19, 0x1A, 0x26, 0x27, 0x28, 0x29, 0x2A,
                    0x35, 0x36, 0x37, 0x38, 0x39, 0x3A, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49,
                    0x4A, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5A, 0x63, 0x64, 0x65, 0x66,
                    0x67, 0x68, 0x69, 0x6A, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7A, 0x82,
                    0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8A, 0x92, 0x93, 0x94, 0x95, 0x96,
                    0x97, 0x98, 0x99, 0x9A, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, 0xA8, 0xA9, 0xAA,
                    0xB2, 0xB3, 0xB4, 0xB5, 0xB6, 0xB7, 0xB8, 0xB9, 0xBA, 0xC2, 0xC3, 0xC4, 0xC5,
                    0xC6, 0xC7, 0xC8, 0xC9, 0xCA, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0xD7, 0xD8, 0xD9,
                    0xDA, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, 0xE8, 0xE9, 0xEA, 0xF2, 0xF3, 0xF4,
                    0xF5, 0xF6, 0xF7, 0xF8, 0xF9, 0xFA
                ],
                false,
                is_progressive
            )
            .unwrap()
        );
    }
 }
--- a/third_party/zune-jpeg/src/unsafe_utils.rs
+++ b/third_party/zune-jpeg/src/unsafe_utils.rs
@ -0,0 +1,4 @@
 #[cfg(all(feature = "x86", any(target_arch = "x86", target_arch = "x86_64")))]
 pub use crate::unsafe_utils_avx2::*;
 #[cfg(all(feature = "neon", target_arch = "aarch64"))]
 pub use crate::unsafe_utils_neon::*;
--- a/third_party/zune-jpeg/src/unsafe_utils_avx2.rs
+++ b/third_party/zune-jpeg/src/unsafe_utils_avx2.rs
@ -0,0 +1,223 @@
 /*
 * Copyright (c) 2023.
 *
 * This software is free software;
 *
 * You can redistribute it or modify it under terms of the MIT, Apache License or Zlib license
 */
 #![cfg(all(feature = "x86", any(target_arch = "x86", target_arch = "x86_64")))]
 //! This module provides unsafe ways to do some things
 #![allow(clippy::wildcard_imports)]
 #[cfg(target_arch = "x86")]
 use core::arch::x86::*;
 #[cfg(target_arch = "x86_64")]
 use core::arch::x86_64::*;
 use core::ops::{Add, AddAssign, Mul, MulAssign, Sub};
 /// Builds the 8-bit immediate expected by the shuffle/permute intrinsics.
 ///
 /// Stand-in for `_MM_SHUFFLE()`, which would require a nightly compiler.
 /// The four selectors are packed two bits apart, `z` in the highest
 /// position and `w` in the lowest. They are not masked here, so each one
 /// is expected to already be in `0..=3`.
 #[inline]
 const fn shuffle(z: i32, y: i32, x: i32, w: i32) -> i32 {
    let high = (z << 6) | (y << 4);
    let low = (x << 2) | w;
    high | low
 }
 /// An abstraction of an AVX ymm register that
 /// allows some things to not look ugly
 ///
 /// The operator implementations in this file all go through the `epi32`
 /// intrinsics, i.e. they treat the register as packed 32-bit integers.
 #[derive(Clone, Copy)]
 pub struct YmmRegister {
    /// The wrapped 256-bit AVX integer register
    pub(crate) mm256: __m256i
 }
 impl Add for YmmRegister {
    type Output = YmmRegister;
    /// Adds the packed 32-bit lanes of `rhs` to those of `self`.
    #[inline]
    fn add(self, rhs: Self) -> Self::Output {
        // NOTE(review): the intrinsic needs AVX2 and nothing here checks for it;
        // presumably callers only get here after feature detection - confirm.
        let sum = unsafe { _mm256_add_epi32(self.mm256, rhs.mm256) };
        YmmRegister { mm256: sum }
    }
 }
 impl Add<i32> for YmmRegister {
    type Output = YmmRegister;
    /// Adds the scalar `rhs` to every packed 32-bit lane of `self`.
    #[inline]
    fn add(self, rhs: i32) -> Self::Output {
        // NOTE(review): both intrinsics assume AVX support that is not checked here.
        let sum = unsafe {
            // broadcast the scalar to all lanes first
            let splat = _mm256_set1_epi32(rhs);
            _mm256_add_epi32(self.mm256, splat)
        };
        YmmRegister { mm256: sum }
    }
 }
 impl Sub for YmmRegister {
    type Output = YmmRegister;
    /// Subtracts the packed 32-bit lanes of `rhs` from those of `self`.
    #[inline]
    fn sub(self, rhs: Self) -> Self::Output {
        // NOTE(review): the intrinsic assumes AVX2 support that is not checked here.
        let difference = unsafe { _mm256_sub_epi32(self.mm256, rhs.mm256) };
        YmmRegister { mm256: difference }
    }
 }
 impl AddAssign for YmmRegister {
    /// In-place lane-wise 32-bit addition of `rhs`.
    #[inline]
    fn add_assign(&mut self, rhs: Self) {
        // NOTE(review): the intrinsic assumes AVX2 support that is not checked here.
        let sum = unsafe { _mm256_add_epi32(self.mm256, rhs.mm256) };
        self.mm256 = sum;
    }
 }
 impl AddAssign<i32> for YmmRegister {
    /// Adds the scalar `rhs` to every packed 32-bit lane, in place.
    #[inline]
    fn add_assign(&mut self, rhs: i32) {
        // NOTE(review): both intrinsics assume AVX support that is not checked here.
        self.mm256 = unsafe {
            let splat = _mm256_set1_epi32(rhs);
            _mm256_add_epi32(self.mm256, splat)
        };
    }
 }
 impl Mul for YmmRegister {
    type Output = YmmRegister;
    /// Lane-wise 32-bit multiplication keeping the low 32 bits of each product.
    #[inline]
    fn mul(self, rhs: Self) -> Self::Output {
        // NOTE(review): the intrinsic assumes AVX2 support that is not checked here.
        let product = unsafe { _mm256_mullo_epi32(self.mm256, rhs.mm256) };
        YmmRegister { mm256: product }
    }
 }
 impl Mul<i32> for YmmRegister {
    type Output = YmmRegister;
    /// Multiplies every packed 32-bit lane by the scalar `rhs`
    /// (low 32 bits of each product are kept).
    #[inline]
    fn mul(self, rhs: i32) -> Self::Output {
        // NOTE(review): both intrinsics assume AVX support that is not checked here.
        let product = unsafe {
            let splat = _mm256_set1_epi32(rhs);
            _mm256_mullo_epi32(self.mm256, splat)
        };
        YmmRegister { mm256: product }
    }
 }
 impl MulAssign for YmmRegister {
    /// In-place lane-wise 32-bit multiplication by `rhs`.
    #[inline]
    fn mul_assign(&mut self, rhs: Self) {
        // NOTE(review): the intrinsic assumes AVX2 support that is not checked here.
        let product = unsafe { _mm256_mullo_epi32(self.mm256, rhs.mm256) };
        self.mm256 = product;
    }
 }
 impl MulAssign<i32> for YmmRegister {
    /// Multiplies every packed 32-bit lane by the scalar `rhs`, in place.
    #[inline]
    fn mul_assign(&mut self, rhs: i32) {
        // NOTE(review): both intrinsics assume AVX support that is not checked here.
        self.mm256 = unsafe {
            let splat = _mm256_set1_epi32(rhs);
            _mm256_mullo_epi32(self.mm256, splat)
        };
    }
 }
 impl MulAssign<__m256i> for YmmRegister {
    /// In-place lane-wise 32-bit multiplication by a raw `__m256i` operand.
    #[inline]
    fn mul_assign(&mut self, rhs: __m256i) {
        // NOTE(review): the intrinsic assumes AVX2 support that is not checked here.
        let product = unsafe { _mm256_mullo_epi32(self.mm256, rhs) };
        self.mm256 = product;
    }
 }
 /// Short alias that keeps the `transpose` signature readable
 type Reg = YmmRegister;
 /// Transpose an array of 8 by 8 i32's using avx intrinsics
 ///
 /// `v0`..`v7` are read as the eight input registers and are overwritten
 /// with the result.
 ///
 /// This was translated from [here](https://newbedev.com/transpose-an-8x8-float-using-avx-avx2)
 ///
 /// # Safety
 /// The function is compiled with the `avx2` target feature enabled, so the
 /// caller must make sure the running CPU supports AVX2.
 #[allow(unused_parens, clippy::too_many_arguments)]
 #[target_feature(enable = "avx2")]
 #[inline]
 pub unsafe fn transpose(
    v0: &mut Reg, v1: &mut Reg, v2: &mut Reg, v3: &mut Reg, v4: &mut Reg, v5: &mut Reg,
    v6: &mut Reg, v7: &mut Reg
 ) {
    // Each macro takes two inputs ($v0, $v1) and assigns two outputs ($v2, $v3).

    // Permute the 64-bit quarters of both inputs, then interleave at 32-bit granularity.
    macro_rules! merge_epi32 {
        ($v0:tt,$v1:tt,$v2:tt,$v3:tt) => {
            let va = _mm256_permute4x64_epi64($v0, shuffle(3, 1, 2, 0));
            let vb = _mm256_permute4x64_epi64($v1, shuffle(3, 1, 2, 0));
            $v2 = _mm256_unpacklo_epi32(va, vb);
            $v3 = _mm256_unpackhi_epi32(va, vb);
        };
    }
    // Same as above, but interleaving at 64-bit granularity.
    macro_rules! merge_epi64 {
        ($v0:tt,$v1:tt,$v2:tt,$v3:tt) => {
            let va = _mm256_permute4x64_epi64($v0, shuffle(3, 1, 2, 0));
            let vb = _mm256_permute4x64_epi64($v1, shuffle(3, 1, 2, 0));
            $v2 = _mm256_unpacklo_epi64(va, vb);
            $v3 = _mm256_unpackhi_epi64(va, vb);
        };
    }
    // Recombine whole 128-bit halves of the two inputs (immediates 0x20 and 0x31).
    macro_rules! merge_si128 {
        ($v0:tt,$v1:tt,$v2:tt,$v3:tt) => {
            $v2 = _mm256_permute2x128_si256($v0, $v1, shuffle(0, 2, 0, 0));
            $v3 = _mm256_permute2x128_si256($v0, $v1, shuffle(0, 3, 0, 1));
        };
    }
    // Stage 1: merge neighbouring input registers, 32 bits at a time.
    let (w0, w1, w2, w3, w4, w5, w6, w7);
    merge_epi32!((v0.mm256), (v1.mm256), w0, w1);
    merge_epi32!((v2.mm256), (v3.mm256), w2, w3);
    merge_epi32!((v4.mm256), (v5.mm256), w4, w5);
    merge_epi32!((v6.mm256), (v7.mm256), w6, w7);
    // Stage 2: merge the stage 1 results, 64 bits at a time.
    let (x0, x1, x2, x3, x4, x5, x6, x7);
    merge_epi64!(w0, w2, x0, x1);
    merge_epi64!(w1, w3, x2, x3);
    merge_epi64!(w4, w6, x4, x5);
    merge_epi64!(w5, w7, x6, x7);
    // Stage 3: merge 128-bit halves and write the result back into the arguments.
    merge_si128!(x0, x4, (v0.mm256), (v1.mm256));
    merge_si128!(x1, x5, (v2.mm256), (v3.mm256));
    merge_si128!(x2, x6, (v4.mm256), (v5.mm256));
    merge_si128!(x3, x7, (v6.mm256), (v7.mm256));
 }
--- a/third_party/zune-jpeg/src/unsafe_utils_neon.rs
+++ b/third_party/zune-jpeg/src/unsafe_utils_neon.rs
@ -0,0 +1,331 @@
 /*
 * Copyright (c) 2023.
 *
 * This software is free software;
 *
 * You can redistribute it or modify it under terms of the MIT, Apache License or Zlib license
 */
 #![cfg(target_arch = "aarch64")]
 // TODO can this be extended to armv7
 //! This module provides unsafe ways to do some things
 #![allow(clippy::wildcard_imports)]
 use std::arch::aarch64::*;
 use std::ops::{Add, AddAssign, BitOr, BitOrAssign, Mul, MulAssign, Sub};
 /// A pair of 128-bit NEON vectors, i.e. eight `i32` lanes in total
 pub type VecType = int32x4x2_t;
 /// Loads eight consecutive `i32` values starting at `src`.
 ///
 /// # Safety
 /// `src` must be valid for reading eight `i32` values.
 pub unsafe fn loadu(src: *const i32) -> VecType {
    let first: *const i32 = src;
    vld1q_s32_x2(first)
 }
 /// NEON stand-in for the AVX `YmmRegister` abstraction that
 /// allows some things to not look ugly
 ///
 /// Despite the name there is no AVX register involved here: the value is a
 /// pair of 128-bit NEON vectors holding eight `i32` lanes.
 #[derive(Clone, Copy)]
 pub struct YmmRegister {
    /// The two `int32x4_t` halves; the field name presumably mirrors the AVX2
    /// version of this type so the same code can use either - confirm
    pub(crate) mm256: VecType
 }
 impl YmmRegister {
    /// Loads eight `i32` values starting at `src` into a new register.
    ///
    /// # Safety
    /// `src` is handed straight to [`loadu`] and must satisfy its requirements.
    #[inline]
    pub unsafe fn load(src: *const i32) -> Self {
        YmmRegister { mm256: loadu(src) }
    }
    /// Applies `f` to the first halves of `self` and `other`, then to the
    /// second halves, and packs the two results into a new register.
    #[inline]
    pub fn map2(self, other: Self, f: impl Fn(int32x4_t, int32x4_t) -> int32x4_t) -> Self {
        let (lhs, rhs) = (self.mm256, other.mm256);
        let first = f(lhs.0, rhs.0);
        let second = f(lhs.1, rhs.1);
        YmmRegister {
            mm256: int32x4x2_t(first, second)
        }
    }
    /// Returns `true` when every one of the eight lanes is zero.
    #[inline]
    pub fn all_zero(self) -> bool {
        unsafe {
            // OR both halves together; any set bit survives into the unsigned maximum
            let merged = vreinterpretq_u32_s32(vorrq_s32(self.mm256.0, self.mm256.1));
            vmaxvq_u32(merged) == 0
        }
    }
    /// Shifts every lane left by the constant `N`.
    #[inline]
    pub fn const_shl<const N: i32>(self) -> Self {
        unsafe {
            // Go through the unsigned type to ensure that we logically shift left
            let first = vshlq_n_u32::<N>(vreinterpretq_u32_s32(self.mm256.0));
            let second = vshlq_n_u32::<N>(vreinterpretq_u32_s32(self.mm256.1));
            YmmRegister {
                mm256: int32x4x2_t(vreinterpretq_s32_u32(first), vreinterpretq_s32_u32(second))
            }
        }
    }
    /// Shifts every signed lane right by the constant `N`.
    #[inline]
    pub fn const_shra<const N: i32>(self) -> Self {
        let halves = self.mm256;
        unsafe {
            YmmRegister {
                mm256: int32x4x2_t(vshrq_n_s32::<N>(halves.0), vshrq_n_s32::<N>(halves.1))
            }
        }
    }
 }
 impl<T: Into<YmmRegister>> Add<T> for YmmRegister {
    type Output = YmmRegister;
    /// Lane-wise addition; `rhs` is converted into a register first.
    #[inline]
    fn add(self, rhs: T) -> Self::Output {
        let addend: YmmRegister = rhs.into();
        self.map2(addend, |x, y| unsafe { vaddq_s32(x, y) })
    }
 }
 impl<T: Into<YmmRegister>> Sub<T> for YmmRegister {
    type Output = YmmRegister;
    /// Lane-wise subtraction; `rhs` is converted into a register first.
    #[inline]
    fn sub(self, rhs: T) -> Self::Output {
        let subtrahend: YmmRegister = rhs.into();
        self.map2(subtrahend, |x, y| unsafe { vsubq_s32(x, y) })
    }
 }
 impl<T> AddAssign<T> for YmmRegister
 where
    T: Into<Self>
 {
    #[inline]
    fn add_assign(&mut self, rhs: T) {
        let rhs: Self = rhs.into();
        *self = *self + rhs;
    }
 }
 impl<T: Into<YmmRegister>> Mul<T> for YmmRegister {
    type Output = YmmRegister;
    /// Lane-wise multiplication; `rhs` is converted into a register first.
    #[inline]
    fn mul(self, rhs: T) -> Self::Output {
        let factor: YmmRegister = rhs.into();
        self.map2(factor, |x, y| unsafe { vmulq_s32(x, y) })
    }
 }
 impl<T> MulAssign<T> for YmmRegister
 where
    T: Into<Self>
 {
    #[inline]
    fn mul_assign(&mut self, rhs: T) {
        let rhs: Self = rhs.into();
        *self = *self * rhs;
    }
 }
 impl<T: Into<YmmRegister>> BitOr<T> for YmmRegister {
    type Output = YmmRegister;
    /// Lane-wise bitwise OR; `rhs` is converted into a register first.
    #[inline]
    fn bitor(self, rhs: T) -> Self::Output {
        let mask: YmmRegister = rhs.into();
        self.map2(mask, |x, y| unsafe { vorrq_s32(x, y) })
    }
 }
 impl<T> BitOrAssign<T> for YmmRegister
 where
    T: Into<Self>
 {
    #[inline]
    fn bitor_assign(&mut self, rhs: T) {
        let rhs: Self = rhs.into();
        *self = *self | rhs;
    }
 }
 impl From<i32> for YmmRegister {
    /// Broadcasts `val` into all eight lanes.
    #[inline]
    fn from(val: i32) -> Self {
        let splat = unsafe { vdupq_n_s32(val) };
        YmmRegister {
            mm256: int32x4x2_t(splat, splat)
        }
    }
 }
 impl From<VecType> for YmmRegister {
    /// Wraps an already loaded vector pair without touching its contents.
    #[inline]
    fn from(mm256: VecType) -> Self {
        Self { mm256 }
    }
 }
 /// Transposes the 4 by 4 block of `i32` held in `v0`..`v3` (one row per
 /// vector) in place.
 #[allow(clippy::too_many_arguments)]
 #[inline]
 unsafe fn transpose4(
    v0: &mut int32x4_t, v1: &mut int32x4_t, v2: &mut int32x4_t, v3: &mut int32x4_t
 ) {
    // View every row as two 64-bit elements
    let row0 = vreinterpretq_s64_s32(*v0);
    let row1 = vreinterpretq_s64_s32(*v1);
    let row2 = vreinterpretq_s64_s32(*v2);
    let row3 = vreinterpretq_s64_s32(*v3);
    // Pair up the first 64-bit halves of rows (0, 2) and (1, 3) ...
    let first_02 = vreinterpretq_s32_s64(vtrn1q_s64(row0, row2));
    let first_13 = vreinterpretq_s32_s64(vtrn1q_s64(row1, row3));
    // ... and the second 64-bit halves of the same row pairs
    let second_02 = vreinterpretq_s32_s64(vtrn2q_s64(row0, row2));
    let second_13 = vreinterpretq_s32_s64(vtrn2q_s64(row1, row3));
    // A 32-bit transpose of each pair finishes the job
    let w0 = vtrnq_s32(first_02, first_13);
    let w1 = vtrnq_s32(second_02, second_13);
    *v0 = w0.0;
    *v1 = w0.1;
    *v2 = w1.0;
    *v3 = w1.1;
 }
 /// Transpose an 8 by 8 array of `i32` held in eight registers, in place.
 ///
 /// Every register is one row made of two 4-lane halves, so the matrix is
 /// handled as four 4 by 4 quadrants:
 /// 1. The upper right and lower left quadrants trade places
 /// 2. Each of the four quadrants is transposed on its own with `transpose4`
 #[allow(clippy::too_many_arguments)]
 #[inline]
 pub unsafe fn transpose(
    v0: &mut YmmRegister, v1: &mut YmmRegister, v2: &mut YmmRegister, v3: &mut YmmRegister,
    v4: &mut YmmRegister, v5: &mut YmmRegister, v6: &mut YmmRegister, v7: &mut YmmRegister
 ) {
    use std::mem::swap;
    // `.0` is the left half of a row and `.1` the right half;
    // rows 0..=3 form the upper quadrants, rows 4..=7 the lower ones.

    // upper right <-> lower left
    swap(&mut v0.mm256.1, &mut v4.mm256.0);
    swap(&mut v1.mm256.1, &mut v5.mm256.0);
    swap(&mut v2.mm256.1, &mut v6.mm256.0);
    swap(&mut v3.mm256.1, &mut v7.mm256.0);
    // upper left
    transpose4(&mut v0.mm256.0, &mut v1.mm256.0, &mut v2.mm256.0, &mut v3.mm256.0);
    // upper right
    transpose4(&mut v0.mm256.1, &mut v1.mm256.1, &mut v2.mm256.1, &mut v3.mm256.1);
    // lower left
    transpose4(&mut v4.mm256.0, &mut v5.mm256.0, &mut v6.mm256.0, &mut v7.mm256.0);
    // lower right
    transpose4(&mut v4.mm256.1, &mut v5.mm256.1, &mut v6.mm256.1, &mut v7.mm256.1);
 }
 /// Checks the NEON `transpose` against a plain scalar transpose.
 #[cfg(test)]
 mod tests {
    use super::*;
    #[test]
    fn test_transpose() {
        // Not symmetric in (i, j), so a transpose that does nothing is detected
        fn get_val(i: usize, j: usize) -> i32 {
            ((i * 8) / (j + 1)) as i32
        }
        unsafe {
            // Row-major 8x8 reference matrix
            let mut vals: [i32; 8 * 8] = [0; 8 * 8];
            for i in 0..8 {
                for j in 0..8 {
                    // some order-dependent value of i and j
                    let value = get_val(i, j);
                    vals[i * 8 + j] = value;
                }
            }
            // Reinterpret the 64 values as eight registers, one per row
            let mut regs: [YmmRegister; 8] = std::mem::transmute(vals);
            // Copy the rows out so eight distinct `&mut` can be passed at once
            let mut reg0 = regs[0];
            let mut reg1 = regs[1];
            let mut reg2 = regs[2];
            let mut reg3 = regs[3];
            let mut reg4 = regs[4];
            let mut reg5 = regs[5];
            let mut reg6 = regs[6];
            let mut reg7 = regs[7];
            transpose(
                &mut reg0, &mut reg1, &mut reg2, &mut reg3, &mut reg4, &mut reg5, &mut reg6,
                &mut reg7
            );
            regs[0] = reg0;
            regs[1] = reg1;
            regs[2] = reg2;
            regs[3] = reg3;
            regs[4] = reg4;
            regs[5] = reg5;
            regs[6] = reg6;
            regs[7] = reg7;
            let vals_from_reg: [i32; 8 * 8] = std::mem::transmute(regs);
            // Scalar reference: swap every element below the diagonal with its mirror
            for i in 0..8 {
                for j in 0..i {
                    let orig = vals[i * 8 + j];
                    vals[i * 8 + j] = vals[j * 8 + i];
                    vals[j * 8 + i] = orig;
                }
            }
            // Both results must now hold get_val(i, j) at the mirrored position (j, i)
            for i in 0..8 {
                for j in 0..8 {
                    assert_eq!(vals[j * 8 + i], get_val(i, j));
                    assert_eq!(vals_from_reg[j * 8 + i], get_val(i, j));
                }
            }
            assert_eq!(vals, vals_from_reg);
        }
    }
 }
--- a/third_party/zune-jpeg/src/upsampler.rs
+++ b/third_party/zune-jpeg/src/upsampler.rs
@ -0,0 +1,101 @@
 /*
 * Copyright (c) 2023.
 *
 * This software is free software;
 *
 * You can redistribute it or modify it under terms of the MIT, Apache License or Zlib license
 */
 //! Up-sampling routines
 //!
 //! The main upsampling method is bi-linear interpolation, also known as a
 //! "triangle filter" (libjpeg-turbo calls it `fancy_upsampling`), which is
 //! a good compromise between speed and visual quality.
 //!
 //! # The filter
 //! Each output pixel is made from `(3*A+B)/4` where A is the original
 //! pixel closer to the output and B is the one further.
 //!
 //! ```text
 //! +---+---+
 //! | A | B |
 //! +---+---+
 //! +-+-+-+-+
 //! | |P| | |
 //! +-+-+-+-+
 //! ```
 //!
 //! # Horizontal Bi-linear filter
 //! ```text
 //! |---+-----------+---+
 //! |   |           |   |
 //! | A | |p1 | p2| | B |
 //! |   |           |   |
 //! |---+-----------+---+
 //!
 //! ```
 //! For a horizontal bi-linear it's trivial to implement,
 //!
 //! `A` becomes the input closest to the output.
 //!
 //! `B` varies depending on output.
 //!  - For odd positions, input is the `next` pixel after A
 //!  - For even positions, input is the `previous` value before A.
 //!
 //! We iterate in a classic 1-D sliding window with a window of 3.
 //! In that window, `A` is the element at index 1 and `B` is the element at index 0 or 2,
 //! depending on which output position we are writing (see the scalar code).
 //!
 //! For vector code see module sse for explanation.
 //!
 //! # Vertical bi-linear.
 //! Vertical up-sampling is a bit trickier.
 //!
 //! ```text
 //! +----+----+
 //! | A1 | A2 |
 //! +----+----+
 //! +----+----+
 //! | p1 | p2 |
 //! +----+-+--+
 //! +----+-+--+
 //! | p3 | p4 |
 //! +----+-+--+
 //! +----+----+
 //! | B1 | B2 |
 //! +----+----+
 //! ```
 //!
 //! For `p1`
 //! - `A1` is given a weight of `3` and `B1` is given a weight of 1.
 //!
 //! For `p3`
 //! - `B1` is given a weight of `3` and `A1` is given a weight of 1
 //!
 //! # Horizontal vertical downsampling/chroma quartering.
 //!
 //! Carry out a vertical filter in the first pass, then a horizontal filter in the second pass.
 use crate::components::UpSampler;
 mod scalar;
 /// Returns the routine used for horizontally sub-sampled components.
 ///
 /// Meant to choose the best possible implementation for this platform;
 /// only the scalar one is wired up here, so `_use_unsafe` is ignored.
 pub fn choose_horizontal_samp_function(_use_unsafe: bool) -> UpSampler {
    scalar::upsample_horizontal
 }
 /// Returns the routine used for components sub-sampled in both directions.
 ///
 /// Only the scalar implementation is wired up here, so `_use_unsafe` is ignored.
 pub fn choose_hv_samp_function(_use_unsafe: bool) -> UpSampler {
    scalar::upsample_hv
 }
 /// Returns the routine used for vertically sub-sampled components.
 ///
 /// Only the scalar implementation is wired up here, so `_use_unsafe` is ignored.
 pub fn choose_v_samp_function(_use_unsafe: bool) -> UpSampler {
    scalar::upsample_vertical
 }
 /// Upsampler that deliberately does nothing.
 ///
 /// It takes the same arguments as the real upsampling routines but reads
 /// none of them and leaves `_output` untouched.
 pub fn upsample_no_op(
    _input: &[i16], _in_ref: &[i16], _in_near: &[i16], _scratch_space: &mut [i16],
    _output: &mut [i16]
 ) {
    // intentionally empty
 }
--- a/third_party/zune-jpeg/src/upsampler/scalar.rs
+++ b/third_party/zune-jpeg/src/upsampler/scalar.rs
@ -0,0 +1,110 @@
 /*
 * Copyright (c) 2023.
 *
 * This software is free software;
 *
 * You can redistribute it or modify it under terms of the MIT, Apache License or Zlib license
 */
 /// Upsample one row of samples horizontally by a factor of two.
 ///
 /// Every input sample produces two output samples. Each output is
 /// `(3 * nearest + next_nearest + 2) >> 2`, where `nearest` is the input
 /// sample the output lies in and `next_nearest` is the neighbour on the
 /// side the output leans towards. The very first and very last outputs
 /// have no such neighbour and are plain copies of the edge samples.
 ///
 /// `_ref`, `_in_near` and `_scratch` are not used by this routine.
 ///
 /// # Panics
 /// If `output` is not exactly twice as long as `input`, or if `input`
 /// has fewer than three samples.
 pub fn upsample_horizontal(
    input: &[i16], _ref: &[i16], _in_near: &[i16], _scratch: &mut [i16], output: &mut [i16]
 ) {
    assert_eq!(
        input.len() * 2,
        output.len(),
        "Input length is not half the size of the output length"
    );
    assert!(
        output.len() > 4 && input.len() > 2,
        "Too Short of a vector, cannot upsample"
    );
    // First column: there is no sample to the left of input[0]
    output[0] = input[0];
    output[1] = (input[0] * 3 + input[1] + 2) >> 2;
    // This code is written for speed and not readability
    //
    // The readable code is
    //
    //      for i in 1..input.len() - 1{
    //         let sample = 3 * input[i] + 2;
    //         out[i * 2] = (sample + input[i - 1]) >> 2;
    //         out[i * 2 + 1] = (sample + input[i + 1]) >> 2;
    //     }
    //
    // The output of a pixel is determined by its surrounding neighbours but we attach more weight to its nearest
    // neighbour (input[i]) than to the next nearest neighbour.
    for (output_window, input_window) in output[2..].chunks_exact_mut(2).zip(input.windows(3)) {
        let sample = 3 * input_window[1] + 2;
        output_window[0] = (sample + input_window[0]) >> 2;
        output_window[1] = (sample + input_window[2]) >> 2;
    }
    // Last column: mirror image of the first one. The nearest sample is the
    // last input value, so it is the one that carries the weight of 3 and its
    // left neighbour the weight of 1.
    let last = input[input.len() - 1];
    let before_last = input[input.len() - 2];
    let out_len = output.len();
    output[out_len - 2] = (3 * last + before_last + 2) >> 2;
    output[out_len - 1] = last;
 }
 /// Upsample one row vertically by a factor of two.
 ///
 /// `output` receives two rows back to back. The first one blends `input`
 /// (weight 3) with `in_near` (weight 1), the second one blends `input`
 /// (weight 3) with `in_far` (weight 1). `_scratch_space` is not used.
 ///
 /// # Panics
 /// If `output` is not twice as long as `input`, or if `in_near` / `in_far`
 /// differ in length from `input`.
 pub fn upsample_vertical(
    input: &[i16], in_near: &[i16], in_far: &[i16], _scratch_space: &mut [i16], output: &mut [i16]
 ) {
    assert_eq!(input.len() * 2, output.len());
    assert_eq!(in_near.len(), input.len());
    assert_eq!(in_far.len(), input.len());
    let half = output.len() / 2;
    let (first_row, second_row) = output.split_at_mut(half);
    // first output row: the neighbouring row is in_near
    for (dst, (current, neighbour)) in first_row.iter_mut().zip(input.iter().zip(in_near)) {
        *dst = (3 * *current + 2 + *neighbour) >> 2;
    }
    // second output row: the neighbouring row is in_far
    for (dst, (current, neighbour)) in second_row.iter_mut().zip(input.iter().zip(in_far)) {
        *dst = (3 * *current + 2 + *neighbour) >> 2;
    }
 }
 /// Upsample by two in both directions: a vertical pass into
 /// `scratch_space`, followed by one horizontal pass per resulting row.
 ///
 /// # Panics
 /// If `output` is not four times as long as `input`, or if one of the two
 /// passes rejects the lengths it is handed.
 pub fn upsample_hv(
    input: &[i16], in_near: &[i16], in_far: &[i16], scratch_space: &mut [i16], output: &mut [i16]
 ) {
    assert_eq!(input.len() * 4, output.len());
    // Neither pass touches its own scratch argument, a one element dummy is enough
    let mut dummy = [0];
    upsample_vertical(input, in_near, in_far, &mut dummy, scratch_space);
    // horizontal upsampling must be done separate for every line
    // Otherwise it introduces artifacts that may cause the edge colors
    // to appear on the other line.
    // The vertical pass produced two scanlines, so both buffers are cut in
    // half and every half is handled on its own.
    let (upper_in, lower_in) = scratch_space.split_at(scratch_space.len() / 2);
    let (upper_out, lower_out) = output.split_at_mut(output.len() / 2);
    upsample_horizontal(upper_in, &[], &[], &mut dummy, upper_out);
    upsample_horizontal(lower_in, &[], &[], &mut dummy, lower_out);
 }
--- a/third_party/zune-jpeg/src/worker.rs
+++ b/third_party/zune-jpeg/src/worker.rs
@ -0,0 +1,429 @@
 /*
 * Copyright (c) 2023.
 *
 * This software is free software;
 *
 * You can redistribute it or modify it under terms of the MIT, Apache License or Zlib license
 */
 use alloc::format;
 use core::convert::TryInto;
 use zune_core::colorspace::ColorSpace;
 use crate::color_convert::ycbcr_to_grayscale;
 use crate::components::{Components, SampleRatios};
 use crate::decoder::{ColorConvert16Ptr, MAX_COMPONENTS};
 use crate::errors::DecodeErrors;
 /// Fast rounded `0..255 * 0..255 => 0..255` multiplication, i.e. an
 /// approximation of `in_val * y / 255` that avoids the division.
 ///
 /// Borrowed from stb
 #[allow(clippy::cast_sign_loss, clippy::cast_possible_truncation)]
 #[inline]
 fn blinn_8x8(in_val: u8, y: u8) -> u8 {
    // 128 is the rounding term; both operands are widened so nothing overflows
    let product = i32::from(in_val) * i32::from(y) + 128;
    let scaled = (product + (product >> 8)) >> 8;
    scaled as u8
 }
 /// Converts the decoded component planes in `unprocessed` from
 /// `input_colorspace` to `output_colorspace` and writes the result to `output`.
 ///
 /// When both colorspaces are equal and have 3 or 4 components, the samples
 /// are only interleaved and the row padding is dropped.
 ///
 /// # Errors
 /// Returns `DecodeErrors::Format` for a colorspace pair that has no
 /// conversion implemented.
 #[allow(clippy::cast_sign_loss, clippy::cast_possible_truncation)]
 pub(crate) fn color_convert(
    unprocessed: &[&[i16]; MAX_COMPONENTS], color_convert_16: ColorConvert16Ptr,
    input_colorspace: ColorSpace, output_colorspace: ColorSpace, output: &mut [u8], width: usize,
    padded_width: usize
 ) -> Result<(), DecodeErrors> // so many parameters..
 {
    // maximum sampling factors are in Y-channel, no need to pass them.
    if input_colorspace == output_colorspace {
        // nothing to convert (things like RGB to RGB), just strip the padding
        match input_colorspace.num_components() {
            3 => {
                copy_removing_padding(unprocessed, width, padded_width, output);
                return Ok(());
            }
            4 => {
                copy_removing_padding_4x(unprocessed, width, padded_width, output);
                return Ok(());
            }
            _ => {}
        }
    }
    // an actual conversion is needed
    match (input_colorspace, output_colorspace) {
        (ColorSpace::YCbCr | ColorSpace::Luma, ColorSpace::Luma) => {
            // only the first plane is needed for grayscale
            ycbcr_to_grayscale(unprocessed[0], width, padded_width, output);
        }
        (
            ColorSpace::YCbCr,
            ColorSpace::RGB | ColorSpace::RGBA | ColorSpace::BGR | ColorSpace::BGRA
        ) => color_convert_ycbcr(
            unprocessed,
            width,
            padded_width,
            output_colorspace,
            color_convert_16,
            output
        ),
        (ColorSpace::YCCK, ColorSpace::RGB) => color_convert_ycck_to_rgb::<3>(
            unprocessed,
            width,
            padded_width,
            output_colorspace,
            color_convert_16,
            output
        ),
        (ColorSpace::YCCK, ColorSpace::RGBA) => color_convert_ycck_to_rgb::<4>(
            unprocessed,
            width,
            padded_width,
            output_colorspace,
            color_convert_16,
            output
        ),
        (ColorSpace::CMYK, ColorSpace::RGB) => {
            color_convert_cymk_to_rgb::<3>(unprocessed, width, padded_width, output);
        }
        (ColorSpace::CMYK, ColorSpace::RGBA) => {
            color_convert_cymk_to_rgb::<4>(unprocessed, width, padded_width, output);
        }
        // Every other pairing is (currently) unsupported
        _ => {
            return Err(DecodeErrors::Format(format!(
                "Unimplemented colorspace mapping from {input_colorspace:?} to {output_colorspace:?}"
            )));
        }
    }
    Ok(())
 }
 /// Interleave the first three component planes into `output`, dropping
 /// the padding samples at the end of every input row.
 ///
 /// Input rows are `padded_width` samples long, output rows hold `width`
 /// pixels of three bytes each. Samples are narrowed to `u8` with `as`.
 #[allow(clippy::cast_sign_loss, clippy::cast_possible_truncation)]
 fn copy_removing_padding(
    mcu_block: &[&[i16]; MAX_COMPONENTS], width: usize, padded_width: usize, output: &mut [u8]
 ) {
    let out_rows = output.chunks_exact_mut(width * 3);
    let rows_0 = mcu_block[0].chunks_exact(padded_width);
    let rows_1 = mcu_block[1].chunks_exact(padded_width);
    let rows_2 = mcu_block[2].chunks_exact(padded_width);
    for (((out_row, row_0), row_1), row_2) in out_rows.zip(rows_0).zip(rows_1).zip(rows_2) {
        // zipping against the pixel chunks stops after `width` samples,
        // which is what discards the padding
        let samples = row_0.iter().zip(row_1).zip(row_2);
        for (pixel, ((s0, s1), s2)) in out_row.chunks_exact_mut(3).zip(samples) {
            pixel[0] = *s0 as u8;
            pixel[1] = *s1 as u8;
            pixel[2] = *s2 as u8;
        }
    }
 }
 /// Four component variant of `copy_removing_padding`: interleaves the
 /// first four component planes into `output` and drops the row padding.
 ///
 /// Input rows are `padded_width` samples long, output rows hold `width`
 /// pixels of four bytes each. Samples are narrowed to `u8` with `as`.
 #[allow(clippy::cast_possible_truncation, clippy::cast_sign_loss)]
 fn copy_removing_padding_4x(
    mcu_block: &[&[i16]; MAX_COMPONENTS], width: usize, padded_width: usize, output: &mut [u8]
 ) {
    let out_rows = output.chunks_exact_mut(width * 4);
    let rows_0 = mcu_block[0].chunks_exact(padded_width);
    let rows_1 = mcu_block[1].chunks_exact(padded_width);
    let rows_2 = mcu_block[2].chunks_exact(padded_width);
    let rows_3 = mcu_block[3].chunks_exact(padded_width);
    for ((((out_row, row_0), row_1), row_2), row_3) in
        out_rows.zip(rows_0).zip(rows_1).zip(rows_2).zip(rows_3)
    {
        // zipping against the pixel chunks stops after `width` samples,
        // which is what discards the padding
        let samples = row_0.iter().zip(row_1).zip(row_2).zip(row_3);
        for (pixel, (((s0, s1), s2), s3)) in out_row.chunks_exact_mut(4).zip(samples) {
            pixel[0] = *s0 as u8;
            pixel[1] = *s1 as u8;
            pixel[2] = *s2 as u8;
            pixel[3] = *s3 as u8;
        }
    }
 }
 /// Convert YCCK image to rgb
 ///
 /// The first three planes go through the regular YCbCr conversion. Every
 /// resulting color channel `c` is then replaced by
 /// `blinn_8x8(255 - c, k)`, where `k` is the matching sample of the
 /// fourth plane.
 ///
 /// `NUM_COMPONENTS` is the number of bytes per output pixel (3 or 4 at the
 /// call sites). Only the first three bytes of a pixel are modified by the
 /// K pass.
 #[allow(clippy::cast_possible_truncation, clippy::cast_sign_loss)]
 fn color_convert_ycck_to_rgb<const NUM_COMPONENTS: usize>(
    mcu_block: &[&[i16]; MAX_COMPONENTS], width: usize, padded_width: usize,
    output_colorspace: ColorSpace, color_convert_16: ColorConvert16Ptr, output: &mut [u8]
 ) {
    color_convert_ycbcr(
        mcu_block,
        width,
        padded_width,
        output_colorspace,
        color_convert_16,
        output
    );
    // An output row is `width` pixels of NUM_COMPONENTS bytes each. Chunking
    // by that size (and not by a fixed three bytes per pixel) keeps every
    // output row lined up with its row of K samples for 4 byte pixels too.
    for (pix_w, k_w) in output
        .chunks_exact_mut(width * NUM_COMPONENTS)
        .zip(mcu_block[3].chunks_exact(padded_width))
    {
        for (pix, k) in pix_w.chunks_exact_mut(NUM_COMPONENTS).zip(k_w) {
            let k = (*k) as u8;
            pix[0] = blinn_8x8(255 - pix[0], k);
            pix[1] = blinn_8x8(255 - pix[1], k);
            pix[2] = blinn_8x8(255 - pix[2], k);
        }
    }
 }
 /// Convert a CMYK image to rgb
 ///
 /// Every color channel is multiplied with the K sample of the same pixel
 /// through `blinn_8x8`.
 ///
 /// `NUM_COMPONENTS` is the number of bytes per output pixel (3 or 4 at the
 /// call sites). Only the first three bytes of a pixel are written; a
 /// fourth byte, if present, keeps whatever value `output` already held.
 #[allow(clippy::cast_sign_loss, clippy::cast_possible_truncation)]
 fn color_convert_cymk_to_rgb<const NUM_COMPONENTS: usize>(
    mcu_block: &[&[i16]; MAX_COMPONENTS], width: usize, padded_width: usize, output: &mut [u8]
 ) {
    for ((((pix_w, c_w), m_w), y_w), k_w) in output
        .chunks_exact_mut(width * NUM_COMPONENTS)
        .zip(mcu_block[0].chunks_exact(padded_width))
        .zip(mcu_block[1].chunks_exact(padded_width))
        .zip(mcu_block[2].chunks_exact(padded_width))
        .zip(mcu_block[3].chunks_exact(padded_width))
    {
        // A pixel occupies NUM_COMPONENTS bytes; stepping by a fixed 3 would
        // write 4 byte pixels at the wrong offsets.
        for ((((pix, c), m), y), k) in pix_w
            .chunks_exact_mut(NUM_COMPONENTS)
            .zip(c_w)
            .zip(m_w)
            .zip(y_w)
            .zip(k_w)
        {
            let c = *c as u8;
            let m = *m as u8;
            let y = *y as u8;
            let k = *k as u8;
            pix[0] = blinn_8x8(c, k);
            pix[1] = blinn_8x8(m, k);
            pix[2] = blinn_8x8(y, k);
        }
    }
 }
 /// Do color-conversion for interleaved MCU
 ///
 /// The three planes are walked one row (`padded_width` samples) at a time
 /// and handed to `color_convert_16` in groups of 16 samples; every output
 /// row receives `width` pixels in `output_colorspace`.
 #[allow(
    clippy::similar_names,
    clippy::too_many_arguments,
    clippy::needless_pass_by_value,
    clippy::unwrap_used
 )]
 fn color_convert_ycbcr(
    mcu_block: &[&[i16]; MAX_COMPONENTS], width: usize, padded_width: usize,
    output_colorspace: ColorSpace, color_convert_16: ColorConvert16Ptr, output: &mut [u8]
 ) {
    let num_components = output_colorspace.num_components();
    let stride = width * num_components;
    // Staging buffer for conversions that cannot be written straight to the output.
    let mut temp = [0; 64];
    // Rows are chunked by the padded width so that the extra samples at the end of a
    // row can be discarded (the encoder may pad the width to a multiple of 8).
    for (((y_row, cb_row), cr_row), out_row) in mcu_block[0]
        .chunks_exact(padded_width)
        .zip(mcu_block[1].chunks_exact(padded_width))
        .zip(mcu_block[2].chunks_exact(padded_width))
        .zip(output.chunks_exact_mut(stride))
    {
        if width < 16 {
            // Narrow rows cannot fill a group of 16: copy the samples into
            // zero padded buffers, convert those into `temp` and keep only
            // the first `width` pixels.
            let mut y_buf = [0; 16];
            let mut cb_buf = [0; 16];
            let mut cr_buf = [0; 16];
            y_buf[..y_row.len()].copy_from_slice(y_row);
            cb_buf[..cb_row.len()].copy_from_slice(cb_row);
            cr_buf[..cr_row.len()].copy_from_slice(cr_row);
            (color_convert_16)(&y_buf, &cb_buf, &cr_buf, &mut temp, &mut 0);
            out_row[..stride].copy_from_slice(&temp[..stride]);
        } else {
            // Main loop: full groups of 16 samples are written straight to the output.
            for (((y, cb), cr), out_c) in y_row
                .chunks_exact(16)
                .zip(cb_row.chunks_exact(16))
                .zip(cr_row.chunks_exact(16))
                .zip(out_row.chunks_exact_mut(16 * num_components))
            {
                (color_convert_16)(
                    y.try_into().unwrap(),
                    cb.try_into().unwrap(),
                    cr.try_into().unwrap(),
                    out_c,
                    &mut 0
                );
            }
            // The pixels at the end that the main loop could not handle: step back so
            // that the last 16 pixels of the row form one group, convert it into `temp`
            // and overwrite the end of the row. Some pixels are converted twice.
            let tail = width - 16;
            if let Some(((y, cb), cr)) = y_row[tail..]
                .chunks_exact(16)
                .zip(cb_row[tail..].chunks_exact(16))
                .zip(cr_row[tail..].chunks_exact(16))
                .next()
            {
                (color_convert_16)(
                    y.try_into().unwrap(),
                    cb.try_into().unwrap(),
                    cr.try_into().unwrap(),
                    &mut temp,
                    &mut 0
                );
            }
            let rem = out_row[tail * num_components..]
                .chunks_exact_mut(16 * num_components)
                .next()
                .unwrap();
            rem.copy_from_slice(&temp[0..rem.len()]);
        }
    }
 }
 /// Up-samples the decoded coefficients of a single component.
 ///
 /// The work done depends on `component.sample_ratio`:
 ///
 /// * `V` / `HV`: every `width_stride`-sized row of `component.raw_coeff` is passed to
 ///   `component.up_sampler` together with the row above and the row below it, and the
 ///   result is written to consecutive slices of `component.upsample_dest`.
 ///   The last row of an MCU row that is not the last one of the image is not up-sampled
 ///   immediately; it is copied into `component.row` (and its predecessor into
 ///   `component.row_up`) and up-sampled on the next call into
 ///   `component.first_row_upsample_dest`.
 /// * `H`: every row is up-sampled on its own into a destination row twice as long.
 /// * `None`: nothing is done.
 ///
 /// # Arguments
 /// * `component` - Component whose `raw_coeff` is read and whose `upsample_dest`,
 ///   `first_row_upsample_dest`, `row` and `row_up` buffers are written.
 /// * `mcu_height` - Number of MCU rows; only used to recognise the last one
 ///   (`i == mcu_height.saturating_sub(1)`).
 /// * `i` - Index of the MCU row being processed; `0` is treated as the first image row.
 /// * `upsampler_scratch_space` - Scratch buffer forwarded to `component.up_sampler`
 ///   in the `V` / `HV` paths (the `H` path passes an empty scratch buffer).
 ///
 /// # Panics
 /// * In the `H` path, if `upsample_dest` is not exactly twice as long as `raw_coeff`.
 /// * If any of the slice ranges computed below fall outside their buffers, or if
 ///   `row` / `row_up` do not have the length of the rows copied into them.
 /// * If none of the row-classification branches matches (`unreachable!`).
 pub(crate) fn upsample(
    component: &mut Components, mcu_height: usize, i: usize, upsampler_scratch_space: &mut [i16]
 ) {
    match component.sample_ratio {
        SampleRatios::V | SampleRatios::HV => {
            /*
            When upsampling vertically sampled images, we have a certain problem:
            we do not have all MCUs decoded, which hurts at boundaries,
            e.g. we can't upsample the last MCU row, since its row_down doesn't exist yet.
            To solve this we need to do two things
            1. Carry over coefficients when we lack enough data to upsample
            2. Upsample when we have enough data
            To achieve (1), we store a previous row and the current row in the component itself,
            which will later be used to do (2).
            To achieve (2), we take the stored previous row (second last MCU row),
            current row (last MCU row) and row down (first row of the newly decoded MCU),
            upsample that and store it in first_row_upsample_dest; this contains the
            up-sampled coefficients for the last row of the previously decoded MCU row.
            The caller is then expected to process first_row_upsample_dest before processing data
            in component.upsample_dest, which stores the up-sampled components excluding the last row
            */
            // Write cursor into component.upsample_dest, advanced after every up-sampled row.
            let mut dest_start = 0;
            // Length of every destination slice handed to the up-sampler.
            let stride_bytes_written = component.width_stride * component.sample_ratio.sample();
            if i > 0 {
                // Handle the last row of the previous MCU row.
                // This wasn't up-sampled as we didn't have the row_down
                // so we do it now
                let stride = component.width_stride;
                let dest = &mut component.first_row_upsample_dest[0..stride_bytes_written];
                // `row` and `row_up` were saved by the previous call (see the
                // `pos == stop_offset - 1` branch below).
                let row = &component.row[..];
                let row_up = &component.row_up[..];
                // The first row of the freshly decoded data acts as the row below.
                let row_down = &component.raw_coeff[0..stride];
                (component.up_sampler)(row, row_up, row_down, upsampler_scratch_space, dest);
            }
            // we have the Y component width stride.
            // this may be higher than the actual width,(2x because vertical sampling)
            //
            // This will not upsample the last row
            // if false, do not upsample.
            // set to false on the last row of an mcu
            let mut upsample = true;
            // NOTE(review): neighbour rows below are indexed with this `stride`
            // (width_stride * vertical_sample) while `curr_row` iterates in
            // `width_stride` chunks; the two only line up if `vertical_sample` is 1
            // at this point - confirm against the code that sets up the component.
            let stride = component.width_stride * component.vertical_sample;
            // Number of `width_stride`-sized rows in raw_coeff.
            let stop_offset = component.raw_coeff.len() / component.width_stride;
            for (pos, curr_row) in component
                .raw_coeff
                .chunks_exact(component.width_stride)
                .enumerate()
            {
                // Empty placeholders; every branch that up-samples overwrites
                // row_up / row_down, and dest is set right before the call.
                let mut dest: &mut [i16] = &mut [];
                let mut row_up: &[i16] = &[];
                // row below current sample
                let mut row_down: &[i16] = &[];
                // Order of ifs matters
                if i == 0 && pos == 0 {
                    // first IMAGE row, row_up is the same as current row
                    // row_down is the row below.
                    row_up = &component.raw_coeff[pos * stride..(pos + 1) * stride];
                    row_down = &component.raw_coeff[(pos + 1) * stride..(pos + 2) * stride];
                } else if i > 0 && pos == 0 {
                    // first row of a new mcu, previous row was copied so use that
                    row_up = &component.row[..];
                    row_down = &component.raw_coeff[(pos + 1) * stride..(pos + 2) * stride];
                } else if i == mcu_height.saturating_sub(1) && pos == stop_offset - 1 {
                    // last IMAGE row, adjust pointer to use previous row and current row
                    // (there is no row below, so the current row doubles as row_down)
                    row_up = &component.raw_coeff[(pos - 1) * stride..pos * stride];
                    row_down = &component.raw_coeff[pos * stride..(pos + 1) * stride];
                } else if pos > 0 && pos < stop_offset - 1 {
                    // other rows, get row up and row down relative to our current row
                    // ignore last row of each mcu
                    row_up = &component.raw_coeff[(pos - 1) * stride..pos * stride];
                    row_down = &component.raw_coeff[(pos + 1) * stride..(pos + 2) * stride];
                } else if pos == stop_offset - 1 {
                    // last row of this MCU row (and not the last row of the image)
                    //
                    // we need a row at the next MCU but we haven't decoded that MCU yet
                    // so we should save this and when we have the next MCU,
                    // do the upsampling
                    // store the current row and previous row in a buffer
                    let prev_row = &component.raw_coeff[(pos - 1) * stride..pos * stride];
                    component.row_up.copy_from_slice(prev_row);
                    component.row.copy_from_slice(curr_row);
                    // This is the final iteration, so the flag never needs resetting.
                    upsample = false;
                } else {
                    unreachable!("Uh oh!");
                }
                if upsample {
                    // Claim the next output slice and move the write cursor past it.
                    dest =
                        &mut component.upsample_dest[dest_start..dest_start + stride_bytes_written];
                    dest_start += stride_bytes_written;
                }
                if upsample {
                    // upsample
                    (component.up_sampler)(
                        curr_row,
                        row_up,
                        row_down,
                        upsampler_scratch_space,
                        dest
                    );
                }
            }
        }
        SampleRatios::H => {
            // The destination must hold exactly two output values per input value.
            assert_eq!(component.raw_coeff.len() * 2, component.upsample_dest.len());
            let raw_coeff = &component.raw_coeff;
            let dest_coeff = &mut component.upsample_dest;
            // upsample each row: pair every input row with its double-width output row
            for (single_row, output_stride) in raw_coeff
                .chunks_exact(component.width_stride)
                .zip(dest_coeff.chunks_exact_mut(component.width_stride * 2))
            {
                // upsample using the fn pointer, should only be H, so no need for
                // row up and row down (nor for scratch space)
                (component.up_sampler)(single_row, &[], &[], &mut [], output_stride);
            }
        }
        // No sub-sampling for this component: nothing to do.
        SampleRatios::None => {}
    };
 }
--- a/third_party/zune-jpeg/tests/invalid_images.rs
+++ b/third_party/zune-jpeg/tests/invalid_images.rs
@ -0,0 +1,66 @@
 /*
 * Copyright (c) 2023.
 *
 * This software is free software;
 *
 * You can redistribute it or modify it under terms of the MIT, Apache License or Zlib license
 */
 use zune_core::bytestream::ZCursor;
 use zune_jpeg::JpegDecoder;
 /// Decoding an input that ends after only three bytes must report an error.
 #[test]
 fn eof() {
    // The whole input: nothing follows these three bytes.
    let truncated = [0xff, 0xd8, 0xa4];
    let source = ZCursor::new(truncated);
    let mut decoder = JpegDecoder::new(source);
    let outcome = decoder.decode();
    // Panics (failing the test) if decoding unexpectedly succeeded.
    outcome.unwrap_err();
 }
 /// Decoding this six-byte input must fail; which error is returned is not checked.
 #[test]
 fn bad_ff_marker_size() {
    let malformed = [0xff, 0xd8, 0xff, 0x00, 0x00, 0x00];
    let source = ZCursor::new(malformed);
    let mut decoder = JpegDecoder::new(source);
    let outcome = decoder.decode();
    // Only the fact that an error occurred matters, so the value is discarded.
    let _ = outcome.unwrap_err();
 }
 /// Decoding this input must fail with `SosError` carrying the exact message below.
 #[test]
 fn bad_number_of_scans() {
    let input = [255, 216, 255, 218, 232, 197, 255];
    let mut decoder = JpegDecoder::new(ZCursor::new(input));
    let failure = decoder.decode().unwrap_err();
    // Both the error variant and its message have to match.
    let is_expected = match failure {
        zune_jpeg::errors::DecodeErrors::SosError(message) => {
            message == "Bad SOS length 59589,corrupt jpeg"
        }
        _ => false
    };
    assert!(is_expected);
 }
 /// Decoding this input must fail with `FormatStatic` carrying the exact message below
 /// (rather than panicking, as the test name suggests it once could).
 #[test]
 fn huffman_length_subtraction_overflow() {
    let input = [255, 216, 255, 196, 0, 0];
    let mut decoder = JpegDecoder::new(ZCursor::new(input));
    let failure = decoder.decode().unwrap_err();
    // Both the error variant and its message have to match.
    let is_expected = match failure {
        zune_jpeg::errors::DecodeErrors::FormatStatic(message) => {
            message == "Invalid Huffman length in image"
        }
        _ => false
    };
    assert!(is_expected);
 }
 /// Decoding this ten-byte input must return an error; which error is not checked.
 #[test]
 fn index_oob() {
    let input = [255, 216, 255, 218, 0, 8, 1, 0, 8, 1];
    let source = ZCursor::new(input);
    let mut decoder = JpegDecoder::new(source);
    let outcome = decoder.decode();
    // Only the fact that an error occurred matters, so the value is discarded.
    let _ = outcome.unwrap_err();
 }
 /// Decoding this input must fail with `SofError` carrying the exact message below.
 #[test]
 fn mul_with_overflow() {
    let input = [255, 216, 255, 192, 255, 1, 8, 9, 119, 48, 255, 192];
    let mut decoder = JpegDecoder::new(ZCursor::new(input));
    let failure = decoder.decode().unwrap_err();
    // Both the error variant and its message have to match.
    let is_expected = match failure {
        zune_jpeg::errors::DecodeErrors::SofError(message) => {
            message == "Length of start of frame differs from expected 584,value is 65281"
        }
        _ => false
    };
    assert!(is_expected);
 }