commit 6e82372485fc5185b6fcd24ad397c3add7ea1399 Author: Sam Blazes Date: Wed Jun 12 20:58:37 2024 -0400 initial commit diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..ea8c4bf --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +/target diff --git a/Cargo.lock b/Cargo.lock new file mode 100644 index 0000000..db84dac --- /dev/null +++ b/Cargo.lock @@ -0,0 +1,21 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +version = 3 + +[[package]] +name = "artspace" +version = "0.1.0" +dependencies = [ + "zune-jpeg", +] + +[[package]] +name = "zune-core" +version = "0.5.0-rc1" + +[[package]] +name = "zune-jpeg" +version = "0.5.0-rc1" +dependencies = [ + "zune-core", +] diff --git a/Cargo.toml b/Cargo.toml new file mode 100644 index 0000000..fb5e554 --- /dev/null +++ b/Cargo.toml @@ -0,0 +1,7 @@ +[package] +name = "artspace" +version = "0.1.0" +edition = "2021" + +[dependencies] +zune-jpeg = {path = "third_party/zune-jpeg"} \ No newline at end of file diff --git a/README.md b/README.md new file mode 100644 index 0000000..4c2b669 --- /dev/null +++ b/README.md @@ -0,0 +1,14 @@ +artspace +======== + +Tabs or spaces, the eternal question. Why limit yourself to whitespace that doesn't even make a visual difference? + +Instead, turn that wasted whitespace into artspace! + +## Usage + +``` +cargo run --release -- test_image.jpg src\main.rs artspace.rs +``` + +Note: Only compatible with languages with `/* this style of multi-line comment */`. 
\ No newline at end of file diff --git a/artspace.rs b/artspace.rs new file mode 100644 index 0000000..5582812 --- /dev/null +++ b/artspace.rs @@ -0,0 +1,132 @@ +/*⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⠗ ⣼⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿ ⣀⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⠇ ⢀⣼⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⡿⠃ ⣀⣾⣿⣿⣿⣿⣿*/ +pub fn convert_bitmap_to_unicode(w: usize, h: usize, data: Vec) -> Vec>/*⣾⣿⣿⣿⣿⣿⣿⣿⣿⣿⠷⠓ ⣈⣬⣿⣿⣿⣿⣿⣿⣿*/ +{/*⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⠓ ⢀⣬⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⠃ ⣸⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⡿⠁ ⣈⣾⣿⣿⣿⣿⣿⣿⣿⣿⡿⠳⠁ ⣈⣾⣿⣿⣿⣿⣿⣿⣿⣿⣿*/ +/*⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⡿⠃ ⣈⣾⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⠃ ⣀⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⠷ ⣈⣾⣿⣿⣿⣿⣿⣿⣿⣿⡷⠓ ⢀⣬⣾⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⡿*/ +/**/const CHARS: [&str; 4] = [/*⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⠃ ⣾⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⠓ ⣈⣾⣿⣿⣿⣿⣿⣿⣿⣿⠷⠁ ⣈⣮⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⡿⠓ */ +/*⣿⣿⣿⣿*/" ⠁⠂⠃⠄⠅⠆⠇⠈⠉⠊⠋⠌⠍⠎⠏⠐⠑⠒⠓⠔⠕⠖⠗⠘⠙⠚⠛⠜⠝⠞⠟⠠⠡⠢⠣⠤⠥⠦⠧⠨⠩⠪⠫⠬⠭⠮⠯⠰⠱⠲⠳⠴⠵⠶⠷⠸⠹⠺⠻⠼⠽⠾⠿",/* ⣈⣾⣿⣿⣿⣿⣿⣿⣿⡿⠓⠁ ⢀⣬⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⠷⠁ */ +/*⣿⣿⣿⣿*/"⡀⡁⡂⡃⡄⡅⡆⡇⡈⡉⡊⡋⡌⡍⡎⡏⡐⡑⡒⡓⡔⡕⡖⡗⡘⡙⡚⡛⡜⡝⡞⡟⡠⡡⡢⡣⡤⡥⡦⡧⡨⡩⡪⡫⡬⡭⡮⡯⡰⡱⡲⡳⡴⡵⡶⡷⡸⡹⡺⡻⡼⡽⡾⡿",/*⣾⣿⣿⣿⣿⣿⣿⣿⡿⠓ ⣈⣮⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⠷⠁ */ +/*⣿⣿⣿⣿*/"⢀⢁⢂⢃⢄⢅⢆⢇⢈⢉⢊⢋⢌⢍⢎⢏⢐⢑⢒⢓⢔⢕⢖⢗⢘⢙⢚⢛⢜⢝⢞⢟⢠⢡⢢⢣⢤⢥⢦⢧⢨⢩⢪⢫⢬⢭⢮⢯⢰⢱⢲⢳⢴⢵⢶⢷⢸⢹⢺⢻⢼⢽⢾⢿",/*⣿⣿⣿⣿⣿⣿⣿⠗ ⣈⣾⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⠷⠁ */ +/*⣿⣿⣿⣿*/"⣀⣁⣂⣃⣄⣅⣆⣇⣈⣉⣊⣋⣌⣍⣎⣏⣐⣑⣒⣓⣔⣕⣖⣗⣘⣙⣚⣛⣜⣝⣞⣟⣠⣡⣢⣣⣤⣥⣦⣧⣨⣩⣪⣫⣬⣭⣮⣯⣰⣱⣲⣳⣴⣵⣶⣷⣸⣹⣺⣻⣼⣽⣾⣿",/*⣿⣿⣿⣿⣿⠷⠁ ⣈⣾⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⡿⠳⠁ */ +/**/];/* ⢀⣬⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣏ ⣈⣾⣿⣿⣿⣿⣿⣿⣿⡿⠃ ⣈⣾⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⡿⠷⠑ */ +/**/let bitchars = CHARS.iter().flat_map(|t| t.chars()).collect::>();/*⣿⠷ ⢀⣾⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⡿⠷⠑ */ +/*⣿⣿⣿⠿ ⢀⣾⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⡷⣷⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣎⢌ ⣼⣿⣿⣿⣿⣿⣿⣿⡿⠁ ⢀⣬⣿⣿⣿⣿⣿⣿⣿⣿⣿⠷⠓⠁ ⢀*/ +/**/let px = |i: usize, j: usize| if i < w && j < h {data[j * w + i]} else {0};/* ⢀⣬⣿⣿⣿⣿⣿⣿⣿⣿⡿⠓ ⢀⣬⣿*/ +/*⣿⠿ ⣨⣿⣿⣿⣿⣿⣿⣿⡿⠓ ⠰⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣯ ⣠⣿⣿⣿⣿⣿⣿⣿⠷ ⣀⣾⣿⣿⣿⣿⣿⣿⣿⣿⠗ ⣬⣿⣿⣿*/ +/**/let mut output = vec![];/* ⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⢏ ⣿⣿⣿⣿⣿⣿⣿⠿ ⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⠌ ⣰⣿⣿⣿⣿*/ +/*⣿ ⣼⣿⣿⣿⣿⣿⣿⣿⠇ ⡰⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿ ⣠⣿⣿⣿⣿⣿⣿⣿⠏ ⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣯⠈ ⣀⣿⣿⣿⣿⣿*/ +/**/for j in (0..h).step_by(4)/* ⣷⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⠌ ⣸⣿⣿⣿⣿⣿⣿⣿⠏ ⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣎⠈ ⢀⣿⣿⣿⣿⣿⣿*/ +/**/{/* ⣼⣿⣿⣿⣿⣿⣿⣿⠟ ⣰⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⠏ ⣿⣿⣿⣿⣿⣿⣿⣿⠇ ⠰⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣎⢌ ⣰⣿⣿⣿⣿⣿⣿*/ +/* */let mut line = vec![];/* ⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⠏ ⣰⣿⣿⣿⣿⣿⣿⣿⣿ ⠰⣷⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣯⣎⢌ ⣷⣿⣿⣿⣿⣿*/ +/* */for i in (0..w).step_by(2)/*⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⠏ ⢀⣿⣿⣿⣿⣿⣿⣿⣿⣿⠏ ⠱⡳⡷⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣎⠈ ⠱⣷⣿⣿⣿*/ +/* */{/*⣿⣿⣿⣿⣿⣿⣿⣿⠿ ⠰⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⠏ ⢀⣾⣿⣿⣿⣿⣿⣿⣿⣿⣿⠏ ⠑⠱⠳⡷⣷⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣎ ⠐⡳⣿*/ +/* */let mut 
index = 0;/* ⣳⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⠏ ⢀⣾⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⠏ ⠐⠱⣷⣿⣿⣿⣿⣿⣿⣿⣿⣯ ⠐*/ +/* ⣸*/index |= if px(i+0, j+0) < 128 {0} else {1 << 0};/*⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⠏ ⠱⣷⣿⣿⣿⣿⣿⣿⠷ */ +/* ⢀⣿*/index |= if px(i+0, j+1) < 128 {0} else {1 << 1};/*⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⠇ ⣳⣿⣿⣿⠷⠓ */ +/* ⣸⣿*/index |= if px(i+0, j+2) < 128 {0} else {1 << 2};/*⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⡿ ⢈⣌⣬⣮⣮⣮⣎⣌⣌⣌⣌⣾⣿⡿⠁ */ +/* ⣀⣿⣿*/index |= if px(i+0, j+3) < 128 {0} else {1 << 3};/*⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⠃ ⢀⣬⣾⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⢌ */ +/* ⣈⣿⣿⣿*/index |= if px(i+1, j+0) < 128 {0} else {1 << 4};/*⣿⣿⣿⣿⣿⣿⣿⣿⣿⠗ ⣈⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣯⣌⠈ */ +/* ⢀⣼⣿⣿⣿⣿*/index |= if px(i+1, j+1) < 128 {0} else {1 << 5};/*⣿⣿⣿⣿⣿⣿⣿⡿⠃ ⣼⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣎⢌ */ +/*⢌⢈⣿⣿⣿⣿⣿⣿*/index |= if px(i+1, j+2) < 128 {0} else {1 << 6};/*⣿⣿⣿⣿⣿⣿⠷ ⣼⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⡷⡷⡷⡷⣿⣿⣿⣿⣿⣯⣎⢈ ⣴*/ +/*⣿⣿⣿⣿⣿⣿⣿⣿*/index |= if px(i+1, j+3) < 128 {0} else {1 << 7};/*⣿⣿⣿⣿⡷⠃ ⣨⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⡿⠓ ⠑⡳⣿⣿⣿⣿⣿⣿⣮⣮⣮⣿*/ +/*⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⠿ ⠐⡳⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⠷⠑ ⣀⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⠿⠁ ⠰⣿⣿⣿⣿⣿⣿⣿⣿⣿*/ +/*⣿⣿⣿⣿⣿⣿⣿⣿*/line.push(bitchars[index]);/* ⠱⡷⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⡷⠳⠑ ⣈⣬⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⠷ ⣿⣿⣿⣿⣿⣿⣿⣿⣿*/ +/*⣿⣿⣿⣿*/}/*⠟ ⣾⣿⣿⣿⣿⣿⣿⣿⣿⣎⠈ ⠐⠱⡷⣷⣿⡷⡷⠳⠑⠁ ⢀⣬⣾⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⡿⠃ ⢀⣿⣿⣿⣿⣿⣿⣿⣿⣿*/ +/*⣿⣿⣿⣿*/output.push(line);/*⣿⣿⣿⣿⣿⣿⣿⣿⣎⢌ ⢀⣌⣾⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⠗ ⣰⣿⣿⣿⣿⣿⣿⣿⣿⣿*/ +/**/}/*⣿⣿⣿⣿ ⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣮⣌⢈⠈ ⣈⣾⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⡿⠳⠁ ⢀⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿*/ +/*⣿⣿⣿⣿⣿⣿⣿⣿⣿ ⣀⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣯⣮⣌⢈ ⣈⣾⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⡿⠷⠑ ⣀⣾⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿*/ +/**/output/* ⣰⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣯⢌ ⠈⢀⣬⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⠷⠑ ⢀⣬⣿⣿⣿⣿⣿⣿⣿⣿⡿⠷⠓⣳*/ +}/*⣿⣿⣿⣿⣿⣿⣿⣿⠏ ⣰⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⠷⠓⠑⠑⠑⡳⣿⣿⣿⣿⣿⣿⣿⣿⣿⣯⠈ ⢀⡈⣚⣾⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⡷⠓ ⢈⣬⣿⣿⣿⣿⣿⣿⣿⣿⣿⠓ */ +/*⣿⣿⣿⣿⣿⣿⣿⣿⣿⣏ ⣰⣿⣿⣿⣿⣿⣿⣿⣿⣿⠃ ⠐⡳⣿⣿⣿⣿⣿⣿⣿⣿⣿⣎⠈ ⢈⣽⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⡿⠳⠁ ⢈⣬⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⡿⠁ */ +/*⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⠌ ⣰⣿⣿⣿⣿⣿⣿⣿⣿⠟ ⠐⡳⣿⣿⣿⣿⣿⣿⣿⣿⣿⣎⣌⢈⢈⢈⣌⣬⣮⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⠷⠓ ⢀⣈⣾⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⡿⠁ */ +fn read_jpeg_to_bitmap(file: &str) -> (usize, usize, Vec)/*⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⠷⠁ ⢀⣬⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⠿ */ +{/*⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⠌ ⣷⣿⣿⣿⣿⣿⣿⠿ ⠱⣷⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⠷⠁ ⣈⣾⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⡿⠁ */ +/**/use zune_jpeg::zune_core::{colorspace::ColorSpace, options::DecoderOptions, bytestream::ZCursor};/*⣿⣿⣿⣿⣿⠁ */ +/**/let data = std::fs::read(file).unwrap();/* ⣳⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⠿⠁ ⢀⣾⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⠏ */ +/**/let options = 
DecoderOptions::default().jpeg_set_out_colorspace(ColorSpace::Luma);/*⣬⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⡿ */ +/**/let mut decoder = zune_jpeg::JpegDecoder::new_with_options(ZCursor::new(&data), options);/*⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⡿⠁ */ +/**/let pixels = decoder.decode().unwrap();/* ⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⠿⠁ ⣬⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⠁ */ +/**/let (w, h) = decoder.dimensions().unwrap();/* ⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⡿ ⢀⣾⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⠗ */ +/**/(w, h, pixels)/*⣯⣌⣌⣬⣾⣿⣿⣿⠎ ⢬⣭⣿⣿⣿⣿⣿⣿⣿⣿⣿ ⣳⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⠏ ⣰⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⠏ */ +}/* ⠰⣷⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⠷⠁ ⣐⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿ ⣰⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿ ⠰⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⠏ */ +/*⣯⠌ ⠐⣷⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⡿⠁ ⣰⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⠿ ⠰⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⠈ ⣳⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⠏ */ +/*⣿⣿⠌ ⠐⣷⣿⣿⣿⣿⣿⣿⣿⣿⣿⠿ ⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⠏ ⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣏ ⠰⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣯ */ +fn main()/* ⠰⣿⣿⣿⣿⣿⣿⣿⣿⣿⠁ ⡰⣿⣿⣿⣿⣿⣿⣿⣿⣿ ⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⠌ ⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿ */ +{/*⣿⣿⠏ ⣱⣿⣿⣿⣿⣿⣿⣿⠿ ⣳⣿⣿⣿⣿⣿⣿⣿⠿ ⢀⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⠌ ⣹⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣏ */ +/**/match std::env::args().collect::>().as_slice() {/*⣿⣿⣿⣿⣿⣿⣿⠌ ⣰⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⢎ */ +/*⣿⣿⣿⣿*/[_, bitmap_file, source_file, output_file] =>/*⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⢌ ⣾⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⢎ */ +/*⣿⣿⣿⣿*/{/* ⠰⣿⣿⣿⣿⣿⣿⣿⣏ ⣨⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣎⠈ ⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣯⣎⠈ */ +/*⣿⣿⣿⣿⣿ */let (w, h, pixels) = read_jpeg_to_bitmap(&bitmap_file);/*⣿⣿⣯⢌ ⣰⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣯⣎⣌⢌⢈⢈⢈⢈*/ +/*⣿⣿⣿⣿⣿⠏ ⡰⣿⣿⣿⣿⣿⣿⣿⣿⣯⢌ ⣼⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⠷⠑⠑⠑⠱⣷⣿⣿⣿⣿⣿⣿⣿⣯⢌ ⢀⣀⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿*/ +/*⣿⣿⣿⣿⣿⣿ */let char_bitmap = convert_bitmap_to_unicode(w, h, pixels);/*⣿⣿⣿⣯⣎⢈ ⢀⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⡷⠳⠳⠑⠑⠑⠑⠑⠑⠳⡳⡷⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿*/ +/*⣿⣿⣿⣿⣿⣿⢏ ⠐⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣎ ⣰⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿ ⠱⣷⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⡷⠓ ⠑⠳⣷⣿⣿⣿⣿⣿⣿⣿*/ +/*⣿⣿⣿⣿⣿⣿⣿⠌*/let source = std::fs::read_to_string(source_file).unwrap();/*⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⠷⠁ ⠐⠳⣷⣿⣿⣿⣿*/ +/*⣿⣿⣿⣿⣿⣿⣿⣯ ⡱⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣎ ⡰⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⢏ ⡱⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⠓ ⠐⠱⠳⡷*/ +/*⣿⣿⣿⣿⣿⣿⣿⣿*/let max_width = 120;/*⣏ ⠰⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿ ⠰⣷⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⠃ */ +/*⣿⣿⣿⣿⣿⣿⣿⣿⣿ ⠰⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣯⠈ ⠰⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⠏ ⠐⡳⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⡿⠁ */ +/*⣿⣿⣿⣿⣿⣿⣿⣿*/let mut modified_lines = vec![];/* ⠐⡳⣿⣿⣿⣿⣿⣿⣿⣿⠁ ⠱⡳⣿⣿⣿⣿⣿⣿⣿⡿⠓ ⢈⢈⢈⢈⢈⢈⢈⠈ */ +/*⣿⣿⣿⣿⣿⣿⣿⣿*/let mut buffer = Vec::with_capacity(1024);/*⣿⣿⣿⠿ ⠑⠱⠳⠳⠓⠁ ⢈⣌⣬⣾⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣮⣌⠈ */ +/*⣿⣿⣿⣿⣿⣿⣿⣿*/let mut 
row = 0;/*⣿⣿⣿⣿⣿⣿⣿⣿⣎ ⠐⠑⠑⠁ ⢀⣈⣮⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣮⢌⠈ */ +/*⣿⣿⣿⣿⣿⣿⣿⣿⣿⢏ ⣸⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣏⠈ ⢀⢈⣌⣮⣮⣎⣌⢈ ⣈⣮⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣮*/ +/*⣿⣿⣿⣿⣿⣿⣿⣿*/for line in source.lines()/*⣿⢎ ⣬⣿⣿⣿⣿⣿⣿⣿⣿⣿⣎⠈ ⢀⣬⣾⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿*/ +/*⣿⣿⣿⣿⣿⣿⣿⣿*/{/* ⢀⣜⣾⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣯⢌ ⣼⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣮⢌ ⢀⣈⣮⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⡷⠷⠳⠳⠳⠳⠳⠳⠳⡷⡷⣿⣿⣿⣿⣿⣿⣿⣿⣿*/ +/*⣿⣿⡷⡷⡷⡷⣿⣿⣿⣿⣿⣿*/buffer.clear();/*⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣯⢌ ⣸⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣮⣮⣌⣬⣮⣾⣿⣿⣿⣿⣿⣿⣿⣿⣿⡿⠷⠓⠁ ⠐⠱⠳⡷⣿⣿⣿⣿*/ +/*⠓ ⠐⣷⣿⣿⣿⣿*/buffer.extend(line.chars());/*⣯⢌⠈ ⣨⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⠷⠑ ⠑⠱⠳*/ +/* ⡳⣿⣿⣿*/if buffer.len() < max_width {/*⣿⣿⣮⢌⠈ ⣨⣿⣿⣿⣿⣿⣿⣿⣿⣿⡷⠳⠳⠑⠑⠳⡷⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⠷⠁ */ +/* ⠱⣷⣿⣿⣿⣿⣿*/let needed = max_width - buffer.len();/*⣿⣿⣿⣿⣿⣿⣿⠷⠁ ⠐⠳⣷⣿⣿⣿⣿⣿⣿⣿⡿⠳⠁ */ +/* ⠑⠑⠑ */buffer.extend(core::iter::repeat(' ').take(needed));/* ⠐⠱⠳⠳⠳⠳⠑ ⢈⣈⣌⣌⣬⣮⣎⣌⣌⢈ */ +/* */}/* ⠱⡷⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⠗ ⣈⣮⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣯⣎⢌ */ +/* ⢈⣌⣬⣎⢌⠈ ⢈⢈⢈⢈⢈⢈⢈⠈ ⠐⠱⡷⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⠃ ⣈⣾⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣮⢌⠈ */ +/*⣿⣿⣿⣿⣿⣿⣿⠌ */let mut i = 0;/*⣮⣎⣌⢈ ⠱⡳⣷⣿⣿⣿⣿⣿⣿⣿⣿⣿⠿⠁ ⢈⣌⣌⣌⠈ ⣬⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣮⣌⢈⢈⢈*/ +/*⣿⣿⣿⣿⣿⣿⣿⣿⢎ */for j in 0..buffer.len()/* ⠐⠱⡳⣷⣿⣿⣿⡿⠓ ⣈⣾⣿⣿⣿⣿⣿⣯⢌ ⢀⣾⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿*/ +/*⣿⣿⣿⣿⣿⣿⣿⣿⣿⣯⣌⢈*/{/*⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣯⣌⠈ ⠁ ⣬⣿⣿⣿⣿⣿⣿⣿⣿⣿⣎⢈ ⢀⣬⣿⣿⣿⣿⣿⣿⣿⡿⠷⠳⠓⠑⠑⠑⠑⠑⠑⠱⡳⣷⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿*/ +/*⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿*/if !buffer[j].is_whitespace()/* ⢀⣾⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣯⣎⣌⣌⣾⣿⣿⣿⣿⣿⣿⣿⠷⠁ ⡳⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿*/ +/*⠳⠓⠑⠑⣷⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿*/{/* ⠐⠱⠳⡷⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣎⢌ ⣨⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⠷⠁ ⠰⣷⣿⣿⣿⣿⣿⣿⣿⣿*/ +/* ⣳⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⠟ */if j - i > 3/*⡷⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣮⣌⢈ ⣈⣾⣿⣿⣿⣿⣿⡷⡷⣷⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⠷⠁ ⠐⡳⣿⣿⣿⣿⣿⣿⣿*/ +/* ⣰⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⠏ */{/* ⠐⡳⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣯⣎⢌⠈ ⢈⣬⣾⣿⣿⣿⣿⠷⠑ ⣱⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⠷⠁ ⠐⣳⣿⣿⣿⣿⣿*/ +/* ⣰⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣯ */let bmp_row = &char_bitmap[row];/*⣿⣿⣿⡿⠃ ⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⡿⠁ ⡱⣿⣿⣿⣿*/ +/* ⣼⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣏ */for k in i..j {/*⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⠿⠁ ⣿⣿⣿⣿⣿⣿⣿⣿⣿⠿ ⠰⣷⣿⣿*/ +/* ⣨⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣏ */buffer[k] = bmp_row[k % bmp_row.len()];/*⣰⣿⣿⣿⣿⣿⣿⣿⣿⠗ ⠐⣷⣿*/ +/* ⣬⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣯⠌ */}/* ⠰⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⠇ ⣿⣿⣿⣿⣿⣿⣿⣿⠿ ⣈⣌⢈ ⠐⡳*/ +/*⢀⣌⣾⣿⣿⣿⣿⠷⠑ ⠐⡳⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⢎ */buffer[i ] = '/';/*⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⡿ ⣰⣿⣿⣿⣿⣿⣿⣿⡿ ⣀⣾⣿⣿⣿⣯⠈ */ +/*⣿⣿⣿⣿⣿⠿⠁ ⣱⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣎*/buffer[i+1] = '*';/*⣳⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⠏ ⣿⣿⣿⣿⣿⣿⣿⣿⠃ ⣀⣿⣿⣿⣿⣿⣿⣿⠈ */ +/*⣿⣿⣿⣿⠗ 
⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿*/buffer[j-1] = '/';/*⡰⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⠏ ⣠⣿⣿⣿⣿⣿⣿⣿⠿ ⣀⣿⣿⣿⣿⣿⣿⣿⣿⣏ */ +/*⣿⣿⣿⠿ ⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿*/buffer[j-2] = '*';/* ⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⠏ ⣰⣿⣿⣿⣿⣿⣿⣿⠇ ⣸⣿⣿⣿⣿⣿⣿⣿⣿⣿⠎ */ +/*⣿⣿⡿ ⣿⣿⣿⣿⣿⣿⣿*/}/* ⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⠏ ⣸⣿⣿⣿⣿⣿⣿⣿ ⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⠏ */ +/*⣿⣿⠃ ⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿ ⣀⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⠏ ⣿⣿⣿⣿⣿⣿⣿⠿ ⣀⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⠏ */ +/*⣿⠿ ⣿⣿⣿⣿⣿⣿⣿*/i = j + 1;/* ⣰⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⠏ ⣰⣿⣿⣿⣿⣿⣿⠏ ⣰⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⠁ */ +/*⣿⠃ ⣠⣿⣿⣿*/}/*⣿⣿⣿⣿ ⣰⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⠏ ⣿⣿⣿⣿⣿⣿⣿⠏ ⣰⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿ */ +/*⣿⠈ */}/*⣿⣿⣿⣿⣿⣿⣿⣿⣏⠈ ⣰⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⠏ ⣳⣿⣿⣿⣿⣿⣿⠏ ⡰⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣌⢈ */ +/*⣿⣯⠈ ⠐⣷⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣎⠈ ⣳⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⠏ ⠰⣿⣿⣿⣿⣿⣿⣯⠈ ⠱⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣎⠈ */ +/*⣿⣿⣿⠌ */let j = buffer.len();/* ⣷⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⠏ ⣷⣿⣿⣿⣿⣿⣿⣏ ⠰⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⢌ */ +/*⣿⣿⣿⣿⠈ */if j - i > 3/*⣿⣿⣿⣎⠈ ⣳⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⠏ ⣰⣿⣿⣿⣿⣿⣿⣿⢎ ⠰⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⠌ */ +/*⣿⣿⣿⣿⣏ */{/*⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣎⢈ ⠱⣷⣿⣿⣿⣿⣿⣿⣿⣿⠃ ⣰⣿⣿⣿⣿⣿⣿⣿⣿⠌ ⠐⣷⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⠯ */ +/*⣿⣿⣿⣿⣿⢎ ⣰*/let bmp_row = &char_bitmap[row];/*⠱⣷⣿⣿⣿⣿⣿⠓ ⣰⣿⣿⣿⣿⣿⣿⣿⣿⣿⠈ ⡳⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⠃ ⢈⣬⣮⣿⣿*/ +/*⣳⣿⣿⣿⣿⣿⣎⠈ ⢀⣀⣿*/for k in i..j {/*⣿⣿⣯⢌ ⠐⠳⠳⠳⠁ ⣰⣿⣿⣿⣿⣿⣿⣿⣿⣿⣯ ⠐⠳⣷⣿⣿⣿⣿⣿⠷⠁ ⢀⣬⣾⣿⣿⣿⣿⣿*/ +/* ⣿⣿⣿⣿⣿⣿⣿⣯⣌⢌⢈⣌⣬⣿⣿⣿⣿⣿⣿*/buffer[k] = bmp_row[k % bmp_row.len()];/* ⣸⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣏ ⠑⠑⠁ ⣈⣾⣿⣿⣿⣿⣿⣿⣿*/ +/*⣀⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿*/}/*⠁ ⠑⠱⡷⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣎⠈ ⣀⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿ ⣨⣿⣿⣿⣿⣿⣿⣿⣿⣿*/ +/*⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿*/buffer[i ] = '/';/*⣿⣿⣿⣿⣿⣿⣎⢈ ⢀⣾⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⠈ ⢈⣾⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿*/ +/*⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⡿*/buffer[i+1] = '*';/*⣿⣿⣿⣿⣿⣿⣿⣿⣯⢌ ⣀⣾⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⠏ ⢀⢈⣬⣾⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿*/ +/*⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⠓ */buffer[j-1] = '/';/*⡳⣿⣿⣿⣿⣿⣿⣿⣿⣿⣯⢌⠈ ⢀⣬⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣏ ⣈⣮⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿*/ +/*⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⠟ */buffer[j-2] = '*';/* ⠐⠱⡷⣿⣿⣿⣿⣿⣿⣿⣿⣿⣮⣎⣌⢈⢈ ⢀⣬⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⠌ ⣠⣿⣿⣿⣿⣿⣿⣿⣿⣿⠷⠑⠑⠱⣷⣿⣿⣿⣿⣿*/ +/* ⠱⣷⣿⣿⣿⣿⣿⣿⣿⣿⣿*/}/* ⠐⡳⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣮⣾⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣯⠌ ⣰⣿⣿⣿⣿⣿⣿⣿⡿⠁ ⣿⣿⣿⣿⣿*/ +/* ⡳⣿⣿⣿⣿⣿⣿⣿⣿⣿⣏ ⢀⣈⣌⣌⣌⢈ ⠐⡳⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⡿⠓⠑⠑⠑⠑⠳⡷⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣎⠈ ⡰⣿⣿⣿⣿⣿⣿⣿⠃ ⣰⣿⣿⣿⣿*/ +/* ⡱⣿⣿⣿⣿⣿⣿⣿*/modified_lines.push(buffer.iter().collect::());/* ⠐⠱⣷⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣎⠈ ⣳⣿⣿⣿⣿⣿⣿ ⣰⣿⣿⣿⣿*/ +/* ⣱⣿⣿⣿⣿⣿⣿*/row += 1;/* ⣸⣿⣿⣿⣿⣿⣿⣿⣿⣯⠈ ⣳⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⡿⠃ ⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⢎ ⠐⣿⣿⣿⣿⣿⣿ ⣰⣿⣿⣿⣿*/ +/* ⣿⣿⣿*/}/*⣿⣿ ⣀⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣏ ⣷⣿⣿⣿⣿⣿⣿⣿⣿⣿⡿⠓ ⢀⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⡿ ⢀⣿⣿⣿⣿⣿⠏ ⣰⣿⣿⣿⣿*/ +/* ⣼⣿⣿⣿⣿⣿⣿⣿⣿⣿⠏ ⣀⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⠎ ⠰⣿⣿⣿⣿⣿⣿⣿⠷⠁ ⡈⣚⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⡿⠁ ⣰⣿⣿⣿⣿⣿⠁ ⠐⣿⣿⣿⣿*/ +/* ⣀⣿⣿⣿⣿*/let new_source = 
modified_lines.join("\n");/*⠑ ⡀⣚⣾⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⡿⠓ ⣾⣿⣿⣿⣿⣿ ⣰⣿⣿⣿⣿*/ +/* ⢀⣾⣿⣿⣿⣿⣿⣿⣿⣿⣿⠟ ⣸⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⠎ ⢀⣌⣾⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⡿⠓ ⣸⣿⣿⣿⣿⣿⣿ ⣰⣿⣿⣿⣿*/ +/* ⣼⣿⣿⣿⣿⣿*/println!("{new_source}");/*⣿⣿⣿⣯ ⢀⣈⣾⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⡿⠓ ⣼⣿⣿⣿⣿⣿⣿⠿ ⣰⣿⣿⣿⣿*/ +/* ⣸⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⠟ ⣼⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⠌ ⢈⣬⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⠷⠁ ⢀⣾⣿⣿⣿⣿⣿⣿⣿⠇ ⣰⣿⣿⣿⣿*/ +/*⣀⣿⣿⣿⣿⣿⣿⣿*/std::fs::write(output_file, new_source).unwrap();/*⣌⣾⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⡿⠳⠁ ⣈⣿⣿⣿⣿⣿⣿⣿⣿⡿ ⣰⣿⣿⣿⣿*/ +/*⣿⣿⣿⣿*/}/*⣿⣿⠟ ⣠⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⠎ ⢀⣬⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⡿⠓ ⢀⣼⣿⣿⣿⣿⣿⣿⣿⣿⡿⠁ ⣿⣿⣿⣿⣿*/ +/*⣿⣿⣿⣿*/[path] =>/* ⣰⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣏ ⣨⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⠷⠑ ⣀⣾⣿⣿⣿⣿⣿⣿⣿⣿⡿⠁ ⣠⣿⣿⣿⣿⣿*/ +/*⣿⣿⣿⣿*/{/*⣿⠟ ⣰⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿ ⣼⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⠷⠁ ⣀⣿⣿⣿⣿⣿⣿⣿⡿⠷⠓ ⣼⣿⣿⣿⣿⣿*/ +/*⣿⣿⣿⣿⣿⣿⣿⣿*/println!("usage: {path} art.jpg input_src output_src");/*⣿⡿⠳⠁ ⡰⣷⡷⡷⠷⠳⠑⠁ ⣨⣿⣿⣿⣿⣿⣿*/ +/*⣿⣿⣿⣿*/}/*⠇ ⠑⠑ ⠐⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⠌ ⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⡿⠓ ⣨⣿⣿⣿⣿⣿⣿⣿*/ +/*⣿⣿⣿⣿*/_ =>/* ⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⠏ ⣰⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⠷ ⢀⣌⣬⣮⣾⣿⣿⣿⣿⣿⠌ ⢀⣼⣿⣿⣿⣿⣿⣿⣿⣿*/ +/*⣿⣿⣿⣿*/{/* ⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⠏ ⠰⣿⣿⣿⣿⣿⣿⣿⣿⠷⠁ ⢀⣬⣾⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿ ⢀⣾⣿⣿⣿⣿⣿⣿⣿⣿⣿*/ +/*⣿⣿⣿⣿*/}/* ⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⠏ ⣷⣿⣿⣿⣿⣿⠷⠁ ⢀⣬⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⢎ ⣼⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿*/ +/**/}/*⣿⣿ ⣠⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⠏ ⠱⠳⠳⠑ ⣬⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⢎ ⠱⣷⣿⣿⣿⣿⣿⣿⣿⣿⣿*/ +/*⣿⣿⣿⣿⣿⣿⣿⠌ ⣼⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⠌ ⡳⡷⣷⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣏⠈ ⠑⡳⣷⣿⣿⣿⣿⣿⣿*/ +}/*⣿⣿⣿⣿⣿⣿⣯⠈ ⣈⣾⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣯⠈ ⢀⢈⢈⢈ ⠑⠱⡳⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⠌ ⠑⠳⡷⣿⣿⣿*/ \ No newline at end of file diff --git a/src/main.rs b/src/main.rs new file mode 100644 index 0000000..429bbf9 --- /dev/null +++ b/src/main.rs @@ -0,0 +1,132 @@ + +pub fn convert_bitmap_to_unicode(w: usize, h: usize, data: Vec) -> Vec> +{ + + const CHARS: [&str; 4] = [ + " ⠁⠂⠃⠄⠅⠆⠇⠈⠉⠊⠋⠌⠍⠎⠏⠐⠑⠒⠓⠔⠕⠖⠗⠘⠙⠚⠛⠜⠝⠞⠟⠠⠡⠢⠣⠤⠥⠦⠧⠨⠩⠪⠫⠬⠭⠮⠯⠰⠱⠲⠳⠴⠵⠶⠷⠸⠹⠺⠻⠼⠽⠾⠿", + "⡀⡁⡂⡃⡄⡅⡆⡇⡈⡉⡊⡋⡌⡍⡎⡏⡐⡑⡒⡓⡔⡕⡖⡗⡘⡙⡚⡛⡜⡝⡞⡟⡠⡡⡢⡣⡤⡥⡦⡧⡨⡩⡪⡫⡬⡭⡮⡯⡰⡱⡲⡳⡴⡵⡶⡷⡸⡹⡺⡻⡼⡽⡾⡿", + "⢀⢁⢂⢃⢄⢅⢆⢇⢈⢉⢊⢋⢌⢍⢎⢏⢐⢑⢒⢓⢔⢕⢖⢗⢘⢙⢚⢛⢜⢝⢞⢟⢠⢡⢢⢣⢤⢥⢦⢧⢨⢩⢪⢫⢬⢭⢮⢯⢰⢱⢲⢳⢴⢵⢶⢷⢸⢹⢺⢻⢼⢽⢾⢿", + "⣀⣁⣂⣃⣄⣅⣆⣇⣈⣉⣊⣋⣌⣍⣎⣏⣐⣑⣒⣓⣔⣕⣖⣗⣘⣙⣚⣛⣜⣝⣞⣟⣠⣡⣢⣣⣤⣥⣦⣧⣨⣩⣪⣫⣬⣭⣮⣯⣰⣱⣲⣳⣴⣵⣶⣷⣸⣹⣺⣻⣼⣽⣾⣿", + ]; + let bitchars = CHARS.iter().flat_map(|t| t.chars()).collect::>(); + + let px = |i: usize, j: usize| if i < w && j < h {data[j * w + i]} else {0}; + + let mut output = vec![]; + + for j in (0..h).step_by(4) + { + let mut line = vec![]; + for i in (0..w).step_by(2) + { + let mut 
index = 0; + index |= if px(i+0, j+0) < 128 {0} else {1 << 0}; + index |= if px(i+0, j+1) < 128 {0} else {1 << 1}; + index |= if px(i+0, j+2) < 128 {0} else {1 << 2}; + index |= if px(i+0, j+3) < 128 {0} else {1 << 3}; + index |= if px(i+1, j+0) < 128 {0} else {1 << 4}; + index |= if px(i+1, j+1) < 128 {0} else {1 << 5}; + index |= if px(i+1, j+2) < 128 {0} else {1 << 6}; + index |= if px(i+1, j+3) < 128 {0} else {1 << 7}; + + line.push(bitchars[index]); + } + output.push(line); + } + + output +} + + +fn read_jpeg_to_bitmap(file: &str) -> (usize, usize, Vec) +{ + use zune_jpeg::zune_core::{colorspace::ColorSpace, options::DecoderOptions, bytestream::ZCursor}; + let data = std::fs::read(file).unwrap(); + let options = DecoderOptions::default().jpeg_set_out_colorspace(ColorSpace::Luma); + let mut decoder = zune_jpeg::JpegDecoder::new_with_options(ZCursor::new(&data), options); + let pixels = decoder.decode().unwrap(); + let (w, h) = decoder.dimensions().unwrap(); + (w, h, pixels) +} + + +fn main() +{ + match std::env::args().collect::>().as_slice() { + [_, bitmap_file, source_file, output_file] => + { + let (w, h, pixels) = read_jpeg_to_bitmap(&bitmap_file); + + let char_bitmap = convert_bitmap_to_unicode(w, h, pixels); + + let source = std::fs::read_to_string(source_file).unwrap(); + + let max_width = 120; + + let mut modified_lines = vec![]; + let mut buffer = Vec::with_capacity(1024); + let mut row = 0; + + for line in source.lines() + { + buffer.clear(); + buffer.extend(line.chars()); + if buffer.len() < max_width { + let needed = max_width - buffer.len(); + buffer.extend(core::iter::repeat(' ').take(needed)); + } + + let mut i = 0; + for j in 0..buffer.len() + { + if !buffer[j].is_whitespace() + { + if j - i > 3 + { + let bmp_row = &char_bitmap[row]; + for k in i..j { + buffer[k] = bmp_row[k % bmp_row.len()]; + } + buffer[i ] = '/'; + buffer[i+1] = '*'; + buffer[j-1] = '/'; + buffer[j-2] = '*'; + } + + i = j + 1; + } + } + + let j = buffer.len(); + if j - i > 3 
+ { + let bmp_row = &char_bitmap[row]; + for k in i..j { + buffer[k] = bmp_row[k % bmp_row.len()]; + } + buffer[i ] = '/'; + buffer[i+1] = '*'; + buffer[j-1] = '/'; + buffer[j-2] = '*'; + } + + modified_lines.push(buffer.iter().collect::()); + row += 1; + } + + let new_source = modified_lines.join("\n"); + + println!("{new_source}"); + + std::fs::write(output_file, new_source).unwrap(); + } + [path] => + { + println!("usage: {path} art.jpg input_src output_src"); + } + _ => + { + } + } + +} diff --git a/test_image.jpg b/test_image.jpg new file mode 100644 index 0000000..51a47d0 Binary files /dev/null and b/test_image.jpg differ diff --git a/third_party/zune-core/CHANGELOG.md b/third_party/zune-core/CHANGELOG.md new file mode 100644 index 0000000..bd7626a --- /dev/null +++ b/third_party/zune-core/CHANGELOG.md @@ -0,0 +1,22 @@ +## 0.2.14 + +- Fixed building with no-std +- Add `peek_at` and `pos` for writer +- Make serde non default +- Add option to make PNG add an alpha channel + +## 0.2.12 + +- Add endianness conversion +- Hide exposed values for EncoderOptions +- Add Float32 bit depth +- Remove support for BitDepth 10 and 12 +- Add bit_size method + +## 0.2.1 + +Improve documentation on various parts + +## 0.2.0 + +Initial version \ No newline at end of file diff --git a/third_party/zune-core/Cargo.toml b/third_party/zune-core/Cargo.toml new file mode 100644 index 0000000..e67655a --- /dev/null +++ b/third_party/zune-core/Cargo.toml @@ -0,0 +1,22 @@ +[package] +name = "zune-core" +version = "0.5.0-rc1" +edition = "2021" +description = "Core utilities for image processing in the zune family of crates" +exclude = ["tests/"] +repository = "https://github.com/etemesi254/zune-image" +keywords = ["image"] +categories = ["multimedia::images", "multimedia::encoding"] +license = "MIT OR Apache-2.0 OR Zlib" + +[features] +# When present, we can use std facilities to detect +# if a specific feature exists +# Not enabled by default. 
Other zune crates can enable dep:zune-core/std by default. +# But if we enable it here, they can't disable it anymore. +# See: https://github.com/rust-lang/cargo/issues/8366 +std = [] + +[dependencies] +log = { version = "0.4.17", optional = true } +serde = { version = "1.0.52", optional = true } diff --git a/third_party/zune-core/LICENSE-APACHE b/third_party/zune-core/LICENSE-APACHE new file mode 100644 index 0000000..1cd601d --- /dev/null +++ b/third_party/zune-core/LICENSE-APACHE @@ -0,0 +1 @@ +../../LICENSE-APACHE \ No newline at end of file diff --git a/third_party/zune-core/LICENSE-MIT b/third_party/zune-core/LICENSE-MIT new file mode 100644 index 0000000..b2cfbdc --- /dev/null +++ b/third_party/zune-core/LICENSE-MIT @@ -0,0 +1 @@ +../../LICENSE-MIT \ No newline at end of file diff --git a/third_party/zune-core/LICENSE-ZLIB b/third_party/zune-core/LICENSE-ZLIB new file mode 100644 index 0000000..f0648a7 --- /dev/null +++ b/third_party/zune-core/LICENSE-ZLIB @@ -0,0 +1 @@ +../../LICENSE-ZLIB \ No newline at end of file diff --git a/third_party/zune-core/README.md b/third_party/zune-core/README.md new file mode 100644 index 0000000..6627e4e --- /dev/null +++ b/third_party/zune-core/README.md @@ -0,0 +1,15 @@ +## Zune core + +Core primitives necessary for image manipulations + +This crate contains small set of primitives +necessary for image manipulations which are shared among most `zune-` family +of decoders and encoders. + +### Items present + +Currently,it contains. + +- Colorspace definitions +- Bit depth definitions. +- Decoder and encoder options \ No newline at end of file diff --git a/third_party/zune-core/src/bit_depth.rs b/third_party/zune-core/src/bit_depth.rs new file mode 100644 index 0000000..635079d --- /dev/null +++ b/third_party/zune-core/src/bit_depth.rs @@ -0,0 +1,170 @@ +/* + * Copyright (c) 2023. 
+ * + * This software is free software; + * + * You can redistribute it or modify it under terms of the MIT, Apache License or Zlib license + */ + +//! Image bit depth, information and manipulations + +/// The image bit depth. +/// +/// The library successfully supports depths up to +/// 16 bits, as the underlying storage is usually a `u16`. +/// +/// This allows us to comfortably support a wide variety of images +/// e.g 10 bit av1, 16 bit png and ppm. +#[derive(Copy, Clone, Debug, Eq, PartialEq)] +#[non_exhaustive] +pub enum BitDepth { + /// U8 bit depth. + /// + /// Images with such bit depth use [`u8`] to store + /// pixels and use the whole range from 0-255. + /// + /// It is currently the smallest supported bit depth + /// by the library. + /// + /// For images with bit depths lower than this, they will be scaled + /// to this bit depth + Eight, + /// U16 bit depth + /// + /// Images with such bit depths use [`u16`] to store values and use the whole range + /// i.e 0-65535 + /// + /// Data is stored and processed in native endian. + Sixteen, + /// Floating point 32 bit data, range is 0.0 to 1.0 + /// + /// Uses f32 to store data + Float32, + /// Bit depth information is unknown + Unknown +} + +/// The underlying bit representation of the image +/// +/// This represents the minimum rust type that +/// can be used to represent image data, required +/// by `Channel` struct in zune-image +#[derive(Copy, Clone, Debug, Eq, PartialEq)] +#[non_exhaustive] +pub enum BitType { + /// Images represented using a [`u8`] as their + /// underlying pixel storage + U8, + /// Images represented using a [`u16`] as their + /// underlying pixel storage. 
+ U16, + /// Images represented using a [`f32`] as their + /// underlying pixel storage + F32 +} + +impl BitType { + /// Return the equivalent of the image bit type's depth + pub fn to_depth(self) -> BitDepth { + match self { + BitType::U8 => BitDepth::Eight, + BitType::U16 => BitDepth::Sixteen, + BitType::F32 => BitDepth::Float32 + } + } +} + +impl Default for BitDepth { + fn default() -> Self { + Self::Unknown + } +} + +impl BitDepth { + /// Get the max value supported by the bit depth + /// + /// During conversion from one bit depth to another + /// + /// larger values should be clamped to this bit depth + #[rustfmt::skip] + #[allow(clippy::zero_prefixed_literal)] + pub const fn max_value(self) -> u16 + { + match self + { + Self::Eight => (1 << 08) - 1, + Self::Sixteen => u16::MAX, + Self::Float32 => 1, + Self::Unknown => 0, + } + } + + /// Return the minimum number of bits that can be used to represent + /// each pixel in the image + /// + /// All bit depths below 8 return a bit type of `BitType::U8`. 
+ /// and all those above 8 and below 16 return a bit type of `BitType::U16` + /// + /// # Returns + /// An enum whose variants represent the minimum size for an unsigned integer + /// which can store the image pixels without overflow + /// + /// # Example + /// + /// ``` + /// use zune_core::bit_depth::{BitDepth, BitType}; + /// assert_eq!(BitDepth::Eight.bit_type(),BitType::U8); + /// + /// assert_eq!(BitDepth::Sixteen.bit_type(),BitType::U16); + /// ``` + /// + /// See also [size_of](BitDepth::size_of) + pub const fn bit_type(self) -> BitType { + match self { + Self::Eight => BitType::U8, + Self::Sixteen => BitType::U16, + Self::Float32 => BitType::F32, + Self::Unknown => panic!("Unknown bit type") + } + } + /// Get the number of bytes needed to store a specific bit depth + /// + /// + /// # Example + /// For images less than or equal to 8 bits(1 byte), we can use a [`u8`] to store + /// the pixels, and a size_of [`u8`] is 1 + /// + /// For images greater than 8 bits and less than 16 bits(2 bytes), we can use a [`u16`] to + /// store the pixels, a size_of [`u16`] is 2. 
+ /// ``` + /// use zune_core::bit_depth::BitDepth; + /// let depth = BitDepth::Sixteen; + /// // greater 12 bits is greater than 8 and less than 16 + /// assert_eq!(depth.size_of(),2); + /// ``` + pub const fn size_of(self) -> usize { + match self { + Self::Eight => core::mem::size_of::(), + Self::Sixteen => core::mem::size_of::(), + Self::Float32 => core::mem::size_of::(), + Self::Unknown => panic!("Unknown bit type") + } + } + pub const fn bit_size(&self) -> usize { + self.size_of() * 8 + } +} + +/// Byte endianness of returned samples +/// this is useful when the decoder returns samples which span more +/// than one byte yet the type returned is `&[u8]` +/// +/// This helps you interpret how those bytes should be reconstructed +/// to a higher order type +#[derive(Copy, Clone, Debug, Eq, PartialEq)] +pub enum ByteEndian { + /// Little Endian byte-order + LE, + /// Big Endian byte-order + BE +} diff --git a/third_party/zune-core/src/bytestream.rs b/third_party/zune-core/src/bytestream.rs new file mode 100644 index 0000000..e726314 --- /dev/null +++ b/third_party/zune-core/src/bytestream.rs @@ -0,0 +1,27 @@ +/* + * Copyright (c) 2023. + * + * This software is free software; + * + * You can redistribute it or modify it under terms of the MIT, Apache License or Zlib license + */ + +//! A simple implementation of a bytestream reader +//! and writer. +//! +//! This module contains two main structs that help in +//! byte reading and byte writing +//! +//! Useful for a lot of image readers and writers, it's put +//! 
here to minimize code reuse +pub use reader::ZReader; +pub use traits::*; +pub use writer::ZWriter; + +pub use crate::bytestream::reader::no_std_readers::*; +//use crate::bytestream::reader::std_readers::*; +pub use crate::bytestream::reader::ZByteIoError; + +mod reader; +mod traits; +mod writer; diff --git a/third_party/zune-core/src/bytestream/reader.rs b/third_party/zune-core/src/bytestream/reader.rs new file mode 100644 index 0000000..a91aaad --- /dev/null +++ b/third_party/zune-core/src/bytestream/reader.rs @@ -0,0 +1,458 @@ +use alloc::string::String; +use alloc::vec; +use alloc::vec::Vec; +use core::fmt::Formatter; + +pub(crate) mod no_std_readers; +pub(crate) mod std_readers; +use crate::bytestream::ZByteReaderTrait; + +/// Enumeration of possible methods to seek within an I/O object. +/// +/// It is analogous to the [SeekFrom](std::io::SeekFrom) in the std library but +/// it's here to allow this to work in no-std crates +#[derive(Copy, PartialEq, Eq, Clone, Debug)] +pub enum ZSeekFrom { + /// Sets the offset to the provided number of bytes. + Start(u64), + + /// Sets the offset to the size of this object plus the specified number of + /// bytes. + /// + /// It is possible to seek beyond the end of an object, but it's an error to + /// seek before byte 0. + End(i64), + + /// Sets the offset to the current position plus the specified number of + /// bytes. + /// + /// It is possible to seek beyond the end of an object, but it's an error to + /// seek before byte 0. 
+ Current(i64) +} + +impl ZSeekFrom { + /// Convert to [SeekFrom](std::io::SeekFrom) from the `std::io` library + /// + /// This is only present when std feature is present + #[cfg(feature = "std")] + pub(crate) fn to_std_seek(self) -> std::io::SeekFrom { + match self { + ZSeekFrom::Start(pos) => std::io::SeekFrom::Start(pos), + ZSeekFrom::End(pos) => std::io::SeekFrom::End(pos), + ZSeekFrom::Current(pos) => std::io::SeekFrom::Current(pos) + } + } +} + +pub enum ZByteIoError { + /// A standard library error + /// Only available with the `std` feature + #[cfg(feature = "std")] + StdIoError(std::io::Error), + /// An error converting from one type to another + TryFromIntError(core::num::TryFromIntError), + /// Not enough bytes to satisfy a read + // requested, read + NotEnoughBytes(usize, usize), + /// The output buffer is too small to write the bytes + NotEnoughBuffer(usize, usize), + /// An error that may occur randomly + Generic(&'static str), + /// An error that occurred during a seek operation + SeekError(&'static str), + /// An error that occurred during a seek operation + SeekErrorOwned(String) +} + +impl core::fmt::Debug for ZByteIoError { + fn fmt(&self, f: &mut Formatter<'_>) -> core::fmt::Result { + match self { + #[cfg(feature = "std")] + ZByteIoError::StdIoError(err) => { + writeln!(f, "Underlying I/O error {}", err) + } + ZByteIoError::TryFromIntError(err) => { + writeln!(f, "Cannot convert to int {}", err) + } + ZByteIoError::NotEnoughBytes(expected, found) => { + writeln!(f, "Not enough bytes, expected {expected} but found {found}") + } + ZByteIoError::NotEnoughBuffer(expected, found) => { + writeln!( + f, + "Not enough buffer to write {expected} bytes, buffer size is {found}" + ) + } + ZByteIoError::Generic(err) => { + writeln!(f, "Generic I/O error: {err}") + } + ZByteIoError::SeekError(err) => { + writeln!(f, "Seek error: {err}") + } + ZByteIoError::SeekErrorOwned(err) => { + writeln!(f, "Seek error {err}") + } + } + } +} + +#[cfg(feature = "std")] 
+impl From for ZByteIoError { + fn from(value: std::io::Error) -> Self { + ZByteIoError::StdIoError(value) + } +} + +impl From for ZByteIoError { + fn from(value: core::num::TryFromIntError) -> Self { + ZByteIoError::TryFromIntError(value) + } +} + +impl From<&'static str> for ZByteIoError { + fn from(value: &'static str) -> Self { + ZByteIoError::Generic(value) + } +} + +/// The image reader wrapper +/// +/// This wraps anything that implements [ZByteReaderTrait] and +/// extends the ability of the core trait methods by providing +/// utilities like endian aware byte functions. +/// +/// This prevents each implementation from providing its own +pub struct ZReader { + inner: T, + temp_buffer: Vec +} + +impl ZReader { + /// Create a new reader from a source + /// that implements the [ZByteReaderTrait] + pub fn new(source: T) -> ZReader { + ZReader { + inner: source, + temp_buffer: vec![] + } + } + /// Destroy this reader returning + /// the underlying source of the bytes + /// from which we were decoding + #[inline(always)] + pub fn consume(self) -> T { + self.inner + } + /// Skip ahead ignoring `num` bytes + /// + /// For more advanced seek methods see [Self::seek] that allows + /// moving around via more advanced ways + /// + /// # Arguments + /// - num: The number of bytes to skip. + /// + /// # Returns + /// - `Ok(u64)`: The new position from the start of the stream. + /// - `Error` If something went wrong + #[inline(always)] + pub fn skip(&mut self, num: usize) -> Result { + self.inner.z_seek(ZSeekFrom::Current(num as i64)) + } + /// Move back from current position to a previous + /// position + /// + /// For more advanced seek methods see [Self::seek] that allows + /// moving around via more advanced ways + /// + /// # Arguments + /// - `num`: Positions to move before the current cursor + /// + /// # Returns + /// - `Ok(u64)`: The new position from the start of the stream. 
+ /// - `Error` If something went wrong + #[inline(always)] + pub fn rewind(&mut self, num: usize) -> Result { + self.inner.z_seek(ZSeekFrom::Current(-(num as i64))) + } + /// Move around a stream of bytes + /// + /// This is analogous to the [std::io::Seek] trait with the same ergonomics + /// only implemented to allow use in a `no_std` environment + /// + /// # Arguments + /// - `from`: The seek operation type. + /// + /// # Returns + /// - `Ok(u64)`: The new position from the start of the stream. + /// - Error if something went wrong. + #[inline(always)] + pub fn seek(&mut self, from: ZSeekFrom) -> Result { + self.inner.z_seek(from) + } + + /// Read a single byte from the underlying stream + /// + /// If an error occurs, it will return `0` as default output + /// hence it may be difficult to distinguish a `0` from the underlying source + /// and a `0` from an error. + /// For that there is [Self::read_u8_err] + /// + /// # Returns. + /// - The next byte on the stream. + /// + #[inline(always)] + pub fn read_u8(&mut self) -> u8 { + self.inner.read_byte_no_error() + } + + /// Read a single byte returning an error if the read cannot be satisfied + /// + /// # Returns + /// - `Ok(u8)`: The next byte + /// - Error if the byte read could not be satisfied + #[inline(always)] + pub fn read_u8_err(&mut self) -> Result { + let mut buf = [0]; + self.inner.read_const_bytes(&mut buf)?; + Ok(buf[0]) + } + + /// Look ahead position bytes and return a reference + /// to num_bytes from that position, or an error if the + /// peek would be out of bounds. + /// + /// This doesn't increment the position, bytes would have to be discarded + /// at a later point. 
+ #[inline] + pub fn peek_at(&mut self, position: usize, num_bytes: usize) -> Result<&[u8], ZByteIoError> { + // short circuit for zero + // important since implementations like File will + // cause a syscall on skip + if position != 0 { + // skip position bytes from start + self.skip(position)?; + } + // resize buffer + self.temp_buffer.resize(num_bytes, 0); + // read bytes + match self.inner.peek_exact_bytes(&mut self.temp_buffer[..]) { + Ok(_) => { + // rewind back to where we were + if position != 0 { + self.rewind(position)?; + } + Ok(&self.temp_buffer) + } + Err(e) => Err(e) + } + } + /// Read a fixed number of known bytes to a buffer and return the bytes or an error + /// if it occurred. + /// + /// The size of the `N` value must be small enough to fit the stack space otherwise + /// this will cause a stack overflow :) + /// + /// If you can ignore errors, you can use [Self::read_fixed_bytes_or_zero] + /// + /// # Returns + /// - `Ok([u8;N])`: The bytes read from the source + /// - An error if it occurred. + #[inline(always)] + pub fn read_fixed_bytes_or_error(&mut self) -> Result<[u8; N], ZByteIoError> { + let mut byte_store: [u8; N] = [0; N]; + match self.inner.read_const_bytes(&mut byte_store) { + Ok(_) => Ok(byte_store), + Err(e) => Err(e) + } + } + /// Read a fixed bytes to an array and if that is impossible, return an array containing + /// zeros + /// + /// If you want to handle errors, use [Self::read_fixed_bytes_or_error] + #[inline(always)] + pub fn read_fixed_bytes_or_zero(&mut self) -> [u8; N] { + let mut byte_store: [u8; N] = [0; N]; + self.inner.read_const_bytes_no_error(&mut byte_store); + byte_store + } + + /// Move the cursor to a fixed position in the stream + /// + /// This will move the cursor to exactly `position` bytes from the start of the buffer + /// + /// # Arguments + /// - `position`: The current position to move the cursor. 
+ #[inline] + pub fn set_position(&mut self, position: usize) -> Result<(), ZByteIoError> { + self.seek(ZSeekFrom::Start(position as u64))?; + + Ok(()) + } + + /// Return true if the underlying buffer can no longer produce bytes + /// + /// This call may be expensive depending on the underlying buffer type, e.g if + /// it's a file, we have to ask the os whether we have more contents, or in other words make a syscall. + /// + /// Use that wisely + /// + /// # Returns + /// - `Ok(bool)`: True if we are in `EOF`, false if we can produce more bytes + /// - Error if something went wrong + #[inline(always)] + pub fn eof(&mut self) -> Result { + self.inner.is_eof() + } + + /// Return the current position of the inner reader or an error + /// if that occurred when reading. + /// + /// Like [eof](Self::eof), the perf characteristics may vary depending on underlying reader + /// + /// # Returns + /// - `Ok(u64)`: The current position of the inner reader + #[inline(always)] + pub fn position(&mut self) -> Result { + self.inner.z_position() + } + + /// Read a fixed number of bytes from the underlying reader returning + /// an error if that can't be satisfied + /// + /// Similar to [std::io::Read::read_exact] + /// + /// # Returns + /// - `Ok(())`: If the read was successful + /// - An error if the read was unsuccessful including failure to fill the whole bytes + pub fn read_exact_bytes(&mut self, buf: &mut [u8]) -> Result<(), ZByteIoError> { + self.inner.read_exact_bytes(buf) + } + + /// Read some bytes from the inner reader, and return number of bytes read + /// + /// The implementation may not read bytes enough to fill the buffer + /// + /// Similar to [std::io::Read::read] + /// + /// # Returns + /// - `Ok(usize)`: Number of bytes actually read to the buffer + /// - An error if something went wrong + pub fn read_bytes(&mut self, buf: &mut [u8]) -> Result { + self.inner.read_bytes(buf) + } +} + +enum Mode { + // Big endian + BE, + // Little Endian + LE +} +macro_rules! 
get_single_type { + ($name:tt,$name2:tt,$name3:tt,$name4:tt,$name5:tt,$name6:tt,$int_type:tt) => { + impl ZReader + { + #[inline(always)] + fn $name(&mut self, mode: Mode) -> $int_type + { + const SIZE_OF_VAL: usize = core::mem::size_of::<$int_type>(); + + let mut space = [0; SIZE_OF_VAL]; + + self.inner.read_const_bytes_no_error(&mut space); + + match mode { + Mode::BE => $int_type::from_be_bytes(space), + Mode::LE => $int_type::from_le_bytes(space) + } + } + + #[inline(always)] + fn $name2(&mut self, mode: Mode) -> Result<$int_type, ZByteIoError> + { + const SIZE_OF_VAL: usize = core::mem::size_of::<$int_type>(); + + let mut space = [0; SIZE_OF_VAL]; + + match self.inner.read_const_bytes(&mut space) + { + Ok(_) => match mode { + Mode::BE => Ok($int_type::from_be_bytes(space)), + Mode::LE => Ok($int_type::from_le_bytes(space)) + }, + Err(e) => Err(e) + } + } + #[doc=concat!("Read ",stringify!($int_type)," as a big endian integer")] + #[doc=concat!("Returning an error if the underlying buffer cannot support a ",stringify!($int_type)," read.")] + #[inline] + pub fn $name3(&mut self) -> Result<$int_type, ZByteIoError> + { + self.$name2(Mode::BE) + } + + #[doc=concat!("Read ",stringify!($int_type)," as a little endian integer")] + #[doc=concat!("Returning an error if the underlying buffer cannot support a ",stringify!($int_type)," read.")] + #[inline] + pub fn $name4(&mut self) -> Result<$int_type, ZByteIoError> + { + self.$name2(Mode::LE) + } + #[doc=concat!("Read ",stringify!($int_type)," as a big endian integer")] + #[doc=concat!("Returning 0 if the underlying buffer does not have enough bytes for a ",stringify!($int_type)," read.")] + #[inline(always)] + pub fn $name5(&mut self) -> $int_type + { + self.$name(Mode::BE) + } + #[doc=concat!("Read ",stringify!($int_type)," as a little endian integer")] + #[doc=concat!("Returning 0 if the underlying buffer does not have enough bytes for a ",stringify!($int_type)," read.")] + #[inline(always)] + pub fn $name6(&mut self) 
-> $int_type + { + self.$name(Mode::LE) + } + } + }; +} + +get_single_type!( + get_u16_inner_or_default, + get_u16_inner_or_die, + get_u16_be_err, + get_u16_le_err, + get_u16_be, + get_u16_le, + u16 +); +get_single_type!( + get_u32_inner_or_default, + get_u32_inner_or_die, + get_u32_be_err, + get_u32_le_err, + get_u32_be, + get_u32_le, + u32 +); +get_single_type!( + get_u64_inner_or_default, + get_u64_inner_or_die, + get_u64_be_err, + get_u64_le_err, + get_u64_be, + get_u64_le, + u64 +); + +#[cfg(feature = "std")] +impl std::io::Read for ZReader +where + T: ZByteReaderTrait +{ + fn read(&mut self, buf: &mut [u8]) -> std::io::Result { + use std::io::ErrorKind; + self.read_bytes(buf) + .map_err(|e| std::io::Error::new(ErrorKind::Other, format!("{:?}", e))) + } +} diff --git a/third_party/zune-core/src/bytestream/reader/no_std_readers.rs b/third_party/zune-core/src/bytestream/reader/no_std_readers.rs new file mode 100644 index 0000000..0334121 --- /dev/null +++ b/third_party/zune-core/src/bytestream/reader/no_std_readers.rs @@ -0,0 +1,198 @@ +use crate::bytestream::reader::{ZByteIoError, ZSeekFrom}; +use crate::bytestream::ZByteReaderTrait; +/// Wraps an in memory buffer providing it with a `Seek` method +/// but works in `no_std` environments +/// +/// `std::io::Cursor` is available in std environments, but we also need support +/// for `no_std` environments so this serves as a drop in replacement +pub struct ZCursor> { + stream: T, + position: usize +} + +impl> ZCursor { + pub fn new(buffer: T) -> ZCursor { + ZCursor { + stream: buffer, + position: 0 + } + } +} + +impl> ZCursor { + /// Move forward `num` bytes from + /// the current position. + /// + /// It doesn't check that position overflowed, new position + /// may point past the internal buffer, all subsequent reads will + /// either return an error or zero depending on the method called + #[inline] + pub fn skip(&mut self, num: usize) { + // Can this overflow ?? 
+ self.position = self.position.wrapping_add(num); + } + /// Move back `num` bytes from the current position + /// + /// + /// This saturates at zero, it can never be negative or wraparound + /// when the value becomes too small + #[inline] + pub fn rewind(&mut self, num: usize) { + self.position = self.position.saturating_sub(num); + } +} + +impl> ZByteReaderTrait for ZCursor { + #[inline(always)] + fn read_byte_no_error(&mut self) -> u8 { + let byte = self.stream.as_ref().get(self.position).unwrap_or(&0); + self.position += 1; + *byte + } + #[inline(always)] + fn read_exact_bytes(&mut self, buf: &mut [u8]) -> Result<(), ZByteIoError> { + let bytes_read = self.read_bytes(buf)?; + if bytes_read != buf.len() { + // restore read to initial position it was in. + self.rewind(bytes_read); + // not all bytes were read. + return Err(ZByteIoError::NotEnoughBytes(bytes_read, buf.len())); + } + Ok(()) + } + + fn read_const_bytes(&mut self, buf: &mut [u8; N]) -> Result<(), ZByteIoError> { + if self.position + N <= self.stream.as_ref().len() { + // we are in bounds + let reference = self.stream.as_ref(); + let position = self.position; + if let Some(buf_ref) = reference.get(position..position + N) { + self.position += N; + buf.copy_from_slice(buf_ref); + return Ok(()); + } + } + Err(ZByteIoError::Generic("Cannot satisfy read")) + } + + fn read_const_bytes_no_error(&mut self, buf: &mut [u8; N]) { + if self.position + N <= self.stream.as_ref().len() { + // we are in bounds + let reference = self.stream.as_ref(); + let position = self.position; + if let Some(buf_ref) = reference.get(position..position + N) { + self.position += N; + buf.copy_from_slice(buf_ref); + } + } + } + + #[inline(always)] + fn read_bytes(&mut self, buf: &mut [u8]) -> Result { + let len = self.peek_bytes(buf)?; + self.skip(len); + Ok(len) + } + + #[inline(always)] + fn peek_bytes(&mut self, buf: &mut [u8]) -> Result { + let stream_end = self.stream.as_ref().len(); + + let start = 
core::cmp::min(self.position, stream_end); + let end = core::cmp::min(self.position + buf.len(), stream_end); + + let slice = self.stream.as_ref().get(start..end).unwrap(); + buf[..slice.len()].copy_from_slice(slice); + let len = slice.len(); + + Ok(len) + } + + #[inline(always)] + fn peek_exact_bytes(&mut self, buf: &mut [u8]) -> Result<(), ZByteIoError> { + self.read_exact_bytes(buf)?; + self.rewind(buf.len()); + Ok(()) + } + + #[inline(always)] + fn z_seek(&mut self, from: ZSeekFrom) -> Result { + let (base_pos, offset) = match from { + ZSeekFrom::Start(n) => { + self.position = n as usize; + return Ok(n); + } + ZSeekFrom::End(n) => (self.stream.as_ref().len(), n as isize), + ZSeekFrom::Current(n) => (self.position, n as isize) + }; + match base_pos.checked_add_signed(offset) { + Some(n) => { + self.position = n; + Ok(self.position as u64) + } + None => Err(ZByteIoError::SeekError("Negative seek")) + } + } + + #[inline(always)] + fn is_eof(&mut self) -> Result { + Ok(self.position >= self.stream.as_ref().len()) + } + #[inline(always)] + fn z_position(&mut self) -> Result { + Ok(self.position as u64) + } + + fn read_remaining(&mut self, sink: &mut alloc::vec::Vec) -> Result { + let start = self.position; + let end = self.stream.as_ref().len(); + match self.stream.as_ref().get(start..end) { + None => { + return Err(ZByteIoError::Generic( + "Somehow read remaining couldn't satisfy it's invariants" + )) + } + Some(e) => { + sink.extend_from_slice(e); + } + } + self.skip(end - start); + Ok(end - start) + } +} + +#[cfg(feature = "std")] +impl> std::io::Seek for ZCursor { + fn seek(&mut self, pos: std::io::SeekFrom) -> std::io::Result { + let (base_pos, offset) = match pos { + std::io::SeekFrom::Start(n) => { + self.position = n as usize; + return Ok(n); + } + std::io::SeekFrom::End(n) => (self.stream.as_ref().len(), n as isize), + std::io::SeekFrom::Current(n) => (self.position, n as isize) + }; + match base_pos.checked_add_signed(offset) { + Some(n) => { + 
self.position = n; + Ok(self.position as u64) + } + None => Err(std::io::Error::new( + std::io::ErrorKind::Other, + "Negative seek" + )) + } + } +} +// +// #[cfg(feature = "std")] +// impl> std::io::Read for ZCursor { +// fn read(&mut self, buf: &mut [u8]) -> std::io::Result { +// self.read_bytes(buf).map_err(|x|{ std::io::Error::new()}) +// } +// } +impl> From for ZCursor { + fn from(value: T) -> Self { + ZCursor::new(value) + } +} diff --git a/third_party/zune-core/src/bytestream/reader/std_readers.rs b/third_party/zune-core/src/bytestream/reader/std_readers.rs new file mode 100644 index 0000000..014d0bd --- /dev/null +++ b/third_party/zune-core/src/bytestream/reader/std_readers.rs @@ -0,0 +1,100 @@ +#![cfg(feature = "std")] + +use std::io; +use std::io::SeekFrom; + +use crate::bytestream::reader::{ZByteIoError, ZSeekFrom}; +use crate::bytestream::ZByteReaderTrait; +// note (cae): If Rust ever stabilizes trait specialization, specialize this for Cursor +impl ZByteReaderTrait for T { + #[inline(always)] + fn read_byte_no_error(&mut self) -> u8 { + let mut buf = [0]; + let _ = self.read(&mut buf); + buf[0] + } + #[inline(always)] + fn read_exact_bytes(&mut self, buf: &mut [u8]) -> Result<(), ZByteIoError> { + let mut bytes_read = 0; + + while bytes_read < buf.len() { + match self.read(&mut buf[bytes_read..]) { + Ok(0) => { + // if a read returns zero bytes read, it means it encountered an EOF so we seek + // back to where we started because some paths may aggressively read forward and + // ZCursor maintains the position. + + // NB: (cae) [tag=perf] This adds a branch on every read, and will slow down every function + // resting on it. 
Sorry + self.seek(SeekFrom::Current(-(bytes_read as i64))) + .map_err(ZByteIoError::from)?; + return Err(ZByteIoError::NotEnoughBytes(bytes_read, buf.len())); + } + Ok(bytes) => { + bytes_read += bytes; + } + Err(e) => return Err(ZByteIoError::from(e)) + } + } + + Ok(()) + } + + #[inline] + fn read_const_bytes(&mut self, buf: &mut [u8; N]) -> Result<(), ZByteIoError> { + self.read_exact_bytes(buf) + } + + fn read_const_bytes_no_error(&mut self, buf: &mut [u8; N]) { + let _ = self.read_const_bytes(buf); + } + + #[inline(always)] + fn read_bytes(&mut self, buf: &mut [u8]) -> Result { + self.read(buf).map_err(ZByteIoError::from) + } + + #[inline(always)] + fn peek_bytes(&mut self, buf: &mut [u8]) -> Result { + // first read bytes to the buffer + let bytes_read = self.read_bytes(buf)?; + let converted = -i64::try_from(bytes_read).map_err(ZByteIoError::from)?; + self.seek(std::io::SeekFrom::Current(converted)) + .map_err(ZByteIoError::from)?; + + Ok(bytes_read) + } + + #[inline(always)] + fn peek_exact_bytes(&mut self, buf: &mut [u8]) -> Result<(), ZByteIoError> { + // first read bytes to the buffer + self.read_exact_bytes(buf)?; + let converted = -i64::try_from(buf.len()).map_err(ZByteIoError::from)?; + self.seek(std::io::SeekFrom::Current(converted)) + .map_err(ZByteIoError::from)?; + + Ok(()) + } + + #[inline(always)] + fn z_seek(&mut self, from: ZSeekFrom) -> Result { + self.seek(from.to_std_seek()).map_err(ZByteIoError::from) + } + + #[inline(always)] + fn is_eof(&mut self) -> Result { + self.fill_buf() + .map(|b| b.is_empty()) + .map_err(ZByteIoError::from) + } + + #[inline(always)] + fn z_position(&mut self) -> Result { + self.stream_position().map_err(ZByteIoError::from) + } + + #[inline(always)] + fn read_remaining(&mut self, sink: &mut Vec) -> Result { + self.read_to_end(sink).map_err(ZByteIoError::from) + } +} diff --git a/third_party/zune-core/src/bytestream/traits.rs b/third_party/zune-core/src/bytestream/traits.rs new file mode 100644 index 
0000000..e9f3af7 --- /dev/null +++ b/third_party/zune-core/src/bytestream/traits.rs @@ -0,0 +1,146 @@ +/* + * Copyright (c) 2023. + * + * This software is free software; + * + * You can redistribute it or modify it under terms of the MIT, Apache License or Zlib license + */ +//! Traits for reading and writing images in zune +//! +//! +//! This exposes the traits and implementations for readers +//! and writers in the zune family of decoders and encoders. + +use crate::bytestream::reader::{ZByteIoError, ZSeekFrom}; + +/// The de-facto Input trait implemented for readers. +/// +/// This provides the basic functions needed to quick and sometimes +/// heap free I/O for the zune image decoders with easy support for extending it +/// to multiple implementations. +/// +/// # Considerations +/// +/// If you have an in memory buffer, prefer [`ZCursor`](crate::bytestream::ZCursor) over [`Cursor`](std::io::Cursor). +/// We implement this trait for two types, `ZCursor`, and any thing that implements `BufRead`+`Seek`, `Cursor` falls in the latter +/// and since Rust doesn't have specialization for traits, we can only implement it once. This means functions like +/// [`read_byte_no_error`](crate::bytestream::ZByteReaderTrait::read_byte_no_error) are slower than they should be for `Cursor`. 
+/// +pub trait ZByteReaderTrait { + /// Read a single byte from the decoder and return + /// `0` if we can't read the byte, e.g because of EOF + /// + /// The implementation should try to be as fast as possible as this is called + /// from some hot loops where it may become the bottleneck + fn read_byte_no_error(&mut self) -> u8; + /// Read exact bytes required to fill `buf` or return an error if that isn't possible + /// + /// ## Arguments + /// - `buf`: Buffer to fill with bytes from the underlying reader + /// ## Errors + /// In case of an error, the implementation should not increment the internal position + fn read_exact_bytes(&mut self, buf: &mut [u8]) -> Result<(), ZByteIoError>; + + /// Read exact bytes required to fill `buf` or return an error if that isn't possible + /// + /// This is the same as [`read_exact_bytes`](Self::read_exact_bytes) but implemented as a separate + /// method to allow some implementations to optimize it to cost fewer instructions + /// + /// ## Arguments + /// - `buf`: Buffer to fill with bytes from the underlying reader + /// ## Errors + /// In case of an error, the implementation should not increment the internal position + fn read_const_bytes(&mut self, buf: &mut [u8; N]) -> Result<(), ZByteIoError>; + + /// Read exact bytes required to fill `buf` or ignore buf entirely if you can't fill it + /// due to an error like the inability to fill the buffer completely + /// ## Arguments + /// - `buf`: Buffer to fill with bytes from the underlying reader + /// ## Errors + /// In case of an error, the implementation should not increment the internal position + fn read_const_bytes_no_error(&mut self, buf: &mut [u8; N]); + + /// Read bytes into `buf` returning how many bytes you have read or an error if one occurred + /// + /// This doesn't guarantee that buf will be filled with bytes for such a guarantee see + /// [`read_exact_bytes`](Self::read_exact_bytes) + /// + /// ## Arguments + /// - `buf`: The buffer to fill with bytes + /// + /// 
## Returns + /// - `Ok(usize)` - Actual bytes read into the buffer + /// - `Err()` - The error encountered when reading bytes for which we couldn't recover + fn read_bytes(&mut self, buf: &mut [u8]) -> Result; + /// Reads data into provided buffer but does not advance read position. + /// + /// + fn peek_bytes(&mut self, buf: &mut [u8]) -> Result; + fn peek_exact_bytes(&mut self, buf: &mut [u8]) -> Result<(), ZByteIoError>; + /// Seek into a new position from the buffer + /// + /// This is similar to the [seek](std::io::Seek::seek) function in the [Seek](std::io::Seek) trait + /// but implemented to work for no-std environments + fn z_seek(&mut self, from: ZSeekFrom) -> Result; + /// Report whether we are at the end of a stream. + /// + /// ## Warning + /// This may cause an additional syscall e.g when we are reading from a file, we must query the file + /// multiple times to check if we really are at the end of the file and the user didn't sneakily + /// add more contents to it hence use it with care + /// + /// ## Returns + /// - `Ok(bool)` - The answer to whether or not we are at end of file + /// - `Err()` - The error that occurred when we queried the underlying reader if we were at EOF + fn is_eof(&mut self) -> Result; + + /// Return the current position of the inner cursor. 
+ /// + /// This can be used to check the advancement of the cursor + fn z_position(&mut self) -> Result; + /// Read all bytes remaining in this input to `sink` until we hit eof + /// + /// # Returns + /// - `Ok(usize)` The actual number of bytes added to the sink + /// - `Err()` An error that occurred when reading bytes + fn read_remaining(&mut self, sink: &mut alloc::vec::Vec) -> Result; +} + +/// The writer trait implemented for zune-image library of encoders +/// +/// Anything that implements this trait can be used as a sink +/// for writing encoded images +pub trait ZByteWriterTrait { + /// Write some bytes into the sink returning number of bytes written or + /// an error if something bad happened + /// + /// An implementation is free to write less bytes that are in buf, so the bytes written + /// cannot be guaranteed to be fully written + fn write_bytes(&mut self, buf: &[u8]) -> Result; + /// Write all bytes to the buffer or return an error if something occurred + /// + /// This will always write all bytes, if it can't fully write all bytes, it will + /// error out + fn write_all_bytes(&mut self, buf: &[u8]) -> Result<(), ZByteIoError>; + /// Write a fixed number of bytes and error out if we can't write the bytes + /// + /// This is provided to allow for optimized writes where possible. (when the compiler can const fold them) + fn write_const_bytes(&mut self, buf: &[u8; N]) -> Result<(), ZByteIoError>; + /// Ensure bytes are written to the sink. 
+ /// + /// Implementations should treat this like linux `fsync`, and should implement + /// whatever writer's implementation of fsync should look like + /// + /// After this, the encoder should be able to guarantee that all in-core data is synced with the + /// storage decive + fn flush_bytes(&mut self) -> Result<(), ZByteIoError>; + + /// A hint to tell the implementation how big of a size we expect the image to be + /// An implementation like in memory `Vec` can use this to reserve additional memory to + /// prevent reallocation when encoding + /// + /// This is just a hint, akin to calling `Vec::reserve` and should be treated as such. + /// If your implementation doesn't support such, e.g file or mutable slices, it's okay to return + /// `Ok(())` + fn reserve_capacity(&mut self, size: usize) -> Result<(), ZByteIoError>; +} diff --git a/third_party/zune-core/src/bytestream/writer.rs b/third_party/zune-core/src/bytestream/writer.rs new file mode 100644 index 0000000..8b1fc63 --- /dev/null +++ b/third_party/zune-core/src/bytestream/writer.rs @@ -0,0 +1,262 @@ +/* + * Copyright (c) 2023. 
+ * + * This software is free software; + * + * You can redistribute it or modify it under terms of the MIT, Apache License or Zlib license + */ +use crate::bytestream::{ZByteIoError, ZByteWriterTrait}; + +mod no_std_writer; +mod std_writer; + +enum Mode { + // Big endian + BE, + // Little Endian + LE +} + +/// Encapsulates a simple Byte writer with +/// support for Endian aware writes +pub struct ZWriter { + buffer: T, + bytes_written: usize +} + +impl ZWriter { + /// Write bytes from the buf into the bytestream + /// and return how many bytes were written + /// + /// # Arguments + /// - `buf`: The bytes to be written to the bytestream + /// + /// # Returns + /// - `Ok(usize)` - Number of bytes written + /// This number may be less than `buf.len()` if the length of the buffer is greater + /// than the internal bytestream length + /// + /// If you want to be sure that all bytes were written, see [`write_all`](Self::write_all) + /// + #[inline] + pub fn write(&mut self, buf: &[u8]) -> Result { + let bytes_written = self.buffer.write_bytes(buf)?; + self.bytes_written += bytes_written; + Ok(bytes_written) + } + /// Write all bytes from `buf` into the bytestream and return + /// and panic if not all bytes were written to the bytestream + /// + /// # Arguments + /// - `buf`: The bytes to be written into the bytestream + /// + ///# Returns + /// - `Ok(())`: Indicates all bytes were written into the bytestream + /// - `Err(&static str)`: In case all the bytes could not be written + /// to the stream + pub fn write_all(&mut self, buf: &[u8]) -> Result<(), ZByteIoError> { + self.buffer.write_all_bytes(buf)?; + self.bytes_written += buf.len(); + Ok(()) + } + /// Create a new bytestream writer + /// Bytes are written from the start to the end and not assumptions + /// are made of the nature of the underlying stream + /// + /// # Arguments + pub fn new(data: T) -> ZWriter { + ZWriter { + buffer: data, + bytes_written: 0 + } + } + + /// Write a single byte into the bytestream 
or error out + /// if there is not enough space + /// + /// # Example + /// ``` + /// use zune_core::bytestream::ZWriter; + /// let mut buf = [0;10]; + /// let mut stream = ZWriter::new(&mut buf[..]); + /// assert!(stream.write_u8_err(34).is_ok()); + /// ``` + /// No space + /// ``` + /// use zune_core::bytestream::ZWriter; + /// let mut no_space = []; + /// let mut stream = ZWriter::new(&mut no_space[..]); + /// assert!(stream.write_u8_err(32).is_err()); + /// ``` + /// + #[inline] + pub fn write_u8_err(&mut self, byte: u8) -> Result<(), ZByteIoError> { + self.write_const_bytes(&[byte]) + } + /// Write a fixed compile time known number of bytes to the sink + /// + /// This is provided since some implementations can optimize such writes by eliminating + /// some redundant code. + #[inline] + pub fn write_const_bytes( + &mut self, byte: &[u8; N] + ) -> Result<(), ZByteIoError> { + self.buffer.write_const_bytes(byte)?; + self.bytes_written += N; + Ok(()) + } + + /// Write a single byte in the stream or don't write + /// anything if the buffer is full and cannot support the byte read + /// + #[inline] + pub fn write_u8(&mut self, byte: u8) { + let _ = self.write_const_bytes(&[byte]); + } + /// Return the number of bytes written by this encoder + /// + /// The encoder keeps information of how many bytes were written and this method + /// returns that value. + /// + /// # Returns + /// Number of bytes written + pub fn bytes_written(&self) -> usize { + self.bytes_written + } + + /// Reserve some additional space to write. 
+ /// + /// Some sinks like `Vec` allow reallocation and to prevent too much reallocation + /// one can use this to reserve additional space to encode + /// + /// # Example + /// + /// ``` + /// use zune_core::bytestream::ZWriter; + /// let space_needed = 10; // Assume the image will fit into 10 bytes + /// let mut output = Vec::new(); + /// let mut sink = ZWriter::new(&mut output); + /// // now reserve some space + ///sink.reserve(space_needed).unwrap(); + /// // at this point, we can assume that ZWriter allocated space for output + /// ``` + pub fn reserve(&mut self, additional: usize) -> Result<(), ZByteIoError> { + self.buffer.reserve_capacity(additional) + } + /// Consume the writer and return the inner sink + /// we were writing to. + /// + /// After this, the writer can no longer be used + pub fn inner(self) -> T { + self.buffer + } + /// Return an immutable reference to the inner sink + pub fn inner_ref(&self) -> &T { + &self.buffer + } + /// Return a mutable reference to the inner sink + pub fn inner_mut(&mut self) -> &mut T { + &mut self.buffer + } +} + +macro_rules! write_single_type { + ($name:tt,$name2:tt,$name3:tt,$name4:tt,$name5:tt,$name6:tt,$int_type:tt) => { + impl ZWriter + { + #[inline(always)] + fn $name(&mut self, byte: $int_type, mode: Mode) -> Result<(), ZByteIoError> + { + + // get bits, depending on mode. + // This should be inlined and not visible in + // the generated binary since mode is a compile + // time constant. + let bytes = match mode + { + Mode::BE => byte.to_be_bytes(), + Mode::LE => byte.to_le_bytes() + }; + self.write_const_bytes(&bytes) + } + #[inline(always)] + fn $name2(&mut self, byte: $int_type, mode: Mode) + { + + // get bits, depending on mode. + // This should be inlined and not visible in + // the generated binary since mode is a compile + // time constant. 
+ let bytes = match mode + { + Mode::BE => byte.to_be_bytes(), + Mode::LE => byte.to_le_bytes() + }; + let _ = self.write_const_bytes(&bytes); + + + } + + #[doc=concat!("Write ",stringify!($int_type)," as a big endian integer")] + #[doc=concat!("Returning an error if the underlying buffer cannot support a ",stringify!($int_type)," write.")] + #[inline] + pub fn $name3(&mut self, byte: $int_type) -> Result<(), ZByteIoError> + { + self.$name(byte, Mode::BE) + } + + #[doc=concat!("Write ",stringify!($int_type)," as a little endian integer")] + #[doc=concat!("Returning an error if the underlying buffer cannot support a ",stringify!($int_type)," write.")] + #[inline] + pub fn $name4(&mut self, byte: $int_type) -> Result<(), ZByteIoError> + { + self.$name(byte, Mode::LE) + } + + #[doc=concat!("Write ",stringify!($int_type)," as a big endian integer")] + #[doc=concat!("Or don't write anything if the reader cannot support a ",stringify!($int_type)," write.")] + #[inline] + pub fn $name5(&mut self, byte: $int_type) + { + self.$name2(byte, Mode::BE) + } + #[doc=concat!("Write ",stringify!($int_type)," as a little endian integer")] + #[doc=concat!("Or don't write anything if the reader cannot support a ",stringify!($int_type)," write.")] + #[inline] + pub fn $name6(&mut self, byte: $int_type) + { + self.$name2(byte, Mode::LE) + } + } + }; +} + +write_single_type!( + write_u64_inner_or_die, + write_u64_inner_or_none, + write_u64_be_err, + write_u64_le_err, + write_u64_be, + write_u64_le, + u64 +); + +write_single_type!( + write_u32_inner_or_die, + write_u32_inner_or_none, + write_u32_be_err, + write_u32_le_err, + write_u32_be, + write_u32_le, + u32 +); + +write_single_type!( + write_u16_inner_or_die, + write_u16_inner_or_none, + write_u16_be_err, + write_u16_le_err, + write_u16_be, + write_u16_le, + u16 +); diff --git a/third_party/zune-core/src/bytestream/writer/no_std_writer.rs b/third_party/zune-core/src/bytestream/writer/no_std_writer.rs new file mode 100644 index 
0000000..fd5fca8 --- /dev/null +++ b/third_party/zune-core/src/bytestream/writer/no_std_writer.rs @@ -0,0 +1,70 @@ +// We cannot use the below impls and std ones because we'll re-implement the +// same trait fot &[u8] which is blanketed by write. Ending up with two separate implementations +#![cfg(not(feature = "std"))] +use crate::bytestream::{ZByteIoError, ZByteWriterTrait}; + +impl ZByteWriterTrait for &mut [u8] { + fn write_bytes(&mut self, buf: &[u8]) -> Result { + // got from the write of std + let amt = core::cmp::min(buf.len(), self.len()); + let (a, b) = core::mem::take(self).split_at_mut(amt); + a.copy_from_slice(&buf[..amt]); + *self = b; + Ok(amt) + } + + fn write_all_bytes(&mut self, buf: &[u8]) -> Result<(), ZByteIoError> { + if buf.len() > self.len() { + return Err(ZByteIoError::NotEnoughBuffer(self.len(), buf.len())); + } + let amt = core::cmp::min(buf.len(), self.len()); + let (a, b) = core::mem::take(self).split_at_mut(amt); + a.copy_from_slice(&buf[..amt]); + *self = b; + + Ok(()) + } + + fn write_const_bytes(&mut self, buf: &[u8; N]) -> Result<(), ZByteIoError> { + if N > self.len() { + return Err(ZByteIoError::NotEnoughBuffer(self.len(), N)); + } + let amt = core::cmp::min(buf.len(), self.len()); + let (a, b) = core::mem::take(self).split_at_mut(amt); + a.copy_from_slice(&buf[..amt]); + *self = b; + Ok(()) + } + + fn flush_bytes(&mut self) -> Result<(), ZByteIoError> { + Ok(()) + } + fn reserve_capacity(&mut self, _: usize) -> Result<(), ZByteIoError> { + // can't really pre-allocate anything here + Ok(()) + } +} + +impl ZByteWriterTrait for &mut alloc::vec::Vec { + fn write_bytes(&mut self, buf: &[u8]) -> Result { + self.extend_from_slice(buf); + Ok(buf.len()) + } + + fn write_all_bytes(&mut self, buf: &[u8]) -> Result<(), ZByteIoError> { + self.extend_from_slice(buf); + Ok(()) + } + + fn write_const_bytes(&mut self, buf: &[u8; N]) -> Result<(), ZByteIoError> { + self.extend_from_slice(buf); + Ok(()) + } + fn flush_bytes(&mut self) -> 
Result<(), ZByteIoError> { + Ok(()) + } + fn reserve_capacity(&mut self, size: usize) -> Result<(), ZByteIoError> { + self.reserve(size); + Ok(()) + } +} diff --git a/third_party/zune-core/src/bytestream/writer/std_writer.rs b/third_party/zune-core/src/bytestream/writer/std_writer.rs new file mode 100644 index 0000000..d82aba5 --- /dev/null +++ b/third_party/zune-core/src/bytestream/writer/std_writer.rs @@ -0,0 +1,27 @@ +#![cfg(feature = "std")] + +use std::io::Write; + +use crate::bytestream::ZByteIoError; + +impl crate::bytestream::ZByteWriterTrait for T { + fn write_bytes(&mut self, buf: &[u8]) -> Result { + self.write(buf).map_err(ZByteIoError::StdIoError) + } + + fn write_all_bytes(&mut self, buf: &[u8]) -> Result<(), ZByteIoError> { + self.write_all(buf).map_err(ZByteIoError::StdIoError) + } + + fn write_const_bytes(&mut self, buf: &[u8; N]) -> Result<(), ZByteIoError> { + self.write_all_bytes(buf) + } + fn flush_bytes(&mut self) -> Result<(), ZByteIoError> { + self.flush().map_err(ZByteIoError::StdIoError) + } + fn reserve_capacity(&mut self, _: usize) -> Result<(), ZByteIoError> { + // we can't reserve capacity, sorry to implementations where this + // matters + Ok(()) + } +} diff --git a/third_party/zune-core/src/colorspace.rs b/third_party/zune-core/src/colorspace.rs new file mode 100644 index 0000000..8bdc916 --- /dev/null +++ b/third_party/zune-core/src/colorspace.rs @@ -0,0 +1,161 @@ +/* + * Copyright (c) 2023. + * + * This software is free software; + * + * You can redistribute it or modify it under terms of the MIT, Apache License or Zlib license + */ + +//! Image Colorspace information and manipulation utilities. + +/// All possible image colorspaces +/// Some of them aren't yet supported exist here. 
+#[allow(clippy::upper_case_acronyms)] +#[derive(Copy, Clone, Debug, Eq, PartialEq)] +#[non_exhaustive] +pub enum ColorSpace { + /// Red, Green , Blue + RGB, + /// Red, Green, Blue, Alpha + RGBA, + /// YUV colorspace + YCbCr, + /// Grayscale colorspace + Luma, + /// Grayscale with alpha colorspace + LumaA, + YCCK, + /// Cyan , Magenta, Yellow, Black + CMYK, + /// Blue, Green, Red + BGR, + /// Blue, Green, Red, Alpha + BGRA, + /// The colorspace is unknown + Unknown, + /// Alpha Red Green Blue + ARGB, + /// Hue,Saturation,Lightness + /// Conversion from RGB to HSL and back matches that of Python [colorsys](https://docs.python.org/3/library/colorsys.html) module + /// Color type is expected to be in floating point + HSL, + /// Hue, Saturation,Value + /// + /// Conversion from RGB to HSV and back matches that of Python [colorsys](https://docs.python.org/3/library/colorsys.html) module + /// Color type is expected to be in floating point + HSV +} + +impl ColorSpace { + /// Number of color channels present for a certain colorspace + /// + /// E.g. 
RGB returns 3 since it contains R,G and B colors to make up a pixel + pub const fn num_components(&self) -> usize { + match self { + Self::RGB | Self::YCbCr | Self::BGR | Self::HSV | Self::HSL => 3, + Self::RGBA | Self::YCCK | Self::CMYK | Self::BGRA | Self::ARGB => 4, + Self::Luma => 1, + Self::LumaA => 2, + Self::Unknown => 0 + } + } + + pub const fn has_alpha(&self) -> bool { + matches!(self, Self::RGBA | Self::LumaA | Self::BGRA | Self::ARGB) + } + + pub const fn is_grayscale(&self) -> bool { + matches!(self, Self::LumaA | Self::Luma) + } + + /// Returns the position of the alpha pixel in a pixel + /// + /// + /// That is for an array of color components say `[0,1,2,3]` if the image has an alpha channel + /// and is in RGBA format, this will return `Some(3)`, indicating alpha is found in the third index + /// but if the image is in `ARGB` format, it will return `Some(0)` indicating alpha is found in + /// index 0 + /// + /// If an image doesn't have an alpha channel returns `None` + /// + pub const fn alpha_position(&self) -> Option { + match self { + ColorSpace::RGBA => Some(3), + ColorSpace::LumaA => Some(1), + ColorSpace::BGRA => Some(3), + ColorSpace::ARGB => Some(0), + _ => None + } + } +} + +/// Encapsulates all colorspaces supported by +/// the library +pub static ALL_COLORSPACES: [ColorSpace; 12] = [ + ColorSpace::RGB, + ColorSpace::RGBA, + ColorSpace::LumaA, + ColorSpace::Luma, + ColorSpace::CMYK, + ColorSpace::BGRA, + ColorSpace::BGR, + ColorSpace::YCCK, + ColorSpace::YCbCr, + ColorSpace::ARGB, + ColorSpace::HSL, + ColorSpace::HSV +]; + +/// Color characteristics +/// +/// Gives more information about values in a certain +/// colorspace +#[allow(non_camel_case_types)] +#[derive(Copy, Clone, Debug, PartialEq)] +pub enum ColorCharacteristics { + /// Normal default gamma setting + /// The float contains gamma present + /// + /// The default gamma value is 2.2 but for + /// decoders that allow specifying gamma values,e.g PNG, + /// the gamma value becomes 
the specified value by the decoder + sRGB, + /// Linear transfer characteristics + /// The image is in linear colorspace + Linear +} +/// Represents a single channel color primary. +/// +/// This can be viewed as a 3D coordinate of the color primary +/// for a given colorspace +#[derive(Default, Debug, Copy, Clone)] +pub struct SingleColorPrimary { + pub x: f64, + pub y: f64, + pub z: f64 +} +/// A collection of red,green and blue color primaries placed +/// in one struct for easy manipulation +#[derive(Default, Debug, Copy, Clone)] +pub struct ColorPrimaries { + /// Red color primaries + pub red: SingleColorPrimary, + /// Green color primaries + pub green: SingleColorPrimary, + /// Blue color primaries + pub blue: SingleColorPrimary +} + +/// Rendering intents indicate what one may want to do with colors outside of it's gamut +/// +/// +/// Further reading +/// - [IBM Rendering Intent](https://www.ibm.com/docs/en/i/7.5?topic=management-rendering-intents) +/// - [ColorGate Blog](https://blog.colorgate.com/en/rendering-intent-explained) +#[derive(Eq, PartialEq, Clone, Copy, Debug)] +pub enum RenderingIntent { + AbsoluteColorimetric, + Saturation, + RelativeColorimetric, + Perceptual +} diff --git a/third_party/zune-core/src/lib.rs b/third_party/zune-core/src/lib.rs new file mode 100644 index 0000000..77355f9 --- /dev/null +++ b/third_party/zune-core/src/lib.rs @@ -0,0 +1,62 @@ +/* + * Copyright (c) 2023. + * + * This software is free software; You can redistribute it or modify it under terms of the MIT, Apache License or Zlib license + */ + +//! Core routines shared by all libraries +//! +//! This crate provides a set of core routines shared +//! by the decoders and encoders under `zune` umbrella +//! +//! It currently contains +//! +//! - A bytestream reader and writer with endian aware reads and writes +//! - Colorspace and bit depth information shared by images +//! - Image decoder and encoder options +//! - A simple enum type to hold image decoding results. +//! 
+//! This library is `#[no_std]` with `alloc` feature needed for defining `Vec` +//! which we need for storing decoded bytes. +//! +//! +//! # Features +//! - `no_std`: Enables `#[no_std]` compilation support. +//! +//! - `serde`: Enables serializing of some of the data structures +//! present in the crate +//! +//! +//! # Input/Output +//! +//! zune-image supports many different input and output devices. For input readers +//! we can read anything that implements `BufRead` + `Seek` and provide an optimized routine for +//! handling in memory buffers by using [`ZCursor`](crate::bytestream::ZCursor). +//! +//! For output, we support anything that implements `Write` trait, this includes files, standard io streams +//! network sockets, etc +//! +//! In a `no_std` environment. We can write to in memory buffers `&mut [u8]` and `&mut Vec` +//! +//! If you have an in memory buffer, use [`ZCursor`](crate::bytestream::ZCursor), +//! it's optimized for in memory buffers. +//! +//! +//! +#![cfg_attr(not(feature = "std"), no_std)] +#![macro_use] +extern crate alloc; +extern crate core; + +#[cfg(not(feature = "log"))] +pub mod log; + +#[cfg(feature = "log")] +pub use log; + +pub mod bit_depth; +pub mod bytestream; +pub mod colorspace; +pub mod options; +pub mod result; +mod serde; diff --git a/third_party/zune-core/src/log.rs b/third_party/zune-core/src/log.rs new file mode 100644 index 0000000..19c867c --- /dev/null +++ b/third_party/zune-core/src/log.rs @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2023. + * + * This software is free software; + * + * You can redistribute it or modify it under terms of the MIT, Apache License or Zlib license + */ + +// #[macro_export] is required to make macros works across crates +// but it always put the macro in the crate root. +// #[doc(hidden)] + "pub use" is a workaround to namespace a macro. 
+pub use crate::{ + __debug as debug, __error as error, __info as info, __log_enabled as log_enabled, + __trace as trace, __warn as warn +}; + +#[repr(usize)] +#[derive(Copy, Clone, Eq, PartialEq, Debug, Hash)] +pub enum Level { + Error = 1, + Warn, + Info, + Debug, + Trace +} + +#[doc(hidden)] +#[macro_export] +macro_rules! __log_enabled { + ($lvl:expr) => {{ + let _ = $lvl; + false + }}; +} + +#[doc(hidden)] +#[macro_export] +macro_rules! __error { + ($($arg:tt)+) => { + #[cfg(feature = "std")] + { + //eprintln!($($arg)+); + } + }; +} + +#[doc(hidden)] +#[macro_export] +macro_rules! __warn { + ($($arg:tt)+) => { + #[cfg(feature = "std")] + { + //eprintln!($($arg)+); + } + }; +} + +#[doc(hidden)] +#[macro_export] +macro_rules! __info { + ($($arg:tt)+) => {}; +} + +#[doc(hidden)] +#[macro_export] +macro_rules! __debug { + ($($arg:tt)+) => {}; +} + +#[doc(hidden)] +#[macro_export] +macro_rules! __trace { + ($($arg:tt)+) => {}; +} diff --git a/third_party/zune-core/src/options.rs b/third_party/zune-core/src/options.rs new file mode 100644 index 0000000..59566af --- /dev/null +++ b/third_party/zune-core/src/options.rs @@ -0,0 +1,13 @@ +//! Decoder and Encoder Options +//! +//! This module exposes a struct for which all implemented +//! decoders get shared options for decoding +//! +//! All supported options are put into one _Options to allow for global configurations +//! options e.g the same `DecoderOption` can be reused for all other decoders +//! +pub use decoder::DecoderOptions; +pub use encoder::EncoderOptions; + +mod decoder; +mod encoder; diff --git a/third_party/zune-core/src/options/decoder.rs b/third_party/zune-core/src/options/decoder.rs new file mode 100644 index 0000000..6093c70 --- /dev/null +++ b/third_party/zune-core/src/options/decoder.rs @@ -0,0 +1,666 @@ +/* + * Copyright (c) 2023. + * + * This software is free software; + * + * You can redistribute it or modify it under terms of the MIT, Apache License or Zlib license + */ + +//! 
Global Decoder options +#![allow(clippy::zero_prefixed_literal)] + +use crate::bit_depth::ByteEndian; +use crate::colorspace::ColorSpace; + +/// A decoder that can handle errors +fn decoder_error_tolerance_mode() -> DecoderFlags { + // similar to fast options currently, so no need to write a new one + fast_options() +} +/// Fast decoder options +/// +/// Enables all intrinsics + unsafe routines +/// +/// Disables png adler and crc checking. +fn fast_options() -> DecoderFlags { + DecoderFlags { + inflate_confirm_adler: false, + png_confirm_crc: false, + jpg_error_on_non_conformance: false, + + zune_use_unsafe: true, + zune_use_neon: true, + zune_use_avx: true, + zune_use_avx2: true, + zune_use_sse2: true, + zune_use_sse3: true, + zune_use_sse41: true, + + png_add_alpha_channel: false, + png_strip_16_bit_to_8_bit: false, + png_decode_animated: true, + jxl_decode_animated: true + } +} + +/// Command line options error resilient and fast +/// +/// Features +/// - Ignore CRC and Adler in png +/// - Do not error out on non-conformance in jpg +/// - Use unsafe paths +fn cmd_options() -> DecoderFlags { + DecoderFlags { + inflate_confirm_adler: false, + png_confirm_crc: false, + jpg_error_on_non_conformance: false, + + zune_use_unsafe: true, + zune_use_neon: true, + zune_use_avx: true, + zune_use_avx2: true, + zune_use_sse2: true, + zune_use_sse3: true, + zune_use_sse41: true, + + png_add_alpha_channel: false, + png_strip_16_bit_to_8_bit: false, + + png_decode_animated: true, + jxl_decode_animated: true + } +} + +/// Decoder options that are flags +/// +/// NOTE: When you extend this, add true or false to +/// all options above that return a `DecoderFlag` +#[derive(Copy, Debug, Clone, Default)] +pub struct DecoderFlags { + /// Whether the decoder should confirm and report adler mismatch + inflate_confirm_adler: bool, + /// Whether the PNG decoder should confirm crc + png_confirm_crc: bool, + /// Whether the png decoder should error out on image non-conformance + 
jpg_error_on_non_conformance: bool, + /// Whether the decoder should use unsafe platform specific intrinsics + /// + /// This will also shut down platform specific intrinsics `(ZUNE_USE_{EXT})` value + zune_use_unsafe: bool, + /// Whether we should use SSE2. + /// + /// This should be enabled for all x64 platforms but can be turned off if + /// `ZUNE_USE_UNSAFE` is false + zune_use_sse2: bool, + /// Whether we should use SSE3 instructions where possible. + zune_use_sse3: bool, + /// Whether we should use sse4.1 instructions where possible. + zune_use_sse41: bool, + /// Whether we should use avx instructions where possible. + zune_use_avx: bool, + /// Whether we should use avx2 instructions where possible. + zune_use_avx2: bool, + /// Whether the png decoder should add alpha channel where possible. + png_add_alpha_channel: bool, + /// Whether we should use neon instructions where possible. + zune_use_neon: bool, + /// Whether the png decoder should strip 16 bit to 8 bit + png_strip_16_bit_to_8_bit: bool, + /// Decode all frames for an animated images + png_decode_animated: bool, + jxl_decode_animated: bool +} + +/// Decoder options +/// +/// Not all options are respected by decoders all decoders +#[derive(Debug, Copy, Clone)] +pub struct DecoderOptions { + /// Maximum width for which decoders will + /// not try to decode images larger than + /// the specified width. + /// + /// - Default value: 16384 + /// - Respected by: `all decoders` + max_width: usize, + /// Maximum height for which decoders will not + /// try to decode images larger than the + /// specified height + /// + /// - Default value: 16384 + /// - Respected by: `all decoders` + max_height: usize, + /// Output colorspace + /// + /// The jpeg decoder allows conversion to a separate colorspace + /// than the input. 
+ /// + /// I.e you can convert a RGB jpeg image to grayscale without + /// first decoding it to RGB to get + /// + /// - Default value: `ColorSpace::RGB` + /// - Respected by: `jpeg` + out_colorspace: ColorSpace, + + /// Maximum number of scans allowed + /// for progressive jpeg images + /// + /// Progressive jpegs have scans + /// + /// - Default value:100 + /// - Respected by: `jpeg` + max_scans: usize, + /// Maximum size for deflate. + /// Respected by all decoders that use inflate/deflate + deflate_limit: usize, + /// Boolean flags that influence decoding + flags: DecoderFlags, + /// The byte endian of the returned bytes will be stored in + /// in case a single pixel spans more than a byte + endianness: ByteEndian +} + +/// Initializers +impl DecoderOptions { + /// Create the decoder with options setting most configurable + /// options to be their safe counterparts + /// + /// This is the same as `default` option as default initializes + /// options to the safe variant. + /// + /// Note, decoders running on this will be slower as it disables + /// platform specific intrinsics + pub fn new_safe() -> DecoderOptions { + DecoderOptions::default() + } + + /// Create the decoder with options setting the configurable options + /// to the fast counterparts + /// + /// This enables platform specific code paths and enable use of unsafe + pub fn new_fast() -> DecoderOptions { + let flag = fast_options(); + DecoderOptions::default().set_decoder_flags(flag) + } + + /// Create the decoder options with the following characteristics + /// + /// - Use unsafe paths. 
+ /// - Ignore error checksuming, e.g in png we do not confirm adler and crc in this mode + /// - Enable fast intrinsics paths + pub fn new_cmd() -> DecoderOptions { + let flag = cmd_options(); + DecoderOptions::default().set_decoder_flags(flag) + } +} + +/// Global options respected by all decoders +impl DecoderOptions { + /// Get maximum width configured for which the decoder + /// should not try to decode images greater than this width + pub const fn max_width(&self) -> usize { + self.max_width + } + + /// Get maximum height configured for which the decoder should + /// not try to decode images greater than this height + pub const fn max_height(&self) -> usize { + self.max_height + } + + /// Return true whether the decoder should be in strict mode + /// And reject most errors + pub fn strict_mode(&self) -> bool { + self.flags.jpg_error_on_non_conformance + | self.flags.png_confirm_crc + | self.flags.inflate_confirm_adler + } + /// Return true if the decoder should use unsafe + /// routines where possible + pub const fn use_unsafe(&self) -> bool { + self.flags.zune_use_unsafe + } + + /// Set maximum width for which the decoder should not try + /// decoding images greater than that width + /// + /// # Arguments + /// + /// * `width`: The maximum width allowed + /// + /// returns: DecoderOptions + pub fn set_max_width(mut self, width: usize) -> Self { + self.max_width = width; + self + } + + /// Set maximum height for which the decoder should not try + /// decoding images greater than that height + /// # Arguments + /// + /// * `height`: The maximum height allowed + /// + /// returns: DecoderOptions + /// + pub fn set_max_height(mut self, height: usize) -> Self { + self.max_height = height; + self + } + + /// Whether the routines can use unsafe platform specific + /// intrinsics when necessary + /// + /// Platform intrinsics are implemented for operations which + /// the compiler can't auto-vectorize, or we can do a marginably + /// better job at it + /// + /// All 
decoders with unsafe routines respect it. + /// + /// Treat this with caution, disabling it will cause slowdowns but + /// it's provided for mainly for debugging use. + /// + /// - Respected by: `png` and `jpeg`(decoders with unsafe routines) + pub fn set_use_unsafe(mut self, yes: bool) -> Self { + // first clear the flag + self.flags.zune_use_unsafe = yes; + self + } + + fn set_decoder_flags(mut self, flags: DecoderFlags) -> Self { + self.flags = flags; + self + } + /// Set whether the decoder should be in standards conforming/ + /// strict mode + /// + /// This reduces the error tolerance level for the decoders and invalid + /// samples will be rejected by the decoder + /// + /// # Arguments + /// + /// * `yes`: + /// + /// returns: DecoderOptions + /// + pub fn set_strict_mode(mut self, yes: bool) -> Self { + self.flags.jpg_error_on_non_conformance = yes; + self.flags.png_confirm_crc = yes; + self.flags.inflate_confirm_adler = yes; + self + } + + /// Set the byte endian for which raw samples will be stored in + /// in case a single pixel sample spans more than a byte. 
+ /// + /// The default is usually native endian hence big endian values + /// will be converted to little endian on little endian systems, + /// + /// and little endian values will be converted to big endian on big endian systems + /// + /// # Arguments + /// + /// * `endian`: The endianness to which to set the bytes to + /// + /// returns: DecoderOptions + pub fn set_byte_endian(mut self, endian: ByteEndian) -> Self { + self.endianness = endian; + self + } + + /// Get the byte endian for which samples that span more than one byte will + /// be treated + pub const fn byte_endian(&self) -> ByteEndian { + self.endianness + } +} + +/// PNG specific options +impl DecoderOptions { + /// Whether the inflate decoder should confirm + /// adler checksums + pub const fn inflate_get_confirm_adler(&self) -> bool { + self.flags.inflate_confirm_adler + } + /// Set whether the inflate decoder should confirm + /// adler checksums + pub fn inflate_set_confirm_adler(mut self, yes: bool) -> Self { + self.flags.inflate_confirm_adler = yes; + self + } + /// Get default inflate limit for which the decoder + /// will not try to decompress further + pub const fn inflate_get_limit(&self) -> usize { + self.deflate_limit + } + /// Set the default inflate limit for which decompressors + /// relying on inflate won't surpass this limit + #[must_use] + pub fn inflate_set_limit(mut self, limit: usize) -> Self { + self.deflate_limit = limit; + self + } + /// Whether the inflate decoder should confirm + /// crc 32 checksums + pub const fn png_get_confirm_crc(&self) -> bool { + self.flags.png_confirm_crc + } + /// Set whether the png decoder should confirm + /// CRC 32 checksums + #[must_use] + pub fn png_set_confirm_crc(mut self, yes: bool) -> Self { + self.flags.png_confirm_crc = yes; + self + } + /// Set whether the png decoder should add an alpha channel to + /// images where possible. 
+ /// + /// For Luma images, it converts it to Luma+Alpha + /// + /// For RGB images it converts it to RGB+Alpha + pub fn png_set_add_alpha_channel(mut self, yes: bool) -> Self { + self.flags.png_add_alpha_channel = yes; + self + } + /// Return true whether the png decoder should add an alpha + /// channel to images where possible + pub const fn png_get_add_alpha_channel(&self) -> bool { + self.flags.png_add_alpha_channel + } + + /// Whether the png decoder should reduce 16 bit images to 8 bit + /// images implicitly. + /// + /// Equivalent to [png::Transformations::STRIP_16](https://docs.rs/png/latest/png/struct.Transformations.html#associatedconstant.STRIP_16) + pub fn png_set_strip_to_8bit(mut self, yes: bool) -> Self { + self.flags.png_strip_16_bit_to_8_bit = yes; + self + } + + /// Return a boolean indicating whether the png decoder should reduce + /// 16 bit images to 8 bit images implicitly + pub const fn png_get_strip_to_8bit(&self) -> bool { + self.flags.png_strip_16_bit_to_8_bit + } + + /// Return whether `zune-image` should decode animated images or + /// whether we should just decode the first frame only + pub const fn png_decode_animated(&self) -> bool { + self.flags.png_decode_animated + } + /// Set whether `zune-image` should decode animated images or + /// whether we should just decode the first frame only + pub const fn png_set_decode_animated(mut self, yes: bool) -> Self { + self.flags.png_decode_animated = yes; + self + } +} + +/// JPEG specific options +impl DecoderOptions { + /// Get maximum scans for which the jpeg decoder + /// should not go above for progressive images + pub const fn jpeg_get_max_scans(&self) -> usize { + self.max_scans + } + + /// Set maximum scans for which the jpeg decoder should + /// not exceed when reconstructing images. 
+ pub fn jpeg_set_max_scans(mut self, max_scans: usize) -> Self { + self.max_scans = max_scans; + self + } + /// Get expected output colorspace set by the user for which the image + /// is expected to be reconstructed into. + /// + /// This may be different from the + pub const fn jpeg_get_out_colorspace(&self) -> ColorSpace { + self.out_colorspace + } + /// Set expected colorspace for which the jpeg output is expected to be in + /// + /// This is mainly provided as is, we do not guarantee the decoder can convert to all colorspaces + /// and the decoder can change it internally when it sees fit. + #[must_use] + pub fn jpeg_set_out_colorspace(mut self, colorspace: ColorSpace) -> Self { + self.out_colorspace = colorspace; + self + } +} + +/// Intrinsics support +/// +/// These routines are compiled depending +/// on the platform they are used, if compiled for a platform +/// it doesn't support,(e.g avx2 on Arm), it will always return `false` +impl DecoderOptions { + /// Use SSE 2 code paths where possible + /// + /// This checks for existence of SSE2 first and returns + /// false if it's not present + #[allow(unreachable_code)] + pub fn use_sse2(&self) -> bool { + let opt = self.flags.zune_use_sse2 | self.flags.zune_use_unsafe; + // options says no + if !opt { + return false; + } + + #[cfg(any(target_arch = "x86_64", target_arch = "x86"))] + { + // where we can do runtime check if feature is present + #[cfg(feature = "std")] + { + if is_x86_feature_detected!("sse2") { + return true; + } + } + // where we can't do runtime check if feature is present + // check if the compile feature had it enabled + #[cfg(all(not(feature = "std"), target_feature = "sse2"))] + { + return true; + } + } + // everything failed return false + false + } + + /// Use SSE 3 paths where possible + /// + /// + /// This also checks for SSE3 support and returns false if + /// it's not present + #[allow(unreachable_code)] + pub fn use_sse3(&self) -> bool { + let opt = self.flags.zune_use_sse3 | 
self.flags.zune_use_unsafe; + // options says no + if !opt { + return false; + } + + #[cfg(any(target_arch = "x86_64", target_arch = "x86"))] + { + // where we can do runtime check if feature is present + #[cfg(feature = "std")] + { + if is_x86_feature_detected!("sse3") { + return true; + } + } + // where we can't do runtime check if feature is present + // check if the compile feature had it enabled + #[cfg(all(not(feature = "std"), target_feature = "sse3"))] + { + return true; + } + } + // everything failed return false + false + } + + /// Use SSE4 paths where possible + /// + /// This also checks for sse 4.1 support and returns false if it + /// is not present + #[allow(unreachable_code)] + pub fn use_sse41(&self) -> bool { + let opt = self.flags.zune_use_sse41 | self.flags.zune_use_unsafe; + // options says no + if !opt { + return false; + } + + #[cfg(any(target_arch = "x86_64", target_arch = "x86"))] + { + // where we can do runtime check if feature is present + #[cfg(feature = "std")] + { + if is_x86_feature_detected!("sse4.1") { + return true; + } + } + // where we can't do runtime check if feature is present + // check if the compile feature had it enabled + #[cfg(all(not(feature = "std"), target_feature = "sse4.1"))] + { + return true; + } + } + // everything failed return false + false + } + + /// Use AVX paths where possible + /// + /// This also checks for AVX support and returns false if it's + /// not present + #[allow(unreachable_code)] + pub fn use_avx(&self) -> bool { + let opt = self.flags.zune_use_avx | self.flags.zune_use_unsafe; + // options says no + if !opt { + return false; + } + + #[cfg(any(target_arch = "x86_64", target_arch = "x86"))] + { + // where we can do runtime check if feature is present + #[cfg(feature = "std")] + { + if is_x86_feature_detected!("avx") { + return true; + } + } + // where we can't do runitme check if feature is present + // check if the compile feature had it enabled + #[cfg(all(not(feature = "std"), target_feature 
= "avx"))] + { + return true; + } + } + // everything failed return false + false + } + + /// Use avx2 paths where possible + /// + /// This also checks for AVX2 support and returns false if it's not + /// present + #[allow(unreachable_code)] + pub fn use_avx2(&self) -> bool { + let opt = self.flags.zune_use_avx2 | self.flags.zune_use_unsafe; + // options says no + if !opt { + return false; + } + + #[cfg(any(target_arch = "x86_64", target_arch = "x86"))] + { + // where we can do runtime check if feature is present + #[cfg(feature = "std")] + { + if is_x86_feature_detected!("avx2") { + return true; + } + } + // where we can't do runitme check if feature is present + // check if the compile feature had it enabled + #[cfg(all(not(feature = "std"), target_feature = "avx2"))] + { + return true; + } + } + // everything failed return false + false + } + + #[allow(unreachable_code)] + pub fn use_neon(&self) -> bool { + let opt = self.flags.zune_use_neon | self.flags.zune_use_unsafe; + // options says no + if !opt { + return false; + } + + #[cfg(target_arch = "aarch64")] + { + // aarch64 implies neon on a compliant cpu + // but for real prod should do something better here + return true; + } + // everything failed return false + false + } +} + +/// JPEG_XL specific options +impl DecoderOptions { + /// Return whether `zune-image` should decode animated images or + /// whether we should just decode the first frame only + pub const fn jxl_decode_animated(&self) -> bool { + self.flags.jxl_decode_animated + } + /// Set whether `zune-image` should decode animated images or + /// whether we should just decode the first frame only + pub const fn jxl_set_decode_animated(mut self, yes: bool) -> Self { + self.flags.jxl_decode_animated = yes; + self + } +} +impl Default for DecoderOptions { + /// Create a default and sane option for decoders + /// + /// The following are the defaults + /// + /// - All decoders + /// - max_width: 16536 + /// - max_height: 16535 + /// - use_unsafe: Use 
unsafe intrinsics where possible. + /// + /// - JPEG + /// - max_scans: 100 (progressive images only, artificial cap to prevent a specific DOS) + /// - error_on_non_conformance: False (slightly corrupt images will be allowed) + /// - DEFLATE + /// - deflate_limit: 1GB (will not continue decoding deflate archives larger than this) + /// - PNG + /// - endianness: Default endianess is Big Endian when decoding 16 bit images to be viewed as 8 byte images + /// - confirm_crc: False (CRC will not be confirmed to be safe) + /// - strip_16_bit_to_8: False, 16 bit images are handled as 16 bit images + /// - add alpha: False, alpha channel is not added where it isn't present + /// - decode_animated: True: All frames in an animated image are decoded + /// + /// - JXL + /// - decode_animated: True: All frames in an animated image are decoded + /// + fn default() -> Self { + Self { + out_colorspace: ColorSpace::RGB, + max_width: 1 << 14, + max_height: 1 << 14, + max_scans: 100, + deflate_limit: 1 << 30, + flags: decoder_error_tolerance_mode(), + endianness: ByteEndian::BE + } + } +} diff --git a/third_party/zune-core/src/options/encoder.rs b/third_party/zune-core/src/options/encoder.rs new file mode 100644 index 0000000..fb59f6c --- /dev/null +++ b/third_party/zune-core/src/options/encoder.rs @@ -0,0 +1,217 @@ +/* + * Copyright (c) 2023. 
+ * + * This software is free software; + * + * You can redistribute it or modify it under terms of the MIT, Apache License or Zlib license + */ + +use crate::bit_depth::BitDepth; +use crate::colorspace::ColorSpace; + +/// Encoder options that are flags +#[derive(Copy, Debug, Clone, Default)] +struct EncoderFlags { + /// Whether JPEG images should be encoded as progressive images + jpeg_encode_progressive: bool, + /// Whether JPEG images should use optimized huffman tables + jpeg_optimize_huffman: bool, + /// Whether to not preserve metadata across image transformations + image_strip_metadata: bool +} + +/// Options shared by some of the encoders in +/// the `zune-` family of image crates +#[derive(Debug, Copy, Clone)] +pub struct EncoderOptions { + width: usize, + height: usize, + colorspace: ColorSpace, + quality: u8, + depth: BitDepth, + num_threads: u8, + effort: u8, + flags: EncoderFlags +} + +impl Default for EncoderOptions { + fn default() -> Self { + Self { + width: 0, + height: 0, + colorspace: ColorSpace::RGB, + quality: 80, + depth: BitDepth::Eight, + num_threads: 4, + effort: 4, + flags: EncoderFlags::default() + } + } +} + +impl EncoderOptions { + /// Create new encode options + /// + /// # Arguments + /// + /// * `width`: Image width + /// * `height`: Image height + /// * `colorspace`: Image colorspaces + /// * `depth`: Image depth + /// + /// returns: EncoderOptions + /// + pub fn new( + width: usize, height: usize, colorspace: ColorSpace, depth: BitDepth + ) -> EncoderOptions { + EncoderOptions { + width, + height, + colorspace, + depth, + ..Default::default() + } + } + /// Get the width for which the image will be encoded in + pub const fn width(&self) -> usize { + self.width + } + + /// Get height for which the image will be encoded in + /// + /// returns: usize + /// + /// # Panics + /// If height is zero + pub fn height(&self) -> usize { + assert_ne!(self.height, 0); + self.height + } + /// Get the depth for which the image will be encoded in + 
pub const fn depth(&self) -> BitDepth { + self.depth + } + /// Get the quality for which the image will be encoded with + /// + /// # Lossy + /// - Higher quality means some images take longer to write and + /// are big but they look good + /// + /// - Lower quality means small images and low quality. + /// + /// # Lossless + /// - High quality indicates more time is spent in making the file + /// smaller + /// + /// - Low quality indicates less time is spent in making the file bigger + pub const fn quality(&self) -> u8 { + self.quality + } + /// Get the colorspace for which the image will be encoded in + pub const fn colorspace(&self) -> ColorSpace { + self.colorspace + } + pub const fn effort(&self) -> u8 { + self.effort + } + + /// Set width for the image to be encoded + pub fn set_width(mut self, width: usize) -> Self { + self.width = width; + self + } + + /// Set height for the image to be encoded + pub fn set_height(mut self, height: usize) -> Self { + self.height = height; + self + } + /// Set depth for the image to be encoded + pub fn set_depth(mut self, depth: BitDepth) -> Self { + self.depth = depth; + self + } + /// Set quality of the image to be encoded + /// + /// Quality is clamped from 0..100 + /// + /// Quality means different options depending on the encoder, see + /// [get_quality](Self::quality) + pub fn set_quality(mut self, quality: u8) -> Self { + self.quality = quality.clamp(0, 100); + self + } + /// Set colorspace for the image to be encoded + pub fn set_colorspace(mut self, colorspace: ColorSpace) -> Self { + self.colorspace = colorspace; + self + } + /// Set the number of threads allowed for multithreaded encoding + /// where supported + /// + /// Zero means use a single thread + pub fn set_num_threads(mut self, threads: u8) -> Self { + self.num_threads = threads; + + self + } + pub fn set_effort(mut self, effort: u8) -> Self { + self.effort = effort; + self + } + + /// Return number of threads configured for multithreading + /// where 
possible + /// + /// This is used for multi-threaded encoders, + /// currently only jpeg-xl + pub const fn num_threads(&self) -> u8 { + self.num_threads + } + + /// Set whether the encoder should remove metadata from the image + /// + /// When set to `true`, supported encoders will strip away metadata + /// from the resulting image. If set to false, where supported, encoders + /// will not remove metadata from images + pub fn set_strip_metadata(mut self, yes: bool) -> Self { + self.flags.image_strip_metadata = yes; + self + } + /// Whether or not the encoder should remove metadata from the image + /// + /// The default value is false, and encoders that respect this try to preserve as much + /// data as possible from one image to another + pub const fn strip_metadata(&self) -> bool { + !self.flags.image_strip_metadata + } +} + +/// JPEG options +impl EncoderOptions { + /// Whether the jpeg encoder should encode the image in progressive mode + /// + /// Default is `false`. + /// + /// This may be used to create slightly smaller images at the cost of more processing + /// time + pub const fn jpeg_encode_progressive(&self) -> bool { + self.flags.jpeg_encode_progressive + } + + /// Whether the jpeg encoder should optimize huffman tables to create smaller files + /// at the cost of processing time + /// + /// Default is `false`. + pub const fn jpeg_optimized_huffman_tables(&self) -> bool { + self.flags.jpeg_optimize_huffman + } + + /// Set whether the jpeg encoder should encode the imagei in progressive mode + /// + /// Default is `false` + pub fn set_jpeg_encode_progressive(mut self, yes: bool) -> Self { + self.flags.jpeg_optimize_huffman = yes; + self + } +} diff --git a/third_party/zune-core/src/result.rs b/third_party/zune-core/src/result.rs new file mode 100644 index 0000000..135bfb9 --- /dev/null +++ b/third_party/zune-core/src/result.rs @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2023. 
+ * + * This software is free software; + * + * You can redistribute it or modify it under terms of the MIT, Apache License or Zlib license + */ + +//! Decoding results for images +use alloc::vec::Vec; + +/// A simple enum that can hold decode +/// results of most images +#[non_exhaustive] +pub enum DecodingResult { + U8(Vec), + U16(Vec), + F32(Vec) +} + +impl DecodingResult { + /// Return the contents if the enum stores `Vec` or otherwise + /// return `None`. + /// + /// Useful for de-sugaring the result of a decoding operation + /// into raw bytes + /// + /// # Example + /// ``` + /// use zune_core::result::DecodingResult; + /// let data = DecodingResult::U8(vec![0;100]); + /// // we know this won't fail because we created it with u8 + /// assert!(data.u8().is_some()); + /// + /// let data = DecodingResult::U16(vec![0;100]); + /// // it should now return nothing since the type is u18 + /// assert!(data.u8().is_none()); + /// + /// ``` + pub fn u8(self) -> Option> { + match self { + DecodingResult::U8(data) => Some(data), + _ => None + } + } + + /// Return the contents if the enum stores `Vec` or otherwise + /// return `None`. + /// + /// Useful for de-sugaring the result of a decoding operation + /// into raw bytes + /// + /// # Example + /// ``` + /// use zune_core::result::DecodingResult; + /// let data = DecodingResult::U8(vec![0;100]); + /// // we know this will fail because we created it with u16 + /// assert!(data.u16().is_none()); + /// + /// + /// let data = DecodingResult::U16(vec![0;100]); + /// // it should now return something since the type is u16 + /// assert!(data.u16().is_some()); + /// + /// ``` + pub fn u16(self) -> Option> { + match self { + DecodingResult::U16(data) => Some(data), + _ => None + } + } +} diff --git a/third_party/zune-core/src/serde.rs b/third_party/zune-core/src/serde.rs new file mode 100644 index 0000000..0e3b1f3 --- /dev/null +++ b/third_party/zune-core/src/serde.rs @@ -0,0 +1,63 @@ +/* + * Copyright (c) 2023. 
+ * + * This software is free software; + * + * You can redistribute it or modify it under terms of the MIT, Apache License or Zlib license + */ + +#![cfg(feature = "serde")] +//! Serde support for serializing +//! crate datastructures +//! +//! Implements serialize for +//! - ColorSpace +//! - BitDepth +//! - ColorCharacteristics +use alloc::format; + +use serde::ser::*; + +use crate::bit_depth::BitDepth; +use crate::colorspace::{ColorCharacteristics, ColorSpace, RenderingIntent}; + +impl Serialize for ColorSpace { + #[allow(clippy::uninlined_format_args)] + fn serialize(&self, serializer: S) -> Result + where + S: Serializer + { + // colorspace serialization is simply it's debug value + serializer.serialize_str(&format!("{:?}", self)) + } +} + +impl Serialize for BitDepth { + #[allow(clippy::uninlined_format_args)] + fn serialize(&self, serializer: S) -> Result + where + S: Serializer + { + serializer.serialize_str(&format!("{:?}", self)) + } +} + +impl Serialize for ColorCharacteristics { + #[allow(clippy::uninlined_format_args)] + fn serialize(&self, serializer: S) -> Result + where + S: Serializer + { + serializer.serialize_str(&format!("{:?}", self)) + } +} + +impl Serialize for RenderingIntent { + #[allow(clippy::uninlined_format_args)] + fn serialize(&self, serializer: S) -> Result + where + S: Serializer + { + serializer.serialize_str(&format!("{:?}", self)) + } +} diff --git a/third_party/zune-jpeg/.gitignore b/third_party/zune-jpeg/.gitignore new file mode 100644 index 0000000..c41cc9e --- /dev/null +++ b/third_party/zune-jpeg/.gitignore @@ -0,0 +1 @@ +/target \ No newline at end of file diff --git a/third_party/zune-jpeg/Benches.md b/third_party/zune-jpeg/Benches.md new file mode 100644 index 0000000..296e96a --- /dev/null +++ b/third_party/zune-jpeg/Benches.md @@ -0,0 +1,79 @@ +# Benchmarks of popular jpeg libraries + +Here I compare how long it takes popular JPEG decoders to decode the below 7680*4320 image +of (now defunct ?) 
[Cutefish OS](https://en.cutefishos.com/) default wallpaper. +![img](benches/images/speed_bench.jpg) + +## About benchmarks + +Benchmarks are weird, especially IO & multi-threaded programs. This library uses both of the above hence performance may +vary. + +For best results shut down your machine, go take coffee, think about life and how it came to be and why people should +save the environment. + +Then power up your machine, if it's a laptop connect it to a power supply and if there is a setting for performance +mode, tweak it. + +Then run. + +## Benchmarks vs real world usage + +Real world usage may vary. + +Notice that I'm using a large image but probably most decoding will be small to medium images. + +To make the library thread safe, we do about 1.5-1.7x more allocations than libjpeg-turbo. Although, do note that the +allocations do not occur at ago, we allocate when needed and deallocate when not needed. + +Do note if memory bandwidth is a limitation. This is not for you. + +## Reproducibility + +The benchmarks are carried out on my local machine with an AMD Ryzen 5 4500u + +The benchmarks are reproducible. + +To reproduce them + +1. Clone this repository +2. Install rust(if you don't have it yet) +3. `cd` into the directory. +4. Run `cargo bench` + +## Performance features of the three libraries + +| feature | image-rs/jpeg-decoder | libjpeg-turbo | zune-jpeg | +|------------------------------|-----------------------|---------------|-----------| +| multithreaded | ✅ | ❌ | ❌ | +| platform specific intrinsics | ✅ | ✅ | ✅ | + +- Image-rs/jpeg-decoder uses [rayon] under the hood but it's under a feature + flag. + +- libjpeg-turbo uses hand-written asm for platform specific intrinsics, ported to + the most common architectures out there but falls back to scalar + code if it can't run in a platform. + +# Finally benchmarks + +[here] + +## Notes + +Benchmarks are ran at least once a week to catch regressions early and +are uploaded to Github pages. 
+ +Machine specs can be found on the other [landing page] + +Benchmarks may not reflect real world usage(threads, other I/O machine bottlenecks) + +[landing page]:https://etemesi254.github.io/posts/Zune-Benchmarks/ + +[here]:https://etemesi254.github.io/assets/criterion/report/index.html + +[libjpeg-turbo]:https://github.com/libjpeg-turbo/libjpeg-turbo + +[jpeg-decoder]:https://github.com/image-rs/jpeg-decoder + +[rayon]:https://github.com/rayon-rs/rayon \ No newline at end of file diff --git a/third_party/zune-jpeg/Cargo.toml b/third_party/zune-jpeg/Cargo.toml new file mode 100644 index 0000000..1f6a884 --- /dev/null +++ b/third_party/zune-jpeg/Cargo.toml @@ -0,0 +1,26 @@ +[package] +name = "zune-jpeg" +version = "0.5.0-rc1" +authors = ["caleb "] +edition = "2021" +repository = "https://github.com/etemesi254/zune-image/tree/dev/crates/zune-jpeg" +license = "MIT OR Apache-2.0 OR Zlib" +keywords = ["jpeg", "jpeg-decoder", "decoder"] +categories = ["multimedia::images"] +exclude = ["/benches/images/*", "/tests/*", "/.idea/*", "/.gradle/*", "/test-images/*", "fuzz/*"] +description = "A fast, correct and safe jpeg decoder" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html +[features] +x86 = [] +neon = [] +std = ["zune-core/std"] +log = ["zune-core/log"] +default = ["x86", "neon", "std"] + +[dependencies] +zune-core = { path = "../zune-core", version = "^0.5.0-rc1" } + + +[dev-dependencies] +zune-ppm = { path = "../zune-ppm" } diff --git a/third_party/zune-jpeg/Changelog.md b/third_party/zune-jpeg/Changelog.md new file mode 100644 index 0000000..1a9ec0d --- /dev/null +++ b/third_party/zune-jpeg/Changelog.md @@ -0,0 +1,64 @@ +## Version 0.3.17 + +- Fix no-std compilation + +## Version 0.3.16 + +- Add support for decoding to BGR and BGRA + +## Version 0.3.14 + +- Add ability to parse exif and ICC chunk. +- Fix images with one component that were down-sampled. 
+ +### Version 0.3.13 + +- Allow decoding into pre-allocated buffer +- Clarify documentation + +### Version 0.3.11 + +- Add guards for SSE and AVX code paths(allows compiling for platforms that do not support it) + +### Version 0.3.0 + +- Overhaul to the whole decoder. +- Single threaded version +- Lightweight. + +### Version 0.2.0 + +- New `ZuneJpegOptions` struct, this is the now recommended way to set up decoding options for + decoding +- Deprecated previous options setting functions. +- More code cleanups +- Fixed new bugs discovered by fuzzing +- Removed dependency on `num_cpu` + +### Version 0.1.5 +- Allow user to set memory limits in during decoding explicitly via `set_limits` +- Fixed some bugs discovered by fuzzing +- Correctly handle small images less than 16 pixels +- Gracefully handle incorrectly sampled images. + +### Version 0.1.4 +- Remove all `unsafe` instances except platform dependent intrinsics. +- Numerous bug fixes identified by fuzzing. +- Expose `ImageInfo` to the crate root. + +### Version 0.1.3 +- Fix numerous panics found by fuzzing(thanks to @[Shnatsel] for the corpus) +- Add new method `set_num_threads` that allows one to explicitly set the number of threads to use to decode the image. + +### Version 0.1.2 +- Add more sub checks, contributed by @[5225225] +- Privatize some modules. 
+ +### Version 0.1.1 +- Fix rgba/rgbx decoding when avx optimized functions were used +- Initial support for fuzzing +- Remove `align_alloc` method which was unsound (Thanks to @[HeroicKatora] for pointing that out) + +[Shnatsel]:https://github.com/Shnatsel +[HeroicKatora]:https://github.com/HeroicKatora +[5225225]:https://github.com/5225225 \ No newline at end of file diff --git a/third_party/zune-jpeg/LICENSE-APACHE b/third_party/zune-jpeg/LICENSE-APACHE new file mode 100644 index 0000000..1cd601d --- /dev/null +++ b/third_party/zune-jpeg/LICENSE-APACHE @@ -0,0 +1 @@ +../../LICENSE-APACHE \ No newline at end of file diff --git a/third_party/zune-jpeg/LICENSE-MIT b/third_party/zune-jpeg/LICENSE-MIT new file mode 100644 index 0000000..b2cfbdc --- /dev/null +++ b/third_party/zune-jpeg/LICENSE-MIT @@ -0,0 +1 @@ +../../LICENSE-MIT \ No newline at end of file diff --git a/third_party/zune-jpeg/LICENSE-ZLIB b/third_party/zune-jpeg/LICENSE-ZLIB new file mode 100644 index 0000000..f0648a7 --- /dev/null +++ b/third_party/zune-jpeg/LICENSE-ZLIB @@ -0,0 +1 @@ +../../LICENSE-ZLIB \ No newline at end of file diff --git a/third_party/zune-jpeg/README.md b/third_party/zune-jpeg/README.md new file mode 100644 index 0000000..606c987 --- /dev/null +++ b/third_party/zune-jpeg/README.md @@ -0,0 +1,104 @@ +# Zune-JPEG + +A fast, correct and safe jpeg decoder in pure Rust. + +## Usage + +The library provides a simple-to-use API for jpeg decoding +and an ability to add options to influence decoding. + +### Example + +```Rust +// Import the library +use zune_jpeg::JpegDecoder; +use std::fs::read; + +fn main()->Result<(),DecoderErrors> { + // load some jpeg data + let data = read("cat.jpg").unwrap(); + // create a decoder + let mut decoder = JpegDecoder::new(&data); + // decode the file + let pixels = decoder.decode()?; +} +``` + +The decoder supports more manipulations via `DecoderOptions`, +see additional documentation in the library. 
+ +## Goals + +The implementation aims to have the following goals achieved, +in order of importance + +1. Safety - Do not segfault on errors or invalid input. Panics are okay, but + should be fixed when reported. `unsafe` is only used for SIMD intrinsics, + and can be turned off entirely both at compile time and at runtime. +2. Speed - Get the data as quickly as possible, which means + 1. Platform intrinsics code where justifiable + 2. Carefully written platform independent code that allows the + compiler to vectorize it. + 3. Regression tests. + 4. Watch the memory usage of the program +3. Usability - Provide utility functions like different color conversions functions. + +## Non-Goals + +- Bit identical results with libjpeg/libjpeg-turbo will never be an aim of this library. + Jpeg is a lossy format with very few parts specified by the standard + (i.e it doesn't give a reference upsampling and color conversion algorithm) + +## Features + +- [x] A Pretty fast 8*8 integer IDCT. +- [x] Fast Huffman Decoding +- [x] Fast color convert functions. +- [x] Support for extended colorspaces like GrayScale and RGBA +- [X] Single-threaded decoding. +- [X] Support for four component JPEGs, and esoteric color schemes like CYMK +- [X] Support for `no_std` +- [X] BGR/BGRA decoding support. + +## Crate Features + +| feature | on | Capabilities | +|---------|-----|---------------------------------------------------------------------------------------------| +| `x86` | yes | Enables `x86` specific instructions, specifically `avx` and `sse` for accelerated decoding. | +| `std` | yes | Enable linking to the `std` crate | + +Note that the `x86` features are automatically disabled on platforms that aren't x86 during compile +time hence there is no need to disable them explicitly if you are targeting such a platform. + +## Using in a `no_std` environment + +The crate can be used in a `no_std` environment with the `alloc` feature. 
+ +But one is required to link to a working allocator for whatever environment the decoder +will be running on + +## Debug vs release + +The decoder heavily relies on platform specific intrinsics, namely AVX2 and SSE to gain speed-ups in decoding, +but they [perform poorly](https://godbolt.org/z/vPq57z13b) in debug builds. To get reasonable performance even +when compiling your program in debug mode, add this to your `Cargo.toml`: + +```toml +# `zune-jpeg` package will be always built with optimizations +[profile.dev.package.zune-jpeg] +opt-level = 3 +``` + +## Benchmarks + +The library tries to be at fast as [libjpeg-turbo] while being as safe as possible. +Platform specific intrinsics help get speed up intensive operations ensuring we can almost +match [libjpeg-turbo] speeds but speeds are always +- 10 ms of this library. + +For more up-to-date benchmarks, see the online repo with +benchmarks [here](https://etemesi254.github.io/assets/criterion/report/index.html) + + +[libjpeg-turbo]:https://github.com/libjpeg-turbo/libjpeg-turbo/ + +[image-rs/jpeg-decoder]:https://github.com/image-rs/jpeg-decoder/tree/master/src diff --git a/third_party/zune-jpeg/fuzz/.gitignore b/third_party/zune-jpeg/fuzz/.gitignore new file mode 100644 index 0000000..a092511 --- /dev/null +++ b/third_party/zune-jpeg/fuzz/.gitignore @@ -0,0 +1,3 @@ +target +corpus +artifacts diff --git a/third_party/zune-jpeg/fuzz/Cargo.toml b/third_party/zune-jpeg/fuzz/Cargo.toml new file mode 100644 index 0000000..2574dcd --- /dev/null +++ b/third_party/zune-jpeg/fuzz/Cargo.toml @@ -0,0 +1,32 @@ +[package] +name = "zune-jpeg-fuzz" +version = "0.0.0" +authors = ["Automatically generated"] +publish = false +edition = "2018" + +[package.metadata] +cargo-fuzz = true + +[dependencies] +libfuzzer-sys = "0.4" + +[dependencies.zune-jpeg] +path = ".." 
+features = ["neon", "x86"] + +# Prevent this from interfering with workspaces +[workspace] +members = ["."] + +[[bin]] +name = "decode_buffer" +path = "fuzz_targets/decode_buffer.rs" +test = false +doc = false + +[[bin]] +name = "fuzz_idct" +path = "fuzz_targets/fuzz_idct.rs" +test = false +doc = false diff --git a/third_party/zune-jpeg/fuzz/fuzz_targets/decode_buffer.rs b/third_party/zune-jpeg/fuzz/fuzz_targets/decode_buffer.rs new file mode 100644 index 0000000..6f2e206 --- /dev/null +++ b/third_party/zune-jpeg/fuzz/fuzz_targets/decode_buffer.rs @@ -0,0 +1,10 @@ +#![no_main] + +use libfuzzer_sys::fuzz_target; + +fuzz_target!(|data: &[u8]| { + use zune_jpeg::zune_core::bytestream::ZCursor; + let data = ZCursor::new(data); + let mut decoder = zune_jpeg::JpegDecoder::new(data); + let _ = decoder.decode(); +}); diff --git a/third_party/zune-jpeg/fuzz/fuzz_targets/fuzz_idct.rs b/third_party/zune-jpeg/fuzz/fuzz_targets/fuzz_idct.rs new file mode 100644 index 0000000..8fa03ad --- /dev/null +++ b/third_party/zune-jpeg/fuzz/fuzz_targets/fuzz_idct.rs @@ -0,0 +1,47 @@ +#![no_main] +use libfuzzer_sys::fuzz_target; +use zune_jpeg::idct::scalar::idct_int; + +fuzz_target!(|data: [i32; 64]| { + let mut data = data; + + // keep in some relatively sane range + // to prevent scalar overflows + for d in &mut data + { + let bound = 255; + *d = (*d).min(bound).max(-bound); + } + let mut data_vec = data; + // this is way too big but it shouldn't matter + // scalar and vector should mutate the minimum needed + + let mut output_scalar = [0i16; 64]; + let mut output_vector = [0i16; 64]; + + let _must_use_supported_vector_arch; + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + #[cfg(target_feature = "avx2")] + { + use zune_jpeg::idct::avx2::idct_avx2; + idct_avx2(&mut data_vec, &mut output_vector, 8); + _must_use_supported_vector_arch = true; + } + + #[cfg(target_arch = "aarch64")] + { + use zune_jpeg::idct::neon::idct_neon; + idct_neon(&mut data_vec, &mut output_vector, 8); + 
_must_use_supported_vector_arch = true; + } + + if _must_use_supported_vector_arch + { + idct_int(&mut data, &mut output_scalar, 8); + assert_eq!(output_scalar, output_vector, "IDCT and scalar do not match"); + } + else + { + panic!("No vector IDCT ran!") + } +}); diff --git a/third_party/zune-jpeg/src/bitstream.rs b/third_party/zune-jpeg/src/bitstream.rs new file mode 100644 index 0000000..9f3032a --- /dev/null +++ b/third_party/zune-jpeg/src/bitstream.rs @@ -0,0 +1,671 @@ +/* + * Copyright (c) 2023. + * + * This software is free software; + * + * You can redistribute it or modify it under terms of the MIT, Apache License or Zlib license + */ + +#![allow( + clippy::if_not_else, + clippy::similar_names, + clippy::inline_always, + clippy::doc_markdown, + clippy::cast_sign_loss, + clippy::cast_possible_truncation +)] + +//! This file exposes a single struct that can decode a huffman encoded +//! Bitstream in a JPEG file +//! +//! This code is optimized for speed. +//! It's meant to be super duper super fast, because everyone else depends on this being fast. +//! It's (annoyingly) serial hence we cant use parallel bitstreams(it's variable length coding.) +//! +//! Furthermore, on the case of refills, we have to do bytewise processing because the standard decided +//! that we want to support markers in the middle of streams(seriously few people use RST markers). +//! +//! So we pull in all optimization steps: +//! - use `inline[always]`? ✅ , +//! - pre-execute most common cases ✅, +//! - add random comments ✅ +//! - fast paths ✅. +//! +//! Speed-wise: It is probably the fastest JPEG BitStream decoder to ever sail the seven seas because of +//! a couple of optimization tricks. +//! 1. Fast refills from libjpeg-turbo +//! 2. As few as possible branches in decoder fast paths. +//! 3. Accelerated AC table decoding borrowed from stb_image.h written by Fabian Gissen (@ rygorous), +//! improved by me to handle more cases. +//! 4. Safe and extensible routines(e.g. 
cool ways to eliminate bounds check) +//! 5. No unsafe here +//! +//! Readability comes as a second priority(I tried with variable names this time, and we are wayy better than libjpeg). +//! +//! Anyway if you are reading this it means your cool and I hope you get whatever part of the code you are looking for +//! (or learn something cool) +//! +//! Knock yourself out. +use alloc::format; +use alloc::string::ToString; +use core::cmp::min; + +use zune_core::bytestream::{ZByteReaderTrait, ZReader}; + +use crate::errors::DecodeErrors; +use crate::huffman::{HuffmanTable, HUFF_LOOKAHEAD}; +use crate::marker::Marker; +use crate::mcu::DCT_BLOCK; +use crate::misc::UN_ZIGZAG; + +macro_rules! decode_huff { + ($stream:tt,$symbol:tt,$table:tt) => { + let mut code_length = $symbol >> HUFF_LOOKAHEAD; + + ($symbol) &= (1 << HUFF_LOOKAHEAD) - 1; + + if code_length > i32::from(HUFF_LOOKAHEAD) + { + // if the symbol cannot be resolved in the first HUFF_LOOKAHEAD bits, + // we know it lies somewhere between HUFF_LOOKAHEAD and 16 bits since jpeg imposes 16 bit + // limit, we can therefore look 16 bits ahead and try to resolve the symbol + // starting from 1+HUFF_LOOKAHEAD bits. + $symbol = ($stream).peek_bits::<16>() as i32; + // (Credits to Sean T. Barrett stb library for this optimization) + // maxcode is pre-shifted 16 bytes long so that it has (16-code_length) + // zeroes at the end hence we do not need to shift in the inner loop. + while code_length < 17{ + if $symbol < $table.maxcode[code_length as usize] { + break; + } + code_length += 1; + } + + if code_length == 17{ + // symbol could not be decoded. + // + // We may think, lets fake zeroes, noo + // panic, because Huffman codes are sensitive, probably everything + // after this will be corrupt, so no need to continue. 
+ return Err(DecodeErrors::Format(format!("Bad Huffman Code 0x{:X}, corrupt JPEG",$symbol))) + } + + $symbol >>= (16-code_length); + ($symbol) = i32::from( + ($table).values + [(($symbol + ($table).offset[code_length as usize]) & 0xFF) as usize], + ); + } + // drop bits read + ($stream).drop_bits(code_length as u8); + }; +} + +/// A `BitStream` struct, a bit by bit reader with super powers +/// +pub(crate) struct BitStream { + /// A MSB type buffer that is used for some certain operations + pub buffer: u64, + /// A TOP aligned MSB type buffer that is used to accelerate some operations like + /// peek_bits and get_bits. + /// + /// By top aligned, I mean the top bit (63) represents the top bit in the buffer. + aligned_buffer: u64, + /// Tell us the bits left the two buffer + pub(crate) bits_left: u8, + /// Did we find a marker(RST/EOF) during decoding? + pub marker: Option, + + /// Progressive decoding + pub successive_high: u8, + pub successive_low: u8, + spec_start: u8, + spec_end: u8, + pub eob_run: i32, + pub overread_by: usize, + /// True if we have seen end of image marker. + /// Don't read anything after that. 
+ pub seen_eoi: bool +} + +impl BitStream { + /// Create a new BitStream + pub(crate) const fn new() -> BitStream { + BitStream { + buffer: 0, + aligned_buffer: 0, + bits_left: 0, + marker: None, + successive_high: 0, + successive_low: 0, + spec_start: 0, + spec_end: 0, + eob_run: 0, + overread_by: 0, + seen_eoi: false + } + } + + /// Create a new Bitstream for progressive decoding + #[allow(clippy::redundant_field_names)] + pub(crate) fn new_progressive(ah: u8, al: u8, spec_start: u8, spec_end: u8) -> BitStream { + BitStream { + buffer: 0, + aligned_buffer: 0, + bits_left: 0, + marker: None, + successive_high: ah, + successive_low: al, + spec_start: spec_start, + spec_end: spec_end, + eob_run: 0, + overread_by: 0, + seen_eoi: false + } + } + + /// Refill the bit buffer by (a maximum of) 32 bits + /// + /// # Arguments + /// - `reader`:`&mut BufReader`: A mutable reference to an underlying + /// File/Memory buffer containing a valid JPEG stream + /// + /// This function will only refill if `self.count` is less than 32 + #[inline(always)] // to many call sites? ( perf improvement by 4%) + fn refill(&mut self, reader: &mut ZReader) -> Result + where + T: ZByteReaderTrait + { + /// Macro version of a single byte refill. + /// Arguments + /// buffer-> our io buffer, because rust macros cannot get values from + /// the surrounding environment bits_left-> number of bits left + /// to full refill + macro_rules! refill { + ($buffer:expr,$byte:expr,$bits_left:expr) => { + // read a byte from the stream + $byte = u64::from(reader.read_u8()); + self.overread_by += usize::from(reader.eof()?); + // append to the buffer + // JPEG is a MSB type buffer so that means we append this + // to the lower end (0..8) of the buffer and push the rest bits above.. 
+ $buffer = ($buffer << 8) | $byte; + // Increment bits left + $bits_left += 8; + // Check for special case of OxFF, to see if it's a stream or a marker + if $byte == 0xff { + // read next byte + let mut next_byte = u64::from(reader.read_u8()); + // Byte snuffing, if we encounter byte snuff, we skip the byte + if next_byte != 0x00 { + // skip that byte we read + while next_byte == 0xFF { + next_byte = u64::from(reader.read_u8()); + } + + if next_byte != 0x00 { + // Undo the byte append and return + $buffer >>= 8; + $bits_left -= 8; + + if $bits_left != 0 { + self.aligned_buffer = $buffer << (64 - $bits_left); + } + + self.marker = + Some(Marker::from_u8(next_byte as u8).ok_or_else(|| { + DecodeErrors::Format(format!( + "Unknown marker 0xFF{:X}", + next_byte + )) + })?); + return Ok(false); + } + } + } + }; + } + + // 32 bits is enough for a decode(16 bits) and receive_extend(max 16 bits) + // If we have less than 32 bits we refill + if self.bits_left < 32 && self.marker.is_none() && !self.seen_eoi { + // we optimize for the case where we don't have 255 in the stream and have 4 bytes left + // as it is the common case + // + // so we always read 4 bytes, if read_fixed_bytes errors out, the cursor is + // guaranteed not to advance in case of failure (is this true), so + // we revert the read later on (if we have 255), if this fails, we use the normal + // byte at a time read + if let Ok(bytes) = reader.read_fixed_bytes_or_error::<4>() { + // we have 4 bytes to spare, read the 4 bytes into a temporary buffer + // create buffer + let msb_buf = u32::from_be_bytes(bytes); + // check if we have 0xff + if !has_byte(msb_buf, 255) { + self.bits_left += 32; + self.buffer <<= 32; + self.buffer |= u64::from(msb_buf); + self.aligned_buffer = self.buffer << (64 - self.bits_left); + return Ok(true); + } + + reader.rewind(4)?; + } + // This serves two reasons, + // 1: Make clippy shut up + // 2: Favour register reuse + let mut byte; + // 4 refills, if all succeed the stream should 
contain enough bits to decode a + // value + refill!(self.buffer, byte, self.bits_left); + refill!(self.buffer, byte, self.bits_left); + refill!(self.buffer, byte, self.bits_left); + refill!(self.buffer, byte, self.bits_left); + // Construct an MSB buffer whose top bits are the bitstream we are currently holding. + self.aligned_buffer = self.buffer << (64 - self.bits_left); + } + return Ok(true); + } + /// Decode the DC coefficient in a MCU block. + /// + /// The decoded coefficient is written to `dc_prediction` + /// + #[allow( + clippy::cast_possible_truncation, + clippy::cast_sign_loss, + clippy::unwrap_used + )] + #[inline(always)] + fn decode_dc( + &mut self, reader: &mut ZReader, dc_table: &HuffmanTable, dc_prediction: &mut i32 + ) -> Result + where + T: ZByteReaderTrait + { + let (mut symbol, r); + + if self.bits_left < 32 { + self.refill(reader)?; + }; + // look a head HUFF_LOOKAHEAD bits into the bitstream + symbol = self.peek_bits::(); + symbol = dc_table.lookup[symbol as usize]; + + decode_huff!(self, symbol, dc_table); + + if symbol != 0 { + r = self.get_bits(symbol as u8); + symbol = huff_extend(r, symbol); + } + // Update DC prediction + *dc_prediction = dc_prediction.wrapping_add(symbol); + + return Ok(true); + } + + /// Decode a Minimum Code Unit(MCU) as quickly as possible + /// + /// # Arguments + /// - reader: The bitstream from where we read more bits. 
+ /// - dc_table: The Huffman table used to decode the DC coefficient + /// - ac_table: The Huffman table used to decode AC values + /// - block: A memory region where we will write out the decoded values + /// - DC prediction: Last DC value for this component + /// + #[allow( + clippy::many_single_char_names, + clippy::cast_possible_truncation, + clippy::cast_sign_loss + )] + #[inline(never)] + pub fn decode_mcu_block( + &mut self, reader: &mut ZReader, dc_table: &HuffmanTable, ac_table: &HuffmanTable, + qt_table: &[i32; DCT_BLOCK], block: &mut [i32; 64], dc_prediction: &mut i32 + ) -> Result<(), DecodeErrors> + where + T: ZByteReaderTrait + { + // Get fast AC table as a reference before we enter the hot path + let ac_lookup = ac_table.ac_lookup.as_ref().unwrap(); + + let (mut symbol, mut r, mut fast_ac); + // Decode AC coefficients + let mut pos: usize = 1; + + // decode DC, dc prediction will contain the value + self.decode_dc(reader, dc_table, dc_prediction)?; + + // set dc to be the dc prediction. 
+ block[0] = *dc_prediction * qt_table[0]; + + while pos < 64 { + self.refill(reader)?; + symbol = self.peek_bits::(); + fast_ac = ac_lookup[symbol as usize]; + symbol = ac_table.lookup[symbol as usize]; + + if fast_ac != 0 { + // FAST AC path + pos += ((fast_ac >> 4) & 15) as usize; // run + let t_pos = UN_ZIGZAG[min(pos, 63)] & 63; + + block[t_pos] = i32::from(fast_ac >> 8) * (qt_table[t_pos]); // Value + self.drop_bits((fast_ac & 15) as u8); + pos += 1; + } else { + decode_huff!(self, symbol, ac_table); + + r = symbol >> 4; + symbol &= 15; + + if symbol != 0 { + pos += r as usize; + r = self.get_bits(symbol as u8); + symbol = huff_extend(r, symbol); + let t_pos = UN_ZIGZAG[pos & 63] & 63; + + block[t_pos] = symbol * qt_table[t_pos]; + + pos += 1; + } else if r != 15 { + return Ok(()); + } else { + pos += 16; + } + } + } + return Ok(()); + } + + /// Peek `look_ahead` bits ahead without discarding them from the buffer + #[inline(always)] + #[allow(clippy::cast_possible_truncation)] + const fn peek_bits(&self) -> i32 { + (self.aligned_buffer >> (64 - LOOKAHEAD)) as i32 + } + + /// Discard the next `N` bits without checking + #[inline] + fn drop_bits(&mut self, n: u8) { + self.bits_left = self.bits_left.saturating_sub(n); + self.aligned_buffer <<= n; + } + + /// Read `n_bits` from the buffer and discard them + #[inline(always)] + #[allow(clippy::cast_possible_truncation)] + fn get_bits(&mut self, n_bits: u8) -> i32 { + let mask = (1_u64 << n_bits) - 1; + + self.aligned_buffer = self.aligned_buffer.rotate_left(u32::from(n_bits)); + let bits = (self.aligned_buffer & mask) as i32; + self.bits_left = self.bits_left.wrapping_sub(n_bits); + bits + } + + /// Decode a DC block + #[allow(clippy::cast_possible_truncation)] + #[inline] + pub(crate) fn decode_prog_dc_first( + &mut self, reader: &mut ZReader, dc_table: &HuffmanTable, block: &mut i16, + dc_prediction: &mut i32 + ) -> Result<(), DecodeErrors> + where + T: ZByteReaderTrait + { + self.decode_dc(reader, dc_table, 
dc_prediction)?; + *block = (*dc_prediction as i16).wrapping_mul(1_i16 << self.successive_low); + return Ok(()); + } + #[inline] + pub(crate) fn decode_prog_dc_refine( + &mut self, reader: &mut ZReader, block: &mut i16 + ) -> Result<(), DecodeErrors> + where + T: ZByteReaderTrait + { + // refinement scan + if self.bits_left < 1 { + self.refill(reader)?; + } + + if self.get_bit() == 1 { + *block = block.wrapping_add(1 << self.successive_low); + } + + Ok(()) + } + + /// Get a single bit from the bitstream + fn get_bit(&mut self) -> u8 { + let k = (self.aligned_buffer >> 63) as u8; + // discard a bit + self.drop_bits(1); + return k; + } + pub(crate) fn decode_mcu_ac_first( + &mut self, reader: &mut ZReader, ac_table: &HuffmanTable, block: &mut [i16; 64] + ) -> Result + where + T: ZByteReaderTrait + { + let shift = self.successive_low; + let fast_ac = ac_table.ac_lookup.as_ref().unwrap(); + + let mut k = self.spec_start as usize; + let (mut symbol, mut r, mut fac); + + // EOB runs are handled in mcu_prog.rs + 'block: loop { + self.refill(reader)?; + + symbol = self.peek_bits::(); + fac = fast_ac[symbol as usize]; + symbol = ac_table.lookup[symbol as usize]; + + if fac != 0 { + // fast ac path + k += ((fac >> 4) & 15) as usize; // run + block[UN_ZIGZAG[min(k, 63)] & 63] = (fac >> 8).wrapping_mul(1 << shift); // value + self.drop_bits((fac & 15) as u8); + k += 1; + } else { + decode_huff!(self, symbol, ac_table); + + r = symbol >> 4; + symbol &= 15; + + if symbol != 0 { + k += r as usize; + r = self.get_bits(symbol as u8); + symbol = huff_extend(r, symbol); + block[UN_ZIGZAG[k & 63] & 63] = (symbol as i16).wrapping_mul(1 << shift); + k += 1; + } else { + if r != 15 { + self.eob_run = 1 << r; + self.eob_run += self.get_bits(r as u8); + self.eob_run -= 1; + break; + } + + k += 16; + } + } + + if k > self.spec_end as usize { + break 'block; + } + } + return Ok(true); + } + #[allow(clippy::too_many_lines, clippy::op_ref)] + pub(crate) fn decode_mcu_ac_refine( + &mut self, 
reader: &mut ZReader, table: &HuffmanTable, block: &mut [i16; 64] + ) -> Result + where + T: ZByteReaderTrait + { + let bit = (1 << self.successive_low) as i16; + + let mut k = self.spec_start; + let (mut symbol, mut r); + + if self.eob_run == 0 { + 'no_eob: loop { + // Decode a coefficient from the bit stream + self.refill(reader)?; + + symbol = self.peek_bits::(); + symbol = table.lookup[symbol as usize]; + + decode_huff!(self, symbol, table); + + r = symbol >> 4; + symbol &= 15; + + if symbol == 0 { + if r != 15 { + // EOB run is 2^r + bits + self.eob_run = 1 << r; + self.eob_run += self.get_bits(r as u8); + // EOB runs are handled by the eob logic + break 'no_eob; + } + } else { + if symbol != 1 { + return Err(DecodeErrors::HuffmanDecode( + "Bad Huffman code, corrupt JPEG?".to_string() + )); + } + // get sign bit + // We assume we have enough bits, which should be correct for sane images + // since we refill by 32 above + if self.get_bit() == 1 { + symbol = i32::from(bit); + } else { + symbol = i32::from(-bit); + } + } + + // Advance over already nonzero coefficients appending + // correction bits to the non-zeroes. + // A correction bit is 1 if the absolute value of the coefficient must be increased + + if k <= self.spec_end { + 'advance_nonzero: loop { + let coefficient = &mut block[UN_ZIGZAG[k as usize & 63] & 63]; + + if *coefficient != 0 { + if self.get_bit() == 1 && (*coefficient & bit) == 0 { + if *coefficient >= 0 { + *coefficient += bit; + } else { + *coefficient -= bit; + } + } + + if self.bits_left < 1 { + self.refill(reader)?; + } + } else { + r -= 1; + + if r < 0 { + // reached target zero coefficient. + break 'advance_nonzero; + } + }; + + if k == self.spec_end { + break 'advance_nonzero; + } + + k += 1; + } + } + + if symbol != 0 { + let pos = UN_ZIGZAG[k as usize & 63]; + // output new non-zero coefficient. 
+ block[pos & 63] = symbol as i16; + } + + k += 1; + + if k > self.spec_end { + break 'no_eob; + } + } + } + if self.eob_run > 0 { + // only run if block does not consists of purely zeroes + if &block[1..] != &[0; 63] { + self.refill(reader)?; + + while k <= self.spec_end { + let coefficient = &mut block[UN_ZIGZAG[k as usize & 63] & 63]; + + if *coefficient != 0 && self.get_bit() == 1 { + // check if we already modified it, if so do nothing, otherwise + // append the correction bit. + if (*coefficient & bit) == 0 { + if *coefficient >= 0 { + *coefficient = coefficient.wrapping_add(bit); + } else { + *coefficient = coefficient.wrapping_sub(bit); + } + } + } + if self.bits_left < 1 { + // refill at the last possible moment + self.refill(reader)?; + } + k += 1; + } + } + // count a block completed in EOB run + self.eob_run -= 1; + } + return Ok(true); + } + + pub fn update_progressive_params(&mut self, ah: u8, al: u8, spec_start: u8, spec_end: u8) { + self.successive_high = ah; + self.successive_low = al; + self.spec_start = spec_start; + self.spec_end = spec_end; + } + + /// Reset the stream if we have a restart marker + /// + /// Restart markers indicate drop those bits in the stream and zero out + /// everything + #[cold] + pub fn reset(&mut self) { + self.bits_left = 0; + self.marker = None; + self.buffer = 0; + self.aligned_buffer = 0; + self.eob_run = 0; + } +} + +/// Do the equivalent of JPEG HUFF_EXTEND +#[inline(always)] +fn huff_extend(x: i32, s: i32) -> i32 { + // if x> 31) & (((-1) << (s)) + 1)) +} + +const fn has_zero(v: u32) -> bool { + // Retrieved from Stanford bithacks + // @ https://graphics.stanford.edu/~seander/bithacks.html#ZeroInWord + return !((((v & 0x7F7F_7F7F) + 0x7F7F_7F7F) | v) | 0x7F7F_7F7F) != 0; +} + +const fn has_byte(b: u32, val: u8) -> bool { + // Retrieved from Stanford bithacks + // @ https://graphics.stanford.edu/~seander/bithacks.html#ZeroInWord + has_zero(b ^ ((!0_u32 / 255) * (val as u32))) +} diff --git 
a/third_party/zune-jpeg/src/color_convert.rs b/third_party/zune-jpeg/src/color_convert.rs new file mode 100644 index 0000000..1fca3b2 --- /dev/null +++ b/third_party/zune-jpeg/src/color_convert.rs @@ -0,0 +1,89 @@ +/* + * Copyright (c) 2023. + * + * This software is free software; + * + * You can redistribute it or modify it under terms of the MIT, Apache License or Zlib license + */ + +#![allow( + clippy::many_single_char_names, + clippy::similar_names, + clippy::cast_possible_truncation, + clippy::cast_sign_loss, + clippy::cast_possible_wrap, + clippy::too_many_arguments, + clippy::doc_markdown +)] + +//! Color space conversion routines +//! +//! This files exposes functions to convert one colorspace to another in a jpeg +//! image +//! +//! Currently supported conversions are +//! +//! - `YCbCr` to `RGB,RGBA,GRAYSCALE,RGBX`. +//! +//! +//! Hey there, if your reading this it means you probably need something, so let me help you. +//! +//! There are 3 supported cpu extensions here. +//! 1. Scalar +//! 2. SSE +//! 3. AVX +//! +//! There are two types of the color convert functions +//! +//! 1. Acts on 16 pixels. +//! 2. Acts on 8 pixels. +//! +//! The reason for this is because when implementing the AVX part it occurred to me that we can actually +//! do better and process 2 MCU's if we change IDCT return type to be `i16's`, since a lot of +//! CPU's these days support AVX extensions, it becomes nice if we optimize for that path , +//! therefore AVX routines can process 16 pixels directly and SSE and Scalar just compensate. +//! +//! By compensating, I mean I wrote the 16 pixels version operating on the 8 pixel version twice. +//! +//! Therefore if your looking to optimize some routines, probably start there. 
+ +pub use scalar::ycbcr_to_grayscale; +use zune_core::colorspace::ColorSpace; +use zune_core::options::DecoderOptions; + +#[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +#[cfg(feature = "x86")] +pub use crate::color_convert::avx::{ycbcr_to_rgb_avx2, ycbcr_to_rgba_avx2}; +use crate::decoder::ColorConvert16Ptr; + +mod avx; +mod scalar; +#[allow(unused_variables)] +pub fn choose_ycbcr_to_rgb_convert_func( + type_need: ColorSpace, options: &DecoderOptions +) -> Option { + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + #[cfg(feature = "x86")] + { + use zune_core::log::debug; + if options.use_avx2() { + debug!("Using AVX optimised color conversion functions"); + + // I believe avx2 means sse4 is also available + // match colorspace + match type_need { + ColorSpace::RGB => return Some(ycbcr_to_rgb_avx2), + ColorSpace::RGBA => return Some(ycbcr_to_rgba_avx2), + _ => () // fall through to scalar, which has more types + }; + } + } + // when there is no x86 or we haven't returned by here, resort to scalar + return match type_need { + ColorSpace::RGB => Some(scalar::ycbcr_to_rgb_inner_16_scalar::), + ColorSpace::RGBA => Some(scalar::ycbcr_to_rgba_inner_16_scalar::), + ColorSpace::BGRA => Some(scalar::ycbcr_to_rgba_inner_16_scalar::), + ColorSpace::BGR => Some(scalar::ycbcr_to_rgb_inner_16_scalar::), + _ => None + }; +} diff --git a/third_party/zune-jpeg/src/color_convert/avx.rs b/third_party/zune-jpeg/src/color_convert/avx.rs new file mode 100644 index 0000000..5f828b5 --- /dev/null +++ b/third_party/zune-jpeg/src/color_convert/avx.rs @@ -0,0 +1,350 @@ +/* + * Copyright (c) 2023. + * + * This software is free software; + * + * You can redistribute it or modify it under terms of the MIT, Apache License or Zlib license + */ + +//! AVX color conversion routines +//! +//! Okay these codes are cool +//! +//! Herein lies super optimized codes to do color conversions. +//! +//! +//! 1. 
The YCbCr to RGB use integer approximations and not the floating point equivalent. +//! That means we may be +- 2 of pixels generated by libjpeg-turbo jpeg decoding +//! (also libjpeg uses routines like `Y = 0.29900 * R + 0.33700 * G + 0.11400 * B + 0.25000 * G`) +//! +//! Firstly, we use integers (fun fact:there is no part of this code base where were dealing with +//! floating points.., fun fact: the first fun fact wasn't even fun.) +//! +//! Secondly ,we have cool clamping code, especially for rgba , where we don't need clamping and we +//! spend our time cursing that Intel decided permute instructions to work like 2 128 bit vectors(the compiler opitmizes +//! it out to something cool). +//! +//! There isn't a lot here (not as fun as bitstream ) but I hope you find what you're looking for. +//! +//! O and ~~subscribe to my youtube channel~~ + +#![cfg(any(target_arch = "x86", target_arch = "x86_64"))] +#![cfg(feature = "x86")] +#![allow( + clippy::wildcard_imports, + clippy::cast_possible_truncation, + clippy::too_many_arguments, + clippy::inline_always, + clippy::doc_markdown, + dead_code +)] + +#[cfg(target_arch = "x86")] +use core::arch::x86::*; +#[cfg(target_arch = "x86_64")] +use core::arch::x86_64::*; + +pub union YmmRegister { + // both are 32 when using std::mem::size_of + mm256: __m256i, + // for avx color conversion + array: [i16; 16] +} + +//-------------------------------------------------------------------------------------------------- +// AVX conversion routines +//-------------------------------------------------------------------------------------------------- + +/// +/// Convert YCBCR to RGB using AVX instructions +/// +/// # Note +///**IT IS THE RESPONSIBILITY OF THE CALLER TO CALL THIS IN CPUS SUPPORTING +/// AVX2 OTHERWISE THIS IS UB** +/// +/// *Peace* +/// +/// This library itself will ensure that it's never called in CPU's not +/// supporting AVX2 +/// +/// # Arguments +/// - `y`,`cb`,`cr`: A reference of 8 i32's +/// - `out`: The output 
array where we store our converted items +/// - `offset`: The position from 0 where we write these RGB values +#[inline(always)] +pub fn ycbcr_to_rgb_avx2( + y: &[i16; 16], cb: &[i16; 16], cr: &[i16; 16], out: &mut [u8], offset: &mut usize +) { + // call this in another function to tell RUST to vectorize this + // storing + unsafe { + ycbcr_to_rgb_avx2_1(y, cb, cr, out, offset); + } +} + +#[inline] +#[target_feature(enable = "avx2")] +#[target_feature(enable = "avx")] +unsafe fn ycbcr_to_rgb_avx2_1( + y: &[i16; 16], cb: &[i16; 16], cr: &[i16; 16], out: &mut [u8], offset: &mut usize +) { + // Load output buffer + let tmp: &mut [u8; 48] = out + .get_mut(*offset..*offset + 48) + .expect("Slice to small cannot write") + .try_into() + .unwrap(); + + let (r, g, b) = ycbcr_to_rgb_baseline(y, cb, cr); + + let mut j = 0; + let mut i = 0; + while i < 48 { + tmp[i] = r.array[j] as u8; + + tmp[i + 1] = g.array[j] as u8; + tmp[i + 2] = b.array[j] as u8; + i += 3; + j += 1; + } + + *offset += 48; +} + +/// Baseline implementation of YCBCR to RGB for avx, +/// +/// It uses integer operations as opposed to floats, the approximation is +/// difficult for the eye to see, but this means that it may produce different +/// values with libjpeg_turbo. if accuracy is of utmost importance, use that. 
+/// +/// this function should be called for most implementations, including +/// - ycbcr->rgb +/// - ycbcr->rgba +/// - ycbcr->brga +/// - ycbcr->rgbx +#[inline] +#[target_feature(enable = "avx2")] +#[target_feature(enable = "avx")] +unsafe fn ycbcr_to_rgb_baseline( + y: &[i16; 16], cb: &[i16; 16], cr: &[i16; 16] +) -> (YmmRegister, YmmRegister, YmmRegister) { + // Load values into a register + // + // dst[127:0] := MEM[loaddr+127:loaddr] + // dst[255:128] := MEM[hiaddr+127:hiaddr] + let y_c = _mm256_loadu_si256(y.as_ptr().cast()); + + let cb_c = _mm256_loadu_si256(cb.as_ptr().cast()); + + let cr_c = _mm256_loadu_si256(cr.as_ptr().cast()); + + // AVX version of integer version in https://stackoverflow.com/questions/4041840/function-to-convert-ycbcr-to-rgb + + // Cb = Cb-128; + let cb_r = _mm256_sub_epi16(cb_c, _mm256_set1_epi16(128)); + + // cr = Cb -128; + let cr_r = _mm256_sub_epi16(cr_c, _mm256_set1_epi16(128)); + + // Calculate Y->R + // r = Y + 45 * Cr / 32 + // 45*cr + let r1 = _mm256_mullo_epi16(_mm256_set1_epi16(45), cr_r); + + // r1>>5 + let r2 = _mm256_srai_epi16::<5>(r1); + + //y+r2 + + let r = YmmRegister { + mm256: clamp_avx(_mm256_add_epi16(y_c, r2)) + }; + + // g = Y - (11 * Cb + 23 * Cr) / 32 ; + + // 11*cb + let g1 = _mm256_mullo_epi16(_mm256_set1_epi16(11), cb_r); + + // 23*cr + let g2 = _mm256_mullo_epi16(_mm256_set1_epi16(23), cr_r); + + //(11 + //(11 * Cb + 23 * Cr) + let g3 = _mm256_add_epi16(g1, g2); + + // (11 * Cb + 23 * Cr) / 32 + let g4 = _mm256_srai_epi16::<5>(g3); + + // Y - (11 * Cb + 23 * Cr) / 32 ; + let g = YmmRegister { + mm256: clamp_avx(_mm256_sub_epi16(y_c, g4)) + }; + + // b = Y + 113 * Cb / 64 + // 113 * cb + let b1 = _mm256_mullo_epi16(_mm256_set1_epi16(113), cb_r); + + //113 * Cb / 64 + let b2 = _mm256_srai_epi16::<6>(b1); + + // b = Y + 113 * Cb / 64 ; + let b = YmmRegister { + mm256: clamp_avx(_mm256_add_epi16(b2, y_c)) + }; + + return (r, g, b); +} + +#[inline] +#[target_feature(enable = "avx2")] +/// A baseline 
implementation of YCbCr to RGB conversion which does not carry +/// out clamping +/// +/// This is used by the `ycbcr_to_rgba_avx` and `ycbcr_to_rgbx` conversion +/// routines +unsafe fn ycbcr_to_rgb_baseline_no_clamp( + y: &[i16; 16], cb: &[i16; 16], cr: &[i16; 16] +) -> (__m256i, __m256i, __m256i) { + // Load values into a register + // + let y_c = _mm256_loadu_si256(y.as_ptr().cast()); + + let cb_c = _mm256_loadu_si256(cb.as_ptr().cast()); + + let cr_c = _mm256_loadu_si256(cr.as_ptr().cast()); + + // AVX version of integer version in https://stackoverflow.com/questions/4041840/function-to-convert-ycbcr-to-rgb + + // Cb = Cb-128; + let cb_r = _mm256_sub_epi16(cb_c, _mm256_set1_epi16(128)); + + // cr = Cb -128; + let cr_r = _mm256_sub_epi16(cr_c, _mm256_set1_epi16(128)); + + // Calculate Y->R + // r = Y + 45 * Cr / 32 + // 45*cr + let r1 = _mm256_mullo_epi16(_mm256_set1_epi16(45), cr_r); + + // r1>>5 + let r2 = _mm256_srai_epi16::<5>(r1); + + //y+r2 + + let r = _mm256_add_epi16(y_c, r2); + + // g = Y - (11 * Cb + 23 * Cr) / 32 ; + + // 11*cb + let g1 = _mm256_mullo_epi16(_mm256_set1_epi16(11), cb_r); + + // 23*cr + let g2 = _mm256_mullo_epi16(_mm256_set1_epi16(23), cr_r); + + //(11 + //(11 * Cb + 23 * Cr) + let g3 = _mm256_add_epi16(g1, g2); + + // (11 * Cb + 23 * Cr) / 32 + let g4 = _mm256_srai_epi16::<5>(g3); + + // Y - (11 * Cb + 23 * Cr) / 32 ; + let g = _mm256_sub_epi16(y_c, g4); + + // b = Y + 113 * Cb / 64 + // 113 * cb + let b1 = _mm256_mullo_epi16(_mm256_set1_epi16(113), cb_r); + + //113 * Cb / 64 + let b2 = _mm256_srai_epi16::<6>(b1); + + // b = Y + 113 * Cb / 64 ; + let b = _mm256_add_epi16(b2, y_c); + + return (r, g, b); +} + +#[inline(always)] +pub fn ycbcr_to_rgba_avx2( + y: &[i16; 16], cb: &[i16; 16], cr: &[i16; 16], out: &mut [u8], offset: &mut usize +) { + unsafe { + ycbcr_to_rgba_unsafe(y, cb, cr, out, offset); + } +} + +#[inline] +#[target_feature(enable = "avx2")] +#[rustfmt::skip] +unsafe fn ycbcr_to_rgba_unsafe( + y: &[i16; 16], cb: &[i16; 
16], cr: &[i16; 16], + out: &mut [u8], + offset: &mut usize, +) +{ + // check if we have enough space to write. + let tmp:& mut [u8; 64] = out.get_mut(*offset..*offset + 64).expect("Slice to small cannot write").try_into().unwrap(); + + let (r, g, b) = ycbcr_to_rgb_baseline_no_clamp(y, cb, cr); + + // set alpha channel to 255 for opaque + + // And no these comments were not from me pressing the keyboard + + // Pack the integers into u8's using signed saturation. + let c = _mm256_packus_epi16(r, g); //aaaaa_bbbbb_aaaaa_bbbbbb + let d = _mm256_packus_epi16(b, _mm256_set1_epi16(255)); // cccccc_dddddd_ccccccc_ddddd + // transpose_u16 and interleave channels + let e = _mm256_unpacklo_epi8(c, d); //ab_ab_ab_ab_ab_ab_ab_ab + let f = _mm256_unpackhi_epi8(c, d); //cd_cd_cd_cd_cd_cd_cd_cd + // final transpose_u16 + let g = _mm256_unpacklo_epi8(e, f); //abcd_abcd_abcd_abcd_abcd + let h = _mm256_unpackhi_epi8(e, f); + + + // undo packus shuffling... + let i = _mm256_permute2x128_si256::<{ shuffle(3, 2, 1, 0) }>(g, h); + + let j = _mm256_permute2x128_si256::<{ shuffle(1, 2, 3, 0) }>(g, h); + + let k = _mm256_permute2x128_si256::<{ shuffle(3, 2, 0, 1) }>(g, h); + + let l = _mm256_permute2x128_si256::<{ shuffle(0, 3, 2, 1) }>(g, h); + + let m = _mm256_blend_epi32::<0b1111_0000>(i, j); + + let n = _mm256_blend_epi32::<0b1111_0000>(k, l); + + + // Store + // Use streaming instructions to prevent polluting the cache? 
+ _mm256_storeu_si256(tmp.as_mut_ptr().cast(), m); + + _mm256_storeu_si256(tmp[32..].as_mut_ptr().cast(), n); + + *offset += 64; +} + +/// Clamp values between 0 and 255 +/// +/// This function clamps all values in `reg` to be between 0 and 255 +///( the accepted values for RGB) +#[inline] +#[target_feature(enable = "avx2")] +#[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +unsafe fn clamp_avx(reg: __m256i) -> __m256i { + // the lowest value + let min_s = _mm256_set1_epi16(0); + + // Highest value + let max_s = _mm256_set1_epi16(255); + + let max_v = _mm256_max_epi16(reg, min_s); //max(a,0) + let min_v = _mm256_min_epi16(max_v, max_s); //min(max(a,0),255) + return min_v; +} + +#[inline] +const fn shuffle(z: i32, y: i32, x: i32, w: i32) -> i32 { + (z << 6) | (y << 4) | (x << 2) | w +} diff --git a/third_party/zune-jpeg/src/color_convert/scalar.rs b/third_party/zune-jpeg/src/color_convert/scalar.rs new file mode 100644 index 0000000..f217f30 --- /dev/null +++ b/third_party/zune-jpeg/src/color_convert/scalar.rs @@ -0,0 +1,116 @@ +/* + * Copyright (c) 2023. + * + * This software is free software; + * + * You can redistribute it or modify it under terms of the MIT, Apache License or Zlib license + */ + +use core::convert::TryInto; + +/// Limit values to 0 and 255 +#[inline] +#[allow(clippy::cast_possible_truncation, clippy::cast_sign_loss, dead_code)] +fn clamp(a: i16) -> u8 { + a.clamp(0, 255) as u8 +} + +/// YCbCr to RGBA color conversion + +/// Convert YCbCr to RGB/BGR +/// +/// Converts to RGB if const BGRA is false +/// +/// Converts to BGR if const BGRA is true +pub fn ycbcr_to_rgba_inner_16_scalar( + y: &[i16; 16], cb: &[i16; 16], cr: &[i16; 16], output: &mut [u8], pos: &mut usize +) { + let (_, output_position) = output.split_at_mut(*pos); + + // Convert into a slice with 64 elements for Rust to see we won't go out of bounds. 
+ let opt: &mut [u8; 64] = output_position + .get_mut(0..64) + .expect("Slice to small cannot write") + .try_into() + .unwrap(); + for ((y, (cb, cr)), out) in y + .iter() + .zip(cb.iter().zip(cr.iter())) + .zip(opt.chunks_exact_mut(4)) + { + let cr = cr - 128; + let cb = cb - 128; + + let r = y + ((45_i16.wrapping_mul(cr)) >> 5); + let g = y - ((11_i16.wrapping_mul(cb) + 23_i16.wrapping_mul(cr)) >> 5); + let b = y + ((113_i16.wrapping_mul(cb)) >> 6); + + if BGRA { + out[0] = clamp(b); + out[1] = clamp(g); + out[2] = clamp(r); + out[3] = 255; + } else { + out[0] = clamp(r); + out[1] = clamp(g); + out[2] = clamp(b); + out[3] = 255; + } + } + *pos += 64; +} + +/// Convert YCbCr to RGB/BGR +/// +/// Converts to RGB if const BGRA is false +/// +/// Converts to BGR if const BGRA is true +pub fn ycbcr_to_rgb_inner_16_scalar( + y: &[i16; 16], cb: &[i16; 16], cr: &[i16; 16], output: &mut [u8], pos: &mut usize +) { + let (_, output_position) = output.split_at_mut(*pos); + + // Convert into a slice with 48 elements + let opt: &mut [u8; 48] = output_position + .get_mut(0..48) + .expect("Slice to small cannot write") + .try_into() + .unwrap(); + + for ((y, (cb, cr)), out) in y + .iter() + .zip(cb.iter().zip(cr.iter())) + .zip(opt.chunks_exact_mut(3)) + { + let cr = cr - 128; + let cb = cb - 128; + + let r = y + ((45_i16.wrapping_mul(cr)) >> 5); + let g = y - ((11_i16.wrapping_mul(cb) + 23_i16.wrapping_mul(cr)) >> 5); + let b = y + ((113_i16.wrapping_mul(cb)) >> 6); + + if BGRA { + out[0] = clamp(b); + out[1] = clamp(g); + out[2] = clamp(r); + } else { + out[0] = clamp(r); + out[1] = clamp(g); + out[2] = clamp(b); + } + } + + // Increment pos + *pos += 48; +} + +pub fn ycbcr_to_grayscale(y: &[i16], width: usize, padded_width: usize, output: &mut [u8]) { + for (y_in, out) in y + .chunks_exact(padded_width) + .zip(output.chunks_exact_mut(width)) + { + for (y, out) in y_in.iter().zip(out.iter_mut()) { + *out = *y as u8; + } + } +} diff --git 
a/third_party/zune-jpeg/src/components.rs b/third_party/zune-jpeg/src/components.rs new file mode 100644 index 0000000..467c33a --- /dev/null +++ b/third_party/zune-jpeg/src/components.rs @@ -0,0 +1,211 @@ +/* + * Copyright (c) 2023. + * + * This software is free software; + * + * You can redistribute it or modify it under terms of the MIT, Apache License or Zlib license + */ + +//! This module exports a single struct to store information about +//! JPEG image components +//! +//! The data is extracted from a SOF header. + +use alloc::vec::Vec; +use alloc::{format, vec}; + +use zune_core::log::trace; + +use crate::decoder::MAX_COMPONENTS; +use crate::errors::DecodeErrors; +use crate::upsampler::upsample_no_op; + +/// Represents an up-sampler function, this function will be called to upsample +/// a down-sampled image + +pub type UpSampler = fn( + input: &[i16], + in_near: &[i16], + in_far: &[i16], + scratch_space: &mut [i16], + output: &mut [i16] +); + +/// Component Data from start of frame +#[derive(Clone)] +pub(crate) struct Components { + /// The type of component that has the metadata below, can be Y,Cb or Cr + pub component_id: ComponentID, + /// Sub-sampling ratio of this component in the x-plane + pub vertical_sample: usize, + /// Sub-sampling ratio of this component in the y-plane + pub horizontal_sample: usize, + /// DC huffman table position + pub dc_huff_table: usize, + /// AC huffman table position for this element. + pub ac_huff_table: usize, + /// Quantization table number + pub quantization_table_number: u8, + /// Specifies quantization table to use with this component + pub quantization_table: [i32; 64], + /// dc prediction for the component + pub dc_pred: i32, + /// An up-sampling function, can be basic or SSE, depending + /// on the platform + pub up_sampler: UpSampler, + /// How pixels do we need to go to get to the next line? 
+ pub width_stride: usize, + /// Component ID for progressive + pub id: u8, + /// Whether we need to decode this image component. + pub needed: bool, + /// Upsample scanline + pub raw_coeff: Vec, + /// Upsample destination, stores a scanline worth of sub sampled data + pub upsample_dest: Vec, + /// previous row, used to handle MCU boundaries + pub row_up: Vec, + /// current row, used to handle MCU boundaries again + pub row: Vec, + pub first_row_upsample_dest: Vec, + pub idct_pos: usize, + pub x: usize, + pub w2: usize, + pub y: usize, + pub sample_ratio: SampleRatios, + // a very annoying bug + pub fix_an_annoying_bug: usize +} + +impl Components { + /// Create a new instance from three bytes from the start of frame + #[inline] + pub fn from(a: [u8; 3], pos: u8) -> Result { + // it's a unique identifier. + // doesn't have to be ascending + // see tests/inputs/huge_sof_number + // + // For such cases, use the position of the component + // to determine width + + let id = match pos { + 0 => ComponentID::Y, + 1 => ComponentID::Cb, + 2 => ComponentID::Cr, + 3 => ComponentID::Q, + _ => { + return Err(DecodeErrors::Format(format!( + "Unknown component id found,{pos}, expected value between 1 and 4" + ))) + } + }; + + let horizontal_sample = (a[1] >> 4) as usize; + let vertical_sample = (a[1] & 0x0f) as usize; + let quantization_table_number = a[2]; + // confirm quantization number is between 0 and MAX_COMPONENTS + if usize::from(quantization_table_number) >= MAX_COMPONENTS { + return Err(DecodeErrors::Format(format!( + "Too large quantization number :{quantization_table_number}, expected value between 0 and {MAX_COMPONENTS}" + ))); + } + // check that upsampling ratios are powers of two + // if these fail, it's probably a corrupt image. 
+ if !horizontal_sample.is_power_of_two() { + return Err(DecodeErrors::Format(format!( + "Horizontal sample is not a power of two({horizontal_sample}) cannot decode" + ))); + } + + if !vertical_sample.is_power_of_two() { + return Err(DecodeErrors::Format(format!( + "Vertical sub-sample is not power of two({vertical_sample}) cannot decode" + ))); + } + + trace!( + "Component ID:{:?} \tHS:{} VS:{} QT:{}", + id, + horizontal_sample, + vertical_sample, + quantization_table_number + ); + + Ok(Components { + component_id: id, + vertical_sample, + horizontal_sample, + quantization_table_number, + first_row_upsample_dest: vec![], + // These two will be set with sof marker + dc_huff_table: 0, + ac_huff_table: 0, + quantization_table: [0; 64], + dc_pred: 0, + up_sampler: upsample_no_op, + // set later + width_stride: horizontal_sample, + id: a[0], + needed: true, + raw_coeff: vec![], + upsample_dest: vec![], + row_up: vec![], + row: vec![], + idct_pos: 0, + x: 0, + y: 0, + w2: 0, + sample_ratio: SampleRatios::None, + fix_an_annoying_bug: 1 + }) + } + /// Setup space for upsampling + /// + /// During upsample, we need a reference of the last row so that upsampling can + /// proceed correctly, + /// so we store the last line of every scanline and use it for the next upsampling procedure + /// to store this, but since we don't need it for 1v1 upsampling, + /// we only call this for routines that need upsampling + /// + /// # Requirements + /// - width stride of this element is set for the component. 
+ pub fn setup_upsample_scanline(&mut self) { + self.row = vec![0; self.width_stride * self.vertical_sample]; + self.row_up = vec![0; self.width_stride * self.vertical_sample]; + self.first_row_upsample_dest = + vec![128; self.vertical_sample * self.width_stride * self.sample_ratio.sample()]; + self.upsample_dest = + vec![0; self.width_stride * self.sample_ratio.sample() * self.fix_an_annoying_bug * 8]; + } +} + +/// Component ID's +#[derive(Copy, Debug, Clone, PartialEq, Eq)] +pub enum ComponentID { + /// Luminance channel + Y, + /// Blue chrominance + Cb, + /// Red chrominance + Cr, + /// Q or fourth component + Q +} + +#[derive(Copy, Debug, Clone, PartialEq, Eq)] +pub enum SampleRatios { + HV, + V, + H, + None +} + +impl SampleRatios { + pub fn sample(self) -> usize { + match self { + SampleRatios::HV => 4, + SampleRatios::V | SampleRatios::H => 2, + SampleRatios::None => 1 + } + } +} diff --git a/third_party/zune-jpeg/src/decoder.rs b/third_party/zune-jpeg/src/decoder.rs new file mode 100644 index 0000000..dab33b6 --- /dev/null +++ b/third_party/zune-jpeg/src/decoder.rs @@ -0,0 +1,910 @@ +/* + * Copyright (c) 2023. + * + * This software is free software; + * + * You can redistribute it or modify it under terms of the MIT, Apache License or Zlib license + */ + +//! Main image logic. 
+#![allow(clippy::doc_markdown)] + +use alloc::string::ToString; +use alloc::vec::Vec; +use alloc::{format, vec}; + +use zune_core::bytestream::{ZByteReaderTrait, ZReader}; +use zune_core::colorspace::ColorSpace; +use zune_core::log::{error, trace, warn}; +use zune_core::options::DecoderOptions; + +use crate::color_convert::choose_ycbcr_to_rgb_convert_func; +use crate::components::{Components, SampleRatios}; +use crate::errors::{DecodeErrors, UnsupportedSchemes}; +use crate::headers::{ + parse_app1, parse_app14, parse_app2, parse_dqt, parse_huffman, parse_sos, parse_start_of_frame +}; +use crate::huffman::HuffmanTable; +use crate::idct::choose_idct_func; +use crate::marker::Marker; +use crate::misc::SOFMarkers; +use crate::upsampler::{ + choose_horizontal_samp_function, choose_hv_samp_function, choose_v_samp_function, + upsample_no_op +}; + +/// Maximum components +pub(crate) const MAX_COMPONENTS: usize = 4; + +/// Maximum image dimensions supported. +pub(crate) const MAX_DIMENSIONS: usize = 1 << 27; + +/// Color conversion function that can convert YCbCr colorspace to RGB(A/X) for +/// 16 values +/// +/// The following are guarantees to the following functions +/// +/// 1. The `&[i16]` slices passed contain 16 items +/// +/// 2. The slices passed are in the following order +/// `y,cb,cr` +/// +/// 3. `&mut [u8]` is zero initialized +/// +/// 4. `&mut usize` points to the position in the array where new values should +/// be used +/// +/// The pointer should +/// 1. Carry out color conversion +/// 2. 
Update `&mut usize` with the new position + +pub type ColorConvert16Ptr = fn(&[i16; 16], &[i16; 16], &[i16; 16], &mut [u8], &mut usize); + +/// IDCT function prototype +/// +/// This encapsulates a dequantize and IDCT function which will carry out the +/// following functions +/// +/// Multiply each 64 element block of `&mut [i16]` with `&Aligned32<[i32;64]>` +/// Carry out IDCT (type 3 dct) on ach block of 64 i16's +pub type IDCTPtr = fn(&mut [i32; 64], &mut [i16], usize); + +/// An encapsulation of an ICC chunk +pub(crate) struct ICCChunk { + pub(crate) seq_no: u8, + pub(crate) num_markers: u8, + pub(crate) data: Vec +} + +/// A JPEG Decoder Instance. +#[allow(clippy::upper_case_acronyms, clippy::struct_excessive_bools)] +pub struct JpegDecoder { + /// Struct to hold image information from SOI + pub(crate) info: ImageInfo, + /// Quantization tables, will be set to none and the tables will + /// be moved to `components` field + pub(crate) qt_tables: [Option<[i32; 64]>; MAX_COMPONENTS], + /// DC Huffman Tables with a maximum of 4 tables for each component + pub(crate) dc_huffman_tables: [Option; MAX_COMPONENTS], + /// AC Huffman Tables with a maximum of 4 tables for each component + pub(crate) ac_huffman_tables: [Option; MAX_COMPONENTS], + /// Image components, holds information like DC prediction and quantization + /// tables of a component + pub(crate) components: Vec, + /// maximum horizontal component of all channels in the image + pub(crate) h_max: usize, + // maximum vertical component of all channels in the image + pub(crate) v_max: usize, + /// mcu's width (interleaved scans) + pub(crate) mcu_width: usize, + /// MCU height(interleaved scans + pub(crate) mcu_height: usize, + /// Number of MCU's in the x plane + pub(crate) mcu_x: usize, + /// Number of MCU's in the y plane + pub(crate) mcu_y: usize, + /// Is the image interleaved? 
+ pub(crate) is_interleaved: bool, + pub(crate) sub_sample_ratio: SampleRatios, + /// Image input colorspace, should be YCbCr for a sane image, might be + /// grayscale too + pub(crate) input_colorspace: ColorSpace, + // Progressive image details + /// Is the image progressive? + pub(crate) is_progressive: bool, + + /// Start of spectral scan + pub(crate) spec_start: u8, + /// End of spectral scan + pub(crate) spec_end: u8, + /// Successive approximation bit position high + pub(crate) succ_high: u8, + /// Successive approximation bit position low + pub(crate) succ_low: u8, + /// Number of components. + pub(crate) num_scans: u8, + // Function pointers, for pointy stuff. + /// Dequantize and idct function + // This is determined at runtime which function to run, statically it's + // initialized to a platform independent one and during initialization + // of this struct, we check if we can switch to a faster one which + // depend on certain CPU extensions. + pub(crate) idct_func: IDCTPtr, + // Color convert function which acts on 16 YCbCr values + pub(crate) color_convert_16: ColorConvert16Ptr, + pub(crate) z_order: [usize; MAX_COMPONENTS], + /// restart markers + pub(crate) restart_interval: usize, + pub(crate) todo: usize, + // decoder options + pub(crate) options: DecoderOptions, + // byte-stream + pub(crate) stream: ZReader, + // Indicate whether headers have been decoded + pub(crate) headers_decoded: bool, + pub(crate) seen_sof: bool, + // exif data, lifted from app2 + pub(crate) exif_data: Option>, + + pub(crate) icc_data: Vec, + pub(crate) is_mjpeg: bool, + pub(crate) coeff: usize // Solves some weird bug :) +} + +impl JpegDecoder +where + T: ZByteReaderTrait +{ + #[allow(clippy::redundant_field_names)] + fn default(options: DecoderOptions, buffer: T) -> Self { + let color_convert = choose_ycbcr_to_rgb_convert_func(ColorSpace::RGB, &options).unwrap(); + JpegDecoder { + info: ImageInfo::default(), + qt_tables: [None, None, None, None], + dc_huffman_tables: 
[None, None, None, None], + ac_huffman_tables: [None, None, None, None], + components: vec![], + // Interleaved information + h_max: 1, + v_max: 1, + mcu_height: 0, + mcu_width: 0, + mcu_x: 0, + mcu_y: 0, + is_interleaved: false, + sub_sample_ratio: SampleRatios::None, + is_progressive: false, + spec_start: 0, + spec_end: 0, + succ_high: 0, + succ_low: 0, + num_scans: 0, + idct_func: choose_idct_func(&options), + color_convert_16: color_convert, + input_colorspace: ColorSpace::YCbCr, + z_order: [0; MAX_COMPONENTS], + restart_interval: 0, + todo: 0x7fff_ffff, + options: options, + stream: ZReader::new(buffer), + headers_decoded: false, + seen_sof: false, + exif_data: None, + icc_data: vec![], + is_mjpeg: false, + coeff: 1 + } + } + /// Decode a buffer already in memory + /// + /// The buffer should be a valid jpeg file, perhaps created by the command + /// `std:::fs::read()` or a JPEG file downloaded from the internet. + /// + /// # Errors + /// See DecodeErrors for an explanation + pub fn decode(&mut self) -> Result, DecodeErrors> { + self.decode_headers()?; + let size = self.output_buffer_size().unwrap(); + let mut out = vec![0; size]; + self.decode_into(&mut out)?; + Ok(out) + } + + /// Create a new Decoder instance + /// + /// # Arguments + /// - `stream`: The raw bytes of a jpeg file. 
+ #[must_use] + #[allow(clippy::new_without_default)] + pub fn new(stream: T) -> JpegDecoder { + JpegDecoder::default(DecoderOptions::default(), stream) + } + + /// Returns the image information + /// + /// This **must** be called after a subsequent call to [`decode`] or [`decode_headers`] + /// it will return `None` + /// + /// # Returns + /// - `Some(info)`: Image information,width, height, number of components + /// - None: Indicates image headers haven't been decoded + /// + /// [`decode`]: JpegDecoder::decode + /// [`decode_headers`]: JpegDecoder::decode_headers + #[must_use] + pub fn info(&self) -> Option { + // we check for fails to that call by comparing what we have to the default, if + // it's default we assume that the caller failed to uphold the + // guarantees. We can be sure that an image cannot be the default since + // its a hard panic in-case width or height are set to zero. + if !self.headers_decoded { + return None; + } + + return Some(self.info.clone()); + } + + /// Return the number of bytes required to hold a decoded image frame + /// decoded using the given input transformations + /// + /// # Returns + /// - `Some(usize)`: Minimum size for a buffer needed to decode the image + /// - `None`: Indicates the image was not decoded, or image dimensions would overflow a usize + /// + #[must_use] + pub fn output_buffer_size(&self) -> Option { + return if self.headers_decoded { + Some( + usize::from(self.width()) + .checked_mul(usize::from(self.height()))? + .checked_mul(self.options.jpeg_get_out_colorspace().num_components())? 
+ ) + } else { + None + }; + } + + /// Get an immutable reference to the decoder options + /// for the decoder instance + /// + /// This can be used to modify options before actual decoding + /// but after initial creation + /// + /// # Example + /// ```no_run + /// use zune_core::bytestream::ZCursor; + /// use zune_jpeg::JpegDecoder; + /// + /// let mut decoder = JpegDecoder::new(ZCursor::new(&[])); + /// // get current options + /// let mut options = decoder.options(); + /// // modify it + /// let new_options = options.set_max_width(10); + /// // set it back + /// decoder.set_options(new_options); + /// + /// ``` + #[must_use] + pub const fn options(&self) -> &DecoderOptions { + &self.options + } + /// Return the input colorspace of the image + /// + /// This indicates the colorspace that is present in + /// the image, but this may be different to the colorspace that + /// the output will be transformed to + /// + /// # Returns + /// -`Some(Colorspace)`: Input colorspace + /// - None : Indicates the headers weren't decoded + #[must_use] + pub fn input_colorspace(&self) -> Option { + return if self.headers_decoded { Some(self.input_colorspace) } else { None }; + } + /// Set decoder options + /// + /// This can be used to set new options even after initialization + /// but before decoding. 
+ /// + /// This does not bear any significance after decoding an image + /// + /// # Arguments + /// - `options`: New decoder options + /// + /// # Example + /// Set maximum jpeg progressive passes to be 4 + /// + /// ```no_run + /// use zune_core::bytestream::ZCursor; + /// use zune_jpeg::JpegDecoder; + /// let mut decoder =JpegDecoder::new(ZCursor::new(&[])); + /// // this works also because DecoderOptions implements `Copy` + /// let options = decoder.options().jpeg_set_max_scans(4); + /// // set the new options + /// decoder.set_options(options); + /// // now decode + /// decoder.decode().unwrap(); + /// ``` + pub fn set_options(&mut self, options: DecoderOptions) { + self.options = options; + } + /// Decode Decoder headers + /// + /// This routine takes care of parsing supported headers from a Decoder + /// image + /// + /// # Supported Headers + /// - APP(0) + /// - SOF(O) + /// - DQT -> Quantization tables + /// - DHT -> Huffman tables + /// - SOS -> Start of Scan + /// # Unsupported Headers + /// - SOF(n) -> Decoder images which are not baseline/progressive + /// - DAC -> Images using Arithmetic tables + /// - JPG(n) + fn decode_headers_internal(&mut self) -> Result<(), DecodeErrors> { + if self.headers_decoded { + trace!("Headers decoded!"); + return Ok(()); + } + // match output colorspace here + // we know this will only be called once per image + // so makes sense + // We only care for ycbcr to rgb/rgba here + // in case one is using another colorspace. 
+ // May god help you + let out_colorspace = self.options.jpeg_get_out_colorspace(); + + if matches!( + out_colorspace, + ColorSpace::BGR | ColorSpace::BGRA | ColorSpace::RGB | ColorSpace::RGBA + ) { + self.color_convert_16 = choose_ycbcr_to_rgb_convert_func( + self.options.jpeg_get_out_colorspace(), + &self.options + ) + .unwrap(); + } + // First two bytes should be jpeg soi marker + let magic_bytes = self.stream.get_u16_be_err()?; + + let mut last_byte = 0; + let mut bytes_before_marker = 0; + + if magic_bytes != 0xffd8 { + return Err(DecodeErrors::IllegalMagicBytes(magic_bytes)); + } + + loop { + // read a byte + let mut m = self.stream.read_u8_err()?; + + // AND OF COURSE some images will have fill bytes in their marker + // bitstreams because why not. + // + // I am disappointed as a man. + if (m == 0xFF || m == 0) && last_byte == 0xFF { + // This handles the edge case where + // images have markers with fill bytes(0xFF) + // or byte stuffing (0) + // I.e 0xFF 0xFF 0xDA + // and + // 0xFF 0 0xDA + // It should ignore those fill bytes and take 0xDA + // I don't know why such images exist + // but they do. 
+ // so this is for you (with love) + while m == 0xFF || m == 0x0 { + last_byte = m; + m = self.stream.read_u8_err()?; + } + } + // Last byte should be 0xFF to confirm existence of a marker since markers look + // like OxFF(some marker data) + if last_byte == 0xFF { + let marker = Marker::from_u8(m); + if let Some(n) = marker { + if bytes_before_marker > 3 { + if self.options.strict_mode() + /*No reason to use this*/ + { + return Err(DecodeErrors::FormatStatic( + "[strict-mode]: Extra bytes between headers" + )); + } + + error!( + "Extra bytes {} before marker 0xFF{:X}", + bytes_before_marker - 3, + m + ); + } + + bytes_before_marker = 0; + + self.parse_marker_inner(n)?; + + if n == Marker::SOS { + self.headers_decoded = true; + trace!("Input colorspace {:?}", self.input_colorspace); + return Ok(()); + } + } else { + bytes_before_marker = 0; + + warn!("Marker 0xFF{:X} not known", m); + + let length = self.stream.get_u16_be_err()?; + + if length < 2 { + return Err(DecodeErrors::Format(format!( + "Found a marker with invalid length : {length}" + ))); + } + + warn!("Skipping {} bytes", length - 2); + self.stream.skip((length - 2) as usize)?; + } + } + last_byte = m; + bytes_before_marker += 1; + } + } + #[allow(clippy::too_many_lines)] + pub(crate) fn parse_marker_inner(&mut self, m: Marker) -> Result<(), DecodeErrors> { + match m { + Marker::SOF(0..=2) => { + let marker = { + // choose marker + if m == Marker::SOF(0) || m == Marker::SOF(1) { + SOFMarkers::BaselineDct + } else { + self.is_progressive = true; + SOFMarkers::ProgressiveDctHuffman + } + }; + + trace!("Image encoding scheme =`{:?}`", marker); + // get components + parse_start_of_frame(marker, self)?; + } + // Start of Frame Segments not supported + Marker::SOF(v) => { + let feature = UnsupportedSchemes::from_int(v); + + if let Some(feature) = feature { + return Err(DecodeErrors::Unsupported(feature)); + } + + return Err(DecodeErrors::Format("Unsupported image format".to_string())); + } + //APP(0) segment + 
Marker::APP(0) => { + let mut length = self.stream.get_u16_be_err()?; + + if length < 2 { + return Err(DecodeErrors::Format(format!( + "Found a marker with invalid length:{length}\n" + ))); + } + // skip for now + if length > 5 { + let mut buffer = [0u8; 5]; + self.stream.read_exact_bytes(&mut buffer)?; + if &buffer == b"AVI1\0" { + self.is_mjpeg = true; + } + length -= 5; + } + + self.stream.skip(length.saturating_sub(2) as usize)?; + + //parse_app(buf, m, &mut self.info)?; + } + Marker::APP(1) => { + parse_app1(self)?; + } + + Marker::APP(2) => { + parse_app2(self)?; + } + // Quantization tables + Marker::DQT => { + parse_dqt(self)?; + } + // Huffman tables + Marker::DHT => { + parse_huffman(self)?; + } + // Start of Scan Data + Marker::SOS => { + parse_sos(self)?; + + // break after reading the start of scan. + // what follows is the image data + return Ok(()); + } + Marker::EOI => return Err(DecodeErrors::FormatStatic("Premature End of image")), + + Marker::DAC | Marker::DNL => { + return Err(DecodeErrors::Format(format!( + "Parsing of the following header `{m:?}` is not supported,\ + cannot continue" + ))); + } + Marker::DRI => { + trace!("DRI marker present"); + + if self.stream.get_u16_be_err()? 
!= 4 { + return Err(DecodeErrors::Format( + "Bad DRI length, Corrupt JPEG".to_string() + )); + } + + self.restart_interval = usize::from(self.stream.get_u16_be_err()?); + self.todo = self.restart_interval; + } + Marker::APP(14) => { + parse_app14(self)?; + } + _ => { + warn!( + "Capabilities for processing marker \"{:?}\" not implemented", + m + ); + + let length = self.stream.get_u16_be_err()?; + + if length < 2 { + return Err(DecodeErrors::Format(format!( + "Found a marker with invalid length:{length}\n" + ))); + } + warn!("Skipping {} bytes", length - 2); + self.stream.skip((length - 2) as usize)?; + } + } + Ok(()) + } + /// Get the embedded ICC profile if it exists + /// and is correct + /// + /// One needs not to decode the whole image to extract this, + /// calling [`decode_headers`] for an image with an ICC profile + /// allows you to decode this + /// + /// # Returns + /// - `Some(Vec)`: The raw ICC profile of the image + /// - `None`: May indicate an error in the ICC profile , non-existence of + /// an ICC profile, or that the headers weren't decoded. 
+ /// + /// [`decode_headers`]:Self::decode_headers + #[must_use] + pub fn icc_profile(&self) -> Option> { + let mut marker_present: [Option<&ICCChunk>; 256] = [None; 256]; + + if !self.headers_decoded { + return None; + } + let num_markers = self.icc_data.len(); + + if num_markers == 0 || num_markers >= 255 { + return None; + } + // check validity + for chunk in &self.icc_data { + if usize::from(chunk.num_markers) != num_markers { + // all the lengths must match + return None; + } + if chunk.seq_no == 0 { + warn!("Zero sequence number in ICC, corrupt ICC chunk"); + return None; + } + if marker_present[usize::from(chunk.seq_no)].is_some() { + // duplicate seq_no + warn!("Duplicate sequence number in ICC, corrupt chunk"); + return None; + } + + marker_present[usize::from(chunk.seq_no)] = Some(chunk); + } + let mut data = Vec::with_capacity(1000); + // assemble the data now + for chunk in marker_present.get(1..=num_markers).unwrap() { + if let Some(ch) = chunk { + data.extend_from_slice(&ch.data); + } else { + warn!("Missing icc sequence number, corrupt ICC chunk "); + return None; + } + } + + Some(data) + } + /// Return the exif data for the file + /// + /// This returns the raw exif data starting at the + /// TIFF header + /// + /// # Returns + /// -`Some(data)`: The raw exif data, if present in the image + /// - None: May indicate the following + /// + /// 1. The image doesn't have exif data + /// 2. The image headers haven't been decoded + #[must_use] + pub fn exif(&self) -> Option<&Vec> { + return self.exif_data.as_ref(); + } + /// Get the output colorspace the image pixels will be decoded into + /// + /// + /// # Note. + /// This field can only be regarded after decoding headers, + /// as markers such as Adobe APP14 may dictate different colorspaces + /// than requested. 
+ /// + /// Calling `decode_headers` is sufficient to know what colorspace the + /// output is, if this is called after `decode` it indicates the colorspace + /// the output is currently in + /// + /// Additionally not all input->output colorspace mappings are supported + /// but all input colorspaces can map to RGB colorspace, so that's a safe bet + /// if one is handling image formats + /// + ///# Returns + /// - `Some(Colorspace)`: If headers have been decoded, the colorspace the + ///output array will be in + ///- `None + #[must_use] + pub fn output_colorspace(&self) -> Option { + return if self.headers_decoded { + Some(self.options.jpeg_get_out_colorspace()) + } else { + None + }; + } + + /// Decode into a pre-allocated buffer + /// + /// It is an error if the buffer size is smaller than + /// [`output_buffer_size()`](Self::output_buffer_size) + /// + /// If the buffer is bigger than expected, we ignore the end padding bytes + /// + /// # Example + /// + /// - Read headers and then alloc a buffer big enough to hold the image + /// + /// ```no_run + /// use zune_core::bytestream::ZCursor; + /// use zune_jpeg::JpegDecoder; + /// let mut decoder = JpegDecoder::new(ZCursor::new(&[])); + /// // before we get output, we must decode the headers to get width + /// // height, and input colorspace + /// decoder.decode_headers().unwrap(); + /// + /// let mut out = vec![0;decoder.output_buffer_size().unwrap()]; + /// // write into out + /// decoder.decode_into(&mut out).unwrap(); + /// ``` + /// + /// + pub fn decode_into(&mut self, out: &mut [u8]) -> Result<(), DecodeErrors> { + self.decode_headers_internal()?; + + let expected_size = self.output_buffer_size().unwrap(); + + if out.len() < expected_size { + // too small of a size + return Err(DecodeErrors::TooSmallOutput(expected_size, out.len())); + } + + // ensure we don't touch anyone else's scratch space + let out_len = core::cmp::min(out.len(), expected_size); + let out = &mut out[0..out_len]; + + if 
self.is_progressive { + self.decode_mcu_ycbcr_progressive(out) + } else { + self.decode_mcu_ycbcr_baseline(out) + } + } + + /// Read only headers from a jpeg image buffer + /// + /// This allows you to extract important information like + /// image width and height without decoding the full image + /// + /// # Examples + /// ```no_run + /// use zune_core::bytestream::ZCursor; + /// use zune_jpeg::{JpegDecoder}; + /// + /// let img_data = std::fs::read("a_valid.jpeg").unwrap(); + /// let mut decoder = JpegDecoder::new(ZCursor::new(&img_data)); + /// decoder.decode_headers().unwrap(); + /// + /// println!("Total decoder dimensions are : {:?} pixels",decoder.dimensions()); + /// println!("Number of components in the image are {}", decoder.info().unwrap().components); + /// ``` + /// # Errors + /// See DecodeErrors enum for list of possible errors during decoding + pub fn decode_headers(&mut self) -> Result<(), DecodeErrors> { + self.decode_headers_internal()?; + Ok(()) + } + /// Create a new decoder with the specified options to be used for decoding + /// an image + /// + /// # Arguments + /// - `buf`: The input buffer from where we will pull in compressed jpeg bytes from + /// - `options`: Options specific to this decoder instance + #[must_use] + pub fn new_with_options(buf: T, options: DecoderOptions) -> JpegDecoder { + JpegDecoder::default(options, buf) + } + + /// Set up-sampling routines in case an image is down sampled + pub(crate) fn set_upsampling(&mut self) -> Result<(), DecodeErrors> { + // no sampling, return early + // check if horizontal max ==1 + if self.h_max == self.v_max && self.h_max == 1 { + return Ok(()); + } + match (self.h_max, self.v_max) { + (1, 1) => { + self.sub_sample_ratio = SampleRatios::None; + } + (1, 2) => { + self.sub_sample_ratio = SampleRatios::V; + } + (2, 1) => { + self.sub_sample_ratio = SampleRatios::H; + } + (2, 2) => { + self.sub_sample_ratio = SampleRatios::HV; + } + _ => { + return Err(DecodeErrors::Format( + "Unknown 
down-sampling method, cannot continue".to_string() + )) + } + } + + for comp in &mut self.components { + let hs = self.h_max / comp.horizontal_sample; + let vs = self.v_max / comp.vertical_sample; + + let samp_factor = match (hs, vs) { + (1, 1) => { + comp.sample_ratio = SampleRatios::None; + upsample_no_op + } + (2, 1) => { + comp.sample_ratio = SampleRatios::H; + choose_horizontal_samp_function(self.options.use_unsafe()) + } + (1, 2) => { + comp.sample_ratio = SampleRatios::V; + choose_v_samp_function(self.options.use_unsafe()) + } + (2, 2) => { + comp.sample_ratio = SampleRatios::HV; + choose_hv_samp_function(self.options.use_unsafe()) + } + _ => { + return Err(DecodeErrors::Format( + "Unknown down-sampling method, cannot continue".to_string() + )) + } + }; + comp.setup_upsample_scanline(); + comp.up_sampler = samp_factor; + } + + return Ok(()); + } + #[must_use] + /// Get the width of the image as a u16 + /// + /// The width lies between 1 and 65535 + pub(crate) fn width(&self) -> u16 { + self.info.width + } + + /// Get the height of the image as a u16 + /// + /// The height lies between 1 and 65535 + #[must_use] + pub(crate) fn height(&self) -> u16 { + self.info.height + } + + /// Get image dimensions as a tuple of width and height + /// or `None` if the image hasn't been decoded. 
+ /// + /// # Returns + /// - `Some(width,height)`: Image dimensions + /// - None : The image headers haven't been decoded + #[must_use] + pub const fn dimensions(&self) -> Option<(usize, usize)> { + return if self.headers_decoded { + Some((self.info.width as usize, self.info.height as usize)) + } else { + None + }; + } +} + +/// A struct representing Image Information +#[derive(Default, Clone, Eq, PartialEq)] +#[allow(clippy::module_name_repetitions)] +pub struct ImageInfo { + /// Width of the image + pub width: u16, + /// Height of image + pub height: u16, + /// PixelDensity + pub pixel_density: u8, + /// Start of frame markers + pub sof: SOFMarkers, + /// Horizontal sample + pub x_density: u16, + /// Vertical sample + pub y_density: u16, + /// Number of components + pub components: u8 +} + +impl ImageInfo { + /// Set width of the image + /// + /// Found in the start of frame + + pub(crate) fn set_width(&mut self, width: u16) { + self.width = width; + } + + /// Set height of the image + /// + /// Found in the start of frame + + pub(crate) fn set_height(&mut self, height: u16) { + self.height = height; + } + + /// Set the image density + /// + /// Found in the start of frame + + pub(crate) fn set_density(&mut self, density: u8) { + self.pixel_density = density; + } + + /// Set image Start of frame marker + /// + /// found in the Start of frame header + + pub(crate) fn set_sof_marker(&mut self, marker: SOFMarkers) { + self.sof = marker; + } + + /// Set image x-density(dots per pixel) + /// + /// Found in the APP(0) marker + #[allow(dead_code)] + pub(crate) fn set_x(&mut self, sample: u16) { + self.x_density = sample; + } + + /// Set image y-density + /// + /// Found in the APP(0) marker + #[allow(dead_code)] + pub(crate) fn set_y(&mut self, sample: u16) { + self.y_density = sample; + } +} diff --git a/third_party/zune-jpeg/src/errors.rs b/third_party/zune-jpeg/src/errors.rs new file mode 100644 index 0000000..1410961 --- /dev/null +++ 
b/third_party/zune-jpeg/src/errors.rs @@ -0,0 +1,167 @@ +/* + * Copyright (c) 2023. + * + * This software is free software; + * + * You can redistribute it or modify it under terms of the MIT, Apache License or Zlib license + */ + +//! Contains most common errors that may be encountered in decoding a Decoder +//! image + +use alloc::string::String; +use core::fmt::{Debug, Display, Formatter}; + +use zune_core::bytestream::ZByteIoError; + +use crate::misc::{ + START_OF_FRAME_EXT_AR, START_OF_FRAME_EXT_SEQ, START_OF_FRAME_LOS_SEQ, + START_OF_FRAME_LOS_SEQ_AR, START_OF_FRAME_PROG_DCT_AR +}; + +/// Common Decode errors +#[allow(clippy::module_name_repetitions)] +pub enum DecodeErrors { + /// Any other thing we do not know + Format(String), + /// Any other thing we do not know but we + /// don't need to allocate space on the heap + FormatStatic(&'static str), + /// Illegal Magic Bytes + IllegalMagicBytes(u16), + /// problems with the Huffman Tables in a Decoder file + HuffmanDecode(String), + /// Image has zero width + ZeroError, + /// Discrete Quantization Tables error + DqtError(String), + /// Start of scan errors + SosError(String), + /// Start of frame errors + SofError(String), + /// UnsupportedImages + Unsupported(UnsupportedSchemes), + /// MCU errors + MCUError(String), + /// Exhausted data + ExhaustedData, + /// Large image dimensions(Corrupted data)? 
+ LargeDimensions(usize), + /// Too small output for size + TooSmallOutput(usize, usize), + + IoErrors(ZByteIoError) +} + +#[cfg(feature = "std")] +impl std::error::Error for DecodeErrors {} + +impl From<&'static str> for DecodeErrors { + fn from(data: &'static str) -> Self { + return Self::FormatStatic(data); + } +} + +impl From for DecodeErrors { + fn from(data: ZByteIoError) -> Self { + return Self::IoErrors(data); + } +} +impl Debug for DecodeErrors { + fn fmt(&self, f: &mut Formatter<'_>) -> core::fmt::Result { + match &self + { + Self::Format(ref a) => write!(f, "{a:?}"), + Self::FormatStatic(a) => write!(f, "{:?}", &a), + + Self::HuffmanDecode(ref reason) => + { + write!(f, "Error decoding huffman values: {reason}") + } + Self::ZeroError => write!(f, "Image width or height is set to zero, cannot continue"), + Self::DqtError(ref reason) => write!(f, "Error parsing DQT segment. Reason:{reason}"), + Self::SosError(ref reason) => write!(f, "Error parsing SOS Segment. Reason:{reason}"), + Self::SofError(ref reason) => write!(f, "Error parsing SOF segment. Reason:{reason}"), + Self::IllegalMagicBytes(bytes) => + { + write!(f, "Error parsing image. Illegal start bytes:{bytes:X}") + } + Self::MCUError(ref reason) => write!(f, "Error in decoding MCU. 
Reason {reason}"), + Self::Unsupported(ref image_type) => + { + write!(f, "{image_type:?}") + } + Self::ExhaustedData => write!(f, "Exhausted data in the image"), + Self::LargeDimensions(ref dimensions) => write!( + f, + "Too large dimensions {dimensions},library supports up to {}", crate::decoder::MAX_DIMENSIONS + ), + Self::TooSmallOutput(expected, found) => write!(f, "Too small output, expected buffer with at least {expected} bytes but got one with {found} bytes"), + Self::IoErrors(error)=>write!(f,"I/O errors {error:?}"), + } + } +} + +impl Display for DecodeErrors { + fn fmt(&self, f: &mut Formatter<'_>) -> core::fmt::Result { + write!(f, "{self:?}") + } +} + +/// Contains Unsupported/Yet-to-be supported Decoder image encoding types. +#[derive(Eq, PartialEq, Copy, Clone)] +pub enum UnsupportedSchemes { + /// SOF_1 Extended sequential DCT,Huffman coding + ExtendedSequentialHuffman, + /// Lossless (sequential), huffman coding, + LosslessHuffman, + /// Extended sequential DEC, arithmetic coding + ExtendedSequentialDctArithmetic, + /// Progressive DCT, arithmetic coding, + ProgressiveDctArithmetic, + /// Lossless ( sequential), arithmetic coding + LosslessArithmetic +} + +impl Debug for UnsupportedSchemes { + fn fmt(&self, f: &mut Formatter<'_>) -> core::fmt::Result { + match &self { + Self::ExtendedSequentialHuffman => { + write!(f, "The library cannot yet decode images encoded using Extended Sequential Huffman encoding scheme yet.") + } + Self::LosslessHuffman => { + write!(f, "The library cannot yet decode images encoded with Lossless Huffman encoding scheme") + } + Self::ExtendedSequentialDctArithmetic => { + write!(f,"The library cannot yet decode Images Encoded with Extended Sequential DCT Arithmetic scheme") + } + Self::ProgressiveDctArithmetic => { + write!(f,"The library cannot yet decode images encoded with Progressive DCT Arithmetic scheme") + } + Self::LosslessArithmetic => { + write!(f,"The library cannot yet decode images encoded with Lossless 
Arithmetic encoding scheme") + } + } + } +} + +impl UnsupportedSchemes { + #[must_use] + /// Create an unsupported scheme from an integer + /// + /// # Returns + /// `Some(UnsupportedScheme)` if the int refers to a specific scheme, + /// otherwise returns `None` + pub fn from_int(int: u8) -> Option { + let int = u16::from_be_bytes([0xff, int]); + + match int { + START_OF_FRAME_PROG_DCT_AR => Some(Self::ProgressiveDctArithmetic), + START_OF_FRAME_LOS_SEQ => Some(Self::LosslessHuffman), + START_OF_FRAME_LOS_SEQ_AR => Some(Self::LosslessArithmetic), + START_OF_FRAME_EXT_SEQ => Some(Self::ExtendedSequentialHuffman), + START_OF_FRAME_EXT_AR => Some(Self::ExtendedSequentialDctArithmetic), + _ => None + } + } +} diff --git a/third_party/zune-jpeg/src/headers.rs b/third_party/zune-jpeg/src/headers.rs new file mode 100644 index 0000000..93126e2 --- /dev/null +++ b/third_party/zune-jpeg/src/headers.rs @@ -0,0 +1,544 @@ +/* + * Copyright (c) 2023. + * + * This software is free software; + * + * You can redistribute it or modify it under terms of the MIT, Apache License or Zlib license + */ + +//! Decode Decoder markers/segments +//! +//! This file deals with decoding header information in a jpeg file +//! 
+use alloc::format; +use alloc::string::ToString; +use alloc::vec::Vec; + +use zune_core::bytestream::ZByteReaderTrait; +use zune_core::colorspace::ColorSpace; +use zune_core::log::{debug, error, trace, warn}; + +use crate::components::Components; +use crate::decoder::{ICCChunk, JpegDecoder, MAX_COMPONENTS}; +use crate::errors::DecodeErrors; +use crate::huffman::HuffmanTable; +use crate::misc::{SOFMarkers, UN_ZIGZAG}; + +///**B.2.4.2 Huffman table-specification syntax** +#[allow(clippy::similar_names, clippy::cast_sign_loss)] +pub(crate) fn parse_huffman( + decoder: &mut JpegDecoder +) -> Result<(), DecodeErrors> +where +{ + // Read the length of the Huffman table + let mut dht_length = i32::from(decoder.stream.get_u16_be_err()?.checked_sub(2).ok_or( + DecodeErrors::FormatStatic("Invalid Huffman length in image") + )?); + + while dht_length > 16 { + // HT information + let ht_info = decoder.stream.read_u8_err()?; + // third bit indicates whether the huffman encoding is DC or AC type + let dc_or_ac = (ht_info >> 4) & 0xF; + // Indicate the position of this table, should be less than 4; + let index = (ht_info & 0xF) as usize; + // read the number of symbols + let mut num_symbols: [u8; 17] = [0; 17]; + + if index >= MAX_COMPONENTS { + return Err(DecodeErrors::HuffmanDecode(format!( + "Invalid DHT index {index}, expected between 0 and 3" + ))); + } + + if dc_or_ac > 1 { + return Err(DecodeErrors::HuffmanDecode(format!( + "Invalid DHT position {dc_or_ac}, should be 0 or 1" + ))); + } + + decoder.stream.read_exact_bytes(&mut num_symbols[1..17])?; + + dht_length -= 1 + 16; + + let symbols_sum: i32 = num_symbols.iter().map(|f| i32::from(*f)).sum(); + + // The sum of the number of symbols cannot be greater than 256; + if symbols_sum > 256 { + return Err(DecodeErrors::FormatStatic( + "Encountered Huffman table with excessive length in DHT" + )); + } + if symbols_sum > dht_length { + return Err(DecodeErrors::HuffmanDecode(format!( + "Excessive Huffman table of length 
{symbols_sum} found when header length is {dht_length}" + ))); + } + dht_length -= symbols_sum; + // A table containing symbols in increasing code length + let mut symbols = [0; 256]; + + decoder + .stream + .read_exact_bytes(&mut symbols[0..(symbols_sum as usize)])?; + // store + match dc_or_ac { + 0 => { + decoder.dc_huffman_tables[index] = Some(HuffmanTable::new( + &num_symbols, + symbols, + true, + decoder.is_progressive + )?); + } + _ => { + decoder.ac_huffman_tables[index] = Some(HuffmanTable::new( + &num_symbols, + symbols, + false, + decoder.is_progressive + )?); + } + } + } + + if dht_length > 0 { + return Err(DecodeErrors::FormatStatic("Bogus Huffman table definition")); + } + + Ok(()) +} + +///**B.2.4.1 Quantization table-specification syntax** +#[allow(clippy::cast_possible_truncation, clippy::needless_range_loop)] +pub(crate) fn parse_dqt(img: &mut JpegDecoder) -> Result<(), DecodeErrors> { + // read length + let mut qt_length = + img.stream + .get_u16_be_err()? + .checked_sub(2) + .ok_or(DecodeErrors::FormatStatic( + "Invalid DQT length. Length should be greater than 2" + ))?; + // A single DQT header may have multiple QT's + while qt_length > 0 { + let qt_info = img.stream.read_u8_err()?; + // 0 = 8 bit otherwise 16 bit dqt + let precision = (qt_info >> 4) as usize; + // last 4 bits give us position + let table_position = (qt_info & 0x0f) as usize; + let precision_value = 64 * (precision + 1); + + if (precision_value + 1) as u16 > qt_length { + return Err(DecodeErrors::DqtError(format!("Invalid QT table bytes left :{}. 
Too small to construct a valid qt table which should be {} long", qt_length, precision_value + 1))); + } + + let dct_table = match precision { + 0 => { + let mut qt_values = [0; 64]; + + img.stream.read_exact_bytes(&mut qt_values)?; + + qt_length -= (precision_value as u16) + 1 /*QT BIT*/; + // carry out un zig-zag here + un_zig_zag(&qt_values) + } + 1 => { + // 16 bit quantization tables + let mut qt_values = [0_u16; 64]; + + for i in 0..64 { + qt_values[i] = img.stream.get_u16_be_err()?; + } + qt_length -= (precision_value as u16) + 1; + + un_zig_zag(&qt_values) + } + _ => { + return Err(DecodeErrors::DqtError(format!( + "Expected QT precision value of either 0 or 1, found {precision:?}" + ))); + } + }; + + if table_position >= MAX_COMPONENTS { + return Err(DecodeErrors::DqtError(format!( + "Too large table position for QT :{table_position}, expected between 0 and 3" + ))); + } + + img.qt_tables[table_position] = Some(dct_table); + } + + return Ok(()); +} + +/// Section:`B.2.2 Frame header syntax` + +pub(crate) fn parse_start_of_frame( + sof: SOFMarkers, img: &mut JpegDecoder +) -> Result<(), DecodeErrors> { + if img.seen_sof { + return Err(DecodeErrors::SofError( + "Two Start of Frame Markers".to_string() + )); + } + // Get length of the frame header + let length = img.stream.get_u16_be_err()?; + // usually 8, but can be 12 and 16, we currently support only 8 + // so sorry about that 12 bit images + let dt_precision = img.stream.read_u8_err()?; + + if dt_precision != 8 { + return Err(DecodeErrors::SofError(format!( + "The library can only parse 8-bit images, the image has {dt_precision} bits of precision" + ))); + } + + img.info.set_density(dt_precision); + + // read and set the image height. 
+ let img_height = img.stream.get_u16_be_err()?; + img.info.set_height(img_height); + + // read and set the image width + let img_width = img.stream.get_u16_be_err()?; + img.info.set_width(img_width); + + trace!("Image width :{}", img_width); + trace!("Image height :{}", img_height); + + if usize::from(img_width) > img.options.max_width() { + return Err(DecodeErrors::Format(format!("Image width {} greater than width limit {}. If use `set_limits` if you want to support huge images", img_width, img.options.max_width()))); + } + + if usize::from(img_height) > img.options.max_height() { + return Err(DecodeErrors::Format(format!("Image height {} greater than height limit {}. If use `set_limits` if you want to support huge images", img_height, img.options.max_height()))); + } + + // Check image width or height is zero + if img_width == 0 || img_height == 0 { + return Err(DecodeErrors::ZeroError); + } + + // Number of components for the image. + let num_components = img.stream.read_u8_err()?; + + if num_components == 0 { + return Err(DecodeErrors::SofError( + "Number of components cannot be zero.".to_string() + )); + } + + let expected = 8 + 3 * u16::from(num_components); + // length should be equal to num components + if length != expected { + return Err(DecodeErrors::SofError(format!( + "Length of start of frame differs from expected {expected},value is {length}" + ))); + } + + trace!("Image components : {}", num_components); + + if num_components == 1 { + // SOF sets the number of image components + // and that to us translates to setting input and output + // colorspaces to zero + img.input_colorspace = ColorSpace::Luma; + img.options = img.options.jpeg_set_out_colorspace(ColorSpace::Luma); + debug!("Overriding default colorspace set to Luma"); + } + if num_components == 4 && img.input_colorspace == ColorSpace::YCbCr { + trace!("Input image has 4 components, defaulting to CMYK colorspace"); + // 
https://entropymine.wordpress.com/2018/10/22/how-is-a-jpeg-images-color-type-determined/ + img.input_colorspace = ColorSpace::CMYK; + } + + // set number of components + img.info.components = num_components; + + let mut components = Vec::with_capacity(num_components as usize); + let mut temp = [0; 3]; + + for pos in 0..num_components { + // read 3 bytes for each component + img.stream.read_exact_bytes(&mut temp)?; + + // create a component. + let component = Components::from(temp, pos)?; + + components.push(component); + } + img.seen_sof = true; + + img.info.set_sof_marker(sof); + + img.components = components; + + Ok(()) +} + +/// Parse a start of scan data +pub(crate) fn parse_sos( + image: &mut JpegDecoder +) -> Result<(), DecodeErrors> { + // Scan header length + let ls = image.stream.get_u16_be_err()?; + // Number of image components in scan + let ns = image.stream.read_u8_err()?; + + let mut seen = [-1; { MAX_COMPONENTS + 1 }]; + + image.num_scans = ns; + + if ls != 6 + 2 * u16::from(ns) { + return Err(DecodeErrors::SosError(format!( + "Bad SOS length {ls},corrupt jpeg" + ))); + } + + // Check number of components. + if !(1..5).contains(&ns) { + return Err(DecodeErrors::SosError(format!( + "Number of components in start of scan should be less than 3 but more than 0. Found {ns}" + ))); + } + + if image.info.components == 0 { + return Err(DecodeErrors::FormatStatic( + "Error decoding SOF Marker, Number of components cannot be zero." 
+ )); + } + + // consume spec parameters + for i in 0..ns { + // CS_i parameter, I don't need it so I might as well delete it + let id = image.stream.read_u8_err()?; + + if seen.contains(&i32::from(id)) { + return Err(DecodeErrors::SofError(format!( + "Duplicate ID {id} seen twice in the same component" + ))); + } + + seen[usize::from(i)] = i32::from(id); + // DC and AC huffman table position + // top 4 bits contain dc huffman destination table + // lower four bits contain ac huffman destination table + let y = image.stream.read_u8_err()?; + + let mut j = 0; + + while j < image.info.components { + if image.components[j as usize].id == id { + break; + } + + j += 1; + } + + if j == image.info.components { + return Err(DecodeErrors::SofError(format!( + "Invalid component id {}, expected a value between 0 and {}", + id, + image.components.len() + ))); + } + + image.components[usize::from(j)].dc_huff_table = usize::from((y >> 4) & 0xF); + image.components[usize::from(j)].ac_huff_table = usize::from(y & 0xF); + image.z_order[i as usize] = j as usize; + } + + // Collect the component spec parameters + // This is only needed for progressive images but I'll read + // them in order to ensure they are correct according to the spec + + // Extract progressive information + + // https://www.w3.org/Graphics/JPEG/itu-t81.pdf + // Page 42 + + // Start of spectral / predictor selection. 
(between 0 and 63) + image.spec_start = image.stream.read_u8_err()?; + // End of spectral selection + image.spec_end = image.stream.read_u8_err()?; + + let bit_approx = image.stream.read_u8_err()?; + // successive approximation bit position high + image.succ_high = bit_approx >> 4; + + if image.spec_end > 63 { + return Err(DecodeErrors::SosError(format!( + "Invalid Se parameter {}, range should be 0-63", + image.spec_end + ))); + } + if image.spec_start > 63 { + return Err(DecodeErrors::SosError(format!( + "Invalid Ss parameter {}, range should be 0-63", + image.spec_start + ))); + } + if image.succ_high > 13 { + return Err(DecodeErrors::SosError(format!( + "Invalid Ah parameter {}, range should be 0-13", + image.succ_low + ))); + } + // successive approximation bit position low + image.succ_low = bit_approx & 0xF; + + if image.succ_low > 13 { + return Err(DecodeErrors::SosError(format!( + "Invalid Al parameter {}, range should be 0-13", + image.succ_low + ))); + } + + trace!( + "Ss={}, Se={} Ah={} Al={}", + image.spec_start, + image.spec_end, + image.succ_high, + image.succ_low + ); + + Ok(()) +} + +/// Parse Adobe App14 segment +pub(crate) fn parse_app14( + decoder: &mut JpegDecoder +) -> Result<(), DecodeErrors> { + // skip length + let mut length = usize::from(decoder.stream.get_u16_be()); + + if length < 2 { + return Err(DecodeErrors::FormatStatic("Too small APP14 length")); + } + if length < 14 { + return Err(DecodeErrors::FormatStatic( + "Too short of a length for App14 segment" + )); + } + if decoder.stream.peek_at(0, 5)? 
== b"Adobe" { + // move stream 6 bytes to remove adobe id + decoder.stream.skip(6)?; + // skip version, flags0 and flags1 + decoder.stream.skip(5)?; + // get color transform + let transform = decoder.stream.read_u8(); + // https://exiftool.org/TagNames/JPEG.html#Adobe + match transform { + 0 => decoder.input_colorspace = ColorSpace::CMYK, + 1 => decoder.input_colorspace = ColorSpace::YCbCr, + 2 => decoder.input_colorspace = ColorSpace::YCCK, + _ => { + return Err(DecodeErrors::Format(format!( + "Unknown Adobe colorspace {transform}" + ))) + } + } + // length = 2 + // adobe id = 6 + // version = 5 + // transform = 1 + length = length.saturating_sub(14); + } else if decoder.options.strict_mode() { + return Err(DecodeErrors::FormatStatic("Corrupt Adobe App14 segment")); + } else { + length = length.saturating_sub(2); + error!("Not a valid Adobe APP14 Segment"); + } + // skip any proceeding lengths. + // we do not need them + decoder.stream.skip(length)?; + + Ok(()) +} + +/// Parse the APP1 segment +/// +/// This contains the exif tag +pub(crate) fn parse_app1( + decoder: &mut JpegDecoder +) -> Result<(), DecodeErrors> { + // contains exif data + let mut length = usize::from(decoder.stream.get_u16_be()); + + if length < 2 { + return Err(DecodeErrors::FormatStatic("Too small app1 length")); + } + // length bytes + length -= 2; + + if length > 6 && decoder.stream.peek_at(0, 6)? 
== b"Exif\x00\x00" { + trace!("Exif segment present"); + // skip bytes we read above + decoder.stream.skip(6)?; + length -= 6; + + let exif_bytes = decoder.stream.peek_at(0, length)?.to_vec(); + + decoder.exif_data = Some(exif_bytes); + } else { + warn!("Wrongly formatted exif tag"); + } + + decoder.stream.skip(length)?; + Ok(()) +} + +pub(crate) fn parse_app2( + decoder: &mut JpegDecoder +) -> Result<(), DecodeErrors> { + let mut length = usize::from(decoder.stream.get_u16_be()); + + if length < 2 { + return Err(DecodeErrors::FormatStatic("Too small app2 segment")); + } + // length bytes + length -= 2; + + if length > 14 && decoder.stream.peek_at(0, 12)? == *b"ICC_PROFILE\0" { + trace!("ICC Profile present"); + // skip 12 bytes which indicate ICC profile + length -= 12; + decoder.stream.skip(12)?; + let seq_no = decoder.stream.read_u8(); + let num_markers = decoder.stream.read_u8(); + // deduct the two bytes we read above + length -= 2; + + let data = decoder.stream.peek_at(0, length)?.to_vec(); + + let icc_chunk = ICCChunk { + seq_no, + num_markers, + data + }; + decoder.icc_data.push(icc_chunk); + } + + decoder.stream.skip(length)?; + + Ok(()) +} + +/// Small utility function to print Un-zig-zagged quantization tables + +fn un_zig_zag(a: &[T]) -> [i32; 64] +where + T: Default + Copy, + i32: core::convert::From +{ + let mut output = [i32::default(); 64]; + + for i in 0..64 { + output[UN_ZIGZAG[i]] = i32::from(a[i]); + } + + output +} diff --git a/third_party/zune-jpeg/src/huffman.rs b/third_party/zune-jpeg/src/huffman.rs new file mode 100644 index 0000000..f5f12c2 --- /dev/null +++ b/third_party/zune-jpeg/src/huffman.rs @@ -0,0 +1,254 @@ +/* + * Copyright (c) 2023. + * + * This software is free software; + * + * You can redistribute it or modify it under terms of the MIT, Apache License or Zlib license + */ + +//! This file contains a single struct `HuffmanTable` that +//! stores Huffman tables needed during `BitStream` decoding. 
+#![allow(clippy::similar_names, clippy::module_name_repetitions)] + +use alloc::string::ToString; + +use crate::errors::DecodeErrors; + +/// Determines how many bits of lookahead we have for our bitstream decoder. + +pub const HUFF_LOOKAHEAD: u8 = 9; + +/// A struct which contains necessary tables for decoding a JPEG +/// huffman encoded bitstream + +pub struct HuffmanTable { + // element `[0]` of each array is unused + /// largest code of length k + pub(crate) maxcode: [i32; 18], + /// offset for codes of length k + /// Answers the question, where do code-lengths of length k end + /// Element 0 is unused + pub(crate) offset: [i32; 18], + /// lookup table for fast decoding + /// + /// top bits above HUFF_LOOKAHEAD contain the code length. + /// + /// Lower (8) bits contain the symbol in order of increasing code length. + pub(crate) lookup: [i32; 1 << HUFF_LOOKAHEAD], + + /// A table which can be used to decode small AC coefficients and + /// do an equivalent of receive_extend + pub(crate) ac_lookup: Option<[i16; 1 << HUFF_LOOKAHEAD]>, + + /// Directly represent contents of a JPEG DHT marker + /// + /// \# number of symbols with codes of length `k` bits + // bits[0] is unused + /// Symbols in order of increasing code length + pub(crate) values: [u8; 256] +} + +impl HuffmanTable { + pub fn new( + codes: &[u8; 17], values: [u8; 256], is_dc: bool, is_progressive: bool + ) -> Result { + let too_long_code = (i32::from(HUFF_LOOKAHEAD) + 1) << HUFF_LOOKAHEAD; + let mut p = HuffmanTable { + maxcode: [0; 18], + offset: [0; 18], + lookup: [too_long_code; 1 << HUFF_LOOKAHEAD], + values, + ac_lookup: None + }; + + p.make_derived_table(is_dc, is_progressive, codes)?; + + Ok(p) + } + + /// Create a new huffman tables with values that aren't fixed + /// used by fill_mjpeg_tables + pub fn new_unfilled( + codes: &[u8; 17], values: &[u8], is_dc: bool, is_progressive: bool + ) -> Result { + let mut buf = [0; 256]; + buf[..values.len()].copy_from_slice(values); + 
HuffmanTable::new(codes, buf, is_dc, is_progressive) + } + + /// Compute derived values for a Huffman table + /// + /// This routine performs some validation checks on the table + #[allow( + clippy::cast_possible_truncation, + clippy::cast_possible_wrap, + clippy::cast_sign_loss, + clippy::too_many_lines, + clippy::needless_range_loop + )] + fn make_derived_table( + &mut self, is_dc: bool, _is_progressive: bool, bits: &[u8; 17] + ) -> Result<(), DecodeErrors> { + // build a list of code size + let mut huff_size = [0; 257]; + // Huffman code lengths + let mut huff_code: [u32; 257] = [0; 257]; + // figure C.1 make table of Huffman code length for each symbol + let mut p = 0; + + for l in 1..=16 { + let mut i = i32::from(bits[l]); + // table overrun is checked before ,so we dont need to check + while i != 0 { + huff_size[p] = l as u8; + p += 1; + i -= 1; + } + } + + huff_size[p] = 0; + + let num_symbols = p; + // Generate the codes themselves + // We also validate that the counts represent a legal Huffman code tree + let mut code = 0; + let mut si = i32::from(huff_size[0]); + + p = 0; + + while huff_size[p] != 0 { + while i32::from(huff_size[p]) == si { + huff_code[p] = code; + code += 1; + p += 1; + } + // maximum code of length si, pre-shifted by 16-k bits + self.maxcode[si as usize] = (code << (16 - si)) as i32; + // code is now 1 more than the last code used for code-length si; but + // it must still fit in si bits, since no code is allowed to be all ones. 
+ if (code as i32) >= (1 << si) { + return Err(DecodeErrors::HuffmanDecode("Bad Huffman Table".to_string())); + } + + code <<= 1; + si += 1; + } + + // Figure F.15 generate decoding tables for bit-sequential decoding + p = 0; + + for l in 0..=16 { + if bits[l] == 0 { + // -1 if no codes of this length + self.maxcode[l] = -1; + } else { + // offset[l]=codes[index of 1st symbol of code length l + // minus minimum code of length l] + self.offset[l] = (p as i32) - (huff_code[p]) as i32; + p += usize::from(bits[l]); + } + } + + self.offset[17] = 0; + // we ensure that decode terminates + self.maxcode[17] = 0x000F_FFFF; + + /* + * Compute lookahead tables to speed up decoding. + * First we set all the table entries to 0(left justified), indicating "too long"; + * (Note too long was set during initialization) + * then we iterate through the Huffman codes that are short enough and + * fill in all the entries that correspond to bit sequences starting + * with that code. + */ + + p = 0; + + for l in 1..=HUFF_LOOKAHEAD { + for _ in 1..=i32::from(bits[usize::from(l)]) { + // l -> Current code length, + // p => Its index in self.code and self.values + // Generate left justified code followed by all possible bit sequences + let mut look_bits = (huff_code[p] as usize) << (HUFF_LOOKAHEAD - l); + + for _ in 0..1 << (HUFF_LOOKAHEAD - l) { + self.lookup[look_bits] = + (i32::from(l) << HUFF_LOOKAHEAD) | i32::from(self.values[p]); + look_bits += 1; + } + + p += 1; + } + } + // build an ac table that does an equivalent of decode and receive_extend + if !is_dc { + let mut fast = [255; 1 << HUFF_LOOKAHEAD]; + // Iterate over number of symbols + for i in 0..num_symbols { + // get code size for an item + let s = huff_size[i]; + + if s <= HUFF_LOOKAHEAD { + // if it's lower than what we need for our lookup table create the table + let c = (huff_code[i] << (HUFF_LOOKAHEAD - s)) as usize; + let m = (1 << (HUFF_LOOKAHEAD - s)) as usize; + + for j in 0..m { + fast[c + j] = i as i16; + } + } + } 
+ + // build a table that decodes both magnitude and value of small ACs in + // one go. + let mut fast_ac = [0; 1 << HUFF_LOOKAHEAD]; + + for i in 0..(1 << HUFF_LOOKAHEAD) { + let fast_v = fast[i]; + + if fast_v < 255 { + // get symbol value from AC table + let rs = self.values[fast_v as usize]; + // shift by 4 to get run length + let run = i16::from((rs >> 4) & 15); + // get magnitude bits stored at the lower 3 bits + let mag_bits = i16::from(rs & 15); + // length of the bit we've read + let len = i16::from(huff_size[fast_v as usize]); + + if mag_bits != 0 && (len + mag_bits) <= i16::from(HUFF_LOOKAHEAD) { + // magnitude code followed by receive_extend code + let mut k = (((i as i16) << len) & ((1 << HUFF_LOOKAHEAD) - 1)) + >> (i16::from(HUFF_LOOKAHEAD) - mag_bits); + let m = 1 << (mag_bits - 1); + + if k < m { + k += (!0_i16 << mag_bits) + 1; + }; + + // if result is small enough fit into fast ac table + if (-128..=127).contains(&k) { + fast_ac[i] = (k << 8) + (run << 4) + (len + mag_bits); + } + } + } + } + self.ac_lookup = Some(fast_ac); + } + + // Validate symbols as being reasonable + // For AC tables, we make no check, but accept all byte values 0..255 + // For DC tables, we require symbols to be in range 0..15 + if is_dc { + for i in 0..num_symbols { + let sym = self.values[i]; + + if sym > 15 { + return Err(DecodeErrors::HuffmanDecode("Bad Huffman Table".to_string())); + } + } + } + + Ok(()) + } +} diff --git a/third_party/zune-jpeg/src/idct.rs b/third_party/zune-jpeg/src/idct.rs new file mode 100644 index 0000000..d2b6f06 --- /dev/null +++ b/third_party/zune-jpeg/src/idct.rs @@ -0,0 +1,147 @@ +/* + * Copyright (c) 2023. + * + * This software is free software; + * + * You can redistribute it or modify it under terms of the MIT, Apache License or Zlib license + */ + +//! Routines for IDCT +//! +//! Essentially we provide 2 routines for IDCT, a scalar implementation and a not super optimized +//! AVX2 one, i'll talk about them here. +//! +//! 
There are 2 reasons why we have the avx one +//! 1. No one compiles with -C target-features=avx2 hence binaries won't probably take advantage(even +//! if it exists). +//! 2. AVX employs zero short circuit in a way the scalar code cannot employ it. +//! - AVX does this by checking for MCU's whose 63 AC coefficients are zero and if true, it writes +//! values directly, if false, it goes the long way of calculating. +//! - Although this can be trivially implemented in the scalar version, it generates code +//! I'm not happy width(scalar version that basically loops and that is too many branches for me) +//! The avx one does a better job of using bitwise or's with (`_mm256_or_si256`) which is magnitudes of faster +//! than anything I could come up with +//! +//! The AVX code also has some cool transpose_u16 instructions which look so complicated to be cool +//! (spoiler alert, i barely understand how it works, that's why I credited the owner). +//! +#![allow( + clippy::excessive_precision, + clippy::unreadable_literal, + clippy::module_name_repetitions, + unused_parens, + clippy::wildcard_imports +)] + +use zune_core::log::debug; +use zune_core::options::DecoderOptions; + +use crate::decoder::IDCTPtr; +use crate::idct::scalar::idct_int; + +#[cfg(feature = "x86")] +pub mod avx2; +#[cfg(feature = "neon")] +pub mod neon; + +pub mod scalar; + +/// Choose an appropriate IDCT function +#[allow(unused_variables)] +pub fn choose_idct_func(options: &DecoderOptions) -> IDCTPtr { + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + #[cfg(feature = "x86")] + { + if options.use_avx2() { + debug!("Using vector integer IDCT"); + // use avx one + return crate::idct::avx2::idct_avx2; + } + } + #[cfg(target_arch = "aarch64")] + #[cfg(feature = "neon")] + { + if options.use_neon() { + debug!("Using vector integer IDCT"); + return crate::idct::neon::idct_neon; + } + } + debug!("Using scalar integer IDCT"); + // use generic one + return idct_int; +} + +#[cfg(test)] 
+#[allow(unreachable_code)] +#[allow(dead_code)] +mod tests { + use super::*; + + #[test] + fn idct_test0() { + let stride = 8; + let mut coeff = [10; 64]; + let mut coeff2 = [10; 64]; + let mut output_scalar = [0; 64]; + let mut output_vector = [0; 64]; + idct_fnc()(&mut coeff, &mut output_vector, stride); + idct_int(&mut coeff2, &mut output_scalar, stride); + assert_eq!(output_scalar, output_vector, "IDCT and scalar do not match"); + } + + #[test] + fn do_idct_test1() { + let stride = 8; + let mut coeff = [14; 64]; + let mut coeff2 = [14; 64]; + let mut output_scalar = [0; 64]; + let mut output_vector = [0; 64]; + idct_fnc()(&mut coeff, &mut output_vector, stride); + idct_int(&mut coeff2, &mut output_scalar, stride); + assert_eq!(output_scalar, output_vector, "IDCT and scalar do not match"); + } + + #[test] + fn do_idct_test2() { + let stride = 8; + let mut coeff = [0; 64]; + coeff[0] = 255; + coeff[63] = -256; + let mut coeff2 = coeff; + let mut output_scalar = [0; 64]; + let mut output_vector = [0; 64]; + idct_fnc()(&mut coeff, &mut output_vector, stride); + idct_int(&mut coeff2, &mut output_scalar, stride); + assert_eq!(output_scalar, output_vector, "IDCT and scalar do not match"); + } + + #[test] + fn do_idct_zeros() { + let stride = 8; + let mut coeff = [0; 64]; + let mut coeff2 = [0; 64]; + let mut output_scalar = [0; 64]; + let mut output_vector = [0; 64]; + idct_fnc()(&mut coeff, &mut output_vector, stride); + idct_int(&mut coeff2, &mut output_scalar, stride); + assert_eq!(output_scalar, output_vector, "IDCT and scalar do not match"); + } + + fn idct_fnc() -> IDCTPtr { + #[cfg(feature = "neon")] + #[cfg(target_arch = "aarch64")] + { + use crate::idct::neon::idct_neon; + return idct_neon; + } + + #[cfg(feature = "x86")] + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + { + use crate::idct::avx2::idct_avx2; + return idct_avx2; + } + + idct_int + } +} diff --git a/third_party/zune-jpeg/src/idct/avx2.rs b/third_party/zune-jpeg/src/idct/avx2.rs new 
file mode 100644 index 0000000..0f2f195 --- /dev/null +++ b/third_party/zune-jpeg/src/idct/avx2.rs @@ -0,0 +1,288 @@ +/* + * Copyright (c) 2023. + * + * This software is free software; + * + * You can redistribute it or modify it under terms of the MIT, Apache License or Zlib license + */ + +#![cfg(any(target_arch = "x86", target_arch = "x86_64"))] +//! AVX optimised IDCT. +//! +//! Okay not thaat optimised. +//! +//! +//! # The implementation +//! The implementation is neatly broken down into two operations. +//! +//! 1. Test for zeroes +//! > There is a shortcut method for idct where when all AC values are zero, we can get the answer really quickly. +//! by scaling the 1/8th of the DCT coefficient of the block to the whole block and level shifting. +//! +//! 2. If above fails, we proceed to carry out IDCT as a two pass one dimensional algorithm. +//! IT does two whole scans where it carries out IDCT on all items +//! After each successive scan, data is transposed in register(thank you x86 SIMD powers). and the second +//! pass is carried out. +//! +//! The code is not super optimized, it produces bit identical results with scalar code hence it's +//! `mm256_add_epi16` +//! and it also has the advantage of making this implementation easy to maintain. + +#![cfg(feature = "x86")] +#![allow(dead_code)] + +#[cfg(target_arch = "x86")] +use core::arch::x86::*; +#[cfg(target_arch = "x86_64")] +use core::arch::x86_64::*; + +use crate::unsafe_utils::{transpose, YmmRegister}; + +const SCALE_BITS: i32 = 512 + 65536 + (128 << 17); + +/// SAFETY +/// ------ +/// +/// It is the responsibility of the CALLER to ensure that this function is +/// called in contexts where the CPU supports it +/// +/// +/// For documentation see module docs. 
+ +pub fn idct_avx2(in_vector: &mut [i32; 64], out_vector: &mut [i16], stride: usize) { + unsafe { + // We don't call this method directly because we need to flag the code function + // with #[target_feature] so that the compiler does do weird stuff with + // it + idct_int_avx2_inner(in_vector, out_vector, stride); + } +} + +#[target_feature(enable = "avx2")] +#[allow( + clippy::too_many_lines, + clippy::cast_possible_truncation, + clippy::similar_names, + clippy::op_ref, + unused_assignments, + clippy::zero_prefixed_literal +)] +pub unsafe fn idct_int_avx2_inner( + in_vector: &mut [i32; 64], out_vector: &mut [i16], stride: usize +) { + let mut pos = 0; + + // load into registers + // + // We sign extend i16's to i32's and calculate them with extended precision and + // later reduce them to i16's when we are done carrying out IDCT + + let rw0 = _mm256_loadu_si256(in_vector[00..].as_ptr().cast()); + let rw1 = _mm256_loadu_si256(in_vector[08..].as_ptr().cast()); + let rw2 = _mm256_loadu_si256(in_vector[16..].as_ptr().cast()); + let rw3 = _mm256_loadu_si256(in_vector[24..].as_ptr().cast()); + let rw4 = _mm256_loadu_si256(in_vector[32..].as_ptr().cast()); + let rw5 = _mm256_loadu_si256(in_vector[40..].as_ptr().cast()); + let rw6 = _mm256_loadu_si256(in_vector[48..].as_ptr().cast()); + let rw7 = _mm256_loadu_si256(in_vector[56..].as_ptr().cast()); + + // Forward DCT and quantization may cause all the AC terms to be zero, for such + // cases we can try to accelerate it + + // Basically the poop is that whenever the array has 63 zeroes, its idct is + // (arr[0]>>3)or (arr[0]/8) propagated to all the elements. + // We first test to see if the array contains zero elements and if it does, we go the + // short way. 
+ // + // This reduces IDCT overhead from about 39% to 18 %, almost half + + // Do another load for the first row, we don't want to check DC value, because + // we only care about AC terms + let rw8 = _mm256_loadu_si256(in_vector[1..].as_ptr().cast()); + + let zero = _mm256_setzero_si256(); + + let mut non_zero = 0; + + non_zero += _mm256_movemask_epi8(_mm256_cmpeq_epi32(rw8, zero)); + non_zero += _mm256_movemask_epi8(_mm256_cmpeq_epi32(rw1, zero)); + non_zero += _mm256_movemask_epi8(_mm256_cmpeq_epi32(rw2, zero)); + non_zero += _mm256_movemask_epi8(_mm256_cmpeq_epi64(rw3, zero)); + + non_zero += _mm256_movemask_epi8(_mm256_cmpeq_epi64(rw4, zero)); + non_zero += _mm256_movemask_epi8(_mm256_cmpeq_epi64(rw5, zero)); + non_zero += _mm256_movemask_epi8(_mm256_cmpeq_epi64(rw6, zero)); + non_zero += _mm256_movemask_epi8(_mm256_cmpeq_epi64(rw7, zero)); + + if non_zero == -8 { + // AC terms all zero, idct of the block is is ( coeff[0] * qt[0] )/8 + 128 (bias) + // (and clamped to 255) + let idct_value = _mm_set1_epi16(((in_vector[0] >> 3) + 128).clamp(0, 255) as i16); + + macro_rules! store { + ($pos:tt,$value:tt) => { + // store + _mm_storeu_si128( + out_vector + .get_mut($pos..$pos + 8) + .unwrap() + .as_mut_ptr() + .cast(), + $value + ); + $pos += stride; + }; + } + store!(pos, idct_value); + store!(pos, idct_value); + store!(pos, idct_value); + store!(pos, idct_value); + + store!(pos, idct_value); + store!(pos, idct_value); + store!(pos, idct_value); + store!(pos, idct_value); + + return; + } + + let mut row0 = YmmRegister { mm256: rw0 }; + let mut row1 = YmmRegister { mm256: rw1 }; + let mut row2 = YmmRegister { mm256: rw2 }; + let mut row3 = YmmRegister { mm256: rw3 }; + + let mut row4 = YmmRegister { mm256: rw4 }; + let mut row5 = YmmRegister { mm256: rw5 }; + let mut row6 = YmmRegister { mm256: rw6 }; + let mut row7 = YmmRegister { mm256: rw7 }; + + macro_rules! 
dct_pass { + ($SCALE_BITS:tt,$scale:tt) => { + // There are a lot of ways to do this + // but to keep it simple(and beautiful), ill make a direct translation of the + // scalar code to also make this code fully transparent(this version and the non + // avx one should produce identical code.) + + // even part + let p1 = (row2 + row6) * 2217; + + let mut t2 = p1 + row6 * -7567; + let mut t3 = p1 + row2 * 3135; + + let mut t0 = YmmRegister { + mm256: _mm256_slli_epi32((row0 + row4).mm256, 12) + }; + let mut t1 = YmmRegister { + mm256: _mm256_slli_epi32((row0 - row4).mm256, 12) + }; + + let x0 = t0 + t3 + $SCALE_BITS; + let x3 = t0 - t3 + $SCALE_BITS; + let x1 = t1 + t2 + $SCALE_BITS; + let x2 = t1 - t2 + $SCALE_BITS; + + let p3 = row7 + row3; + let p4 = row5 + row1; + let p1 = row7 + row1; + let p2 = row5 + row3; + let p5 = (p3 + p4) * 4816; + + t0 = row7 * 1223; + t1 = row5 * 8410; + t2 = row3 * 12586; + t3 = row1 * 6149; + + let p1 = p5 + p1 * -3685; + let p2 = p5 + (p2 * -10497); + let p3 = p3 * -8034; + let p4 = p4 * -1597; + + t3 += p1 + p4; + t2 += p2 + p3; + t1 += p2 + p4; + t0 += p1 + p3; + + row0.mm256 = _mm256_srai_epi32((x0 + t3).mm256, $scale); + row1.mm256 = _mm256_srai_epi32((x1 + t2).mm256, $scale); + row2.mm256 = _mm256_srai_epi32((x2 + t1).mm256, $scale); + row3.mm256 = _mm256_srai_epi32((x3 + t0).mm256, $scale); + + row4.mm256 = _mm256_srai_epi32((x3 - t0).mm256, $scale); + row5.mm256 = _mm256_srai_epi32((x2 - t1).mm256, $scale); + row6.mm256 = _mm256_srai_epi32((x1 - t2).mm256, $scale); + row7.mm256 = _mm256_srai_epi32((x0 - t3).mm256, $scale); + }; + } + + // Process rows + dct_pass!(512, 10); + transpose( + &mut row0, &mut row1, &mut row2, &mut row3, &mut row4, &mut row5, &mut row6, &mut row7 + ); + + // process columns + dct_pass!(SCALE_BITS, 17); + transpose( + &mut row0, &mut row1, &mut row2, &mut row3, &mut row4, &mut row5, &mut row6, &mut row7 + ); + + // Pack i32 to i16's, + // clamp them to be between 0-255 + // Undo shuffling + // Store 
back to array + macro_rules! permute_store { + ($x:tt,$y:tt,$index:tt,$out:tt) => { + let a = _mm256_packs_epi32($x, $y); + + // Clamp the values after packing, we can clamp more values at once + let b = clamp_avx(a); + + // /Undo shuffling + let c = _mm256_permute4x64_epi64(b, shuffle(3, 1, 2, 0)); + + // store first vector + _mm_storeu_si128( + ($out) + .get_mut($index..$index + 8) + .unwrap() + .as_mut_ptr() + .cast(), + _mm256_extractf128_si256::<0>(c) + ); + $index += stride; + // second vector + _mm_storeu_si128( + ($out) + .get_mut($index..$index + 8) + .unwrap() + .as_mut_ptr() + .cast(), + _mm256_extractf128_si256::<1>(c) + ); + $index += stride; + }; + } + // Pack and write the values back to the array + permute_store!((row0.mm256), (row1.mm256), pos, out_vector); + permute_store!((row2.mm256), (row3.mm256), pos, out_vector); + permute_store!((row4.mm256), (row5.mm256), pos, out_vector); + permute_store!((row6.mm256), (row7.mm256), pos, out_vector); +} + +#[inline] +#[target_feature(enable = "avx2")] +unsafe fn clamp_avx(reg: __m256i) -> __m256i { + let min_s = _mm256_set1_epi16(0); + let max_s = _mm256_set1_epi16(255); + + let max_v = _mm256_max_epi16(reg, min_s); //max(a,0) + let min_v = _mm256_min_epi16(max_v, max_s); //min(max(a,0),255) + return min_v; +} + +/// A copy of `_MM_SHUFFLE()` that doesn't require +/// a nightly compiler +#[inline] +const fn shuffle(z: i32, y: i32, x: i32, w: i32) -> i32 { + ((z << 6) | (y << 4) | (x << 2) | w) +} diff --git a/third_party/zune-jpeg/src/idct/neon.rs b/third_party/zune-jpeg/src/idct/neon.rs new file mode 100644 index 0000000..cba9f7d --- /dev/null +++ b/third_party/zune-jpeg/src/idct/neon.rs @@ -0,0 +1,296 @@ +/* + * Copyright (c) 2023. + * + * This software is free software; + * + * You can redistribute it or modify it under terms of the MIT, Apache License or Zlib license + */ + +#![cfg(target_arch = "aarch64")] +//! AVX optimised IDCT. +//! +//! Okay not thaat optimised. +//! +//! +//! 
# The implementation +//! The implementation is neatly broken down into two operations. +//! +//! 1. Test for zeroes +//! > There is a shortcut method for idct where when all AC values are zero, we can get the answer really quickly. +//! by scaling the 1/8th of the DCT coefficient of the block to the whole block and level shifting. +//! +//! 2. If above fails, we proceed to carry out IDCT as a two pass one dimensional algorithm. +//! IT does two whole scans where it carries out IDCT on all items +//! After each successive scan, data is transposed in register(thank you x86 SIMD powers). and the second +//! pass is carried out. +//! +//! The code is not super optimized, it produces bit identical results with scalar code hence it's +//! `mm256_add_epi16` +//! and it also has the advantage of making this implementation easy to maintain. + +#![cfg(feature = "neon")] + +use core::arch::aarch64::*; + +use crate::unsafe_utils::{transpose, YmmRegister}; + +const SCALE_BITS: i32 = 512 + 65536 + (128 << 17); + +/// SAFETY +/// ------ +/// +/// It is the responsibility of the CALLER to ensure that this function is +/// called in contexts where the CPU supports it +/// +/// +/// For documentation see module docs. 
+ +pub fn idct_neon(in_vector: &mut [i32; 64], out_vector: &mut [i16], stride: usize) { + unsafe { + // We don't call this method directly because we need to flag the code function + // with #[target_feature] so that the compiler does do weird stuff with + // it + idct_int_neon_inner(in_vector, out_vector, stride); + } +} + +#[inline] +#[target_feature(enable = "neon")] +unsafe fn pack_16(a: int32x4x2_t) -> int16x8_t { + vcombine_s16(vqmovn_s32(a.0), vqmovn_s32(a.1)) +} + +#[inline] +#[target_feature(enable = "neon")] +unsafe fn condense_bottom_16(a: int32x4x2_t, b: int32x4x2_t) -> int16x8x2_t { + int16x8x2_t(pack_16(a), pack_16(b)) +} + +#[target_feature(enable = "neon")] +#[allow( + clippy::too_many_lines, + clippy::cast_possible_truncation, + clippy::similar_names, + clippy::op_ref, + unused_assignments, + clippy::zero_prefixed_literal +)] +pub unsafe fn idct_int_neon_inner( + in_vector: &mut [i32; 64], out_vector: &mut [i16], stride: usize +) { + let mut pos = 0; + + // load into registers + // + // We sign extend i16's to i32's and calculate them with extended precision and + // later reduce them to i16's when we are done carrying out IDCT + + let mut row0 = YmmRegister::load(in_vector[00..].as_ptr().cast()); + let mut row1 = YmmRegister::load(in_vector[08..].as_ptr().cast()); + let mut row2 = YmmRegister::load(in_vector[16..].as_ptr().cast()); + let mut row3 = YmmRegister::load(in_vector[24..].as_ptr().cast()); + let mut row4 = YmmRegister::load(in_vector[32..].as_ptr().cast()); + let mut row5 = YmmRegister::load(in_vector[40..].as_ptr().cast()); + let mut row6 = YmmRegister::load(in_vector[48..].as_ptr().cast()); + let mut row7 = YmmRegister::load(in_vector[56..].as_ptr().cast()); + + // Forward DCT and quantization may cause all the AC terms to be zero, for such + // cases we can try to accelerate it + + // Basically the poop is that whenever the array has 63 zeroes, its idct is + // (arr[0]>>3)or (arr[0]/8) propagated to all the elements. 
+ // We first test to see if the array contains zero elements and if it does, we go the + // short way. + // + // This reduces IDCT overhead from about 39% to 18 %, almost half + + // Do another load for the first row, we don't want to check DC value, because + // we only care about AC terms + // TODO this should be a shift/shuffle, not a likely unaligned load + let row8 = YmmRegister::load(in_vector[1..].as_ptr().cast()); + + let or_tree = (((row1 | row8) | (row2 | row3)) | ((row4 | row5) | (row6 | row7))); + + if or_tree.all_zero() { + // AC terms all zero, idct of the block is is ( coeff[0] * qt[0] )/8 + 128 (bias) + // (and clamped to 255) + let clamped_16 = ((in_vector[0] >> 3) + 128).clamp(0, 255) as i16; + let idct_value = vdupq_n_s16(clamped_16); + + macro_rules! store { + ($pos:tt,$value:tt) => { + // store + vst1q_s16( + out_vector + .get_mut($pos..$pos + 8) + .unwrap() + .as_mut_ptr() + .cast(), + $value + ); + $pos += stride; + }; + } + store!(pos, idct_value); + store!(pos, idct_value); + store!(pos, idct_value); + store!(pos, idct_value); + + store!(pos, idct_value); + store!(pos, idct_value); + store!(pos, idct_value); + store!(pos, idct_value); + + return; + } + + macro_rules! dct_pass { + ($SCALE_BITS:tt,$scale:tt) => { + // There are a lot of ways to do this + // but to keep it simple(and beautiful), ill make a direct translation of the + // scalar code to also make this code fully transparent(this version and the non + // avx one should produce identical code.) 
+ + // Compiler does a pretty good job of optimizing add + mul pairs + // into multiply-acumulate pairs + + // even part + let p1 = (row2 + row6) * 2217; + + let mut t2 = p1 + row6 * -7567; + let mut t3 = p1 + row2 * 3135; + + let mut t0 = (row0 + row4).const_shl::<12>(); + let mut t1 = (row0 - row4).const_shl::<12>(); + + let x0 = t0 + t3 + $SCALE_BITS; + let x3 = t0 - t3 + $SCALE_BITS; + let x1 = t1 + t2 + $SCALE_BITS; + let x2 = t1 - t2 + $SCALE_BITS; + + let p3 = row7 + row3; + let p4 = row5 + row1; + let p1 = row7 + row1; + let p2 = row5 + row3; + let p5 = (p3 + p4) * 4816; + + t0 = row7 * 1223; + t1 = row5 * 8410; + t2 = row3 * 12586; + t3 = row1 * 6149; + + let p1 = p5 + p1 * -3685; + let p2 = p5 + (p2 * -10497); + let p3 = p3 * -8034; + let p4 = p4 * -1597; + + t3 += p1 + p4; + t2 += p2 + p3; + t1 += p2 + p4; + t0 += p1 + p3; + + row0 = (x0 + t3).const_shra::<$scale>(); + row1 = (x1 + t2).const_shra::<$scale>(); + row2 = (x2 + t1).const_shra::<$scale>(); + row3 = (x3 + t0).const_shra::<$scale>(); + + row4 = (x3 - t0).const_shra::<$scale>(); + row5 = (x2 - t1).const_shra::<$scale>(); + row6 = (x1 - t2).const_shra::<$scale>(); + row7 = (x0 - t3).const_shra::<$scale>(); + }; + } + + // Process rows + dct_pass!(512, 10); + transpose( + &mut row0, &mut row1, &mut row2, &mut row3, &mut row4, &mut row5, &mut row6, &mut row7 + ); + + // process columns + dct_pass!(SCALE_BITS, 17); + transpose( + &mut row0, &mut row1, &mut row2, &mut row3, &mut row4, &mut row5, &mut row6, &mut row7 + ); + + // Pack i32 to i16's, + // clamp them to be between 0-255 + // Undo shuffling + // Store back to array + + // This could potentially be reorganized to take advantage of the multi-register stores + macro_rules! 
permute_store { + ($x:tt,$y:tt,$index:tt,$out:tt) => { + let a = condense_bottom_16($x, $y); + + // Clamp the values after packing, we can clamp more values at once + let b = clamp256_neon(a); + + // store first vector + vst1q_s16( + ($out) + .get_mut($index..$index + 8) + .unwrap() + .as_mut_ptr() + .cast(), + b.0 + ); + $index += stride; + // second vector + vst1q_s16( + ($out) + .get_mut($index..$index + 8) + .unwrap() + .as_mut_ptr() + .cast(), + b.1 + ); + $index += stride; + }; + } + // Pack and write the values back to the array + permute_store!((row0.mm256), (row1.mm256), pos, out_vector); + permute_store!((row2.mm256), (row3.mm256), pos, out_vector); + permute_store!((row4.mm256), (row5.mm256), pos, out_vector); + permute_store!((row6.mm256), (row7.mm256), pos, out_vector); +} + +#[inline] +#[target_feature(enable = "neon")] +unsafe fn clamp_neon(reg: int16x8_t) -> int16x8_t { + let min_s = vdupq_n_s16(0); + let max_s = vdupq_n_s16(255); + + let max_v = vmaxq_s16(reg, min_s); //max(a,0) + let min_v = vminq_s16(max_v, max_s); //min(max(a,0),255) + min_v +} + +#[inline] +#[target_feature(enable = "neon")] +unsafe fn clamp256_neon(reg: int16x8x2_t) -> int16x8x2_t { + int16x8x2_t(clamp_neon(reg.0), clamp_neon(reg.1)) +} + +#[cfg(test)] +mod test { + use super::*; + + #[test] + fn test_neon_clamp_256() { + unsafe { + let vals: [i16; 16] = [-1, -2, -3, 4, 256, 257, 258, 240, -1, 290, 2, 3, 4, 5, 6, 7]; + let loaded = vld1q_s16_x2(vals.as_ptr().cast()); + let shuffled = clamp256_neon(loaded); + + let mut result: [i16; 16] = [0; 16]; + + vst1q_s16_x2(result.as_mut_ptr().cast(), shuffled); + + assert_eq!( + result, + [0, 0, 0, 4, 255, 255, 255, 240, 0, 255, 2, 3, 4, 5, 6, 7] + ) + } + } +} diff --git a/third_party/zune-jpeg/src/idct/scalar.rs b/third_party/zune-jpeg/src/idct/scalar.rs new file mode 100644 index 0000000..3120381 --- /dev/null +++ b/third_party/zune-jpeg/src/idct/scalar.rs @@ -0,0 +1,212 @@ +/* + * Copyright (c) 2023. 
+ * + * This software is free software; + * + * You can redistribute it or modify it under terms of the MIT, Apache License or Zlib license + */ + +//! Platform independent IDCT algorithm +//! +//! Not as fast as AVX one. + +const SCALE_BITS: i32 = 512 + 65536 + (128 << 17); + +#[allow(unused_assignments)] +#[allow( + clippy::too_many_lines, + clippy::op_ref, + clippy::cast_possible_truncation +)] +pub fn idct_int(in_vector: &mut [i32; 64], out_vector: &mut [i16], stride: usize) { + // Temporary variables. + + let mut pos = 0; + + let mut i = 0; + // Don't check for zeroes inside loop, lift it and check outside + // we want to accelerate the case with 63 0 ac coeff + if &in_vector[1..] == &[0_i32; 63] { + // okay then if you work, yay, let's write you really quick + let coeff = [(((in_vector[0] >> 3) + 128) as i16).clamp(0, 255); 8]; + + macro_rules! store { + ($index:tt) => { + // position of the MCU + let mcu_stride: &mut [i16; 8] = out_vector + .get_mut($index..$index + 8) + .unwrap() + .try_into() + .unwrap(); + // copy coefficients + mcu_stride.copy_from_slice(&coeff); + // increment index + $index += stride; + }; + } + // write to four positions + store!(pos); + store!(pos); + store!(pos); + store!(pos); + + store!(pos); + store!(pos); + store!(pos); + store!(pos); + } else { + // because the compiler fails to see that it can be auto_vectorised so i'll + // leave it here check out [idct_int_slow, and idct_int_1D to get what i mean ] https://godbolt.org/z/8hqW9z9j9 + for ptr in 0..8 { + let p2 = in_vector[ptr + 16]; + let p3 = in_vector[ptr + 48]; + + let p1 = (p2 + p3).wrapping_mul(2217); + + let t2 = p1 + p3 * -7567; + let t3 = p1 + p2 * 3135; + + let p2 = in_vector[ptr]; + let p3 = in_vector[32 + ptr]; + let t0 = fsh(p2 + p3); + let t1 = fsh(p2 - p3); + + let x0 = t0 + t3 + 512; + let x3 = t0 - t3 + 512; + let x1 = t1 + t2 + 512; + let x2 = t1 - t2 + 512; + + // odd part + let mut t0 = in_vector[ptr + 56]; + let mut t1 = in_vector[ptr + 40]; + let mut t2 = 
in_vector[ptr + 24]; + let mut t3 = in_vector[ptr + 8]; + + let p3 = t0 + t2; + let p4 = t1 + t3; + let p1 = t0 + t3; + let p2 = t1 + t2; + let p5 = (p3 + p4) * 4816; + + t0 *= 1223; + t1 *= 8410; + t2 *= 12586; + t3 *= 6149; + + let p1 = p5 + p1 * -3685; + let p2 = p5 + p2 * -10497; + let p3 = p3 * -8034; + let p4 = p4 * -1597; + + t3 += p1 + p4; + t2 += p2 + p3; + t1 += p2 + p4; + t0 += p1 + p3; + + // constants scaled things up by 1<<12; let's bring them back + // down, but keep 2 extra bits of precision + in_vector[ptr] = (x0 + t3) >> 10; + in_vector[ptr + 8] = (x1 + t2) >> 10; + in_vector[ptr + 16] = (x2 + t1) >> 10; + in_vector[ptr + 24] = (x3 + t0) >> 10; + in_vector[ptr + 32] = (x3 - t0) >> 10; + in_vector[ptr + 40] = (x2 - t1) >> 10; + in_vector[ptr + 48] = (x1 - t2) >> 10; + in_vector[ptr + 56] = (x0 - t3) >> 10; + } + + // This is vectorised in architectures supporting SSE 4.1 + while i < 64 { + // We won't try to short circuit here because it rarely works + + // Even part + let p2 = in_vector[i + 2]; + let p3 = in_vector[i + 6]; + + let p1 = (p2 + p3) * 2217; + let t2 = p1 + p3 * -7567; + let t3 = p1 + p2 * 3135; + + let p2 = in_vector[i]; + let p3 = in_vector[i + 4]; + + let t0 = fsh(p2 + p3); + let t1 = fsh(p2 - p3); + // constants scaled things up by 1<<12, plus we had 1<<2 from first + // loop, plus horizontal and vertical each scale by sqrt(8) so together + // we've got an extra 1<<3, so 1<<17 total we need to remove. + // so we want to round that, which means adding 0.5 * 1<<17, + // aka 65536. 
Also, we'll end up with -128 to 127 that we want + // to encode as 0..255 by adding 128, so we'll add that before the shift + let x0 = t0 + t3 + SCALE_BITS; + let x3 = t0 - t3 + SCALE_BITS; + let x1 = t1 + t2 + SCALE_BITS; + let x2 = t1 - t2 + SCALE_BITS; + // odd part + let mut t0 = in_vector[i + 7]; + let mut t1 = in_vector[i + 5]; + let mut t2 = in_vector[i + 3]; + let mut t3 = in_vector[i + 1]; + + let p3 = t0 + t2; + let p4 = t1 + t3; + let p1 = t0 + t3; + let p2 = t1 + t2; + let p5 = (p3 + p4) * f2f(1.175875602); + + t0 = t0.wrapping_mul(1223); + t1 = t1.wrapping_mul(8410); + t2 = t2.wrapping_mul(12586); + t3 = t3.wrapping_mul(6149); + + let p1 = p5 + p1 * -3685; + let p2 = p5 + p2 * -10497; + let p3 = p3 * -8034; + let p4 = p4 * -1597; + + t3 += p1 + p4; + t2 += p2 + p3; + t1 += p2 + p4; + t0 += p1 + p3; + + let out: &mut [i16; 8] = out_vector + .get_mut(pos..pos + 8) + .unwrap() + .try_into() + .unwrap(); + + out[0] = clamp((x0 + t3) >> 17); + out[1] = clamp((x1 + t2) >> 17); + out[2] = clamp((x2 + t1) >> 17); + out[3] = clamp((x3 + t0) >> 17); + out[4] = clamp((x3 - t0) >> 17); + out[5] = clamp((x2 - t1) >> 17); + out[6] = clamp((x1 - t2) >> 17); + out[7] = clamp((x0 - t3) >> 17); + + i += 8; + + pos += stride; + } + } +} + +#[inline] +#[allow(clippy::cast_possible_truncation)] +/// Multiply a number by 4096 +fn f2f(x: f32) -> i32 { + (x * 4096.0 + 0.5) as i32 +} + +#[inline] +/// Multiply a number by 4096 +fn fsh(x: i32) -> i32 { + x << 12 +} + +/// Clamp values between 0 and 255 +#[inline] +#[allow(clippy::cast_possible_truncation)] +fn clamp(a: i32) -> i16 { + a.clamp(0, 255) as i16 +} diff --git a/third_party/zune-jpeg/src/lib.rs b/third_party/zune-jpeg/src/lib.rs new file mode 100644 index 0000000..d20ab4f --- /dev/null +++ b/third_party/zune-jpeg/src/lib.rs @@ -0,0 +1,133 @@ +/* + * Copyright (c) 2023. 
+ * + * This software is free software; + * + * You can redistribute it or modify it under terms of the MIT, Apache License or Zlib license + */ + +//!This crate provides a library for decoding valid +//! ITU-T Rec. T.851 (09/2005) ITU-T T.81 (JPEG-1) or JPEG images. +//! +//! +//! +//! # Features +//! - SSE and AVX accelerated functions to speed up certain decoding operations +//! - FAST and accurate 32 bit IDCT algorithm +//! - Fast color convert functions +//! - RGBA and RGBX (4-Channel) color conversion functions +//! - YCbCr to Luma(Grayscale) conversion. +//! +//! # Usage +//! Add zune-jpeg to the dependencies in the project Cargo.toml +//! +//! ```toml +//! [dependencies] +//! zune_jpeg = "0.3" +//! ``` +//! # Examples +//! +//! ## Decode a JPEG file with default arguments. +//!```no_run +//! use std::fs::read; +//! use std::io::BufReader; +//! use zune_jpeg::JpegDecoder; +//! let file_contents = BufReader::new(std::fs::File::open("a_jpeg.file").unwrap()); +//! let mut decoder = JpegDecoder::new(file_contents); +//! let mut pixels = decoder.decode().unwrap(); +//! ``` +//! +//! ## Decode a JPEG file to RGBA format +//! +//! - Other (limited) supported formats are and BGR, BGRA +//! +//!```no_run +//! use zune_core::bytestream::ZCursor; +//! use zune_core::colorspace::ColorSpace; +//! use zune_core::options::DecoderOptions; +//! use zune_jpeg::JpegDecoder; +//! +//! let mut options = DecoderOptions::default().jpeg_set_out_colorspace(ColorSpace::RGBA); +//! +//! let mut decoder = JpegDecoder::new_with_options(ZCursor::new(&[]),options); +//! let pixels = decoder.decode().unwrap(); +//! ``` +//! +//! ## Decode an image and get it's width and height. +//!```no_run +//! use zune_core::bytestream::ZCursor; +//! use zune_jpeg::JpegDecoder; +//! +//! let mut decoder = JpegDecoder::new(ZCursor::new(&[])); +//! decoder.decode_headers().unwrap(); +//! let image_info = decoder.info().unwrap(); +//! println!("{},{}",image_info.width,image_info.height) +//! ``` +//! 
# Crate features. +//! This crate tries to be as minimal as possible while being extensible +//! enough to handle the complexities arising from parsing different types +//! of jpeg images. +//! +//! Safety is a top concern that is why we provide both static ways to disable unsafe code, +//! disabling x86 feature, and dynamic ,by using [`DecoderOptions::set_use_unsafe(false)`], +//! both of these disable platform specific optimizations, which reduce the speed of decompression. +//! +//! Please do note that careful consideration has been taken to ensure that the unsafe paths +//! are only unsafe because they depend on platform specific intrinsics, hence no need to disable them +//! +//! The crate tries to decode as many images as possible, as a best effort, even those violating the standard +//! , this means a lot of images may get silent warnings and wrong output, but if you are sure you will be handling +//! images that follow the spec, set `ZuneJpegOptions::set_strict` to true. +//! +//![`DecoderOptions::set_use_unsafe(false)`]: https://docs.rs/zune-core/0.2.1/zune_core/options/struct.DecoderOptions.html#method.set_use_unsafe + +#![warn( + clippy::correctness, + clippy::perf, + clippy::pedantic, + clippy::inline_always, + clippy::missing_errors_doc, + clippy::panic +)] +#![allow( + clippy::needless_return, + clippy::similar_names, + clippy::inline_always, + clippy::similar_names, + clippy::doc_markdown, + clippy::module_name_repetitions, + clippy::missing_panics_doc, + clippy::missing_errors_doc +)] +// no_std compatibility +#![deny(clippy::std_instead_of_alloc, clippy::alloc_instead_of_core)] +#![cfg_attr(not(feature = "x86"), forbid(unsafe_code))] +#![cfg_attr(not(feature = "std"), no_std)] +#![macro_use] +extern crate alloc; +extern crate core; + +pub use zune_core; + +pub use crate::decoder::{ImageInfo, JpegDecoder}; + +mod bitstream; +mod color_convert; +mod components; +mod decoder; +pub mod errors; +mod headers; +mod huffman; +#[cfg(not(fuzzing))] +mod 
idct; +#[cfg(fuzzing)] +pub mod idct; +mod marker; +mod mcu; +mod mcu_prog; +mod misc; +mod unsafe_utils; +mod unsafe_utils_avx2; +mod unsafe_utils_neon; +mod upsampler; +mod worker; diff --git a/third_party/zune-jpeg/src/marker.rs b/third_party/zune-jpeg/src/marker.rs new file mode 100644 index 0000000..f2d6ac1 --- /dev/null +++ b/third_party/zune-jpeg/src/marker.rs @@ -0,0 +1,85 @@ +/* + * Copyright (c) 2023. + * + * This software is free software; + * + * You can redistribute it or modify it under terms of the MIT, Apache License or Zlib license + */ + +#![allow(clippy::upper_case_acronyms)] + +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub enum Marker { + /// Start Of Frame markers + /// + /// - SOF(0): Baseline DCT (Huffman coding) + /// - SOF(1): Extended sequential DCT (Huffman coding) + /// - SOF(2): Progressive DCT (Huffman coding) + /// - SOF(3): Lossless (sequential) (Huffman coding) + /// - SOF(5): Differential sequential DCT (Huffman coding) + /// - SOF(6): Differential progressive DCT (Huffman coding) + /// - SOF(7): Differential lossless (sequential) (Huffman coding) + /// - SOF(9): Extended sequential DCT (arithmetic coding) + /// - SOF(10): Progressive DCT (arithmetic coding) + /// - SOF(11): Lossless (sequential) (arithmetic coding) + /// - SOF(13): Differential sequential DCT (arithmetic coding) + /// - SOF(14): Differential progressive DCT (arithmetic coding) + /// - SOF(15): Differential lossless (sequential) (arithmetic coding) + SOF(u8), + /// Define Huffman table(s) + DHT, + /// Define arithmetic coding conditioning(s) + DAC, + /// Restart with modulo 8 count `m` + RST(u8), + /// Start of image + SOI, + /// End of image + EOI, + /// Start of scan + SOS, + /// Define quantization table(s) + DQT, + /// Define number of lines + DNL, + /// Define restart interval + DRI, + /// Reserved for application segments + APP(u8), + /// Comment + COM +} + +impl Marker { + pub fn from_u8(n: u8) -> Option { + use self::Marker::{APP, COM, DAC, DHT, DNL, 
DQT, DRI, EOI, RST, SOF, SOI, SOS}; + + match n { + 0xFE => Some(COM), + 0xC0 => Some(SOF(0)), + 0xC1 => Some(SOF(1)), + 0xC2 => Some(SOF(2)), + 0xC4 => Some(DHT), + 0xCC => Some(DAC), + 0xD0 => Some(RST(0)), + 0xD1 => Some(RST(1)), + 0xD2 => Some(RST(2)), + 0xD3 => Some(RST(3)), + 0xD4 => Some(RST(4)), + 0xD5 => Some(RST(5)), + 0xD6 => Some(RST(6)), + 0xD7 => Some(RST(7)), + 0xD8 => Some(SOI), + 0xD9 => Some(EOI), + 0xDA => Some(SOS), + 0xDB => Some(DQT), + 0xDC => Some(DNL), + 0xDD => Some(DRI), + 0xE0 => Some(APP(0)), + 0xE1 => Some(APP(1)), + 0xE2 => Some(APP(2)), + 0xEE => Some(APP(14)), + _ => None + } + } +} diff --git a/third_party/zune-jpeg/src/mcu.rs b/third_party/zune-jpeg/src/mcu.rs new file mode 100644 index 0000000..2d7b406 --- /dev/null +++ b/third_party/zune-jpeg/src/mcu.rs @@ -0,0 +1,504 @@ +/* + * Copyright (c) 2023. + * + * This software is free software; + * + * You can redistribute it or modify it under terms of the MIT, Apache License or Zlib license + */ + +use alloc::{format, vec}; +use core::cmp::min; + +use zune_core::bytestream::ZByteReaderTrait; +use zune_core::colorspace::ColorSpace; +use zune_core::colorspace::ColorSpace::Luma; +use zune_core::log::{error, trace, warn}; + +use crate::bitstream::BitStream; +use crate::components::SampleRatios; +use crate::decoder::MAX_COMPONENTS; +use crate::errors::DecodeErrors; +use crate::marker::Marker; +use crate::misc::{calculate_padded_width, setup_component_params}; +use crate::worker::{color_convert, upsample}; +use crate::JpegDecoder; + +/// The size of a DC block for a MCU. 
+ +pub const DCT_BLOCK: usize = 64; + +impl JpegDecoder { + /// Check for existence of DC and AC Huffman Tables + pub(crate) fn check_tables(&self) -> Result<(), DecodeErrors> { + // check that dc and AC tables exist outside the hot path + for component in &self.components { + let _ = &self + .dc_huffman_tables + .get(component.dc_huff_table) + .as_ref() + .ok_or_else(|| { + DecodeErrors::HuffmanDecode(format!( + "No Huffman DC table for component {:?} ", + component.component_id + )) + })? + .as_ref() + .ok_or_else(|| { + DecodeErrors::HuffmanDecode(format!( + "No DC table for component {:?}", + component.component_id + )) + })?; + + let _ = &self + .ac_huffman_tables + .get(component.ac_huff_table) + .as_ref() + .ok_or_else(|| { + DecodeErrors::HuffmanDecode(format!( + "No Huffman AC table for component {:?} ", + component.component_id + )) + })? + .as_ref() + .ok_or_else(|| { + DecodeErrors::HuffmanDecode(format!( + "No AC table for component {:?}", + component.component_id + )) + })?; + } + Ok(()) + } + + /// Decode MCUs and carry out post processing. + /// + /// This is the main decoder loop for the library, the hot path. + /// + /// Because of this, we pull in some very crazy optimization tricks hence readability is a pinch + /// here. + #[allow( + clippy::similar_names, + clippy::too_many_lines, + clippy::cast_possible_truncation + )] + #[inline(never)] + pub(crate) fn decode_mcu_ycbcr_baseline( + &mut self, pixels: &mut [u8] + ) -> Result<(), DecodeErrors> { + setup_component_params(self)?; + + // check dc and AC tables + self.check_tables()?; + + let (mut mcu_width, mut mcu_height); + + if self.is_interleaved { + // set upsampling functions + self.set_upsampling()?; + + mcu_width = self.mcu_x; + mcu_height = self.mcu_y; + } else { + // For non-interleaved images( (1*1) subsampling) + // number of MCU's are the widths (+7 to account for paddings) divided bu 8. 
+ mcu_width = ((self.info.width + 7) / 8) as usize; + mcu_height = ((self.info.height + 7) / 8) as usize; + } + if self.is_interleaved + && self.input_colorspace.num_components() > 1 + && self.options.jpeg_get_out_colorspace().num_components() == 1 + && (self.sub_sample_ratio == SampleRatios::V + || self.sub_sample_ratio == SampleRatios::HV) + { + // For a specific set of images, e.g interleaved, + // when converting from YcbCr to grayscale, we need to + // take into account mcu height since the MCU decoding needs to take + // it into account for padding purposes and the post processor + // parses two rows per mcu width. + // + // set coeff to be 2 to ensure that we increment two rows + // for every mcu processed also + mcu_height *= self.v_max; + mcu_height /= self.h_max; + self.coeff = 2; + } + + if self.input_colorspace.num_components() > self.components.len() { + let msg = format!( + " Expected {} number of components but found {}", + self.input_colorspace.num_components(), + self.components.len() + ); + return Err(DecodeErrors::Format(msg)); + } + + if self.input_colorspace == ColorSpace::Luma && self.is_interleaved { + warn!("Grayscale image with down-sampled component, resetting component details"); + + self.reset_params(); + + mcu_width = ((self.info.width + 7) / 8) as usize; + mcu_height = ((self.info.height + 7) / 8) as usize; + } + let width = usize::from(self.info.width); + + let padded_width = calculate_padded_width(width, self.sub_sample_ratio); + + let mut stream = BitStream::new(); + let mut tmp = [0_i32; DCT_BLOCK]; + + let comp_len = self.components.len(); + + for (pos, comp) in self.components.iter_mut().enumerate() { + // Allocate only needed components. + // + // For special colorspaces i.e YCCK and CMYK, just allocate all of the needed + // components. 
+ if min( + self.options.jpeg_get_out_colorspace().num_components() - 1, + pos + ) == pos + || comp_len == 4 + // Special colorspace + { + // allocate enough space to hold a whole MCU width + // this means we should take into account sampling ratios + // `*8` is because each MCU spans 8 widths. + let len = comp.width_stride * comp.vertical_sample * 8; + + comp.needed = true; + comp.raw_coeff = vec![0; len]; + } else { + comp.needed = false; + } + } + + let mut pixels_written = 0; + + let is_hv = usize::from(self.is_interleaved); + let upsampler_scratch_size = is_hv * self.components[0].width_stride; + let mut upsampler_scratch_space = vec![0; upsampler_scratch_size]; + + for i in 0..mcu_height { + // Report if we have no more bytes + // This may generate false negatives since we over-read bytes + // hence that why 37 is chosen(we assume if we over-read more than 37 bytes, we have a problem) + if stream.overread_by > 37 + // favourite number :) + { + if self.options.strict_mode() { + return Err(DecodeErrors::FormatStatic("Premature end of buffer")); + }; + + error!("Premature end of buffer"); + break; + } + // decode a whole MCU width, + // this takes into account interleaved components. 
+ self.decode_mcu_width(mcu_width, &mut tmp, &mut stream)?; + // process that width up until it's impossible + self.post_process( + pixels, + i, + mcu_height, + width, + padded_width, + &mut pixels_written, + &mut upsampler_scratch_space + )?; + } + // it may happen that some images don't have the whole buffer + // so we can't panic in case of that + // assert_eq!(pixels_written, pixels.len()); + + trace!("Finished decoding image"); + + Ok(()) + } + fn decode_mcu_width( + &mut self, mcu_width: usize, tmp: &mut [i32; 64], stream: &mut BitStream + ) -> Result<(), DecodeErrors> { + for j in 0..mcu_width { + // iterate over components + for component in &mut self.components { + let dc_table = self.dc_huffman_tables[component.dc_huff_table % MAX_COMPONENTS] + .as_ref() + .unwrap(); + + let ac_table = self.ac_huffman_tables[component.ac_huff_table % MAX_COMPONENTS] + .as_ref() + .unwrap(); + + let qt_table = &component.quantization_table; + let channel = &mut component.raw_coeff; + + // If image is interleaved iterate over scan components, + // otherwise if it-s non-interleaved, these routines iterate in + // trivial scanline order(Y,Cb,Cr) + for v_samp in 0..component.vertical_sample { + for h_samp in 0..component.horizontal_sample { + // Fill the array with zeroes, decode_mcu_block expects + // a zero based array. + tmp.fill(0); + + stream.decode_mcu_block( + &mut self.stream, + dc_table, + ac_table, + qt_table, + tmp, + &mut component.dc_pred + )?; + + if component.needed { + let idct_position = { + // derived from stb and rewritten for my tastes + let c2 = v_samp * 8; + let c3 = ((j * component.horizontal_sample) + h_samp) * 8; + + component.width_stride * c2 + c3 + }; + + let idct_pos = channel.get_mut(idct_position..).unwrap(); + // call idct. 
+ (self.idct_func)(tmp, idct_pos, component.width_stride); + } + } + } + } + self.todo = self.todo.saturating_sub(1); + // After all interleaved components, that's an MCU + // handle stream markers + // + // In some corrupt images, it may occur that header markers occur in the stream. + // The spec EXPLICITLY FORBIDS this, specifically, in + // routine F.2.2.5 it says + // `The only valid marker which may occur within the Huffman coded data is the RSTm marker.` + // + // But libjpeg-turbo allows it because of some weird reason. so I'll also + // allow it because of some weird reason. + if let Some(m) = stream.marker { + if m == Marker::EOI { + // acknowledge and ignore EOI marker. + stream.marker.take(); + trace!("Found EOI marker"); + // Google Introduced the Ultra-HD image format which is basically + // stitching two images into one container. + // They basically separate two images via a EOI and SOI marker + // so let's just ensure if we ever see EOI, we never read past that + // ever. + // https://github.com/google/libultrahdr + stream.seen_eoi = true; + } else if let Marker::RST(_) = m { + if self.todo == 0 { + self.handle_rst(stream)?; + } + } else { + if self.options.strict_mode() { + return Err(DecodeErrors::Format(format!( + "Marker {m:?} found where not expected" + ))); + } + error!( + "Marker `{:?}` Found within Huffman Stream, possibly corrupt jpeg", + m + ); + + self.parse_marker_inner(m)?; + } + } + } + Ok(()) + } + // handle RST markers. 
+ // No-op if not using restarts + // this routine is shared with mcu_prog + #[cold] + pub(crate) fn handle_rst(&mut self, stream: &mut BitStream) -> Result<(), DecodeErrors> { + self.todo = self.restart_interval; + + if let Some(marker) = stream.marker { + // Found a marker + // Read stream and see what marker is stored there + match marker { + Marker::RST(_) => { + // reset stream + stream.reset(); + // Initialize dc predictions to zero for all components + self.components.iter_mut().for_each(|x| x.dc_pred = 0); + // Start iterating again. from position. + } + Marker::EOI => { + // silent pass + } + _ => { + return Err(DecodeErrors::MCUError(format!( + "Marker {marker:?} found in bitstream, possibly corrupt jpeg" + ))); + } + } + } + Ok(()) + } + #[allow(clippy::too_many_lines, clippy::too_many_arguments)] + pub(crate) fn post_process( + &mut self, pixels: &mut [u8], i: usize, mcu_height: usize, width: usize, + padded_width: usize, pixels_written: &mut usize, upsampler_scratch_space: &mut [i16] + ) -> Result<(), DecodeErrors> { + let out_colorspace_components = self.options.jpeg_get_out_colorspace().num_components(); + + let mut px = *pixels_written; + // indicates whether image is vertically up-sampled + let is_vertically_sampled = self + .components + .iter() + .any(|c| c.sample_ratio == SampleRatios::HV || c.sample_ratio == SampleRatios::V); + + let mut comp_len = self.components.len(); + + // If we are moving from YCbCr-> Luma, we do not allocate storage for other components, so we + // will panic when we are trying to read samples, so for that case, + // hardcode it so that we don't panic when doing + // *samp = &samples[j][pos * padded_width..(pos + 1) * padded_width] + if out_colorspace_components < comp_len && self.options.jpeg_get_out_colorspace() == Luma { + comp_len = out_colorspace_components; + } + let mut color_conv_function = + |num_iters: usize, samples: [&[i16]; 4]| -> Result<(), DecodeErrors> { + for (pos, output) in pixels[px..] 
+ .chunks_exact_mut(width * out_colorspace_components) + .take(num_iters) + .enumerate() + { + let mut raw_samples: [&[i16]; 4] = [&[], &[], &[], &[]]; + + // iterate over each line, since color-convert needs only + // one line + for (j, samp) in raw_samples.iter_mut().enumerate().take(comp_len) { + *samp = &samples[j][pos * padded_width..(pos + 1) * padded_width]; + } + color_convert( + &raw_samples, + self.color_convert_16, + self.input_colorspace, + self.options.jpeg_get_out_colorspace(), + output, + width, + padded_width + )?; + px += width * out_colorspace_components; + } + Ok(()) + }; + + let comps = &mut self.components[..]; + + if self.is_interleaved && self.options.jpeg_get_out_colorspace() != ColorSpace::Luma { + { + // duplicated so that we can check that samples match + // Fixes bug https://github.com/etemesi254/zune-image/issues/151 + let mut samples: [&[i16]; 4] = [&[], &[], &[], &[]]; + + for (samp, component) in samples.iter_mut().zip(comps.iter()) { + *samp = if component.sample_ratio == SampleRatios::None { + &component.raw_coeff + } else { + &component.upsample_dest + }; + } + } + for comp in comps.iter_mut() { + upsample(comp, mcu_height, i, upsampler_scratch_space); + } + + if is_vertically_sampled { + if i > 0 { + // write the last line, it wasn't up-sampled as we didn't have row_down + // yet + let mut samples: [&[i16]; 4] = [&[], &[], &[], &[]]; + + for (samp, component) in samples.iter_mut().zip(comps.iter()) { + *samp = &component.first_row_upsample_dest; + } + + // ensure length matches for all samples + let first_len = samples[0].len(); + for samp in samples.iter().take(comp_len) { + assert_eq!(first_len, samp.len()); + } + let num_iters = self.coeff * self.v_max; + + color_conv_function(num_iters, samples)?; + } + + // After upsampling the last row, save any row that can be used for + // a later upsampling, + // + // E.g the Y sample is not sampled but we haven't finished upsampling the last row of + // the previous mcu, since we don't 
have the down row, so save it + for component in comps.iter_mut() { + // copy last row to be used for the next color conversion + let size = component.vertical_sample + * component.width_stride + * component.sample_ratio.sample(); + + let last_bytes = component.raw_coeff.rchunks_exact_mut(size).next().unwrap(); + + component + .first_row_upsample_dest + .copy_from_slice(last_bytes); + } + } + + let mut samples: [&[i16]; 4] = [&[], &[], &[], &[]]; + + for (samp, component) in samples.iter_mut().zip(comps.iter()) { + *samp = if component.sample_ratio == SampleRatios::None { + &component.raw_coeff + } else { + &component.upsample_dest + }; + } + + // we either do 7 or 8 MCU's depending on the state, this only applies to + // vertically sampled images + // + // for rows up until the last MCU, we do not upsample the last stride of the MCU + // which means that the number of iterations should take that into account is one less the + // up-sampled size + // + // For the last MCU, we upsample the last stride, meaning that if we hit the last MCU, we + // should sample full raw coeffs + let is_last_considered = is_vertically_sampled && (i != mcu_height.saturating_sub(1)); + + let num_iters = (8 - usize::from(is_last_considered)) * self.coeff * self.v_max; + + color_conv_function(num_iters, samples)?; + } else { + let mut channels_ref: [&[i16]; MAX_COMPONENTS] = [&[]; MAX_COMPONENTS]; + + self.components + .iter() + .enumerate() + .for_each(|(pos, x)| channels_ref[pos] = &x.raw_coeff); + + color_conv_function(8 * self.coeff, channels_ref)?; + } + + *pixels_written = px; + Ok(()) + } +} +// #[cfg(test)] +// mod tests { +// use zune_core::bytestream::ZCursor; +// +// use crate::JpegDecoder; +// +// #[test] +// fn im() { +// let image = std::fs::read("/home/caleb/Downloads/re.jpg").unwrap(); +// JpegDecoder::new(ZCursor::new(&image)).decode().unwrap(); +// } +// } diff --git a/third_party/zune-jpeg/src/mcu_prog.rs b/third_party/zune-jpeg/src/mcu_prog.rs new file mode 100644 
index 0000000..ce309a6 --- /dev/null +++ b/third_party/zune-jpeg/src/mcu_prog.rs @@ -0,0 +1,617 @@ +/* + * Copyright (c) 2023. + * + * This software is free software; + * + * You can redistribute it or modify it under terms of the MIT, Apache License or Zlib license + */ + +//!Routines for progressive decoding +/* +This file is needlessly complicated, + +It is that way to ensure we don't burn memory anyhow + +Memory is a scarce resource in some environments, I would like this to be viable +in such environments + +Half of the complexity comes from the jpeg spec, because progressive decoding, +is one hell of a ride. + +*/ +use alloc::string::ToString; +use alloc::vec::Vec; +use alloc::{format, vec}; +use core::cmp::min; + +use zune_core::bytestream::{ZByteReaderTrait, ZReader}; +use zune_core::colorspace::ColorSpace; +use zune_core::log::{debug, error, warn}; + +use crate::bitstream::BitStream; +use crate::components::{ComponentID, SampleRatios}; +use crate::decoder::{JpegDecoder, MAX_COMPONENTS}; +use crate::errors::DecodeErrors; +use crate::errors::DecodeErrors::Format; +use crate::headers::{parse_huffman, parse_sos}; +use crate::marker::Marker; +use crate::mcu::DCT_BLOCK; +use crate::misc::{calculate_padded_width, setup_component_params}; + +impl JpegDecoder { + /// Decode a progressive image + /// + /// This routine decodes a progressive image, stopping if it finds any error. 
+ #[allow( + clippy::needless_range_loop, + clippy::cast_sign_loss, + clippy::redundant_else, + clippy::too_many_lines + )] + #[inline(never)] + pub(crate) fn decode_mcu_ycbcr_progressive( + &mut self, pixels: &mut [u8] + ) -> Result<(), DecodeErrors> { + setup_component_params(self)?; + + let mut mcu_height; + + // memory location for decoded pixels for components + let mut block: [Vec; MAX_COMPONENTS] = [vec![], vec![], vec![], vec![]]; + let mut mcu_width; + + let mut seen_scans = 1; + + if self.input_colorspace == ColorSpace::Luma && self.is_interleaved { + warn!("Grayscale image with down-sampled component, resetting component details"); + self.reset_params(); + } + + if self.is_interleaved { + // this helps us catch component errors. + self.set_upsampling()?; + } + if self.is_interleaved { + mcu_width = self.mcu_x; + mcu_height = self.mcu_y; + } else { + mcu_width = (self.info.width as usize + 7) / 8; + mcu_height = (self.info.height as usize + 7) / 8; + } + if self.is_interleaved + && self.input_colorspace.num_components() > 1 + && self.options.jpeg_get_out_colorspace().num_components() == 1 + && (self.sub_sample_ratio == SampleRatios::V + || self.sub_sample_ratio == SampleRatios::HV) + { + // For a specific set of images, e.g interleaved, + // when converting from YcbCr to grayscale, we need to + // take into account mcu height since the MCU decoding needs to take + // it into account for padding purposes and the post processor + // parses two rows per mcu width. 
+ // + // set coeff to be 2 to ensure that we increment two rows + // for every mcu processed also + mcu_height *= self.v_max; + mcu_height /= self.h_max; + self.coeff = 2; + } + + mcu_width *= 64; + + if self.input_colorspace.num_components() > self.components.len() { + let msg = format!( + " Expected {} number of components but found {}", + self.input_colorspace.num_components(), + self.components.len() + ); + return Err(DecodeErrors::Format(msg)); + } + for i in 0..self.input_colorspace.num_components() { + let comp = &self.components[i]; + let len = mcu_width * comp.vertical_sample * comp.horizontal_sample * mcu_height; + + block[i] = vec![0; len]; + } + + let mut stream = BitStream::new_progressive( + self.succ_high, + self.succ_low, + self.spec_start, + self.spec_end + ); + + // there are multiple scans in the stream, this should resolve the first scan + self.parse_entropy_coded_data(&mut stream, &mut block)?; + + // extract marker + let mut marker = stream + .marker + .take() + .ok_or(DecodeErrors::FormatStatic("Marker missing where expected"))?; + + // if marker is EOI, we are done, otherwise continue scanning. + // + // In case we have a premature image, we print a warning or return + // an error, depending on the strictness of the decoder, so there + // is that logic to handle too + 'eoi: while marker != Marker::EOI { + match marker { + Marker::DHT => { + parse_huffman(self)?; + } + Marker::SOS => { + parse_sos(self)?; + + stream.update_progressive_params( + self.succ_high, + self.succ_low, + self.spec_start, + self.spec_end + ); + + // after every SOS, marker, parse data for that scan. + self.parse_entropy_coded_data(&mut stream, &mut block)?; + // extract marker, might either indicate end of image or we continue + // scanning(hence the continue statement to determine). 
+ match get_marker(&mut self.stream, &mut stream) { + Ok(marker_n) => { + marker = marker_n; + seen_scans += 1; + if seen_scans > self.options.jpeg_get_max_scans() { + return Err(DecodeErrors::Format(format!( + "Too many scans, exceeded limit of {}", + self.options.jpeg_get_max_scans() + ))); + } + + stream.reset(); + continue 'eoi; + } + Err(msg) => { + if self.options.strict_mode() { + return Err(msg); + } + error!("{:?}", msg); + break 'eoi; + } + } + } + _ => { + break 'eoi; + } + } + + match get_marker(&mut self.stream, &mut stream) { + Ok(marker_n) => { + marker = marker_n; + } + Err(e) => { + if self.options.strict_mode() { + return Err(e); + } + error!("{}", e); + } + } + } + + self.finish_progressive_decoding(&block, mcu_width, pixels) + } + + #[allow(clippy::too_many_lines, clippy::cast_sign_loss)] + fn parse_entropy_coded_data( + &mut self, stream: &mut BitStream, buffer: &mut [Vec; MAX_COMPONENTS] + ) -> Result<(), DecodeErrors> { + stream.reset(); + self.components.iter_mut().for_each(|x| x.dc_pred = 0); + + if usize::from(self.num_scans) > self.input_colorspace.num_components() { + return Err(Format(format!( + "Number of scans {} cannot be greater than number of components, {}", + self.num_scans, + self.input_colorspace.num_components() + ))); + } + + if self.num_scans == 1 { + // Safety checks + if self.spec_end != 0 && self.spec_start == 0 { + return Err(DecodeErrors::FormatStatic( + "Can't merge DC and AC corrupt jpeg" + )); + } + // non interleaved data, process one block at a time in trivial scanline order + + let k = self.z_order[0]; + + if k >= self.components.len() { + return Err(DecodeErrors::Format(format!( + "Cannot find component {k}, corrupt image" + ))); + } + + let (mcu_width, mcu_height); + + if self.components[k].component_id == ComponentID::Y + && (self.components[k].vertical_sample != 1 + || self.components[k].horizontal_sample != 1) + || !self.is_interleaved + { + // For Y channel or non interleaved scans , + // mcu's is the image 
dimensions divided by 8 + mcu_width = ((self.info.width + 7) / 8) as usize; + mcu_height = ((self.info.height + 7) / 8) as usize; + } else { + // For other channels, in an interleaved mcu, number of MCU's + // are determined by some weird maths done in headers.rs->parse_sos() + mcu_width = self.mcu_x; + mcu_height = self.mcu_y; + } + + for i in 0..mcu_height { + for j in 0..mcu_width { + if self.spec_start != 0 && self.succ_high == 0 && stream.eob_run > 0 { + // handle EOB runs here. + stream.eob_run -= 1; + continue; + } + let start = 64 * (j + i * (self.components[k].width_stride / 8)); + + let data: &mut [i16; 64] = buffer + .get_mut(k) + .unwrap() + .get_mut(start..start + 64) + .unwrap() + .try_into() + .unwrap(); + + if self.spec_start == 0 { + let pos = self.components[k].dc_huff_table & (MAX_COMPONENTS - 1); + let dc_table = self + .dc_huffman_tables + .get(pos) + .ok_or(DecodeErrors::FormatStatic( + "No huffman table for DC component" + ))? + .as_ref() + .ok_or(DecodeErrors::FormatStatic( + "Huffman table at index {} not initialized" + ))?; + + let dc_pred = &mut self.components[k].dc_pred; + + if self.succ_high == 0 { + // first scan for this mcu + stream.decode_prog_dc_first( + &mut self.stream, + dc_table, + &mut data[0], + dc_pred + )?; + } else { + // refining scans for this MCU + stream.decode_prog_dc_refine(&mut self.stream, &mut data[0])?; + } + } else { + let pos = self.components[k].ac_huff_table; + let ac_table = self + .ac_huffman_tables + .get(pos) + .ok_or_else(|| { + DecodeErrors::Format(format!( + "No huffman table for component:{pos}" + )) + })? 
+ .as_ref() + .ok_or_else(|| { + DecodeErrors::Format(format!( + "Huffman table at index {pos} not initialized" + )) + })?; + + if self.succ_high == 0 { + debug_assert!(stream.eob_run == 0, "EOB run is not zero"); + + stream.decode_mcu_ac_first(&mut self.stream, ac_table, data)?; + } else { + // refinement scan + stream.decode_mcu_ac_refine(&mut self.stream, ac_table, data)?; + } + } + // + EOB and investigate effect. + self.todo -= 1; + + if self.todo == 0 { + self.handle_rst(stream)?; + } + } + } + } else { + if self.spec_end != 0 { + return Err(DecodeErrors::HuffmanDecode( + "Can't merge dc and AC corrupt jpeg".to_string() + )); + } + // process scan n elements in order + + // Do the error checking with allocs here. + // Make the one in the inner loop free of allocations. + for k in 0..self.num_scans { + let n = self.z_order[k as usize]; + + if n >= self.components.len() { + return Err(DecodeErrors::Format(format!( + "Cannot find component {n}, corrupt image" + ))); + } + + let component = &mut self.components[n]; + let _ = self + .dc_huffman_tables + .get(component.dc_huff_table) + .ok_or_else(|| { + DecodeErrors::Format(format!( + "No huffman table for component:{}", + component.dc_huff_table + )) + })? + .as_ref() + .ok_or_else(|| { + DecodeErrors::Format(format!( + "Huffman table at index {} not initialized", + component.dc_huff_table + )) + })?; + } + // Interleaved scan + + // Components shall not be interleaved in progressive mode, except for + // the DC coefficients in the first scan for each component of a progressive frame. + for i in 0..self.mcu_y { + for j in 0..self.mcu_x { + // process scan n elements in order + for k in 0..self.num_scans { + let n = self.z_order[k as usize]; + let component = &mut self.components[n]; + let huff_table = self + .dc_huffman_tables + .get(component.dc_huff_table) + .ok_or(DecodeErrors::FormatStatic("No huffman table for component"))? 
+ .as_ref() + .ok_or(DecodeErrors::FormatStatic( + "Huffman table at index not initialized" + ))?; + + for v_samp in 0..component.vertical_sample { + for h_samp in 0..component.horizontal_sample { + let x2 = j * component.horizontal_sample + h_samp; + let y2 = i * component.vertical_sample + v_samp; + let position = 64 * (x2 + y2 * component.width_stride / 8); + + let data = &mut buffer[n][position]; + + if self.succ_high == 0 { + stream.decode_prog_dc_first( + &mut self.stream, + huff_table, + data, + &mut component.dc_pred + )?; + } else { + stream.decode_prog_dc_refine(&mut self.stream, data)?; + } + } + } + } + // We want wrapping subtraction here because it means + // we get a higher number in the case this underflows + self.todo = self.todo.wrapping_sub(1); + // after every scan that's a mcu, count down restart markers. + if self.todo == 0 { + self.handle_rst(stream)?; + } + } + } + } + return Ok(()); + } + + #[allow(clippy::too_many_lines)] + #[allow(clippy::needless_range_loop, clippy::cast_sign_loss)] + fn finish_progressive_decoding( + &mut self, block: &[Vec; MAX_COMPONENTS], _mcu_width: usize, pixels: &mut [u8] + ) -> Result<(), DecodeErrors> { + // This function is complicated because we need to replicate + // the function in mcu.rs + // + // The advantage is that we do very little allocation and very lot + // channel reusing. + // The trick is to notice that we repeat the same procedure per MCU + // width. + // + // So we can set it up that we only allocate temporary storage large enough + // to store a single mcu width, then reuse it per invocation. + // + // This is advantageous to us. + // + // Remember we need to have the whole MCU buffer so we store 3 unprocessed + // channels in memory, and then we allocate the whole output buffer in memory, both of + // which are huge. 
+ // + // + + let mcu_height = if self.is_interleaved { + self.mcu_y + } else { + // For non-interleaved images( (1*1) subsampling) + // number of MCU's are the widths (+7 to account for paddings) divided by 8. + ((self.info.height + 7) / 8) as usize + }; + + // Size of our output image(width*height) + let is_hv = usize::from(self.is_interleaved); + let upsampler_scratch_size = is_hv * self.components[0].width_stride; + let width = usize::from(self.info.width); + let padded_width = calculate_padded_width(width, self.sub_sample_ratio); + + //let mut pixels = vec![0; capacity * out_colorspace_components]; + let mut upsampler_scratch_space = vec![0; upsampler_scratch_size]; + let mut tmp = [0_i32; DCT_BLOCK]; + + for (pos, comp) in self.components.iter_mut().enumerate() { + // Allocate only needed components. + // + // For special colorspaces i.e YCCK and CMYK, just allocate all of the needed + // components. + if min( + self.options.jpeg_get_out_colorspace().num_components() - 1, + pos + ) == pos + || self.input_colorspace == ColorSpace::YCCK + || self.input_colorspace == ColorSpace::CMYK + { + // allocate enough space to hold a whole MCU width + // this means we should take into account sampling ratios + // `*8` is because each MCU spans 8 widths. + let len = comp.width_stride * comp.vertical_sample * 8; + + comp.needed = true; + comp.raw_coeff = vec![0; len]; + } else { + comp.needed = false; + } + } + + let mut pixels_written = 0; + + // dequantize, idct and color convert. 
+ for i in 0..mcu_height { + 'component: for (position, component) in &mut self.components.iter_mut().enumerate() { + if !component.needed { + continue 'component; + } + let qt_table = &component.quantization_table; + + // step is the number of pixels this iteration wil be handling + // Given by the number of mcu's height and the length of the component block + // Since the component block contains the whole channel as raw pixels + // we this evenly divides the pixels into MCU blocks + // + // For interleaved images, this gives us the exact pixels comprising a whole MCU + // block + let step = block[position].len() / mcu_height; + // where we will be reading our pixels from. + let start = i * step; + + let slice = &block[position][start..start + step]; + + let temp_channel = &mut component.raw_coeff; + + // The next logical step is to iterate width wise. + // To figure out how many pixels we iterate by we use effective pixels + // Given to us by component.x + // iterate per effective pixels. + let mcu_x = component.width_stride / 8; + + // iterate per every vertical sample. + for k in 0..component.vertical_sample { + for j in 0..mcu_x { + // after writing a single stride, we need to skip 8 rows. + // This does the row calculation + let width_stride = k * 8 * component.width_stride; + let start = j * 64 + width_stride; + + // dequantize + for ((x, out), qt_val) in slice[start..start + 64] + .iter() + .zip(tmp.iter_mut()) + .zip(qt_table.iter()) + { + *out = i32::from(*x) * qt_val; + } + // determine where to write. + let sl = &mut temp_channel[component.idct_pos..]; + + component.idct_pos += 8; + // tmp now contains a dequantized block so idct it + (self.idct_func)(&mut tmp, sl, component.width_stride); + } + // after every write of 8, skip 7 since idct write stride wise 8 times. 
+ // + // Remember each MCU is 8x8 block, so each idct will write 8 strides into + // sl + // + // and component.idct_pos is one stride long + component.idct_pos += 7 * component.width_stride; + } + component.idct_pos = 0; + } + + // process that width up until it's impossible + self.post_process( + pixels, + i, + mcu_height, + width, + padded_width, + &mut pixels_written, + &mut upsampler_scratch_space + )?; + } + + debug!("Finished decoding image"); + + return Ok(()); + } + pub(crate) fn reset_params(&mut self) { + /* + Apparently, grayscale images which can be down sampled exists, which is weird in the sense + that it has one component Y, which is not usually down sampled. + + This means some calculations will be wrong, so for that we explicitly reset params + for such occurrences, warn and reset the image info to appear as if it were + a non-sampled image to ensure decoding works + */ + self.h_max = 1; + self.options = self.options.jpeg_set_out_colorspace(ColorSpace::Luma); + self.v_max = 1; + self.sub_sample_ratio = SampleRatios::None; + self.is_interleaved = false; + self.components[0].vertical_sample = 1; + self.components[0].width_stride = (((self.info.width as usize) + 7) / 8) * 8; + self.components[0].horizontal_sample = 1; + } +} + +///Get a marker from the bit-stream. +/// +/// This reads until it gets a marker or end of file is encountered +fn get_marker(reader: &mut ZReader, stream: &mut BitStream) -> Result +where + T: ZByteReaderTrait +{ + if let Some(marker) = stream.marker { + stream.marker = None; + return Ok(marker); + } + + // read until we get a marker + + while !reader.eof()? 
{ + let marker = reader.read_u8_err()?; + + if marker == 255 { + let mut r = reader.read_u8_err()?; + // 0xFF 0XFF(some images may be like that) + while r == 0xFF { + r = reader.read_u8_err()?; + } + + if r != 0 { + return Marker::from_u8(r) + .ok_or_else(|| DecodeErrors::Format(format!("Unknown marker 0xFF{r:X}"))); + } + } + } + return Err(DecodeErrors::ExhaustedData); +} diff --git a/third_party/zune-jpeg/src/misc.rs b/third_party/zune-jpeg/src/misc.rs new file mode 100644 index 0000000..1c180b3 --- /dev/null +++ b/third_party/zune-jpeg/src/misc.rs @@ -0,0 +1,431 @@ +/* + * Copyright (c) 2023. + * + * This software is free software; + * + * You can redistribute it or modify it under terms of the MIT, Apache License or Zlib license + */ + +//!Miscellaneous stuff +#![allow(dead_code)] + +use alloc::format; +use core::cmp::max; +use core::fmt; + +use zune_core::bytestream::ZByteReaderTrait; +use zune_core::colorspace::ColorSpace; +use zune_core::log::trace; + +use crate::components::{ComponentID, SampleRatios}; +use crate::errors::DecodeErrors; +use crate::huffman::HuffmanTable; +use crate::JpegDecoder; + +/// Start of baseline DCT Huffman coding + +pub const START_OF_FRAME_BASE: u16 = 0xffc0; + +/// Start of another frame + +pub const START_OF_FRAME_EXT_SEQ: u16 = 0xffc1; + +/// Start of progressive DCT encoding + +pub const START_OF_FRAME_PROG_DCT: u16 = 0xffc2; + +/// Start of Lossless sequential Huffman coding + +pub const START_OF_FRAME_LOS_SEQ: u16 = 0xffc3; + +/// Start of extended sequential DCT arithmetic coding + +pub const START_OF_FRAME_EXT_AR: u16 = 0xffc9; + +/// Start of Progressive DCT arithmetic coding + +pub const START_OF_FRAME_PROG_DCT_AR: u16 = 0xffca; + +/// Start of Lossless sequential Arithmetic coding + +pub const START_OF_FRAME_LOS_SEQ_AR: u16 = 0xffcb; + +/// Undo run length encoding of coefficients by placing them in natural order +#[rustfmt::skip] +pub const UN_ZIGZAG: [usize; 64 + 16] = [ + 0, 1, 8, 16, 9, 2, 3, 10, + 17, 24, 32, 25, 
18, 11, 4, 5, + 12, 19, 26, 33, 40, 48, 41, 34, + 27, 20, 13, 6, 7, 14, 21, 28, + 35, 42, 49, 56, 57, 50, 43, 36, + 29, 22, 15, 23, 30, 37, 44, 51, + 58, 59, 52, 45, 38, 31, 39, 46, + 53, 60, 61, 54, 47, 55, 62, 63, + // Prevent overflowing + 63, 63, 63, 63, 63, 63, 63, 63, + 63, 63, 63, 63, 63, 63, 63, 63 +]; + +/// Align data to a 16 byte boundary +#[repr(align(16))] +#[derive(Clone)] + +pub struct Aligned16(pub T); + +impl Default for Aligned16 +where + T: Default +{ + fn default() -> Self { + Aligned16(T::default()) + } +} + +/// Align data to a 32 byte boundary +#[repr(align(32))] +#[derive(Clone)] +pub struct Aligned32(pub T); + +impl Default for Aligned32 +where + T: Default +{ + fn default() -> Self { + Aligned32(T::default()) + } +} + +/// Markers that identify different Start of Image markers +/// They identify the type of encoding and whether the file use lossy(DCT) or +/// lossless compression and whether we use Huffman or arithmetic coding schemes +#[derive(Eq, PartialEq, Copy, Clone)] +#[allow(clippy::upper_case_acronyms)] +pub enum SOFMarkers { + /// Baseline DCT markers + BaselineDct, + /// SOF_1 Extended sequential DCT,Huffman coding + ExtendedSequentialHuffman, + /// Progressive DCT, Huffman coding + ProgressiveDctHuffman, + /// Lossless (sequential), huffman coding, + LosslessHuffman, + /// Extended sequential DEC, arithmetic coding + ExtendedSequentialDctArithmetic, + /// Progressive DCT, arithmetic coding, + ProgressiveDctArithmetic, + /// Lossless ( sequential), arithmetic coding + LosslessArithmetic +} + +impl Default for SOFMarkers { + fn default() -> Self { + Self::BaselineDct + } +} + +impl SOFMarkers { + /// Check if a certain marker is sequential DCT or not + + pub fn is_sequential_dct(self) -> bool { + matches!( + self, + Self::BaselineDct + | Self::ExtendedSequentialHuffman + | Self::ExtendedSequentialDctArithmetic + ) + } + + /// Check if a marker is a Lossles type or not + + pub fn is_lossless(self) -> bool { + matches!(self, 
Self::LosslessHuffman | Self::LosslessArithmetic) + } + + /// Check whether a marker is a progressive marker or not + + pub fn is_progressive(self) -> bool { + matches!( + self, + Self::ProgressiveDctHuffman | Self::ProgressiveDctArithmetic + ) + } + + /// Create a marker from an integer + + pub fn from_int(int: u16) -> Option { + match int { + START_OF_FRAME_BASE => Some(Self::BaselineDct), + START_OF_FRAME_PROG_DCT => Some(Self::ProgressiveDctHuffman), + START_OF_FRAME_PROG_DCT_AR => Some(Self::ProgressiveDctArithmetic), + START_OF_FRAME_LOS_SEQ => Some(Self::LosslessHuffman), + START_OF_FRAME_LOS_SEQ_AR => Some(Self::LosslessArithmetic), + START_OF_FRAME_EXT_SEQ => Some(Self::ExtendedSequentialHuffman), + START_OF_FRAME_EXT_AR => Some(Self::ExtendedSequentialDctArithmetic), + _ => None + } + } +} + +impl fmt::Debug for SOFMarkers { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match &self { + Self::BaselineDct => write!(f, "Baseline DCT"), + Self::ExtendedSequentialHuffman => { + write!(f, "Extended sequential DCT, Huffman Coding") + } + Self::ProgressiveDctHuffman => write!(f, "Progressive DCT,Huffman Encoding"), + Self::LosslessHuffman => write!(f, "Lossless (sequential) Huffman encoding"), + Self::ExtendedSequentialDctArithmetic => { + write!(f, "Extended sequential DCT, arithmetic coding") + } + Self::ProgressiveDctArithmetic => write!(f, "Progressive DCT, arithmetic coding"), + Self::LosslessArithmetic => write!(f, "Lossless (sequential) arithmetic coding") + } + } +} + +/// Set up component parameters. +/// +/// This modifies the components in place setting up details needed by other +/// parts fo the decoder. 
+pub(crate) fn setup_component_params( + img: &mut JpegDecoder +) -> Result<(), DecodeErrors> { + let img_width = img.width(); + let img_height = img.height(); + + // in case of adobe app14 being present, zero may indicate + // either CMYK if components are 4 or RGB if components are 3, + // see https://docs.oracle.com/javase/6/docs/api/javax/imageio/metadata/doc-files/jpeg_metadata.html + // so since we may not know how many number of components + // we have when decoding app14, we have to defer that check + // until now. + // + // We know adobe app14 was present since it's the only one that can modify + // input colorspace to be CMYK + if img.components.len() == 3 && img.input_colorspace == ColorSpace::CMYK { + img.input_colorspace = ColorSpace::RGB; + } + + for component in &mut img.components { + // compute interleaved image info + // h_max contains the maximum horizontal component + img.h_max = max(img.h_max, component.horizontal_sample); + // v_max contains the maximum vertical component + img.v_max = max(img.v_max, component.vertical_sample); + img.mcu_width = img.h_max * 8; + img.mcu_height = img.v_max * 8; + // Number of MCU's per width + img.mcu_x = (usize::from(img.info.width) + img.mcu_width - 1) / img.mcu_width; + // Number of MCU's per height + img.mcu_y = (usize::from(img.info.height) + img.mcu_height - 1) / img.mcu_height; + + if img.h_max != 1 || img.v_max != 1 { + // interleaved images have horizontal and vertical sampling factors + // not equal to 1. 
+ img.is_interleaved = true; + } + // Extract quantization tables from the arrays into components + let qt_table = *img.qt_tables[component.quantization_table_number as usize] + .as_ref() + .ok_or_else(|| { + DecodeErrors::DqtError(format!( + "No quantization table for component {:?}", + component.component_id + )) + })?; + + let x = (usize::from(img_width) * component.horizontal_sample + img.h_max - 1) / img.h_max; + let y = (usize::from(img_height) * component.horizontal_sample + img.h_max - 1) / img.v_max; + component.x = x; + component.w2 = img.mcu_x * component.horizontal_sample * 8; + // probably not needed. :) + component.y = y; + component.quantization_table = qt_table; + // initially stride contains its horizontal sub-sampling + component.width_stride *= img.mcu_x * 8; + } + { + // Sampling factors are one thing that suck + // this fixes a specific problem with images like + // + // (2 2) None + // (2 1) H + // (2 1) H + // + // The images exist in the wild, the images are not meant to exist + // but they do, it's just an annoying horizontal sub-sampling that + // I don't know why it exists. + // But it does + // So we try to cope with that. 
+ // I am not sure of how to explain how to fix it, but it involved a debugger + // and to much coke(the legal one) + // + // If this wasn't present, self.upsample_dest would have the wrong length + let mut handle_that_annoying_bug = false; + + if let Some(y_component) = img + .components + .iter() + .find(|c| c.component_id == ComponentID::Y) + { + if y_component.horizontal_sample == 2 || y_component.vertical_sample == 2 { + handle_that_annoying_bug = true; + } + } + if handle_that_annoying_bug { + for comp in &mut img.components { + if (comp.component_id != ComponentID::Y) + && (comp.horizontal_sample != 1 || comp.vertical_sample != 1) + { + comp.fix_an_annoying_bug = 2; + } + } + } + } + + if img.is_mjpeg { + fill_default_mjpeg_tables( + img.is_progressive, + &mut img.dc_huffman_tables, + &mut img.ac_huffman_tables + ); + } + + Ok(()) +} + +///Calculate number of fill bytes added to the end of a JPEG image +/// to fill the image +/// +/// JPEG usually inserts padding bytes if the image width cannot be evenly divided into +/// 8 , 16 or 32 chunks depending on the sub sampling ratio. 
So given a sub-sampling ratio, +/// and the actual width, this calculates the padded bytes that were added to the image +/// +/// # Params +/// -actual_width: Actual width of the image +/// -sub_sample: Sub sampling factor of the image +/// +/// # Returns +/// The padded width, this is how long the width is for a particular image +pub fn calculate_padded_width(actual_width: usize, sub_sample: SampleRatios) -> usize { + match sub_sample { + SampleRatios::None | SampleRatios::V => { + // None+V sends one MCU row, so that's a simple calculation + ((actual_width + 7) / 8) * 8 + } + SampleRatios::H | SampleRatios::HV => { + // sends two rows, width can be expanded by up to 15 more bytes + ((actual_width + 15) / 16) * 16 + } + } +} + +// https://www.loc.gov/preservation/digital/formats/fdd/fdd000063.shtml +// "Avery Lee, writing in the rec.video.desktop newsgroup in 2001, commented that "MJPEG, or at +// least the MJPEG in AVIs having the MJPG fourcc, is restricted JPEG with a fixed -- and +// *omitted* -- Huffman table. The JPEG must be YCbCr colorspace, it must be 4:2:2, and it must +// use basic Huffman encoding, not arithmetic or progressive.... You can indeed extract the +// MJPEG frames and decode them with a regular JPEG decoder, but you have to prepend the DHT +// segment to them, or else the decoder won't have any idea how to decompress the data. 
+// The exact table necessary is given in the OpenDML spec."" +pub fn fill_default_mjpeg_tables( + is_progressive: bool, dc_huffman_tables: &mut [Option], + ac_huffman_tables: &mut [Option] +) { + // Section K.3.3 + trace!("Filling with default mjpeg tables"); + + if dc_huffman_tables[0].is_none() { + // Table K.3 + dc_huffman_tables[0] = Some( + HuffmanTable::new_unfilled( + &[ + 0x00, 0x00, 0x01, 0x05, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00 + ], + &[ + 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B + ], + true, + is_progressive + ) + .unwrap() + ); + } + if dc_huffman_tables[1].is_none() { + // Table K.4 + dc_huffman_tables[1] = Some( + HuffmanTable::new_unfilled( + &[ + 0x00, 0x00, 0x03, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x00, + 0x00, 0x00, 0x00, 0x00 + ], + &[ + 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B + ], + true, + is_progressive + ) + .unwrap() + ); + } + if ac_huffman_tables[0].is_none() { + // Table K.5 + ac_huffman_tables[0] = Some( + HuffmanTable::new_unfilled( + &[ + 0x00, 0x00, 0x02, 0x01, 0x03, 0x03, 0x02, 0x04, 0x03, 0x05, 0x05, 0x04, 0x04, + 0x00, 0x00, 0x01, 0x7D + ], + &[ + 0x01, 0x02, 0x03, 0x00, 0x04, 0x11, 0x05, 0x12, 0x21, 0x31, 0x41, 0x06, 0x13, + 0x51, 0x61, 0x07, 0x22, 0x71, 0x14, 0x32, 0x81, 0x91, 0xA1, 0x08, 0x23, 0x42, + 0xB1, 0xC1, 0x15, 0x52, 0xD1, 0xF0, 0x24, 0x33, 0x62, 0x72, 0x82, 0x09, 0x0A, + 0x16, 0x17, 0x18, 0x19, 0x1A, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2A, 0x34, 0x35, + 0x36, 0x37, 0x38, 0x39, 0x3A, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4A, + 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5A, 0x63, 0x64, 0x65, 0x66, 0x67, + 0x68, 0x69, 0x6A, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7A, 0x83, 0x84, + 0x85, 0x86, 0x87, 0x88, 0x89, 0x8A, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, + 0x99, 0x9A, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, 0xA8, 0xA9, 0xAA, 0xB2, 0xB3, + 0xB4, 0xB5, 0xB6, 0xB7, 0xB8, 0xB9, 0xBA, 0xC2, 0xC3, 
0xC4, 0xC5, 0xC6, 0xC7, + 0xC8, 0xC9, 0xCA, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0xD7, 0xD8, 0xD9, 0xDA, 0xE1, + 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, 0xE8, 0xE9, 0xEA, 0xF1, 0xF2, 0xF3, 0xF4, + 0xF5, 0xF6, 0xF7, 0xF8, 0xF9, 0xFA + ], + false, + is_progressive + ) + .unwrap() + ); + } + if ac_huffman_tables[1].is_none() { + // Table K.6 + ac_huffman_tables[1] = Some( + HuffmanTable::new_unfilled( + &[ + 0x00, 0x00, 0x02, 0x01, 0x02, 0x04, 0x04, 0x03, 0x04, 0x07, 0x05, 0x04, 0x04, + 0x00, 0x01, 0x02, 0x77 + ], + &[ + 0x00, 0x01, 0x02, 0x03, 0x11, 0x04, 0x05, 0x21, 0x31, 0x06, 0x12, 0x41, 0x51, + 0x07, 0x61, 0x71, 0x13, 0x22, 0x32, 0x81, 0x08, 0x14, 0x42, 0x91, 0xA1, 0xB1, + 0xC1, 0x09, 0x23, 0x33, 0x52, 0xF0, 0x15, 0x62, 0x72, 0xD1, 0x0A, 0x16, 0x24, + 0x34, 0xE1, 0x25, 0xF1, 0x17, 0x18, 0x19, 0x1A, 0x26, 0x27, 0x28, 0x29, 0x2A, + 0x35, 0x36, 0x37, 0x38, 0x39, 0x3A, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, + 0x4A, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5A, 0x63, 0x64, 0x65, 0x66, + 0x67, 0x68, 0x69, 0x6A, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7A, 0x82, + 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8A, 0x92, 0x93, 0x94, 0x95, 0x96, + 0x97, 0x98, 0x99, 0x9A, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, 0xA8, 0xA9, 0xAA, + 0xB2, 0xB3, 0xB4, 0xB5, 0xB6, 0xB7, 0xB8, 0xB9, 0xBA, 0xC2, 0xC3, 0xC4, 0xC5, + 0xC6, 0xC7, 0xC8, 0xC9, 0xCA, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0xD7, 0xD8, 0xD9, + 0xDA, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, 0xE8, 0xE9, 0xEA, 0xF2, 0xF3, 0xF4, + 0xF5, 0xF6, 0xF7, 0xF8, 0xF9, 0xFA + ], + false, + is_progressive + ) + .unwrap() + ); + } +} diff --git a/third_party/zune-jpeg/src/unsafe_utils.rs b/third_party/zune-jpeg/src/unsafe_utils.rs new file mode 100644 index 0000000..325ebbe --- /dev/null +++ b/third_party/zune-jpeg/src/unsafe_utils.rs @@ -0,0 +1,4 @@ +#[cfg(all(feature = "x86", any(target_arch = "x86", target_arch = "x86_64")))] +pub use crate::unsafe_utils_avx2::*; +#[cfg(all(feature = "neon", target_arch = "aarch64"))] +pub use crate::unsafe_utils_neon::*; 
diff --git a/third_party/zune-jpeg/src/unsafe_utils_avx2.rs b/third_party/zune-jpeg/src/unsafe_utils_avx2.rs new file mode 100644 index 0000000..eb34b63 --- /dev/null +++ b/third_party/zune-jpeg/src/unsafe_utils_avx2.rs @@ -0,0 +1,223 @@ +/* + * Copyright (c) 2023. + * + * This software is free software; + * + * You can redistribute it or modify it under terms of the MIT, Apache License or Zlib license + */ + +#![cfg(all(feature = "x86", any(target_arch = "x86", target_arch = "x86_64")))] +//! This module provides unsafe ways to do some things +#![allow(clippy::wildcard_imports)] + +#[cfg(target_arch = "x86")] +use core::arch::x86::*; +#[cfg(target_arch = "x86_64")] +use core::arch::x86_64::*; +use core::ops::{Add, AddAssign, Mul, MulAssign, Sub}; + +/// A copy of `_MM_SHUFFLE()` that doesn't require +/// a nightly compiler +#[inline] +const fn shuffle(z: i32, y: i32, x: i32, w: i32) -> i32 { + (z << 6) | (y << 4) | (x << 2) | w +} + +/// An abstraction of an AVX ymm register that +///allows some things to not look ugly +#[derive(Clone, Copy)] +pub struct YmmRegister { + /// An AVX register + pub(crate) mm256: __m256i +} + +impl Add for YmmRegister { + type Output = YmmRegister; + + #[inline] + fn add(self, rhs: Self) -> Self::Output { + unsafe { + return YmmRegister { + mm256: _mm256_add_epi32(self.mm256, rhs.mm256) + }; + } + } +} + +impl Add for YmmRegister { + type Output = YmmRegister; + + #[inline] + fn add(self, rhs: i32) -> Self::Output { + unsafe { + let tmp = _mm256_set1_epi32(rhs); + + return YmmRegister { + mm256: _mm256_add_epi32(self.mm256, tmp) + }; + } + } +} + +impl Sub for YmmRegister { + type Output = YmmRegister; + + #[inline] + fn sub(self, rhs: Self) -> Self::Output { + unsafe { + return YmmRegister { + mm256: _mm256_sub_epi32(self.mm256, rhs.mm256) + }; + } + } +} + +impl AddAssign for YmmRegister { + #[inline] + fn add_assign(&mut self, rhs: Self) { + unsafe { + self.mm256 = _mm256_add_epi32(self.mm256, rhs.mm256); + } + } +} + +impl 
AddAssign for YmmRegister { + #[inline] + fn add_assign(&mut self, rhs: i32) { + unsafe { + let tmp = _mm256_set1_epi32(rhs); + + self.mm256 = _mm256_add_epi32(self.mm256, tmp); + } + } +} + +impl Mul for YmmRegister { + type Output = YmmRegister; + + #[inline] + fn mul(self, rhs: Self) -> Self::Output { + unsafe { + YmmRegister { + mm256: _mm256_mullo_epi32(self.mm256, rhs.mm256) + } + } + } +} + +impl Mul for YmmRegister { + type Output = YmmRegister; + + #[inline] + fn mul(self, rhs: i32) -> Self::Output { + unsafe { + let tmp = _mm256_set1_epi32(rhs); + + YmmRegister { + mm256: _mm256_mullo_epi32(self.mm256, tmp) + } + } + } +} + +impl MulAssign for YmmRegister { + #[inline] + fn mul_assign(&mut self, rhs: Self) { + unsafe { + self.mm256 = _mm256_mullo_epi32(self.mm256, rhs.mm256); + } + } +} + +impl MulAssign for YmmRegister { + #[inline] + fn mul_assign(&mut self, rhs: i32) { + unsafe { + let tmp = _mm256_set1_epi32(rhs); + + self.mm256 = _mm256_mullo_epi32(self.mm256, tmp); + } + } +} + +impl MulAssign<__m256i> for YmmRegister { + #[inline] + fn mul_assign(&mut self, rhs: __m256i) { + unsafe { + self.mm256 = _mm256_mullo_epi32(self.mm256, rhs); + } + } +} + +type Reg = YmmRegister; + +/// Transpose an array of 8 by 8 i32's using avx intrinsics +/// +/// This was translated from [here](https://newbedev.com/transpose-an-8x8-float-using-avx-avx2) +#[allow(unused_parens, clippy::too_many_arguments)] +#[target_feature(enable = "avx2")] +#[inline] +pub unsafe fn transpose( + v0: &mut Reg, v1: &mut Reg, v2: &mut Reg, v3: &mut Reg, v4: &mut Reg, v5: &mut Reg, + v6: &mut Reg, v7: &mut Reg +) { + macro_rules! merge_epi32 { + ($v0:tt,$v1:tt,$v2:tt,$v3:tt) => { + let va = _mm256_permute4x64_epi64($v0, shuffle(3, 1, 2, 0)); + + let vb = _mm256_permute4x64_epi64($v1, shuffle(3, 1, 2, 0)); + + $v2 = _mm256_unpacklo_epi32(va, vb); + + $v3 = _mm256_unpackhi_epi32(va, vb); + }; + } + + macro_rules! 
merge_epi64 { + ($v0:tt,$v1:tt,$v2:tt,$v3:tt) => { + let va = _mm256_permute4x64_epi64($v0, shuffle(3, 1, 2, 0)); + + let vb = _mm256_permute4x64_epi64($v1, shuffle(3, 1, 2, 0)); + + $v2 = _mm256_unpacklo_epi64(va, vb); + + $v3 = _mm256_unpackhi_epi64(va, vb); + }; + } + + macro_rules! merge_si128 { + ($v0:tt,$v1:tt,$v2:tt,$v3:tt) => { + $v2 = _mm256_permute2x128_si256($v0, $v1, shuffle(0, 2, 0, 0)); + + $v3 = _mm256_permute2x128_si256($v0, $v1, shuffle(0, 3, 0, 1)); + }; + } + + let (w0, w1, w2, w3, w4, w5, w6, w7); + + merge_epi32!((v0.mm256), (v1.mm256), w0, w1); + + merge_epi32!((v2.mm256), (v3.mm256), w2, w3); + + merge_epi32!((v4.mm256), (v5.mm256), w4, w5); + + merge_epi32!((v6.mm256), (v7.mm256), w6, w7); + + let (x0, x1, x2, x3, x4, x5, x6, x7); + + merge_epi64!(w0, w2, x0, x1); + + merge_epi64!(w1, w3, x2, x3); + + merge_epi64!(w4, w6, x4, x5); + + merge_epi64!(w5, w7, x6, x7); + + merge_si128!(x0, x4, (v0.mm256), (v1.mm256)); + + merge_si128!(x1, x5, (v2.mm256), (v3.mm256)); + + merge_si128!(x2, x6, (v4.mm256), (v5.mm256)); + + merge_si128!(x3, x7, (v6.mm256), (v7.mm256)); +} diff --git a/third_party/zune-jpeg/src/unsafe_utils_neon.rs b/third_party/zune-jpeg/src/unsafe_utils_neon.rs new file mode 100644 index 0000000..78b32f9 --- /dev/null +++ b/third_party/zune-jpeg/src/unsafe_utils_neon.rs @@ -0,0 +1,331 @@ +/* + * Copyright (c) 2023. + * + * This software is free software; + * + * You can redistribute it or modify it under terms of the MIT, Apache License or Zlib license + */ + +#![cfg(target_arch = "aarch64")] +// TODO can this be extended to armv7 + +//! 
This module provides unsafe ways to do some things +#![allow(clippy::wildcard_imports)] + +use std::arch::aarch64::*; +use std::ops::{Add, AddAssign, BitOr, BitOrAssign, Mul, MulAssign, Sub}; + +pub type VecType = int32x4x2_t; + +pub unsafe fn loadu(src: *const i32) -> VecType { + vld1q_s32_x2(src as *const _) +} + +/// An abstraction of an AVX ymm register that +///allows some things to not look ugly +#[derive(Clone, Copy)] +pub struct YmmRegister { + /// An AVX register + pub(crate) mm256: VecType +} + +impl YmmRegister { + #[inline] + pub unsafe fn load(src: *const i32) -> Self { + loadu(src).into() + } + + #[inline] + pub fn map2(self, other: Self, f: impl Fn(int32x4_t, int32x4_t) -> int32x4_t) -> Self { + let m0 = f(self.mm256.0, other.mm256.0); + let m1 = f(self.mm256.1, other.mm256.1); + + YmmRegister { + mm256: int32x4x2_t(m0, m1) + } + } + + #[inline] + pub fn all_zero(self) -> bool { + unsafe { + let both = vorrq_s32(self.mm256.0, self.mm256.1); + let both_unsigned = vreinterpretq_u32_s32(both); + 0 == vmaxvq_u32(both_unsigned) + } + } + + #[inline] + pub fn const_shl(self) -> Self { + // Ensure that we logically shift left + unsafe { + let m0 = vreinterpretq_s32_u32(vshlq_n_u32::(vreinterpretq_u32_s32(self.mm256.0))); + let m1 = vreinterpretq_s32_u32(vshlq_n_u32::(vreinterpretq_u32_s32(self.mm256.1))); + + YmmRegister { + mm256: int32x4x2_t(m0, m1) + } + } + } + + #[inline] + pub fn const_shra(self) -> Self { + unsafe { + let i0 = vshrq_n_s32::(self.mm256.0); + let i1 = vshrq_n_s32::(self.mm256.1); + + YmmRegister { + mm256: int32x4x2_t(i0, i1) + } + } + } +} + +impl Add for YmmRegister +where + T: Into +{ + type Output = YmmRegister; + + #[inline] + fn add(self, rhs: T) -> Self::Output { + let rhs = rhs.into(); + unsafe { self.map2(rhs, |a, b| vaddq_s32(a, b)) } + } +} + +impl Sub for YmmRegister +where + T: Into +{ + type Output = YmmRegister; + + #[inline] + fn sub(self, rhs: T) -> Self::Output { + let rhs = rhs.into(); + unsafe { self.map2(rhs, |a, 
b| vsubq_s32(a, b)) } + } +} + +impl AddAssign for YmmRegister +where + T: Into +{ + #[inline] + fn add_assign(&mut self, rhs: T) { + let rhs: Self = rhs.into(); + *self = *self + rhs; + } +} + +impl Mul for YmmRegister +where + T: Into +{ + type Output = YmmRegister; + + #[inline] + fn mul(self, rhs: T) -> Self::Output { + let rhs = rhs.into(); + unsafe { self.map2(rhs, |a, b| vmulq_s32(a, b)) } + } +} + +impl MulAssign for YmmRegister +where + T: Into +{ + #[inline] + fn mul_assign(&mut self, rhs: T) { + let rhs: Self = rhs.into(); + *self = *self * rhs; + } +} + +impl BitOr for YmmRegister +where + T: Into +{ + type Output = YmmRegister; + + #[inline] + fn bitor(self, rhs: T) -> Self::Output { + let rhs = rhs.into(); + unsafe { self.map2(rhs, |a, b| vorrq_s32(a, b)) } + } +} + +impl BitOrAssign for YmmRegister +where + T: Into +{ + #[inline] + fn bitor_assign(&mut self, rhs: T) { + let rhs: Self = rhs.into(); + *self = *self | rhs; + } +} + +impl From for YmmRegister { + #[inline] + fn from(val: i32) -> Self { + unsafe { + let dup = vdupq_n_s32(val); + + YmmRegister { + mm256: int32x4x2_t(dup, dup) + } + } + } +} + +impl From for YmmRegister { + #[inline] + fn from(mm256: VecType) -> Self { + YmmRegister { mm256 } + } +} + +#[allow(clippy::too_many_arguments)] +#[inline] +unsafe fn transpose4( + v0: &mut int32x4_t, v1: &mut int32x4_t, v2: &mut int32x4_t, v3: &mut int32x4_t +) { + let w0 = vtrnq_s32( + vreinterpretq_s32_s64(vtrn1q_s64( + vreinterpretq_s64_s32(*v0), + vreinterpretq_s64_s32(*v2) + )), + vreinterpretq_s32_s64(vtrn1q_s64( + vreinterpretq_s64_s32(*v1), + vreinterpretq_s64_s32(*v3) + )) + ); + let w1 = vtrnq_s32( + vreinterpretq_s32_s64(vtrn2q_s64( + vreinterpretq_s64_s32(*v0), + vreinterpretq_s64_s32(*v2) + )), + vreinterpretq_s32_s64(vtrn2q_s64( + vreinterpretq_s64_s32(*v1), + vreinterpretq_s64_s32(*v3) + )) + ); + + *v0 = w0.0; + *v1 = w0.1; + *v2 = w1.0; + *v3 = w1.1; +} + +/// Transpose an array of 8 by 8 i32 +/// Arm has dedicated 
interleave/transpose instructions +/// we: +/// 1. Transpose the upper left and lower right quadrants +/// 2. Swap and transpose the upper right and lower left quadrants +#[allow(clippy::too_many_arguments)] +#[inline] +pub unsafe fn transpose( + v0: &mut YmmRegister, v1: &mut YmmRegister, v2: &mut YmmRegister, v3: &mut YmmRegister, + v4: &mut YmmRegister, v5: &mut YmmRegister, v6: &mut YmmRegister, v7: &mut YmmRegister +) { + use std::mem::swap; + + let ul0 = &mut v0.mm256.0; + let ul1 = &mut v1.mm256.0; + let ul2 = &mut v2.mm256.0; + let ul3 = &mut v3.mm256.0; + + let ur0 = &mut v0.mm256.1; + let ur1 = &mut v1.mm256.1; + let ur2 = &mut v2.mm256.1; + let ur3 = &mut v3.mm256.1; + + let ll0 = &mut v4.mm256.0; + let ll1 = &mut v5.mm256.0; + let ll2 = &mut v6.mm256.0; + let ll3 = &mut v7.mm256.0; + + let lr0 = &mut v4.mm256.1; + let lr1 = &mut v5.mm256.1; + let lr2 = &mut v6.mm256.1; + let lr3 = &mut v7.mm256.1; + + swap(ur0, ll0); + swap(ur1, ll1); + swap(ur2, ll2); + swap(ur3, ll3); + + transpose4(ul0, ul1, ul2, ul3); + + transpose4(ur0, ur1, ur2, ur3); + + transpose4(ll0, ll1, ll2, ll3); + + transpose4(lr0, lr1, lr2, lr3); +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_transpose() { + fn get_val(i: usize, j: usize) -> i32 { + ((i * 8) / (j + 1)) as i32 + } + unsafe { + let mut vals: [i32; 8 * 8] = [0; 8 * 8]; + + for i in 0..8 { + for j in 0..8 { + // some order-dependent value of i and j + let value = get_val(i, j); + vals[i * 8 + j] = value; + } + } + + let mut regs: [YmmRegister; 8] = std::mem::transmute(vals); + let mut reg0 = regs[0]; + let mut reg1 = regs[1]; + let mut reg2 = regs[2]; + let mut reg3 = regs[3]; + let mut reg4 = regs[4]; + let mut reg5 = regs[5]; + let mut reg6 = regs[6]; + let mut reg7 = regs[7]; + + transpose( + &mut reg0, &mut reg1, &mut reg2, &mut reg3, &mut reg4, &mut reg5, &mut reg6, + &mut reg7 + ); + + regs[0] = reg0; + regs[1] = reg1; + regs[2] = reg2; + regs[3] = reg3; + regs[4] = reg4; + regs[5] = reg5; + 
regs[6] = reg6; + regs[7] = reg7; + + let vals_from_reg: [i32; 8 * 8] = std::mem::transmute(regs); + + for i in 0..8 { + for j in 0..i { + let orig = vals[i * 8 + j]; + vals[i * 8 + j] = vals[j * 8 + i]; + vals[j * 8 + i] = orig; + } + } + + for i in 0..8 { + for j in 0..8 { + assert_eq!(vals[j * 8 + i], get_val(i, j)); + assert_eq!(vals_from_reg[j * 8 + i], get_val(i, j)); + } + } + + assert_eq!(vals, vals_from_reg); + } + } +} diff --git a/third_party/zune-jpeg/src/upsampler.rs b/third_party/zune-jpeg/src/upsampler.rs new file mode 100644 index 0000000..5ba10d5 --- /dev/null +++ b/third_party/zune-jpeg/src/upsampler.rs @@ -0,0 +1,101 @@ +/* + * Copyright (c) 2023. + * + * This software is free software; + * + * You can redistribute it or modify it under terms of the MIT, Apache License or Zlib license + */ + +//! Up-sampling routines +//! +//! The main upsampling method is a bi-linear interpolation or a "triangle +//! filter " or libjpeg turbo `fancy_upsampling` which is a good compromise +//! between speed and visual quality +//! +//! # The filter +//! Each output pixel is made from `(3*A+B)/4` where A is the original +//! pixel closer to the output and B is the one further. +//! +//! ```text +//!+---+---+ +//! | A | B | +//! +---+---+ +//! +-+-+-+-+ +//! | |P| | | +//! +-+-+-+-+ +//! ``` +//! +//! # Horizontal Bi-linear filter +//! ```text +//! |---+-----------+---+ +//! | | | | +//! | A | |p1 | p2| | B | +//! | | | | +//! |---+-----------+---+ +//! +//! ``` +//! For a horizontal bi-linear it's trivial to implement, +//! +//! `A` becomes the input closest to the output. +//! +//! `B` varies depending on output. +//! - For odd positions, input is the `next` pixel after A +//! - For even positions, input is the `previous` value before A. +//! +//! We iterate in a classic 1-D sliding window with a window of 3. +//! For our sliding window approach, `A` is the 1st and `B` is either the 0th term or 2nd term +//! depending on position we are writing.(see scalar code). 
+//! +//! For vector code see module sse for explanation. +//! +//! # Vertical bi-linear. +//! Vertical up-sampling is a bit trickier. +//! +//! ```text +//! +----+----+ +//! | A1 | A2 | +//! +----+----+ +//! +----+----+ +//! | p1 | p2 | +//! +----+-+--+ +//! +----+-+--+ +//! | p3 | p4 | +//! +----+-+--+ +//! +----+----+ +//! | B1 | B2 | +//! +----+----+ +//! ``` +//! +//! For `p1` +//! - `A1` is given a weight of `3` and `B1` is given a weight of 1. +//! +//! For `p3` +//! - `B1` is given a weight of `3` and `A1` is given a weight of 1 +//! +//! # Horizontal vertical downsampling/chroma quartering. +//! +//! Carry out a vertical filter in the first pass, then a horizontal filter in the second pass. +use crate::components::UpSampler; + +mod scalar; + +// choose best possible implementation for this platform +pub fn choose_horizontal_samp_function(_use_unsafe: bool) -> UpSampler { + return scalar::upsample_horizontal; +} + +pub fn choose_hv_samp_function(_use_unsafe: bool) -> UpSampler { + return scalar::upsample_hv; +} + +pub fn choose_v_samp_function(_use_unsafe: bool) -> UpSampler { + return scalar::upsample_vertical; +} + +/// Upsample nothing + +pub fn upsample_no_op( + _input: &[i16], _in_ref: &[i16], _in_near: &[i16], _scratch_space: &mut [i16], + _output: &mut [i16] +) { +} diff --git a/third_party/zune-jpeg/src/upsampler/scalar.rs b/third_party/zune-jpeg/src/upsampler/scalar.rs new file mode 100644 index 0000000..a0ca8be --- /dev/null +++ b/third_party/zune-jpeg/src/upsampler/scalar.rs @@ -0,0 +1,110 @@ +/* + * Copyright (c) 2023. 
+ * + * This software is free software; + * + * You can redistribute it or modify it under terms of the MIT, Apache License or Zlib license + */ + +pub fn upsample_horizontal( + input: &[i16], _ref: &[i16], _in_near: &[i16], _scratch: &mut [i16], output: &mut [i16] +) { + assert_eq!( + input.len() * 2, + output.len(), + "Input length is not half the size of the output length" + ); + assert!( + output.len() > 4 && input.len() > 2, + "Too Short of a vector, cannot upsample" + ); + + output[0] = input[0]; + output[1] = (input[0] * 3 + input[1] + 2) >> 2; + + // This code is written for speed and not readability + // + // The readable code is + // + // for i in 1..input.len() - 1{ + // let sample = 3 * input[i] + 2; + // out[i * 2] = (sample + input[i - 1]) >> 2; + // out[i * 2 + 1] = (sample + input[i + 1]) >> 2; + // } + // + // The output of a pixel is determined by it's surrounding neighbours but we attach more weight to it's nearest + // neighbour (input[i]) than to the next nearest neighbour. + + for (output_window, input_window) in output[2..].chunks_exact_mut(2).zip(input.windows(3)) { + let sample = 3 * input_window[1] + 2; + + output_window[0] = (sample + input_window[0]) >> 2; + output_window[1] = (sample + input_window[2]) >> 2; + } + // Get lengths + let out_len = output.len() - 2; + let input_len = input.len() - 2; + + // slice the output vector + let f_out = &mut output[out_len..]; + let i_last = &input[input_len..]; + + // write out manually.. 
+ f_out[0] = (3 * i_last[0] + i_last[1] + 2) >> 2; + f_out[1] = i_last[1]; +} +pub fn upsample_vertical( + input: &[i16], in_near: &[i16], in_far: &[i16], _scratch_space: &mut [i16], output: &mut [i16] +) { + assert_eq!(input.len() * 2, output.len()); + assert_eq!(in_near.len(), input.len()); + assert_eq!(in_far.len(), input.len()); + + let middle = output.len() / 2; + + let (out_top, out_bottom) = output.split_at_mut(middle); + + // for the first row, closest row is in_near + for ((near, far), x) in input.iter().zip(in_near.iter()).zip(out_top) { + *x = (((3 * near) + 2) + far) >> 2; + } + // for the second row, the closest row to input is in_far + for ((near, far), x) in input.iter().zip(in_far.iter()).zip(out_bottom) { + *x = (((3 * near) + 2) + far) >> 2; + } +} + +pub fn upsample_hv( + input: &[i16], in_near: &[i16], in_far: &[i16], scratch_space: &mut [i16], output: &mut [i16] +) { + assert_eq!(input.len() * 4, output.len()); + + let mut t = [0]; + upsample_vertical(input, in_near, in_far, &mut t, scratch_space); + // horizontal upsampling must be done separate for every line + // Otherwise it introduces artifacts that may cause the edge colors + // to appear on the other line. + + // Since this is called for two scanlines/widths currently + // splitting the inputs and outputs into half ensures we only handle + // one scanline per iteration + let scratch_half = scratch_space.len() / 2; + + let output_half = output.len() / 2; + + upsample_horizontal( + &scratch_space[..scratch_half], + &[], + &[], + &mut t, + &mut output[..output_half] + ); + + upsample_horizontal( + &scratch_space[scratch_half..], + &[], + &[], + &mut t, + &mut output[output_half..] + ); +} diff --git a/third_party/zune-jpeg/src/worker.rs b/third_party/zune-jpeg/src/worker.rs new file mode 100644 index 0000000..f16b312 --- /dev/null +++ b/third_party/zune-jpeg/src/worker.rs @@ -0,0 +1,429 @@ +/* + * Copyright (c) 2023. 
+ * + * This software is free software; + * + * You can redistribute it or modify it under terms of the MIT, Apache License or Zlib license + */ + +use alloc::format; +use core::convert::TryInto; + +use zune_core::colorspace::ColorSpace; + +use crate::color_convert::ycbcr_to_grayscale; +use crate::components::{Components, SampleRatios}; +use crate::decoder::{ColorConvert16Ptr, MAX_COMPONENTS}; +use crate::errors::DecodeErrors; + +/// fast 0..255 * 0..255 => 0..255 rounded multiplication +/// +/// Borrowed from stb +#[allow(clippy::cast_sign_loss, clippy::cast_possible_truncation)] +#[inline] +fn blinn_8x8(in_val: u8, y: u8) -> u8 { + let t = i32::from(in_val) * i32::from(y) + 128; + return ((t + (t >> 8)) >> 8) as u8; +} + +#[allow(clippy::cast_sign_loss, clippy::cast_possible_truncation)] +pub(crate) fn color_convert( + unprocessed: &[&[i16]; MAX_COMPONENTS], color_convert_16: ColorConvert16Ptr, + input_colorspace: ColorSpace, output_colorspace: ColorSpace, output: &mut [u8], width: usize, + padded_width: usize +) -> Result<(), DecodeErrors> // so many parameters.. +{ + // maximum sampling factors are in Y-channel, no need to pass them. 
+ + if input_colorspace.num_components() == 3 && input_colorspace == output_colorspace { + // sort things like RGB to RGB conversion + copy_removing_padding(unprocessed, width, padded_width, output); + return Ok(()); + } + if input_colorspace.num_components() == 4 && input_colorspace == output_colorspace { + copy_removing_padding_4x(unprocessed, width, padded_width, output); + return Ok(()); + } + // color convert + match (input_colorspace, output_colorspace) { + (ColorSpace::YCbCr | ColorSpace::Luma, ColorSpace::Luma) => { + ycbcr_to_grayscale(unprocessed[0], width, padded_width, output); + } + ( + ColorSpace::YCbCr, + ColorSpace::RGB | ColorSpace::RGBA | ColorSpace::BGR | ColorSpace::BGRA + ) => { + color_convert_ycbcr( + unprocessed, + width, + padded_width, + output_colorspace, + color_convert_16, + output + ); + } + (ColorSpace::YCCK, ColorSpace::RGB) => { + color_convert_ycck_to_rgb::<3>( + unprocessed, + width, + padded_width, + output_colorspace, + color_convert_16, + output + ); + } + + (ColorSpace::YCCK, ColorSpace::RGBA) => { + color_convert_ycck_to_rgb::<4>( + unprocessed, + width, + padded_width, + output_colorspace, + color_convert_16, + output + ); + } + (ColorSpace::CMYK, ColorSpace::RGB) => { + color_convert_cymk_to_rgb::<3>(unprocessed, width, padded_width, output); + } + (ColorSpace::CMYK, ColorSpace::RGBA) => { + color_convert_cymk_to_rgb::<4>(unprocessed, width, padded_width, output); + } + // For the other components we do nothing(currently) + _ => { + let msg = format!( + "Unimplemented colorspace mapping from {input_colorspace:?} to {output_colorspace:?}"); + + return Err(DecodeErrors::Format(msg)); + } + } + Ok(()) +} + +/// Copy a block to output removing padding bytes from input +/// if necessary +#[allow(clippy::cast_sign_loss, clippy::cast_possible_truncation)] +fn copy_removing_padding( + mcu_block: &[&[i16]; MAX_COMPONENTS], width: usize, padded_width: usize, output: &mut [u8] +) { + for (((pix_w, c_w), m_w), y_w) in output + 
.chunks_exact_mut(width * 3) + .zip(mcu_block[0].chunks_exact(padded_width)) + .zip(mcu_block[1].chunks_exact(padded_width)) + .zip(mcu_block[2].chunks_exact(padded_width)) + { + for (((pix, c), y), m) in pix_w.chunks_exact_mut(3).zip(c_w).zip(m_w).zip(y_w) { + pix[0] = *c as u8; + pix[1] = *y as u8; + pix[2] = *m as u8; + } + } +} +#[allow(clippy::cast_possible_truncation, clippy::cast_sign_loss)] +fn copy_removing_padding_4x( + mcu_block: &[&[i16]; MAX_COMPONENTS], width: usize, padded_width: usize, output: &mut [u8] +) { + for ((((pix_w, c_w), m_w), y_w), k_w) in output + .chunks_exact_mut(width * 4) + .zip(mcu_block[0].chunks_exact(padded_width)) + .zip(mcu_block[1].chunks_exact(padded_width)) + .zip(mcu_block[2].chunks_exact(padded_width)) + .zip(mcu_block[3].chunks_exact(padded_width)) + { + for ((((pix, c), y), m), k) in pix_w + .chunks_exact_mut(4) + .zip(c_w) + .zip(m_w) + .zip(y_w) + .zip(k_w) + { + pix[0] = *c as u8; + pix[1] = *y as u8; + pix[2] = *m as u8; + pix[3] = *k as u8; + } + } +} +/// Convert YCCK image to rgb +#[allow(clippy::cast_possible_truncation, clippy::cast_sign_loss)] +fn color_convert_ycck_to_rgb( + mcu_block: &[&[i16]; MAX_COMPONENTS], width: usize, padded_width: usize, + output_colorspace: ColorSpace, color_convert_16: ColorConvert16Ptr, output: &mut [u8] +) { + color_convert_ycbcr( + mcu_block, + width, + padded_width, + output_colorspace, + color_convert_16, + output + ); + for (pix_w, m_w) in output + .chunks_exact_mut(width * 3) + .zip(mcu_block[3].chunks_exact(padded_width)) + { + for (pix, m) in pix_w.chunks_exact_mut(NUM_COMPONENTS).zip(m_w) { + let m = (*m) as u8; + pix[0] = blinn_8x8(255 - pix[0], m); + pix[1] = blinn_8x8(255 - pix[1], m); + pix[2] = blinn_8x8(255 - pix[2], m); + } + } +} + +#[allow(clippy::cast_sign_loss, clippy::cast_possible_truncation)] +fn color_convert_cymk_to_rgb( + mcu_block: &[&[i16]; MAX_COMPONENTS], width: usize, padded_width: usize, output: &mut [u8] +) { + for ((((pix_w, c_w), m_w), y_w), k_w) 
in output + .chunks_exact_mut(width * NUM_COMPONENTS) + .zip(mcu_block[0].chunks_exact(padded_width)) + .zip(mcu_block[1].chunks_exact(padded_width)) + .zip(mcu_block[2].chunks_exact(padded_width)) + .zip(mcu_block[3].chunks_exact(padded_width)) + { + for ((((pix, c), m), y), k) in pix_w + .chunks_exact_mut(3) + .zip(c_w) + .zip(m_w) + .zip(y_w) + .zip(k_w) + { + let c = *c as u8; + let m = *m as u8; + let y = *y as u8; + let k = *k as u8; + + pix[0] = blinn_8x8(c, k); + pix[1] = blinn_8x8(m, k); + pix[2] = blinn_8x8(y, k); + } + } +} + +/// Do color-conversion for interleaved MCU +#[allow( + clippy::similar_names, + clippy::too_many_arguments, + clippy::needless_pass_by_value, + clippy::unwrap_used +)] +fn color_convert_ycbcr( + mcu_block: &[&[i16]; MAX_COMPONENTS], width: usize, padded_width: usize, + output_colorspace: ColorSpace, color_convert_16: ColorConvert16Ptr, output: &mut [u8] +) { + let num_components = output_colorspace.num_components(); + + let stride = width * num_components; + // Allocate temporary buffer for small widths less than 16. + let mut temp = [0; 64]; + // We need to chunk per width to ensure we can discard extra values at the end of the width. + // Since the encoder may pad bits to ensure the width is a multiple of 8. 
+ for (((y_width, cb_width), cr_width), out) in mcu_block[0] + .chunks_exact(padded_width) + .zip(mcu_block[1].chunks_exact(padded_width)) + .zip(mcu_block[2].chunks_exact(padded_width)) + .zip(output.chunks_exact_mut(stride)) + { + if width < 16 { + // allocate temporary buffers for the values received from idct + let mut y_out = [0; 16]; + let mut cb_out = [0; 16]; + let mut cr_out = [0; 16]; + // copy those small widths to that buffer + y_out[0..y_width.len()].copy_from_slice(y_width); + cb_out[0..cb_width.len()].copy_from_slice(cb_width); + cr_out[0..cr_width.len()].copy_from_slice(cr_width); + // we handle widths less than 16 a bit differently, allocating a temporary + // buffer and writing to that and then flushing to the out buffer + // because of the optimizations applied below, + (color_convert_16)(&y_out, &cb_out, &cr_out, &mut temp, &mut 0); + // copy to stride + out[0..width * num_components].copy_from_slice(&temp[0..width * num_components]); + // next + continue; + } + + // Chunk in outputs of 16 to pass to color_convert as an array of 16 i16's. + for (((y, cb), cr), out_c) in y_width + .chunks_exact(16) + .zip(cb_width.chunks_exact(16)) + .zip(cr_width.chunks_exact(16)) + .zip(out.chunks_exact_mut(16 * num_components)) + { + (color_convert_16)( + y.try_into().unwrap(), + cb.try_into().unwrap(), + cr.try_into().unwrap(), + out_c, + &mut 0 + ); + } + //we have more pixels in the end that can't be handled by the main loop. + //move pointer back a little bit to get last 16 bytes, + //color convert, and overwrite + //This means some values will be color converted twice. + for ((y, cb), cr) in y_width[width - 16..] + .chunks_exact(16) + .zip(cb_width[width - 16..].chunks_exact(16)) + .zip(cr_width[width - 16..].chunks_exact(16)) + .take(1) + { + (color_convert_16)( + y.try_into().unwrap(), + cb.try_into().unwrap(), + cr.try_into().unwrap(), + &mut temp, + &mut 0 + ); + } + + let rem = out[(width - 16) * num_components..] 
+ .chunks_exact_mut(16 * num_components) + .next() + .unwrap(); + + rem.copy_from_slice(&temp[0..rem.len()]); + } +} +pub(crate) fn upsample( + component: &mut Components, mcu_height: usize, i: usize, upsampler_scratch_space: &mut [i16] +) { + match component.sample_ratio { + SampleRatios::V | SampleRatios::HV => { + /* + When upsampling vertically sampled images, we have a certain problem + which is that we do not have all MCU's decoded, this usually sucks at boundaries + e.g we can't upsample the last mcu row, since the row_down currently doesn't exist + + To solve this we need to do two things + + 1. Carry over coefficients when we lack enough data to upsample + 2. Upsample when we have enough data + + To achieve (1), we store a previous row, and the current row in components themselves + which will later be used to make (2) + + To achieve (2), we take the stored previous row(second last MCU row), + current row(last mcu row) and row down(first row of newly decoded MCU) + + and upsample that and store it in first_row_upsample_dest, this contains + up-sampled coefficients for the last for the previous decoded mcu row. + + The caller is then expected to process first_row_upsample_dest before processing data + in component.upsample_dest which stores the up-sampled components excluding the last row + */ + + let mut dest_start = 0; + let stride_bytes_written = component.width_stride * component.sample_ratio.sample(); + + if i > 0 { + // Handle the last MCU of the previous row + // This wasn't up-sampled as we didn't have the row_down + // so we do it now + + let stride = component.width_stride; + + let dest = &mut component.first_row_upsample_dest[0..stride_bytes_written]; + + // get current row + let row = &component.row[..]; + let row_up = &component.row_up[..]; + let row_down = &component.raw_coeff[0..stride]; + (component.up_sampler)(row, row_up, row_down, upsampler_scratch_space, dest); + } + + // we have the Y component width stride. 
+ // this may be higher than the actual width,(2x because vertical sampling) + // + // This will not upsample the last row + + // if false, do not upsample. + // set to false on the last row of an mcu + let mut upsample = true; + + let stride = component.width_stride * component.vertical_sample; + let stop_offset = component.raw_coeff.len() / component.width_stride; + for (pos, curr_row) in component + .raw_coeff + .chunks_exact(component.width_stride) + .enumerate() + { + let mut dest: &mut [i16] = &mut []; + let mut row_up: &[i16] = &[]; + // row below current sample + let mut row_down: &[i16] = &[]; + + // Order of ifs matters + + if i == 0 && pos == 0 { + // first IMAGE row, row_up is the same as current row + // row_down is the row below. + row_up = &component.raw_coeff[pos * stride..(pos + 1) * stride]; + row_down = &component.raw_coeff[(pos + 1) * stride..(pos + 2) * stride]; + } else if i > 0 && pos == 0 { + // first row of a new mcu, previous row was copied so use that + row_up = &component.row[..]; + row_down = &component.raw_coeff[(pos + 1) * stride..(pos + 2) * stride]; + } else if i == mcu_height.saturating_sub(1) && pos == stop_offset - 1 { + // last IMAGE row, adjust pointer to use previous row and current row + row_up = &component.raw_coeff[(pos - 1) * stride..pos * stride]; + row_down = &component.raw_coeff[pos * stride..(pos + 1) * stride]; + } else if pos > 0 && pos < stop_offset - 1 { + // other rows, get row up and row down relative to our current row + // ignore last row of each mcu + row_up = &component.raw_coeff[(pos - 1) * stride..pos * stride]; + row_down = &component.raw_coeff[(pos + 1) * stride..(pos + 2) * stride]; + } else if pos == stop_offset - 1 { + // last MCU in a row + // + // we need a row at the next MCU but we haven't decoded that MCU yet + // so we should save this and when we have the next MCU, + // do the upsampling + + // store the current row and previous row in a buffer + let prev_row = &component.raw_coeff[(pos - 1) * 
stride..pos * stride]; + + component.row_up.copy_from_slice(prev_row); + component.row.copy_from_slice(curr_row); + upsample = false; + } else { + unreachable!("Uh oh!"); + } + if upsample { + dest = + &mut component.upsample_dest[dest_start..dest_start + stride_bytes_written]; + dest_start += stride_bytes_written; + } + + if upsample { + // upsample + (component.up_sampler)( + curr_row, + row_up, + row_down, + upsampler_scratch_space, + dest + ); + } + } + } + SampleRatios::H => { + assert_eq!(component.raw_coeff.len() * 2, component.upsample_dest.len()); + + let raw_coeff = &component.raw_coeff; + let dest_coeff = &mut component.upsample_dest; + + // upsample each row + for (single_row, output_stride) in raw_coeff + .chunks_exact(component.width_stride) + .zip(dest_coeff.chunks_exact_mut(component.width_stride * 2)) + { + // upsample using the fn pointer, should only be H, so no need for + // row up and row down + (component.up_sampler)(single_row, &[], &[], &mut [], output_stride); + } + } + SampleRatios::None => {} + }; +} diff --git a/third_party/zune-jpeg/tests/invalid_images.rs b/third_party/zune-jpeg/tests/invalid_images.rs new file mode 100644 index 0000000..c6f22d6 --- /dev/null +++ b/third_party/zune-jpeg/tests/invalid_images.rs @@ -0,0 +1,66 @@ +/* + * Copyright (c) 2023. 
+ * + * This software is free software; + * + * You can redistribute it or modify it under terms of the MIT, Apache License or Zlib license + */ + +use zune_core::bytestream::ZCursor; +use zune_jpeg::JpegDecoder; + +#[test] +fn eof() { + let mut decoder = JpegDecoder::new(ZCursor::new([0xff, 0xd8, 0xa4])); + + decoder.decode().unwrap_err(); +} + +#[test] +fn bad_ff_marker_size() { + let mut decoder = JpegDecoder::new(ZCursor::new([0xff, 0xd8, 0xff, 0x00, 0x00, 0x00])); + + let _ = decoder.decode().unwrap_err(); +} + +#[test] +fn bad_number_of_scans() { + let mut decoder = JpegDecoder::new(ZCursor::new([255, 216, 255, 218, 232, 197, 255])); + + let err = decoder.decode().unwrap_err(); + + assert!( + matches!(err, zune_jpeg::errors::DecodeErrors::SosError(x) if x == "Bad SOS length 59589,corrupt jpeg") + ); +} + +#[test] +fn huffman_length_subtraction_overflow() { + let mut decoder = JpegDecoder::new(ZCursor::new([255, 216, 255, 196, 0, 0])); + + let err = decoder.decode().unwrap_err(); + + assert!( + matches!(err, zune_jpeg::errors::DecodeErrors::FormatStatic(x) if x == "Invalid Huffman length in image") + ); +} + +#[test] +fn index_oob() { + let mut decoder = JpegDecoder::new(ZCursor::new([255, 216, 255, 218, 0, 8, 1, 0, 8, 1])); + + let _ = decoder.decode().unwrap_err(); +} + +#[test] +fn mul_with_overflow() { + let mut decoder = JpegDecoder::new(ZCursor::new([ + 255, 216, 255, 192, 255, 1, 8, 9, 119, 48, 255, 192 + ])); + + let err = decoder.decode().unwrap_err(); + + assert!( + matches!(err, zune_jpeg::errors::DecodeErrors::SofError(x) if x == "Length of start of frame differs from expected 584,value is 65281") + ); +}