JPEG 压缩——第四步:YCbCr+离散余弦变换+量化
将图像转为 YCbCr 色彩空间之后,进行 离散余弦变换再对 Y 用 Q1 量化矩阵量化,Cb 和 Cr 用 Q2 量化矩阵量化。最后通过离散余弦逆变换对图像复原。还需比较图像的容量。算法如下:
- 将图像从RGB色彩空间变换到YCbCr色彩空间;
 - 对YCbCr做DCT;
 - DCT之后做量化;
 - 量化之后应用IDCT;
 - IDCT之后从YCbCr色彩空间变换到RGB色彩空间。
 
这是实际生活中使用的减少 JPEG 数据量的方法,Q1 和 Q2 根据 JPEG 规范由以下等式定义:
Q1 = np.array(((16, 11, 10, 16, 24, 40, 51, 61),
               (12, 12, 14, 19, 26, 58, 60, 55),
               (14, 13, 16, 24, 40, 57, 69, 56),
               (14, 17, 22, 29, 51, 87, 80, 62),
               (18, 22, 37, 56, 68, 109, 103, 77),
               (24, 35, 55, 64, 81, 104, 113, 92),
               (49, 64, 78, 87, 103, 121, 120, 101),
               (72, 92, 95, 98, 112, 100, 103, 99)), dtype=np.float32)
Q2 = np.array(((17, 18, 24, 47, 99, 99, 99, 99),
               (18, 21, 26, 66, 99, 99, 99, 99),
               (24, 26, 56, 99, 99, 99, 99, 99),
               (47, 66, 99, 99, 99, 99, 99, 99),
               (99, 99, 99, 99, 99, 99, 99, 99),
               (99, 99, 99, 99, 99, 99, 99, 99),
               (99, 99, 99, 99, 99, 99, 99, 99),
               (99, 99, 99, 99, 99, 99, 99, 99)), dtype=np.float32)
python实现:
import cv2
import numpy as np
import matplotlib.pyplot as plt
# DCT hyoer-parameter
T = 8
K = 8
channel = 3
# BGR -> Y Cb Cr
def BGR2YCbCr(img):
  H, W, _ = img.shape
  ycbcr = np.zeros([H, W, 3], dtype=np.float32)
  ycbcr[..., 0] = 0.2990 * img[..., 2] + 0.5870 * img[..., 1] + 0.1140 * img[..., 0]
  ycbcr[..., 1] = -0.1687 * img[..., 2] - 0.3313 * img[..., 1] + 0.5 * img[..., 0] + 128.
  ycbcr[..., 2] = 0.5 * img[..., 2] - 0.4187 * img[..., 1] - 0.0813 * img[..., 0] + 128.
  return ycbcr
# Y Cb Cr -> BGR
def YCbCr2BGR(ycbcr):
  H, W, _ = ycbcr.shape
  out = np.zeros([H, W, channel], dtype=np.float32)
  out[..., 2] = ycbcr[..., 0] + (ycbcr[..., 2] - 128.) * 1.4020
  out[..., 1] = ycbcr[..., 0] - (ycbcr[..., 1] - 128.) * 0.3441 - (ycbcr[..., 2] - 128.) * 0.7139
  out[..., 0] = ycbcr[..., 0] + (ycbcr[..., 1] - 128.) * 1.7718
  out = np.clip(out, 0, 255)
  out = out.astype(np.uint8)
  return out
# DCT weight
def DCT_w(x, y, u, v):
    cu = 1.
    cv = 1.
    if u == 0:
        cu /= np.sqrt(2)
    if v == 0:
        cv /= np.sqrt(2)
    theta = np.pi / (2 * T)
    return (( 2 * cu * cv / T) * np.cos((2*x+1)*u*theta) * np.cos((2*y+1)*v*theta))
# DCT
def dct(img):
    H, W, _ = img.shape
    F = np.zeros((H, W, channel), dtype=np.float32)
    for c in range(channel):
        for yi in range(0, H, T):
            for xi in range(0, W, T):
                for v in range(T):
                    for u in range(T):
                        for y in range(T):
                            for x in range(T):
                                F[v+yi, u+xi, c] += img[y+yi, x+xi, c] * DCT_w(x,y,u,v)
    return F
# IDCT
def idct(F):
    H, W, _ = F.shape
    out = np.zeros((H, W, channel), dtype=np.float32)
    for c in range(channel):
        for yi in range(0, H, T):
            for xi in range(0, W, T):
                for y in range(T):
                    for x in range(T):
                        for v in range(K):
                            for u in range(K):
                                out[y+yi, x+xi, c] += F[v+yi, u+xi, c] * DCT_w(x,y,u,v)
    out = np.clip(out, 0, 255)
    out = np.round(out).astype(np.uint8)
    return out
# Quantization
def quantization(F):
    H, W, _ = F.shape
    Q = np.array(((16, 11, 10, 16, 24, 40, 51, 61),
                (12, 12, 14, 19, 26, 58, 60, 55),
                (14, 13, 16, 24, 40, 57, 69, 56),
                (14, 17, 22, 29, 51, 87, 80, 62),
                (18, 22, 37, 56, 68, 109, 103, 77),
                (24, 35, 55, 64, 81, 104, 113, 92),
                (49, 64, 78, 87, 103, 121, 120, 101),
                (72, 92, 95, 98, 112, 100, 103, 99)), dtype=np.float32)
    for ys in range(0, H, T):
        for xs in range(0, W, T):
            for c in range(channel):
                F[ys: ys + T, xs: xs + T, c] =  np.round(F[ys: ys + T, xs: xs + T, c] / Q) * Q
    return F
# JPEG without Hufman coding
def JPEG(img):
    # BGR -> Y Cb Cr
    ycbcr = BGR2YCbCr(img)
    # DCT
    F = dct(ycbcr)
    # quantization
    F = quantization(F)
    # IDCT
    ycbcr = idct(F)
    # Y Cb Cr -> BGR
    out = YCbCr2BGR(ycbcr)
    return out
# MSE
def MSE(img1, img2):
    H, W, _ = img1.shape
    mse = np.sum((img1 - img2) ** 2) / (H * W * channel)
    return mse
# PSNR
def PSNR(mse, vmax=255):
    return 10 * np.log10(vmax * vmax / mse)
# bitrate
def BITRATE():
    return 1. * T * K * K / T / T
# Read image
img = cv2.imread("imori.jpg").astype(np.float32)
# JPEG
out = JPEG(img)
# MSE
mse = MSE(img, out)
# PSNR
psnr = PSNR(mse)
# bitrate
bitrate = BITRATE()
print("MSE:", mse)
print("PSNR:", psnr)
print("bitrate:", bitrate)
# Save result
cv2.imshow("result", out)
cv2.waitKey(0)
cv2.imwrite("out.jpg", out)
c++实现:
#include <opencv2/core.hpp>
#include <opencv2/highgui.hpp>
#include <iostream>
#include <math.h>
#include <complex>
const int height = 128, width = 128, channel = 3;
// DCT hyper-parameter
int T = 8;
int K = 8;
// DCT coefficient
struct dct_str {
  double coef[height][width][channel];
};
// Discrete Cosine transformation
dct_str dct(cv::Mat img, dct_str dct_s){
  double I;
  double F;
  double Cu, Cv;
  for (int ys = 0; ys < height; ys += T){
    for (int xs = 0; xs < width; xs += T){
      for (int c = 0; c < channel; c++){
        for (int v = 0; v < T; v ++){
          for (int u = 0; u < T; u ++){
            F = 0;
            if (u == 0){
              Cu = 1. / sqrt(2);
            } else{
              Cu = 1;
            }
            if (v == 0){
              Cv = 1. / sqrt(2);
            }else {
              Cv = 1;
            }
            for (int y = 0; y < T; y++){
              for(int x = 0; x < T; x++){
                I = (double)img.at<cv::Vec3b>(ys + y, xs + x)[c];
                F += 2. / T * Cu * Cv * I * cos((2. * x + 1) * u * M_PI / 2. / T) * cos((2. * y + 1) * v * M_PI / 2. / T);
              }
            }
            dct_s.coef[ys + v][xs + u][c] = F;
          }
        }
      }
    }
  }
  return dct_s;
}
// Inverse Discrete Cosine transformation
cv::Mat idct(cv::Mat out, dct_str dct_s){
  double f;
  double Cu, Cv;
  for (int ys = 0; ys < height; ys += T){
    for (int xs = 0; xs < width; xs += T){
      for (int c = 0; c < channel; c++){
        for (int y = 0; y < T; y++){
          for (int x = 0; x < T; x++){
            f = 0;
            for (int v = 0; v < K; v++){
              for (int u = 0; u < K; u++){
                if (u == 0){
                  Cu = 1. / sqrt(2);
                } else {
                  Cu = 1;
                }
                if (v == 0){
                  Cv = 1. / sqrt(2);
                } else { 
                  Cv = 1;
                }
                f += 2. / T * Cu * Cv * dct_s.coef[ys + v][xs + u][c] * cos((2. * x + 1) * u * M_PI / 2. / T) * cos((2. * y + 1) * v * M_PI / 2. / T);
              }
            }
            f = fmin(fmax(f, 0), 255);
            out.at<cv::Vec3b>(ys + y, xs + x)[c] = (uchar)f;
          }
        }
      }
    }
  }
  return out;
}
// Quantization
dct_str quantization(dct_str dct_s){
  // Q table for Y
  double Q1[T][T] = {{16, 11, 10, 16, 24, 40, 51, 61},
                    {12, 12, 14, 19, 26, 58, 60, 55},
                    {12, 12, 14, 19, 26, 58, 60, 55},
                    {14, 17, 22, 29, 51, 87, 80, 62},
                    {18, 22, 37, 56, 68, 109, 103, 77},
                    {24, 35, 55, 64, 81, 104, 113, 92},
                    {49, 64, 78, 87, 103, 121, 120, 101},
                    {72, 92, 95, 98, 112, 100, 103, 99}
                  };
  // Q table for Cb Cr
  double Q2[T][T] = {{17, 18, 24, 47, 99, 99, 99, 99},
                    {18, 21, 26, 66, 99, 99, 99, 99},
                    {24, 26, 56, 99, 99, 99, 99, 99},
                    {47, 66, 99, 99, 99, 99, 99, 99},
                    {99, 99, 99, 99, 99, 99, 99, 99},
                    {99, 99, 99, 99, 99, 99, 99, 99},
                    {99, 99, 99, 99, 99, 99, 99, 99},
                    {99, 99, 99, 99, 99, 99, 99, 99}
                  };
  for (int ys = 0; ys < height; ys += T){
    for (int xs = 0; xs < width; xs += T){
      for(int y = 0; y < T; y++){
        for(int x = 0; x < T; x++){
          dct_s.coef[ys + y][xs + x][0] = round(dct_s.coef[ys + y][xs + x][0] / Q1[y][x]) * Q1[y][x];
          dct_s.coef[ys + y][xs + x][1] = round(dct_s.coef[ys + y][xs + x][1] / Q2[y][x]) * Q2[y][x];
          dct_s.coef[ys + y][xs + x][2] = round(dct_s.coef[ys + y][xs + x][2] / Q2[y][x]) * Q2[y][x];
        }
      }
    }
  }
  return dct_s;
}
// BGR -> Y Cb Cr
cv::Mat BGR2YCbCr(cv::Mat img, cv::Mat out){
  int width = img.rows;
  int height = img.cols;
  //cv::Mat out = cv::Mat::zeros(height, width, CV_32F);
  for (int j = 0; j < height; j ++){
    for (int i = 0; i < width; i ++){
      // Y
      out.at<cv::Vec3b>(j, i)[0] = (int)((float)img.at<cv::Vec3b>(j,i)[0] * 0.114 + \
                  (float)img.at<cv::Vec3b>(j,i)[1] * 0.5870 + \
                  (float)img.at<cv::Vec3b>(j,i)[2] * 0.299);
      // Cb
      out.at<cv::Vec3b>(j, i)[1] = (int)((float)img.at<cv::Vec3b>(j,i)[0] * 0.5 + \
                  (float)img.at<cv::Vec3b>(j,i)[1] * (-0.3323) + \
                  (float)img.at<cv::Vec3b>(j,i)[2] * (-0.1687) + 128);
      // Cr
      out.at<cv::Vec3b>(j, i)[2] = (int)((float)img.at<cv::Vec3b>(j,i)[0] * (-0.0813) + \
                  (float)img.at<cv::Vec3b>(j,i)[1] * (-0.4187) + \
                  (float)img.at<cv::Vec3b>(j,i)[2] * 0.5 + 128);
    }
  }
  return out;
}
// Y Cb Cr -> BGR
cv::Mat YCbCr2BGR(cv::Mat ycbcr, cv::Mat out){
  int width = out.rows;
  int height = out.cols;
  int val;
  for (int j = 0; j < height; j ++){
    for (int i = 0; i < width; i ++){
      // R
      val = ycbcr.at<cv::Vec3b>(j, i)[0] + (ycbcr.at<cv::Vec3b>(j, i)[2] - 128) * 1.4102;
      val = fmin(255, fmax(0, val));
      out.at<cv::Vec3b>(j, i)[2] = (uchar)val;
      // G
      val = ycbcr.at<cv::Vec3b>(j, i)[0] - (ycbcr.at<cv::Vec3b>(j, i)[1] - 128) * 0.3441 - (ycbcr.at<cv::Vec3b>(j, i)[2] - 128) * 0.7139;
      val = fmin(255, fmax(0, val));
      out.at<cv::Vec3b>(j, i)[1] = (uchar)val;
      // B
      val = ycbcr.at<cv::Vec3b>(j, i)[0] + (ycbcr.at<cv::Vec3b>(j, i)[1] - 128) * 1.7718;
      val = fmin(255, fmax(0, val));
      out.at<cv::Vec3b>(j, i)[0] = (uchar)val;
    }
  }
  return out;
}
// Compute MSE
double MSE(cv::Mat img1, cv::Mat img2){
  double mse = 0;
  for(int y = 0; y < height; y++){
    for(int x = 0; x < width; x++){
      for(int c = 0; c < channel; c++){
        mse += pow(((double)img1.at<cv::Vec3b>(y, x)[c] - (double)img2.at<cv::Vec3b>(y, x)[c]), 2);
      }
    }
  }
  mse /= (height * width);
  return mse;
}
// Compute PSNR
double PSNR(double mse, double v_max){
  return 10 * log10(v_max * v_max / mse);
}
// Compute bitrate
double BITRATE(){
  return T * K * K / T * T;
}
// Main
int main(int argc, const char* argv[]){
  double mse;
  double psnr;
  double bitrate;
  // read original image
  cv::Mat img = cv::imread("imori.jpg", cv::IMREAD_COLOR);
  // DCT coefficient
  dct_str dct_s;
  // output image
  cv::Mat ycbcr = cv::Mat::zeros(height, width, CV_32FC3);
  cv::Mat out = cv::Mat::zeros(height, width, CV_8UC3);
  // BGR -> Y Cb Cr
  ycbcr = BGR2YCbCr(img, ycbcr);
  // DCT
  dct_s = dct(ycbcr, dct_s);
  // Quantization
  dct_s = quantization(dct_s);
  // IDCT
  ycbcr = idct(ycbcr, dct_s);
  // Y Cb Cr -> BGR
  out = YCbCr2BGR(ycbcr, out);
  // MSE, PSNR
  mse = MSE(img, out);
  psnr = PSNR(mse, 255);
  bitrate = BITRATE();
  std::cout << "MSE: " << mse << std::endl;
  std::cout << "PSNR: " << psnr << std::endl;
  std::cout << "bitrate: " << bitrate << std::endl;
  cv::imwrite("out.jpg", out);
  //cv::imshow("answer", out);
  //cv::waitKey(0);
  cv::destroyAllWindows();
  return 0;
}
输入:

输出:

极客教程