[JAVA]Huffman Tree：實現文件壓縮與解壓(編碼encode到decode)

2022-07-10 置頂文章
160
0
algorithm
透過建立的huffman tree，以遞迴的方式從根結點走向葉子結點；
藉由紀錄走訪過程向左為0、向右為1，恰巧符合二進制，
走過的路徑就成了葉子結點編碼(encode)，而有了encode編碼表。
將1.取得的二進制字串轉成byte(8 bits一個單位)進行減少體積 => 壓縮
最後在進行解壓縮的一系列過程。
package utitled;

import java.io.*;
import java.util.*;

/** Huffman編碼
 * 字串壓縮與解壓
 */
public class HuffmanEncodingDemo {
  public static void main(String[] args) {

    /** 將英文句子進行編碼壓縮 */
    String sentence = "I write them here shows what I have been learning";

    // 取得英文句子Byte陣列
    // Arrays.toString(originBytes) = [105, 32, 108, 105, 107, 101, 32, 108, 105, 107, 101,
    // 32, 108, 105, 107, 101, 32, 106, 97, 118, 97, 32, 100, 111, 32, 121, 111, 117, 32, 108,
    // 105, 107, 101, 32, 97, 32, 106, 97, 118, 97]
    /*
    byte[] originBytes = sentence.getBytes(StandardCharsets.UTF_8);
    byte[] compressBytes = HuffmanEncodeAndDecode.originBytesToCompressBytes(originBytes);
    System.out.println("===============================");
    System.out.println("Arrays.toString(compressBytes) = " + Arrays.toString(compressBytes) +
    ", compressBytes.length = " + compressBytes.length);

    // 將壓縮的byte[]解碼成原來的英文句子
    byte[] alphabetAsciiByteArr = HuffmanEncodeAndDecode.getAlphabetAsciiByteArr(compressBytes);
    System.out.println("new String(alphabetAsciiByteArr) = " + new String(alphabetAsciiByteArr));
    */

    HuffmanEncodeAndDecode.zipFile("C://Users/ching/Pictures/1643184440248.jpg",
            "C://Users/ching/Pictures/ERModel.zip");
    System.out.println("文件壓縮完成！");

    HuffmanEncodeAndDecode.upzipFile("C://Users/ching/Pictures/ERModel.zip", "C://Users/ching/Pictures/result.jpg");
    System.out.println("文件解壓縮完成！");
  }

}



class HuffmanEncodeAndDecode {
  private static Map<Byte, String> alphabetEncodes = new HashMap<>();
  private static StringBuilder stringBuilder = new StringBuilder();
  private static byte[] huffmanEncodesBytes;

  public static void upzipFile(String srcPath, String targetPath) {
    // 建立文件輸入流
    InputStream is = null;
    // 創建與文件相關的物件輸入流
    ObjectInputStream ois = null;
    // 建立文件輸出流
    OutputStream os = null;
    try {
      is = new FileInputStream(srcPath);
      ois = new ObjectInputStream(is);
      byte[] originBytes = (byte[]) ois.readObject();
      Map<Byte, String> alphabetEncodes = (Map<Byte, String>) ois.readObject();
      os = new FileOutputStream(targetPath);
      byte[] alphabetAsciiByteArr = HuffmanEncodeAndDecode.getAlphabetAsciiByteArr(originBytes, alphabetEncodes);
      os.write(alphabetAsciiByteArr);
    } catch (Exception e) {
      System.out.println("e.getMessage() = " + e.getMessage());
    } finally {
      try {
        os.close();
        ois.close();
        is.close();
      } catch (IOException e) {
        System.out.println("e.getMessage() = " + e.getMessage());
      }
    }
  }


  public static void zipFile(String srcPath, String targetPath) {

    FileInputStream fis = null;
    OutputStream os = null;
    ObjectOutputStream oos = null;
    try {
      // 創建輸入流
      fis = new FileInputStream(srcPath);
      // 輸入流讀取文件至byte[]，byte大小與原文件大小相同
      byte[] originbytes = new byte[fis.available()];
      fis.read(originbytes);

      // 進行檔案編碼壓縮
      byte[] compressBytes = HuffmanEncodeAndDecode.originBytesToCompressBytes(originbytes);

      // 創建輸出流
      os = new FileOutputStream(targetPath);
      // 使用輸出流將tmpBytes寫成物件，便於還原
      oos = new ObjectOutputStream(os);
      oos.writeObject(compressBytes);
      // 還原時需要原文件ascii對應編碼表(字節碼)
      oos.writeObject(alphabetEncodes);
    } catch (Exception e) {
      System.out.println("e.getMessage() = " + e.getMessage());
    } finally {
      try {
        oos.close();
        os.close();
        fis.close();
      } catch (IOException e) {
        System.out.println("e.getMessage() = " + e.getMessage());
      }
    }
  }

  public static byte[] getAlphabetAsciiByteArr(byte[] compressBytes) {
    return getAlphabetAsciiByteArr(compressBytes, alphabetEncodes);
  }

  /**
   * 取得原來的英文句子字母Ascii陣列
   * @param compressBytes
   * @param alphabetEncodes
   * @return
   */
  private static byte[] getAlphabetAsciiByteArr(byte[] compressBytes, Map<Byte, String> alphabetEncodes) {
    Map<String, Byte> alphabetDecodes = new HashMap<>();
    List<Byte> alphabetAsciiByteList = new ArrayList<>();

    // 將壓縮二進制byte[]轉為完整的二進制補碼併接字串
    // twosComplementAppendStr = 1010100010111111110010......
    String twosComplementAppendStr = getTwosComplementAppendStr(compressBytes);
    System.out.println("twosComplementAppendStr = " + twosComplementAppendStr);

    // 將alphabetEncodes的key-value對調
    for (Map.Entry<Byte, String> entry : alphabetEncodes.entrySet()) {
      alphabetDecodes.put(entry.getValue(), entry.getKey());
    }

    // 掃描二進制補碼併接字串，直至符合alphabetDecodes的key後再將value取出放入alphabetAsciiByteList


    for (int i = 0; i < twosComplementAppendStr.length();) {
      // 重新進入while循環前先重置count = 1
      int count = 1;
      boolean flag = true;

      while (flag) {
        String decodeKey = twosComplementAppendStr.substring(i, i + count);
        if (!alphabetDecodes.containsKey(decodeKey)) {
          // 還不符合alphabetDecodes的key，繼續向後掃描
          count++;
        } else {
          // 符合就跳出while循環並放入list
          flag = false;
          alphabetAsciiByteList.add(alphabetDecodes.get(decodeKey));
        }
      }

      i += count;
    }

    // 將List<Byte>轉byte[]
    byte[] alphabetAsciiByteArr = new byte[alphabetAsciiByteList.size()];
    for (int i = 0; i < alphabetAsciiByteList.size(); i++) {
      alphabetAsciiByteArr[i] = alphabetAsciiByteList.get(i);
    }
    return alphabetAsciiByteArr;
  }


  /**
   * 將壓縮二進制byte[]轉為完整的二進制補碼併接字串
   * @param compressBytes
   * @return
   */
  private static String getTwosComplementAppendStr(byte[] compressBytes) {
    StringBuilder stringBuilder = new StringBuilder();

    for (int i = 0; i < compressBytes.length; i++) {
      String twosComplementStr = "";
      int byteToInt = compressBytes[i];

      if (i != compressBytes.length - 1) {
        // 除了compressBytes的最後1個byte不足8位，不用補0，其餘高位補0
        byteToInt |= 256;
        // 取得補碼後的二進制字串
        twosComplementStr = Integer.toBinaryString(byteToInt);
        // 只需要補碼後的最後8位即可
        twosComplementStr = twosComplementStr.substring(twosComplementStr.length() - 8);
      } else {
        // 取得補碼後的二進制字串
        twosComplementStr = Integer.toBinaryString(byteToInt);
      }
      stringBuilder.append(twosComplementStr);

    }
    return stringBuilder.toString();
  }

  /**
   * 將英文句子Bytes轉換成壓縮的二進制byte陣列
   * @param originBytes
   * @return
   */
  public static byte[] originBytesToCompressBytes(byte[] originBytes) {
    // 取得英文字母的權重
    // alphabetCounts = {32=9, 97=5, 100=1, 101=4, 117=1, 118=2, 105=5, 121=1, 106=2, 107=4, 108=4, 111=2}
    Map<Byte, Integer> alphabetCounts = HuffmanEncodeAndDecode.getAlphabetCounts(originBytes);

    // 以英文字母出現次數(weight)從小到大創建huffman tree
    Node root = HuffmanEncodeAndDecode.buildHuffmanTree(alphabetCounts);
    //  HuffmanEncode.preOrder(root);

    // 取得英文字母編碼路徑
    // alphabetEncodes = {32=01, 97=100, 100=11000, 117=11001, 101=1110, 118=11011, 105=101, 121=11010,
    // 106=0010, 107=1111, 108=000, 111=0011}
    Map<Byte, String> alphabetEncodes = HuffmanEncodeAndDecode.getAlphabetEncodes(root);

    // 取得huffman tree壓縮後的二進制編碼byte陣列
    // [-88, -65, -56, -65, -56, -65, -55, 77, -57, 6, -24, -14, -117, -4, -60, -90, 28]
    byte[] compressBytes = HuffmanEncodeAndDecode.getCompressBytes(originBytes);

    return  compressBytes;
  }


  /**
   * 取得huffman tree壓縮後的二進制編碼byte陣列
   * @param originBytes
   * @return
   */
  public static byte[] getCompressBytes(byte[] originBytes) {
    for (Byte alphabetByte : originBytes) {
      stringBuilder.append(alphabetEncodes.get(alphabetByte));
    }

    // stringBuilder.toString() = 10101000101111111100100010111111110010.......
//  System.out.println("stringBuilder.toString() = " + stringBuilder.toString());

    // 將英文句子編碼二進制進行壓縮(compress) => 8bits = 1 byte
    // 取得二進制字串轉byte陣列長度
    int length;
    if (stringBuilder.length() % 8 == 0) {
      length = stringBuilder.length() / 8;
    } else {
      length = stringBuilder.length() / 8 + 1;
    }
    huffmanEncodesBytes = new byte[length];

    int byteCounts = 0;
    for (int i = 0; i < stringBuilder.length(); i += 8) {
      if (i + 8 > stringBuilder.length() - 1) {
        // 不滿8bits，無法湊1個byte
        huffmanEncodesBytes[byteCounts++] = (byte) Integer.parseInt(stringBuilder.substring(i), 2);
      } else {
        // 取二進制8位bits，轉int再轉byte
        huffmanEncodesBytes[byteCounts++] = (byte) Integer.parseInt(stringBuilder.substring(i, i + 8),2);
      }
    }
    return huffmanEncodesBytes;
  }


  /**
   * 計算Byte陣列中每個英文字母的出現次數
   * @param originBytes
   * @return
   */
  public static Map<Byte, Integer> getAlphabetCounts(byte[] originBytes) {
    // 計算originBytes中每個英文字母的出現次數
    Map<Byte, Integer> alphabetCounts = new HashMap<>();
    for (Byte alphabetByte : originBytes) {
      int count = alphabetCounts.containsKey(alphabetByte) ? alphabetCounts.get(alphabetByte) : 0;
      alphabetCounts.put(alphabetByte, ++count);
    }
    return alphabetCounts;
  }

  public static Map<Byte, String> getAlphabetEncodes(Node root) {
    if (root != null) {
      getAlphabetEncodes(root.getLeft(), "0", stringBuilder);
      getAlphabetEncodes(root.getRight(), "1", stringBuilder);
    }
    return alphabetEncodes;

  }

  /**
   * 取得英文字母的二進制編碼
   * @param current
   * @param code
   * @param stringBuilder
   */
  private static void getAlphabetEncodes(Node current, String code, StringBuilder stringBuilder) {
    StringBuilder sb = new StringBuilder(stringBuilder);
    sb.append(code);
    
    if (current.getAlphabet() == null) {
      // 當前結點向左向右取得路徑編碼(向左0,向右1)
      getAlphabetEncodes(current.getLeft(), "0", sb);
      getAlphabetEncodes(current.getRight(), "1", sb);
    } else {
      // 已經走到葉子結點
      alphabetEncodes.put(current.getAlphabet(), sb.toString());
    }
  }

  public static void preOrder(Node root) {
    if (root != null) {
      root.preOrder();
    } else {
      System.out.println("huffman tree為空，無法遍歷");
    }

  }

  /**
   * 1. 依據英文字母出現次數(權重)從小到大排列來創建huffman tree
   * 2. 回傳huffman tree根結點(root)即所有葉子結點權重之和
   * @param alphabetCounts
   * @return
   */
  public static Node buildHuffmanTree(Map<Byte, Integer> alphabetCounts) {

    List<Node> nodeList = new ArrayList<>();
    for (Byte alphabetByte : alphabetCounts.keySet()) {
      Node alphabetNode = new Node(alphabetByte, alphabetCounts.get(alphabetByte));
      nodeList.add(alphabetNode);
    }

    while (nodeList.size() > 1) {
      // 將nodeList從小到大排序
      Collections.sort(nodeList);

      // 取出最小與次小alphabetNode
      Node minWeightNode = nodeList.get(0);
      Node secondWeightNode = nodeList.get(1);
      // 創建新的二元樹(權重為minWeightNode, secondWeightNode權重之和)
      Node newTreeNode = new Node(null, minWeightNode.getWeight() + secondWeightNode.getWeight());
      newTreeNode.setLeft(minWeightNode);
      newTreeNode.setRight(secondWeightNode);
      // 將minWeightNode, minWeightNode從nodeList移除
      nodeList.remove(minWeightNode);
      nodeList.remove(secondWeightNode);
      // 將newTreeNode放加到nodeList
      nodeList.add(newTreeNode);
    }
    return nodeList.get(0);
  }
}

class Node implements Comparable<Node>{
  /** 存放英文字母ascii碼 */
  private Byte alphabet;
  /** 存放英文字母出現總次數 */
  private int weight;
  private Node left;
  private Node right;

  public Node(Byte alphabet, int weight) {
    this.alphabet = alphabet;
    this.weight = weight;
  }

  public Byte getAlphabet() {
    return alphabet;
  }

  public int getWeight() {
    return weight;
  }

  public Node getLeft() {
    return left;
  }

  public void setLeft(Node left) {
    this.left = left;
  }

  public Node getRight() {
    return right;
  }

  public void setRight(Node right) {
    this.right = right;
  }

  @Override
  public String toString() {
    final StringBuilder sb = new StringBuilder("Node{");
    sb.append("alphabet=").append(alphabet);
    sb.append(", weight=").append(weight);
    sb.append('}');
    return sb.toString();
  }

  /** Node物件從小到大排序 */
  @Override
  public int compareTo(Node node) {
    return this.weight - node.weight;
  }

  /**
   * 樹的前序遍歷
   */
  public void preOrder() {
    System.out.println(" this = " + this);
    if (this.getLeft() != null) {
      this.getLeft().preOrder();
    }
    if (this.getRight() != null) {
      this.getRight().preOrder();
    }
  }
}
如有敘述錯誤，還請不吝嗇留言指教，thanks!
回首頁