- 透過建立的huffman tree,以遞迴的方式從根結點走向葉子結點;
藉由紀錄走訪過程向左為0、向右為1,恰巧符合二進制,
走過的路徑就成了葉子結點編碼(encode),而有了encode編碼表。 - 將1.取得的二進制字串轉成byte(8 bits一個單位)進行減少體積 => 壓縮
- 最後在進行解壓縮的一系列過程。
package utitled;
import java.io.*;
import java.util.*;
/** Huffman編碼
* 字串壓縮與解壓
*/
public class HuffmanEncodingDemo {
public static void main(String[] args) {
/** 將英文句子進行編碼壓縮 */
String sentence = "I write them here shows what I have been learning";
// 取得英文句子Byte陣列
// Arrays.toString(originBytes) = [105, 32, 108, 105, 107, 101, 32, 108, 105, 107, 101,
// 32, 108, 105, 107, 101, 32, 106, 97, 118, 97, 32, 100, 111, 32, 121, 111, 117, 32, 108,
// 105, 107, 101, 32, 97, 32, 106, 97, 118, 97]
/*
byte[] originBytes = sentence.getBytes(StandardCharsets.UTF_8);
byte[] compressBytes = HuffmanEncodeAndDecode.originBytesToCompressBytes(originBytes);
System.out.println("===============================");
System.out.println("Arrays.toString(compressBytes) = " + Arrays.toString(compressBytes) +
", compressBytes.length = " + compressBytes.length);
// 將壓縮的byte[]解碼成原來的英文句子
byte[] alphabetAsciiByteArr = HuffmanEncodeAndDecode.getAlphabetAsciiByteArr(compressBytes);
System.out.println("new String(alphabetAsciiByteArr) = " + new String(alphabetAsciiByteArr));
*/
HuffmanEncodeAndDecode.zipFile("C://Users/ching/Pictures/1643184440248.jpg",
"C://Users/ching/Pictures/ERModel.zip");
System.out.println("文件壓縮完成!");
HuffmanEncodeAndDecode.upzipFile("C://Users/ching/Pictures/ERModel.zip", "C://Users/ching/Pictures/result.jpg");
System.out.println("文件解壓縮完成!");
}
}
class HuffmanEncodeAndDecode {
private static Map<Byte, String> alphabetEncodes = new HashMap<>();
private static StringBuilder stringBuilder = new StringBuilder();
private static byte[] huffmanEncodesBytes;
public static void upzipFile(String srcPath, String targetPath) {
// 建立文件輸入流
InputStream is = null;
// 創建與文件相關的物件輸入流
ObjectInputStream ois = null;
// 建立文件輸出流
OutputStream os = null;
try {
is = new FileInputStream(srcPath);
ois = new ObjectInputStream(is);
byte[] originBytes = (byte[]) ois.readObject();
Map<Byte, String> alphabetEncodes = (Map<Byte, String>) ois.readObject();
os = new FileOutputStream(targetPath);
byte[] alphabetAsciiByteArr = HuffmanEncodeAndDecode.getAlphabetAsciiByteArr(originBytes, alphabetEncodes);
os.write(alphabetAsciiByteArr);
} catch (Exception e) {
System.out.println("e.getMessage() = " + e.getMessage());
} finally {
try {
os.close();
ois.close();
is.close();
} catch (IOException e) {
System.out.println("e.getMessage() = " + e.getMessage());
}
}
}
public static void zipFile(String srcPath, String targetPath) {
FileInputStream fis = null;
OutputStream os = null;
ObjectOutputStream oos = null;
try {
// 創建輸入流
fis = new FileInputStream(srcPath);
// 輸入流讀取文件至byte[],byte大小與原文件大小相同
byte[] originbytes = new byte[fis.available()];
fis.read(originbytes);
// 進行檔案編碼壓縮
byte[] compressBytes = HuffmanEncodeAndDecode.originBytesToCompressBytes(originbytes);
// 創建輸出流
os = new FileOutputStream(targetPath);
// 使用輸出流將tmpBytes寫成物件,便於還原
oos = new ObjectOutputStream(os);
oos.writeObject(compressBytes);
// 還原時需要原文件ascii對應編碼表(字節碼)
oos.writeObject(alphabetEncodes);
} catch (Exception e) {
System.out.println("e.getMessage() = " + e.getMessage());
} finally {
try {
oos.close();
os.close();
fis.close();
} catch (IOException e) {
System.out.println("e.getMessage() = " + e.getMessage());
}
}
}
public static byte[] getAlphabetAsciiByteArr(byte[] compressBytes) {
return getAlphabetAsciiByteArr(compressBytes, alphabetEncodes);
}
/**
* 取得原來的英文句子字母Ascii陣列
* @param compressBytes
* @param alphabetEncodes
* @return
*/
private static byte[] getAlphabetAsciiByteArr(byte[] compressBytes, Map<Byte, String> alphabetEncodes) {
Map<String, Byte> alphabetDecodes = new HashMap<>();
List<Byte> alphabetAsciiByteList = new ArrayList<>();
// 將壓縮二進制byte[]轉為完整的二進制補碼併接字串
// twosComplementAppendStr = 1010100010111111110010......
String twosComplementAppendStr = getTwosComplementAppendStr(compressBytes);
System.out.println("twosComplementAppendStr = " + twosComplementAppendStr);
// 將alphabetEncodes的key-value對調
for (Map.Entry<Byte, String> entry : alphabetEncodes.entrySet()) {
alphabetDecodes.put(entry.getValue(), entry.getKey());
}
// 掃描二進制補碼併接字串,直至符合alphabetDecodes的key後再將value取出放入alphabetAsciiByteList
for (int i = 0; i < twosComplementAppendStr.length();) {
// 重新進入while循環前先重置count = 1
int count = 1;
boolean flag = true;
while (flag) {
String decodeKey = twosComplementAppendStr.substring(i, i + count);
if (!alphabetDecodes.containsKey(decodeKey)) {
// 還不符合alphabetDecodes的key,繼續向後掃描
count++;
} else {
// 符合就跳出while循環並放入list
flag = false;
alphabetAsciiByteList.add(alphabetDecodes.get(decodeKey));
}
}
i += count;
}
// 將List<Byte>轉byte[]
byte[] alphabetAsciiByteArr = new byte[alphabetAsciiByteList.size()];
for (int i = 0; i < alphabetAsciiByteList.size(); i++) {
alphabetAsciiByteArr[i] = alphabetAsciiByteList.get(i);
}
return alphabetAsciiByteArr;
}
/**
* 將壓縮二進制byte[]轉為完整的二進制補碼併接字串
* @param compressBytes
* @return
*/
private static String getTwosComplementAppendStr(byte[] compressBytes) {
StringBuilder stringBuilder = new StringBuilder();
for (int i = 0; i < compressBytes.length; i++) {
String twosComplementStr = "";
int byteToInt = compressBytes[i];
if (i != compressBytes.length - 1) {
// 除了compressBytes的最後1個byte不足8位,不用補0,其餘高位補0
byteToInt |= 256;
// 取得補碼後的二進制字串
twosComplementStr = Integer.toBinaryString(byteToInt);
// 只需要補碼後的最後8位即可
twosComplementStr = twosComplementStr.substring(twosComplementStr.length() - 8);
} else {
// 取得補碼後的二進制字串
twosComplementStr = Integer.toBinaryString(byteToInt);
}
stringBuilder.append(twosComplementStr);
}
return stringBuilder.toString();
}
/**
* 將英文句子Bytes轉換成壓縮的二進制byte陣列
* @param originBytes
* @return
*/
public static byte[] originBytesToCompressBytes(byte[] originBytes) {
// 取得英文字母的權重
// alphabetCounts = {32=9, 97=5, 100=1, 101=4, 117=1, 118=2, 105=5, 121=1, 106=2, 107=4, 108=4, 111=2}
Map<Byte, Integer> alphabetCounts = HuffmanEncodeAndDecode.getAlphabetCounts(originBytes);
// 以英文字母出現次數(weight)從小到大創建huffman tree
Node root = HuffmanEncodeAndDecode.buildHuffmanTree(alphabetCounts);
// HuffmanEncode.preOrder(root);
// 取得英文字母編碼路徑
// alphabetEncodes = {32=01, 97=100, 100=11000, 117=11001, 101=1110, 118=11011, 105=101, 121=11010,
// 106=0010, 107=1111, 108=000, 111=0011}
Map<Byte, String> alphabetEncodes = HuffmanEncodeAndDecode.getAlphabetEncodes(root);
// 取得huffman tree壓縮後的二進制編碼byte陣列
// [-88, -65, -56, -65, -56, -65, -55, 77, -57, 6, -24, -14, -117, -4, -60, -90, 28]
byte[] compressBytes = HuffmanEncodeAndDecode.getCompressBytes(originBytes);
return compressBytes;
}
/**
* 取得huffman tree壓縮後的二進制編碼byte陣列
* @param originBytes
* @return
*/
public static byte[] getCompressBytes(byte[] originBytes) {
for (Byte alphabetByte : originBytes) {
stringBuilder.append(alphabetEncodes.get(alphabetByte));
}
// stringBuilder.toString() = 10101000101111111100100010111111110010.......
// System.out.println("stringBuilder.toString() = " + stringBuilder.toString());
// 將英文句子編碼二進制進行壓縮(compress) => 8bits = 1 byte
// 取得二進制字串轉byte陣列長度
int length;
if (stringBuilder.length() % 8 == 0) {
length = stringBuilder.length() / 8;
} else {
length = stringBuilder.length() / 8 + 1;
}
huffmanEncodesBytes = new byte[length];
int byteCounts = 0;
for (int i = 0; i < stringBuilder.length(); i += 8) {
if (i + 8 > stringBuilder.length() - 1) {
// 不滿8bits,無法湊1個byte
huffmanEncodesBytes[byteCounts++] = (byte) Integer.parseInt(stringBuilder.substring(i), 2);
} else {
// 取二進制8位bits,轉int再轉byte
huffmanEncodesBytes[byteCounts++] = (byte) Integer.parseInt(stringBuilder.substring(i, i + 8),2);
}
}
return huffmanEncodesBytes;
}
/**
* 計算Byte陣列中每個英文字母的出現次數
* @param originBytes
* @return
*/
public static Map<Byte, Integer> getAlphabetCounts(byte[] originBytes) {
// 計算originBytes中每個英文字母的出現次數
Map<Byte, Integer> alphabetCounts = new HashMap<>();
for (Byte alphabetByte : originBytes) {
int count = alphabetCounts.containsKey(alphabetByte) ? alphabetCounts.get(alphabetByte) : 0;
alphabetCounts.put(alphabetByte, ++count);
}
return alphabetCounts;
}
public static Map<Byte, String> getAlphabetEncodes(Node root) {
if (root != null) {
getAlphabetEncodes(root.getLeft(), "0", stringBuilder);
getAlphabetEncodes(root.getRight(), "1", stringBuilder);
}
return alphabetEncodes;
}
/**
* 取得英文字母的二進制編碼
* @param current
* @param code
* @param stringBuilder
*/
private static void getAlphabetEncodes(Node current, String code, StringBuilder stringBuilder) {
StringBuilder sb = new StringBuilder(stringBuilder);
sb.append(code);
if (current.getAlphabet() == null) {
// 當前結點向左向右取得路徑編碼(向左0,向右1)
getAlphabetEncodes(current.getLeft(), "0", sb);
getAlphabetEncodes(current.getRight(), "1", sb);
} else {
// 已經走到葉子結點
alphabetEncodes.put(current.getAlphabet(), sb.toString());
}
}
public static void preOrder(Node root) {
if (root != null) {
root.preOrder();
} else {
System.out.println("huffman tree為空,無法遍歷");
}
}
/**
* 1. 依據英文字母出現次數(權重)從小到大排列來創建huffman tree
* 2. 回傳huffman tree根結點(root)即所有葉子結點權重之和
* @param alphabetCounts
* @return
*/
public static Node buildHuffmanTree(Map<Byte, Integer> alphabetCounts) {
List<Node> nodeList = new ArrayList<>();
for (Byte alphabetByte : alphabetCounts.keySet()) {
Node alphabetNode = new Node(alphabetByte, alphabetCounts.get(alphabetByte));
nodeList.add(alphabetNode);
}
while (nodeList.size() > 1) {
// 將nodeList從小到大排序
Collections.sort(nodeList);
// 取出最小與次小alphabetNode
Node minWeightNode = nodeList.get(0);
Node secondWeightNode = nodeList.get(1);
// 創建新的二元樹(權重為minWeightNode, secondWeightNode權重之和)
Node newTreeNode = new Node(null, minWeightNode.getWeight() + secondWeightNode.getWeight());
newTreeNode.setLeft(minWeightNode);
newTreeNode.setRight(secondWeightNode);
// 將minWeightNode, minWeightNode從nodeList移除
nodeList.remove(minWeightNode);
nodeList.remove(secondWeightNode);
// 將newTreeNode放加到nodeList
nodeList.add(newTreeNode);
}
return nodeList.get(0);
}
}
class Node implements Comparable<Node>{
/** 存放英文字母ascii碼 */
private Byte alphabet;
/** 存放英文字母出現總次數 */
private int weight;
private Node left;
private Node right;
public Node(Byte alphabet, int weight) {
this.alphabet = alphabet;
this.weight = weight;
}
public Byte getAlphabet() {
return alphabet;
}
public int getWeight() {
return weight;
}
public Node getLeft() {
return left;
}
public void setLeft(Node left) {
this.left = left;
}
public Node getRight() {
return right;
}
public void setRight(Node right) {
this.right = right;
}
@Override
public String toString() {
final StringBuilder sb = new StringBuilder("Node{");
sb.append("alphabet=").append(alphabet);
sb.append(", weight=").append(weight);
sb.append('}');
return sb.toString();
}
/** Node物件從小到大排序 */
@Override
public int compareTo(Node node) {
return this.weight - node.weight;
}
/**
* 樹的前序遍歷
*/
public void preOrder() {
System.out.println(" this = " + this);
if (this.getLeft() != null) {
this.getLeft().preOrder();
}
if (this.getRight() != null) {
this.getRight().preOrder();
}
}
}
如有敘述錯誤,還請不吝嗇留言指教,thanks!