master

分支 (2)

管理

管理

master

xc-test1

structures-and-algorithm
/
src
/
main
/
java
/
com
/
xcc
/
dataStructures
/
demo10_huffman
/
HuffmanCode.java

package com.xcc.dataStructures.demo10_huffman;


import java.io.*;
import java.util.*;


/**
 * 赫夫曼编码
 *
 * @author xiaocheng
 * @date 2020/12/18 9:23
 */
public class HuffmanCode {

    public static void main(String[] args) {
        //压缩文件
        String srcFile = "D:\\input\\59fc1fa95d771.jpg";
        String destFile = "D:\\input\\59fc1fa95d771.zip";
        zipFile(srcFile, destFile);
        System.out.println("压缩完成");

        String zipFile = "D:\\input\\59fc1fa95d771.zip";
        String unzipFile = "D:\\input\\59fc1fa95d7711.jpg";
        unzipFile(zipFile,unzipFile);
        System.out.println("解压成功");

        /*//压缩字符串
        String str = "i like like like java do you like a java 12";
        byte[] contentBytes = str.getBytes();
        byte[] zipBytes = zip(contentBytes);
        System.out.println(Arrays.toString(zipBytes));

        byte[] sourceBytes = unzip(zipBytes, huffmanCodeMap);
        System.out.println(new String(sourceBytes));*/
    }

    /**
     * 解压文件
     *
     * @param srcFile   压缩之后的文件
     * @param destFile  解压后的文件
     */
    private static void unzipFile(String srcFile, String destFile) {
        InputStream is = null;
        ObjectInputStream ois = null;
        OutputStream os = null;
        try {
            is = new FileInputStream(srcFile);
            ois = new ObjectInputStream(is);
            //读取压缩之后的 文件 和 对应的赫夫曼编码
            byte[] zipBytes = (byte[]) ois.readObject();
            Map<Byte, String> huffmanMap = (Map<Byte, String>) ois.readObject();

            //解压
            byte[] srcBytes = unzip(zipBytes, huffmanMap);

            //输出
            os = new FileOutputStream(destFile);
            os.write(srcBytes);
        } catch (Exception e) {
            e.printStackTrace();
        }finally {
            try {
                os.close();
                ois.close();
                is.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }

    /**
     * 压缩文件
     *
     * @param srcFile  原始文件路径
     * @param destFile 压缩后文件路径
     */
    private static void zipFile(String srcFile, String destFile) {
        InputStream is = null;
        OutputStream os = null;
        //对象输出流
        ObjectOutputStream oos = null;
        try {
            is = new FileInputStream(srcFile);
            //创建原始字节数组
            byte[] bytes = new byte[is.available()];
            //读取文件
            is.read(bytes);
            //压缩
            byte[] zip = zip(bytes);

            os = new FileOutputStream(destFile);
            //关联
            oos = new ObjectOutputStream(os);
            //将压缩之后的数组 和 赫夫曼编码对应关系 写出
            oos.writeObject(zip);
            oos.writeObject(huffmanCodeMap);
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            try {
                oos.close();
                os.close();
                is.close();
            } catch (IOException e) {
                e.printStackTrace();
            }

        }
    }

    /**
     * 转换成原始的字符串
     */
    private static byte[] unzip(byte[] bytes, Map<Byte, String> huffmanMap) {
        //先将数组转换为对应的赫夫曼编码的10010..组成的字符串
        String huffmancode = decodeBytesToHuffmancode(bytes);
//        System.out.println(huffmancode);
        //将赫夫曼编码转换为对应的字符串
        return decodeHuffmancodeToSourceStr(huffmancode, huffmanMap);
    }

    /**
     * 将赫夫曼编码进行解码为原始字节数组
     *
     * @param huffmancode 赫夫曼编码
     * @param huffmanMap  记录原始字节与编码的map集合
     * @return 原始字符串
     */
    private static byte[] decodeHuffmancodeToSourceStr(String huffmancode, Map<Byte, String> huffmanMap) {
        //将map反转。原来是32->010,98->001... 转换为001->98,010->32...
        Map<String, Byte> map = new HashMap<>();
        for (Map.Entry<Byte, String> entry : huffmanMap.entrySet()) {
            map.put(entry.getValue(), entry.getKey());
        }
        //原始字节集合
        List<Byte> byteList = new ArrayList<>();
        for (int i = 0; i < huffmancode.length(); ) {
            StringBuilder stringBuilder = new StringBuilder();
            //将huffman编码组成的字符村1000100011...，进行遍历。
            //当指针指向一定的值存储在反转红藕的map集合中时，停止
            while (map.get(stringBuilder.toString()) == null) {
                if (i >= huffmancode.length()) {
                    break;
                }
                stringBuilder.append(huffmancode.charAt(i));
                i++;
            }
            //获取每个字符，并组装成对应的字符串
            Byte aByte = map.get(stringBuilder.toString());
            byteList.add(aByte);
        }
        byte[] bytes = new byte[byteList.size()];
        for (int i = 0; i < byteList.size(); i++) {
            bytes[i] = byteList.get(i);
        }
        return bytes;
    }

    /**
     * 将压缩后的数组转换为赫夫曼编码
     *
     * @param bytes 压缩后的字节数组
     * @return 赫夫曼编码
     */
    private static String decodeBytesToHuffmancode(byte[] bytes) {
        StringBuilder sb = new StringBuilder();
        for (int i = 0; i < bytes.length; i++) {
            byte b = bytes[i];
            //将当前byte转换为二进制字符串
            String binaryString = Integer.toBinaryString(b);
            //判断当前二进制是否长度大于8，如果大于8则截取
            if (binaryString.length() > 8) {
                sb.append(binaryString.substring(binaryString.length() - 8));
            } else {
                //判断是否为倒数第二个字节，如果为倒数第二个字节  则其二进制的长度应该为倒数第一个数的长度。
                if (i == bytes.length - 2) {
                    byte last = bytes[i + 1];
                    int length = binaryString.length();
                    StringBuilder builder = new StringBuilder();
                    //将少于倒数第一个数值长度 的二进制  前面用0 补齐
                    for (int j = 0; j < last - length; j++) {
                        builder.append(0);
                    }
                    builder.append(binaryString);
                    sb.append(builder);
                    break;
                } else {
                    //如果不是则补齐前面的0
                    int length = binaryString.length();
                    StringBuilder builder = new StringBuilder();
                    for (int j = 0; j < 8 - length; j++) {
                        builder.append(0);
                    }
                    builder.append(binaryString);
                    sb.append(builder);
                }
            }

        }
        return sb.toString();
    }

    /**
     * 整合 : 将原始的数据转换为赫夫曼数组
     */
    private static byte[] zip(byte[] contentBytes) {
        //第一步:构建node结点集合，将每个字符(byte)出现的次数统计并封装到list集合中
        List<Node> nodes = getNodes(contentBytes);
        //第二步:构建赫夫曼树
        Node root = creatHuffmanTree(nodes);
        //第三步:获取赫夫曼树中每个结点对应的字符(byte)，出现对应的路径。其中key为字符(byte)，value为路径(0,1组装)
        Map<Byte, String> nodePath = getNodePath(root);
        //第四步:根据字符路径集合，获取赫夫曼编码的字符串
        String huffmanCode = createHuffmanCode(contentBytes, nodePath);
//        System.out.println(huffmanCode);
        //第五步:将赫夫曼编码字符串，转换为压缩后的赫夫曼数组
        return convertToHuffmanBytes(huffmanCode);
    }

    /**
     * 将赫夫曼编码数据转换为压缩之后的数组
     *
     * @param code 赫夫曼编码的字符串
     * @return 赫夫曼压缩数组
     */
    private static byte[] convertToHuffmanBytes(String code) {
        //获取转换后数组长度
        int length = (code.length() + 7) / 8;
        byte[] bytes = new byte[length + 1];
        for (int i = 0, index = 0; i < code.length(); i += 8, index++) {
            if (code.length() > i + 8) {
                //将2进制转成10进制  并强转为byte
                bytes[index] = (byte) Integer.parseInt(code.substring(i, i + 8), 2);
            } else {
                //最后一个byte存储  倒数第二个字节的位数(防止出现0110->6->110)  出现位数少1的情况 最后无法匹配而报错
                String substring = code.substring(i);
                bytes[index] = (byte) Integer.parseInt(substring, 2);
                bytes[index + 1] = (byte) substring.length();
            }
        }
        return bytes;
    }

    /**
     * 获取赫夫曼编码的字符串
     */
    private static String createHuffmanCode(byte[] contentBytes, Map<Byte, String> nodePath) {
        StringBuilder stringBuilder = new StringBuilder();
        for (byte key : contentBytes) {
            stringBuilder.append(nodePath.get(key));
        }
        return stringBuilder.toString();
    }

    /**
     * 根据根节点获取对应的编码集合
     */
    private static Map<Byte, String> getNodePath(Node node) {
        return getNodePath(node, "", new StringBuilder());
    }

    static Map<Byte, String> huffmanCodeMap = new HashMap<>();

    /**
     * 获取对应的节点和对应的编码
     *
     * @param node          需要获取的节点
     * @param code          当前节点相对上一个节点的编码
     * @param stringBuilder 父节点的路径
     * @return map集合:key为字符byte,value为路径
     */
    private static Map<Byte, String> getNodePath(Node node, String code, StringBuilder stringBuilder) {
        StringBuilder sb = new StringBuilder(stringBuilder);
        sb.append(code);
        if (node != null) {
            //非叶子结点
            if (node.data == null) {
                //左子树
                getNodePath(node.left, "0", sb);
                //右子数
                getNodePath(node.right, "1", sb);
            } else {
                //叶子结点
                huffmanCodeMap.put(node.data, sb.toString());
            }
        }
        return huffmanCodeMap;
    }

    /**
     * 获取节点集合
     */
    private static List<Node> getNodes(byte[] contentBytes) {
        //先统计bytes中每个字节的次数
        Map<Byte, Integer> map = new HashMap<>();
        for (byte key : contentBytes) {
            map.put(key, map.get(key) == null ? 1 : map.get(key) + 1);
        }

        List<Node> list = new ArrayList<>();
        for (Map.Entry<Byte, Integer> entry : map.entrySet()) {
            list.add(new Node(entry.getKey(), entry.getValue()));
        }
        return list;
    }

    /**
     * 生成赫夫曼树
     */
    private static Node creatHuffmanTree(List<Node> nodes) {
        while (nodes.size() > 1) {
            Collections.sort(nodes);
            Node leftNode = nodes.get(0);
            Node rightNode = nodes.get(1);
            Node parent = new Node(null, leftNode.count + rightNode.count);
            parent.left = leftNode;
            parent.right = rightNode;

            nodes.remove(leftNode);
            nodes.remove(rightNode);
            nodes.add(parent);
        }

        return nodes.get(0);
    }

    public static void preOrder(Node root) {
        if (root == null) {
            System.out.println("当前二叉树为空，不能遍历!");
            return;
        }
        root.preOrder();
    }


    /**
     * 结点
     */
    static class Node implements Comparable<Node> {
        //字节 i l i ...
        Byte data;
        //出现的次数
        Integer count;
        //左子树
        Node left;
        //右子数
        Node right;

        //前序遍历
        public void preOrder() {
//            System.out.println((this.data == null?null:(char)Integer.parseInt(this.data.toString()))+"\t"+this.count);
            System.out.println(this);
            if (this.left != null) {
                this.left.preOrder();
            }
            if (this.right != null) {
                this.right.preOrder();
            }
        }

        public Node(Byte data, Integer count) {
            this.data = data;
            this.count = count;
        }

        @Override
        public String toString() {
            return "Node[" +
                    "data=" + data +
                    ", count=" + count +
                    ']';
        }

        @Override
        public int compareTo(Node o) {
            return this.count - o.count;
        }
    }

}