#pragma once //Huffman encodes a line and also produces the vocabulary ids #include "hash.hh" #include "line_splitter.hh" #include #include #include #include #include #include #include #include #include //Sorting for the second struct sort_pair { bool operator()(const std::pair &left, const std::pair &right) { return left.second > right.second; //This puts biggest numbers first. } }; struct sort_pair_vec { bool operator()(const std::pair, unsigned int> &left, const std::pair, unsigned int> &right) { return left.second > right.second; //This puts biggest numbers first. } }; class Huffman { unsigned long uniq_lines; //Unique lines in the file. //Containers used when counting the occurence of a given phrase std::map target_phrase_words; std::map, unsigned int> word_all1; //Same containers as vectors, for sorting std::vector > target_phrase_words_counts; std::vector, unsigned int> > word_all1_counts; //Huffman maps std::map target_phrase_huffman; std::map, unsigned int> word_all1_huffman; //inverted maps std::map lookup_target_phrase; std::map > lookup_word_all1; public: Huffman (const char *); void count_elements (line_text line); void assign_values(); void serialize_maps(const char * dirname); void produce_lookups(); std::vector encode_line(line_text line); //encode line + variable byte ontop std::vector full_encode_line(line_text line); //Getters const std::map get_target_lookup_map() const{ return lookup_target_phrase; } const std::map > get_word_all1_lookup_map() const{ return lookup_word_all1; } unsigned long getUniqLines() { return uniq_lines; } }; class HuffmanDecoder { std::map lookup_target_phrase; std::map > lookup_word_all1; public: HuffmanDecoder (const char *); HuffmanDecoder (std::map *, std::map > *); //Getters const std::map get_target_lookup_map() const{ return lookup_target_phrase; } const std::map > get_word_all1_lookup_map() const{ return lookup_word_all1; } inline std::string getTargetWordFromID(unsigned int id); std::string getTargetWordsFromIDs(std::vector ids); target_text decode_line (std::vector input); //Variable byte decodes a all target phrases contained here and then passes them to decode_line std::vector full_decode_line (std::vector lines); }; std::string getTargetWordsFromIDs(std::vector ids, std::map * lookup_target_phrase); inline std::string getTargetWordFromID(unsigned int id, std::map * lookup_target_phrase); inline unsigned int reinterpret_float(float * num); inline float reinterpret_uint(unsigned int * num); std::vector vbyte_encode_line(std::vector line); inline std::vector vbyte_encode(unsigned int num); std::vector vbyte_decode_line(std::vector line); inline unsigned int bytes_to_int(std::vector number);