/*
 * The MIT License (MIT)

 * Copyright (c) 2025 GenText-Checker Developers

 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:

 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.

 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

#ifndef GTCHECKER_SIGNATURE_H_
#define GTCHECKER_SIGNATURE_H_

#include "tokenizer.h"

#include <string>
#include <unordered_map>
#include <vector>
#include <iostream>

namespace gtchecker {

// Number of per-position vote counters -- and therefore output characters --
// that Signature::sign() uses when the hash type is "--6bits".
constexpr int BITS = 6;

// Builds a short signature string for a document from the high-frequency
// words it contains.
//
// The scheme is chosen by the `hash_type` string given to the constructor:
//   "--hfws"  (default) Concatenates, in document order, the one-character
//             code of every word that appears in the high-frequency table.
//   "--6bits" Every high-frequency word except "the" votes on each bit
//             position with its 6-character binary code ('1' adds one,
//             '0' subtracts one). The signature has BITS characters: '1'
//             where the tally is strictly positive, '0' otherwise.
// Any other value makes sign() print a diagnostic to std::cout and return an
// empty string.
class Signature {
 public:
  // Stores copies of `doc` (the text to sign) and `hash_type` (the scheme
  // selector described in the class comment).
  Signature(const std::string& doc,
            const std::string& hash_type = "--hfws")
   : doc_(doc), hash_type_(hash_type) { }

  // Returns the signature of the stored document for the stored hash type.
  // The document is tokenised with gtchecker::Tokenizer::GetSplitWords; words
  // are matched against the tables exactly as the tokenizer returns them.
  //
  // NOTE(review): left non-const because Tokenizer's interface is declared in
  // tokenizer.h and is not visible here; it could be made const if
  // GetSplitWords accepts a const string.
  std::string sign() {
    if (hash_type_ == "--6bits") {
      return SignSixBits();
    }
    if (hash_type_ == "--hfws") {
      return SignHfws();
    }

    std::cout << "Unknown hash type: " << hash_type_ << std::endl;
    return std::string();
  }

 private:
  using WordCodeMap = std::unordered_map<std::string, std::string>;

  // "--6bits" scheme: per-position majority vote over the binary codes of
  // the high-frequency words found in the document.
  std::string SignSixBits() {
    gtchecker::Tokenizer tokenizer;
    const std::vector<std::string> doc_words = tokenizer.GetSplitWords(doc_);
    const WordCodeMap& codes = HfwordBitsMap();

    std::vector<int> count_vec(BITS, 0);
    for (const std::string& word : doc_words) {
      // "the" is in the table (code "000000") but is excluded from the vote.
      if (word == "the") {
        continue;
      }
      const WordCodeMap::const_iterator entry = codes.find(word);
      if (entry == codes.end()) {
        continue;
      }
      const std::string& bits = entry->second;
      // Bounded by both sizes so a BITS value smaller than the code length
      // cannot write past the end of count_vec.
      for (std::string::size_type n = 0;
           n < bits.size() && n < count_vec.size(); ++n) {
        if (bits[n] == '0') {
          count_vec[n] -= 1;
        } else if (bits[n] == '1') {
          count_vec[n] += 1;
        }
      }
    }

    std::string signature;
    signature.reserve(count_vec.size());
    for (const int count : count_vec) {
      // A tie (count == 0) yields '0'.
      signature += (count > 0) ? '1' : '0';
    }
    return signature;
  }

  // "--hfws" scheme: one code character per high-frequency word, in the
  // order the words occur in the document.
  std::string SignHfws() {
    gtchecker::Tokenizer tokenizer;
    const std::vector<std::string> doc_words = tokenizer.GetSplitWords(doc_);
    const WordCodeMap& codes = HfwordMap();

    std::string signature;
    for (const std::string& word : doc_words) {
      const WordCodeMap::const_iterator entry = codes.find(word);
      if (entry != codes.end()) {
        signature += entry->second;
      }
    }
    return signature;
  }

  // High frequency word bits map: each word's 6-character binary code.
  // Built once on first use and shared by every Signature, instead of being
  // rebuilt for each instance.
  static const WordCodeMap& HfwordBitsMap() {
    static const WordCodeMap hfword_bits_map = {
       {"the","000000"}, {"and","000001"}, {"a","000010"}, {"to","000011"}, {"i","000100"}, {"of","000101"}, {"in","000110"},
       {"was","000111"}, {"he","001000"}, {"that","001001"}, {"it","001010"}, {"for","001011"}, {"on","001100"}, {"with","001101"},
       {"you","001110"}, {"is","001111"}, {"at","010000"}, {"his","010001"}, {"my","010010"}, {"said","010011"}, {"all","010100"},
       {"as","010101"}, {"but","010110"}, {"me","010111"}, {"she","011000"}, {"had","011001"}, {"this","011010"}, {"have","011011"},
       {"from","011100"}, {"they","011101"}, {"we","011110"}, {"be","011111"}, {"are","100000"}, {"one","100001"}, {"not","100010"},
       {"when","100011"}, {"little","100100"}, {"about","100101"}, {"her","100110"}, {"him","100111"}, {"up","101000"}, {"out","101001"},
       {"if","101010"}, {"like","101011"}, {"or","101100"}, {"were","101101"}, {"by","101110"}, {"an","101111"}, {"just","110000"},
       {"so","110001"}, {"very","110010"}, {"what","110011"}, {"over","110100"}, {"old","110101"}, {"then","110110"}, {"do","110111"},
       {"there","111000"}, {"get","111001"}, {"them","111010"}, {"know","111011"}, {"time","111100"}, {"has","111101"}
    };
    return hfword_bits_map;
  }

  // High frequency word map: each word's one-character code
  // ('0'-'9', then 'a'-'z', then 'A'-'Z', in the same word order as the
  // bits map). Built once on first use and shared by every Signature.
  static const WordCodeMap& HfwordMap() {
    static const WordCodeMap hfword_map = {
       {"the","0"}, {"and","1"}, {"a","2"}, {"to","3"}, {"i","4"}, {"of","5"}, {"in","6"},
       {"was","7"}, {"he","8"}, {"that","9"}, {"it","a"}, {"for","b"}, {"on","c"}, {"with","d"},
       {"you","e"}, {"is","f"}, {"at","g"}, {"his","h"}, {"my","i"}, {"said","j"}, {"all","k"},
       {"as","l"}, {"but","m"}, {"me","n"}, {"she","o"}, {"had","p"}, {"this","q"}, {"have","r"},
       {"from","s"}, {"they","t"}, {"we","u"}, {"be","v"}, {"are","w"}, {"one","x"}, {"not","y"},
       {"when","z"}, {"little","A"}, {"about","B"}, {"her","C"}, {"him","D"}, {"up","E"}, {"out","F"},
       {"if","G"}, {"like","H"}, {"or","I"}, {"were","J"}, {"by","K"}, {"an","L"}, {"just","M"},
       {"so","N"}, {"very","O"}, {"what","P"}, {"over","Q"}, {"old","R"}, {"then","S"}, {"do","T"},
       {"there","U"}, {"get","V"}, {"them","W"}, {"know","X"}, {"time","Y"}, {"has","Z"}
    };
    return hfword_map;
  }

  std::string doc_;        // Text to be signed.
  std::string hash_type_;  // Scheme selector: "--hfws" or "--6bits".
};

} // namespace gtchecker

#endif // GTCHECKER_SIGNATURE_H_