/*
 * The MIT License (MIT)

 * Copyright (c) 2025 GenText-Checker Developers

 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:

 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.

 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

#ifndef GTCHECKER_COSINE_SIMILARITY_H_
#define GTCHECKER_COSINE_SIMILARITY_H_

#include "similarity.h"

#include <cmath>
#include <iostream>
#include <stdexcept>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>

using std::string;
using std::vector;

namespace gtchecker {

class CosineSimilarity : public Similarity {
 public:
  CosineSimilarity(const vector<string>& doc_A, 
                   const vector<string>& doc_B) : 
    document_A(doc_A), document_B(doc_B) {}

  ~CosineSimilarity() {}

  double CalculateSimilarity() {
    double similarity = 0.0;

    vector<int> vector_A;
    vector<int> vector_B;

    Vectorize(document_A, 
              document_B, 
              &vector_A, 
              &vector_B);

    similarity = Dot(vector_A, vector_B) / (Norm(vector_A) * Norm(vector_B));

    return similarity;
  }

 private:
  void Vectorize(const vector<string>& document_A,
                 const vector<string>& document_B,
                 vector<int>* vector_A,
                 vector<int>* vector_B) {
    
    std::unordered_map<string, int> word_map;
    std::unordered_map<string, int> A_map;
    std::unordered_map<string, int> B_map;

    for (int i = 0; i < document_A.size(); ++i) {
      word_map[document_A[i]]++;
      A_map[document_A[i]]++;
    }
    for (int i = 0; i < document_B.size(); ++i) {
      word_map[document_B[i]]++;
      B_map[document_B[i]]++;
    }

    std::unordered_map<string, int>::iterator it = word_map.begin();
    for (; it != word_map.end(); ++it) {
      vector_A->push_back(A_map[it->first]);
      vector_B->push_back(B_map[it->first]);
    }
  }

  double Dot(const vector<int>& vector_A,
             const vector<int>& vector_B) {

    if (vector_A.size() != vector_B.size()) {
      throw std::invalid_argument("Vectors must be of the same length.");
    }

    double result = 0.0;
    for (int i = 0; i < vector_A.size(); ++i) {
      result += vector_A[i] * vector_B[i];
    }

    return result;
  }

  double Norm(vector<int> vector_A) {
    double accumulate = 0.0;
    for (int i = 0; i < vector_A.size(); ++i) {
      accumulate += vector_A[i] * vector_A[i];
    }
    return std::sqrt(accumulate);
  }

  vector<string> document_A;
  vector<string> document_B;
};

} // namespace gtchecker

#endif // GTCHECKER_COSINE_SIMILARITY_H_