/*
 * The MIT License (MIT)

 * Copyright (c) 2025 GenText-Checker Developers

 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:

 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.

 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

#include "common.h"
#include "cxxtoken.h"
#include "cosine_similarity.h"
#include "min_edit_distance.h"

#include <chrono>
#include <fstream>
#include <iostream>
#include <sstream>
#include <string>
#include <vector>

using std::string;
using std::vector;
using std::stringstream;
using std::ofstream;

// Number of example papers; main() reads ./data/paper_1.txt .. paper_10.txt.
constexpr int PAPER_COUNT = 10;
// Highest mode index; there are 8 modes, named D1 to D8.
constexpr int MODE = 8;
// D1 and D2 are both the original paper, so comparisons start at mode 3.
constexpr int START_MODE = 3;

// Path of the text file that receives the comparison results.
const string output_file = "./output_result.txt";

int main(int argc, char** argv) {
  auto begin = std::chrono::high_resolution_clock::now();

  // Open output file
  std::ofstream ofs;
  ofs.open(output_file, std::ofstream::out | std::ofstream::trunc);
  if (!ofs.is_open()) {
    std::cerr << "Error in opening the file: " << output_file << std::endl;
  }

  // Go through all the input files
  for (int i = 1; i <= PAPER_COUNT ; ++i) {
    for (int j = START_MODE; j <= MODE; ++j) {
      stringstream ss_i;
      ss_i << i;
      string str_i = ss_i.str();
      stringstream ss_j;
      ss_j << j;
      string str_j = ss_j.str();
      string file_a = "./data/paper_" + str_i + ".txt";
      string file_b = "./data/paper_" + str_i + "_D" + str_j + ".txt";

      string document_A = gtchecker::ReadFileToString(file_a);
      string document_B = gtchecker::ReadFileToString(file_b);

      vector<string> document_A_words = gtchecker::GetSplitWords(document_A);
      vector<string> document_B_words = gtchecker::GetSplitWords(document_B);

      // Compare the size of two strings
      // For instance, document_a = "abcd", size = 4
      //               document_b = "abcde", size = 5
      // diff_ratio = (5-4) / 4 = 0.25
      double diff_ratio = gtchecker::SizeDifferenceRatio(document_A, document_B);

      double similarity = 0.0;

      // If the size difference of two doucments is not very big
      if (diff_ratio < 0.2) {
        gtchecker::CosineSimilarity cosine_similarity(document_A_words,
                                                      document_B_words);
        similarity = cosine_similarity.CalculateSimilarity();
      } else {
        gtchecker::MEDSimilarity med_similarity(document_A_words,
                                                document_B_words);
        similarity = med_similarity.CalculateSimilarity();
      }

      std::cout << file_a << " " << file_b << " Similarity: " << similarity << std::endl;

      if (ofs.is_open()) {
        ofs << file_a << " " << file_b << " Similarity: " << similarity << std::endl;
      }
    }
  }

  ofs.close();

  auto end = std::chrono::high_resolution_clock::now();

  auto dur = end - begin;
  auto ms = std::chrono::duration_cast<std::chrono::milliseconds>(dur).count();
  std::cout << "Execution Time: " << ms << " ms" << std::endl;
  std::cout << "Result is written to output_result.txt" << std::endl;
}