/*
 * The MIT License (MIT)

 * Copyright (c) 2025 GenText-Checker Developers

 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:

 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.

 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

#ifndef GTCHECKER_MIN_EDIT_DISTANCE_H_
#define GTCHECKER_MIN_EDIT_DISTANCE_H_

#include "similarity.h"

#include <string>
#include <vector>
#include <iostream>
#include <algorithm>
#include <unordered_map>

using std::string;
using std::vector;

namespace gtchecker {

// Similarity measure based on the minimum edit (Levenshtein) distance between
// two documents, where each element of a document vector is treated as one
// indivisible unit.
//
// The score is
//
//   1.0 - (distance(A, B) - | |A| - |B| |) / |A|
//
// i.e. the edit distance with the unavoidable length difference discounted,
// normalised by the length of document A. Because of that normalisation the
// measure is NOT symmetric in A and B. The result lies in [0.0, 1.0].
class MEDSimilarity : public Similarity {
 public:
  // Stores private copies of both documents.
  //
  // doc_A: first document; its length is the normalisation factor.
  // doc_B: second document.
  MEDSimilarity(const std::vector<std::string>& doc_A,
                const std::vector<std::string>& doc_B) :
    document_A(doc_A), document_B(doc_B) {}

  ~MEDSimilarity() {}

  // Computes the similarity of the two stored documents.
  //
  // Each distinct element is first mapped to an integer ID so that the
  // quadratic edit-distance loop compares integers instead of strings.
  //
  // Returns a value in [0.0, 1.0]; 1.0 when document A is empty.
  double CalculateSimilarity() {
    std::vector<int> vector_A;
    std::vector<int> vector_B;

    WordIDMaping(document_A, document_B, &vector_A, &vector_B);

    return MinEditDistanceSimilarity(vector_A, vector_B);
  }

 private:
  // Appends to `ids` the ID of every element of `document`, in order.
  //
  // Elements not yet present in `word_map` are registered with `*next_id`,
  // which is then incremented.
  static void AppendWordIDs(const std::vector<std::string>& document,
                            std::unordered_map<std::string, int>* word_map,
                            int* next_id,
                            std::vector<int>* ids) {
    ids->reserve(ids->size() + document.size());
    for (const std::string& word : document) {
      // A single hash lookup: emplace() leaves an existing entry untouched
      // and reports through `.second` whether a new one was created.
      const auto inserted = word_map->emplace(word, *next_id);
      if (inserted.second) {
        ++(*next_id);
      }
      ids->push_back(inserted.first->second);
    }
  }

  // Translates both documents into sequences of integer IDs.
  //
  // IDs start at 1 and are handed out in order of first appearance, scanning
  // document_A first and document_B second; equal strings always receive the
  // same ID in both outputs. The IDs are appended to *vector_A / *vector_B.
  void WordIDMaping(const std::vector<std::string>& document_A,
                    const std::vector<std::string>& document_B,
                    std::vector<int>* vector_A,
                    std::vector<int>* vector_B) {
    int next_id = 1;
    std::unordered_map<std::string, int> word_map;

    AppendWordIDs(document_A, &word_map, &next_id, vector_A);
    AppendWordIDs(document_B, &word_map, &next_id, vector_B);
  }

  // Shared implementation of the similarity score for any random-access
  // sequence type providing size(), operator[] and size_type, whose elements
  // are comparable with ==.
  //
  // Returns 1.0 - (distance - |len_a - len_b|) / len_a, or 1.0 if seq_a is
  // empty (the numerator is 0 in that case, and dividing by the zero length
  // would otherwise produce NaN).
  template <typename Sequence>
  static double EditDistanceSimilarity(const Sequence& seq_a,
                                       const Sequence& seq_b) {
    using Size = typename Sequence::size_type;

    const Size len_a = seq_a.size();
    const Size len_b = seq_b.size();

    if (len_a == 0) {
      return 1.0;
    }

    // Row i of the DP table only depends on row i-1, so two rolling rows are
    // enough. The vectors release their memory automatically.
    std::vector<Size> previous(len_b + 1);
    std::vector<Size> current(len_b + 1);

    // Row 0: turning an empty prefix of A into the first j elements of B
    // takes j insertions.
    for (Size j = 0; j <= len_b; ++j) {
      previous[j] = j;
    }

    for (Size i = 1; i <= len_a; ++i) {
      // Column 0: turning the first i elements of A into an empty sequence
      // takes i deletions.
      current[0] = i;
      for (Size j = 1; j <= len_b; ++j) {
        if (seq_a[i - 1] == seq_b[j - 1]) {
          current[j] = previous[j - 1];
        } else {
          // Cheapest of deletion, insertion and substitution.
          current[j] = 1 + std::min(std::min(previous[j], current[j - 1]),
                                    previous[j - 1]);
        }
      }
      previous.swap(current);
    }

    // After the final swap the last computed row lives in `previous`.
    const Size distance = previous[len_b];
    const Size length_gap = len_a > len_b ? len_a - len_b : len_b - len_a;

    // The edit distance is never smaller than the length difference, so this
    // unsigned subtraction cannot wrap around.
    const double eta = static_cast<double>(distance - length_gap);

    return 1.0 - (eta / static_cast<double>(len_a));
  }

  // Similarity of two sequences of word IDs; see EditDistanceSimilarity().
  double MinEditDistanceSimilarity(const std::vector<int>& vector_A,
                                   const std::vector<int>& vector_B) {
    return EditDistanceSimilarity(vector_A, vector_B);
  }

  // Character-level variant: compares the two strings char by char; see
  // EditDistanceSimilarity().
  double MinEditDistanceSimilarity(const std::string& document_A,
                                   const std::string& document_B) {
    return EditDistanceSimilarity(document_A, document_B);
  }

  std::vector<std::string> document_A;
  std::vector<std::string> document_B;
};

} // namespace gtchecker

#endif // GTCHECKER_MIN_EDIT_DISTANCE_H_