/*
 * The MIT License (MIT)

 * Copyright (c) 2025 GenText-Checker Developers

 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:

 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.

 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

#ifndef GTCHECKER_SPLITER_H_
#define GTCHECKER_SPLITER_H_

#include <string>
#include <vector>

namespace gtchecker {

/// Value type describing one sentence together with its location.
///
/// Stores the sentence text and the start/end positions exactly as they were
/// passed to the constructor; the class does not validate or interpret them.
class Sentence {
 public:
  /// @param sentence   Text of the sentence (copied into the object).
  /// @param start_pos  Start position of the sentence, as supplied by the caller.
  /// @param end_pos    End position of the sentence, as supplied by the caller.
  Sentence(const std::string& sentence,
           int start_pos,
           int end_pos)
      : sentence_(sentence),
        start_pos_(start_pos),
        end_pos_(end_pos) {}

  // The accessors are const so they can be used on const objects and through
  // const references (e.g. when iterating a const std::vector<Sentence>).

  /// @return A copy of the stored sentence text.
  std::string sentence() const {
    return sentence_;
  }

  /// @return The start position given at construction.
  int start_pos() const {
    return start_pos_;
  }

  /// @return The end position given at construction.
  int end_pos() const {
    return end_pos_;
  }

 private:
  std::string sentence_;
  int start_pos_;
  int end_pos_;
};

class Spliter {
 public:
  Spliter(const std::string& doc) : 
    document_(doc) { }

  ~Spliter() { }

  std::vector<gtchecker::Sentence> split() {
    std::vector<gtchecker::Sentence> result;
    int start_pos = 0;
    std::string tmp_str = "";

    // Skip periods and whitespaces at the begin
    int pos = 0;
    while (pos < document_.size()) {
      if (document_[pos] == '.' || document_[pos] == ' ') {
        pos++;
      } else {
        break;
      }
    }
    start_pos = pos;

    while (pos < document_.size()) {
      tmp_str += document_[pos];
      pos++;
      if (isSentenceEnd(document_, pos)) {
        tmp_str += document_[pos];

        gtchecker::Sentence sentence(tmp_str, start_pos, pos);

        result.push_back(sentence);
        tmp_str = "";
        // Skip multiple periods, whitespaces, and \n characters
        while (isSentenceEnd(document_, pos) || 
               document_[pos] == ' ' || 
               static_cast<int>(document_[pos]) == 10) {
          ++pos;
        }
        start_pos = pos;
      }
    }

    if (tmp_str != "") {
      gtchecker::Sentence sentence(tmp_str, start_pos, pos-1);
      result.push_back(sentence);
    }

    return result;
  }

 private:
  bool isSentenceEnd(const std::string doc, int pos) {
    if (doc[pos] == '.' || 
        doc[pos] == '?' || 
        doc[pos] == '!' || 
        doc[pos] == ';') {
      if (pos+1 < doc.size()) {
        if (doc[pos+1] == ' ' || 
            static_cast<int>(doc[pos+1]) == 10) {
          return true;
        }
      }
    }
    return false;
  }

  std::string document_;
};

} // namespace gtchecker

#endif // GTCHECKER_SPLITER_H_