//
//  Copyright (C) 2024 greg landrum and other RDKit contributors
//
//   @@ All Rights Reserved @@
//  This file is part of the RDKit.
//  The contents are covered by the terms of the BSD license
//  which is included in the file license.txt, found at the root
//  of the RDKit source tree.
//
#ifndef RD_MOLSUPPLIER_v1_H
#define RD_MOLSUPPLIER_v1_H

namespace RDKit {
inline namespace v1 {
/*!
//
//  Here are a couple of ways one can interact with MolSuppliers:
//
//  1) Lazy (ForwardIterator):
//     while(!supplier.atEnd()){
//       ROMol *mol = supplier.next();
//       if(mol){
//           do something;
//       }
//     }
//  2) Random Access:
//     for(unsigned int i=0;i<supplier.length();i++){
//       ROMol *mol = supplier[i];
//       if(mol){
//           do something;
//       }
//     }
//
//  In both cases the supplier hands back a raw pointer that it has released,
//  so the caller is responsible for deleting the molecule.
//
*/
class RDKIT_FILEPARSERS_EXPORT MolSupplier {
  // this is an abstract base class to supply molecules one at a time
 public:
  MolSupplier() {}
  virtual ~MolSupplier() {}
  void init() {
    if (dp_supplier) {
      dp_supplier->init();
    }
  }
  void reset() {
    if (dp_supplier) {
      dp_supplier->reset();
    }
  }

  bool atEnd() {
    if (dp_supplier) {
      return dp_supplier->atEnd();
    }
    return true;
  }
  ROMol *next() {
    PRECONDITION(dp_supplier, "no supplier");
    return dp_supplier->next().release();
  }

  virtual void close() {
    if (dp_supplier) {
      dp_supplier->close();
    }
  }

 private:
  // disable automatic copy constructors and assignment operators
  // for this class and its subclasses.  They will likely be
  // carrying around stream pointers and copying those is a recipe
  // for disaster.
  MolSupplier(const MolSupplier &);
  MolSupplier &operator=(const MolSupplier &);

 protected:
  std::unique_ptr<v2::FileParsers::MolSupplier> dp_supplier;
};

//! \brief a supplier from an SD file that only reads forward
class RDKIT_FILEPARSERS_EXPORT ForwardSDMolSupplier : public MolSupplier {
  /*************************************************************************
   * v1 API front end for v2::FileParsers::ForwardSDMolSupplier.
   *  - The legacy boolean arguments are packed into a
   *    v2::FileParsers::MolFileParserParams and handed to the v2 supplier.
   *  - Everything else is forwarded to the object stored in dp_supplier.
   *************************************************************************/
 public:
  using ContainedType = v2::FileParsers::ForwardSDMolSupplier;

  //! creates an object without an underlying supplier
  ForwardSDMolSupplier() {}

  /*!
   *   \param inStream - the stream to read from
   *   \param takeOwnership - forwarded unchanged to the v2 supplier
   *   \param sanitize - copied to the sanitize field of the parser parameters
   *   \param removeHs - copied to the removeHs field of the parser parameters
   *   \param strictParsing - copied to the strictParsing field of the parser
   *                          parameters; note that the default here is false
   */
  explicit ForwardSDMolSupplier(std::istream *inStream,
                                bool takeOwnership = true, bool sanitize = true,
                                bool removeHs = true,
                                bool strictParsing = false) {
    v2::FileParsers::MolFileParserParams parserParams;
    parserParams.strictParsing = strictParsing;
    parserParams.removeHs = removeHs;
    parserParams.sanitize = sanitize;
    dp_supplier.reset(new ContainedType(inStream, takeOwnership, parserParams));
  }

  ~ForwardSDMolSupplier() override {}

  //! forwards \c val to the underlying supplier, which must exist
  void setProcessPropertyLists(bool val) {
    PRECONDITION(dp_supplier, "no supplier");
    auto *forwardSupplier = static_cast<ContainedType *>(dp_supplier.get());
    forwardSupplier->setProcessPropertyLists(val);
  }

  //! \return the underlying supplier's setting, or false if there is no
  //!         underlying supplier
  bool getProcessPropertyLists() const {
    if (!dp_supplier) {
      return false;
    }
    auto *forwardSupplier = static_cast<ContainedType *>(dp_supplier.get());
    return forwardSupplier->getProcessPropertyLists();
  }

  //! \return the underlying supplier's EOF flag, or false if there is no
  //!         underlying supplier
  bool getEOFHitOnRead() const {
    if (!dp_supplier) {
      return false;
    }
    auto *forwardSupplier = static_cast<ContainedType *>(dp_supplier.get());
    return forwardSupplier->getEOFHitOnRead();
  }
};

//! \brief a lazy supplier from an SD file
class RDKIT_FILEPARSERS_EXPORT SDMolSupplier : public ForwardSDMolSupplier {
  /*************************************************************************
   * A lazy mol supplier from a SD file.
   *  - When new molecules are read using "next" their positions in the file
   *    are noted.
   *  - A call to "length" will automatically parse the entire file and cache
   *    all the mol block positions
   *  - [] operator is used to access a molecule at "idx", calling next
   *    following this will result in the next molecule after "idx"
   *
   * This v1 class is a thin wrapper: each member forwards to the
   * v2::FileParsers::SDMolSupplier stored in dp_supplier.
   *************************************************************************/

 public:
  using ContainedType = v2::FileParsers::SDMolSupplier;

  //! wraps a default-constructed v2 supplier
  SDMolSupplier() { dp_supplier.reset(new ContainedType()); }

  /*!
   *   \param fileName - the name of the SD file
   *   \param sanitize - if true sanitize the molecule before returning it
   *   \param removeHs - if true remove Hs from the molecule before returning it
   *                     (triggers sanitization)
   *   \param strictParsing - if set to false, the parser is more lax about
   *                          correctness of the contents.
   */
  explicit SDMolSupplier(const std::string &fileName, bool sanitize = true,
                         bool removeHs = true, bool strictParsing = true) {
    v2::FileParsers::MolFileParserParams params;
    params.sanitize = sanitize;
    params.removeHs = removeHs;
    params.strictParsing = strictParsing;
    dp_supplier.reset(new v2::FileParsers::SDMolSupplier(fileName, params));
  }

  /*!
   *   \param inStream - the stream to read from
   *   \param takeOwnership - forwarded unchanged to the v2 supplier
   *   \param sanitize - if true sanitize the molecule before returning it
   *   \param removeHs - if true remove Hs from the molecule before returning it
   *                     (triggers sanitization)
   *   \param strictParsing - if set to false, the parser is more lax about
   *                          correctness of the contents.
   */
  explicit SDMolSupplier(std::istream *inStream, bool takeOwnership = true,
                         bool sanitize = true, bool removeHs = true,
                         bool strictParsing = true) {
    v2::FileParsers::MolFileParserParams params;
    params.sanitize = sanitize;
    params.removeHs = removeHs;
    params.strictParsing = strictParsing;
    dp_supplier.reset(
        new v2::FileParsers::SDMolSupplier(inStream, takeOwnership, params));
  }

  //! forwards to the underlying supplier's moveTo(); a supplier is required
  void moveTo(unsigned int idx) {
    PRECONDITION(dp_supplier, "no supplier");
    static_cast<ContainedType *>(dp_supplier.get())->moveTo(idx);
  }

  /*! \brief returns a particular molecule
   *
   *  \param idx - which item to return
   *
   *  \return the raw pointer released from the result of the underlying
   *          supplier's operator[]; the caller becomes responsible for it
   */
  ROMol *operator[](unsigned int idx) {
    PRECONDITION(dp_supplier, "no supplier");
    return static_cast<ContainedType *>(dp_supplier.get())
        ->operator[](idx)
        .release();
  }

  /*! \brief returns the text block for a particular item
   *
   *  \param idx - which item to return
   */
  std::string getItemText(unsigned int idx) {
    PRECONDITION(dp_supplier, "no supplier");
    return static_cast<ContainedType *>(dp_supplier.get())->getItemText(idx);
  }

  //! \return the value of the underlying supplier's length()
  unsigned int length() {
    PRECONDITION(dp_supplier, "no supplier");
    return static_cast<ContainedType *>(dp_supplier.get())->length();
  }

  /*! \brief replaces the data the supplier reads from
   *
   *  \param text - the new contents
   *  \param sanitize - copied to the sanitize field of the parser parameters
   *  \param removeHs - copied to the removeHs field of the parser parameters
   *
   *  strictParsing is not set by this overload, so it keeps whatever value a
   *  default-constructed MolFileParserParams has.
   */
  void setData(const std::string &text, bool sanitize = true,
               bool removeHs = true) {
    PRECONDITION(dp_supplier, "no supplier");
    v2::FileParsers::MolFileParserParams params;
    params.sanitize = sanitize;
    params.removeHs = removeHs;
    static_cast<ContainedType *>(dp_supplier.get())->setData(text, params);
  }

  /*! \brief replaces the data the supplier reads from
   *
   *  \param text - the new contents
   *  \param sanitize - copied to the sanitize field of the parser parameters
   *  \param removeHs - copied to the removeHs field of the parser parameters
   *  \param strictParsing - copied to the strictParsing field of the parser
   *                         parameters
   */
  void setData(const std::string &text, bool sanitize, bool removeHs,
               bool strictParsing) {
    // same guard as every other forwarding member: never dereference a
    // missing supplier
    PRECONDITION(dp_supplier, "no supplier");
    v2::FileParsers::MolFileParserParams params;
    params.sanitize = sanitize;
    params.removeHs = removeHs;
    params.strictParsing = strictParsing;
    static_cast<ContainedType *>(dp_supplier.get())->setData(text, params);
  }

  /*! Resets our internal state and sets the indices of molecules in the stream.
   *  The client should be *very* careful about calling this method, as it's
   *  trivial to end up with a completely useless supplier.
   *
   *   \param locs - the vector of stream positions.
   *
   *  Note that this can be used not only to make reading selected molecules
   *  from a large SD file much faster, but it can also allow subsetting an SD
   *  file or rearranging the order of the molecules.
   */
  void setStreamIndices(const std::vector<std::streampos> &locs) {
    PRECONDITION(dp_supplier, "no supplier");
    static_cast<ContainedType *>(dp_supplier.get())->setStreamIndices(locs);
  }
};

//! lazy file parser for Smiles tables
class RDKIT_FILEPARSERS_EXPORT SmilesMolSupplier : public MolSupplier {
  /**************************************************************************
   * Lazy file parser for Smiles table file, similar to the lazy SD
   * file parser above
   *  - As and when new molecules are read using "next" their
   *    positions in the file are noted.
   *  - A call to the "length" will automatically parse the entire
   *    file and cache all the mol block positions
   *  - [] operator is used to access a molecule at "idx", calling
   *    next following this will result in the next molecule after
   *    "idx"
   *
   * This v1 class is a thin wrapper: each member forwards to the
   * v2::FileParsers::SmilesMolSupplier stored in dp_supplier.
   ***************************************************************************/
 public:
  using ContainedType = v2::FileParsers::SmilesMolSupplier;
  /*!
   *   \param fileName - the name of smiles table file
   *   \param delimiter - delimiting characters between records on each
   *     line NOTE that this is not a string, the tokenizer looks for
   *     the individual characters in delimiter, not the full string
   *     itself.  So the default delimiter: " \t", means " " or "\t".
   *   \param smilesColumn - column number for the SMILES string (defaults
   *     to the first column)
   *   \param nameColumn - column number for the molecule name (defaults to
   *     the second column) If set to -1 we assume that no name is
   *     available for the molecule and the name is defaulted to the
   *     smiles string
   *   \param titleLine - if true, the first line is assumed to list the
   *     names of properties in order separated by 'delimiter'. It is
   *     also assumed that the 'SMILES' column and the 'name' column
   *     are not specified here if false - no title line is assumed
   *     and the properties are recorded as the "columnX" where "X" is
   *     the column number
   *   \param sanitize - if true sanitize the molecule before returning it
   */
  explicit SmilesMolSupplier(const std::string &fileName,
                             const std::string &delimiter = " \t",
                             int smilesColumn = 0, int nameColumn = 1,
                             bool titleLine = true, bool sanitize = true) {
    v2::FileParsers::SmilesMolSupplierParams params;
    params.delimiter = delimiter;
    params.smilesColumn = smilesColumn;
    params.nameColumn = nameColumn;
    params.titleLine = titleLine;
    params.parseParameters.sanitize = sanitize;
    dp_supplier.reset(new v2::FileParsers::SmilesMolSupplier(fileName, params));
  }

  /*!
   *   \param inStream - the stream to read from
   *   \param takeOwnership - forwarded unchanged to the v2 supplier
   *
   *   The remaining parameters have the same meaning as in the file name
   *   based constructor.
   */
  explicit SmilesMolSupplier(std::istream *inStream, bool takeOwnership = true,
                             const std::string &delimiter = " \t",
                             int smilesColumn = 0, int nameColumn = 1,
                             bool titleLine = true, bool sanitize = true) {
    v2::FileParsers::SmilesMolSupplierParams params;
    params.delimiter = delimiter;
    params.smilesColumn = smilesColumn;
    params.nameColumn = nameColumn;
    params.titleLine = titleLine;
    params.parseParameters.sanitize = sanitize;
    dp_supplier.reset(new v2::FileParsers::SmilesMolSupplier(
        inStream, takeOwnership, params));
  }

  //! wraps a default-constructed v2 supplier
  SmilesMolSupplier() { dp_supplier.reset(new ContainedType()); }

  /*! \brief replaces the data the supplier reads from
   *
   *  \param text - the new contents
   *
   *  The remaining parameters are packed into a SmilesMolSupplierParams in
   *  the same way as in the constructors. Note that the default delimiter
   *  here is " ", not the " \t" used by the constructors.
   */
  void setData(const std::string &text, const std::string &delimiter = " ",
               int smilesColumn = 0, int nameColumn = 1, bool titleLine = true,
               bool sanitize = true) {
    PRECONDITION(dp_supplier, "no supplier");
    v2::FileParsers::SmilesMolSupplierParams params;
    params.delimiter = delimiter;
    params.smilesColumn = smilesColumn;
    params.nameColumn = nameColumn;
    params.titleLine = titleLine;
    params.parseParameters.sanitize = sanitize;
    static_cast<ContainedType *>(dp_supplier.get())->setData(text, params);
  }

  //! forwards to the underlying supplier's moveTo(); a supplier is required
  void moveTo(unsigned int idx) {
    PRECONDITION(dp_supplier, "no supplier");
    static_cast<ContainedType *>(dp_supplier.get())->moveTo(idx);
  }

  /*! \brief returns a particular molecule
   *
   *  \param idx - which item to return
   *
   *  \return the raw pointer released from the result of the underlying
   *          supplier's operator[]; the caller becomes responsible for it
   */
  ROMol *operator[](unsigned int idx) {
    PRECONDITION(dp_supplier, "no supplier");
    return static_cast<ContainedType *>(dp_supplier.get())
        ->operator[](idx)
        .release();
  }

  /*! \brief returns the text block for a particular item
   *
   *  \param idx - which item to return
   */
  std::string getItemText(unsigned int idx) {
    PRECONDITION(dp_supplier, "no supplier");
    return static_cast<ContainedType *>(dp_supplier.get())->getItemText(idx);
  }

  //! \return the value of the underlying supplier's length()
  unsigned int length() {
    // the terminating ';' keeps this a well-formed statement regardless of how
    // the PRECONDITION macro expands
    PRECONDITION(dp_supplier, "no supplier");
    return static_cast<ContainedType *>(dp_supplier.get())->length();
  }
};

//! lazy file parser for TDT files
class RDKIT_FILEPARSERS_EXPORT TDTMolSupplier : public MolSupplier {
  /**************************************************************************
   * v1 API front end for v2::FileParsers::TDTMolSupplier.
   *  - The legacy argument lists are packed into a
   *    v2::FileParsers::TDTMolSupplierParams and handed to the v2 supplier.
   *  - moveTo, operator[], getItemText and length are forwarded to the
   *    object stored in dp_supplier, which must exist when they are called.
   ***************************************************************************/
 public:
  using ContainedType = v2::FileParsers::TDTMolSupplier;

  /*!
   *   \param fileName - the name of the TDT file
   *   \param nameRecord - property name for the molecule name.
   *     If empty (the default), the name defaults to be empty
   *   \param confId2D - if >=0 and 2D coordinates are provided, the 2D
   *                   structure (depiction) in the input will be read into the
   *                   corresponding conformer id.
   *   \param confId3D - if >=0 and 3D coordinates are provided, the 3D
   *                   structure in the input will be read into the
   *                   corresponding conformer id.
   *   \param sanitize - if true sanitize the molecule before returning it
   */
  explicit TDTMolSupplier(const std::string &fileName,
                          const std::string &nameRecord = "", int confId2D = -1,
                          int confId3D = 0, bool sanitize = true) {
    v2::FileParsers::TDTMolSupplierParams tdtParams;
    tdtParams.parseParameters.sanitize = sanitize;
    tdtParams.confId3D = confId3D;
    tdtParams.confId2D = confId2D;
    tdtParams.nameRecord = nameRecord;
    dp_supplier.reset(new ContainedType(fileName, tdtParams));
  }

  /*!
   *   \param inStream - the stream to read from
   *   \param takeOwnership - forwarded unchanged to the v2 supplier
   *
   *   The remaining parameters have the same meaning as in the file name
   *   based constructor.
   */
  explicit TDTMolSupplier(std::istream *inStream, bool takeOwnership = true,
                          const std::string &nameRecord = "", int confId2D = -1,
                          int confId3D = 0, bool sanitize = true) {
    v2::FileParsers::TDTMolSupplierParams tdtParams;
    tdtParams.parseParameters.sanitize = sanitize;
    tdtParams.confId3D = confId3D;
    tdtParams.confId2D = confId2D;
    tdtParams.nameRecord = nameRecord;
    dp_supplier.reset(new ContainedType(inStream, takeOwnership, tdtParams));
  }

  //! wraps a default-constructed v2 supplier
  TDTMolSupplier() { dp_supplier.reset(new ContainedType()); }

  /*! \brief replaces the data the supplier reads from
   *
   *  \param text - the new contents
   *
   *  The remaining parameters are packed into a TDTMolSupplierParams in the
   *  same way as in the constructors.
   */
  void setData(const std::string &text, const std::string &nameRecord = "",
               int confId2D = -1, int confId3D = 0, bool sanitize = true) {
    PRECONDITION(dp_supplier, "no supplier");
    v2::FileParsers::TDTMolSupplierParams tdtParams;
    tdtParams.parseParameters.sanitize = sanitize;
    tdtParams.confId3D = confId3D;
    tdtParams.confId2D = confId2D;
    tdtParams.nameRecord = nameRecord;
    auto *tdtSupplier = static_cast<ContainedType *>(dp_supplier.get());
    tdtSupplier->setData(text, tdtParams);
  }

  //! forwards to the underlying supplier's moveTo()
  void moveTo(unsigned int idx) {
    PRECONDITION(dp_supplier, "no supplier");
    auto *tdtSupplier = static_cast<ContainedType *>(dp_supplier.get());
    tdtSupplier->moveTo(idx);
  }

  /*! \brief returns a particular molecule
   *
   *  \param idx - which item to return
   *
   *  \return the raw pointer released from the result of the underlying
   *          supplier's operator[]
   */
  ROMol *operator[](unsigned int idx) {
    PRECONDITION(dp_supplier, "no supplier");
    auto *tdtSupplier = static_cast<ContainedType *>(dp_supplier.get());
    return (*tdtSupplier)[idx].release();
  }

  /*! \brief returns the text block for a particular item
   *
   *  \param idx - which item to return
   */
  std::string getItemText(unsigned int idx) {
    PRECONDITION(dp_supplier, "no supplier");
    auto *tdtSupplier = static_cast<ContainedType *>(dp_supplier.get());
    return tdtSupplier->getItemText(idx);
  }

  //! \return the value of the underlying supplier's length()
  unsigned int length() {
    PRECONDITION(dp_supplier, "no supplier");
    auto *tdtSupplier = static_cast<ContainedType *>(dp_supplier.get());
    return tdtSupplier->length();
  }
};

#ifdef RDK_BUILD_MAEPARSER_SUPPORT
//! lazy file parser for MAE files
class RDKIT_FILEPARSERS_EXPORT MaeMolSupplier : public MolSupplier {
  /**
   * v1 API front end for v2::FileParsers::MaeMolSupplier: the legacy boolean
   * arguments are packed into a v2::FileParsers::MaeMolSupplierParams and
   * every call is forwarded to the object stored in dp_supplier.
   *
   * Due to maeparser's shared_ptr<istream> Reader interface, MaeMolSupplier
   * always requires taking ownership of the istream ptr, as the shared ptr will
   * always clear it upon destruction.
   */

 public:
  using ContainedType = v2::FileParsers::MaeMolSupplier;

  //! wraps a default-constructed v2 supplier
  MaeMolSupplier() { dp_supplier.reset(new ContainedType()); }

  /*!
   *   \param inStream - shared stream to read from
   *   \param sanitize - copied to the sanitize field of the supplier parameters
   *   \param removeHs - copied to the removeHs field of the supplier parameters
   */
  explicit MaeMolSupplier(std::shared_ptr<std::istream> inStream,
                          bool sanitize = true, bool removeHs = true) {
    v2::FileParsers::MaeMolSupplierParams maeParams;
    maeParams.removeHs = removeHs;
    maeParams.sanitize = sanitize;
    dp_supplier.reset(new v2::FileParsers::MaeMolSupplier(inStream, maeParams));
  }

  /*!
   *   \param inStream - the stream to read from
   *   \param takeOwnership - forwarded unchanged to the v2 supplier
   *   \param sanitize - copied to the sanitize field of the supplier parameters
   *   \param removeHs - copied to the removeHs field of the supplier parameters
   */
  explicit MaeMolSupplier(std::istream *inStream, bool takeOwnership = true,
                          bool sanitize = true, bool removeHs = true) {
    v2::FileParsers::MaeMolSupplierParams maeParams;
    maeParams.removeHs = removeHs;
    maeParams.sanitize = sanitize;
    dp_supplier.reset(new v2::FileParsers::MaeMolSupplier(
        inStream, takeOwnership, maeParams));
  }

  /*!
   *   \param fname - the name of the MAE file
   *   \param sanitize - copied to the sanitize field of the supplier parameters
   *   \param removeHs - copied to the removeHs field of the supplier parameters
   */
  explicit MaeMolSupplier(const std::string &fname, bool sanitize = true,
                          bool removeHs = true) {
    v2::FileParsers::MaeMolSupplierParams maeParams;
    maeParams.removeHs = removeHs;
    maeParams.sanitize = sanitize;
    dp_supplier.reset(new v2::FileParsers::MaeMolSupplier(fname, maeParams));
  }

  //! forwards to the underlying supplier's moveTo(); a supplier is required
  void moveTo(unsigned int idx) {
    PRECONDITION(dp_supplier, "no supplier");
    auto *maeSupplier = static_cast<ContainedType *>(dp_supplier.get());
    maeSupplier->moveTo(idx);
  }

  /*! \brief returns a particular molecule
   *
   *  \param idx - which item to return
   *
   *  \return the raw pointer released from the result of the underlying
   *          supplier's operator[]
   */
  RWMol *operator[](unsigned int idx) {
    PRECONDITION(dp_supplier, "no supplier");
    auto *maeSupplier = static_cast<ContainedType *>(dp_supplier.get());
    return (*maeSupplier)[idx].release();
  }

  //! \return the value of the underlying supplier's length()
  unsigned int length() {
    PRECONDITION(dp_supplier, "no supplier");
    auto *maeSupplier = static_cast<ContainedType *>(dp_supplier.get());
    return maeSupplier->length();
  }

  /*! \brief replaces the data the supplier reads from
   *
   *  \param text - the new contents
   *  \param sanitize - copied to the sanitize field of the supplier parameters
   *  \param removeHs - copied to the removeHs field of the supplier parameters
   */
  void setData(const std::string &text, bool sanitize = true,
               bool removeHs = true) {
    PRECONDITION(dp_supplier, "no supplier");
    v2::FileParsers::MaeMolSupplierParams maeParams;
    maeParams.removeHs = removeHs;
    maeParams.sanitize = sanitize;
    auto *maeSupplier = static_cast<ContainedType *>(dp_supplier.get());
    maeSupplier->setData(text, maeParams);
  }
};
#endif  // RDK_BUILD_MAEPARSER_SUPPORT

}  // namespace v1
}  // namespace RDKit

#endif
