// Fungimol - an extensible system for designing atomic-scale objects.
// Copyright (C) 2000 Tim Freeman
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Library General Public
// License as published by the Free Software Foundation; either
// version 2 of the License, or (at your option) any later version.
// 
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
// Library General Public License for more details.
// 
// You should have received a copy of the GNU Library General Public
// License along with this library in the file COPYING.txt; if not,
// write to the Free Software Foundation, Inc., 59 Temple Place -
// Suite 330, Boston, MA 02111-1307, USA
//
// The author can be reached by email at tim@infoscreen.com, or by
// paper mail at:
//
// Tim Freeman
// 655 S. FairOaks Ave., Apt B-316
// Sunnyvale, CA 94086
//

// For strerror.
#ifndef  __string_h__
#include <string.h>
#define __string_h__
#endif

// For errno.
#ifndef __errno_h__
#include <errno.h>
#define __errno_h__
#endif

#ifndef __FileSceneLoader_h__
#include "FileSceneLoader.h"
#endif

#ifndef __Configurable_h__
#include "Configurable.h"
#endif

#ifndef __myassert_h__
#include "myassert.h"
#endif

#ifndef __fstream_h__
#include <fstream.h>
#define __fstream_h__
#endif

#ifndef __FileSceneLoaderConfiguration_h__
#include "FileSceneLoaderConfiguration.h"
#endif

#ifndef __AtomFactory_h__
#include "AtomFactory.h"
#endif

#ifndef __RecursiveSlotValue_h__
#include "RecursiveSlotValue.h"
#endif

#ifndef __MemoryUtil_h__
#include "MemoryUtil.h"
#endif

#ifndef __FactoryTable_h__
#include "FactoryTable.h"
#endif

#ifndef __Vec3_h__
#include "Vec3.h"
#endif

#ifndef __PhysicsObject_h__
#include "PhysicsObject.h"
#endif

#ifndef __SceneGraph_h__
#include "SceneGraph.h"
#endif

#ifndef __TopLevel_h__
#include "TopLevel.h"
#endif

// We really use streams here, since we have to open and read a file.
#ifndef __iostream_h__
#include <iostream.h>
#define __iostream_h__
#endif

#ifndef __String_h__
#include "String.h"
#endif

#ifndef __Stopwatch_h__
#include "Stopwatch.h"
#endif

#ifndef __LinkManager_h__
#include "LinkManager.h"
#endif

#ifndef __TypedFactory_h__
#include "TypedFactory.h"
#endif

#ifndef __Action_h__
#include "Action.h"
#endif

#ifndef __SelectionManager_h__
#include "SelectionManager.h"
#endif

#ifndef __Factory_h__
#include "Factory.h"
#endif

#ifndef __AtomConfiguration_h__
#include "AtomConfiguration.h"
#endif

#ifndef __AtomInfo_h__
#include "AtomInfo.h"
#endif

namespace {
  // A SceneLoader that reads PDB files.
  // The format is described at
  // http://www.rcsb.org/pdb/docs/format/pdbguide2.2/guide2.2_frame.html
  // (Actually, I'm using a version 2.1 document, but I'm working from
  // a local copy and have misplaced the URL for it.)
  //
  // Only HETATM, ATOM and CONECT records are interpreted; every other line is
  // skipped.  Reading stops at the first problem, which is recorded on the
  // Action returned from makeIt.
  class PDBSceneLoader
    : public TypedFactory <FileSceneLoaderConfiguration, Action>
  {
    // The value that will eventually be returned from makeIt.  An instance
    // variable so anybody can call setProblem () on it.
    SP<Action> m_result;
    // Factory used to create one atom per ATOM/HETATM record.
    SP<Factory> m_f;
    // Configuration handed to m_f; its symbol number is overwritten for each
    // atom before the atom is made.
    SP<AtomConfiguration> m_ai;
    // The scene graph that receives the loaded atoms and links.
    SP<SceneGraph> m_sg;
    // Offset added to every atom position read from the file.
    Vec3 m_translation;
    // Maps PDB serial numbers to object numbers.  If we don't know the object
    // number corresponding to a serial number, this will have -1 for the
    // serial number, or maybe we never saw the serial number before so the
    // serial number will index past the end of the array.
    Dynavec <int> m_serialToObject;
    // Next one is logically local to makeConnection, but have it here to avoid
    // repeated memory allocation.
    Dynavec <int> m_connections;
    // Logically local to makeAtomLine, here to avoid repeated allocation.
    Dynavec <Float> m_plausibleState;

    // Record a problem on the result.  reallyMakeIt stops reading the file
    // once a problem has been recorded.
    void setProblem (const String &s) {
      m_result->setProblem (s);
    }
    // True once a problem has been recorded on the result.
    bool isProblem () {
      return m_result->isProblem();
    }
    // Map a serial number to an object number.  Return -1 if no match.
    // Serial numbers that are zero or negative never match: makeAtomLine
    // refuses to register them.  Returning -1 here (rather than asserting)
    // lets a malformed file produce an error message instead of an abort or
    // an out-of-range index.
    int mapSerialToObject (int serial) {
      if (serial <= 0 || serial >= m_serialToObject.size ()) {
        return -1;
      }
      return m_serialToObject [serial];
    }
    // Parse a 5 digit integer from the string.  Don't make assumptions about
    // what happens after the fifth digit, but we'll stop on an invalid
    // character.  Used for serial numbers.  If the field is blank, you'll get
    // a zero, which is not considered an error.
    int parseSerial (const String &s, int start, String &error) {
      return s.toInt (start, 5, &error);
    }
    // True for a blank or a decimal digit.  The non-short-circuit operators
    // are equivalent here because both operands are side-effect-free bools.
    // NOTE(review): nothing in this class calls this helper at present.
    static inline bool symbolJunk (char ch) {
      return (' ' == ch) | (('0' <= ch) & ('9' >= ch));
    }
    // Handle one ATOM or HETATM record; both kinds are treated identically.
    //
    // The chemical symbol is taken from columns 13 and 14 (indices 12 and
    // 13), which according to Appendix 3 of
    // http://www.rcsb.org/pdb/docs/format/pdbguide2.2/guide2.2_frame.html
    // hold the symbol, right justified, or for hydrogen possibly left
    // justified.  A blank in either column is dropped, so " CA " (an
    // alpha-carbon) yields "C" while "CA  " yields "CA".
    // NOTE(review): a non-blank, non-symbol character in column 13 (for
    // example a leading digit) is kept as part of the symbol and will make
    // the symbol lookup fail; symbolJunk looks like it was meant to help
    // with that but is not used.
    //
    // On success the new atom is added to the scene graph and
    // m_serialToObject maps the record's serial number to the atom's object
    // number.  On any error a problem is recorded and neither the scene
    // graph nor m_serialToObject is changed.  Lines too short to contain the
    // coordinate columns are silently ignored.
    void makeAtomLine (const String &line) {
      // The z coordinate ends at column 54.
      if (line.size() < 54) {
        return;
      }
      int start = 12;
      if (' ' == line [start]) {
        start = 13;
      }
      int end = 13;
      if (' ' == line [13]) {
        end = 12;
      }
      // If both columns are blank this is an empty string.
      String symbol = line.substring (start, end - start + 1);
      String error = "";
      // Fixed-width parses: the standard does not promise any spaces between
      // one coordinate field and the next, so an unbounded parse could grab
      // digits belonging to the following field.
      const Float x = line.toFloat (30, 8, &error);
      const Float y = line.toFloat (38, 8, &error);
      const Float z = line.toFloat (46, 8, &error);
      if (error.size()) {
        setProblem (String ("Could not parse the position in the line ") +
                    line + "\n" + error);
        return;
      }
      // The file format specifies an unused column after the serial number.
      // We use parseSerial here to avoid assuming that the unused column has
      // a space in it.
      const int serial = parseSerial (line, 6, error);
      if (error.size()) {
        setProblem (String ("Could not parse the serial number in the line ") +
                    line + "\n" + error);
        return;
      }
      if (0 == serial) {
        setProblem (String ("Zero or blank serial number in the line ")
                    + line);
        return;
      }
      if (serial < 0) {
        // Whether toInt can produce a negative value is not visible from
        // here; guard anyway, because serial is used as an array index.
        setProblem (String ("Negative serial number ") + serial +
                    " in the line " + line);
        return;
      }
      const int number = AtomInfo::symbolToNumber (symbol);
      if (-1 == number) {
        setProblem (String ("Could not recognize \"") + symbol +
                    "\" as a chemical symbol in the line\n" + line);
        return;
      }
      // Reject a duplicate serial number before touching the scene graph, so
      // that a failed line does not leave behind an atom that
      // m_serialToObject knows nothing about.
      if (-1 != mapSerialToObject (serial)) {
        setProblem (String ("The serial number ") + serial +
                    " appears twice.");
        return;
      }
      m_ai->setSymbolNumber (number);
      SP<PhysicsObject> atom;
      atom = dynamic_cast <PhysicsObject *> (&*m_f->makeIt (&*m_ai));
      assert (atom);
      atom->plausibleState (m_plausibleState);
      atom->translate (m_plausibleState, m_translation + Vec3 (x, y, z));
      const int objNo = m_sg->addObject (atom, m_plausibleState);
      if (-1 == objNo) {
        setProblem (String ("Could not add atom number ") + serial +
                    " to the scene graph; it was probably out of bounds.");
        return;
      }
      // Everything succeeded; only now grow the table and record the atom.
      if (serial >= m_serialToObject.size()) {
        m_serialToObject.extendTo (serial + 1, -1);
      }
      m_serialToObject [serial] = objNo;
    }
    // Handle one CONECT record: the atom serial number at index 6, followed
    // by up to NBONDS five-column serial numbers of atoms it is connected
    // to.  Blank (zero) bond fields are skipped.  Every serial number
    // mentioned must already have been registered by makeAtomLine; otherwise
    // a problem is recorded and no links are changed.  On success the new
    // connections are appended to whatever links the atom already had.
    void makeConnection (const String &line) {
      // Can't simply use an unbounded String::toInt to parse the serial
      // numbers, since that wants to keep going until it finds a non-digit,
      // and the numerical fields in the CONECT record may be right against
      // each other without any separation.
      String error = "";
      const int serial = parseSerial (line, 6, error);
      if (error.size()) {
        setProblem (String ("Can't parse the atom serial number in the line ")
                    + line + "\n" + error);
        return;
      }
      if (0 == serial) {
        setProblem (String ("Missing serial number in the line ") + line);
        return;
      }
      const int serialObj = mapSerialToObject (serial);
      if (-1 == serialObj) {
        setProblem (String ("Serial number ")
                    + serial + " unrecognized in line " + line);
        return;
      }
      // Number of bonds in a CONECT record.
      const int NBONDS = 10;
      // Object number for each bond field, or -1 if the field is absent or
      // blank.
      int bondObjects [NBONDS];
      for (int i = 0; i < NBONDS; i++) {
        bondObjects [i] = -1;
        const int start = i*5+11;
        if (line.size() <= start) {
          // The line ends before this field.
          continue;
        }
        const int bond = parseSerial (line, start, error);
        if (error.size()) {
          setProblem (String ("Can't parse a bond serial number in line ")
                      + line + "\n" + error);
          return;
        }
        if (0 == bond) {
          // Blank field.
          continue;
        }
        const int bondObj = mapSerialToObject (bond);
        if (-1 == bondObj) {
          setProblem (String ("Serial number ") + bond +
                      " unrecognized in bond in line " + line);
          return;
        }
        bondObjects [i] = bondObj;
      }
      // Add the links: existing ones first, then the ones from this record.
      SP<LinkManager> lm = m_sg->getLinkManager();
      int count = 0;
      const int *links = 0;
      lm->getLinks (serialObj, count, links);
      // NOTE(review): this is meant to empty the scratch vector; confirm that
      // Dynavec::extendTo (0) really shrinks it.
      m_connections.extendTo (0);
      for (int i = 0; i < count; i++) {
        m_connections.push (links [i]);
      }
      for (int i = 0; i < NBONDS; i++) {
        if (-1 != bondObjects [i]) {
          m_connections.push (bondObjects [i]);
        }
      }
      lm->setLinks (serialObj, m_connections);
    }
  public:
    PDBSceneLoader ()
      : TypedFactory <FileSceneLoaderConfiguration, Action> ("PDBSceneLoader")
    {}
    // The default configuration is a default-constructed
    // FileSceneLoaderConfiguration.
    SP<FileSceneLoaderConfiguration> typedDefaultConfiguration () const {
      SP<FileSceneLoaderConfiguration> result =
        NEW (FileSceneLoaderConfiguration ());
      return result;
    }
    // Next one is like makeIt, except no const qualifier.  See definition of
    // makeIt below.  For example, if the const qualifier were there, the first
    // assignment to m_result would be disallowed.
    //
    // Reads the file named by slc line by line, adding atoms and links to
    // the scene graph of slc's top level, translated by slc->getWhere ().
    // Returns an Action on which a problem has been set if anything went
    // wrong.  If slc->getSelect () is true, everything that was loaded and
    // is selectable becomes the selection, even after an error.
    SP<Action> reallyMakeIt (FileSceneLoaderConfiguration *slc) {
      m_result = NEW (Action ());
      m_f = FactoryTable::load ("Atom", "BoringAtom");
      assert (m_f);
      m_ai = dynamic_cast <AtomConfiguration *> (&*m_f->defaultConfiguration());
      assert (m_ai);
      m_sg = slc->getTopLevel ()->getSceneGraph ();
      assert (m_sg);
      m_translation = slc->getWhere ();
      String fileName = slc->getFileName ();
      ifstream stream (&*fileName);
      // Grab errno right away; building the message below may allocate and
      // could overwrite it.
      const int openErrno = errno;
      if (!stream.is_open ()) {
        setProblem (String ("Could not open ") + fileName +
                    " for reading: " + strerror (openErrno));
      }
      // Test good () rather than just !eof (): a read failure that never
      // reaches end of file would otherwise loop forever.
      while (stream.good () && !isProblem ()) {
        String line;
        stream >> line;
        if (line.size() > 0) {
          // Dispatch on the first character before doing the full compare.
          switch (line [0]) {
          case 'H':
            if (line.matches (0, "HETATM")) makeAtomLine (line);
            break;
          case 'A':
            // FIXME Connections within atoms defined by ATOM are not all
            // specified with CONECT, and we don't infer them either.
            if (line.matches (0, "ATOM  ")) makeAtomLine (line);
            break;
          case 'C':
            if (line.matches (0, "CONECT ")) makeConnection (line);
            break;
          default:
            // do nothing.
            break;
          }
        }
      }
      if (!isProblem () && stream.bad ()) {
        // Don't silently present a partially read file as a success.
        setProblem (String ("Error while reading ") + fileName);
      }
      stream.close ();
      if (slc->getSelect()) {
        // Have to select everything we just loaded.  We do this even if there
        // was an error, because if there was an error the user will probably
        // want to delete the mangled useless mess he just loaded from his
        // scene.
        Dynavec <int> selection;
        for (int i = 0; i < m_serialToObject.size(); i++) {
          const int s = m_serialToObject [i];
          if (-1 != s && m_sg->object(s)->isSelectable ()) {
            selection.push (s);
          }
        }
        SelectionManager::setSelection (m_sg, selection);
      }
      return m_result;
    }
    SP<Action> makeIt (FileSceneLoaderConfiguration *slc) const {
      // The requirement that makeIt is const itches here.  Dodge by calling
      // NEW.  Avoid problems with smart pointer dereferencing by creating a
      // smart pointer temporary object.  We can afford to call New one more
      // time per loading a file.  A side benefit is that every load starts
      // with fresh, empty per-load state.
      return SP<PDBSceneLoader> (NEW (PDBSceneLoader ()))->reallyMakeIt (slc);
    }
  };
  static const bool useless =
  (FactoryTable::store ("FileSceneLoader", NEW (PDBSceneLoader ())),
   true);
};
