Chemistry Toolkit Rosetta Wiki
Register
Advertisement

Convert a SMILES file (yet to be determined) into an SD file. The conversion must do its best to use the MDL conventions for the SD file, including aromaticity perception. Note that the use of aromatic bond types in CTABs is only allowed for queries, so aromatic structures must be written in a Kekule form.

(In some tools the conversion is automatic, in other tools it must be done explicitly, while in still others there's only a single chemistry model for every format.)

Because the stereochemistry of molecules in SD files is defined solely by the arrangement of atoms, it is necessary to assign either 2D or 3D coordinates to the molecule before generating output. The coordinates do not have to be reasonable (i.e. it's ok if they would make a chemist scream in horror), so long as the resulting structure is chemically correct.

Indigo/C++

#include "base_cpp/scanner.h"
#include "base_cpp/output.h"
#include "molecule/molecule.h"
#include "molecule/smiles_loader.h"
#include "molecule/molfile_saver.h"
#include "molecule/molecule_dearom.h"
#include "layout/molecule_layout.h"

int main (int argc, char *argv[])
{
   if (argc < 3)
   {
      fprintf(stderr, "Usage: smi_sdf_convert infile.smi outfile.sdf\n");
      return -1;
   }

   try
   {
      FileScanner scanner(argv[1]);
      FileOutput output(argv[2]);
      Array<char> smiles;
      Molecule mol;
      int cnt = 0;

      while (!scanner.isEOF())
      {
         scanner.readString(smiles, false);

         if (smiles.size() < 1)
            continue;

         printf("saving molecule #%d... ", ++cnt);

         BufferScanner smiles_s(smiles);
         SmilesLoader smiles_l(smiles_s);

         smiles_l.loadMolecule(mol, false);
         mol.calcImplicitHydrogens(true);

         {         
            DearomatizationsStorage dst;
            Dearomatizer dearom(mol);
            dearom.setDearomatizationParams(Dearomatizer::PARAMS_SAVE_ONE_DEAROMATIZATION);
            dearom.enumerateDearomatizations(dst);

            MoleculeDearomatizer mdearom(mol, dst);
            for (int i = 0; i < dst.getGroupsCount(); i++)
               mdearom.dearomatizeGroup(i, 0);
         }

         {
            MoleculeLayout ml(mol);
            ml.make();
            mol.getStereocenters().markBonds();
         }

         MolfileSaver saver(output);

         // saver.v2000 = true;
         saver.saveMolecule(mol);
         output.printf("$$$$\n");
         printf("\n");
      }
   }
   catch (Exception &e)
   {
      fprintf(stderr, "error: %s\n", e.message());
      return -1;
   }
   return 0;
}

Instructions:

  1. Unpack the 'graph', 'molecule', and 'layout' projects into a single folder
  2. Create a 'utils' folder alongside them
  3. Paste the above code into utils/smi_sdf_convert.cpp file
  4. Compile the file using the following commands:
    $ cd graph; make CONF=Release32; cd ..
    $ cd molecule; make CONF=Release32; cd ..
    $ cd layout; make CONF=Release32; cd ..
    $ cd utils
    $ gcc smi_sdf_convert.cpp -o smi_sdf_convert -O3 -m32 -I.. -I../common ../layout/dist/Release32/GNU-Linux-x86/liblayout.a ../molecule/dist/Release32/GNU-Linux-x86/libmolecule.a ../graph/dist/Release32/GNU-Linux-x86/libgraph.a -lpthread -lstdc++
  5. Run the program like this:
    $ ./smi_sdf_convert infile.smi infile.sdf
Advertisement