Chemistry Toolkit Rosetta Wiki
Advertisement

Read a SMILES file containing multiple structures. Perform a substructure match of a query SMILES structure against each input structure. In every found substructure, invert the stereo configuration of selected stereocenters of the query, then save the resulting SMILES.

This task originated from a topic on BlueObelisk.

Implementation

Take the following query substructure: O[C@H]1[C@@H]([C@H]([C@@H]([C@@H](O1)CO)O)O)O

Run the query substructure against sugars.smi.gz, and in every matched structure, invert stereo configurations of atoms that correspond to atoms #3, 4, 5 of the query substructure. Write the resulting SMILES to standard output.


Indigo/C++

#include "base_cpp/scanner.h"
#include "base_cpp/output.h"
#include "molecule/smiles_saver.h"
#include "gzip/gzip_scanner.h"
#include "molecule/molecule.h"
#include "molecule/smiles_loader.h"
#include "molecule/molecule_substructure_matcher.h"

/*
 * Reads SMILES lines from the gzip-compressed file "sugars.smi.gz", matches
 * the hard-coded query substructure against each of them, and prints one line
 * per input line to standard output:
 *   - if the query is not found, the input line is echoed unchanged;
 *   - otherwise, for every query atom listed in atoms_to_invert, the first two
 *     entries of the pyramid of the matched target atom are swapped, and the
 *     modified molecule is written as SMILES.
 *
 * Returns 0 on success. If an Exception is caught, prints "error: <message>"
 * to stderr and returns -1.
 */
int main (void)
{
   const char *query = "O[C@H]1[C@@H]([C@H]([C@@H]([C@@H](O1)CO)O)O)O";
   // Query atoms whose matched counterparts get their stereo configuration
   // inverted. The values are used directly as indices into the query mapping.
   // NOTE(review): confirm this numbering base agrees with the "atoms #3, 4, 5"
   // of the task statement.
   int atoms_to_invert[] = {3, 4, 5};

   try
   {
      // The query is parsed inside the try block so that an Exception thrown
      // by the loader is reported by the handler below instead of escaping
      // main uncaught.
      Molecule qmol;
      BufferScanner qs(query);
      SmilesLoader ql(qs);
      // NOTE(review): the second argument differs from the one used for the
      // target molecules below (true vs. false); presumably it selects
      // query-mode loading -- confirm against SmilesLoader.
      ql.loadMolecule(qmol, true);

      FileScanner scanner("sugars.smi.gz");
      GZipScanner gzscanner(scanner);
      
      // One iteration per string read from the decompressed stream.
      while (!gzscanner.isEOF())
      {
         Molecule mol;
         Array<char> str;
         // The raw text is kept in str so it can be echoed verbatim when the
         // query does not match.
         gzscanner.readString(str, false);
         BufferScanner strscanner(str);
         SmilesLoader loader(strscanner);
         loader.loadMolecule(mol, false);
         mol.calcImplicitHydrogens(true);

         MoleculeSubstructureMatcher matcher(mol);
         matcher.setQuery(qmol);
         if (!matcher.find())
         {
            // No match: print the input text as read. The explicit length is
            // passed because only str.size() characters are to be printed.
            printf("%.*s\n", str.size(), str.ptr());
            continue;
         }
         // mapping[q] is used below as the target-molecule atom index matched
         // to query atom q.
         const int *mapping = matcher.getQueryMapping();

         for (int i = 0; i < NELEM(atoms_to_invert); i++)
         {
            int *pyramid = mol.getStereocenters().getPyramid(mapping[atoms_to_invert[i]]);
            int tmp;

            // Exchanging two pyramid entries is how this program inverts the
            // stereocenter's configuration.
            __swap(pyramid[0], pyramid[1], tmp);
         }
         // Write the modified molecule as SMILES followed by a line break.
         StandardOutput output;
         SmilesSaver saver(output);
         saver.saveMolecule(mol);
         output.writeCR();
      }
   }
   catch (Exception &e)
   {
      fprintf(stderr, "error: %s\n", e.message());
      return -1;
   }
   return 0;
}

Instructions:

  1. Unpack 'graph' and 'molecule' projects into some folder
  2. Create 'utils' folder nearby
  3. Paste the above code into utils/stereo-invert.cpp file
  4. Compile the file using the following commands:
    $ cd graph; make CONF=Release32; cd ..
    $ cd molecule; make CONF=Release32; cd ..
    $ cd utils
    $ gcc stereo-invert.cpp -o stereo-invert -O3 -m32 -I.. -I../common ../molecule/dist/Release32/GNU-Linux-x86/libmolecule.a ../graph/dist/Release32/GNU-Linux-x86/libgraph.a -lpthread -lstdc++
  5. Run the program like this:
    $ ./stereo-invert
Advertisement