Read a SMILES file containing multiple structures. Perform a substructure match of a query SMILES structure against each input structure. In every structure where the query is found, invert the stereo configuration of the atoms that correspond to selected stereocenters of the query, then save the resulting SMILES.
This task originated from a topic on BlueObelisk.
Implementation
Take the following query substructure: O[C@H]1[C@@H]([C@H]([C@@H]([C@@H](O1)CO)O)O)O
Run the query substructure against sugars.smi.gz, and in every matched structure, invert stereo configurations of atoms that correspond to atoms #3, 4, 5 of the query substructure. Write the resulting SMILES to standard output.
Indigo/C++
#include "base_cpp/scanner.h"
#include "base_cpp/output.h"
#include "molecule/smiles_saver.h"
#include "gzip/gzip_scanner.h"
#include "molecule/molecule.h"
#include "molecule/smiles_loader.h"
#include "molecule/molecule_substructure_matcher.h"
// Reads SMILES lines from sugars.smi.gz and matches a fixed query substructure
// against each one. Lines that do not match are echoed to standard output
// unchanged; for lines that match, the target atoms mapped from the selected
// query atoms have their stereo configuration inverted and the modified
// molecule is written to standard output as SMILES.
//
// Returns 0 on success, or -1 after printing the message to stderr if an
// Exception is caught.
int main (void)
{
   const char *query = "O[C@H]1[C@@H]([C@H]([C@@H]([C@@H](O1)CO)O)O)O";

   // Indices of the query atoms whose mapped target atoms are inverted.
   int atoms_to_invert[] = {3, 4, 5};

   try
   {
      // The query is parsed inside the try block so that an Exception thrown
      // while loading it is reported by the handler below rather than
      // escaping main() uncaught.
      Molecule qmol;
      BufferScanner qs(query);
      SmilesLoader ql(qs);

      // NOTE(review): the second argument presumably requests loading as a
      // query molecule -- confirm against SmilesLoader.
      ql.loadMolecule(qmol, true);

      FileScanner scanner("sugars.smi.gz");
      GZipScanner gzscanner(scanner);

      while (!gzscanner.isEOF())
      {
         Molecule mol;
         Array<char> str;

         // One input line per structure. The buffer is later printed with an
         // explicit length, so it is not relied upon to be zero-terminated.
         gzscanner.readString(str, false);
         fflush(stdout);

         BufferScanner strscanner(str);
         SmilesLoader loader(strscanner);

         loader.loadMolecule(mol, false);
         mol.calcImplicitHydrogens(true);

         MoleculeSubstructureMatcher matcher(mol);

         matcher.setQuery(qmol);

         if (!matcher.find())
         {
            // No match: echo the input line as it was read.
            printf("%.*s\n", str.size(), str.ptr());
            continue;
         }

         // mapping[q] is the target atom matched to query atom q.
         const int *mapping = matcher.getQueryMapping();

         for (int i = 0; i < NELEM(atoms_to_invert); i++)
         {
            int *pyramid = mol.getStereocenters().getPyramid(mapping[atoms_to_invert[i]]);
            int tmp;

            // Exchanging the first two pyramid entries is how this example
            // inverts the configuration of the stereocenter.
            __swap(pyramid[0], pyramid[1], tmp);
         }

         StandardOutput output;
         SmilesSaver saver(output);

         saver.saveMolecule(mol);
         output.writeCR();
      }
   }
   catch (Exception &e)
   {
      fprintf(stderr, "error: %s\n", e.message());
      return -1;
   }
   return 0;
}