/// <summary>
/// Internal method for parsing a charge. Concatenated signs (<c>--</c>, <c>++</c>)
/// are supported: every sign that is not directly followed by a digit moves the
/// accumulator by one, whereas a sign followed by digits adds (or subtracts) that
/// number to the accumulator and ends the charge.
/// </summary>
/// <remarks>
/// Implemented as a loop rather than by recursion so that a very long run of
/// sign characters in the input cannot exhaust the call stack.
/// </remarks>
/// <param name="acc">accumulator</param>
/// <param name="buffer">a character buffer</param>
/// <returns>the charge value</returns>
private static int ReadCharge(int acc, CharBuffer buffer)
{
    while (true)
    {
        if (buffer.GetIf('+'))
        {
            // "+<digits>" terminates the charge, a bare '+' counts as +1
            if (buffer.NextIsDigit())
            {
                return acc + buffer.GetNumber();
            }
            acc++;
        }
        else if (buffer.GetIf('-'))
        {
            // "-<digits>" terminates the charge, a bare '-' counts as -1
            if (buffer.NextIsDigit())
            {
                return acc - buffer.GetNumber();
            }
            acc--;
        }
        else
        {
            // no further sign character: the accumulated value is the charge
            return acc;
        }
    }
}
/// <summary>
/// Read the hydrogen count and progress the provided buffer. The hydrogen
/// count is specified by an 'H' followed by zero or more digits. An 'H' without
/// digits is interpreted as 'H1'. When there is no 'H', or 'H0' is specified,
/// the hydrogen count is 0.
/// </summary>
/// <param name="buffer">a character buffer</param>
/// <returns>the hydrogen count, 0 if none</returns>
public static int ReadHydrogens(CharBuffer buffer)
{
    if (!buffer.GetIf('H'))
    {
        return 0;
    }

    int digits = buffer.GetNumber();

    // a negative number is treated as "no count given", i.e. a lone 'H' is one hydrogen
    if (digits < 0)
    {
        return 1;
    }
    return digits;
}
/// <summary>
/// Read the atom class of a bracket atom and progress the buffer (if read).
/// The atom class is the last attribute of the bracket atom and is introduced
/// by a ':' which must be followed by one or more digits. The atom class may be
/// padded such that ':005' and ':5' are equivalent.
/// </summary>
/// <seealso href="http://www.opensmiles.org/opensmiles.html#atomclass">Atom Class - OpenSMILES Specification</seealso>
/// <param name="buffer">a character buffer</param>
/// <returns>the atom class, or 0 when no ':' is present</returns>
/// <exception cref="InvalidSmilesException">a ':' was present but not followed by a digit</exception>
public static int ReadClass(CharBuffer buffer)
{
    // no ':' means no atom class was specified
    if (!buffer.GetIf(':'))
    {
        return 0;
    }

    if (!buffer.NextIsDigit())
    {
        throw new InvalidSmilesException("invalid atom class, <digit>+ must follow ':'", buffer);
    }

    return buffer.GetNumber();
}
/// <summary>
/// Read a bracket atom from the buffer. A bracket atom optionally defines
/// isotope, chirality, hydrogen count, formal charge and the atom class.
/// <para>
/// bracket_atom ::= '[' isotope? symbol chiral? hcount? charge? class? ']'
/// </para>
/// When not parsing strictly, content that does not match the grammar is kept
/// verbatim as an arbitrary label that runs up to the matching ']'.
/// </summary>
/// <param name="buffer">a character buffer</param>
/// <returns>a bracket atom</returns>
/// <exception cref="InvalidSmilesException">if the bracket atom did not match the grammar, invalid symbol, missing closing bracket or invalid chiral specification.</exception>
public IAtom ReadBracketAtom(CharBuffer buffer)
{
    // remembered so the raw text can be recovered if this turns out to be a label
    int labelStart = buffer.Position;

    if (!buffer.HasRemaining())
    {
        throw new InvalidSmilesException("Unclosed bracket atom, SMILES may be truncated", buffer);
    }

    int isotope = buffer.GetNumber();
    bool aromatic = buffer.NextChar >= 'a' && buffer.NextChar <= 'z';
    var element = Element.Read(buffer);

    if (element == Element.Unknown)
    {
        hasAstrix = true;
    }

    if (strict && element == null)
    {
        throw new InvalidSmilesException("unrecognised element symbol, SMILES may be truncated: ", buffer);
    }

    if (aromatic && element != null)
    {
        g.AddFlags(Graph.HAS_AROM);
    }

    // element isn't aromatic as per the OpenSMILES specification
    if (strict && aromatic && !element.IsAromatic(Element.AromaticSpecification.OpenSmiles))
    {
        throw new InvalidSmilesException("abnormal aromatic element", buffer);
    }

    // without a recognised element the bracket content is handled as a label
    bool treatAsLabel = element == null;

    configuration = Configuration.Read(buffer);

    int hCount = ReadHydrogens(buffer);
    var charge = ReadCharge(buffer);
    int atomClass = ReadClass(buffer);

    if (!treatAsLabel && !buffer.GetIf(']'))
    {
        if (strict)
        {
            throw InvalidSmilesException.InvalidBracketAtom(buffer);
        }
        // lenient parsing: trailing content turns the whole bracket into a label
        treatAsLabel = true;
    }

    if (!treatAsLabel)
    {
        return new AtomImpl.BracketAtom(isotope, element, hCount, charge, atomClass, aromatic);
    }

    // consume up to the ']' that balances the opening bracket, tracking nested brackets
    int labelEnd = buffer.Position;
    int nesting = 1;
    while (nesting > 0 && buffer.HasRemaining())
    {
        char ch = buffer.Get();
        if (ch == ']')
        {
            nesting--;
        }
        else if (ch == '[')
        {
            nesting++;
        }

        // the closing bracket itself is not part of the label
        if (nesting > 0)
        {
            labelEnd++;
        }
    }

    if (nesting != 0)
    {
        throw new InvalidSmilesException("unparsable label in bracket atom", buffer, buffer.Position - 1);
    }

    var labelText = buffer.Substr(labelStart, labelEnd);
    hasAstrix = true;
    return new AtomImpl.BracketAtom(labelText);
}
/// <summary>
/// Read a molecule from the character buffer. Characters are consumed one at a
/// time and dispatched on: atoms are added, ring numbers and bond symbols are
/// recorded, and branches are opened/closed on the stack. Reading stops when the
/// buffer is exhausted or a line terminator is met; text following a space or
/// tab (up to a line terminator) is stored as the title.
/// </summary>
/// <param name="buffer">a character buffer</param>
/// <exception cref="InvalidSmilesException">invalid grammar</exception>
private void ReadSmiles(CharBuffer buffer)
{
    // primary dispatch
    while (buffer.HasRemaining())
    {
        char ch = buffer.Get();

        switch (ch)
        {
            // aliphatic subset
            case '*':
                hasAstrix = true;
                AddAtom(AtomImpl.AliphaticSubset.Any, buffer);
                break;

            case 'B':
                if (buffer.GetIf('r'))
                {
                    AddAtom(AtomImpl.AliphaticSubset.Bromine, buffer);
                    break;
                }
                AddAtom(AtomImpl.AliphaticSubset.Boron, buffer);
                break;

            case 'C':
                if (buffer.GetIf('l'))
                {
                    AddAtom(AtomImpl.AliphaticSubset.Chlorine, buffer);
                    break;
                }
                AddAtom(AtomImpl.AliphaticSubset.Carbon, buffer);
                break;

            case 'N':
                AddAtom(AtomImpl.AliphaticSubset.Nitrogen, buffer);
                break;

            case 'O':
                AddAtom(AtomImpl.AliphaticSubset.Oxygen, buffer);
                break;

            case 'P':
                AddAtom(AtomImpl.AliphaticSubset.Phosphorus, buffer);
                break;

            case 'S':
                AddAtom(AtomImpl.AliphaticSubset.Sulfur, buffer);
                break;

            case 'F':
                AddAtom(AtomImpl.AliphaticSubset.Fluorine, buffer);
                break;

            case 'I':
                AddAtom(AtomImpl.AliphaticSubset.Iodine, buffer);
                break;

            // aromatic subset
            case 'b':
                AddAtom(AtomImpl.AromaticSubset.Boron, buffer);
                g.AddFlags(Graph.HAS_AROM);
                break;

            case 'c':
                AddAtom(AtomImpl.AromaticSubset.Carbon, buffer);
                g.AddFlags(Graph.HAS_AROM);
                break;

            case 'n':
                AddAtom(AtomImpl.AromaticSubset.Nitrogen, buffer);
                g.AddFlags(Graph.HAS_AROM);
                break;

            case 'o':
                AddAtom(AtomImpl.AromaticSubset.Oxygen, buffer);
                g.AddFlags(Graph.HAS_AROM);
                break;

            case 'p':
                AddAtom(AtomImpl.AromaticSubset.Phosphorus, buffer);
                g.AddFlags(Graph.HAS_AROM);
                break;

            case 's':
                AddAtom(AtomImpl.AromaticSubset.Sulfur, buffer);
                g.AddFlags(Graph.HAS_AROM);
                break;

            // D/T for hydrogen isotopes - non-standard but OpenSMILES spec
            // says it's possible. The D and T here are automatically converted
            // to [2H] and [3H].
            case 'H':
                if (strict)
                {
                    throw new InvalidSmilesException("hydrogens should be specified in square brackets - '[H]'", buffer);
                }
                AddAtom(AtomImpl.EXPLICIT_HYDROGEN, buffer);
                break;

            case 'D':
                if (strict)
                {
                    throw new InvalidSmilesException("deuterium should be specified as a hydrogen isotope - '[2H]'", buffer);
                }
                AddAtom(AtomImpl.DEUTERIUM, buffer);
                break;

            case 'T':
                if (strict)
                {
                    throw new InvalidSmilesException("tritium should be specified as a hydrogen isotope - '[3H]'", buffer);
                }
                AddAtom(AtomImpl.TRITIUM, buffer);
                break;

            // bracket atom
            case '[':
                AddAtom(ReadBracketAtom(buffer), buffer);
                break;

            // ring bonds: a single digit is the ring number itself
            case '0':
            case '1':
            case '2':
            case '3':
            case '4':
            case '5':
            case '6':
            case '7':
            case '8':
            case '9':
                Ring(ch - '0', buffer);
                break;

            // ring bonds: '%' introduces a ring number read with GetNumber(2)
            case '%':
            {
                int ringNumber = buffer.GetNumber(2);
                if (ringNumber < 0)
                {
                    throw new InvalidSmilesException("a number (<digit>+) must follow '%':", buffer);
                }
                if (strict && ringNumber < 10)
                {
                    throw new InvalidSmilesException("two digits must follow '%'", buffer);
                }
                Ring(ringNumber, buffer);
                lastBondPos = buffer.Position;
                break;
            }

            // bond/dot
            case '-':
                if (bond != Bond.Implicit)
                {
                    throw new InvalidSmilesException("Multiple bonds specified:", buffer);
                }
                bond = Bond.Single;
                lastBondPos = buffer.Position;
                break;

            case '=':
                if (bond != Bond.Implicit)
                {
                    throw new InvalidSmilesException("Multiple bonds specified:", buffer);
                }
                bond = Bond.Double;
                lastBondPos = buffer.Position;
                break;

            case '#':
                if (bond != Bond.Implicit)
                {
                    throw new InvalidSmilesException("Multiple bonds specified:", buffer);
                }
                bond = Bond.Triple;
                lastBondPos = buffer.Position;
                break;

            case '$':
                if (bond != Bond.Implicit)
                {
                    throw new InvalidSmilesException("Multiple bonds specified:", buffer);
                }
                bond = Bond.Quadruple;
                lastBondPos = buffer.Position;
                break;

            case ':':
                if (bond != Bond.Implicit)
                {
                    throw new InvalidSmilesException("Multiple bonds specified:", buffer);
                }
                g.AddFlags(Graph.HAS_AROM);
                bond = Bond.Aromatic;
                lastBondPos = buffer.Position;
                break;

            case '/':
                if (bond != Bond.Implicit)
                {
                    throw new InvalidSmilesException("Multiple bonds specified:", buffer);
                }
                bond = Bond.Up;
                lastBondPos = buffer.Position;
                g.AddFlags(Graph.HAS_BND_STRO);
                break;

            case '\\':
                // we allow C\\C=C/C since it could be an escaping error
                if (bond != Bond.Implicit && bond != Bond.Down)
                {
                    throw new InvalidSmilesException("Multiple bonds specified:", buffer);
                }
                bond = Bond.Down;
                lastBondPos = buffer.Position;
                g.AddFlags(Graph.HAS_BND_STRO);
                break;

            case '.':
                if (bond != Bond.Implicit)
                {
                    throw new InvalidSmilesException("Bond specified before disconnection:", buffer);
                }
                bond = Bond.Dot;
                break;

            // branching
            case '(':
                if (stack.IsEmpty)
                {
                    throw new InvalidSmilesException("Cannot open branch at this position, SMILES may be truncated:", buffer);
                }
                stack.Push(stack.Peek());
                break;

            case ')':
                if (stack.Count < 2)
                {
                    throw new InvalidSmilesException("Closing of an unopened branch, SMILES may be truncated:", buffer);
                }
                stack.Pop();
                break;

            // termination: the remainder of the line is the title
            case '\t':
            case ' ':
            {
                var title = new StringBuilder();
                while (buffer.HasRemaining())
                {
                    char titleChar = buffer.Get();
                    bool endOfLine = titleChar == '\n' || titleChar == '\r';
                    if (endOfLine)
                    {
                        break;
                    }
                    title.Append(titleChar);
                }
                g.Title = title.ToString();
                return;
            }

            // termination: end of line
            case '\n':
            case '\r':
                return;

            default:
                throw new InvalidSmilesException("unexpected character:", buffer);
        }
    }
}