Beispiel #1
0
 /// <summary>
 /// Accumulates a formal charge from a run of sign characters. Each '+' or
 /// '-' that is not directly followed by a digit adjusts the running total by
 /// one (so "++" and "--" are accepted); a sign that is followed by a digit
 /// applies the number read from the buffer as the magnitude and ends the scan.
 /// </summary>
 /// <param name="acc">the charge accumulated so far (the starting value)</param>
 /// <param name="buffer">a character buffer, advanced past every character consumed</param>
 /// <returns>the resulting charge; <paramref name="acc"/> unchanged when no sign is present</returns>
 private static int ReadCharge(int acc, CharBuffer buffer)
 {
     int total = acc;
     while (true)
     {
         // '+' is always probed before '-'
         bool positive;
         if (buffer.GetIf('+'))
         {
             positive = true;
         }
         else if (buffer.GetIf('-'))
         {
             positive = false;
         }
         else
         {
             // no further sign characters: the charge is complete
             return total;
         }

         if (buffer.NextIsDigit())
         {
             // an explicit magnitude terminates the charge specification
             int magnitude = buffer.GetNumber();
             return positive ? total + magnitude : total - magnitude;
         }

         // a bare sign counts as a single unit of charge; keep scanning
         total = positive ? total + 1 : total - 1;
     }
 }
Beispiel #2
0
 /// <summary>
 /// Reads the hydrogen count and advances the provided buffer. The hydrogen
 /// count is written as an 'H' followed by zero or more digits. An 'H' without
 /// digits is interpreted as 'H1'. When there is no 'H', or 'H0' is given, the
 /// hydrogen count is 0.
 /// </summary>
 /// <param name="buffer">a character buffer</param>
 /// <returns>the hydrogen count, 0 if none</returns>
 public static int ReadHydrogens(CharBuffer buffer)
 {
     if (!buffer.GetIf('H'))
     {
         return 0;
     }

     // a negative result is treated as "no digits followed the 'H'",
     // which means exactly one hydrogen
     int digits = buffer.GetNumber();
     if (digits < 0)
     {
         return 1;
     }
     return digits;
 }
Beispiel #3
0
 /// <summary>
 /// Reads the atom class of a bracket atom, advancing the buffer only when a
 /// class is present. The atom class is the final attribute of a bracket atom
 /// and is written as a ':' followed by one or more digits. Leading zeros are
 /// permitted, so ':005' and ':5' are equivalent.
 /// </summary>
 /// <seealso href="http://www.opensmiles.org/opensmiles.html#atomclass">Atom Class - OpenSMILES Specification</seealso>
 /// <param name="buffer">a character buffer</param>
 /// <returns>the atom class, or 0 when no ':' is present</returns>
 /// <exception cref="InvalidSmilesException">a ':' was read but no digit follows it</exception>
 public static int ReadClass(CharBuffer buffer)
 {
     if (!buffer.GetIf(':'))
     {
         return 0;
     }

     if (!buffer.NextIsDigit())
     {
         throw new InvalidSmilesException("invalid atom class, <digit>+ must follow ':'", buffer);
     }

     return buffer.GetNumber();
 }
Beispiel #4
0
        /// <summary>
        /// Reads a bracket atom from the buffer. A bracket atom may specify an
        /// isotope, chirality, hydrogen count, formal charge and atom class.
        /// <para>
        /// bracket_atom ::= '[' isotope? symbol chiral? hcount? charge? class? ']'
        /// </para>
        /// <para>
        /// When the element symbol is not recognised, or (outside strict mode) the
        /// closing bracket does not directly follow the parsed attributes, the text
        /// up to the matching ']' is captured as an arbitrary label instead.
        /// </para>
        /// </summary>
        /// <param name="buffer">a character buffer</param>
        /// <returns>a bracket atom</returns>
        /// <exception cref="InvalidSmilesException">if the bracket atom did not match the grammar, invalid symbol, missing closing bracket or invalid chiral specification.</exception>
        public IAtom ReadBracketAtom(CharBuffer buffer)
        {
            // remembered so that an arbitrary label can be cut out of the buffer later
            int labelStart = buffer.Position;

            if (!buffer.HasRemaining())
            {
                throw new InvalidSmilesException("Unclosed bracket atom, SMILES may be truncated", buffer);
            }

            var isotope  = buffer.GetNumber();
            var aromatic = buffer.NextChar >= 'a' && buffer.NextChar <= 'z';
            var element  = Element.Read(buffer);

            if (element == Element.Unknown)
            {
                hasAstrix = true;
            }

            bool unrecognised = element == null;

            if (unrecognised)
            {
                // strict parsing refuses symbols it cannot resolve; otherwise the
                // bracket contents are later treated as an arbitrary label
                if (strict)
                {
                    throw new InvalidSmilesException("unrecognised element symbol, SMILES may be truncated: ", buffer);
                }
            }
            else if (aromatic)
            {
                g.AddFlags(Graph.HAS_AROM);

                // element isn't aromatic as per the OpenSMILES specification
                if (strict && !element.IsAromatic(Element.AromaticSpecification.OpenSmiles))
                {
                    throw new InvalidSmilesException("abnormal aromatic element", buffer);
                }
            }

            // the remaining attributes are consumed in grammar order
            configuration = Configuration.Read(buffer);

            var hCount    = ReadHydrogens(buffer);
            var charge    = ReadCharge(buffer);
            var atomClass = ReadClass(buffer);

            // regular case: known element and the closing bracket follows immediately
            if (!unrecognised && buffer.GetIf(']'))
            {
                return new AtomImpl.BracketAtom(isotope, element, hCount, charge, atomClass, aromatic);
            }

            // known element but no closing bracket: an error when strict,
            // otherwise fall through and treat the contents as a label
            if (!unrecognised && strict)
            {
                throw InvalidSmilesException.InvalidBracketAtom(buffer);
            }

            // scan forward to the ']' that balances the already-open bracket,
            // counting the characters that belong to the label
            var labelEnd     = buffer.Position;
            int openBrackets = 1;
            while (openBrackets != 0 && buffer.HasRemaining())
            {
                char ch = buffer.Get();
                switch (ch)
                {
                    case '[':
                        openBrackets++;
                        break;

                    case ']':
                        openBrackets--;
                        break;
                }

                // the balancing ']' itself is not part of the label
                if (openBrackets != 0)
                {
                    labelEnd++;
                }
            }

            if (openBrackets != 0)
            {
                throw new InvalidSmilesException("unparsable label in bracket atom",
                                                 buffer,
                                                 buffer.Position - 1);
            }

            var label = buffer.Substr(labelStart, labelEnd);
            hasAstrix = true;
            return new AtomImpl.BracketAtom(label);
        }
Beispiel #5
0
        /// <summary>
        /// Read a molecule from the character buffer. Characters are consumed one
        /// at a time and dispatched on: atom symbols add an atom, digits and '%'
        /// handle ring bonds, bond symbols record the pending bond, parentheses
        /// manipulate the branch stack, and whitespace or a line break ends the
        /// SMILES (text after a space or tab is stored as the title).
        /// </summary>
        /// <param name="buffer">a character buffer</param>
        /// <exception cref="InvalidSmilesException">invalid grammar</exception>
        private void ReadSmiles(CharBuffer buffer)
        {
            // primary dispatch
            while (buffer.HasRemaining())
            {
                char c = buffer.Get();
                switch (c)
                {
                // aliphatic subset
                case '*':
                    // remember that a wildcard atom was encountered
                    hasAstrix = true;
                    AddAtom(AtomImpl.AliphaticSubset.Any, buffer);
                    break;

                case 'B':
                    // two-letter symbol 'Br' takes precedence over 'B'
                    if (buffer.GetIf('r'))
                    {
                        AddAtom(AtomImpl.AliphaticSubset.Bromine, buffer);
                    }
                    else
                    {
                        AddAtom(AtomImpl.AliphaticSubset.Boron, buffer);
                    }
                    break;

                case 'C':
                    // two-letter symbol 'Cl' takes precedence over 'C'
                    if (buffer.GetIf('l'))
                    {
                        AddAtom(AtomImpl.AliphaticSubset.Chlorine, buffer);
                    }
                    else
                    {
                        AddAtom(AtomImpl.AliphaticSubset.Carbon, buffer);
                    }
                    break;

                case 'N':
                    AddAtom(AtomImpl.AliphaticSubset.Nitrogen, buffer);
                    break;

                case 'O':
                    AddAtom(AtomImpl.AliphaticSubset.Oxygen, buffer);
                    break;

                case 'P':
                    AddAtom(AtomImpl.AliphaticSubset.Phosphorus, buffer);
                    break;

                case 'S':
                    AddAtom(AtomImpl.AliphaticSubset.Sulfur, buffer);
                    break;

                case 'F':
                    AddAtom(AtomImpl.AliphaticSubset.Fluorine, buffer);
                    break;

                case 'I':
                    AddAtom(AtomImpl.AliphaticSubset.Iodine, buffer);
                    break;

                // aromatic subset - each lower-case symbol also flags the graph
                // as containing aromatic atoms
                case 'b':
                    AddAtom(AtomImpl.AromaticSubset.Boron, buffer);
                    g.AddFlags(Graph.HAS_AROM);
                    break;

                case 'c':
                    AddAtom(AtomImpl.AromaticSubset.Carbon, buffer);
                    g.AddFlags(Graph.HAS_AROM);
                    break;

                case 'n':
                    AddAtom(AtomImpl.AromaticSubset.Nitrogen, buffer);
                    g.AddFlags(Graph.HAS_AROM);
                    break;

                case 'o':
                    AddAtom(AtomImpl.AromaticSubset.Oxygen, buffer);
                    g.AddFlags(Graph.HAS_AROM);
                    break;

                case 'p':
                    AddAtom(AtomImpl.AromaticSubset.Phosphorus, buffer);
                    g.AddFlags(Graph.HAS_AROM);
                    break;

                case 's':
                    AddAtom(AtomImpl.AromaticSubset.Sulfur, buffer);
                    g.AddFlags(Graph.HAS_AROM);
                    break;

                // H/D/T outside brackets are rejected in strict mode.
                // D/T for hydrogen isotopes - non-standard but the OpenSMILES
                // spec says it's possible. The D and T here are automatically
                // converted to [2H] and [3H].
                case 'H':
                    if (strict)
                    {
                        throw new InvalidSmilesException("hydrogens should be specified in square brackets - '[H]'",
                                                         buffer);
                    }
                    AddAtom(AtomImpl.EXPLICIT_HYDROGEN, buffer);
                    break;

                case 'D':
                    if (strict)
                    {
                        throw new InvalidSmilesException("deuterium should be specified as a hydrogen isotope - '[2H]'",
                                                         buffer);
                    }
                    AddAtom(AtomImpl.DEUTERIUM, buffer);
                    break;

                case 'T':
                    if (strict)
                    {
                        throw new InvalidSmilesException("tritium should be specified as a hydrogen isotope - '[3H]'",
                                                         buffer);
                    }
                    AddAtom(AtomImpl.TRITIUM, buffer);
                    break;

                // bracket atom - the '[' has already been consumed above, the
                // remainder of the atom is parsed by ReadBracketAtom
                case '[':
                    AddAtom(ReadBracketAtom(buffer), buffer);
                    break;

                // ring bonds - a single digit is the ring number itself
                case '0':
                case '1':
                case '2':
                case '3':
                case '4':
                case '5':
                case '6':
                case '7':
                case '8':
                case '9':
                    Ring(c - '0', buffer);
                    break;

                // '%' introduces a multi-digit ring number
                case '%':
                    // presumably reads at most two digits and returns a negative
                    // value when none are present - TODO confirm against CharBuffer
                    int num = buffer.GetNumber(2);
                    if (num < 0)
                    {
                        throw new InvalidSmilesException("a number (<digit>+) must follow '%':", buffer);
                    }
                    // strict mode does not accept a value below 10 after '%'
                    if (strict && num < 10)
                    {
                        throw new InvalidSmilesException("two digits must follow '%'", buffer);
                    }
                    Ring(num, buffer);
                    // NOTE(review): lastBondPos is updated here but not in the
                    // single-digit ring case above - confirm this is intended
                    lastBondPos = buffer.Position;
                    break;

                // bond/dot - only one explicit bond symbol may be pending at a
                // time; each case records the bond and the position after it
                case '-':
                    if (bond != Bond.Implicit)
                    {
                        throw new InvalidSmilesException("Multiple bonds specified:", buffer);
                    }
                    bond        = Bond.Single;
                    lastBondPos = buffer.Position;
                    break;

                case '=':
                    if (bond != Bond.Implicit)
                    {
                        throw new InvalidSmilesException("Multiple bonds specified:", buffer);
                    }
                    bond        = Bond.Double;
                    lastBondPos = buffer.Position;
                    break;

                case '#':
                    if (bond != Bond.Implicit)
                    {
                        throw new InvalidSmilesException("Multiple bonds specified:", buffer);
                    }
                    bond        = Bond.Triple;
                    lastBondPos = buffer.Position;
                    break;

                case '$':
                    if (bond != Bond.Implicit)
                    {
                        throw new InvalidSmilesException("Multiple bonds specified:", buffer);
                    }
                    bond        = Bond.Quadruple;
                    lastBondPos = buffer.Position;
                    break;

                case ':':
                    if (bond != Bond.Implicit)
                    {
                        throw new InvalidSmilesException("Multiple bonds specified:", buffer);
                    }
                    // an explicit aromatic bond also flags the graph as aromatic
                    g.AddFlags(Graph.HAS_AROM);
                    bond        = Bond.Aromatic;
                    lastBondPos = buffer.Position;
                    break;

                case '/':
                    if (bond != Bond.Implicit)
                    {
                        throw new InvalidSmilesException("Multiple bonds specified:", buffer);
                    }
                    bond        = Bond.Up;
                    lastBondPos = buffer.Position;
                    // directional bond: flag the graph as having bond stereo
                    g.AddFlags(Graph.HAS_BND_STRO);
                    break;

                case '\\':
                    // we allow C\\C=C/C since it could be an escaping error
                    // (a pending Down bond followed by another '\' is tolerated)
                    if (bond != Bond.Implicit && bond != Bond.Down)
                    {
                        throw new InvalidSmilesException("Multiple bonds specified:", buffer);
                    }
                    bond        = Bond.Down;
                    lastBondPos = buffer.Position;
                    g.AddFlags(Graph.HAS_BND_STRO);
                    break;

                case '.':
                    // a disconnection may not follow an explicit bond symbol
                    if (bond != Bond.Implicit)
                    {
                        throw new InvalidSmilesException("Bond specified before disconnection:", buffer);
                    }
                    bond = Bond.Dot;
                    break;

                // branching
                case '(':
                    // a branch needs an entry on the stack to start from
                    if (stack.IsEmpty)
                    {
                        throw new InvalidSmilesException("Cannot open branch at this position, SMILES may be truncated:", buffer);
                    }
                    // duplicate the top entry; the matching ')' pops it again
                    stack.Push(stack.Peek());
                    break;

                case ')':
                    // fewer than two entries means no branch is currently open
                    if (stack.Count < 2)
                    {
                        throw new InvalidSmilesException("Closing of an unopened branch, SMILES may be truncated:", buffer);
                    }
                    stack.Pop();
                    break;

                // termination
                case '\t':
                case ' ':
                    // String suffix is title: everything up to the next line
                    // break (or the end of the buffer) is collected
                    var sb = new StringBuilder();
                    while (buffer.HasRemaining())
                    {
                        c = buffer.Get();
                        if (c == '\n' || c == '\r')
                        {
                            break;
                        }
                        sb.Append(c);
                    }
                    g.Title = sb.ToString();
                    return;

                // a line break ends the SMILES without a title
                case '\n':
                case '\r':
                    return;

                default:
                    throw new InvalidSmilesException("unexpected character:", buffer);
                }
            }
        }