Пример #1
0
        /// <summary>
        /// Converts the specified string representation of a tree element to its <see cref="AdTreeElement"/>
        /// equivalent and returns a value that indicates whether the conversion succeeded.
        /// </summary>
        /// <param name="element">
        /// When this method returns, contains the <see cref="AdTreeElement"/> parsed from
        /// <paramref name="line"/> if the conversion succeeded, or <c>null</c> if the conversion failed.
        /// The conversion fails if <paramref name="line"/> is <c>null</c> or does not match any of the
        /// recognized element formats. This parameter is passed uninitialized.
        /// </param>
        /// <param name="line">The string representation of the element.</param>
        /// <param name="safeParse">
        /// If set to <c>true</c>, a line that matches none of the regular patterns (node, leaf, punctuation)
        /// is rejected immediately: the fallback heuristics are skipped and no warning is reported.
        /// </param>
        /// <param name="monitor">
        /// The evaluation monitor that receives a warning when the line cannot be parsed; may be <c>null</c>.
        /// </param>
        /// <returns>
        /// <c>true</c> if the <paramref name="line"/> parameter was converted successfully; otherwise, <c>false</c>.
        /// </returns>
        private static bool TryParseElement(out AdTreeElement element, string line, bool safeParse, Monitor monitor)
        {
            // Honor the documented contract: a null line is a failed conversion, not an exception
            // raised by the first pattern match.
            if (line == null)
            {
                element = null;
                return false;
            }

            var m = nodePattern.Match(line);
            if (m.Success)
            {
                element = new AdNode {
                    Level        = m.Groups[1].Length + 1,
                    SyntacticTag = m.Groups[2].Value
                };
                return true;
            }

            m = leafPattern.Match(line);
            if (m.Success)
            {
                element = new AdLeaf {
                    Level            = m.Groups[1].Length + 1,
                    SyntacticTag     = m.Groups[2].Value,
                    FunctionalTag    = m.Groups[3].Value,
                    Lemma            = m.Groups[4].Value,
                    SecondaryTag     = m.Groups[5].Value,
                    MorphologicalTag = m.Groups[6].Value,
                    Lexeme           = m.Groups[7].Value
                };
                return true;
            }

            m = punctuationPattern.Match(line);
            if (m.Success)
            {
                element = new AdLeaf {
                    Level  = m.Groups[1].Length + 1,
                    Lexeme = m.Groups[2].Value
                };
                return true;
            }

            if (safeParse)
            {
                element = null;
                return false;
            }

            // Knuppe: The most bizarre cases I found, were invalid data (like HTML, inside the sentences)
            //         so I decided to implement the safeParse attribute, to ignore this junk...
            //
            //         I think any program should adapt to an error in a file. otherwise the files will never
            //         be fixed...

            // Process the bizarre cases. These markers are dropped silently (no warning is reported).
            // Ordinal comparison: the input is a machine-generated format, not linguistic text.
            if (line.Equals("_") ||
                line.StartsWith("<lixo", StringComparison.Ordinal) ||
                line.StartsWith("pause", StringComparison.Ordinal))
            {
                element = null;
                return false;
            }

            if (line.StartsWith("=", StringComparison.Ordinal))
            {
                m = bizarreLeafPattern.Match(line);
                if (m.Success)
                {
                    var leaf = new AdLeaf {
                        Level            = m.Groups[1].Length + 1,
                        SyntacticTag     = m.Groups[2].Value,
                        Lemma            = m.Groups[3].Value,
                        MorphologicalTag = m.Groups[4].Value,
                        Lexeme           = m.Groups[5].Value
                    };

                    // Drop the first character of any lemma longer than two characters.
                    if (!string.IsNullOrEmpty(leaf.Lemma) && leaf.Lemma.Length > 2)
                    {
                        leaf.Lemma = leaf.Lemma.Substring(1);
                    }

                    element = leaf;
                    return true;
                }

                // Last resort: treat the text after the final '=' as a bare lexeme.
                var level = line.LastIndexOf('=') + 1;
                if (level > 0 && level < line.Length - 2)
                {
                    // NOTE(review): this skips the character immediately following the last '=' —
                    // confirm that is intended.
                    var lexeme = line.Substring(level + 1);
                    if (Regex.IsMatch(lexeme, "\\w.*?[\\.<>].*"))
                    {
                        element = new AdLeaf {
                            Level  = level + 1,
                            Lexeme = lexeme
                        };
                        return true;
                    }
                }
            }

            if (monitor != null)
            {
                monitor.OnWarning("Couldn't parse leaf: " + line);
            }

            element = null;
            return false;
        }
Пример #2
0
 /// <summary>
 /// Adds the given tree element to <c>Elements</c>.
 /// </summary>
 /// <param name="element">The tree element to add.</param>
 public void AddElement(AdTreeElement element) {
     var elements = Elements;
     elements.Add(element);
 }
Пример #3
0
        /// <summary>
        /// Tries to convert the string representation of a tree element into an <see cref="AdTreeElement"/>
        /// and reports whether the conversion succeeded.
        /// </summary>
        /// <param name="element">
        /// When this method returns, holds the <see cref="AdTreeElement"/> parsed from
        /// <paramref name="line"/> on success, or <c>null</c> on failure. The conversion fails when
        /// <paramref name="line"/> is <c>null</c> or matches none of the recognized element formats.
        /// This parameter is passed uninitialized.
        /// </param>
        /// <param name="line">The string representation of the element.</param>
        /// <param name="safeParse">
        /// If set to <c>true</c>, a line that matches none of the regular patterns (node, leaf, punctuation)
        /// fails right away, without the fallback heuristics and without a warning.
        /// </param>
        /// <param name="monitor">
        /// The evaluation monitor notified with a warning when the line cannot be parsed; may be <c>null</c>.
        /// </param>
        /// <returns>
        /// <c>true</c> if the <paramref name="line"/> parameter was converted successfully; otherwise, <c>false</c>.
        /// </returns>
        private static bool TryParseElement(out AdTreeElement element, string line, bool safeParse, Monitor monitor) {
            // A null line is documented as a failed conversion; bail out before the first
            // pattern match can throw on it.
            if (line == null) {
                element = null;
                return false;
            }

            var m = nodePattern.Match(line);
            if (m.Success) {
                element = new AdNode {
                    Level = m.Groups[1].Length + 1,
                    SyntacticTag = m.Groups[2].Value
                };
                return true;
            }

            m = leafPattern.Match(line);
            if (m.Success) {
                element = new AdLeaf {
                    Level = m.Groups[1].Length + 1,
                    SyntacticTag = m.Groups[2].Value,
                    FunctionalTag = m.Groups[3].Value,
                    Lemma = m.Groups[4].Value,
                    SecondaryTag = m.Groups[5].Value,
                    MorphologicalTag = m.Groups[6].Value,
                    Lexeme = m.Groups[7].Value
                };
                return true;
            }

            m = punctuationPattern.Match(line);
            if (m.Success) {
                element = new AdLeaf {
                    Level = m.Groups[1].Length + 1,
                    Lexeme = m.Groups[2].Value
                };
                return true;
            }

            if (safeParse) {
                element = null;
                return false;
            }

            // Knuppe: The most bizarre cases I found, were invalid data (like HTML, inside the sentences)
            //         so I decided to implement the safeParse attribute, to ignore this junk...
            //
            //         I think any program should adapt to an error in a file. otherwise the files will never
            //         be fixed...

            // Process the bizarre cases. Known junk markers are discarded without a warning.
            // Comparisons are ordinal because the corpus format is not culture-dependent text.
            if (line.Equals("_") ||
                line.StartsWith("<lixo", StringComparison.Ordinal) ||
                line.StartsWith("pause", StringComparison.Ordinal)) {
                element = null;
                return false;
            }

            if (line.StartsWith("=", StringComparison.Ordinal)) {
                m = bizarreLeafPattern.Match(line);
                if (m.Success) {
                    var leaf = new AdLeaf {
                        Level = m.Groups[1].Length + 1,
                        SyntacticTag = m.Groups[2].Value,
                        Lemma = m.Groups[3].Value,
                        MorphologicalTag = m.Groups[4].Value,
                        Lexeme = m.Groups[5].Value
                    };

                    // Lemmas longer than two characters lose their first character.
                    if (!string.IsNullOrEmpty(leaf.Lemma) && leaf.Lemma.Length > 2) {
                        leaf.Lemma = leaf.Lemma.Substring(1);
                    }

                    element = leaf;
                    return true;
                }

                // Fallback: use the text that follows the last '=' as a bare lexeme.
                var level = line.LastIndexOf('=') + 1;
                if (level > 0 && level < line.Length - 2) {
                    // NOTE(review): the character right after the last '=' is skipped here —
                    // confirm that is intended.
                    var lexeme = line.Substring(level + 1);
                    if (Regex.IsMatch(lexeme, "\\w.*?[\\.<>].*")) {
                        element = new AdLeaf {
                            Level = level + 1,
                            Lexeme = lexeme
                        };
                        return true;
                    }
                }
            }

            if (monitor != null) {
                monitor.OnWarning("Couldn't parse leaf: " + line);
            }

            element = null;
            return false;
        }
Пример #4
0
 /// <summary>
 /// Adds the supplied tree element to <c>Elements</c>.
 /// </summary>
 /// <param name="element">The tree element to add.</param>
 public void AddElement(AdTreeElement element)
 {
     var target = Elements;
     target.Add(element);
 }