/// <summary>
/// Converts the specified string representation of a tree element to its <see cref="AdTreeElement"/>
/// equivalent and returns a value that indicates whether the conversion succeeded.
/// </summary>
/// <param name="element">
/// When this method returns, contains the <see cref="AdTreeElement"/> parsed from
/// <paramref name="line"/> if the conversion succeeded, or <c>null</c> if it failed.
/// The conversion fails if <paramref name="line"/> is <c>null</c> or does not match any of the
/// recognized element patterns. This parameter is passed uninitialized.
/// </param>
/// <param name="line">The string representation of the element.</param>
/// <param name="safeParse">
/// If set to <c>true</c>, a line that matches none of the regular patterns (node, leaf, punctuation)
/// is rejected immediately: the lenient fallbacks are not attempted and no warning is reported.
/// </param>
/// <param name="monitor">
/// The evaluation monitor; may be <c>null</c>. It receives a warning when the line could not be
/// parsed in non-safe mode.
/// </param>
/// <returns>
/// <c>true</c> if <paramref name="line"/> was converted successfully; otherwise, <c>false</c>.
/// </returns>
private static bool TryParseElement(out AdTreeElement element, string line, bool safeParse, Monitor monitor) {
    // Honor the documented contract: a null line is a failed conversion, not an exception
    // (Regex.Match would throw ArgumentNullException).
    if (line == null) {
        element = null;
        return false;
    }

    var m = nodePattern.Match(line);
    if (m.Success) {
        element = new AdNode {
            Level = m.Groups[1].Length + 1,
            SyntacticTag = m.Groups[2].Value
        };
        return true;
    }

    m = leafPattern.Match(line);
    if (m.Success) {
        element = new AdLeaf {
            Level = m.Groups[1].Length + 1,
            SyntacticTag = m.Groups[2].Value,
            FunctionalTag = m.Groups[3].Value,
            Lemma = m.Groups[4].Value,
            SecondaryTag = m.Groups[5].Value,
            MorphologicalTag = m.Groups[6].Value,
            Lexeme = m.Groups[7].Value
        };
        return true;
    }

    m = punctuationPattern.Match(line);
    if (m.Success) {
        element = new AdLeaf {
            Level = m.Groups[1].Length + 1,
            Lexeme = m.Groups[2].Value
        };
        return true;
    }

    if (safeParse) {
        element = null;
        return false;
    }

    // Knuppe: The most bizarre cases I found were invalid data (like HTML inside the sentences),
    // so I decided to implement the safeParse attribute to ignore this junk...
    //
    // I think any program should adapt to an error in a file, otherwise the files will never
    // be fixed...

    // Process the bizarre cases. These known junk lines are dropped silently (no warning).
    // Ordinal comparisons: the markers are fixed tokens of the file format, not linguistic text.
    if (line.Equals("_") ||
        line.StartsWith("<lixo", StringComparison.Ordinal) ||
        line.StartsWith("pause", StringComparison.Ordinal)) {
        element = null;
        return false;
    }

    if (line.StartsWith("=", StringComparison.Ordinal)) {
        m = bizarreLeafPattern.Match(line);
        if (m.Success) {
            var leaf = new AdLeaf {
                Level = m.Groups[1].Length + 1,
                SyntacticTag = m.Groups[2].Value,
                Lemma = m.Groups[3].Value,
                MorphologicalTag = m.Groups[4].Value,
                Lexeme = m.Groups[5].Value
            };

            // Lemmas longer than two characters lose their first character.
            // NOTE(review): presumably this strips a leading delimiter captured by the pattern - confirm.
            if (!string.IsNullOrEmpty(leaf.Lemma) && leaf.Lemma.Length > 2) {
                leaf.Lemma = leaf.Lemma.Substring(1);
            }

            element = leaf;
            return true;
        }

        // Last resort: take what follows the run of '=' markers as a bare lexeme.
        // 'level' is the index just past the last '='.
        var level = line.LastIndexOf('=') + 1;

        // NOTE(review): Substring(level + 1) skips the first character after the last '='.
        // This looks like a possible off-by-one, but the behavior is kept as is - confirm against the corpus.
        if (level > 0 &&
            level < line.Length - 2 &&
            Regex.IsMatch(line.Substring(level + 1), "\\w.*?[\\.<>].*")) {
            element = new AdLeaf {
                Level = level + 1,
                Lexeme = line.Substring(level + 1)
            };
            return true;
        }
    }

    if (monitor != null) {
        monitor.OnWarning("Couldn't parse leaf: " + line);
    }

    element = null;
    return false;
}
/// <summary>
/// Appends the given element to <c>Elements</c>.
/// </summary>
/// <param name="element">The tree element to append.</param>
public void AddElement(AdTreeElement element) => Elements.Add(element);
/// <summary>
/// Converts the specified string representation of a tree element to its <see cref="AdTreeElement"/>
/// equivalent and returns a value that indicates whether the conversion succeeded.
/// </summary>
/// <param name="element">
/// When this method returns, contains the <see cref="AdTreeElement"/> parsed from
/// <paramref name="line"/> if the conversion succeeded, or <c>null</c> if it failed.
/// The conversion fails if <paramref name="line"/> is <c>null</c> or does not match any of the
/// recognized element patterns. This parameter is passed uninitialized.
/// </param>
/// <param name="line">The string representation of the element.</param>
/// <param name="safeParse">
/// If set to <c>true</c>, a line that matches none of the regular patterns (node, leaf, punctuation)
/// is rejected immediately: the lenient fallbacks are not attempted and no warning is reported.
/// </param>
/// <param name="monitor">
/// The evaluation monitor; may be <c>null</c>. It receives a warning when the line could not be
/// parsed in non-safe mode.
/// </param>
/// <returns>
/// <c>true</c> if <paramref name="line"/> was converted successfully; otherwise, <c>false</c>.
/// </returns>
private static bool TryParseElement(out AdTreeElement element, string line, bool safeParse, Monitor monitor) {
    // A null line is reported as a failed conversion, as documented, instead of letting
    // Regex.Match throw ArgumentNullException.
    if (line == null) {
        element = null;
        return false;
    }

    var m = nodePattern.Match(line);
    if (m.Success) {
        element = new AdNode {
            Level = m.Groups[1].Length + 1,
            SyntacticTag = m.Groups[2].Value
        };
        return true;
    }

    m = leafPattern.Match(line);
    if (m.Success) {
        element = new AdLeaf {
            Level = m.Groups[1].Length + 1,
            SyntacticTag = m.Groups[2].Value,
            FunctionalTag = m.Groups[3].Value,
            Lemma = m.Groups[4].Value,
            SecondaryTag = m.Groups[5].Value,
            MorphologicalTag = m.Groups[6].Value,
            Lexeme = m.Groups[7].Value
        };
        return true;
    }

    m = punctuationPattern.Match(line);
    if (m.Success) {
        element = new AdLeaf {
            Level = m.Groups[1].Length + 1,
            Lexeme = m.Groups[2].Value
        };
        return true;
    }

    if (safeParse) {
        element = null;
        return false;
    }

    // Knuppe: The most bizarre cases I found were invalid data (like HTML inside the sentences),
    // so I decided to implement the safeParse attribute to ignore this junk...
    //
    // I think any program should adapt to an error in a file, otherwise the files will never
    // be fixed...

    // Process the bizarre cases. These known junk lines are dropped silently (no warning).
    // Ordinal comparisons: the markers are fixed tokens of the file format, not linguistic text.
    if (line.Equals("_") ||
        line.StartsWith("<lixo", StringComparison.Ordinal) ||
        line.StartsWith("pause", StringComparison.Ordinal)) {
        element = null;
        return false;
    }

    if (line.StartsWith("=", StringComparison.Ordinal)) {
        m = bizarreLeafPattern.Match(line);
        if (m.Success) {
            var leaf = new AdLeaf {
                Level = m.Groups[1].Length + 1,
                SyntacticTag = m.Groups[2].Value,
                Lemma = m.Groups[3].Value,
                MorphologicalTag = m.Groups[4].Value,
                Lexeme = m.Groups[5].Value
            };

            // Lemmas longer than two characters lose their first character.
            // NOTE(review): presumably this strips a leading delimiter captured by the pattern - confirm.
            if (!string.IsNullOrEmpty(leaf.Lemma) && leaf.Lemma.Length > 2) {
                leaf.Lemma = leaf.Lemma.Substring(1);
            }

            element = leaf;
            return true;
        }

        // Last resort: take what follows the run of '=' markers as a bare lexeme.
        // 'level' is the index just past the last '='.
        var level = line.LastIndexOf('=') + 1;

        // NOTE(review): Substring(level + 1) skips the first character after the last '='.
        // This looks like a possible off-by-one, but the behavior is kept as is - confirm against the corpus.
        if (level > 0 &&
            level < line.Length - 2 &&
            Regex.IsMatch(line.Substring(level + 1), "\\w.*?[\\.<>].*")) {
            element = new AdLeaf {
                Level = level + 1,
                Lexeme = line.Substring(level + 1)
            };
            return true;
        }
    }

    if (monitor != null) {
        monitor.OnWarning("Couldn't parse leaf: " + line);
    }

    element = null;
    return false;
}