/// <summary>
/// Parses <paramref name="data"/> into a tree of nested regions delimited by
/// <paramref name="startTag"/> and <paramref name="endTag"/>.
/// </summary>
/// <remarks>
/// Every start tag opens a child of the node currently being built. When the matching end tag
/// is reached, the node's Data is set to the text that follows the start-tag text, up to the
/// node's first nested start tag, or up to its own end tag when it has no nested nodes.
/// Scanning stops at the first position after which neither tag occurs. Nodes whose end tag
/// is missing stay in the tree with Data never assigned by this method.
/// </remarks>
/// <param name="startTag">Text that opens a node; must not be null or empty.</param>
/// <param name="endTag">Text that closes a node; must not be null or empty.</param>
/// <param name="data">Text to scan; must not be null.</param>
/// <returns>The root node (created with a null parent); top-level regions are its Childs.</returns>
/// <exception cref="ArgumentNullException">
/// An argument is null, or an end tag is found without a preceding unmatched start tag
/// (reported against the "data" parameter; the exception type is kept for existing callers).
/// </exception>
/// <exception cref="ArgumentException">A tag is an empty string.</exception>
public static StringsTree ParseTreeStructure(string startTag, string endTag, string data)
{
    if (startTag == null)
    {
        throw new ArgumentNullException("startTag");
    }
    if (endTag == null)
    {
        throw new ArgumentNullException("endTag");
    }
    if (data == null)
    {
        throw new ArgumentNullException("data");
    }
    // An empty tag matches at the current position without moving the cursor forward,
    // which would make the scan loop below spin forever.
    if (startTag.Length == 0)
    {
        throw new ArgumentException("Tag must not be empty", "startTag");
    }
    if (endTag.Length == 0)
    {
        throw new ArgumentException("Tag must not be empty", "endTag");
    }

    int startTagLength = startTag.Length;
    int endTagLength = endTag.Length;

    // Pass 1: record every tag occurrence in document order.
    List<TreeParseData> parseList = new List<TreeParseData>();
    int idx = 0;
    int depth = 0;
    while (idx < data.Length)
    {
        // Ordinal search: the cursor is advanced by the tag's raw length, so the match
        // must be an exact character-for-character one.
        int resultIdxStart = data.IndexOf(startTag, idx, StringComparison.Ordinal);
        if (resultIdxStart < 0)
        {
            resultIdxStart = int.MaxValue;
        }

        int resultIdxEnd = data.IndexOf(endTag, idx, StringComparison.Ordinal);
        if (resultIdxEnd < 0)
        {
            resultIdxEnd = int.MaxValue;
        }

        if (resultIdxStart < resultIdxEnd)
        {
            depth++;
            parseList.Add(new TreeParseData(true, resultIdxStart));
            idx = resultIdxStart + startTagLength;
        }
        else if (resultIdxStart > resultIdxEnd)
        {
            depth--;
            if (depth < 0)
            {
                // (paramName, message) - the original call had the two arguments swapped.
                throw new ArgumentNullException("data", "Wrong data structure");
            }
            parseList.Add(new TreeParseData(false, resultIdxEnd));
            idx = resultIdxEnd + endTagLength;
        }
        else
        {
            //equal, end found (the tags are distinct non-empty strings, so both are int.MaxValue here in practice)
            break;
        }
    }

    // Pass 2: turn the flat tag list into a tree.
    StringsTree root = new StringsTree(null, 0);
    StringsTree current = root;
    for (int i = 0; i < parseList.Count; i++)
    {
        if (parseList[i].IsStartTag)
        {
            // Offset is the index of this node's start tag inside parseList.
            StringsTree child = new StringsTree(current, i);
            current.Childs.Add(child);
            current = child;
        }
        else
        {
            // End tag: finish the current node. The depth check in pass 1 guarantees
            // that current is never the root at this point.
            int childStart = parseList[current.Offset].Position;
            int childEnd = parseList[i].Position;
            if (parseList[current.Offset + 1].IsStartTag)
            {
                // The node has nested nodes: keep only the text before the first one.
                childEnd = parseList[current.Offset + 1].Position;
            }
            current.Data = data.Substring(childStart + startTagLength, childEnd - childStart - startTagLength);
            current = current.Parent;
        }
    }

    // Always hand back the root, even when some start tags were never closed
    // (in that case current still points at the innermost unclosed node).
    return root;
}
/// <summary>
/// Initializes a node by storing the given parent reference and offset.
/// The new node is not added to the parent's child list here.
/// </summary>
/// <param name="parent">Value stored in the parent field; ParseTreeStructure passes null for the root node.</param>
/// <param name="offset">
/// Value stored in the offset field; ParseTreeStructure passes the index of the node's
/// start-tag entry in its parse list (0 for the root).
/// </param>
public StringsTree(StringsTree parent, int offset)
{
    this.offset = offset;
    this.parent = parent;
}
/// <summary>
/// Requests the dictionary page for <paramref name="phrase"/> from slovnenya.com and fills
/// <paramref name="result"/> from the returned HTML.
/// </summary>
/// <remarks>
/// The article is taken from between two identical hr separators in the response and parsed
/// as nested table regions. For each table after the first one in the article, a child result
/// is created from the text found after the 12pt font-size marker, and the items read from
/// that table's first nested table are added as its translations.
/// </remarks>
/// <param name="phrase">Phrase to look up; URL-encoded into the request path and used as the article caption.</param>
/// <param name="languagesPair">Passed through to CreateNewResult for each child result.</param>
/// <param name="subject">Passed through to CreateNewResult for each child result.</param>
/// <param name="result">Result to populate; also handed to the web request helper.</param>
/// <param name="networkSetting">Network settings handed to the web request helper.</param>
/// <exception cref="TranslationException">
/// The page reports no results (ResultNotFound is set first), the page reports extraneous
/// symbols in the query, or the table structure of the article is not the expected one.
/// </exception>
protected override void DoTranslate(string phrase, LanguagePair languagesPair, string subject, Result result, NetworkSetting networkSetting)
{
    // Separator markup that brackets the article on both sides.
    const string articleSeparator = "<hr style=\"border:0;background-color:grey;height:1px;width:92%;text-align:center\" />";
    const string wrongStructureMessage = "Wrong data structure";

    string query = "http://www.slovnenya.com/dictionary/{0}";
    // UrlEncode turns spaces into '+'; the phrase is placed in the URL path, so use %20 instead.
    query = string.Format(query, HttpUtility.UrlEncode(phrase).Replace("+", "%20"));

    WebRequestHelper helper = new WebRequestHelper(result, new Uri(query), networkSetting, WebRequestContentType.UrlEncodedGet);
    helper.AcceptLanguage = "en-us,en";
    string responseFromServer = helper.GetResponse();

    if (responseFromServer.Contains("did not return any results</div>")
        || responseFromServer.Contains("</span>` did not return any results"))
    {
        result.ResultNotFound = true;
        throw new TranslationException("Nothing found");
    }

    if (responseFromServer.Contains("Query contains extraneous symbol(s)<"))
    {
        throw new TranslationException("Query contains extraneous symbols");
    }

    result.ArticleUrl = query;
    result.ArticleUrlCaption = phrase;

    string translation = StringParser.Parse(articleSeparator, articleSeparator, responseFromServer);
    StringsTree tree = StringParser.ParseTreeStructure("<table", "</table>", translation);

    // Descend through two wrapper tables, each of which must hold exactly one nested table.
    if (tree.Childs.Count != 1)
    {
        throw new TranslationException(wrongStructureMessage);
    }
    tree = tree.Childs[0];

    if (tree.Childs.Count != 1)
    {
        throw new TranslationException(wrongStructureMessage);
    }
    tree = tree.Childs[0];

    if (tree.Childs.Count == 0)
    {
        throw new TranslationException(wrongStructureMessage);
    }

    //get word
    if (tree.Childs[0].Childs.Count != 1)
    {
        throw new TranslationException(wrongStructureMessage);
    }
    // The parsed word itself is not used below. The call is kept because StringParser.Parse's
    // behaviour when the markers are missing is not visible here - TODO confirm and drop if it has none.
    StringParser.Parse("font-size:14pt\">", "<", tree.Childs[0].Childs[0].Data);

    // Every table after the first is processed as one labelled group of translations.
    for (int i = 1; i < tree.Childs.Count; i++)
    {
        StringsTree abbrTree = tree.Childs[i];
        if (abbrTree.Childs.Count == 0)
        {
            // Report malformed markup the same way as the checks above instead of
            // failing with an index-out-of-range error on Childs[0].
            throw new TranslationException(wrongStructureMessage);
        }

        string abbr = StringParser.Parse("font-size:12pt\">", "<", abbrTree.Data);
        Result abbrResult = CreateNewResult(abbr, languagesPair, subject);
        result.Childs.Add(abbrResult);

        StringParser parser = new StringParser(abbrTree.Childs[0].Data);
        string[] translations = parser.ReadItemsList("font-size:12pt\">", "<");
        foreach (string trans in translations)
        {
            abbrResult.Translations.Add(trans);
        }
    }
}