/// <summary>
/// Extracts n-grams and their frequencies from an XML result document.
/// </summary>
/// <param name="xmlData">
/// XML text. N-grams are read from <c>ngram</c> elements found anywhere below
/// <c>/results/ngrams</c>; each element supplies a <c>text</c> and an <c>fq</c> attribute.
/// </param>
/// <param name="sortByFrequency">
/// When <c>true</c>, the returned list is ordered by descending frequency.
/// </param>
/// <returns>
/// A list of (text, frequency) pairs. The list is empty when no <c>ngram</c> nodes are found.
/// Entries whose decoded text is null or empty are skipped.
/// </returns>
/// <exception cref="FormatException">An <c>fq</c> attribute is not a valid integer.</exception>
/// <exception cref="OverflowException">An <c>fq</c> attribute does not fit into an <see cref="int"/>.</exception>
public List<Tuple<string, double>> ParseNGrams(string xmlData, bool sortByFrequency)
{
    HtmlAgilityPack.XmlDocument xmlDoc = new HtmlAgilityPack.XmlDocument();
    xmlDoc.LoadXml(xmlData);

    List<Tuple<string, double>> ngrams = new List<Tuple<string, double>>();
    HtmlAgilityPack.HtmlNodeCollection ngramNodes = xmlDoc.DocumentNode.SelectNodes("/results/ngrams//ngram");
    if (ngramNodes == null)
    {
        return ngrams;
    }

    foreach (var ngramNode in ngramNodes)
    {
        // Lower-case with the invariant culture: the text is machine data, so the result
        // must not depend on the UI culture of the machine (e.g. the Turkish dotless i).
        string loweredText = ngramNode.GetAttributeValue("text", "").ToLowerInvariant();
        string text = GenLib.Text.Text.DecodeXMLString(loweredText);
        if (string.IsNullOrEmpty(text))
        {
            continue;
        }

        // Parse with the invariant format, matching how ParseKeywords reads its weights.
        string fqStr = ngramNode.GetAttributeValue("fq", "0");
        int fq = int.Parse(fqStr, System.Globalization.NumberFormatInfo.InvariantInfo);

        ngrams.Add(new Tuple<string, double>(text, fq));
    }

    if (sortByFrequency)
    {
        // Operands are swapped to obtain descending order.
        ngrams.Sort((x, y) => y.Item2.CompareTo(x.Item2));
    }

    return ngrams;
}
/// <summary>
/// Handles a response message: parses it as XML, reads the sequence number and the
/// type of the originating request, and dispatches the response payload accordingly.
/// </summary>
/// <remarks>
/// Handled request types:
/// <c>GetTagInfo</c> (stores the returned tags and requests the next page when the page was full),
/// <c>GetAnnotationOntologyRDF</c>, <c>GetSimilarConcepts</c>, <c>ExecuteCommand</c>,
/// <c>SetData</c> and <c>Query</c>. Any other type is reported through a message box.
/// After successful processing the sequence id is removed from <c>_sequenceIdToType</c>.
/// Any exception raised while processing is written to the trace output and not rethrown;
/// in that case the sequence id is left in <c>_sequenceIdToType</c>.
/// NOTE(review): the status label is updated through Invoke, which suggests this handler may run
/// off the UI thread, while MessageBox.Show in the fallback branch is called directly - confirm.
/// </remarks>
/// <param name="message">The raw XML text of the received event.</param>
void KEUIResponse_OnMessageReceived(string message)
{
    try
    {
        HtmlAgilityPack.XmlDocument eventDoc = new HtmlAgilityPack.XmlDocument();
        eventDoc.LoadXml(message);

        var sequenceIdNode = eventDoc.DocumentNode.SelectSingleNode("//ns1:head/ns1:sequencenumber");
        string sequenceId = sequenceIdNode.InnerText;
        Trace.WriteLine("Recieved response to " + sequenceId);

        var responseNode = eventDoc.DocumentNode.SelectSingleNode("//ns1:eventData/s1:keuiResponse");
        string responseData = responseNode.InnerHtml;

        var requestTypeNode = eventDoc.DocumentNode.SelectSingleNode("//ns1:eventData/s1:keuiRequest/s1:requestType");
        var requestDataNode = eventDoc.DocumentNode.SelectSingleNode("//ns1:eventData/s1:keuiRequest/s1:requestData");
        string requestType = requestTypeNode.InnerText;

        switch (requestType)
        {
            case "GetTagInfo":
            {
                // The paging window of the request that produced this response.
                var paramNode = requestDataNode.SelectSingleNode("./params");
                int startIndex = paramNode.GetAttributeValue("startIndex", 0);
                int count = paramNode.GetAttributeValue("count", 1000);

                HtmlAgilityPack.HtmlNodeCollection tags =
                    responseNode.SelectNodes("./TagInfoBase") ?? new HtmlAgilityPack.HtmlNodeCollection(null);

                foreach (var tag in tags)
                {
                    TagInfoBase newTag = TagInfoBase.FromXML(tag.OuterHtml);

                    _tagIdToTagInfo[newTag.TagId] = newTag;
                    if (!string.IsNullOrEmpty(newTag.TagIdStr))
                    {
                        _tagIdStrToTagInfo[newTag.TagIdStr] = newTag;
                    }

                    if (!_tagIdToChildrenInfo.ContainsKey(newTag.ParentTagId))
                    {
                        _tagIdToChildrenInfo[newTag.ParentTagId] = new List<TagInfoBase>();
                    }
                    _tagIdToChildrenInfo[newTag.ParentTagId].Add(newTag);
                }

                // A page filled up to the requested count is treated as "more tags may follow".
                bool pageWasFull = tags.Count == count;
                if (pageWasFull)
                {
                    string newSequenceId;
                    string request = Defaults.BuildRequest(
                        PublisherName,
                        "GetTagInfo",
                        String.Format("<params startIndex=\"{0}\" count=\"{1}\" />", startIndex + count, count),
                        out newSequenceId);
                    SendRequest(request, newSequenceId, "GetTagInfo");
                }

                Invoke((Action)(() =>
                {
                    StatusText.Text = pageWasFull
                        ? String.Format("Loading tags... ({0} tags loaded)", _tagIdToTagInfo.Count)
                        : "Tags successfully loaded.";
                }));
                break;
            }

            case "GetAnnotationOntologyRDF":
                LoadOntology(responseData);
                break;

            case "GetSimilarConcepts":
            {
                HtmlAgilityPack.HtmlNodeCollection conceptNodes =
                    responseNode.SelectNodes(".//item") ?? new HtmlAgilityPack.HtmlNodeCollection(null);

                foreach (HtmlAgilityPack.HtmlNode conceptNode in conceptNodes)
                {
                    string uri = conceptNode.GetAttributeValue("uri", "");
                    string label = conceptNode.GetAttributeValue("label", "");
                    AddRelatedConcept(label, uri, _conceptRelations[0]);
                }
                break;
            }

            case "ExecuteCommand":
                Response_ExecuteCommand(responseData, sequenceId, message);
                break;

            case "SetData":
                Response_SetData(responseData, sequenceId, message);
                break;

            case "Query":
                Response_Query(responseData, sequenceId, message);
                break;

            default:
                MessageBox.Show("Don't know how to process request type " + requestType + ".");
                break;
        }

        // remove the id of the processed event
        if (_sequenceIdToType.ContainsKey(sequenceId))
        {
            _sequenceIdToType.Remove(sequenceId);
        }
    }
    catch (Exception ex)
    {
        // Log the full exception (type, message and stack trace): a missing node surfaces
        // as a NullReferenceException whose message alone does not identify the cause.
        Trace.WriteLine("Exception while processing response: " + ex);
    }
}
/// <summary>
/// Extracts keywords and their weights from an XML result document.
/// </summary>
/// <remarks>
/// Each keyword is lower-cased, decoded, stripped of <c>-</c> and <c>_</c> characters and,
/// if it contains a colon, reduced to the part after the first colon. Keywords that are
/// empty after this normalization are skipped. When the same normalized keyword occurs
/// more than once, a single entry is kept at the position of its first occurrence,
/// carrying the highest weight seen.
/// </remarks>
/// <param name="xmlData">
/// XML text. Keywords are read from <c>kw</c> elements found anywhere below
/// <c>/results/keywords</c>; each element supplies a <c>str</c> and a <c>wgt</c> attribute.
/// </param>
/// <returns>
/// A list of (keyword, weight) pairs. The list is empty when no <c>kw</c> nodes are found.
/// </returns>
/// <exception cref="FormatException">A <c>wgt</c> attribute is not a valid number.</exception>
public List<Tuple<string, double>> ParseKeywords(string xmlData)
{
    HtmlAgilityPack.XmlDocument xmlDoc = new HtmlAgilityPack.XmlDocument();
    xmlDoc.LoadXml(xmlData);

    List<Tuple<string, double>> keywords = new List<Tuple<string, double>>();
    HtmlAgilityPack.HtmlNodeCollection kwNodes = xmlDoc.DocumentNode.SelectNodes("/results/keywords//kw");
    if (kwNodes == null)
    {
        return keywords;
    }

    // Maps a normalized keyword to its position in 'keywords', so that duplicates are
    // found without rescanning the whole list for every node.
    Dictionary<string, int> indexByKeyword = new Dictionary<string, int>();

    foreach (var kwNode in kwNodes)
    {
        // Lower-case with the invariant culture: the text is used as a de-duplication key
        // and must not depend on the UI culture of the machine.
        string loweredText = kwNode.GetAttributeValue("str", "").ToLowerInvariant();
        string text = GenLib.Text.Text.DecodeXMLString(loweredText);
        text = text.Replace("-", "").Replace("_", "");
        if (string.IsNullOrEmpty(text))
        {
            continue;
        }

        // Keep only the part after the first colon.
        int colonIndex = text.IndexOf(':');
        if (colonIndex >= 0)
        {
            text = text.Substring(colonIndex + 1);
            if (text.Length == 0)
            {
                // Nothing followed the colon; do not emit an empty keyword.
                continue;
            }
        }

        string wgtStr = kwNode.GetAttributeValue("wgt", "0.0");
        double wgt = double.Parse(wgtStr, System.Globalization.NumberFormatInfo.InvariantInfo);

        int existingIndex;
        if (indexByKeyword.TryGetValue(text, out existingIndex))
        {
            // Already seen: keep the larger of the two weights.
            if (keywords[existingIndex].Item2 < wgt)
            {
                keywords[existingIndex] = new Tuple<string, double>(text, wgt);
            }
        }
        else
        {
            indexByKeyword[text] = keywords.Count;
            keywords.Add(new Tuple<string, double>(text, wgt));
        }
    }

    return keywords;
}