/// <summary>
/// Indexes one page paragraph by paragraph: fetches the page XML through
/// <c>oneNote.GetPageContent</c>, walks every <c>OE</c> element in it and adds
/// one (pageID, paragraph objectID, cleaned text) tuple per paragraph to the
/// Lucene index. Also updates <c>currentPageTitle</c> and
/// <c>currentNotebookTitle</c> for the page being processed.
/// </summary>
/// <param name="pageID">ID of the page whose content is fetched and indexed.</param>
private void IndexByParagraph(string pageID)
{
    string xmlString;
    oneNote.GetPageContent(pageID, out xmlString);

    // Descendants() is lazily evaluated; materialize it once so the count and
    // the loop below do not each re-walk the whole document.
    var paragraphs = XDocument.Parse(xmlString).Descendants(One + "OE").ToList();

    currentPageTitle = GetPageTitle(pageID);
    currentNotebookTitle = GetPageNotebookTitle(pageID);

    if (isDebug)
    {
        Console.WriteLine("\t" + paragraphs.Count + " Paragraphs");
    }

    foreach (var el in paragraphs)
    {
        // An OE element without an objectID cannot be referenced from the
        // index, so skip it instead of failing on a null attribute
        // (BuildIndex applies the same check).
        var idAttribute = el.Attribute("objectID");
        if (idAttribute == null)
        {
            continue;
        }

        var text = GetTextFromNode(el);
        if (text == null)
        {
            continue;
        }

        var paragraphText = RemoveUnwantedTags(text);
        lucene.AddDocument(new Tuple<string, string, string>(pageID, idAttribute.Value, paragraphText));
    }
}
/// <summary>
/// Builds the index for every <c>Page</c> element found in the given XML.
/// For each page the content is fetched through <c>oneNote.GetPageContent</c>,
/// every <c>OE</c> paragraph that has an <c>objectID</c> and a <c>T</c> text
/// child longer than two characters is cleaned with
/// <c>RemoveUnwantedTags</c>, and the resulting list of
/// ("objectID pageID", text) tuples is handed to <c>lucene.AddDocument</c>.
/// <c>progressRate</c> and <c>currentPageTitle</c> are updated as pages are processed.
/// </summary>
/// <param name="outputXML">
/// XML string containing <c>Page</c> elements (in the <c>One</c> namespace) with
/// <c>name</c> and <c>ID</c> attributes. Presumably the OneNote hierarchy XML;
/// TODO confirm against the caller.
/// </param>
public void BuildIndex(string outputXML)
{
    // Text elements with this many characters or fewer are not indexed.
    const int MinTextLengthExclusive = 2;

    var info = XDocument.Parse(outputXML);

    // Materialize the lazy query once; it is needed for both the total and the loop.
    var pageList = info.Descendants(One + "Page").ToList();
    int totalCount = pageList.Count;
    Console.WriteLine("All Count" + totalCount);

    int processed = 0;
    foreach (var page in pageList)
    {
        processed += 1;
        progressRate = (double)processed / totalCount;

        // Without an ID the page content cannot be fetched; skip the page
        // rather than crash on a null attribute.
        var pageID = (string)page.Attribute("ID");
        if (pageID == null)
        {
            continue;
        }

        // The explicit XAttribute -> string conversion yields null for a
        // missing attribute; fall back to an empty title in that case.
        currentPageTitle = (string)page.Attribute("name") ?? string.Empty;
        Console.Write("Adding " + currentPageTitle);

        string xmlString;
        oneNote.GetPageContent(pageID, out xmlString);

        var paragraphs = XDocument.Parse(xmlString).Descendants(One + "OE").ToList();
        Console.WriteLine("\t" + paragraphs.Count + " Paragraphs");

        var documentList = new List<Tuple<string, string>>();
        foreach (var el in paragraphs)
        {
            var parId = el.Attribute("objectID");
            var text = el.Element(One + "T");
            if (parId == null || text == null || text.Value.Length <= MinTextLengthExclusive)
            {
                continue;
            }

            // The document key combines the paragraph ID and the page ID, separated by a space.
            var id = parId.Value + " " + pageID;
            var par = RemoveUnwantedTags(text.Value);
            documentList.Add(new Tuple<string, string>(id, par));
        }

        lucene.AddDocument(documentList);
    }
}