/// <summary>
/// Creates one child node per anchor and appends it to <c>Nodes</c>.
/// </summary>
/// <param name="Anchors">Anchor identifiers to add. A null list adds nothing.</param>
/// <param name="DetectAnchorText">
/// Detected text keyed by <c>Text + "#" + anchor</c>. Anchors without an entry
/// (or a null dictionary) produce a node with null text and an original count of 0.
/// </param>
public void AddAnchors(List<string> Anchors, Dictionary<string, DetectedHeaders> DetectAnchorText)
{
    if (Anchors == null)
    {
        return;
    }

    foreach (string anch in Anchors)
    {
        // Lookup key and navigation source share the same "text#anchor" form.
        string source = Text + "#" + anch;

        // TryGetValue assigns the out parameter itself, so no placeholder instance is needed.
        DetectedHeaders det = null;
        if (DetectAnchorText != null)
        {
            DetectAnchorText.TryGetValue(source, out det);
        }

        List<string> text = det != null ? det.Result : null;

        MyNode n = new MyNode(anch, text);
        // Each anchor node gets a freshly generated GUID as the first NavDetails argument.
        n.Tag = new NavDetails(Guid.NewGuid().ToString(), source, n.DetectedCombo);
        n.OriginalCount = det != null ? det.OriginalCount : 0;
        Nodes.Add(n);
    }
}
/// <summary>
/// Detects the header text of each file, using <c>Variables.HeaderTextInFile</c> as a cache.
/// </summary>
/// <param name="filenames">Paths of the files to check. A null list yields an empty result.</param>
/// <returns>
/// The detected text (value) for each file path (key). Files whose HTML could not be
/// loaded are omitted from the result and are not cached.
/// </returns>
public Dictionary<string, DetectedHeaders> FindHeaderTextInFile(List<string> filenames)
{
    Dictionary<string, DetectedHeaders> Result = new Dictionary<string, DetectedHeaders>();
    if (filenames == null)
    {
        return Result;
    }

    foreach (string path in filenames)
    {
        // A path listed twice is only reported once.
        if (Result.ContainsKey(path))
        {
            continue;
        }

        // Single hashed lookup instead of ContainsKey followed by a linear scan of the cache.
        DetectedHeaders cached;
        if (Variables.HeaderTextInFile.TryGetValue(path, out cached))
        {
            // Hand out a decoded copy so the cached list itself is left untouched.
            DetectedHeaders header = new DetectedHeaders();
            if (cached != null && cached.Result != null)
            {
                header.Result = cached.Result.Select(line => HttpUtility.HtmlDecode(line)).ToList();
                header.OriginalCount = cached.OriginalCount;
            }
            else
            {
                header.Result = null;
                header.OriginalCount = 0;
            }

            Result.Add(path, header);
            continue;
        }

        HtmlDocument html = GetHtml(path);
        if (html == null)
        {
            continue;
        }

        // NOTE(review): freshly parsed entries are returned exactly as GetText produced them
        // (and share their instance with the cache), whereas cached entries are returned as
        // HTML-decoded copies - confirm this asymmetry is intended.
        DetectedHeaders fresh = new DetectedHeaders();
        fresh.Result = GetText(html.DocumentNode.SelectSingleNode("//body"));
        fresh.OriginalCount = fresh.Result == null ? 0 : fresh.Result.Count;
        Result.Add(path, fresh);
        Variables.HeaderTextInFile.Add(path, fresh);
    }

    return Result;
}
/// <summary>
/// Detects the text found at and after each anchor of a file, using
/// <c>Variables.AnchorTextInFile</c> as a cache.
/// </summary>
/// <param name="path">Path of the file.</param>
/// <param name="Anchors">
/// Anchors to check. An anchor matches the first body element whose <c>id</c> attribute
/// (or <c>name</c> attribute when <c>id</c> is empty) equals it. A null list yields an empty result.
/// </param>
/// <returns>
/// The detected text (value) for each <c>path + "#" + anchor</c> key. Anchors that are neither
/// cached nor found in the document are omitted.
/// </returns>
public Dictionary<string, DetectedHeaders> FindAchorTextInFile(string path, List<string> Anchors)
{
    // Text of the following nodes is gathered until at least this many distinct lines exist.
    const int lineTarget = 5;

    Dictionary<string, DetectedHeaders> Result = new Dictionary<string, DetectedHeaders>();
    if (Anchors == null)
    {
        return Result;
    }

    // Pass 1: answer from the cache and remember which anchors still need the document.
    // Every cached anchor is served here, so a failed HTML load can no longer drop them.
    HashSet<string> seenKeys = new HashSet<string>();
    List<string> pending = new List<string>();
    foreach (string id in Anchors)
    {
        string key = path + "#" + id;
        if (!seenKeys.Add(key))
        {
            continue; // the same anchor was listed twice
        }

        DetectedHeaders cached;
        if (Variables.AnchorTextInFile.TryGetValue(key, out cached))
        {
            Result.Add(key, CopyCachedAnchorHeader(cached));
        }
        else
        {
            pending.Add(id);
        }
    }

    if (pending.Count == 0)
    {
        return Result; // everything came from the cache; the file is not loaded at all
    }

    HtmlDocument html = GetHtml(path);
    if (html == null)
    {
        return Result;
    }

    var body = html.DocumentNode.SelectNodes("//body//*");
    if (body == null)
    {
        return Result; // no elements under <body>, so no anchor can be located
    }

    // Pass 2: locate each uncached anchor. A LINQ SkipWhile-based variant was tried before
    // and found slower, so the index loop is kept deliberately.
    foreach (string id in pending)
    {
        string key = path + "#" + id;

        for (int i = 0; i < body.Count; i++)
        {
            HtmlNode element = body[i];
            string idAtt = element.GetAttributeValue("id", "");
            string nameAtt = element.GetAttributeValue("name", "");
            string id2 = string.IsNullOrEmpty(idAtt) ? nameAtt : idAtt;
            if (id2 == "" || id2 != id)
            {
                continue;
            }

            // Start with the anchor element's own text, then walk forward through the
            // following elements until enough distinct lines are collected.
            List<string> t = GetText(element) ?? new List<string>();
            int j = i;
            while (t.Count < lineTarget && j < body.Count - 1)
            {
                j++;
                List<string> following = GetText(body[j]);
                if (following != null && following.Count > 0)
                {
                    t.AddRange(following);
                    t = t.Distinct().ToList();
                }
            }

            DetectedHeaders header = new DetectedHeaders();
            header.Result = t;
            header.OriginalCount = t.Count;
            Result.Add(key, header);
            Variables.AnchorTextInFile.Add(key, header);
            break; // only the first matching element is used
        }
    }

    return Result;
}

/// <summary>
/// Builds a separate, HTML-decoded copy of a cached anchor entry, leaving the cached
/// instance and its <c>OriginalCount</c> unmodified.
/// </summary>
/// <param name="cached">Entry read from the anchor cache; may be null.</param>
/// <returns>A new instance; its result is null and its count 0 when nothing usable was cached.</returns>
private static DetectedHeaders CopyCachedAnchorHeader(DetectedHeaders cached)
{
    DetectedHeaders copy = new DetectedHeaders();
    if (cached == null || cached.Result == null)
    {
        copy.Result = null;
        copy.OriginalCount = 0;
        return copy;
    }

    copy.Result = cached.Result.Select(line => HttpUtility.HtmlDecode(line)).ToList();
    copy.OriginalCount = cached.OriginalCount;
    return copy;
}
/// <summary>
/// Rebuilds the tree model: one node per HTML file returned by the OPF document, each with
/// its detected header text, and - when the window is in TOC-edit mode and anchors are
/// enabled - one child node per anchor in that file.
/// </summary>
/// <remarks>
/// Unless "show all" is checked, files in <c>PresentFileList</c> and anchors in
/// <c>PresentAnchors</c> are left out. Failures are traced and otherwise ignored, so a
/// problem while loading leaves the tree with whatever was built up to that point.
/// </remarks>
private void LoadFiles()
{
    try
    {
        using (new HourGlass())
        {
            treeView1.BeginUpdate();
            try
            {
                if (Model != null && Model.Nodes.Count > 0)
                {
                    Model.Nodes.Clear();
                }

                OpfDocument OpfDoc = new OpfDocument();
                List<string> htmlFiles = OpfDoc.GetFilesList("html");
                List<string> visibleFiles = htmlFiles;
                if (!cbShowAll.Checked)
                {
                    visibleFiles = htmlFiles.Where(f => !PresentFileList.Contains(f)).ToList();
                }

                MyHtmlDocument htmlDoc = new MyHtmlDocument();
                Dictionary<string, DetectedHeaders> DetectText = htmlDoc.FindHeaderTextInFile(visibleFiles);
                foreach (string file in visibleFiles)
                {
                    // Files without a detection result still get a node, with null text and count 0.
                    DetectedHeaders det;
                    DetectText.TryGetValue(file, out det);

                    MyNode n = new MyNode(file, det != null ? det.Result : null);
                    n.OriginalCount = det != null ? det.OriginalCount : 0;
                    Model.Nodes.Add(n);
                }

                SortList();

                Dictionary<string, string> SrcTag = OpfDoc.GetFilesList();
                // Both inputs are fixed for the duration of the loop, so evaluate them once.
                bool showAnchors = AddType == AddWindowType.TOCEdit && cbShowAnchors.Checked;
                bool showAll = cbShowAll.Checked;

                foreach (MyNode item in Model.Nodes)
                {
                    NavDetails nav = new NavDetails(Utils.GetId(item.Text, SrcTag), item.Text, item.DetectedCombo);
                    item.Tag = nav;

                    if (!showAnchors)
                    {
                        continue;
                    }

                    List<string> Anchors = htmlDoc.FindAnchorsInFile(item.Text);
                    if (!showAll)
                    {
                        Anchors = Anchors.Where(a => !PresentAnchors.ContainsKey(nav.File + "#" + a)).ToList();
                    }

                    Dictionary<string, DetectedHeaders> DetectAnchorText = htmlDoc.FindAchorTextInFile(item.Text, Anchors);
                    item.AddAnchors(Anchors, DetectAnchorText);
                }

                RemoveEmptyNodes();
            }
            finally
            {
                // Always paired with the BeginUpdate above, on success and on failure alike.
                treeView1.EndUpdate();
            }
        }
    }
    catch (Exception ex)
    {
        // Loading stays best-effort (no rethrow), but the failure is no longer silent.
        System.Diagnostics.Trace.TraceError("LoadFiles failed: " + ex);
    }
}