/// <summary>
/// Groups the entries of <c>pageInfos</c> by the hash of their inner text.
/// </summary>
/// <returns>
/// A dictionary keyed by <c>HashValueForInnerText</c>. Each value lists the (page id, page title)
/// pairs that share that hash; hashes seen only once still get a single-element list.
/// Pages whose hash is <c>null</c> are left out. Returns <c>null</c> when <c>pageInfos</c> is <c>null</c>.
/// </returns>
public Dictionary<string, List<Tuple<string /* pageId */, string /* pageName */>>> GetDuplicatesGroups()
{
    if (pageInfos == null)
    {
        // Kept as null (rather than an empty dictionary) so existing callers see the same result.
        return null;
    }

    Dictionary<string, List<Tuple<string, string>>> duplicatesGroups = new Dictionary<string, List<Tuple<string, string>>>();
    foreach (KeyValuePair<string, OneNotePageInfo> elem in pageInfos)
    {
        string pageId = elem.Key;
        OneNotePageInfo pageInfo = elem.Value;
        string hashValueForInnerText = pageInfo.HashValueForInnerText;
        if (hashValueForInnerText == null)
        {
            // No hash is available for this page, so it cannot be grouped.
            continue;
        }

        // Single lookup: fetch the existing group, or create and register a new one.
        if (!duplicatesGroups.TryGetValue(hashValueForInnerText, out List<Tuple<string, string>> group))
        {
            group = new List<Tuple<string, string>>();
            duplicatesGroups.Add(hashValueForInnerText, group);
        }

        group.Add(Tuple.Create(pageId, pageInfo.PageTitle));
    }

    return duplicatesGroups;
}
/// <summary>
/// Builds a page-ID to <c>OneNotePageInfo</c> map from the <c>one:Page</c> elements of an XML document.
/// </summary>
/// <param name="xmlDocument">Document to scan; may be <c>null</c>.</param>
/// <param name="pageInfos">
/// Always assigned a new dictionary keyed by page ID. It stays empty when
/// <paramref name="xmlDocument"/> is <c>null</c>.
/// </param>
/// <returns>
/// <c>false</c> when <paramref name="xmlDocument"/> is <c>null</c>; otherwise <c>true</c>,
/// even if individual pages were skipped because processing them threw.
/// </returns>
private bool TryGetOneNotePageInfos(System.Xml.XmlDocument xmlDocument, out Dictionary<string /*PageId*/, OneNotePageInfo> pageInfos)
{
    pageInfos = new Dictionary<string, OneNotePageInfo>();

    if (xmlDocument == null)
    {
        etc.LoggerHelper.LogWarn("xmlDocument is null");
        return false;
    }

    foreach (System.Xml.XmlNode node in xmlDocument.GetElementsByTagName("one:Page"))
    {
        try
        {
            string id = node.Attributes["ID"].Value;
            System.Xml.XmlNode section = node.ParentNode;

            // Only pages whose direct parent is a section are recorded.
            if (section.Name != "one:Section")
            {
                continue;
            }

            // Pages reported as deleted are skipped so that the ones sitting in the
            // 'trash' folder are never picked up for later processing.
            if (CheckIfDeleted(node))
            {
                continue;
            }

            // The first occurrence of a page ID wins; later ones are ignored.
            if (pageInfos.ContainsKey(id))
            {
                continue;
            }

            // The section's 'ID', 'path' and 'name' attributes are expected to always exist.
            string ownerId = section.Attributes["ID"].Value;
            string ownerPath = section.Attributes["path"].Value;
            string ownerName = section.Attributes["name"].Value;

            OneNotePageInfo info = new OneNotePageInfo
            {
                ParentSectionId = ownerId,
                ParentSectionFilePath = ownerPath,
                ParentSectionName = ownerName,
                PageName = node.Attributes["name"].Value
            };
            pageInfos.Add(id, info);
        }
        catch (System.Exception ex)
        {
            // Best effort: a page that cannot be read is logged and skipped.
            etc.LoggerHelper.LogWarn("Ignore the exception: {0}", ex.ToString());
        }
    }

    return true;
}
/// <summary>
/// Scans an XML document for <c>one:Page</c> elements and records one <c>OneNotePageInfo</c> per page ID.
/// </summary>
/// <param name="xmlDocument">Document to read; a <c>null</c> value is logged and rejected.</param>
/// <param name="pageInfos">Receives a freshly created dictionary keyed by page ID (empty on failure).</param>
/// <returns><c>true</c> if <paramref name="xmlDocument"/> was not <c>null</c>; <c>false</c> otherwise.</returns>
private bool TryGetOneNotePageInfos(System.Xml.XmlDocument xmlDocument, out Dictionary<string /*PageId*/, OneNotePageInfo> pageInfos)
{
    pageInfos = new Dictionary<string, OneNotePageInfo>();

    if (xmlDocument == null)
    {
        etc.LoggerHelper.LogWarn("xmlDocument is null");
        return false;
    }

    System.Xml.XmlNodeList pages = xmlDocument.GetElementsByTagName("one:Page");
    foreach (System.Xml.XmlNode page in pages)
    {
        try
        {
            string pageKey = page.Attributes["ID"].Value;
            System.Xml.XmlNode owner = page.ParentNode;

            // A page is recorded only when it sits directly under a section, is not
            // reported as deleted (so pages in the 'trash' folder are left out), and
            // its ID has not been recorded yet. Short-circuiting keeps the checks in that order.
            bool shouldRecord = owner.Name == "one:Section"
                                && !CheckIfDeleted(page)
                                && !pageInfos.ContainsKey(pageKey);

            if (shouldRecord)
            {
                // The section's 'ID', 'path' and 'name' attributes are expected to always exist.
                System.Xml.XmlAttributeCollection ownerAttributes = owner.Attributes;
                string sectionId = ownerAttributes["ID"].Value;
                string sectionPath = ownerAttributes["path"].Value;
                string sectionName = ownerAttributes["name"].Value;

                OneNotePageInfo entry = new OneNotePageInfo();
                entry.ParentSectionId = sectionId;
                entry.ParentSectionFilePath = sectionPath;
                entry.ParentSectionName = sectionName;
                entry.PageName = page.Attributes["name"].Value;

                pageInfos.Add(pageKey, entry);
            }
        }
        catch (System.Exception caught)
        {
            // A failure on one page must not abort the whole scan; log it and move on.
            etc.LoggerHelper.LogWarn("Ignore the exception: {0}", caught.ToString());
        }
    }

    return true;
}
/// <summary>
/// Rebuilds <c>pageInfos</c> from the page hierarchy XML obtained through <c>onenoteApplication</c>.
/// </summary>
/// <returns>
/// A (success, message) pair. <c>(true, "")</c> when the whole hierarchy was processed; otherwise
/// <c>false</c> with a message saying whether retrieval, parsing, a duplicated page ID, or an
/// unexpected error while reading a page caused the failure. On failure <c>pageInfos</c> holds
/// whatever had been collected up to that point.
/// </returns>
private Tuple<bool, string> UpdatePageInfos()
{
    pageInfos = new Dictionary<string, OneNotePageInfo>();

    if (!onenoteApplication.TryGetPageHierarchyAsXML(out string hierarchyText))
    {
        return Tuple.Create(false, "Unable to retrieve page hierarchy.");
    }

    etc.LoggerHelper.LogInfo("Retrieved page hierarchy: {0} bytes.", hierarchyText.Length);

    System.Xml.XmlDocument hierarchyXml = new System.Xml.XmlDocument();
    try
    {
        hierarchyXml.LoadXml(hierarchyText);
    }
    catch (System.Exception parseError)
    {
        etc.LoggerHelper.LogUnexpectedException(parseError);
        return Tuple.Create(false, "Unable to parse page hierarchy.");
    }

    foreach (System.Xml.XmlNode node in hierarchyXml.GetElementsByTagName("one:Page"))
    {
        try
        {
            string id = node.Attributes["ID"].Value;
            System.Xml.XmlNode section = node.ParentNode;

            // Pages that are not direct children of a section are ignored.
            if (section.Name != "one:Section")
            {
                continue;
            }

            // Deleted pages must be skipped; otherwise duplicates living in the
            // trash folder could end up being deleted.
            if (IsPageDeleted(node))
            {
                continue;
            }

            // A repeated page ID aborts the whole update.
            if (pageInfos.ContainsKey(id))
            {
                return Tuple.Create(false, string.Format("The page id ({0}) is not unique.", id));
            }

            // The section's 'ID', 'path' and 'name' attributes are expected to always exist.
            string ownerId = section.Attributes["ID"].Value;
            string ownerPath = section.Attributes["path"].Value;
            string ownerName = section.Attributes["name"].Value;

            OneNotePageInfo info = new OneNotePageInfo
            {
                ParentSectionId = ownerId,
                ParentSectionFilePath = ownerPath,
                ParentSectionName = ownerName,
                PageTitle = node.Attributes["name"].Value
            };
            pageInfos.Add(id, info);
        }
        catch (System.Exception nodeError)
        {
            // Unlike a best-effort scan, any unreadable page is treated as a corrupted hierarchy.
            etc.LoggerHelper.LogUnexpectedException(nodeError);
            return Tuple.Create(false, "The page hierarchy is corrupted.");
        }
    }

    return Tuple.Create(true, "");
}
/// <summary>
/// Refreshes <c>pageInfos</c> and then computes, for every page, a hash of the inner text of its
/// content XML, storing it in <c>HashValueForInnerText</c> (or <c>null</c> when it could not be computed).
/// </summary>
/// <param name="progress">
/// Receives (pages hashed, pages failed, total pages, title of the page just processed) once before
/// the loop (with a <c>null</c> title) and once after each page. May be <c>null</c>, in which case
/// no progress is reported.
/// </param>
/// <param name="cancellationToken">
/// Checked before each page. When cancellation is requested, <c>OnCancelled</c> is raised and the
/// scan stops; pages not yet visited keep whatever hash value they had.
/// </param>
/// <returns>
/// The failure result of <c>UpdatePageInfos</c> when the hierarchy could not be loaded; otherwise
/// <c>(true, "")</c>, including when the scan was cancelled or individual pages failed.
/// </returns>
public Tuple<bool, string> ScanOneNotePages(IProgress<Tuple<int, int, int, string>> progress, System.Threading.CancellationToken cancellationToken)
{
    Tuple<bool, string> resultUpdatePageInfos = UpdatePageInfos();
    if (resultUpdatePageInfos.Item1 == false)
    {
        return resultUpdatePageInfos;
    }

    etc.LoggerHelper.LogInfo("Found {0} pages.", pageInfos.Count);

    int statCountReadSuccess = 0;
    int statCountReadFailed = 0;
    int statCountTotal = pageInfos.Count;
    string statPageTitle = null;
    progress?.Report(Tuple.Create(statCountReadSuccess, statCountReadFailed, statCountTotal, statPageTitle));

    foreach (KeyValuePair<string, OneNotePageInfo> elem in pageInfos)
    {
        if (cancellationToken.IsCancellationRequested)
        {
            // Null-conditional invocation: raising the notification must not throw
            // when nobody has subscribed to it.
            OnCancelled?.Invoke();
            break;
        }

        string pageId = elem.Key;
        OneNotePageInfo pageInfo = elem.Value;
        bool successHash = false;
        pageInfo.HashValueForInnerText = null;

        if (onenoteApplication.TryGetPageContent(pageId, out string pageContent))
        {
            try
            {
                System.Xml.XmlDocument pageContentXml = new System.Xml.XmlDocument();
                pageContentXml.LoadXml(pageContent);

                /*
                 * Pages with identical contents commonly differ in their 'objectID' and
                 * 'lastModified' attributes. Hashing the raw XML would therefore yield
                 * different values, which is why other duplicate-removal software misses them.
                 * Hashing only 'InnerText' ignores those attributes.
                 * [!] The underlying assumption is that a user cannot modify the attributes directly.
                 */
                if (TryCalculateHashValue(pageContentXml.InnerText, out string hashValue))
                {
                    successHash = true;
                    pageInfo.HashValueForInnerText = hashValue;
                }
                else
                {
                    etc.LoggerHelper.LogWarn("Failed to calculate hash for the page ({0}).", pageId);
                }
            }
            catch (System.Exception exception)
            {
                etc.LoggerHelper.LogUnexpectedException(exception);
            }
        }
        else
        {
            // Logged only when retrieval itself failed; hash and parse failures
            // have already reported their own cause above.
            etc.LoggerHelper.LogWarn("Failed to retrieve the content of the page ({0}).", pageId);
        }

        statPageTitle = pageInfo.PageTitle;
        if (successHash)
        {
            statCountReadSuccess += 1;
        }
        else
        {
            statCountReadFailed += 1;
        }

        progress?.Report(Tuple.Create(statCountReadSuccess, statCountReadFailed, statCountTotal, statPageTitle));
    }

    return Tuple.Create(true, "");
}