/// <summary>
/// Groups the known pages by the hash of their inner text.
/// </summary>
/// <returns>
/// A map from hash value to the (page id, page title) pairs that share that hash,
/// or null when <c>pageInfos</c> has not been populated. Pages whose hash is null are
/// skipped. A group may hold a single page, so callers looking for real duplicates
/// must keep only the groups with more than one entry.
/// </returns>
public Dictionary <string, List <Tuple <string /* pageId */, string /* pageName */> > > GetDuplicatesGroups()
        {
            if (pageInfos == null)
            {
                return(null);
            }

            Dictionary <string, List <Tuple <string, string> > > duplicatesGroups = new Dictionary <string, List <Tuple <string, string> > >();
            foreach (KeyValuePair <string, OneNotePageInfo> elem in pageInfos)
            {
                OneNotePageInfo pageInfo = elem.Value;

                string hashValueForInnerText = pageInfo.HashValueForInnerText;
                if (hashValueForInnerText == null)
                {
                    // No hash is available for this page, so it cannot be grouped.
                    continue;
                }

                // Single lookup: fetch the existing group or create and register a new one.
                if (duplicatesGroups.TryGetValue(hashValueForInnerText, out List <Tuple <string, string> > group) == false)
                {
                    group = new List <Tuple <string, string> >();
                    duplicatesGroups.Add(hashValueForInnerText, group);
                }
                group.Add(Tuple.Create(elem.Key, pageInfo.PageTitle));
            }
            return(duplicatesGroups);
        }
Exemplo n.º 2
0
        /// <summary>
        /// Collects every non-deleted "one:Page" element whose parent is a "one:Section" element.
        /// </summary>
        /// <param name="xmlDocument">Hierarchy document to scan; may be null.</param>
        /// <param name="pageInfos">
        /// Always assigned a new dictionary keyed by page ID. It stays empty when
        /// <paramref name="xmlDocument"/> is null.
        /// </param>
        /// <returns>
        /// false when <paramref name="xmlDocument"/> is null; true otherwise, even when
        /// individual pages were skipped because reading them threw.
        /// </returns>
        private bool TryGetOneNotePageInfos(System.Xml.XmlDocument xmlDocument, out Dictionary <string /*PageId*/, OneNotePageInfo> pageInfos)
        {
            pageInfos = new Dictionary <string, OneNotePageInfo>();

            if (xmlDocument == null)
            {
                etc.LoggerHelper.LogWarn("xmlDocument is null");
                return(false);
            }

            foreach (System.Xml.XmlNode node in xmlDocument.GetElementsByTagName("one:Page"))
            {
                try
                {
                    string id = node.Attributes["ID"].Value;
                    System.Xml.XmlNode parent = node.ParentNode;

                    // Only pages that sit directly under a section are of interest.
                    if (parent.Name != "one:Section")
                    {
                        continue;
                    }

                    // Deleted pages are left out so that pages kept in the 'trash' folder are never touched;
                    // a page ID that was already recorded is not overwritten.
                    if (CheckIfDeleted(node) || pageInfos.ContainsKey(id))
                    {
                        continue;
                    }

                    // The section is expected to always carry 'ID', 'path' and 'name' attributes.
                    string sectionId   = parent.Attributes["ID"].Value;
                    string sectionPath = parent.Attributes["path"].Value;
                    string sectionName = parent.Attributes["name"].Value;

                    OneNotePageInfo info = new OneNotePageInfo
                    {
                        ParentSectionId       = sectionId,
                        ParentSectionFilePath = sectionPath,
                        ParentSectionName     = sectionName,
                        PageName              = node.Attributes["name"].Value
                    };
                    pageInfos.Add(id, info);
                }
                catch (System.Exception exception)
                {
                    // Best effort: a page that cannot be read is logged and skipped.
                    etc.LoggerHelper.LogWarn("Ignore the exception: {0}", exception.ToString());
                }
            }
            return(true);
        }
    /// <summary>
    /// Builds a page-ID-to-page-info map from the "one:Page" elements of a hierarchy document,
    /// keeping only pages that are direct children of a "one:Section" and are not deleted.
    /// </summary>
    /// <param name="xmlDocument">Hierarchy document to read; may be null.</param>
    /// <param name="pageInfos">Receives a new dictionary in every case; empty when the document is null.</param>
    /// <returns>true when a document was supplied (per-page failures are logged and skipped), false when it was null.</returns>
    private bool TryGetOneNotePageInfos(System.Xml.XmlDocument xmlDocument, out Dictionary<string /*PageId*/, OneNotePageInfo> pageInfos)
    {
      pageInfos = new Dictionary<string, OneNotePageInfo>();

      if (xmlDocument == null)
      {
        etc.LoggerHelper.LogWarn("xmlDocument is null");
        return false;
      }

      System.Xml.XmlNodeList pages = xmlDocument.GetElementsByTagName("one:Page");
      foreach (System.Xml.XmlNode page in pages)
      {
        try
        {
          string pageKey = page.Attributes["ID"].Value;

          // A page qualifies when it lives directly in a section, is not deleted
          // (pages in the 'trash' folder must be left alone) and has not been recorded yet.
          bool qualifies = page.ParentNode.Name == "one:Section"
                           && !CheckIfDeleted(page)
                           && !pageInfos.ContainsKey(pageKey);
          if (qualifies)
          {
            // The section's 'ID', 'path' and 'name' attributes are expected to be present.
            System.Xml.XmlAttributeCollection sectionAttributes = page.ParentNode.Attributes;
            string parentId = sectionAttributes["ID"].Value;
            string parentPath = sectionAttributes["path"].Value;
            string parentName = sectionAttributes["name"].Value;

            OneNotePageInfo entry = new OneNotePageInfo();
            entry.ParentSectionId = parentId;
            entry.ParentSectionFilePath = parentPath;
            entry.ParentSectionName = parentName;
            entry.PageName = page.Attributes["name"].Value;
            pageInfos.Add(pageKey, entry);
          }
        }
        catch (System.Exception exception)
        {
          // Any failure while reading one page is logged and that page is skipped.
          etc.LoggerHelper.LogWarn("Ignore the exception: {0}", exception.ToString());
        }
      }
      return true;
    }
        /// <summary>
        /// Rebuilds <c>pageInfos</c> from the page hierarchy reported by the OneNote application.
        /// </summary>
        /// <returns>
        /// (true, "") on success; otherwise (false, reason) when the hierarchy cannot be retrieved
        /// or parsed, when a page ID occurs twice, or when reading a page node throws.
        /// On failure <c>pageInfos</c> may be left empty or only partially filled.
        /// </returns>
        private Tuple <bool, string> UpdatePageInfos()
        {
            pageInfos = new Dictionary <string, OneNotePageInfo>();

            string hierarchyText = "";
            if (!onenoteApplication.TryGetPageHierarchyAsXML(out hierarchyText))
            {
                return(Tuple.Create(false, "Unable to retrieve page hierarchy."));
            }
            etc.LoggerHelper.LogInfo("Retrieved page hierarchy: {0} bytes.", hierarchyText.Length);

            System.Xml.XmlDocument hierarchyXml = new System.Xml.XmlDocument();
            try
            {
                hierarchyXml.LoadXml(hierarchyText);
            }
            catch (System.Exception exception)
            {
                etc.LoggerHelper.LogUnexpectedException(exception);
                return(Tuple.Create(false, "Unable to parse page hierarchy."));
            }

            foreach (System.Xml.XmlNode node in hierarchyXml.GetElementsByTagName("one:Page"))
            {
                try
                {
                    string id = node.Attributes["ID"].Value;
                    System.Xml.XmlNode section = node.ParentNode;

                    // Skip pages that are not directly inside a section, and deleted pages:
                    // without the deletion check, duplicates sitting in the trash folder could end up being deleted.
                    if (section.Name != "one:Section" || IsPageDeleted(node))
                    {
                        continue;
                    }

                    if (pageInfos.ContainsKey(id))
                    {
                        return(Tuple.Create(false, string.Format("The page id ({0}) is not unique.", id)));
                    }

                    // The section's 'ID', 'path' and 'name' attributes are expected to always exist.
                    string sectionId   = section.Attributes["ID"].Value;
                    string sectionPath = section.Attributes["path"].Value;
                    string sectionName = section.Attributes["name"].Value;

                    OneNotePageInfo info = new OneNotePageInfo
                    {
                        ParentSectionId       = sectionId,
                        ParentSectionFilePath = sectionPath,
                        ParentSectionName     = sectionName,
                        PageTitle             = node.Attributes["name"].Value
                    };
                    pageInfos.Add(id, info);
                }
                catch (System.Exception exception)
                {
                    // Unlike a best-effort scan, any unreadable node aborts the whole update.
                    etc.LoggerHelper.LogUnexpectedException(exception);
                    return(Tuple.Create(false, "The page hierarchy is corrupted."));
                }
            }
            return(Tuple.Create(true, ""));
        }
        /// <summary>
        /// Refreshes the page list via <c>UpdatePageInfos</c>, then reads each page and stores a hash
        /// of its inner text in <c>HashValueForInnerText</c>, reporting progress after every page.
        /// </summary>
        /// <param name="progress">
        /// Receives (pages hashed, pages failed, total pages, title of the page just processed).
        /// The title is null in the initial report. May be null, in which case nothing is reported.
        /// </param>
        /// <param name="cancellationToken">
        /// Checked before each page; when cancellation is requested <c>OnCancelled</c> is invoked
        /// and the scan stops.
        /// </param>
        /// <returns>
        /// The failure tuple from <c>UpdatePageInfos</c> when the page list could not be built;
        /// otherwise (true, ""), including when the scan was cancelled or individual pages failed.
        /// </returns>
        public Tuple <bool, string> ScanOneNotePages(IProgress <Tuple <int, int, int, string> > progress, System.Threading.CancellationToken cancellationToken)
        {
            Tuple <bool, string> resultUpdatePageInfos = UpdatePageInfos();

            if (resultUpdatePageInfos.Item1 == false)
            {
                return(resultUpdatePageInfos);
            }

            etc.LoggerHelper.LogInfo("Found {0} pages.", pageInfos.Count);
            int    statCountReadSuccess = 0;
            int    statCountReadFailed  = 0;
            int    statCountTotal       = pageInfos.Count;
            string statPageTitle        = null;
            // A null progress sink simply disables reporting instead of crashing the scan.
            progress?.Report(Tuple.Create(statCountReadSuccess, statCountReadFailed, statCountTotal, statPageTitle));

            foreach (KeyValuePair <string, OneNotePageInfo> elem in pageInfos)
            {
                if (cancellationToken.IsCancellationRequested)
                {
                    // Null-conditional invoke: presumably OnCancelled is a delegate/event, which is
                    // null when nothing is attached; a plain Invoke() would then throw.
                    OnCancelled?.Invoke();
                    break;
                }

                string          pageId      = elem.Key;
                OneNotePageInfo pageInfo    = elem.Value;
                string          pageContent = "";
                bool            successHash = false;

                // Drop any hash from a previous scan so a failed read cannot leave a stale value behind.
                pageInfo.HashValueForInnerText = null;

                if (onenoteApplication.TryGetPageContent(pageId, out pageContent))
                {
                    try
                    {
                        System.Xml.XmlDocument pageContentXml = new System.Xml.XmlDocument();
                        pageContentXml.LoadXml(pageContent);

                        /*
                         * Pages with identical contents commonly differ in their 'objectID' and 'lastModified' attributes.
                         * Hashing the raw XML would therefore give different values for what is really the same page.
                         * Hashing only 'InnerText' ignores every attribute.
                         * [!] The underlying assumption is that a user cannot modify the attributes directly.
                         */
                        if (TryCalculateHashValue(pageContentXml.InnerText, out string hashValue))
                        {
                            successHash = true;
                            pageInfo.HashValueForInnerText = hashValue;
                        }
                        else
                        {
                            etc.LoggerHelper.LogWarn("Failed to calculate hash for the page ({0}).", pageId);
                        }
                    }
                    catch (System.Exception exception)
                    {
                        // Malformed page XML (or any other failure) counts as a failed page; the scan continues.
                        etc.LoggerHelper.LogUnexpectedException(exception);
                    }
                }
                else
                {
                    // Logged only when retrieval itself failed; parse and hash failures log their own message above.
                    etc.LoggerHelper.LogWarn("Failed to retrieve the content of the page ({0}).", pageId);
                }

                statPageTitle = pageInfo.PageTitle;
                if (successHash)
                {
                    statCountReadSuccess += 1;
                }
                else
                {
                    statCountReadFailed += 1;
                }
                progress?.Report(Tuple.Create(statCountReadSuccess, statCountReadFailed, statCountTotal, statPageTitle));
            }
            return(Tuple.Create(true, ""));
        }