예제 #1
0
        /// <summary>
        /// Computes a hash over the inner text of a OneNote page.
        /// </summary>
        /// <remarks>
        /// A OneNote page is stored as XML-like markup. Two pages with identical text commonly
        /// differ in attributes such as 'objectID' and 'lastModifiedTime', which would produce
        /// different hashes if the raw markup were hashed. Hashing only the document's InnerText
        /// leaves those attributes out of the calculation.
        /// </remarks>
        /// <param name="pageId">Identifier of the page whose content is fetched and hashed.</param>
        /// <param name="hash">
        /// Set to an empty string up front; on success it holds whatever <c>TryCalculateHashOf</c> produced.
        /// </param>
        /// <returns>
        /// <c>true</c> when the page content was fetched, parsed as XML and hashed;
        /// <c>false</c> (after logging) when any of those steps failed.
        /// </returns>
        private bool TryGetHashOfOneNotePage(string pageId, out string hash)
        {
            hash = "";

            // Step 1: fetch the raw page markup.
            string rawPageMarkup;
            if (!onenoteApplication.TryGetPageContent(pageId, out rawPageMarkup))
            {
                etc.LoggerHelper.LogWarn("Unable to get a page content, pageId:{0}", pageId);
                return false;
            }

            // Step 2: parse the markup so that the inner text can be extracted.
            var pageDocument = new System.Xml.XmlDocument();
            try
            {
                pageDocument.LoadXml(rawPageMarkup);
            }
            catch (System.Exception parseFailure)
            {
                etc.LoggerHelper.LogException(parseFailure);
                return false;
            }

            // Step 3: hash the inner text only, so attribute differences do not affect the result.
            if (!TryCalculateHashOf(pageDocument.InnerText, out hash))
            {
                etc.LoggerHelper.LogWarn("Unable to calculate a hash, pageId:{0}", pageId);
                return false;
            }

            return true;
        }
        /// <summary>
        /// Refreshes the page list through <c>UpdatePageInfos</c> and then calculates an inner-text
        /// hash for every entry of <c>pageInfos</c>, reporting progress after each page.
        /// </summary>
        /// <param name="progress">
        /// Receives a tuple of (pages hashed successfully, pages that failed, total page count,
        /// title of the page just processed). The title is <c>null</c> in the initial report that is
        /// sent before the first page is processed. May be <c>null</c>, in which case no progress is reported.
        /// </param>
        /// <param name="cancellationToken">
        /// Checked before each page. When cancellation has been requested, the <c>OnCancelled</c>
        /// callback is invoked (if one is attached) and the scan stops.
        /// </param>
        /// <returns>
        /// The result of <c>UpdatePageInfos</c> unchanged when its <c>Item1</c> is <c>false</c>;
        /// otherwise <c>(true, "")</c>, including when the scan was cancelled part-way or when
        /// individual pages failed to hash.
        /// </returns>
        public Tuple <bool, string> ScanOneNotePages(IProgress <Tuple <int, int, int, string> > progress, System.Threading.CancellationToken cancellationToken)
        {
            Tuple <bool, string> resultUpdatePageInfos = UpdatePageInfos();

            if (resultUpdatePageInfos.Item1 == false)
            {
                return(resultUpdatePageInfos);
            }

            etc.LoggerHelper.LogInfo("Found {0} pages.", pageInfos.Count);
            int    statCountReadSuccess = 0;
            int    statCountReadFailed  = 0;
            int    statCountTotal       = pageInfos.Count;
            string statPageTitle        = null;

            // Initial report so that a listener can show the total before any page is processed.
            // Null-conditional: a caller that does not care about progress may pass null.
            progress?.Report(Tuple.Create(statCountReadSuccess, statCountReadFailed, statCountTotal, statPageTitle));

            foreach (KeyValuePair <string, OneNotePageInfo> elem in pageInfos)
            {
                if (cancellationToken.IsCancellationRequested)
                {
                    // Null-conditional: invoking a delegate with no attached handler would
                    // otherwise throw a NullReferenceException right when the user cancels.
                    OnCancelled?.Invoke();
                    break;
                }

                string          pageId      = elem.Key;
                OneNotePageInfo pageInfo    = elem.Value;
                bool            successHash = false;

                // Clear any previous hash so that a failure below never leaves a stale value behind.
                pageInfo.HashValueForInnerText = null;

                if (onenoteApplication.TryGetPageContent(pageId, out string pageContent))
                {
                    try
                    {
                        System.Xml.XmlDocument pageContentXml = new System.Xml.XmlDocument();
                        pageContentXml.LoadXml(pageContent);

                        /*
                         * Though the page contents are identical, it is quite common to see different 'objectID' and 'lastModified' attributes.
                         * This difference results in a completely different hash value, which cannot be detected by other duplicate remove software.
                         * By simply taking 'innerText' of the internal XML-like format, those attributes will be ignored.
                         * [!] The underlying assumption is that a user cannot modify the attributes directly.
                         */
                        if (TryCalculateHashValue(pageContentXml.InnerText, out string hashValue))
                        {
                            successHash = true;
                            pageInfo.HashValueForInnerText = hashValue;
                        }
                        else
                        {
                            etc.LoggerHelper.LogWarn("Failed to calculate hash for the page ({0}).", pageId);
                        }
                    }
                    catch (System.Exception exception)
                    {
                        // Best effort: a page that cannot be parsed is counted as failed and the scan continues.
                        etc.LoggerHelper.LogUnexpectedException(exception);
                    }
                }

                statPageTitle = pageInfo.PageTitle;
                if (successHash)
                {
                    statCountReadSuccess += 1;
                }
                else
                {
                    // Logged for every failure cause (content fetch, XML parsing or hashing).
                    statCountReadFailed += 1;
                    etc.LoggerHelper.LogWarn("Failed to retrieve the content of the page ({0}).", pageId);
                }
                progress?.Report(Tuple.Create(statCountReadSuccess, statCountReadFailed, statCountTotal, statPageTitle));
            }

            return(Tuple.Create(true, ""));
        }