/// <summary>
        /// Builds an assertion result for the supplied links. A link that is already a key in <c>flagsByItem</c>
        /// is added with the state stored for it there; what happens to the remaining links depends on <paramref name="useIndex"/>.
        /// </summary>
        /// <param name="links">The links to classify.</param>
        /// <param name="useIndex">
        /// When <c>true</c> (default), links not found in <c>flagsByItem</c> are collected and handed, together with the result,
        /// to <c>imbWEMManager.index.pageIndexTable.GetUrlAssertion</c>. When <c>false</c>, they are added to the result as
        /// <see cref="indexPageEvaluationEntryState.notInTheIndex"/>.
        /// </param>
        /// <returns>The assertion result created by this call.</returns>
        public indexURLAssertionResult GetSubAssertion(IEnumerable <string> links, bool useIndex = true)
        {
            indexURLAssertionResult result  = new indexURLAssertionResult();
            List <string>           unknown = new List <string>();

            foreach (string link in links)
            {
                // Known link: reuse the state already recorded for it.
                if (flagsByItem.ContainsKey(link))
                {
                    result.Add(flagsByItem[link], link);
                    continue;
                }

                // Unknown link and no index lookup requested: mark it directly.
                if (!useIndex)
                {
                    result.Add(indexPageEvaluationEntryState.notInTheIndex, link);
                    continue;
                }

                // Unknown link: defer to the page index table after the loop.
                unknown.Add(link);
            }

            if (useIndex)
            {
                // Called even when no link was deferred, exactly as before.
                imbWEMManager.index.pageIndexTable.GetUrlAssertion(unknown, result);
            }

            return result;
        }
Example #2
0
        /// <summary>
        /// Evaluates the URL of every spider link with <c>GetPageAssertion</c> and collects the states in a new assertion result.
        /// </summary>
        /// <param name="urls">Spider links whose <c>url</c> values are evaluated.</param>
        /// <returns>A new result to which one (state, url) pair was added per link.</returns>
        public indexURLAssertionResult GetUrlAssertion(IEnumerable <spiderLink> urls)
        {
            indexURLAssertionResult result = new indexURLAssertionResult();

            foreach (spiderLink link in urls)
            {
                result.Add(GetPageAssertion(link.url), link.url);
            }

            return result;
        }
Example #3
0
        /// <summary>
        /// Evaluates each URL with <c>GetPageAssertion</c> and adds the resulting state to an assertion result.
        /// </summary>
        /// <param name="urls">The URLs to evaluate.</param>
        /// <param name="output">Optional existing result to add the entries to; when <c>null</c>, a new result is created.</param>
        /// <returns>
        /// The result that received the entries: <paramref name="output"/> itself when it was supplied,
        /// otherwise the newly created instance.
        /// </returns>
        public indexURLAssertionResult GetUrlAssertion(IEnumerable <string> urls, indexURLAssertionResult output = null)
        {
            indexURLAssertionResult result = output ?? new indexURLAssertionResult();

            foreach (string address in urls)
            {
                result.Add(GetPageAssertion(address), address);
            }

            return result;
        }
Example #4
0
        /// <summary>
        /// Performs domain index assertion: for each requested domain, computes a set of
        /// <see cref="indexDomainContentEnum"/> flags and adds them, with the domain URL, to the result.
        /// A domain for which <c>GetDomain</c> returns <c>null</c> is added with <see cref="indexDomainContentEnum.none"/>.
        /// </summary>
        /// <param name="domainList">
        /// The domains to check. When <c>null</c> or empty, the <c>domain</c> value of every entry returned by
        /// <c>GetList()</c> is checked instead. The supplied list is never modified.
        /// </param>
        /// <param name="completeRecheck">
        /// If set to <c>true</c>, <c>recheck</c> is called on each found domain with its page list before the flags are computed.
        /// </param>
        /// <returns>The assertion result holding one entry per checked domain.</returns>
        public indexDomainAssertionResult GetDomainIndexAssertion(List <string> domainList = null, bool completeRecheck = false)
        {
            indexDomainAssertionResult output = new indexDomainAssertionResult();

            // Fall back to all known domains in a private list, so an empty list passed
            // by the caller is not filled in behind its back.
            List <string> targets = domainList;
            if ((targets == null) || !targets.Any())
            {
                targets = new List <string>();
                foreach (indexDomain known in GetList())
                {
                    targets.Add(known.domain);
                }
            }

            foreach (string domainUrl in targets)
            {
                indexDomain idomain = GetDomain(domainUrl);
                if (idomain == null)
                {
                    output.Add(indexDomainContentEnum.none, domainUrl);
                    continue;
                }

                indexDomainContentEnum flags = indexDomainContentEnum.indexed;

                List <indexPage> pageList = imbWEMManager.index.pageIndexTable.GetPagesForDomain(domainUrl);
                if (completeRecheck)
                {
                    idomain.recheck(pageList);
                }

                List <string> pageUrls = new List <string>();
                pageList.ForEach(x => pageUrls.Add(x.url));
                indexURLAssertionResult pageListResult = imbWEMManager.index.pageIndexTable.GetUrlAssertion(pageUrls);

                // Evaluation is treated as complete when the two state groups have the same number of entries.
                if (pageListResult[indexPageEvaluationEntryState.inTheIndex].Count() == pageListResult[indexPageEvaluationEntryState.haveEvaluationEntry].Count())
                {
                    flags |= indexDomainContentEnum.completeEvaluationPages;
                }
                else
                {
                    flags |= indexDomainContentEnum.uncompleteEvaluationPages;
                }

                FileInfo dlc_tf_idf = imbWEMManager.index.experimentManager.CurrentSession.GetTFIDF_DLC_File(idomain, getWritableFileMode.existing);

                // NOTE(review): the polarity below looks inverted - an EXISTING file marks the domain as
                // "uncomplete" and clears TFIDFcompiled. Left unchanged because the semantics of
                // GetTFIDF_DLC_File / getWritableFileMode.existing are not visible here; confirm, and swap
                // the branches if this is a bug.
                // TFIDFcompiled is only set on the in-memory object; nothing here writes idomain back.
                if (dlc_tf_idf.Exists)
                {
                    flags |= indexDomainContentEnum.uncompleteDomainTFIDF;

                    idomain.TFIDFcompiled = false;
                }
                else
                {
                    flags |= indexDomainContentEnum.completeDomainTFIDF;

                    idomain.TFIDFcompiled = true;
                }

                // A single page with no lemma count, no InfoPrize and no distinct lemmas
                // marks the whole domain as uncomplete.
                bool appUncomplete = pageList.Any(p => (p.Lemmas == 0) && (p.InfoPrize == 0) && (p.DistinctLemmas.isNullOrEmpty()));
                if (appUncomplete)
                {
                    flags |= indexDomainContentEnum.uncompleteTFDFApplicationToPages;
                }
                else
                {
                    flags |= indexDomainContentEnum.completeTFDFApplicationToPages;
                }

                output.Add(flags, domainUrl);
            }

            return output;
        }