/// <summary>
/// Builds an assertion result for the given links using the states already registered in <c>flagsByItem</c>.
/// A link that has no entry in <c>flagsByItem</c> is either passed on to the page index table
/// (when <paramref name="useIndex"/> is <c>true</c>) or recorded as
/// <see cref="indexPageEvaluationEntryState.notInTheIndex"/> (when it is <c>false</c>).
/// </summary>
/// <param name="links">The links to evaluate.</param>
/// <param name="useIndex">
/// If <c>true</c>, links without a registered state are collected and sent in one call to
/// <c>imbWEMManager.index.pageIndexTable.GetUrlAssertion</c>, which writes into the same result instance.
/// </param>
/// <returns>The assertion result holding one entry per processed link.</returns>
public indexURLAssertionResult GetSubAssertion(IEnumerable<string> links, bool useIndex = true)
{
    indexURLAssertionResult result = new indexURLAssertionResult();
    List<string> unresolved = new List<string>();

    foreach (string link in links)
    {
        // Known link: reuse the state that is already registered for it.
        if (flagsByItem.ContainsKey(link))
        {
            result.Add(flagsByItem[link], link);
            continue;
        }

        // Unknown link and no index lookup requested: mark it right away.
        if (!useIndex)
        {
            result.Add(indexPageEvaluationEntryState.notInTheIndex, link);
            continue;
        }

        // Unknown link, to be resolved by the page index table after the loop.
        unresolved.Add(link);
    }

    if (!useIndex)
    {
        return result;
    }

    // The index call receives the result instance and appends to it; its return value is not needed here.
    imbWEMManager.index.pageIndexTable.GetUrlAssertion(unresolved, result);
    return result;
}
/// <summary>
/// Gets the URL assertion for a collection of spider links: each link's <c>url</c> is evaluated with
/// <c>GetPageAssertion</c> and stored in the result under the returned state.
/// </summary>
/// <param name="urls">The spider links to evaluate.</param>
/// <returns>A new assertion result with one entry per supplied link.</returns>
public indexURLAssertionResult GetUrlAssertion(IEnumerable<spiderLink> urls)
{
    indexURLAssertionResult result = new indexURLAssertionResult();

    foreach (spiderLink link in urls)
    {
        result.Add(GetPageAssertion(link.url), link.url);
    }

    return result;
}
/// <summary>
/// Gets the URL assertion for a collection of URL strings: each URL is evaluated with
/// <c>GetPageAssertion</c> and stored in the result under the returned state.
/// </summary>
/// <param name="urls">The URLs to evaluate.</param>
/// <param name="output">
/// Optional existing result to append to. When <c>null</c>, a new result instance is created.
/// </param>
/// <returns>
/// The result that received the entries: <paramref name="output"/> when it was supplied, otherwise the new instance.
/// </returns>
public indexURLAssertionResult GetUrlAssertion(IEnumerable<string> urls, indexURLAssertionResult output = null)
{
    indexURLAssertionResult result = output;
    if (result == null)
    {
        result = new indexURLAssertionResult();
    }

    foreach (string address in urls)
    {
        result.Add(GetPageAssertion(address), address);
    }

    return result;
}
/// <summary>
/// Performs domain index assertion: for every requested domain it computes a set of
/// <see cref="indexDomainContentEnum"/> flags describing whether the domain is indexed, whether all of its
/// indexed pages have evaluation entries, whether the domain-level TF-IDF file exists, and whether the
/// pages carry lemma / info-prize data.
/// </summary>
/// <param name="domainList">
/// The domains to check. When <c>null</c> or empty, every domain returned by <c>GetList()</c> is checked.
/// Note: an empty list supplied by the caller is filled in place with those domains.
/// </param>
/// <param name="completeRecheck">
/// If set to <c>true</c>, <c>recheck</c> is invoked on each known domain with its page list before the flags are computed.
/// </param>
/// <returns>The assertion result with one flag set per processed domain URL.</returns>
public indexDomainAssertionResult GetDomainIndexAssertion(List<string> domainList = null, bool completeRecheck = false)
{
    indexDomainAssertionResult output = new indexDomainAssertionResult();
    List<indexDomain> iList = GetList();

    if (domainList == null)
    {
        domainList = new List<string>();
    }

    if (!domainList.Any())
    {
        // No explicit selection: fall back to every domain known to this table.
        iList.ForEach(x => domainList.Add(x.domain));
    }

    foreach (string domainUrl in domainList)
    {
        indexDomain idomain = GetDomain(domainUrl);
        if (idomain == null)
        {
            // Unknown domain: reported with no flags set.
            output.Add(indexDomainContentEnum.none, domainUrl);
            continue;
        }

        indexDomainContentEnum flags = indexDomainContentEnum.indexed;

        List<indexPage> pageList = imbWEMManager.index.pageIndexTable.GetPagesForDomain(domainUrl);
        if (completeRecheck)
        {
            idomain.recheck(pageList);
        }

        List<string> pageUrls = pageList.Select(x => x.url).ToList();
        indexURLAssertionResult pageListResult = imbWEMManager.index.pageIndexTable.GetUrlAssertion(pageUrls);

        // Evaluation is treated as complete when the "in the index" and "have evaluation entry" buckets are the same size.
        bool allPagesEvaluated =
            pageListResult[indexPageEvaluationEntryState.inTheIndex].Count() ==
            pageListResult[indexPageEvaluationEntryState.haveEvaluationEntry].Count();
        flags |= allPagesEvaluated
            ? indexDomainContentEnum.completeEvaluationPages
            : indexDomainContentEnum.uncompleteEvaluationPages;

        // The file is requested in "existing" mode, so its presence is taken to mean that the
        // domain-level TF-IDF has been compiled; a missing file means it has not.
        FileInfo dlc_tf_idf = imbWEMManager.index.experimentManager.CurrentSession.GetTFIDF_DLC_File(idomain, getWritableFileMode.existing);
        if (dlc_tf_idf.Exists)
        {
            flags |= indexDomainContentEnum.completeDomainTFIDF;
            idomain.TFIDFcompiled = true;
        }
        else
        {
            flags |= indexDomainContentEnum.uncompleteDomainTFIDF;
            idomain.TFIDFcompiled = false;
        }

        // A single page with no lemmas, zero info prize and no distinct lemmas marks the application as incomplete.
        bool appUncomplete = pageList.Any(p =>
            (p.Lemmas == 0) && (p.InfoPrize == 0) && p.DistinctLemmas.isNullOrEmpty());
        flags |= appUncomplete
            ? indexDomainContentEnum.uncompleteTFDFApplicationToPages
            : indexDomainContentEnum.completeTFDFApplicationToPages;

        output.Add(flags, domainUrl);
    }

    return output;
}