/// <summary>
/// Filters the document set by ranking its documents in descending order of the score computed by
/// <see cref="function"/> and keeping the best <see cref="limit"/> of them.
/// </summary>
/// <remarks>
/// <para>
/// When the kernel is <see cref="DocumentFunctionKernelType.singleCycle"/>, every document is scored once.
/// If the input holds more than <see cref="limit"/> documents, only the top-scored ones are returned;
/// otherwise the result is a copy of the input in its original order.
/// </para>
/// <para>
/// For any other kernel, each pass re-scores the documents still in <paramref name="input"/>, appends the
/// best one to the result and REMOVES it from <paramref name="input"/>. The caller's set is therefore
/// mutated on this path. The number of passes is <see cref="limit"/> for the iterative kernel and one otherwise.
/// Selection stops early once <paramref name="input"/> has no documents left.
/// </para>
/// </remarks>
/// <param name="input">The document set to filter. Documents are removed from it on the non-single-cycle path.</param>
/// <returns>A new <see cref="TextDocumentSet"/>, named after <paramref name="input"/>, with the selected documents.</returns>
public TextDocumentSet FilterDocumentSet(TextDocumentSet input)
{
    Int32 iterations = 1;

    // Start from a full copy: this is what gets returned when no filtering is needed.
    TextDocumentSet output = new TextDocumentSet(input.name);
    output.AddRange(input);

    if (function.kernel == DocumentFunctionKernelType.iterative)
    {
        // Iterative selection builds the result from scratch, one document per pass.
        iterations = limit;
        output.Clear();
    }

    for (int itc = 0; itc < iterations; itc++)
    {
        // Scores are recomputed on every pass against the current content of the input set.
        Dictionary<TextDocumentLayerCollection, Double> docVsScore = new Dictionary<TextDocumentLayerCollection, Double>();

        foreach (TextDocumentLayerCollection textDocument in input)
        {
            docVsScore.Add(textDocument, function.Compute(textDocument, input.name));
        }

        List<KeyValuePair<TextDocumentLayerCollection, Double>> sorted = docVsScore.OrderByDescending(x => x.Value).ToList();

        if (function.kernel == DocumentFunctionKernelType.singleCycle)
        {
            if (sorted.Count > limit)
            {
                output.Clear();

                Int32 c = 0;
                foreach (KeyValuePair<TextDocumentLayerCollection, Double> pair in sorted)
                {
                    output.Add(pair.Key);
                    c++;
                    if (c >= limit)
                    {
                        break;
                    }
                }
            }
        }
        else
        {
            if (sorted.Count == 0)
            {
                // Input exhausted (limit exceeds the number of documents, or the input was empty):
                // there is nothing left to select, so stop instead of failing on an empty sequence.
                break;
            }

            KeyValuePair<TextDocumentLayerCollection, Double> best = sorted[0];
            output.Add(best.Key);

            // Take the winner out so that the next pass ranks only the remaining documents.
            input.Remove(best.Key);
        }
    }

    return output;
}
/// <summary>
/// Renders every document of a web site and collects the results into a single document set.
/// </summary>
/// <param name="site">The web site whose <c>documents</c> are rendered; its <c>domain</c> is passed to the constructor of the resulting set.</param>
/// <param name="logger">The logger. Not used by this method itself.</param>
/// <param name="EnableRendering">Flag forwarded unchanged to <c>RenderText</c> for each document.</param>
/// <returns>A <see cref="TextDocumentSet"/> holding one rendered entry per site document, each named by the document's <c>AssignedID</c>.</returns>
public TextDocumentSet RenderSiteDocuments(WebSiteDocuments site, ILogBuilder logger, Boolean EnableRendering = true)
{
    var renderedSet = new TextDocumentSet(site.domain);

    // Documents are processed sequentially, in the order the site enumerates them.
    foreach (WebSiteDocument page in site.documents)
    {
        TextDocumentLayerCollection renderedPage = RenderText(page, site, EnableRendering);
        renderedPage.name = page.AssignedID;

        renderedSet.Add(renderedPage);
    }

    return renderedSet;
}