コード例 #1
0
        /// <summary>
        /// Reduces a document set to the documents ranked highest by <see cref="function"/>, keeping at most <see cref="limit"/> of them.
        /// </summary>
        /// <remarks>
        /// <para>
        /// Single-cycle kernel: every document is scored once. If the input holds more than <see cref="limit"/> documents,
        /// the result contains only the best ones in descending score order; otherwise the result is a plain copy of the input.
        /// </para>
        /// <para>
        /// Iterative kernel: runs up to <see cref="limit"/> passes. Each pass rescores the documents still present in
        /// <paramref name="input"/>, moves the best one into the result and removes it from <paramref name="input"/>.
        /// Selection stops early when <paramref name="input"/> runs out of documents.
        /// </para>
        /// <para>
        /// Any other kernel value runs one such pick-and-remove pass on top of a full copy of the input.
        /// </para>
        /// </remarks>
        /// <param name="input">The document set to filter. Outside the single-cycle kernel, every selected document is removed from this set.</param>
        /// <returns>A new set, created with the name of <paramref name="input"/>, holding the selected documents.</returns>
        public TextDocumentSet FilterDocumentSet(TextDocumentSet input)
        {
            Int32           iterations = 1;
            TextDocumentSet output     = new TextDocumentSet(input.name);

            output.AddRange(input);

            if (function.kernel == DocumentFunctionKernelType.iterative)
            {
                // Iterative selection builds the result one document per pass, starting from an empty set.
                iterations = limit;
                output.Clear();
            }

            for (Int32 pass = 0; pass < iterations; pass++)
            {
                // Scores are collected in input order; OrderByDescending is a stable sort, so equally
                // scored documents keep that order in the ranking.
                List <KeyValuePair <TextDocumentLayerCollection, Double> > scored = new List <KeyValuePair <TextDocumentLayerCollection, Double> >();

                foreach (TextDocumentLayerCollection textDocument in input)
                {
                    scored.Add(new KeyValuePair <TextDocumentLayerCollection, Double>(textDocument, function.Compute(textDocument, input.name)));
                }

                if (scored.Count == 0)
                {
                    // Nothing left to rank (empty input, or limit larger than the number of documents):
                    // stop instead of calling First() on an empty sequence.
                    break;
                }

                List <KeyValuePair <TextDocumentLayerCollection, Double> > ranked = scored.OrderByDescending(x => x.Value).ToList();

                if (function.kernel == DocumentFunctionKernelType.singleCycle)
                {
                    if (ranked.Count > limit)
                    {
                        output.Clear();

                        // At least one document is always kept, even when limit is below 1.
                        foreach (var best in ranked.Take(Math.Max(1, limit)))
                        {
                            output.Add(best.Key);
                        }
                    }
                }
                else
                {
                    // Move the current best document from the input into the result.
                    var winner = ranked[0];
                    output.Add(winner.Key);
                    input.Remove(winner.Key);
                }
            }

            return output;
        }
コード例 #2
0
        /// <summary>
        /// Builds a <see cref="TextDocumentSet"/> for a web site by rendering each of its pages.
        /// </summary>
        /// <param name="site">The site whose <c>documents</c> are rendered; its <c>domain</c> is passed to the constructor of the resulting set.</param>
        /// <param name="logger">The logger. Not used by this method.</param>
        /// <param name="EnableRendering">Passed through unchanged to <c>RenderText</c> for every page.</param>
        /// <returns>The set holding one rendered entry per page, each named after the page's <c>AssignedID</c>.</returns>
        public TextDocumentSet RenderSiteDocuments(WebSiteDocuments site, ILogBuilder logger, Boolean EnableRendering = true)
        {
            var renderedSet = new TextDocumentSet(site.domain);

            // Pages are handled one at a time, in the order site.documents enumerates them.
            foreach (WebSiteDocument page in site.documents)
            {
                TextDocumentLayerCollection rendered = RenderText(page, site, EnableRendering);
                rendered.name = page.AssignedID;

                renderedSet.Add(rendered);
            }

            return renderedSet;
        }