Exemplo n.º 1
0
        /// <summary>
        /// Counts the <c>mcChunk</c>-level subjects found in <paramref name="chunks"/> and forwards
        /// the resulting counter to the counter-based <c>process</c> overload.
        /// </summary>
        /// <param name="chunks">Subjects to count; every element is cast to <c>pipelineTaskSubjectContentToken</c>.</param>
        /// <param name="document_level">Not read by this overload.</param>
        /// <param name="table">The table handed on to the counter-based overload.</param>
        /// <param name="parser">Not read by this overload.</param>
        /// <param name="logger">The logger handed on to the counter-based overload.</param>
        /// <param name="forSingleWebSite">Flag handed on to the counter-based overload.</param>
        /// <returns>Whatever the counter-based <c>process</c> overload returns.</returns>
        public webLemmaTermTable process(IEnumerable <IPipelineTaskSubject> chunks, cnt_level document_level, webLemmaTermTable table, ITextResourceResolver parser = null, ILogBuilder logger = null, Boolean forSingleWebSite = false)
        {
            TFDFCounter termCounter = new TFDFCounter();

            // Work on a private snapshot, ordered ordinally by current form.
            List<IPipelineTaskSubject> ordered = chunks.ToList();
            ordered.Sort((left, right) => String.CompareOrdinal(left.currentForm, right.currentForm));

            foreach (IPipelineTaskSubject subject in ordered)
            {
                // Hard cast on purpose: an element of any other type raises InvalidCastException.
                pipelineTaskSubjectContentToken chunk = (pipelineTaskSubjectContentToken)subject;

                if (chunk.contentLevelType != cnt_level.mcChunk)
                {
                    continue;
                }

                termCounter.Add(chunk.currentForm, chunk);
            }

            return process(termCounter, logger, table, forSingleWebSite);
        }
Exemplo n.º 2
0
        /// <summary>
        /// Builds a <see cref="TFDFCounter"/> from the <c>mcTokenStream</c>-level subjects in
        /// <paramref name="source"/>.
        /// </summary>
        /// <param name="source">Subjects to count; every element is cast to <c>pipelineTaskSubjectContentToken</c>.</param>
        /// <returns>A new counter holding one entry per token-stream subject, added in ordinal order of <c>currentForm</c>.</returns>
        public TFDFCounter prepareCounter(IEnumerable <IPipelineTaskSubject> source)
        {
            TFDFCounter counter = new TFDFCounter();

            // Materialize once: a lazy source is enumerated a single time and the sort has something to act on.
            var listChunks = source.ToList();

            listChunks.Sort((x, y) => String.CompareOrdinal(x.currentForm, y.currentForm));

            // Walk the sorted snapshot rather than source, so the ordering above actually takes effect.
            foreach (pipelineTaskSubjectContentToken mcSubject in listChunks)
            {
                if (mcSubject.contentLevelType == cnt_level.mcTokenStream)
                {
                    counter.Add(mcSubject.currentForm, mcSubject);
                }
            }

            return(counter);
        }
Exemplo n.º 3
0
        /// <summary>
        /// Counts the letters-only <c>mcToken</c>-level subjects of <paramref name="source"/> (lower-cased)
        /// and forwards the counter to the name/parser/counter-based <c>process</c> overload.
        /// If <paramref name="table"/> already contains items, nothing is built and it is returned as is.
        /// </summary>
        /// <param name="source">Subjects from which the <c>mcToken</c>-level subjects are collected.</param>
        /// <param name="document_level">Not read by this overload.</param>
        /// <param name="table">Target table. Must not be <c>null</c> even though the parameter is optional.</param>
        /// <param name="parser">Resolver handed on to the forwarding overload.</param>
        /// <param name="logger">Optional log output; when <c>null</c> the halt message is skipped.</param>
        /// <param name="forSingleWebSite">Flag handed on to the forwarding overload and shown in the halt message.</param>
        /// <returns><paramref name="table"/> itself when it is already populated; otherwise the result of the forwarding overload.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="table"/> is <c>null</c>.</exception>
        public webLemmaTermTable process(IEnumerable <IPipelineTaskSubject> source, cnt_level document_level, webLemmaTermTable table = null,
                                         ITextResourceResolver parser = null, ILogBuilder logger = null, Boolean forSingleWebSite = false)
        {
            // table is dereferenced unconditionally below; fail with a descriptive exception instead of a bare NullReferenceException.
            if (table == null)
            {
                throw new ArgumentNullException("table");
            }

            if (table.Count > 0)
            {
                // logger is optional, so the halt path must not depend on it.
                if (logger != null)
                {
                    logger.log("THIS TABLE " + table.name + " ALREADY HAS [" + table.Count + "] ITEMS --- HALTING BUILD [For single web site: " + forSingleWebSite + "]");
                }

                // Beep only while DoBeep is still 1; the increment stops later calls from beeping again.
                if (DoBeep == 1)
                {
                    imbACE.Services.terminal.aceTerminalInput.doBeepViaConsole(1200, 250);
                    Interlocked.Increment(ref DoBeep);
                }
                return(table);
            }

            TFDFCounter counter = new TFDFCounter();

            // Snapshot the source while holding getAllChildrenLock.
            // NOTE(review): presumably this guards against the subject tree being modified concurrently - confirm.
            lock (getAllChildrenLock)
            {
                source = source.ToList();
            }

            List <IPipelineTaskSubject> rkns = source.GetSubjectsOfLevel <IPipelineTaskSubject>(new cnt_level[] { cnt_level.mcToken });

            rkns.Sort((x, y) => String.CompareOrdinal(x.currentForm, y.currentForm));

            foreach (var tkn in rkns)
            {
                // Only tokens flagged as letters-only are counted; no minimum-length filter is applied.
                if (tkn.flagBag.ContainsAll(tkn_contains.onlyLetters))
                {
                    counter.Add(tkn.currentForm.ToLower(), tkn);
                }
            }

            return(process(table.name, parser, counter, logger, table, forSingleWebSite));
        }