Exemplo n.º 1
0
        /// <summary>
        /// Provides the compiled DLC TF-IDF weight table for the domain of the given site record.
        /// The table is loaded from the existing cache file when that is permitted and the file exists;
        /// otherwise it is built from the terms returned by <c>GetDLCTerms_Heuristics</c>.
        /// </summary>
        /// <param name="__wRecord">Site record; its <c>domain</c> selects the domain index entry and its <c>tRecord.evaluator</c> serves as the fallback evaluator.</param>
        /// <param name="loger">Log builder that receives the progress messages and is handed to the helper calls.</param>
        /// <param name="__useExisting">When <c>true</c> and the cache file exists, the table is loaded from that file and returned immediately. The flag is also forwarded (twice) to <c>GetDLCTerms_Heuristics</c>.</param>
        /// <param name="__saveToCache">Not read anywhere in this method.</param>
        /// <param name="evaluator">Evaluator forwarded to the term extraction; when <c>null</c>, <c>__wRecord.tRecord.evaluator</c> is used instead.</param>
        /// <returns>The weight table loaded from the cache file, or a freshly compiled one named <c>"DLC-TFIDF " + domain</c>.</returns>
        public weightTableCompiled GetOrCreateTFIDF_DLC_Heuristic(modelSpiderSiteRecord __wRecord, builderForLog loger, bool __useExisting, bool __saveToCache, multiLanguageEvaluator evaluator = null)
        {
            indexDomain domainEntry = imbWEMManager.index.domainIndexTable.GetOrCreate(__wRecord.domain);
            FileInfo cacheFile = GetTFIDF_DLC_File(domainEntry, getWritableFileMode.existing);

            // Fast path: reuse the table stored on disk.
            if (cacheFile.Exists && __useExisting)
            {
                weightTableCompiled cachedTable = new weightTableCompiled(cacheFile.FullName, true, domainEntry.domain + "_DLC_TF_IDF");

                loger.log("DLC TF-IDF[" + cachedTable.Count + "] cache found for: " + domainEntry.domain);
                return cachedTable;
            }

            // Evaluator selection: fall back to the one attached to the record.
            evaluator = evaluator ?? __wRecord.tRecord.evaluator;

            loger.log("DLC TF-IDF heuristic construction for: " + domainEntry.domain + " initiated.");

            termDocument termTable = new termDocument { expansion = 1 };

            // NOTE(review): __useExisting is passed for both boolean arguments while __saveToCache
            // is never used in this method - possibly the second one was meant to be __saveToCache.
            // Confirm against the GetDLCTerms_Heuristics signature before changing it.
            var extractedTerms = GetDLCTerms_Heuristics(__wRecord, loger, __useExisting, __useExisting, evaluator, domainEntry);

            termTable.AddTokens(extractedTerms, loger);

            // Ratio of entries in the term table to the number of extracted terms.
            double compressionRatio = termTable.Count().GetRatio(extractedTerms.Count);

            loger.log("[" + domainEntry.domain + "] preprocess finished. DLC TF-IDF terms [" + termTable.Count() + "] - Semantic compression: " + compressionRatio.ToString("P2"));

            weightTableCompiled compiledTable = termTable.GetCompiledTable(loger);
            compiledTable.name = "DLC-TFIDF " + domainEntry.domain;

            return compiledTable;
        }