예제 #1
0
        /// <summary>
        /// Commits every staged (temp*) structure to its destination via
        /// <c>Replace</c>, then empties all staging and bookkeeping collections.
        /// </summary>
        /// <remarks>
        /// The original summary describes this as writing the dictionaries to disk;
        /// the implementation of <c>Replace</c> is not visible from this method.
        /// The commit order is significant and must be kept: document weights are
        /// written before the weight map is replaced, and tiers are created before
        /// the tier maps are replaced (presumably because those calls fill the
        /// staging collections - confirm against their implementations).
        /// </remarks>
        public void Save()
        {
            // --- Commit phase: order matters, do not rearrange. ---

            // Postings first.
            postingMap.Replace(tempPostingMap);

            // Document weights, followed by their lookup map.
            WriteDocWeights();
            docWeigthsHashMap.Replace(tempDocWeightsHashMap);

            // Tiers are built, then each tier is committed.
            CreateTiers();
            tier1.Replace(tempTier1);
            tier2.Replace(tempTier2);
            tier3.Replace(tempTier3);

            // --- Reset phase: every commit above has finished by now. ---

            // Staging maps that were just committed.
            tempPostingMap.Clear();
            tempDocWeightsHashMap.Clear();
            tempTier1.Clear();
            tempTier2.Clear();
            tempTier3.Clear();

            // Per-run statistics and intermediate buffers.
            tempTermFreq.Clear();
            calculatedDocWeights.Clear();
            docByteSize.Clear();
            tokensPerDocument.Clear();
            averageTermFreqPerDoc.Clear();
        }
 /// <summary>
 /// Hands the staged entries in <c>tempMap</c> to <c>map</c> through
 /// <c>Replace</c>, then empties <c>tempMap</c>.
 /// </summary>
 /// <remarks>
 /// NOTE(review): what <c>Replace</c> does with the entries (for example
 /// persisting them) is not visible here - confirm against its implementation.
 /// </remarks>
 public void Save()
 {
     // Commit first; the staging map must still be populated at this point.
     map.Replace(tempMap);
     // Safe to drop the staged entries only after Replace has returned.
     tempMap.Clear();
 }
예제 #3
0
        /// <summary>
        /// Builds the k-gram index for the given vocabulary and hands it to the
        /// backing maps.
        /// </summary>
        /// <remarks>
        /// Two mappings are accumulated in the staging dictionaries:
        /// <list type="number">
        /// <item><description>
        /// <c>tempMap</c>: each k-gram of <c>"$" + term + "$"</c> (split with
        /// <c>this.size</c>) mapped to the terms it was produced from.
        /// </description></item>
        /// <item><description>
        /// <c>tempMiniMap</c>: each shorter gram of those k-grams (split sizes
        /// 0 to <c>this.size - 1</c>) mapped to the full k-grams it was produced
        /// from, so that wildcard queries shorter than <c>this.size</c> can be
        /// handled.
        /// </description></item>
        /// </list>
        /// Both are then passed to <c>map.Replace</c> / <c>miniMap.Replace</c> and
        /// the staging dictionaries are cleared. Progress is reported on the console.
        /// </remarks>
        /// <param name="vocabularies">Set of unique vocabulary terms to index.</param>
        /// <returns>This instance.</returns>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="vocabularies"/> is <c>null</c>.
        /// </exception>
        public DiskKGram buildKGram(HashSet<string> vocabularies)
        {
            // Fail fast with a descriptive exception instead of a
            // NullReferenceException after logging has already started.
            if (vocabularies == null)
            {
                throw new ArgumentNullException("vocabularies");
            }

            Console.WriteLine("Start KGram generating process...");
            Console.WriteLine("Vocbularies' size: " + vocabularies.Count);
            Console.WriteLine("KGram size: " + this.size);

            Console.WriteLine("Building full size KGrams....");
            foreach (string vocab in vocabularies)
            {
                // The term is wrapped in "$" on both sides before splitting.
                foreach (string kGram in this.KGramSplitter("$" + vocab + "$", this.size))
                {
                    // Single lookup instead of ContainsKey followed by the indexer.
                    List<string> terms;
                    if (tempMap.TryGetValue(kGram, out terms))
                    {
                        terms.Add(vocab);
                    }
                    else
                    {
                        tempMap.Add(kGram, new List<string> { vocab });
                    }
                }
            }

            // Build lesser k-grams to handle wildcard queries shorter than size.
            Console.WriteLine("Building lesser size KGrams....");
            foreach (string kGram in tempMap.Keys)
            {
                // NOTE(review): the range starts at 0 as in the original; whatever
                // KGramSplitter yields for size 0 is only kept if it passes the
                // blank / "$" filter below. Confirm whether k = 0 is needed at all.
                for (int k = 0; k < this.size; k++)
                {
                    foreach (string miniKGram in this.KGramSplitter(kGram, k))
                    {
                        // Skip blank grams and the bare "$" marker.
                        if (string.IsNullOrWhiteSpace(miniKGram) || miniKGram == "$")
                        {
                            continue;
                        }

                        List<string> fullKGrams;
                        if (tempMiniMap.TryGetValue(miniKGram, out fullKGrams))
                        {
                            fullKGrams.Add(kGram);
                        }
                        else
                        {
                            tempMiniMap.Add(miniKGram, new List<string> { kGram });
                        }
                    }
                }
            }

            // Hand the staged dictionaries over to the backing maps.
            map.Replace(tempMap);
            miniMap.Replace(tempMiniMap);

            // Print results.
            Console.WriteLine("KGram's size: " + map.GetSize());
            Console.WriteLine("Lesser KGram's size: " + miniMap.GetSize());

            // NOTE(review): this message is printed after the Replace calls above
            // and no further write happens in this method; kept for output
            // compatibility.
            Console.WriteLine("Write K-Gram to disk...");

            Console.WriteLine("Complete KGram generating process");

            // Staging dictionaries are no longer needed once Replace has run.
            tempMap.Clear();
            tempMiniMap.Clear();
            return this;
        }