예제 #1
0
        /// <summary>
        /// Builds the k-gram indexes for the given vocabulary and hands them to the backing maps.
        /// </summary>
        /// <remarks>
        /// Two indexes are built in the temporary maps and then passed to <c>map.Replace</c> and
        /// <c>miniMap.Replace</c>:
        /// <list type="bullet">
        /// <item><description>
        /// each k-gram returned by <c>KGramSplitter("$" + term + "$", this.size)</c> mapped to the
        /// vocabulary terms it came from;
        /// </description></item>
        /// <item><description>
        /// each smaller k-gram returned by <c>KGramSplitter(kGram, k)</c> for <c>k</c> below
        /// <c>this.size</c> mapped to the full-size k-grams it came from.
        /// </description></item>
        /// </list>
        /// Both temporary maps are cleared before returning.
        /// </remarks>
        /// <param name="vocabularies">List of unique vocabularies</param>
        /// <returns>This instance, so calls can be chained.</returns>
        public DiskKGram buildKGram(HashSet<string> vocabularies)
        {
            Console.WriteLine("Start KGram generating process...");
            Console.WriteLine("Vocbularies' size: " + vocabularies.Count);
            Console.WriteLine("KGram size: " + this.size);

            Console.WriteLine("Building full size KGrams....");
            foreach (string vocab in vocabularies)
            {
                // Wrap the term in '$' on both sides before splitting it into k-grams.
                List<string> kGrams = this.KGramSplitter("$" + vocab + "$", this.size);

                foreach (string kGram in kGrams)
                {
                    // Single-lookup get-or-create of the term list for this k-gram.
                    List<string> terms;
                    if (!tempMap.TryGetValue(kGram, out terms))
                    {
                        terms = new List<string>();
                        tempMap.Add(kGram, terms);
                    }

                    terms.Add(vocab);
                }
            }

            // Build lesser k-grams so wildcard queries shorter than this.size can be handled.
            Console.WriteLine("Building lesser size KGrams....");
            foreach (string kGram in tempMap.Keys)
            {
                // NOTE(review): k starts at 0; what KGramSplitter returns for 0 is not visible
                // here. Empty/whitespace results and a bare "$" are discarded below.
                for (int k = 0; k < this.size; k++)
                {
                    foreach (string miniKGram in this.KGramSplitter(kGram, k))
                    {
                        if (string.IsNullOrWhiteSpace(miniKGram) || miniKGram == "$")
                        {
                            continue;
                        }

                        // Single-lookup get-or-create of the parent k-gram list.
                        List<string> parents;
                        if (!tempMiniMap.TryGetValue(miniKGram, out parents))
                        {
                            parents = new List<string>();
                            tempMiniMap.Add(miniKGram, parents);
                        }

                        parents.Add(kGram);
                    }
                }
            }

            // Hand the freshly built indexes to the backing maps.
            map.Replace(tempMap);
            miniMap.Replace(tempMiniMap);

            // Print results
            Console.WriteLine("KGram's size: " + map.GetSize());
            Console.WriteLine("Lesser KGram's size: " + miniMap.GetSize());

            // NOTE(review): no explicit write call follows this message; presumably the
            // Replace calls above persist the data - confirm against the map implementation.
            Console.WriteLine("Write K-Gram to disk...");

            Console.WriteLine("Complete KGram generating process");

            // Clear the temporary maps now that the backing maps have received their contents.
            tempMap.Clear();
            tempMiniMap.Clear();
            return this;
        }
예제 #2
0
 /// <summary>
 /// Gets the number of entries reported by <c>docWeigthsHashMap</c>.
 /// </summary>
 /// <returns>
 /// The value of <c>docWeigthsHashMap.GetSize()</c>; presumably one entry per indexed
 /// document (verify against how the map is populated).
 /// </returns>
 public int GetDocumentsCount()
 {
     int documentCount = docWeigthsHashMap.GetSize();
     return documentCount;
 }