コード例 #1
0
        /// <summary>
        /// Reads the documents that follow <paramref name="start_index_key"/>, adds one entry per
        /// accepted word to <c>index_row</c>, re-sorts the table, reduces duplicate words and
        /// saves the result to disk.
        /// </summary>
        /// <param name="dict_with_index_paths">Maps a document key to the path of the file to read.</param>
        /// <param name="stopwords">Words that are skipped and never added to the index.</param>
        /// <param name="start_index_key">Reading starts at key <c>start_index_key + 1</c>.</param>
        public void updateWithNewDoc(Dictionary<int, string> dict_with_index_paths, List<string> stopwords, int start_index_key)
        {
            ArrayList index_array_to_update = new ArrayList();

            // NOTE(review): the upper bound is the dictionary's Count, so this presumably expects
            // contiguous keys starting at 0 - confirm against the caller.
            for (int index = start_index_key + 1; index < dict_with_index_paths.Count; index++)
            {
                string text;

                // Dispose the reader as soon as the file is read so the file handle is not leaked.
                using (StreamReader reader = new StreamReader(dict_with_index_paths[index]))
                {
                    text = reader.ReadToEnd();
                }

                foreach (string word in Regex.Split(text, @"\W+"))
                {
                    // Length > 1 also rejects the empty strings Regex.Split can yield at the edges.
                    // NOTE(review): the stop-word test uses the original casing while the stored
                    // term is lower-cased - confirm the expected casing of the stopwords list.
                    if (word.Length > 1 && !stopwords.Contains(word))
                    {
                        index_array_to_update.Add(new Index(word.ToLower(), 1, index.ToString()));
                    }
                }
            }

            index_row.AddRange(index_array_to_update);
            index_row.Sort(new IndexCompareWords());

            foreach (string duplicate in findDuplicates(index_row))
            {
                reduceDuplicateAndUpdateIndexTable(index_row, duplicate);
            }

            saveIndexTableToDisk(index_row);
        }
コード例 #2
0
        /// <summary>
        /// Extends the index with the documents stored under the keys after
        /// <paramref name="start_index_key"/>: every accepted word becomes a new entry in
        /// <c>index_row</c>, which is then sorted, reduced for duplicates and saved to disk.
        /// </summary>
        /// <param name="dict_with_index_paths">Document key to file path lookup; each path is opened and read in full.</param>
        /// <param name="stopwords">Words excluded from indexing.</param>
        /// <param name="start_index_key">The first key processed is <c>start_index_key + 1</c>.</param>
        public void updateWithNewDoc(Dictionary <int, string> dict_with_index_paths, List <string> stopwords, int start_index_key)
        {
            ArrayList index_array_to_update = new ArrayList();

            // NOTE(review): iterating up to Count presumably relies on keys being 0..Count-1;
            // a missing key makes the indexer throw KeyNotFoundException - verify with the caller.
            for (int index = start_index_key + 1; index < dict_with_index_paths.Count; index++)
            {
                string[] words;

                // The using block closes the underlying file even if ReadToEnd or Split throws.
                using (StreamReader reader = new StreamReader(dict_with_index_paths[index]))
                {
                    words = Regex.Split(reader.ReadToEnd(), @"\W+");
                }

                string document_key = index.ToString();

                foreach (string word in words)
                {
                    // Single-character and empty tokens are dropped by the length test alone.
                    if (word.Length <= 1)
                    {
                        continue;
                    }

                    // NOTE(review): compared with its original casing, stored lower-cased -
                    // confirm this matches how the stopwords list is populated.
                    if (stopwords.Contains(word))
                    {
                        continue;
                    }

                    index_array_to_update.Add(new Index(word.ToLower(), 1, document_key));
                }
            }

            IndexCompareWords sort_index = new IndexCompareWords();

            index_row.AddRange(index_array_to_update);
            index_row.Sort(sort_index);

            foreach (string duplicate in findDuplicates(index_row))
            {
                reduceDuplicateAndUpdateIndexTable(index_row, duplicate);
            }

            saveIndexTableToDisk(index_row);
        }
コード例 #3
0
        /// <summary>
        /// Builds the index table from every document in <paramref name="dict_with_index_paths"/>:
        /// each accepted word is added to <c>index_row</c>, the table is sorted, duplicate words
        /// are reduced and the result is saved to disk.
        /// </summary>
        /// <param name="dict_with_index_paths">Maps a document key to the path of the file to read.</param>
        /// <param name="stopwords">Words that are skipped and never added to the index.</param>
        public IndexTable(Dictionary<int, string> dict_with_index_paths, List<string> stopwords)
        {
            index_row = new ArrayList();

            foreach (var item in dict_with_index_paths)
            {
                string text;

                // Dispose the reader as soon as the file is read so the file handle is not leaked.
                using (StreamReader reader = new StreamReader(item.Value))
                {
                    text = reader.ReadToEnd();
                }

                foreach (string word in Regex.Split(text, @"\W+"))
                {
                    // Length > 1 also rejects the empty strings Regex.Split can yield at the edges.
                    // NOTE(review): the stop-word test uses the original casing while the stored
                    // term is lower-cased - confirm the expected casing of the stopwords list.
                    if (word.Length > 1 && !stopwords.Contains(word))
                    {
                        index_row.Add(new Index(word.ToLower(), 1, item.Key.ToString()));
                    }
                }
            }

            index_row.Sort(new IndexCompareWords());

            foreach (string duplicate in findDuplicates(index_row))
            {
                reduceDuplicateAndUpdateIndexTable(index_row, duplicate);
            }

            saveIndexTableToDisk(index_row);
        }
コード例 #4
0
        /// <summary>
        /// Creates the index table by reading every file referenced in
        /// <paramref name="dict_with_index_paths"/>, adding an entry to <c>index_row</c> for each
        /// accepted word, sorting the entries, reducing duplicates and saving the table to disk.
        /// </summary>
        /// <param name="dict_with_index_paths">Document key to file path lookup; each path is opened and read in full.</param>
        /// <param name="stopwords">Words excluded from indexing.</param>
        public IndexTable(Dictionary <int, string> dict_with_index_paths, List <string> stopwords)
        {
            index_row = new ArrayList();

            foreach (var item in dict_with_index_paths)
            {
                string[] words;

                // The using block closes the underlying file even if ReadToEnd or Split throws.
                using (StreamReader reader = new StreamReader(item.Value))
                {
                    words = Regex.Split(reader.ReadToEnd(), @"\W+");
                }

                string document_key = item.Key.ToString();

                foreach (string word in words)
                {
                    // Single-character and empty tokens are dropped by the length test alone.
                    if (word.Length <= 1)
                    {
                        continue;
                    }

                    // NOTE(review): compared with its original casing, stored lower-cased -
                    // confirm this matches how the stopwords list is populated.
                    if (stopwords.Contains(word))
                    {
                        continue;
                    }

                    index_row.Add(new Index(word.ToLower(), 1, document_key));
                }
            }

            IndexCompareWords sort_index = new IndexCompareWords();

            index_row.Sort(sort_index);

            foreach (string duplicate in findDuplicates(index_row))
            {
                reduceDuplicateAndUpdateIndexTable(index_row, duplicate);
            }

            saveIndexTableToDisk(index_row);
        }