/// <summary>
/// Reads the documents whose keys follow <paramref name="start_index_key"/>, turns their words
/// into index entries, merges those entries into <c>index_row</c>, re-sorts it, collapses
/// duplicates and saves the table to disk.
/// </summary>
/// <param name="dict_with_index_paths">Maps a document key to the path of the file to read.</param>
/// <param name="stopwords">Words to exclude; each word is checked against this list before it is lower-cased.</param>
/// <param name="start_index_key">Processing starts at the key immediately after this value.</param>
public void updateWithNewDoc(Dictionary<int, string> dict_with_index_paths, List<string> stopwords, int start_index_key)
{
    ArrayList index_array_to_update = new ArrayList();

    // NOTE(review): the upper bound is the number of entries, so this presumably assumes the
    // keys are contiguous and zero-based; a missing key makes the indexer throw — confirm with callers.
    for (int index = start_index_key + 1; index < dict_with_index_paths.Count; index++)
    {
        string text;

        // Dispose the reader as soon as the file is read so the file handle is not leaked.
        using (StreamReader reader = new StreamReader(dict_with_index_paths[index]))
        {
            text = reader.ReadToEnd();
        }

        // Split on runs of non-word characters.
        string[] words = Regex.Split(text, @"\W+");
        foreach (string word in words)
        {
            // Length > 1 also rules out the empty strings Regex.Split can produce.
            // NOTE(review): the stopword test sees the original casing, not the lower-cased
            // form that is stored — verify this is intended.
            if (!stopwords.Contains(word) && word.Length > 1)
            {
                index_array_to_update.Add(new Index(word.ToLower(), 1, index.ToString()));
            }
        }
    }

    IndexCompareWords sort_index = new IndexCompareWords();
    index_row.AddRange(index_array_to_update);
    index_row.Sort(sort_index);

    foreach (string duplicate in findDuplicates(index_row))
    {
        reduceDuplicateAndUpdateIndexTable(index_row, duplicate);
    }

    saveIndexTableToDisk(index_row);
}
/// <summary>
/// Reads the documents whose keys follow <paramref name="start_index_key"/>, turns their words
/// into index entries, merges those entries into <c>index_row</c>, re-sorts it, collapses
/// duplicates and saves the table to disk.
/// </summary>
/// <param name="dict_with_index_paths">Maps a document key to the path of the file to read.</param>
/// <param name="stopwords">Words to exclude; each word is checked against this list before it is lower-cased.</param>
/// <param name="start_index_key">Processing starts at the key immediately after this value.</param>
public void updateWithNewDoc(Dictionary <int, string> dict_with_index_paths, List <string> stopwords, int start_index_key)
{
    ArrayList index_array_to_update = new ArrayList();

    // NOTE(review): the upper bound is the number of entries, so this presumably assumes the
    // keys are contiguous and zero-based; a missing key makes the indexer throw — confirm with callers.
    for (int index = start_index_key + 1; index < dict_with_index_paths.Count; index++)
    {
        string text;

        // Dispose the reader as soon as the file is read so the file handle is not leaked.
        using (StreamReader reader = new StreamReader(dict_with_index_paths[index]))
        {
            text = reader.ReadToEnd();
        }

        // Split on runs of non-word characters.
        string[] words = Regex.Split(text, @"\W+");
        foreach (string word in words)
        {
            // Length > 1 also rules out the empty strings Regex.Split can produce.
            // NOTE(review): the stopword test sees the original casing, not the lower-cased
            // form that is stored — verify this is intended.
            if (!stopwords.Contains(word) && word.Length > 1)
            {
                index_array_to_update.Add(new Index(word.ToLower(), 1, index.ToString()));
            }
        }
    }

    IndexCompareWords sort_index = new IndexCompareWords();
    index_row.AddRange(index_array_to_update);
    index_row.Sort(sort_index);

    foreach (string duplicate in findDuplicates(index_row))
    {
        reduceDuplicateAndUpdateIndexTable(index_row, duplicate);
    }

    saveIndexTableToDisk(index_row);
}
/// <summary>
/// Builds the index table from every document in <paramref name="dict_with_index_paths"/>:
/// reads each file, adds one entry per accepted word to <c>index_row</c>, sorts the entries,
/// collapses duplicates and saves the table to disk.
/// </summary>
/// <param name="dict_with_index_paths">Maps a document key to the path of the file to read.</param>
/// <param name="stopwords">Words to exclude; each word is checked against this list before it is lower-cased.</param>
public IndexTable(Dictionary<int, string> dict_with_index_paths, List<string> stopwords)
{
    index_row = new ArrayList();

    foreach (var item in dict_with_index_paths)
    {
        string text;

        // Dispose the reader as soon as the file is read so the file handle is not leaked.
        using (StreamReader reader = new StreamReader(item.Value))
        {
            text = reader.ReadToEnd();
        }

        // Split on runs of non-word characters.
        string[] words = Regex.Split(text, @"\W+");
        foreach (string word in words)
        {
            // Length > 1 also rules out the empty strings Regex.Split can produce.
            // NOTE(review): the stopword test sees the original casing, not the lower-cased
            // form that is stored — verify this is intended.
            if (!stopwords.Contains(word) && word.Length > 1)
            {
                index_row.Add(new Index(word.ToLower(), 1, item.Key.ToString()));
            }
        }
    }

    IndexCompareWords sort_index = new IndexCompareWords();
    index_row.Sort(sort_index);

    foreach (string duplicate in findDuplicates(index_row))
    {
        reduceDuplicateAndUpdateIndexTable(index_row, duplicate);
    }

    saveIndexTableToDisk(index_row);
}
/// <summary>
/// Builds the index table from every document in <paramref name="dict_with_index_paths"/>:
/// reads each file, adds one entry per accepted word to <c>index_row</c>, sorts the entries,
/// collapses duplicates and saves the table to disk.
/// </summary>
/// <param name="dict_with_index_paths">Maps a document key to the path of the file to read.</param>
/// <param name="stopwords">Words to exclude; each word is checked against this list before it is lower-cased.</param>
public IndexTable(Dictionary <int, string> dict_with_index_paths, List <string> stopwords)
{
    index_row = new ArrayList();

    foreach (var item in dict_with_index_paths)
    {
        string text;

        // Dispose the reader as soon as the file is read so the file handle is not leaked.
        using (StreamReader reader = new StreamReader(item.Value))
        {
            text = reader.ReadToEnd();
        }

        // Split on runs of non-word characters.
        string[] words = Regex.Split(text, @"\W+");
        foreach (string word in words)
        {
            // Length > 1 also rules out the empty strings Regex.Split can produce.
            // NOTE(review): the stopword test sees the original casing, not the lower-cased
            // form that is stored — verify this is intended.
            if (!stopwords.Contains(word) && word.Length > 1)
            {
                index_row.Add(new Index(word.ToLower(), 1, item.Key.ToString()));
            }
        }
    }

    IndexCompareWords sort_index = new IndexCompareWords();
    index_row.Sort(sort_index);

    foreach (string duplicate in findDuplicates(index_row))
    {
        reduceDuplicateAndUpdateIndexTable(index_row, duplicate);
    }

    saveIndexTableToDisk(index_row);
}