/// <summary>
/// Reads every file in <paramref name="files"/> from the directory <paramref name="path"/>
/// and builds one <c>MyDictionary</c> per file that records each normalized word together
/// with the character offset at which its token starts in the file's text.
/// </summary>
/// <param name="path">Directory that contains the files; combined with each file's <c>Name</c>.</param>
/// <param name="files">Files to process. Only <c>FileInfo.Name</c> is used to locate each file.</param>
/// <returns>
/// A list with one dictionary per input file, in the same order as <paramref name="files"/>.
/// </returns>
/// <remarks>
/// A word may occur many times in a file, so each occurrence is reported separately through
/// <c>MyDictionary.CheckAndAdd(word, index)</c> (presumably a word -> list-of-offsets map;
/// confirm against the MyDictionary implementation).
/// Offsets are measured in characters of the decoded text, not in bytes.
/// </remarks>
static List<MyDictionary> Preprocessing(string path, List<FileInfo> files)
{
    // Tokens that are never treated as words. Because the text is split on ' ' and '\n',
    // only "-" and "\r" can actually occur as whole tokens; the remaining entries are kept
    // for parity with the original filter.
    string[] exceptArray = { "-", "\n", " ", "\r", "\r\n" };
    char[] splitArray = { ' ', '\n' };

    // Created once: lower-casing always uses the same fixed culture.
    CultureInfo lowerCaseCulture = new CultureInfo("en-US", false);

    List<MyDictionary> listOfDicts = new List<MyDictionary>(files.Count);

    foreach (FileInfo file in files)
    {
        // Path.Combine picks the correct directory separator for the platform.
        string filePath = Path.Combine(path, file.Name);

        // Read the whole file, then release the handle before processing the text.
        string text;
        using (StreamReader reader = new StreamReader(filePath))
        {
            text = reader.ReadToEnd();
        }

        MyDictionary filteredWordsandIndexes = new MyDictionary();
        int indexAtFile = 0;

        foreach (string token in text.Split(splitArray))
        {
            // The offset of this token is the running position before it is consumed.
            int thisWordsIndex = indexAtFile;

            // Advance past the token and the single separator character that followed it.
            // This must happen for EVERY token, including the ones skipped below, otherwise
            // the offsets of all later words in the file drift.
            indexAtFile += token.Length + 1;

            if (exceptArray.Contains(token))
            {
                continue;
            }

            // Strip punctuation and stray line-ending characters, then normalize case.
            string wordFiltered = token
                .Replace(",", "")
                .Replace(".", "")
                .Replace("\r", "")
                .Replace("\n", "")
                .ToLower(lowerCaseCulture);

            // Empty results come from consecutive separators or punctuation-only tokens.
            if (wordFiltered.Length > 0)
            {
                filteredWordsandIndexes.CheckAndAdd(wordFiltered, thisWordsIndex);
            }
        }

        listOfDicts.Add(filteredWordsandIndexes);
    }

    return listOfDicts;
}