/// <summary>
/// Builds the corrector: pre-processes the corpora, round-trips the processed
/// sentences through <c>Sentence.xml</c>, then generates training data and
/// calls <c>Train</c> once per confusion set, in parallel.
/// </summary>
/// <param name="posTagger">Tagger stored in <c>_posTagger</c>.</param>
/// <param name="corpora">Raw corpora handed to <c>PreProcessCorpora</c>.</param>
/// <param name="confusionSets">Confusion sets; each one is processed independently.</param>
/// <param name="prune">
/// When true, feature frequencies are computed up front with
/// <c>_statsHelper.GetFrequencies</c>; otherwise an empty frequency table is passed on.
/// </param>
public ContextSensitiveSpellingCorrection(IPOSTagger posTagger, IEnumerable<string> corpora, IEnumerable<string[]> confusionSets, bool prune)
{
    const string sentenceCachePath = @"Sentence.xml";

    _posTagger = posTagger;
    _contextFeaturesExtractor = new ContextFeaturesExtractor(k);
    _collocationtFeaturesExtractor = new CollocationFeaturesExtractor(l);
    _statsHelper = new StatsHelper();

    // Materialize once: the sequence is needed both for sizing the comparator
    // list and for the parallel loop, and the caller may pass a lazy query.
    List<string[]> confusionSetList = confusionSets.ToList();
    _comparators = new List<Comparator>(confusionSetList.Count);

    Sentence[] sentences = PreProcessCorpora(corpora).ToArray();

    /* processed corpus was serialized for faster results between trials */
    XmlSerializer serializer = new XmlSerializer(typeof(Sentence[]));

    // FileMode.Create (not Open): the cache file may not exist yet, and an
    // existing longer file must be truncated or stale trailing bytes would
    // leave malformed XML behind.
    using (FileStream writeStream = new FileStream(sentenceCachePath, FileMode.Create))
    {
        serializer.Serialize(writeStream, sentences);
    }

    // Read the cache back; the stream is disposed so the file handle is released.
    using (FileStream readStream = new FileStream(sentenceCachePath, FileMode.Open))
    {
        sentences = (Sentence[])serializer.Deserialize(readStream);
    }

    Console.WriteLine("Deserialize complete");

    var featureFrequencies = new Dictionary<string, Dictionary<string, int>>(StringComparer.OrdinalIgnoreCase);

    if (prune)
    {
        /* preprocess terms' frequencies */
        featureFrequencies = _statsHelper.GetFrequencies(sentences);
    }

    // NOTE(review): Train runs concurrently for different confusion sets. If it
    // writes to shared state such as _comparators it needs synchronization —
    // confirm against Train's implementation.
    Parallel.ForEach(confusionSetList, confusionSet =>
    {
        TrainingData output = GenerateTrainingData(sentences, prune, featureFrequencies, confusionSet);
        Train(confusionSet, output.Features.ToArray(), output.Samples);
    });
}