/// <summary>
/// Builds the corrector: pre-processes the corpora into sentences, round-trips them
/// through <c>Sentence.xml</c>, optionally gathers feature frequencies for pruning,
/// and then generates training data and trains once per confusion set (in parallel).
/// </summary>
/// <param name="posTagger">Part-of-speech tagger stored for later use by this instance.</param>
/// <param name="corpora">Corpora handed to <c>PreProcessCorpora</c> to produce the training sentences.</param>
/// <param name="confusionSets">Confusion sets; each one is trained independently.</param>
/// <param name="prune">
/// When <c>true</c>, feature frequencies are computed from the sentences and passed to
/// training-data generation; otherwise an empty frequency table is passed.
/// </param>
public ContextSensitiveSpellingCorrection(IPOSTagger posTagger, IEnumerable <string> corpora, IEnumerable <string[]> confusionSets, bool prune)
        {
            // Materialize once: the sequence is needed both for sizing and for training,
            // and a lazy enumerable could otherwise be evaluated twice.
            List<string[]> confusionSetList = confusionSets.ToList();

            _posTagger = posTagger;
            _contextFeaturesExtractor      = new ContextFeaturesExtractor(k);
            _collocationtFeaturesExtractor = new CollocationFeaturesExtractor(l);
            _statsHelper = new StatsHelper();
            _comparators = new List<Comparator>(confusionSetList.Count);

            Sentence[] sentences = PreProcessCorpora(corpora).ToArray();

            /* The processed corpus is serialized so it can be reloaded quickly between trials. */
            XmlSerializer serializer = new XmlSerializer(typeof(Sentence[]));

            // FileMode.Create makes the file if it is missing and truncates it if it exists;
            // FileMode.Open would throw on a first run and leave stale trailing bytes when
            // the new document is shorter than the previous one.
            using (FileStream writeStream = new FileStream(@"Sentence.xml", FileMode.Create))
            {
                serializer.Serialize(writeStream, sentences);
            }

            // Dispose the read stream as well so the file handle is not leaked.
            using (FileStream readStream = new FileStream(@"Sentence.xml", FileMode.Open))
            {
                sentences = (Sentence[])serializer.Deserialize(readStream);
            }
            Console.WriteLine("Deserialize complete");

            var featureFrequencies = new Dictionary<string, Dictionary<string, int>>(StringComparer.OrdinalIgnoreCase);

            if (prune)
            {
                /* Pre-compute term frequencies; only needed when pruning is requested. */
                featureFrequencies = _statsHelper.GetFrequencies(sentences);
            }

            // NOTE(review): Train runs concurrently for different confusion sets. If it adds to
            // _comparators (a plain List), that access needs synchronization -- confirm in Train.
            Parallel.ForEach(confusionSetList, confusionSet =>
            {
                TrainingData output = GenerateTrainingData(sentences, prune, featureFrequencies, confusionSet);

                Train(confusionSet, output.Features.ToArray(), output.Samples);
            });
        }