Beispiel #1
0
        /// <summary>
        /// Replaces the contents of this corpus with the text found in the <c>p</c> (paragraph) elements of the given
        /// XML file. Each paragraph is tokenized through a temporary <see cref="Corpus"/> and the last token of every
        /// non-empty paragraph gets the end-of-paragraph flag. If a <c>teiHeader</c> element was seen, the header is
        /// read afterwards via <c>ReadTeiHeader</c>.
        /// </summary>
        /// <param name="fileName">Path of the XML file to read; must not be null and must pass
        /// <c>Utils.VerifyFileNameOpen</c>.</param>
        public void LoadFromGigaFidaFile(string fileName)
        {
            Utils.ThrowException(fileName == null ? new ArgumentNullException("fileName") : null);
            Utils.ThrowException(!Utils.VerifyFileNameOpen(fileName) ? new ArgumentValueException("fileName") : null);

            bool hasHeader = false;
            mTaggedWords.Clear();
            mTeiHeader = null;
            // Open strictly for reading: the two-argument FileStream constructor asks for read/write access, which
            // fails on read-only files. The using blocks release the handle on every exit path.
            using (FileStream stream = new FileStream(fileName, FileMode.Open, FileAccess.Read, FileShare.Read))
            using (XmlTextReader xmlReader = new XmlTextReader(stream))
            {
                while (xmlReader.Read())
                {
                    if (xmlReader.NodeType != XmlNodeType.Element)
                    {
                        continue;
                    }
                    if (xmlReader.Name == "teiHeader") // header
                    {
                        hasHeader = true;
                        Utils.XmlSkip(xmlReader, "teiHeader");
                    }
                    else if (xmlReader.Name == "p") // paragraph
                    {
                        ThreadHandler.AbortCheckpoint();                                         // TODO: do this at various appropriate places
                        if (xmlReader.IsEmptyElement)
                        {
                            // <p/> has no content node; reading ahead here would swallow the start of the next element.
                            continue;
                        }
                        xmlReader.Read(); // step onto the node that follows the <p> start tag
                        Corpus aux = new Corpus();
                        aux.LoadFromTextSsjTokenizer(xmlReader.Value);
                        if (aux.TaggedWords.Count > 0)
                        {
                            foreach (TaggedWord word in aux.TaggedWords)
                            {
                                word.MoreInfo.RemoveEndOfParagraphFlag();
                                mTaggedWords.Add(word);
                            }
                            // Only the final token of the paragraph carries the end-of-paragraph flag.
                            aux.TaggedWords.Last.MoreInfo.SetEndOfParagraphFlag();
                        }
                    }
                }
            }
            // The reader is closed at this point, so ReadTeiHeader can open the file again.
            if (hasHeader)
            {
                ReadTeiHeader(fileName);
            }
        }
Beispiel #2
0
 /// <summary>
 /// Entry point of the lemmatizer training tool: loads the training corpus (and optionally a lexicon), feeds the
 /// examples to a <c>Lemmatizer</c>, builds and optimizes the model and writes it to the model file.
 /// Any exception is reported on the console instead of being propagated.
 /// </summary>
 /// <param name="args">Command-line arguments; fewer than two prints the help text, otherwise they are
 /// interpreted by <c>ParseParams</c>.</param>
 static void Main(string[] args)
 {
     try
     {
         if (args.Length < 2)
         {
             OutputHelp();
             return;
         }
         string corpusFileName = null, modelFileName = null, lexiconFileName = null;
         bool   considerTag = false;
         bool   treeOpt     = false;
         bool   verbose     = false;
         if (!ParseParams(args, ref verbose, ref considerTag, ref treeOpt, ref corpusFileName, ref modelFileName, ref lexiconFileName))
         {
             return;
         }
         Logger logger = Logger.GetRootLogger();
         if (!verbose)
         {
             logger.LocalLevel = Logger.Level.Off;
             logger.LocalProgressOutputType = Logger.ProgressOutputType.Off;
         }
         else
         {
             logger.LocalOutputType = Logger.OutputType.Custom;
             Logger.CustomOutput    = new Logger.CustomOutputDelegate(delegate(string loggerName, Logger.Level level, string funcName, Exception e,
                                                                               string message, object[] msgArgs) { Console.WriteLine(message, msgArgs); });
         }
         Corpus corpus = new Corpus();
         logger.Info(/*funcName=*/ null, "Nalagam učni korpus ...");
         corpus.LoadFromXmlFile(corpusFileName, /*tagLen=*/ int.MaxValue);
         LemmatizerSettings lemmatizerSettings = new LemmatizerSettings();
         lemmatizerSettings.eMsdConsider                 = considerTag ? LemmatizerSettings.MsdConsideration.Distinct : LemmatizerSettings.MsdConsideration.Ignore;
         lemmatizerSettings.bUseFromInRules              = true;
         lemmatizerSettings.iMaxRulesPerNode             = 0;
         lemmatizerSettings.bBuildFrontLemmatizer        = false;
         lemmatizerSettings.bStoreAllFullKnownWords      = false;
         lemmatizerSettings.bUseMsdSplitTreeOptimization = treeOpt;
         Lemmatizer lemmatizer = new Lemmatizer(lemmatizerSettings);
         // Every non-punctuation corpus token becomes a training example with weight 1.
         for (int i = 0; i < corpus.TaggedWords.Count; i++)
         {
             logger.ProgressFast(Logger.Level.Info, /*funcName=*/ null, "{0} / {1}", i + 1, corpus.TaggedWords.Count);
             TaggedWord word = corpus.TaggedWords[i];
             if (!word.MoreInfo.Punctuation)
             {
                 lemmatizer.AddExample(word.WordLower, word.Lemma.ToLower(), 1, word.Tag);
             }
         }
         if (lexiconFileName != null)
         {
             logger.Info(/*funcName=*/ null, "Nalagam leksikon ...");
             int lineCount = 0;
             // using guarantees the lexicon file is closed even when a line fails to parse.
             using (StreamReader lexReader = new StreamReader(lexiconFileName))
             {
                 string lexLine;
                 while ((lexLine = lexReader.ReadLine()) != null)
                 {
                     // lexicon format: word \t lemma \t tag \t freq
                     logger.ProgressFast(Logger.Level.Info, /*funcName=*/ null, "{0}", ++lineCount, /*numSteps=*/ 0);
                     if (lexLine.Trim().Length == 0)
                     {
                         continue; // tolerate blank lines instead of failing on the missing columns
                     }
                     string[] lexData  = lexLine.Split('\t');
                     string   lexWord  = lexData[0];
                     string   lexLemma = lexData[1];
                     string   lexTag   = lexData[2];
                     // The lexicon is a data file, so parse the frequency independently of the machine's culture
                     // (e.g. a culture with a decimal comma would otherwise misread "0.5").
                     double   freq     = Math.Max(0.1, Convert.ToDouble(lexData[3], System.Globalization.CultureInfo.InvariantCulture));
                     lemmatizer.AddExample(lexWord.ToLower(), lexLemma.ToLower(), freq, lexTag);
                 }
             }
             logger.ProgressFast(Logger.Level.Info, /*funcName=*/ null, "{0}", lineCount, lineCount);
         }
         logger.Info(/*funcName=*/ null, "Gradim model za lematizacijo ...");
         if (treeOpt)
         {
             string msdSpec = Utils.GetManifestResourceString(typeof(Program), "MsdSpecsSloSloCodes.txt");
             MsdSplitTree.BeamSearchParams beamSearchParams = new MsdSplitTree.BeamSearchParams();
             beamSearchParams.beamsPerLevel[0] = 2;
             lemmatizer.BuildModel(msdSpec, beamSearchParams);
         }
         else
         {
             lemmatizer.BuildModel();
         }
         logger.Info(/*funcName=*/ null, "Optimiram lematizacijsko drevo ...");
         lemmatizer.OptimizeMemorySize();
         logger.Info(/*funcName=*/ null, "Zapisujem model ...");
         BinarySerializer writer = new BinarySerializer(modelFileName, FileMode.Create);
         try
         {
             // The flag is stored ahead of the model itself.
             writer.WriteBool(considerTag);
             lemmatizer.Save(writer);
         }
         finally
         {
             writer.Close(); // release the model file even if saving fails
         }
         logger.Info(/*funcName=*/ null, "Končano.");
     }
     catch (Exception exception)
     {
         Console.WriteLine();
         Console.WriteLine("*** Nepričakovana napaka. Podrobnosti: {0}\r\n{1}", exception, exception.StackTrace);
     }
 }
Beispiel #3
0
        /// <summary>
        /// Convenience overload that processes <paramref name="corpus"/> with XML mode switched off and discards
        /// the evaluation counters reported by the five-parameter overload.
        /// </summary>
        /// <param name="corpus">The corpus to process.</param>
        public void Tag(Corpus corpus)
        {
            // The counters are mandatory out arguments of the called overload; nothing reads them here.
            int ignoredLemmaCorrect;
            int ignoredLemmaCorrectLowercase;
            int ignoredLemmaWords;
            const bool xmlMode = false;

            // Per the original author's note, this call may throw InvalidOperationException or ArgumentNullException.
            Tag(corpus, out ignoredLemmaCorrect, out ignoredLemmaCorrectLowercase, out ignoredLemmaWords, xmlMode);
        }
Beispiel #4
0
        // *** End of Dec-2011 ***

        /// <summary>
        /// Lemmatizes every word token of <paramref name="corpus"/> in place (when a lemmatizer is loaded) and, in
        /// XML mode, counts how many produced lemmas match the lemmas the corpus held beforehand.
        /// The model-based tagging step that used to run here is disabled, so existing tags are left untouched.
        /// </summary>
        /// <param name="corpus">Corpus whose tokens are processed; must not be null.</param>
        /// <param name="lemmaCorrect">Number of lemmas equal to the previously stored lemma (XML mode only, else 0).</param>
        /// <param name="lemmaCorrectLowercase">Number of lemmas equal to the previously stored lemma after
        /// lower-casing both (XML mode only, else 0).</param>
        /// <param name="lemmaWords">Number of lemmatized word tokens (XML mode only, else 0).</param>
        /// <param name="xmlMode">When true, the three counters are maintained.</param>
        /// <exception cref="ArgumentNullException"><paramref name="corpus"/> is null.</exception>
        public void Tag(Corpus corpus, out int lemmaCorrect, out int lemmaCorrectLowercase, out int lemmaWords, bool xmlMode)
        {
            if (corpus == null)
            {
                throw new ArgumentNullException("corpus");
            }
            // UTC is immune to daylight-saving jumps, so the logged duration cannot be skewed by them.
            DateTime startTime = DateTime.UtcNow;

            mLogger.Debug("Tag", "Označujem besedilo ...");
            lemmaCorrect          = 0;
            lemmaCorrectLowercase = 0;
            lemmaWords            = 0;
            for (int i = 0; i < corpus.TaggedWords.Count; i++)
            {
                // A token is a non-word if its MoreInfo says punctuation or, lacking MoreInfo, if the non-word
                // regex matches its lower-cased form.
                bool isNonWord = corpus.TaggedWords[i].MoreInfo != null
                    ? corpus.TaggedWords[i].MoreInfo.Punctuation
                    : mNonWordRegex.Match(corpus.TaggedWords[i].WordLower).Success;
                if (isNonWord || mLemmatizer == null)
                {
                    continue; // nothing to do for non-words or when no lemmatizer is available
                }
                string wordLower = corpus.TaggedWords[i].WordLower;
                string lemma     = mLemmatizer.Lemmatize(wordLower);
                if (string.IsNullOrEmpty(lemma))
                {
                    lemma = wordLower; // fall back to the word itself when the lemmatizer yields nothing
                }
                if (xmlMode)
                {
                    // Compare against the lemma currently stored in the corpus before it is overwritten below.
                    lemmaWords++;
                    if (lemma == corpus.TaggedWords[i].Lemma)
                    {
                        lemmaCorrect++;
                    }
                    if (corpus.TaggedWords[i].Lemma != null && lemma.ToLower() == corpus.TaggedWords[i].Lemma.ToLower())
                    {
                        lemmaCorrectLowercase++;
                    }
                }
                corpus.TaggedWords[i].Lemma = lemma;
            }
            TimeSpan span = DateTime.UtcNow - startTime;

            mLogger.Debug("Tag", "Trajanje označevanja: {0:00}:{1:00}:{2:00}.{3:000}.", span.Hours, span.Minutes, span.Seconds, span.Milliseconds);
        }
Beispiel #5
0
        /// <summary>
        /// Entry point of the tagging tool: loads the models, processes every input file matching the search pattern,
        /// writes the resulting corpus to the output file and, for inputs loaded as annotated XML, logs accuracy
        /// figures computed against the tags and lemmas that were present in the input.
        /// Any exception is logged instead of being propagated.
        /// </summary>
        /// <param name="args">Command-line arguments; fewer than four prints the help text, otherwise they are
        /// interpreted by <c>ParseParams</c>.</param>
        private static void Main(string[] args)
        {
            // initialize logger
            mLogger.LocalLevel      = Logger.Level.Debug;
            mLogger.LocalOutputType = Logger.OutputType.Custom;
            Logger.CustomOutput     = new Logger.CustomOutputDelegate(delegate(string loggerName, Logger.Level level, string funcName, Exception e,
                                                                               string message, object[] msgArgs) { Console.WriteLine(message, msgArgs); });
#endif
            // NOTE(review): the #if matching the #endif above is not visible in this excerpt - confirm before moving these lines.
            try
            {
                if (args.Length < 4)
                {
                    OutputHelp();
                }
                else
                {
                    string inputFolder = null, searchPattern = null, taggerModelFile = null, lemmatizerModelFile = null, outputFileOrFolder = null;
                    bool   ssjTokenizer = false, searchSubfolders = false, verbose = false, overwrite = false;
                    if (ParseParams(args, ref verbose, ref inputFolder, ref searchPattern, ref taggerModelFile, ref lemmatizerModelFile,
                                    ref outputFileOrFolder, ref ssjTokenizer, ref searchSubfolders, ref overwrite))
                    {
                        if (!verbose)
                        {
                            mLogger.LocalLevel = Logger.Level.Info;
                            mLogger.LocalProgressOutputType = Logger.ProgressOutputType.Off;
                        }
                        PartOfSpeechTagger tagger = new PartOfSpeechTagger();
                        tagger.LoadModels(taggerModelFile, lemmatizerModelFile);
                        mLogger.Debug(null, "Mapa z vhodnimi datotekami: {0}", inputFolder);
                        mLogger.Debug(null, "Iskalni vzorec: {0}", searchPattern);
                        foreach (FileInfo file in new DirectoryInfo(inputFolder).GetFiles(searchPattern,
                                                                                          searchSubfolders ? SearchOption.AllDirectories : SearchOption.TopDirectoryOnly))
                        {
                            string outputFileName = outputFileOrFolder;
                            if (searchPattern.Contains("*") || searchPattern.Contains("?")) // search pattern contains wildcards thus output is a folder
                            {
                                // <output folder>/<input name>.out<input extension>; Path.Combine picks the
                                // platform's separator instead of a hard-coded backslash.
                                outputFileName = Path.Combine(outputFileOrFolder,
                                                              Path.GetFileNameWithoutExtension(file.Name) + ".out" + file.Extension);
                                DirectoryInfo dirInfo = new FileInfo(outputFileName).Directory;
                                if (!dirInfo.Exists)
                                {
                                    dirInfo.Create();
                                }
                            }
                            if (File.Exists(outputFileName) && !overwrite)
                            {
                                mLogger.Debug(null, "Datoteka {0} že obstaja. Pripadajoča vhodna datoteka ni bila ponovno označena.", outputFileName);
                                continue;
                            }
                            mLogger.Debug(null, "Nalagam {0} ...", file.FullName);
                            Corpus corpus;
                            bool   xmlMode = false;
                            try
                            {
                                // First try the annotated XML format; if that yields no tokens, try the GigaFida format.
                                corpus = new Corpus();
                                corpus.LoadFromXmlFile(file.FullName, /*tagLen=*/ -1);
                                if (corpus.TaggedWords.Count > 0)
                                {
                                    xmlMode = true;
                                }
                                else
                                {
                                    corpus.LoadFromGigaFidaFile(file.FullName);
                                }
                            }
                            catch (ThreadHandler.AbortedByUserException)
                            {
                                throw;
                            }
                            catch (Exception e)
                            {
                                // Not loadable as XML: fall back to treating the file as plain text.
                                corpus = new Corpus();
                                string content = File.ReadAllText(file.FullName);
                                if (IsXmlTei(content))
                                {
                                    mLogger.Debug(null, "*** Opozorilo: Datoteka z besedilom vsebuje značke XML-TEI, vendar nima pravilne oblike. Podrobnosti: {0}", e.Message);
                                }
                                if (ssjTokenizer)
                                {
                                    corpus.LoadFromTextSsjTokenizer(content);
                                }
                                else
                                {
                                    corpus.LoadFromText(content);
                                }
                            }
                            int      knownWordsCorrect             = 0;
                            int      knownWordsPosCorrect          = 0;
                            int      knownWords                    = 0;
                            int      unknownWordsCorrect           = 0;
                            int      unknownWordsPosCorrect        = 0;
                            int      unknownWords                  = 0;
                            int      eosCount                      = 0;
                            int      eosCorrect                    = 0;
                            int      lemmaCorrect                  = 0;
                            int      lemmaCorrectLowercase         = 0;
                            int      lemmaWords                    = 0;
                            int      knownWordsCorrectNoPunct      = 0;
                            int      knownWordsPosCorrectNoPunct   = 0;
                            int      knownWordsNoPunct             = 0;
                            int      unknownWordsCorrectNoPunct    = 0;
                            int      unknownWordsPosCorrectNoPunct = 0;
                            int      unknownWordsNoPunct           = 0;
                            // Remember the tags present before tagging so the result can be compared against them.
                            string[] goldTags                      = new string[corpus.TaggedWords.Count];
                            for (int i = 0; i < corpus.TaggedWords.Count; i++)
                            {
                                goldTags[i] = corpus.TaggedWords[i].Tag;
                            }
                            tagger.Tag(corpus, out lemmaCorrect, out lemmaCorrectLowercase, out lemmaWords, xmlMode);
                            mLogger.Debug(null, "Zapisujem označeno besedilo v datoteko {0} ...", outputFileName);
                            // using closes the output file even if serializing the corpus throws.
                            using (StreamWriter writer = new StreamWriter(outputFileName))
                            {
                                writer.Write(corpus.ToString(xmlMode || ssjTokenizer ? "XML-MI" : "XML"));
                            }
                            mLogger.Debug(null, "Končano.");
                            if (xmlMode)
                            {
                                for (int i = 0; i < corpus.TaggedWords.Count; i++)
                                {
                                    string wordLower = corpus.TaggedWords[i].WordLower;
                                    string tag       = corpus.TaggedWords[i].Tag;
                                    string goldTag   = goldTags[i];
                                    bool   isKnown   = tagger.IsKnownWord(wordLower);
                                    bool   tagCorrect = tag == goldTag;
                                    // POS agreement compares the first character of both tags; guard against
                                    // null or empty tags, which would otherwise throw on the [0] access.
                                    bool   posCorrect = !string.IsNullOrEmpty(tag) && !string.IsNullOrEmpty(goldTag) && tag[0] == goldTag[0];
                                    if (!PartOfSpeechTagger.mNonWordRegex.Match(corpus.TaggedWords[i].Word).Success)
                                    {
                                        // Same statistics restricted to tokens the non-word regex does not match.
                                        if (isKnown)
                                        {
                                            knownWordsNoPunct++;
                                            if (tagCorrect) { knownWordsCorrectNoPunct++; }
                                            if (posCorrect) { knownWordsPosCorrectNoPunct++; }
                                        }
                                        else
                                        {
                                            unknownWordsNoPunct++;
                                            if (tagCorrect) { unknownWordsCorrectNoPunct++; }
                                            if (posCorrect) { unknownWordsPosCorrectNoPunct++; }
                                        }
                                    }
                                    if (isKnown)
                                    {
                                        knownWords++;
                                        if (tagCorrect) { knownWordsCorrect++; }
                                        if (posCorrect) { knownWordsPosCorrect++; }
                                    }
                                    else
                                    {
                                        unknownWords++;
                                        if (tagCorrect) { unknownWordsCorrect++; }
                                        if (posCorrect) { unknownWordsPosCorrect++; }
                                    }
                                    if (corpus.TaggedWords[i].MoreInfo != null && corpus.TaggedWords[i].MoreInfo.EndOfSentence)
                                    {
                                        eosCount++;
                                        if (tag != null && tag.EndsWith("<eos>", StringComparison.Ordinal))
                                        {
                                            eosCorrect++;
                                        }
                                    }
                                }
                                int allWords                  = knownWords + unknownWords;
                                int allWordsCorrect           = knownWordsCorrect + unknownWordsCorrect;
                                int allWordsPosCorrect        = knownWordsPosCorrect + unknownWordsPosCorrect;
                                int allWordsNoPunct           = knownWordsNoPunct + unknownWordsNoPunct;
                                int allWordsCorrectNoPunct    = knownWordsCorrectNoPunct + unknownWordsCorrectNoPunct;
                                int allWordsPosCorrectNoPunct = knownWordsPosCorrectNoPunct + unknownWordsPosCorrectNoPunct;
                                LogAccuracy("Točnost na znanih besedah: ................... {2:0.00}% ({0} / {1})", knownWordsCorrect, knownWords);
                                LogAccuracy("Točnost na neznanih besedah: ................. {2:0.00}% ({0} / {1})", unknownWordsCorrect, unknownWords);
                                LogAccuracy("Skupna točnost: .............................. {2:0.00}% ({0} / {1})", allWordsCorrect, allWords);
                                LogAccuracy("Točnost na znanih besedah (POS): ............. {2:0.00}% ({0} / {1})", knownWordsPosCorrect, knownWords);
                                LogAccuracy("Točnost na neznanih besedah (POS): ........... {2:0.00}% ({0} / {1})", unknownWordsPosCorrect, unknownWords);
                                LogAccuracy("Skupna točnost (POS): ........................ {2:0.00}% ({0} / {1})", allWordsPosCorrect, allWords);
                                LogAccuracy("Točnost na znanih besedah (brez ločil): ...... {2:0.00}% ({0} / {1})", knownWordsCorrectNoPunct, knownWordsNoPunct);
                                LogAccuracy("Točnost na neznanih besedah (brez ločil): .... {2:0.00}% ({0} / {1})", unknownWordsCorrectNoPunct, unknownWordsNoPunct);
                                LogAccuracy("Skupna točnost (brez ločil): ................. {2:0.00}% ({0} / {1})", allWordsCorrectNoPunct, allWordsNoPunct);
                                LogAccuracy("Točnost na znanih besedah (POS, brez ločil):   {2:0.00}% ({0} / {1})", knownWordsPosCorrectNoPunct, knownWordsNoPunct);
                                LogAccuracy("Točnost na neznanih besedah (POS, brez ločil): {2:0.00}% ({0} / {1})", unknownWordsPosCorrectNoPunct, unknownWordsNoPunct);
                                LogAccuracy("Skupna točnost (POS, brez ločil): ............ {2:0.00}% ({0} / {1})", allWordsPosCorrectNoPunct, allWordsNoPunct);
                                if (lemmatizerModelFile != null)
                                {
                                    LogAccuracy("Točnost lematizacije (brez ločil): ........... {2:0.00}% ({0} / {1})", lemmaCorrect, lemmaWords);
                                    LogAccuracy("Točnost lematizacije (male črke, brez ločil):  {2:0.00}% ({0} / {1})", lemmaCorrectLowercase, lemmaWords);
                                }
                                LogAccuracy("Točnost detekcije konca stavka: .............. {2:0.00}% ({0} / {1})", eosCorrect, eosCount);
                            }
                        }
                    }
                }
            }
            catch (Exception e)
            {
                mLogger.Info(null, "");
                mLogger.Info(null, "*** Nepričakovana napaka. Podrobnosti: {0}", e);
            }
        }

        // Logs one accuracy line at Info level; the format receives {0} = correct, {1} = total, {2} = percentage.
        private static void LogAccuracy(string format, int correct, int total)
        {
            // Floating-point division: a zero total yields NaN rather than throwing.
            mLogger.Info(null, format, correct, total, (double)correct / (double)total * 100.0);
        }
Beispiel #6
0
 /// <summary>
 /// Entry point of the tagger training tool: loads the training corpus (and optionally a lexicon) into a
 /// suffix tree, builds feature vectors, trains a maximum entropy classifier and writes the suffix tree,
 /// feature space and model to the model file. Any exception is reported on the console instead of being
 /// propagated.
 /// </summary>
 /// <param name="args">Command-line arguments; fewer than two prints the help text, otherwise they are
 /// interpreted by <c>ParseParams</c>.</param>
 static void Main(string[] args)
 {
     try
     {
         if (args.Length < 2)
         {
             OutputHelp();
             return;
         }
         int    cutOff = 2;
         int    numIter = 50;
         int    numThreads = 1;
         string corpusFileName = null, modelFileName = null, lexiconFileName = null;
         bool   verbose = false;
         if (!ParseParams(args, ref verbose, ref cutOff, ref numIter, ref numThreads, ref corpusFileName, ref modelFileName, ref lexiconFileName))
         {
             return;
         }
         Logger logger = Logger.GetRootLogger();
         if (!verbose)
         {
             logger.LocalLevel = Logger.Level.Off;
             logger.LocalProgressOutputType = Logger.ProgressOutputType.Off;
         }
         else
         {
             logger.LocalOutputType = Logger.OutputType.Custom;
             Logger.CustomOutput    = new Logger.CustomOutputDelegate(delegate(string loggerName, Logger.Level level, string funcName, Exception e,
                                                                               string message, object[] msgArgs) { Console.WriteLine(message, msgArgs); });
         }
         Corpus corpus = new Corpus();
         logger.Info(/*funcName=*/ null, "Nalagam učni korpus ...");
         corpus.LoadFromXmlFile(corpusFileName, /*tagLen=*/ -1);
         // The forced collections below exist only to make the reported memory deltas meaningful.
         GC.Collect();
         long         oldMemUse  = Process.GetCurrentProcess().PrivateMemorySize64;
         PatriciaTree suffixTree = new PatriciaTree();
         foreach (TaggedWord word in corpus.TaggedWords)
         {
             suffixTree.AddWordTagPair(word.WordLower, word.Tag);
         }
         if (lexiconFileName != null)
         {
             logger.Info(/*funcName=*/ null, "Nalagam leksikon ...");
             // using guarantees the lexicon file is closed even when a line is malformed.
             using (StreamReader lexReader = new StreamReader(lexiconFileName))
             {
                 string lexLine;
                 while ((lexLine = lexReader.ReadLine()) != null)
                 {
                     if (lexLine.Trim().Length == 0)
                     {
                         continue; // tolerate blank lines instead of failing on the missing columns
                     }
                     // Column 0 is the word and column 2 the tag.
                     string[] lexData = lexLine.Split('\t');
                     suffixTree.AddWordTagPair(lexData[0].ToLower(), lexData[2]);
                 }
             }
         }
         GC.Collect();
         long memUse = Process.GetCurrentProcess().PrivateMemorySize64;
         Console.WriteLine("Poraba pomnilnika (drevo končnic): {0:0.00} MB", (double)(memUse - oldMemUse) / 1048576.0);
         oldMemUse = memUse;
         suffixTree.PropagateTags();
         GC.Collect();
         memUse = Process.GetCurrentProcess().PrivateMemorySize64;
         Console.WriteLine("Poraba pomnilnika (propagirane oznake): {0:0.00} MB", (double)(memUse - oldMemUse) / 1048576.0);
         MaximumEntropyClassifierFast <string> model   = new MaximumEntropyClassifierFast <string>();
         LabeledDataset <string, BinaryVector> dataset = new LabeledDataset <string, BinaryVector>();
         Dictionary <string, int> featureSpace         = new Dictionary <string, int>();
         logger.Info(/*funcName=*/ null, "Pripravljam vektorje značilk ...");
         // One labeled example per corpus token; the feature space grows as new features are encountered.
         for (int i = 0; i < corpus.TaggedWords.Count; i++)
         {
             logger.ProgressFast(Logger.Level.Info, /*funcName=*/ null, "{0} / {1}", i + 1, corpus.TaggedWords.Count);
             BinaryVector featureVector = corpus.GenerateFeatureVector(i, featureSpace, /*extendFeatureSpace=*/ true, suffixTree);
             dataset.Add(corpus.TaggedWords[i].Tag, featureVector);
         }
         logger.Info(/*funcName=*/ null, "Gradim model ...");
         // UTC is immune to daylight-saving jumps, so the logged duration cannot be skewed by them.
         DateTime startTime = DateTime.UtcNow;
         model.CutOff     = cutOff;
         model.NumThreads = numThreads;
         model.NumIter    = numIter;
         model.Train(dataset);
         TimeSpan span = DateTime.UtcNow - startTime;
         logger.Info(/*funcName=*/ null, "Trajanje gradnje modela: {0:00}:{1:00}:{2:00}.{3:000}.", span.Hours, span.Minutes, span.Seconds, span.Milliseconds);
         logger.Info(/*funcName=*/ null, "Zapisujem model ...");
         BinarySerializer writer = new BinarySerializer(modelFileName, FileMode.Create);
         try
         {
             // Write order: suffix tree, feature space, classifier.
             suffixTree.Save(writer);
             Utils.SaveDictionary(featureSpace, writer);
             model.Save(writer);
         }
         finally
         {
             writer.Close(); // release the model file even if saving fails
         }
         logger.Info(/*funcName=*/ null, "Končano.");
     }
     catch (Exception exception)
     {
         Console.WriteLine();
         Console.WriteLine("*** Nepričakovana napaka. Podrobnosti: {0}\r\n{1}", exception, exception.StackTrace);
     }
 }