public static Dictionary<int, TupleList<string, short>> LoadModel(string segmentModelFile, string wordModelFile)
{
    Dictionary<int, TupleList<long, short>> wordModel = LazyTrainer.ReadWordModel(wordModelFile);

    // Read every segment string, keyed by its byte offset in the segment file
    Dictionary<long, string> offsetToSegmentMap = new Dictionary<long, string>();
    using (BinaryReader brSegment = new BinaryReader(File.OpenRead(segmentModelFile)))
    {
        while (brSegment.BaseStream.Position != brSegment.BaseStream.Length)
        {
            offsetToSegmentMap.Add(brSegment.BaseStream.Position, brSegment.ReadString());
        }
    }

    // Resolve each word's (offset, position) locations into (segment text, position)
    Dictionary<int, TupleList<string, short>> model = new Dictionary<int, TupleList<string, short>>();
    foreach (int wordIdx in wordModel.Keys)
    {
        model.Add(wordIdx, new TupleList<string, short>());
        foreach (Tuple<long, short> segmentLocation in wordModel[wordIdx])
        {
            model[wordIdx].Add(offsetToSegmentMap[segmentLocation.Item1], segmentLocation.Item2);
        }
    }
    return model;
}
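// ReadWordModel (and its WriteWordModel counterpart) is called above but not shown in
// this file. The sketch below is a minimal, assumed binary layout consistent with how
// LoadModel consumes the result: per word, an Int32 key, an Int32 entry count, then
// count pairs of (Int64 segment offset, Int16 word position). The project's actual
// format may differ; treat this as an illustration, not the real implementation.
private static Dictionary<int, TupleList<long, short>> ReadWordModel(string wordModelFile)
{
    var model = new Dictionary<int, TupleList<long, short>>();
    using (BinaryReader br = new BinaryReader(File.OpenRead(wordModelFile)))
    {
        while (br.BaseStream.Position != br.BaseStream.Length)
        {
            int wordKey = br.ReadInt32();
            int count = br.ReadInt32();
            var locations = new TupleList<long, short>();
            for (int i = 0; i < count; i++)
            {
                // Offset of the containing segment, and the word's index within it
                locations.Add(br.ReadInt64(), br.ReadInt16());
            }
            model.Add(wordKey, locations);
        }
    }
    return model;
}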
private static void Train(string dictionaryFile, string segmentModelFile, string wordModelFile)
{
    System.Diagnostics.Stopwatch sw = new System.Diagnostics.Stopwatch();
    sw.Start();

    List<List<string>> accentSegmentWordList = TextParser.ParseData(new TupleList<string, string>
    {
        { @"..\..\..\..\Data", "batches_train.txt" },
        { @"..\..\..\..\Data\downloaded", "*.txt" }
    });

    Console.Write("Generating segments... ");
    // List of segment offsets in the binary file
    List<long> offsets = LazyTrainer.WriteSegmentModel(accentSegmentWordList, segmentModelFile);
    Console.WriteLine(sw.ElapsedMilliseconds);
    sw.Restart();

    Console.Write("Loading dictionary of raw words... ");
    Dictionary<string, int> dictionary = LazyTrainer.ReadDictionary(dictionaryFile);
    Console.WriteLine(sw.ElapsedMilliseconds);
    sw.Restart();

    Console.Write("Mapping words to segments... ");
    VietConverter tc = new VietConverter();
    Dictionary<int, TupleList<long, short>> model = new Dictionary<int, TupleList<long, short>>();

    // Pass through all segments, recording where each word appears in the segment
    for (int iSegment = 0; iSegment < accentSegmentWordList.Count; iSegment++)
    {
        List<string> wordSegment = accentSegmentWordList[iSegment];
        for (short iWord = 0; iWord < wordSegment.Count; iWord++)
        {
            string rawWord = tc.Convert(wordSegment[iWord]);
            // If the word is in the dictionary, record (segment offset, position in segment)
            if (dictionary.ContainsKey(rawWord))
            {
                int wordKey = dictionary[rawWord];
                if (!model.ContainsKey(wordKey))
                {
                    model.Add(wordKey, new TupleList<long, short>());
                }
                model[wordKey].Add(offsets[iSegment], iWord);
            }
        }
    }

    LazyTrainer.WriteWordModel(model, wordModelFile);
    Console.WriteLine(sw.ElapsedMilliseconds);
}
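// WriteSegmentModel is called above but not defined in this excerpt. A minimal sketch
// consistent with how the segment file is read back elsewhere in this class:
// BinaryWriter.Write(string) length-prefixes each segment, and the stream position
// captured before each write becomes the offset key used by LoadModel and Test.
// The real implementation may differ; this is an assumption for illustration.
private static List<long> WriteSegmentModel(List<List<string>> segments, string segmentModelFile)
{
    var offsets = new List<long>();
    using (BinaryWriter bw = new BinaryWriter(File.Create(segmentModelFile)))
    {
        foreach (List<string> segment in segments)
        {
            // Remember where this segment starts so it can be sought later
            offsets.Add(bw.BaseStream.Position);
            bw.Write(String.Join(" ", segment));
        }
    }
    return offsets;
}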
public static void Run(bool conserveMemory)
{
    // Train only if either model file is missing, then run the interactive test loop
    if (!File.Exists(g_SegmentModelFile) || !File.Exists(g_WordModelFile))
    {
        Directory.CreateDirectory(Path.GetDirectoryName(g_SegmentModelFile));
        LazyTrainer.Train(g_DictionaryFile, g_SegmentModelFile, g_WordModelFile);
    }
    LazyTrainer.Test(g_DictionaryFile, g_SegmentModelFile, g_WordModelFile, conserveMemory);
}
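// The g_* fields referenced by Run and Test are not declared in this excerpt. A
// plausible shape is shown below; the field names come from the code above, but
// every path value here is purely hypothetical.
private static readonly string g_DictionaryFile   = @"..\..\..\..\Data\dictionary.txt"; // hypothetical path
private static readonly string g_SegmentModelFile = @"..\..\..\..\Models\segments.bin"; // hypothetical path
private static readonly string g_WordModelFile    = @"..\..\..\..\Models\words.bin";    // hypothetical path
private static readonly string g_LogFile          = @"..\..\..\..\Models\lazy.log";     // hypothetical path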
private static void Test(string dictionaryFile, string segmentModelFile, string wordModelFile, bool conserveMemory)
{
    Console.OutputEncoding = Encoding.UTF8;
    System.Diagnostics.Stopwatch watch = new System.Diagnostics.Stopwatch();
    watch.Start();
    Console.Write("Starting up...");

    Dictionary<string, int> dictionary = LazyTrainer.ReadDictionary(dictionaryFile);
    Dictionary<int, TupleList<long, short>> model = LazyTrainer.ReadWordModel(wordModelFile);
    VietConverter tc = new VietConverter();

    using (BinaryReader brSegment = new BinaryReader(File.OpenRead(segmentModelFile)))
    {
        // If not conserving memory, preload every segment into a map keyed by offset
        Dictionary<long, string> offsetToSegmentMap = new Dictionary<long, string>();
        if (!conserveMemory)
        {
            while (brSegment.BaseStream.Position != brSegment.BaseStream.Length)
            {
                offsetToSegmentMap.Add(brSegment.BaseStream.Position, brSegment.ReadString());
            }
        }

        Console.WriteLine(" - {0} seconds", watch.ElapsedMilliseconds / 1000.0);
        Console.WriteLine();

        while (true)
        {
            Console.Write("Enter a phrase: ");
            string data = Console.ReadLine();
            // Guard against a null line (end of input) as well as the quit command
            if (data == null || data.Contains("quit"))
            {
                break;
            }
            File.AppendAllText(g_LogFile, data + "\r\n");
            watch.Restart();

            List<string> prediction = new List<string>();
            string[] words = data.Split(new char[] { ' ' }, StringSplitOptions.RemoveEmptyEntries);

            for (short iw = 0; iw < words.Length; iw++)
            {
                string w = words[iw];
                if (!dictionary.ContainsKey(w))
                {
                    prediction.Add(w);
                    File.AppendAllText(g_LogFile, String.Format("{0} is not a known word, leaving as is\r\n", w));
                }
                else
                {
                    int wKey = dictionary[w];
                    if (!model.ContainsKey(wKey))
                    {
                        // Known word with no recorded segments: keep it unchanged
                        // (previously the word was silently dropped from the prediction)
                        prediction.Add(w);
                        continue;
                    }

                    var locations = model[wKey];
                    short maxMatchCount = -1;
                    List<string> mostLikelySequence = new List<string>();
                    string mostLikelyWord = String.Empty;

                    // Fetch the segments containing this word, either by seeking the
                    // file (conserveMemory) or from the preloaded map
                    List<string> segments = new List<string>();
                    if (conserveMemory)
                    {
                        foreach (var loc in locations)
                        {
                            brSegment.BaseStream.Position = loc.Item1;
                            segments.Add(brSegment.ReadString());
                        }
                    }
                    else
                    {
                        foreach (var loc in locations)
                        {
                            segments.Add(offsetToSegmentMap[loc.Item1]);
                        }
                    }

                    // Score every candidate segment in parallel
                    ConcurrentBag<Tuple<short, List<string>, string>> results = new ConcurrentBag<Tuple<short, List<string>, string>>();
                    Parallel.For(0, locations.Count, new ParallelOptions() { MaxDegreeOfParallelism = 8 }, (int i) =>
                    {
                        short iWord = locations[i].Item2;

                        // The accented word list
                        List<string> actual = segments[i].Split(new char[0], StringSplitOptions.RemoveEmptyEntries).ToList();

                        // The converted raw word list
                        List<string> rawActual = new List<string>();
                        foreach (string aw in actual)
                        {
                            rawActual.Add(tc.Convert(aw));
                        }

                        short matchCount = 0;
                        LazyTrainer.MatchSequence(words.ToList(), iw, rawActual, iWord, out matchCount);
                        results.Add(new Tuple<short, List<string>, string>(matchCount, actual, actual[iWord]));
                    });

                    // Keep the candidate with the longest n-gram match
                    foreach (var item in results)
                    {
                        if (item.Item1 >= maxMatchCount)
                        {
                            maxMatchCount = item.Item1;
                            mostLikelySequence = item.Item2;
                            mostLikelyWord = item.Item3;
                        }
                    }

                    prediction.Add(mostLikelyWord);
                    File.AppendAllText(g_LogFile, String.Format("{0} has a {1}-gram match in: {2}\r\n", w, maxMatchCount, String.Join(" ", mostLikelySequence)));
                }
            }

            string predicted = String.Format("Predicted: {0} - {1} seconds", String.Join(" ", prediction), watch.ElapsedMilliseconds / 1000.0);
            Console.WriteLine(predicted);
            Console.WriteLine();
            File.AppendAllText(g_LogFile, predicted + "\r\n\r\n");
        }
    }
}
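// MatchSequence is called above but not defined in this excerpt. One plausible
// implementation, sketched under the following assumption: the query word at iQuery
// and the candidate raw word at iActual already match (which holds by construction of
// the word model), so the score is the size of the contiguous window of agreement
// grown left and right from that anchor. The project's actual scoring may differ.
private static void MatchSequence(List<string> query, short iQuery, List<string> rawActual, short iActual, out short matchCount)
{
    matchCount = 1; // the anchor word itself matches by construction

    // Grow the match to the left while surrounding raw words agree
    int q = iQuery - 1, a = iActual - 1;
    while (q >= 0 && a >= 0 && query[q] == rawActual[a]) { matchCount++; q--; a--; }

    // Grow the match to the right
    q = iQuery + 1; a = iActual + 1;
    while (q < query.Count && a < rawActual.Count && query[q] == rawActual[a]) { matchCount++; q++; a++; }
}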
public static void Train(string outModelFilePattern, TupleList<string, string> inputTrainingFiles, int modelVersion,
                         int minGram, int maxGram, AccentConverter converter, bool learnKnownWordsOnly = true)
{
    // Only build the n-gram sizes whose model files do not exist yet
    List<int> grams = new List<int>();
    for (int n = minGram; n <= maxGram; n++)
    {
        if (!File.Exists(String.Format(outModelFilePattern, n)))
        {
            grams.Add(n);
        }
    }
    if (grams.Count == 0)
    {
        return;
    }

    // Load dictionary of raw words
    Dictionary<string, int> dictionary = learnKnownWordsOnly ? LazyTrainer.ReadDictionary(DataManager.DictionaryFile) : null;

    // Load segments from training data
    List<List<string>> segments = TextParser.ParseData(inputTrainingFiles);

    StringBuilder sbRaw = new StringBuilder();
    StringBuilder sbAcc = new StringBuilder();
    foreach (int n in grams)
    {
        int iG = n - 1;
        Console.WriteLine("Building {0}-gram ...", n);
        Clocker.Tick();
        using (BinaryWriter bwModel = new BinaryWriter(File.Create(String.Format(outModelFilePattern, n))))
        {
            ILanguageModel igGram = ModelFactory.CreateModelByVersion(modelVersion);
            for (int iS = 0; iS < segments.Count; iS++)
            {
                List<string> words = segments[iS];
                // Slide an n-word window over the segment
                for (int i = 0; i < words.Count - iG; i++)
                {
                    sbRaw.Clear();
                    sbAcc.Clear();
                    bool shouldProceed = true;
                    if (learnKnownWordsOnly)
                    {
                        // Skip any n-gram containing an out-of-dictionary word
                        for (int g = 0; g <= iG; g++)
                        {
                            string accWord = words[i + g];
                            string rawWord = converter.Convert(accWord);
                            if (!dictionary.ContainsKey(rawWord))
                            {
                                shouldProceed = false;
                                break;
                            }
                            sbAcc.Append(accWord);
                            sbRaw.Append(rawWord);
                            if (g < iG)
                            {
                                sbRaw.Append(" ");
                            }
                        }
                    }
                    else
                    {
                        for (int g = 0; g <= iG; g++)
                        {
                            sbAcc.Append(words[i + g]);
                            sbRaw.Append(converter.Convert(words[i + g]));
                            if (g < iG)
                            {
                                sbRaw.Append(" ");
                            }
                        }
                    }
                    if (shouldProceed)
                    {
                        string accents = ExtractAccents(sbAcc.ToString(), converter);
                        igGram.Add(sbRaw.ToString(), accents);
                    }
                }
            }
            igGram.WriteToBinary(bwModel);
        }
        Clocker.Tock();
    }
}
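// ExtractAccents is used above but not shown. A minimal sketch of one plausible
// reading: the model stores the raw n-gram as the key and only the accent-carrying
// information as the value, so this version keeps just the characters whose raw
// conversion differs from the accented form. Both the representation and the helper
// body are assumptions for illustration; only converter.Convert is taken from the
// surrounding code.
private static string ExtractAccents(string accented, AccentConverter converter)
{
    StringBuilder accents = new StringBuilder();
    foreach (char c in accented)
    {
        string raw = converter.Convert(c.ToString());
        // Record only characters that carry a diacritic (their raw form differs)
        if (raw.Length != 1 || raw[0] != c)
        {
            accents.Append(c);
        }
    }
    return accents.ToString();
}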