コード例 #1
0
ファイル: Trainer.cs プロジェクト: accentype/accentype-learn
        public static string Accentify(string rawSentence, string accents, AccentConverter converter)
        {
            if (rawSentence == null || accents == null || converter == null)
            {
                return(null);
            }

            char[] rawChars = rawSentence.ToArray();
            char[] accChars = converter.Convert(accents).ToArray();

            int ia = 0;

            for (int i = 0; i < rawChars.Length; i++)
            {
                if (converter.RawCharMap.ContainsKey(rawChars[i]))
                {
                    if (ia >= accChars.Length || rawChars[i] != accChars[ia])
                    {
                        return(null);
                    }
                    rawChars[i] = accents[ia++];
                }
            }
            return(new string(rawChars));
        }
コード例 #2
0
ファイル: Trainer.cs プロジェクト: accentype/accentype-learn
        public static void TestUserQuery(List <ILanguageModel> models, AccentConverter converter)
        {
            while (true)
            {
                Console.Write("Enter a phrase: ");

                string data = Console.ReadLine();
                if (data.Contains("quit"))
                {
                    break;
                }

                File.AppendAllText(g_LogFile, data + "\r\n");

                Clocker.Tick();

                string predictedWords = String.Join(" ", Predict(data, models, converter).Select(wc => wc[0]));

                string predicted = String.Format("Predicted: {0} - {1} seconds", predictedWords, Clocker.Seconds());
                Console.WriteLine(predicted);
                Console.WriteLine();

                File.AppendAllText(g_LogFile, predicted + "\r\n\r\n");
            }
        }
コード例 #3
0
ファイル: Trainer.cs プロジェクト: accentype/accentype-learn
        public static void Test(AccentConverter converter, int modelVersion)
        {
            string modelFile = (converter is VietConverter) ? g_ModelFile : g_FrenchModelFile;

            string[] modelFiles = Directory.GetFiles(DataManager.BaseDir, Path.GetFileName(String.Format(modelFile, "*")));

            List <ILanguageModel> models = new ILanguageModel[5].ToList();

            for (int n = 1; n <= 3; n++)
            {
                string fileName = String.Format(modelFile, n);
                if (!File.Exists(fileName))
                {
                    continue;
                }

                Console.WriteLine("Loading {0}-gram model...", n);

                Clocker.Tick();

                models[n - 1] = ModelFactory.LoadModel(modelFiles[n - 1], modelVersion);

                Clocker.Tock();
            }

            TestUserQuery(models, converter);
        }
コード例 #4
0
ファイル: Trainer.cs プロジェクト: accentype/accentype-learn
        public static string ExtractAccents(string sentence, AccentConverter converter)
        {
            StringBuilder sb = new StringBuilder();

            string rawSentence = converter.Convert(sentence);

            for (int i = 0; i < rawSentence.Length; i++)
            {
                if (converter.RawCharMap.ContainsKey(rawSentence[i]))
                {
                    sb.Append(sentence[i]);
                }
            }
            return(sb.ToString());
        }
コード例 #5
0
ファイル: Trainer.cs プロジェクト: accentype/accentype-learn
        private static void ComputeAccentScore(string[] words, int iW,
                                               AccentConverter converter, double weight,
                                               ILanguageModel model, int n, Dictionary <string, double> accScoreMap)
        {
            int g = n - 1;

            // compute accent probability for this word
            int g3Start = Math.Max(iW - g, 0);
            int g3End   = Math.Min(iW + g, words.Length - 1);

            for (int jW = g3Start; jW <= g3End - g; jW++)
            {
                string segment =
                    (g == 2) ? String.Format("{0} {1} {2}", words[jW], words[jW + 1], words[jW + 2]) :
                    (g == 1) ? String.Format("{0} {1}", words[jW], words[jW + 1]) : words[jW];


                Dictionary <string, int> accentsCountMap = model.GetAccents(segment);

                if (accentsCountMap == null)
                {
                    continue;
                }

                double count = accentsCountMap.Sum(item => item.Value);

                foreach (string accents in accentsCountMap.Keys)
                {
                    string accSegment = Accentify(segment, accents, converter);
                    if (accSegment != null)
                    {
                        string[] accWords = accSegment.Split(new char[0]);

                        string accentedWord = accWords[iW - jW];
                        double accScore     = (accentsCountMap[accents] / count) * weight;

                        if (!accScoreMap.ContainsKey(accentedWord))
                        {
                            accScoreMap.Add(accentedWord, 0);
                        }
                        accScoreMap[accentedWord] += accScore;
                    }
                }
            }
        }
コード例 #6
0
        public override bool OnStart()
        {
            udpServer = new UdpClient(PredictEndPoint);

            models.Add(ModelFactory.LoadModel("model_v2_1.at", version: 2));
            models.Add(ModelFactory.LoadModel("model_v2_2.at", version: 2));
            models.Add(ModelFactory.LoadModel("model_v2_3.at", version: 2));

            accentConverter = new VietConverter();

            // Set the maximum number of concurrent connections
            ServicePointManager.DefaultConnectionLimit = 12;

            // For information on handling configuration changes
            // see the MSDN topic at http://go.microsoft.com/fwlink/?LinkId=166357.

            bool result = base.OnStart();

            Trace.TraceInformation("PredictRole has been started");

            return(result);
        }
コード例 #7
0
ファイル: Trainer.cs プロジェクト: accentype/accentype-learn
        public static void Train(string outModelFilePattern, TupleList <string, string> inputTrainingFiles, int modelVersion, int minGram, int maxGram, AccentConverter converter, bool learnKnownWordsOnly = true)
        {
            List <int> grams = new List <int>();

            for (int n = minGram; n <= maxGram; n++)
            {
                if (!File.Exists(String.Format(outModelFilePattern, n)))
                {
                    grams.Add(n);
                }
            }
            if (grams.Count == 0)
            {
                return;
            }

            // Load dictionary of raw words
            Dictionary <string, int> dictionary = learnKnownWordsOnly ? LazyTrainer.ReadDictionary(DataManager.DictionaryFile) : null;

            // Load segments from training data
            List <List <string> > segments = TextParser.ParseData(inputTrainingFiles);

            StringBuilder sbRaw = new StringBuilder();
            StringBuilder sbAcc = new StringBuilder();

            foreach (int n in grams)
            {
                int iG = n - 1;
                Console.WriteLine("Building {0}-gram ...", iG + 1);

                Clocker.Tick();

                using (BinaryWriter bwModel = new BinaryWriter(File.Create(String.Format(outModelFilePattern, iG + 1))))
                {
                    ILanguageModel igGram = ModelFactory.CreateModelByVersion(modelVersion);
                    for (int iS = 0; iS < segments.Count; iS++)
                    {
                        List <string> words = segments[iS];
                        for (int i = 0; i < words.Count - iG; i++)
                        {
                            sbRaw.Clear();
                            sbAcc.Clear();

                            bool shouldProceed = true;
                            if (learnKnownWordsOnly)
                            {
                                for (int g = 0; g <= iG; g++)
                                {
                                    string accWord = words[i + g];
                                    string rawWord = converter.Convert(accWord);

                                    if (!dictionary.ContainsKey(rawWord))
                                    {
                                        shouldProceed = false;
                                        break;
                                    }

                                    sbAcc.Append(accWord);
                                    sbRaw.Append(rawWord);
                                    if (g < iG)
                                    {
                                        sbRaw.Append(" ");
                                    }
                                }
                            }
                            else
                            {
                                for (int g = 0; g <= iG; g++)
                                {
                                    sbAcc.Append(words[i + g]);
                                    sbRaw.Append(converter.Convert(words[i + g]));
                                    if (g < iG)
                                    {
                                        sbRaw.Append(" ");
                                    }
                                }
                            }

                            if (shouldProceed)
                            {
                                string accents = ExtractAccents(sbAcc.ToString(), converter);

                                igGram.Add(sbRaw.ToString(), accents);
                            }
                        }
                    }

                    igGram.WriteToBinary(bwModel);
                }

                Clocker.Tock();
            }
        }
コード例 #8
0
ファイル: Trainer.cs プロジェクト: accentype/accentype-learn
        public static void Train(string outModelFilePattern, string trainingFile, int modelVersion, int minGram, int maxGram, AccentConverter converter, bool learnKnownWordsOnly = true)
        {
            TupleList <string, string> files = new TupleList <string, string>
            {
                { Path.GetDirectoryName(trainingFile), Path.GetFileName(trainingFile) }
            };

            Train(outModelFilePattern, files, modelVersion, minGram, maxGram, converter, learnKnownWordsOnly);
        }
コード例 #9
0
ファイル: Trainer.cs プロジェクト: accentype/accentype-learn
        public static string[][] Predict(string data, List <ILanguageModel> models, AccentConverter converter)
        {
            string[][] wordChoices = null;

            if (String.IsNullOrWhiteSpace(data))
            {
                return(null);
            }

            string[] queryWords = data.Split(new char[] { ' ' }, StringSplitOptions.RemoveEmptyEntries);
            string[] words      = queryWords.Select(w => w.ToLower()).ToArray();

            List <List <int> > upperCases = new List <List <int> >();

            for (int w = 0; w < queryWords.Length; w++)
            {
                upperCases.Add(new List <int>());
                for (int c = 0; c < queryWords[w].Length; c++)
                {
                    if (Char.IsUpper(queryWords[w][c]))
                    {
                        upperCases[w].Add(c);
                    }
                }
            }

            double beta3 = 0.2;
            double beta2 = 0.15;
            double beta1 = 0.1;

            wordChoices = new string[words.Length][];

            int maxChoices = 0;

            if (wordChoices.Length == 1)
            {
                maxChoices = MaxChoiceCount1;
            }
            else if (wordChoices.Length == 2)
            {
                maxChoices = MaxChoiceCount2;
            }
            else
            {
                maxChoices = MaxChoiceCount3;
            }

            var predictedWords = new string[words.Length];

            for (int i = 0; i < words.Length; i++)
            {
                var accScoreMap = new Dictionary <string, double>();

                ComputeAccentScore(words, i, converter, beta3, models[2], 3, accScoreMap);
                ComputeAccentScore(words, i, converter, beta2, models[1], 2, accScoreMap);
                ComputeAccentScore(words, i, converter, beta1, models[0], 1, accScoreMap);

                if (accScoreMap != null && accScoreMap.Count > 0)
                {
                    wordChoices[i] = new string[Math.Min(maxChoices, accScoreMap.Count)];

                    var orderedChoices = accScoreMap.OrderByDescending(item => item.Value);

                    int j = 0;
                    foreach (var item in orderedChoices)
                    {
                        if (upperCases[i].Count > 0)
                        {
                            char[] choiceChars = item.Key.ToArray();
                            foreach (int upperCaseLocation in upperCases[i])
                            {
                                choiceChars[upperCaseLocation] = Char.ToUpper(choiceChars[upperCaseLocation]);
                            }
                            wordChoices[i][j] = new string(choiceChars);
                        }
                        else
                        {
                            wordChoices[i][j] = item.Key;
                        }
                        j++;

                        if (j >= wordChoices[i].Length)
                        {
                            break;
                        }
                    }
                }
                else
                {
                    wordChoices[i] = new string[] { queryWords[i] };
                }
            }
            return(wordChoices);
        }