예제 #1
0
        /// <summary>
        /// Estimates the phoneme-stops in a specified word. Each phoneme gets
        /// a weight that will be interpreted as its duration within the word.
        /// </summary>
        /// <param name="ar"><see cref="OrthographicResult"/></param>
        /// <summary>
        /// Estimates the phoneme-stops in a specified word. Each phoneme gets
        /// a weight that will be interpreted as its duration within the word.
        /// </summary>
        /// <param name="ar"><see cref="OrthographicResult"/></param>
        static void phStops(OrthographicResult ar)
        {
            //logfile.Log("phStops()");

            // relative per-phoneme weights - curious where 0100010 got these,
            // intuition perhaps.
            // TODO: French etc.
            string[] weight5 = { "aa","ae","ah","ax","ay","b","eh","l","r","w" };
            string[] weight6 = { "ao","aw","er","ey","ow","oy","uh","uw" };

            var stops = new List<decimal>();

            decimal tally = 0;

            foreach (var phon in ar.Phons)
            {
                decimal weight;
                if (Array.IndexOf(weight5, phon) != -1)
                {
                    weight = 5;
                }
                else if (Array.IndexOf(weight6, phon) != -1)
                {
                    weight = 6;
                }
                else
                {
                    weight = 3;
                }

                tally += weight;
                stops.Add(tally); // cumulative weight up to and including this phoneme
            }

            if (stops.Count != 0)
            {
                // scale cumulative weights into the word's real duration.
                // 'tally' is necessarily nonzero here - every phoneme adds >= 3.
                decimal factor = (ar.Stop - ar.Start) / tally;

                for (int i = 0; i != stops.Count - 1; ++i)
                {
                    ar.phStops.Add(OrthographicResult.Truncate(stops[i] * factor) + ar.Start);
                }
                ar.phStops.Add(ar.Stop);                 // ensure the final phoneme-stop IS the word-stop.
            }
        }
예제 #2
0
        /// <summary>
        /// Reads a saved table from file and parses its lines into
        /// '_ars_alt' as a list of 'OrthographicResult's.
        /// </summary>
        /// <param name="pfe">path of the file to read</param>
        void LoadTable(string pfe)
        {
            _ars_alt = new List<OrthographicResult>();

            using (var fs = new FileStream(pfe, FileMode.Open, FileAccess.Read, FileShare.Read))
            using (var sr = new StreamReader(fs, Encoding.ASCII))
            {
                OrthographicResult ar = null;

                // TODO: WARNING Do error checks ->
                for (string l = sr.ReadLine(); l != null && l != String.Empty; l = sr.ReadLine())
                {
                    string[] a = l.Split(DELI);

                    if (!Utility.isWordstart(a[0]))
                    {
                        // continuation line: 2nd+ phoneme of the current word
                        ar.Phons.Add(a[1]);
                        ar.phStops.Add(Decimal.Parse(a[3]));
                    }
                    else
                    {
                        // a new word begins - flush the word in progress first
                        if (ar != null)
                        {
                            _ars_alt.Add(ar);
                        }

                        ar             = new OrthographicResult();
                        ar.Orthography = String.Empty;
                        ar.Confi       = 0f;
                        ar.Level       = String.Empty;

                        ar.Phons = new List<string>();
                        ar.Phons.Add(a[1]);

                        ar.Start = Decimal.Parse(a[2]);
                        ar.Stop  = Decimal.Parse(a[3]);

                        ar.phStops.Add(Decimal.Parse(a[3]));
                    }
                }

                // flush the final word
                if (ar != null)
                {
                    _ars_alt.Add(ar);
                }
            }

            if (_ars_alt.Count != 0)
            {
                AlternateData();
            }
        }
예제 #3
0
        /// <summary>
        /// Inserts any required silences in the phrase and estimates the
        /// phoneme-stops in each word.
        /// </summary>
        void Orthography()
        {
#if DEBUG
            logfile.Log();
            logfile.Log("Orthography() _generato= " + _generato);
#endif
            List <OrthographicResult> ars = null;
            switch (_generato)
            {
            case Generator.Dictati: ars = _ars_def; break;

            case Generator.Dialogi: ars = _ars_enh; break;
            }

            OrthographicResult ar;
            decimal            stop = 0;

            for (int i = 0; i != ars.Count; ++i)
            {
                if ((ar = ars[i]).Start > stop)                 // TODO: use a tolerance Eg. 10..15 millisec
                {
#if DEBUG
                    logfile.Log(". . insert silence");
#endif
                    OrthographicResult sil = CreateSilence();
                    sil.Start = stop;
                    sil.phStops.Add(sil.Stop = ar.Start);

                    ars.Insert(i, sil);

                    ++i;
                }

#if DEBUG
                logfile.Log(". ar.Orthography= " + ar.Orthography);
                string phons = String.Empty;
                foreach (var phon in ar.Phons)
                {
                    if (phons != String.Empty)
                    {
                        phons += " ";
                    }
                    phons += phon;
                }
                logfile.Log(". ar.Phons= " + phons);
#endif

                phStops(ar);

                stop = ar.Stop;
            }
        }
예제 #4
0
        /// <summary>
        /// Creates a silence.
        /// </summary>
        /// <returns>an <see cref="OrthographicResult"/> w/ blank Orthography</returns>
        /// <summary>
        /// Creates a silence.
        /// </summary>
        /// <returns>an <see cref="OrthographicResult"/> w/ blank Orthography</returns>
        static OrthographicResult CreateSilence()
        {
            var sil = new OrthographicResult();
            sil.Orthography = String.Empty;
            sil.Confi       = 1f;
            sil.Level       = String.Empty;

            // a silence is a single SIL phoneme
            sil.Phons = new List<string> { StaticData.SIL };

            return sil;
        }
예제 #5
0
        /// <summary>
        /// Builds a list of OrthographicResults from the edited DataTable.
        /// </summary>
        /// <param name="sender"></param>
        /// <param name="e"></param>
        /// <summary>
        /// Builds a list of OrthographicResults from the edited DataTable.
        /// Column layout (by index): [0] pos, [1] phon, [2] start, [3] stop.
        /// </summary>
        /// <param name="sender"></param>
        /// <param name="e"></param>
        void click_Accept(object sender, EventArgs e)
        {
#if DEBUG
            logfile.Log();
            logfile.Log("click_Accept()");
#endif
            // TODO: only if changed

            _f._ars_alt = new List <OrthographicResult>();

            OrthographicResult result;
            bool decr;                      // true when the inner scan advanced 'r' past the word

            for (int r = 0; r != _dt.Rows.Count; ++r)
            {
                string pos = _dt.Rows[r][0] as String;                                                                                          // pos
#if DEBUG
                logfile.Log(". _dt.Rows[" + r + "][0]= " + _dt.Rows[r][0]);
#endif
                // rows that are not word-starts are consumed by the inner scan below
                if (Utility.isWordstart(pos))
                {
                    decr = false;

                    result             = new OrthographicResult();
                    result.Orthography = String.Empty;
                    result.Confi       = 0f;
                    result.Level       = String.Empty;

                    result.Phons = new List <string>();
#if DEBUG
                    logfile.Log(". . _dt.Rows[" + r + "][1]= " + _dt.Rows[r][1]);
                    logfile.Log(". . _dt.Rows[" + r + "][2]= " + _dt.Rows[r][2]);
                    logfile.Log(". . _dt.Rows[" + r + "][3]= " + _dt.Rows[r][3]);
#endif
                    result.Phons.Add(_dt.Rows[r][1] as String);                                                         // phon

                    result.Start = Decimal.Parse(_dt.Rows[r][2].ToString());                                            // start

                    result.phStops.Add(Decimal.Parse(_dt.Rows[r][3].ToString()));                                       // stop - 1st phon


                    // scan forward collecting this word's continuation rows (2nd+ phonemes)
                    if (r != _dt.Rows.Count - 1)
                    {
                        decr = true;

                        pos = _dt.Rows[++r][0] as String;
                        while (!Utility.isWordstart(pos))
                        {
#if DEBUG
                            logfile.Log(". . . _dt.Rows[" + r + "][1]= " + _dt.Rows[r][1]);
                            logfile.Log(". . . _dt.Rows[" + r + "][3]= " + _dt.Rows[r][3]);
#endif
                            result.Phons.Add(_dt.Rows[r][1] as String);                                                 // phon - 2+
                            result.phStops.Add(Decimal.Parse(_dt.Rows[r][3].ToString()));                               // stop - 2+

                            if (r == _dt.Rows.Count - 1)
                            {
                                break;
                            }

                            pos = _dt.Rows[++r][0] as String;
                        }
                    }
#if DEBUG
                    logfile.Log(". . _dt.Rows[" + r + "][3]= " + _dt.Rows[r][3]);
#endif
                    // the word-stop is the stop of whatever row the scan ended on
                    result.Stop = Decimal.Parse(_dt.Rows[r][3].ToString());                                                     // stop - word

                    _f._ars_alt.Add(result);

                    // back up one row so the outer loop re-reads the next word-start
                    // (the scan stops having already read it - unless it hit the last row)
                    if (decr)
                    {
                        --r;
                    }
                }
            }


            if (_f._ars_alt.Count != 0)
            {
                _f.AlternateData();
            }
        }
예제 #6
0
//		ulong GetAudioStreamPositionSeconds(string pos)
//		{
//			ulong sec = UInt64.Parse(pos);
//
//			sec /= 2uL;		// bytes per sample (16-bit)
//			sec /= 44100;	// samples per second
//
//			return sec;
//		}

        /// <summary>
        /// Handles 'SpInProcRecoContext.Recognition' event. Fires as the final
        /// hypothesis for a phrase. Each word will be added to a list of
        /// 'OrthographicResult's for the phrase.
        /// WARNING: This can fire 2+ on the same file-stream causing the engine
        /// to drop/reset important variables like 'PhraseInfo.StartTime' and
        /// 'word.AudioStreamOffset' and 'word.AudioTimeOffset'
        /// TODO: a fact that is exceedingly annoying to try to compensate for.
        /// </summary>
        /// <param name="StreamNumber"></param>
        /// <param name="StreamPosition"></param>
        /// <param name="RecognitionType"></param>
        /// <param name="Result"></param>
        void rc_Recognition(int StreamNumber, object StreamPosition, SpeechRecognitionType RecognitionType, ISpeechRecoResult Result)
        {
#if DEBUG
            logfile.Log();
            logfile.Log("rc_Recognition() #" + StreamNumber + " StreamPosition= " + StreamPosition + " _generato= " + _generato);
            logfile.Log(". RecognitionType= " + RecognitionType);             // <- standard.

            logfile.Log(". _phoneConverter.LanguageId= " + _phoneConverter.LanguageId);

            logfile.Log(". " + Result.PhraseInfo.GetText());             // (0, -1, true)

            logfile.Log(". _offset                       = " + _offset);
            logfile.Log(". PhraseInfo.AudioStreamPosition= " + Result.PhraseInfo.AudioStreamPosition);
//			logfile.Log(". . sec= " + GetAudioStreamPositionSeconds(Result.PhraseInfo.AudioStreamPosition.ToString()));

            logfile.Log(". PhraseInfo.AudioSizeBytes     = " + Result.PhraseInfo.AudioSizeBytes);
            logfile.Log(". PhraseInfo.StartTime          = " + Result.PhraseInfo.StartTime);
            logfile.Log(". PhraseInfo.AudioSizeTime      = " + Result.PhraseInfo.AudioSizeTime);

            logfile.Log(". Result.PhraseInfo.Rule.Name= " + Result.PhraseInfo.Rule.Name);                         // <- blank.
            logfile.Log(". Result.PhraseInfo.Rule.Id= " + Result.PhraseInfo.Rule.Id);
            logfile.Log(". Result.PhraseInfo.Rule.EngineConfidence= " + Result.PhraseInfo.Rule.EngineConfidence);
            logfile.Log(". Result.PhraseInfo.Rule.Confidence= " + Result.PhraseInfo.Rule.Confidence);

            logfile.Log(". wordcount= " + Result.PhraseInfo.Elements.Count);
#endif

            // select the word-list that corresponds to the active generator.
            // NOTE(review): 'ars' stays null if '_generato' matches neither case,
            // which would NRE below - confirm 'Generator' has only these two values.
            List <OrthographicResult> ars = null;
            switch (_generato)
            {
            case Generator.Dictati: ars = _ars_def; break;

            case Generator.Dialogi: ars = _ars_enh; break;
            }

            // convert each recognized word into an 'OrthographicResult'
            foreach (ISpeechPhraseElement word in Result.PhraseInfo.Elements)
            {
#if DEBUG
                logfile.Log(". . word= " + word.DisplayText);
                logfile.Log(". . LexicalForm= " + word.LexicalForm);
                logfile.Log(". . DisplayAttributes= " + word.DisplayAttributes);
                logfile.Log(". . EngineConfidence= " + word.EngineConfidence);
                logfile.Log(". . ActualConfidence= " + word.ActualConfidence);
                var ids = (ushort[])word.Pronunciation;
                foreach (var id in ids)
                {
                    logfile.Log(". . . PhoneId= " + id + " - " + _phoneConverter.IdToPhone(id));
                }

                logfile.Log(". . word.AudioStreamOffset= " + word.AudioStreamOffset);
                logfile.Log(". . word.AudioSizeBytes   = " + word.AudioSizeBytes);
                logfile.Log(". . word.AudioTimeOffset  = " + word.AudioTimeOffset);
                logfile.Log(". . word.AudioSizeTime    = " + word.AudioSizeTime);
#endif

                var ar = new OrthographicResult();
                ar.Orthography = word.DisplayText;

                string phons = _phoneConverter.IdToPhone(word.Pronunciation);                 // NOTE: object is a ushort or ushort[]

                // IdToPhone yields a space-delimited phoneme string
                ar.Phons = new List <string>(phons.Split(' '));
                ar.Confi = word.EngineConfidence;
                // strip SAPI's "SEC…Confidence" decoration down to the bare level
                ar.Level = word.ActualConfidence.ToString().Replace("SEC", String.Empty).Replace("Confidence", String.Empty);
                // '_offset' compensates for a previous Recognition on the same stream (see below)
                ar.Start = _offset + Utility.GarpstoSecs(word.AudioTimeOffset);
                ar.Stop  = _offset + Utility.GarpstoSecs(word.AudioTimeOffset + word.AudioSizeTime);

                ars.Add(ar);
            }

            // NOTE: Recognition could be fired before the entire audiofile has
            // completed, which means it's going to fire again but the AudioTimeOffsets
            // will be completely borked obviously. So add this time-offset to any
            // second or subsequent Recognition event that happens on this stream
            _offset += Utility.GarpstoSecs(Result.PhraseInfo.AudioSizeTime);             // TODO. is not accurate.

            // accumulate phrase-level confidence when no typed-text is present
            // NOTE(review): presumably '_text' empty means free dictation mode - confirm.
            if (_text == String.Empty)
            {
                ++Confidence_def_count;
                Confidence_def += Result.PhraseInfo.Rule.EngineConfidence;
            }
#if DEBUG
            logfile.Log();
#endif
        }