Esempio n. 1
0
//		/// <summary>
//		/// IMPORTANT: This will crash the Lipsyncer.
//		///
//		/// Get this. I only have EnglishUS and EnglishGB recognizers for my OS
//		/// (win7 pro) but can output the phonemes of French and perhaps other
//		/// languages.
//		/// PS. My EnglishGB phonemes appear to be utterly borked.
//		/// </summary>
//		/// <param name="langid"></param>
//		void PrintPhons(int langid)
//		{
//			_phoneConverter.LanguageId = langid;
//			for (int i = 0; i != 100; ++i)
//				logfile.Log(i + " - " + _phoneConverter.IdToPhone(i));
//		}


        #region methods
        /// <summary>
        /// Creates the in-process recognizer for a given Recognizer token and
        /// assigns the token's (first) language-id to the phone-converter.
        /// Called when the selection in the Recognizers combobox changes.
        /// @note The LanguageId is used by both TTS and SpeechRecognition.
        /// </summary>
        /// <param name="recognizer">the Recognizer whose token and language-ids
        /// are to be used</param>
        /// <returns>true if the language-id is set successfully; false if no
        /// non-negative integer language-id could be parsed from the token</returns>
        bool SetRecognizer(Recognizer recognizer)
        {
#if DEBUG
            logfile.Log();
            logfile.Log("SetRecognizer()");

            logfile.Log(". create (SpInprocRecognizer)_recognizer");
#endif
            _recognizer = new SpInprocRecognizer();
#if DEBUG
            logfile.Log(". (SpInprocRecognizer)_recognizer CREATED");
#endif
            _recognizer.Recognizer = (SpObjectToken)recognizer.Tok;
#if DEBUG
            logfile.Log(". recognizer.Tok.Id= " + recognizer.Tok.Id);
            logfile.Log(". recognizer.Description= " + recognizer.Tok.GetDescription());

            logfile.Log(". recognizer.Langids= " + recognizer.Langids);
#endif
            // The language-ids are space-delimited; only the first one is used.
            // TODO: ComboBox dropdown for user to choose from if 2+ languages
            // are supported by the current Recognizer.
            string all   = recognizer.Langids;
            int    space = recognizer.Langids.IndexOf(' ');
            string first = (space == -1) ? all
                                         : all.Substring(0, space);

            // Parsing can fail if the token has "n/a" Languages.
            // TODO: check the id against valid SAPI language-ids
            int  languageId;
            bool parsed = Int32.TryParse(first, out languageId);

            if (parsed && languageId >= 0)
            {
                _phoneConverter.LanguageId = languageId;
#if DEBUG
                logfile.Log(". _phoneConverter.LanguageId= " + _phoneConverter.LanguageId);
                logfile.Log();
#endif
                StaticData.viceroy(_phoneConverter.LanguageId);

                return true;
            }

            // failure: inform the user unless running as a console-app
            if (!FxeGeneratorF.isConsole)
            {
                using (var dialog = new InfoDialog("Error", "Did not find a Language in the Recognizer's token."))
                {
                    dialog.ShowDialog(FxeGeneratorF.That);
                }
            }
            return false;
        }
Esempio n. 2
0
        /// <summary>
        /// Verifies a written FXE-file by passing it to
        /// <see cref="FxeReader.TestFile"/> and reports the outcome to the
        /// user in an <c>InfoDialog</c>.
        /// </summary>
        /// <param name="pfe">fullpath of FXE-file to test</param>
        static void TestOutputFile(string pfe)
        {
            bool passed = FxeReader.TestFile(pfe);

            string caption = passed ? "Write SUCCESS"
                                    : "Write FAILED";

            string message = passed ? pfe
                                    : "Borked file" + Environment.NewLine + pfe;

            using (var dialog = new InfoDialog(caption, message))
            {
                dialog.ShowDialog(FxeGeneratorF.That);
            }
        }
Esempio n. 3
0
        /// <summary>
        /// cTor. Runs as the GUI interface if <paramref name="pfe"/> is blank,
        /// else as the Console interface if <paramref name="headtype"/> is not
        /// blank and <paramref name="pfe"/> is an existing file. Any other
        /// combination, or a failure to initialize, exits the process.
        /// </summary>
        /// <param name="pfe">fullpath of the input file for the Console
        /// interface; blank string for the GUI interface</param>
        /// <param name="headtype">head Model/Skeleton type (eg. "P_HHM") for
        /// the Console interface; blank string for the GUI interface</param>
        internal FxeGeneratorF(string pfe = "", string headtype = "")
        {
#if DEBUG
//			LogSpeechRecognitionEngines();
            logfile.Log("FxeGeneratorF() cTor pfe= " + pfe + " headtype= " + headtype);
#endif

            That = this;

            FxeData.LoadTrigrams();

            bool abort = false;

            if (pfe == String.Empty) // is GUI interface ->
            {
#if DEBUG
                logfile.Log(". is GUI");
#endif
                InitializeComponent();

                co_headtype.SelectedIndex = 0;

                // blank the percent-labels
                la_enh_phon_pct.Text = String.Empty;
                la_enh_word_pct.Text = String.Empty;
                la_def_phon_pct.Text = String.Empty;
                la_def_word_pct.Text = String.Empty;

                tb_text.Text = String.Empty;

                // input textboxes
                tb_expected.BackColor = Color.AntiqueWhite;
                tb_text    .BackColor = Color.AntiqueWhite;

                // result textboxes
                tb_enh_phons.BackColor = Color.GhostWhite;
                tb_enh_words.BackColor = Color.GhostWhite;
                tb_def_phons.BackColor = Color.GhostWhite;
                tb_def_words.BackColor = Color.GhostWhite;


                // PHONEMES data/grid ->
                // columns: pos, phon, start, stop, vis, truth, level
                string[] phonHeads =
                {
                    HEAD_PHONS_0,
                    HEAD_PHONS_1,
                    HEAD_PHONS_2,
                    HEAD_PHONS_3,
                    HEAD_PHONS_4,
                    HEAD_PHONS_5,
                    HEAD_PHONS_6
                };
                foreach (string head in phonHeads)
                {
                    _dt1.Columns.Add(new DataColumn(head, typeof(string)) { ReadOnly = true });
                }

                grid_phons.DataSource = _dt1;

                // widths in column order: pos, phoneme, start, stop, viseme, truth, level
                int[] phonWidths = { 50, 76, 86, 86, 67, 57, 61 };
                for (int col = 0; col != phonWidths.Length; ++col)
                {
                    grid_phons.Columns[col].Width = phonWidths[col];
                }

                foreach (DataGridViewColumn column in grid_phons.Columns)
                {
                    column.SortMode = DataGridViewColumnSortMode.NotSortable;
                }


                // Data Blocks data/grid ->
                _dt2.Columns.Add(new DataColumn(HEAD_BLOCKS_0, typeof(string)) { ReadOnly = true });
                _dt2.Columns.Add(new DataColumn(HEAD_BLOCKS_1, typeof(float))  { ReadOnly = true });
                _dt2.Columns.Add(new DataColumn(HEAD_BLOCKS_2, typeof(float))  { ReadOnly = true });

                grid_blocs.DataSource = _dt2;

                grid_blocs.Columns[0].Width = 140; // "viseme"
                grid_blocs.Columns[1].Width =  67; // "stop"
                grid_blocs.Columns[2].Width =  80; // "weight"

                // Column 1 keeps its default sort-mode; header clicks are
                // handled by dgblocs_ColumnHeaderMouseClick.
                grid_blocs.Columns[0].SortMode     = DataGridViewColumnSortMode.NotSortable;
                grid_blocs.Columns[2].SortMode     = DataGridViewColumnSortMode.NotSortable;
                grid_blocs.ColumnHeaderMouseClick += dgblocs_ColumnHeaderMouseClick;

                grid_blocs.RowHeadersVisible = false;
                grid_phons.RowHeadersVisible = false;

                printversion();


                // instantiate/initialize SAPI
                _sapi = new SapiLipsync();
                _sapi.TtsStreamEnded += OnTtsStreamEnded;
                _sapi.SrStreamEnded  += OnSrStreamEnded;

                // this will set '_sapi._recognizer'
                // this will set '_sapi._phoneConverter.LanguageId'
                // and the Titletext
                bool found = SpeechRecognizerLister.AddSpeechRecognizers(co_recognizers);
                if (!found)
                {
                    string nl = Environment.NewLine;

                    string info = "FXE LipSyncer requires a SAPI 5.4 compliant" + nl
                                  + "Microsoft Speech Recognizer" + nl
                                  + "as displayed in Windoz ControlPanel|SpeechRecognition." + nl
                                  + nl
                                  + "none was found ...";

                    using (var dialog = new InfoDialog("FATAL Error", info))
                    {
                        dialog.ShowDialog(this);
                    }
                    abort = true;
                }
            }
            else if (headtype == String.Empty || !File.Exists(pfe)) // is Console error ->
            {
                abort = true;
            }
            else // is Console interface ->
            {
#if DEBUG
                logfile.Log(". is Console");
#endif
                // TODO: Fail if a Recognizer is not found.

                // TODO: Ensure that 'head Model/Skeleton type' is a recognized type.
                // Eg. "P_HHM"

                Filelabel = Utility.GetFilelabel(pfe); // NOTE: that will be written into the FXE-file output.

                _sapi = new SapiLipsync(_pfe = pfe);
                if (_sapi.Wavefile == String.Empty)
                {
                    // no usable wavefile
                    abort = true;
                }
                else
                {
                    isConsole = true;
                    _headtype = headtype;

                    _sapi.SrStreamEnded += OnSrStreamEnded;
                    _sapi.Start(LoadTypedTextFile());
                }
            }

            if (abort)
            {
                Environment.Exit(0);
            }
        }
Esempio n. 4
0
        /// <summary>
        /// Determines the file to use for the SpeechRecognition filestream,
        /// converting it from BMU/MP3 to WAV if necessary.
        /// @note The result shall be PCM 44.1kHz 16-bit Mono.
        /// @note If no valid wavefile results and the app is not running as a
        /// console-app, a "Conversion Error" dialog is shown.
        /// </summary>
        /// <param name="pfe">path_file_extension</param>
        /// <returns>the fullpath to a PCM-wave file else a blank-string</returns>
        internal static string deterwave(string pfe)
        {
            string pathT = Path.GetTempPath();

            if (pfe.EndsWith(EXT_WAV, StringComparison.InvariantCultureIgnoreCase) || // prep .BMU ->
                pfe.EndsWith(EXT_BMU, StringComparison.InvariantCultureIgnoreCase))
            {
                // because .BMUs are .MP3s and NwN2 labels them as .WAVs
                byte[] magic = ReadHeaderBytes(pfe, 3);
                if (HasAsciiTag(magic, 0, "BMU"))
                {
                    // so label it as .MP3 and allow the next block to catch it.
                    string pfeT = Path.Combine(pathT, TMP_MP3);

                    File.Delete(pfeT);
                    File.Copy(pfe, pfeT);

                    pfe = pfeT;
                }
            }

            if (pfe.EndsWith(EXT_MP3, StringComparison.InvariantCultureIgnoreCase)) // convert to .WAV file ->
            {
                string pfeT = Path.Combine(pathT, TMP_WAV);

                // Delete any stale output so that a failed decode cannot be
                // mistaken for a successful one.
                File.Delete(pfeT);

                var info = new ProcessStartInfo(Path.Combine(Application.StartupPath, LAME_EXE));
                info.Arguments       = "--decode \"" + pfe + "\" \"" + pfeT + "\"";
                info.WindowStyle     = ProcessWindowStyle.Hidden;
                info.UseShellExecute = false;
                info.CreateNoWindow  = true;

                using (Process proc = Process.Start(info))
                {
                    proc.WaitForExit();
                }

                pfe = pfeT;
            }

// http://www.topherlee.com/software/pcm-tut-wavformat.html
// (1-based byte positions; multi-byte integers are little-endian)
//  1- 4	"RIFF"				Marks the file as a riff file. Characters are each 1 byte long.
//  5- 8	File size (integer)	Size of the overall file - 8 bytes, in bytes (32-bit integer).
//  9-12	"WAVE"				File Type Header. For our purposes, it always equals "WAVE".
// 13-16	"fmt "				Format chunk marker. Includes trailing space.
// 17-20	16					Length of format data as listed above
// 21-22	1					Type of format (1 is PCM) - 2 byte integer
// 23-24	2					Number of Channels - 2 byte integer
// 25-28	44100				Sample Rate - 32-bit integer. Common values are 44100 (CD), 48000 (DAT).
// 29-32	176400				(Sample Rate * BitsPerSample * Channels) / 8.
// 33-34	4					(BitsPerSample * Channels) / 8. 1 = 8-bit mono; 2 = 8-bit stereo or 16-bit mono; 4 = 16-bit stereo
// 35-36	16					Bits per sample
// 37-40	"data"				"data" chunk header. Marks the beginning of the data section.
// 41-44	File size (data)	Size of the data section.

            string fullpath = String.Empty;

            if (pfe.EndsWith(EXT_WAV, StringComparison.InvariantCultureIgnoreCase)) // check .WAV ->
            {
                // The header is inspected as raw bytes. Decoding it as text
                // would misalign the fields because the RIFF size field holds
                // arbitrary binary data. A missing or truncated file (eg. a
                // failed decode) yields a short buffer and fails the check.
                byte[] header = ReadHeaderBytes(pfe, 36);

                if (header.Length == 36
                    && HasAsciiTag(header,  0, "RIFF")
                    && HasAsciiTag(header,  8, "WAVE")
                    && HasAsciiTag(header, 12, "fmt "))
                {
                    // TODO: Sample-rate and bit-depth should probably be relaxed.

                    int format   = header[20] | (header[21] << 8);  // 1 is PCM
                    int channels = header[22] | (header[23] << 8);  // 1 is Mono
                    int rate     = header[24] | (header[25] << 8)
                                 | (header[26] << 16) | (header[27] << 24); // 44.1kHz
                    int bits     = header[34] | (header[35] << 8);  // 16-bit

                    if (format == 1 && channels == 1 && rate == 44100 && bits == 16)
                    {
                        fullpath = pfe;
                    }
                }
            }

            if (!FxeGeneratorF.isConsole && fullpath == String.Empty)
            {
                using (var d = new InfoDialog("Conversion Error", "Failed to convert to 44.1kHz 16-bit Mono PCM-wave format."))
                {
                    d.ShowDialog(FxeGeneratorF.That);
                }
            }

            return fullpath;
        }

        /// <summary>
        /// Reads up to <paramref name="count"/> raw bytes from the start of a
        /// file.
        /// </summary>
        /// <param name="path">fullpath of the file to read</param>
        /// <param name="count">count of bytes wanted</param>
        /// <returns>the bytes that were read; shorter than
        /// <paramref name="count"/> if the file is shorter, and empty if the
        /// file does not exist</returns>
        static byte[] ReadHeaderBytes(string path, int count)
        {
            if (!File.Exists(path))
            {
                return new byte[0];
            }

            var buffer = new byte[count];
            int total  = 0;

            using (var fs = new FileStream(path, FileMode.Open, FileAccess.Read, FileShare.Read))
            {
                // Stream.Read may return fewer bytes than requested, so loop
                // until the buffer is full or the end of the file is reached.
                int read;
                while (total < count && (read = fs.Read(buffer, total, count - total)) > 0)
                {
                    total += read;
                }
            }

            if (total != count)
            {
                Array.Resize(ref buffer, total);
            }
            return buffer;
        }

        /// <summary>
        /// Checks whether the bytes at an offset match an ASCII tag.
        /// </summary>
        /// <param name="data">the bytes to inspect</param>
        /// <param name="offset">0-based position of the tag's first byte</param>
        /// <param name="tag">the expected ASCII characters</param>
        /// <returns>true if every character of the tag matches; false on a
        /// mismatch or if the data is too short to contain the tag</returns>
        static bool HasAsciiTag(byte[] data, int offset, string tag)
        {
            if (data.Length < offset + tag.Length)
            {
                return false;
            }

            for (int i = 0; i != tag.Length; ++i)
            {
                if (data[offset + i] != (byte)tag[i])
                {
                    return false;
                }
            }
            return true;
        }