//		/// <summary>
//		/// IMPORTANT: This will crash the Lipsyncer.
//		///
//		/// Get this. I only have EnglishUS and EnglishGB recognizers for my OS
//		/// (win7 pro) but can output the phonemes of French and perhaps other
//		/// languages.
//		/// PS. My EnglishGB phonemes appear to be utterly borked.
//		/// </summary>
//		/// <param name="langid"></param>
//		void PrintPhons(int langid)
//		{
//			_phoneConverter.LanguageId = langid;
//			for (int i = 0; i != 100; ++i)
//				logfile.Log(i + " - " + _phoneConverter.IdToPhone(i));
//		}


		#region methods
		/// <summary>
		/// Sets the Recognizer and LanguageId when the Recognizers combobox
		/// selection changes.
		/// @note The LanguageId is used by both TTS and SpeechRecognition.
		/// </summary>
		/// <param name="recognizer"></param>
		/// <returns>true if the language-id is set successfully</returns>
		bool SetRecognizer(Recognizer recognizer)
		{
#if DEBUG
			logfile.Log();
			logfile.Log("SetRecognizer()");
			logfile.Log(". create (SpInprocRecognizer)_recognizer");
#endif
			_recognizer = new SpInprocRecognizer();
#if DEBUG
			logfile.Log(". (SpInprocRecognizer)_recognizer CREATED");
#endif
			_recognizer.Recognizer = (SpObjectToken)recognizer.Tok;
#if DEBUG
			logfile.Log(". recognizer.Tok.Id= " + recognizer.Tok.Id);
			logfile.Log(". recognizer.Description= " + recognizer.Tok.GetDescription());
			logfile.Log(". recognizer.Langids= " + recognizer.Langids);
#endif
			string langid = recognizer.Langids;

			int pos = recognizer.Langids.IndexOf(' ');
			if (pos != -1)
			{
				langid = langid.Substring(0, pos); // use 1st langid
			}

			// TODO: ComboBox dropdown for user to choose from if 2+ languages
			//       are supported by the current Recognizer.

			int id;
			if (!Int32.TryParse(langid, out id) || // safety - unless the token has "n/a" Languages.
				id < 0)                            // TODO: check id against valid SAPI language-ids
			{
				if (!FxeGeneratorF.isConsole)
				{
					using (var d = new InfoDialog("Error", "Did not find a Language in the Recognizer's token."))
					{
						d.ShowDialog(FxeGeneratorF.That);
					}
				}
				return(false);
			}

			_phoneConverter.LanguageId = id;
#if DEBUG
			logfile.Log(". _phoneConverter.LanguageId= " + _phoneConverter.LanguageId);
			logfile.Log();
#endif
			StaticData.viceroy(_phoneConverter.LanguageId);

			return(true);
		}
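
		/// <summary>
		/// A minimal sketch (not wired in) of the TODO above - vet a parsed
		/// language-id against a whitelist of SAPI LCIDs before assigning it to
		/// '_phoneConverter'. The method name and the list are illustrative
		/// only: the values are standard Windows LCIDs in decimal (0x409
		/// EnglishUS, 0x809 EnglishGB, 0x40C French), but whether
		/// 'Recognizer.Langids' reports decimal or hex - and which ids are
		/// actually usable - depends on the recognizer tokens installed on the
		/// OS.
		/// </summary>
		/// <param name="id">a candidate language-id parsed from a Recognizer token</param>
		/// <returns>true if the id is whitelisted</returns>
		static bool isValidLangid(int id)
		{
			switch (id)
			{
				case 1033: // 0x409 EnglishUS
				case 2057: // 0x809 EnglishGB
				case 1036: // 0x40C French
					return true;
			}
			return false;
		}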

		/// <summary>
		/// Tests an FXE-file by reading it w/ <see cref="FxeReader.TestFile"/>.
		/// </summary>
		/// <param name="pfe">fullpath of FXE-file to test</param>
		static void TestOutputFile(string pfe)
		{
			string titl, info;
			if (FxeReader.TestFile(pfe))
			{
				titl = "Write SUCCESS";
				info = pfe;
			}
			else
			{
				titl = "Write FAILED";
				info = "Borked file" + Environment.NewLine + pfe;
			}

			using (var d = new InfoDialog(titl, info))
			{
				d.ShowDialog(FxeGeneratorF.That);
			}
		}
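
		/// <summary>
		/// A console-aware variant of <see cref="TestOutputFile"/> - sketch
		/// only, not part of the original code. It assumes the same
		/// 'FxeReader.TestFile()' call but follows the '!isConsole' pattern used
		/// elsewhere in the source: show the InfoDialog in the GUI, log the
		/// result in console mode so a batch run is not blocked by a modal
		/// dialog.
		/// </summary>
		/// <param name="pfe">fullpath of FXE-file to test</param>
		static void TestOutputFileLogged(string pfe)
		{
			bool valid = FxeReader.TestFile(pfe);

			if (!FxeGeneratorF.isConsole)
			{
				string titl = valid ? "Write SUCCESS" : "Write FAILED";
				string info = valid ? pfe : "Borked file" + Environment.NewLine + pfe;

				using (var d = new InfoDialog(titl, info))
				{
					d.ShowDialog(FxeGeneratorF.That);
				}
			}
			else
				logfile.Log("TestOutputFile " + (valid ? "SUCCESS" : "FAILED") + " - " + pfe);
		}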

		/// <summary>
		/// cTor.
		/// </summary>
		/// <param name="pfe">blank string if '!isConsole'</param>
		/// <param name="headtype">blank string if '!isConsole'</param>
		internal FxeGeneratorF(string pfe = "", string headtype = "")
		{
#if DEBUG
//			LogSpeechRecognitionEngines();
			logfile.Log("FxeGeneratorF() cTor pfe= " + pfe + " headtype= " + headtype);
#endif
			That = this;
			FxeData.LoadTrigrams();

			bool fatality = false;

			if (pfe == String.Empty) // is GUI interface ->
			{
#if DEBUG
				logfile.Log(". is GUI");
#endif
				InitializeComponent();

				co_headtype.SelectedIndex = 0;

				la_def_word_pct.Text =
				la_def_phon_pct.Text =
				la_enh_word_pct.Text =
				la_enh_phon_pct.Text = String.Empty;

				tb_text.Text = String.Empty;

				tb_text    .BackColor =
				tb_expected.BackColor = Color.AntiqueWhite;

				tb_def_words.BackColor =
				tb_def_phons.BackColor =
				tb_enh_words.BackColor =
				tb_enh_phons.BackColor = Color.GhostWhite;

				// PHONEMES data/grid ->
				DataColumn dc;

				dc = new DataColumn(HEAD_PHONS_0, typeof(string)); // pos
				dc.ReadOnly = true;
				_dt1.Columns.Add(dc);

				dc = new DataColumn(HEAD_PHONS_1, typeof(string)); // phon
				dc.ReadOnly = true;
				_dt1.Columns.Add(dc);

				dc = new DataColumn(HEAD_PHONS_2, typeof(string)); // start
				dc.ReadOnly = true;
				_dt1.Columns.Add(dc);

				dc = new DataColumn(HEAD_PHONS_3, typeof(string)); // stop
				dc.ReadOnly = true;
				_dt1.Columns.Add(dc);

				dc = new DataColumn(HEAD_PHONS_4, typeof(string)); // vis
				dc.ReadOnly = true;
				_dt1.Columns.Add(dc);

				dc = new DataColumn(HEAD_PHONS_5, typeof(string)); // truth
				dc.ReadOnly = true;
				_dt1.Columns.Add(dc);

				dc = new DataColumn(HEAD_PHONS_6, typeof(string)); // level
				dc.ReadOnly = true;
				_dt1.Columns.Add(dc);

				grid_phons.DataSource = _dt1;

				grid_phons.Columns[0].Width = 50; // 50 "pos"
				grid_phons.Columns[1].Width = 76; // 76 "phoneme"
				grid_phons.Columns[2].Width = 86; // 86 "start"
				grid_phons.Columns[3].Width = 86; // 86 "stop"
				grid_phons.Columns[4].Width = 67; // 67 "viseme"
				grid_phons.Columns[5].Width = 57; // 57 "truth"
				grid_phons.Columns[6].Width = 61; // 56 "level"

				for (int i = 0; i != grid_phons.Columns.Count; ++i)
				{
					grid_phons.Columns[i].SortMode = DataGridViewColumnSortMode.NotSortable;
				}

				// Data Blocks data/grid ->
				dc = new DataColumn(HEAD_BLOCKS_0, typeof(string));
				dc.ReadOnly = true;
				_dt2.Columns.Add(dc);

				dc = new DataColumn(HEAD_BLOCKS_1, typeof(float));
				dc.ReadOnly = true;
				_dt2.Columns.Add(dc);

				dc = new DataColumn(HEAD_BLOCKS_2, typeof(float));
				dc.ReadOnly = true;
				_dt2.Columns.Add(dc);

				grid_blocs.DataSource = _dt2;

//				grid_blocs.Columns[0].Width =  80; //  68 "viseme"
//				grid_blocs.Columns[1].Width =  97; //  87 "frame stop"
//				grid_blocs.Columns[2].Width = 110; // 100 "morph weight"
				grid_blocs.Columns[0].Width = 140; //  68 "viseme"
				grid_blocs.Columns[1].Width =  67; //     "stop"
				grid_blocs.Columns[2].Width =  80; //     "weight"

//				for (int i = 0; i != grid_blocs.Columns.Count; ++i)
//					grid_blocs.Columns[i].SortMode = DataGridViewColumnSortMode.NotSortable;
				grid_blocs.Columns[0].SortMode = DataGridViewColumnSortMode.NotSortable;
				grid_blocs.Columns[2].SortMode = DataGridViewColumnSortMode.NotSortable;

				grid_blocs.ColumnHeaderMouseClick += dgblocs_ColumnHeaderMouseClick;

				grid_phons.RowHeadersVisible = grid_blocs.RowHeadersVisible = false;

				printversion();

				// instantiate/initialize SAPI
				_sapi = new SapiLipsync();
				_sapi.TtsStreamEnded += OnTtsStreamEnded;
				_sapi.SrStreamEnded  += OnSrStreamEnded;

				// this will set '_sapi._recognizer'
				// this will set '_sapi._phoneConverter.LanguageId'
				// and the Titletext
				if (!SpeechRecognizerLister.AddSpeechRecognizers(co_recognizers))
				{
					string info = "FXE LipSyncer requires a SAPI 5.4 compliant"
								+ Environment.NewLine
								+ "Microsoft Speech Recognizer"
								+ Environment.NewLine
								+ "as displayed in Windoz ControlPanel|SpeechRecognition."
								+ Environment.NewLine + Environment.NewLine
								+ "none was found ...";
					using (var d = new InfoDialog("FATAL Error", info))
					{
						d.ShowDialog(this);
					}
					fatality = true;
				}
			}
			else if (headtype != String.Empty && File.Exists(pfe)) // is Console interface ->
			{
#if DEBUG
				logfile.Log(". is Console");
#endif
				// TODO: Fail if a Recognizer is not found.
				// TODO: Ensure that 'head Model/Skeleton type' is a recognized type.
				//       Eg. "P_HHM"

				Filelabel = Utility.GetFilelabel(pfe); // NOTE: that will be written into the FXE-file output.

				_sapi = new SapiLipsync(_pfe = pfe);
				if (_sapi.Wavefile != String.Empty)
				{
					isConsole = true;
					_headtype = headtype;

					_sapi.SrStreamEnded += OnSrStreamEnded;
					_sapi.Start(LoadTypedTextFile());
				}
				else
				{
					fatality = true;
				}
			}
			else // is Console error ->
			{
				fatality = true;
			}

			if (fatality)
			{
				Environment.Exit(0);
			}
		}
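
		/// <summary>
		/// A minimal sketch (not wired in) for the console-mode TODO above -
		/// vet the 'headtype' argument before SAPI is started. "P_HHM" is the
		/// only head Model/Skeleton type named in the source; the array is a
		/// placeholder that would have to be extended with whatever types the
		/// GUI's 'co_headtype' combobox actually lists.
		/// </summary>
		/// <param name="headtype">head Model/Skeleton type passed on the commandline</param>
		/// <returns>true if the headtype is recognized</returns>
		static bool isKnownHeadtype(string headtype)
		{
			string[] headtypes = { "P_HHM" }; // placeholder set

			for (int i = 0; i != headtypes.Length; ++i)
			{
				if (String.Equals(headtype, headtypes[i], StringComparison.OrdinalIgnoreCase))
					return true;
			}
			return false;
		}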

		/// <summary>
		/// Determines the file to use for the SpeechRecognition filestream,
		/// converting it from BMU/MP3 to WAV if necessary.
		/// @note The result shall be PCM 44.1kHz 16-bit Mono.
		/// </summary>
		/// <param name="pfe">path_file_extension</param>
		/// <returns>the fullpath to a PCM-wave file else a blank-string</returns>
		internal static string deterwave(string pfe)
		{
			//logfile.Log("AudioConverter.deterAudiopath() pfe= " + pfe);

			string pathT = Path.GetTempPath();
			//logfile.Log(". path= " + pathT);

			if (pfe.EndsWith(EXT_WAV, StringComparison.InvariantCultureIgnoreCase) || // prep .BMU ->
				pfe.EndsWith(EXT_BMU, StringComparison.InvariantCultureIgnoreCase))
			{
				var chars = new char[3];
				using (var fs = new FileStream(pfe, FileMode.Open, FileAccess.Read, FileShare.Read))
				{
					var br = new BinaryReader(fs);
					chars = br.ReadChars(3);
					br.Close();
				}

				if (   chars[0] == 'B' // because .BMUs are .MP3s and NwN2 labels them as .WAVs
					&& chars[1] == 'M'
					&& chars[2] == 'U')
				{
					string pfeT = Path.Combine(pathT, TMP_MP3); // so label it as .MP3 and allow the next block to catch it.
					//logfile.Log(". pfeT(MP3)= " + pfeT);

					File.Delete(pfeT);
					File.Copy(pfe, pfeT);
					pfe = pfeT;
				}
			}

			if (pfe.EndsWith(EXT_MP3, StringComparison.InvariantCultureIgnoreCase)) // convert to .WAV file ->
			{
				string pfeT = Path.Combine(pathT, TMP_WAV);
				//logfile.Log(". pfeT(WAV)= " + pfeT);

				File.Delete(pfeT);

//				string execpath = Path.GetDirectoryName(Assembly.GetExecutingAssembly().Location);
//				var info = new ProcessStartInfo(Path.Combine(execpath, LAME_EXE));
				var info = new ProcessStartInfo(Path.Combine(Application.StartupPath, LAME_EXE));
				info.Arguments = "--decode \"" + pfe + "\" \"" + pfeT + "\"";
				info.WindowStyle = ProcessWindowStyle.Hidden;
				info.UseShellExecute = false;
				info.CreateNoWindow = true;

				using (Process proc = Process.Start(info))
				{
					proc.WaitForExit();
				}
				pfe = pfeT;
			}

			// http://www.topherlee.com/software/pcm-tut-wavformat.html
			// bytes
			//  1- 4  "RIFF"            Marks the file as a RIFF file. Characters are each 1 byte long.
			//  5- 8  File size (int)   Size of the overall file - 8 bytes, in bytes (32-bit integer). Typically filled in after creation.
			//  9-12  "WAVE"            File Type Header. For our purposes it always equals "WAVE".
			// 13-16  "fmt "            Format chunk marker. Includes trailing null.
			// 17-20  16                Length of the format data as listed above.
			// 21-22  1                 Type of format (1 is PCM) - 2-byte integer.
			// 23-24  2                 Number of channels - 2-byte integer.
			// 25-28  44100             Sample rate - 32-bit integer. Common values are 44100 (CD) and 48000 (DAT). Samples per second, ie. Hertz.
			// 29-32  176400            (SampleRate * BitsPerSample * Channels) / 8.
			// 33-34  4                 (BitsPerSample * Channels) / 8 : 1 - 8-bit mono, 2 - 8-bit stereo/16-bit mono, 4 - 16-bit stereo.
			// 35-36  16                Bits per sample.
			// 37-40  "data"            "data" chunk header. Marks the beginning of the data section.
			// 41-44  File size (data)  Size of the data section.

			string fullpath = String.Empty;

			if (pfe.EndsWith(EXT_WAV, StringComparison.InvariantCultureIgnoreCase)) // check .WAV ->
			{
				using (var fs = new FileStream(pfe, FileMode.Open, FileAccess.Read, FileShare.Read))
				{
					var br = new BinaryReader(fs);

					char[] c = br.ReadChars(16); // start 0
					if (   c[0]  == 'R' && c[1]  == 'I' && c[2]  == 'F' && c[3]  == 'F'
						&& c[8]  == 'W' && c[9]  == 'A' && c[10] == 'V' && c[11] == 'E'
						&& c[12] == 'f' && c[13] == 'm' && c[14] == 't' && c[15] == ' ')
					{
						br.ReadBytes(4); // start 16

						short format = br.ReadInt16(); // start 20: is PCM
						if (format == (short)1)
						{
							short channels = br.ReadInt16(); // start 22: is Mono
							if (channels == (short)1)
							{
								// TODO: Sample-rate and bit-depth should probably be relaxed.

								int rate = br.ReadInt32(); // start 24: is 44.1kHz
								if (rate == 44100)
								{
									br.ReadBytes(6); // start 28

									short bits = br.ReadInt16(); // start 34: is 16-bit
									if (bits == (short)16)
									{
										fullpath = pfe;
										//logfile.Log(". AudioConverter.fullpath= " + fullpath);
									}
								}
							}
						}
					}
					br.Close();
				}
			}

			if (!FxeGeneratorF.isConsole && fullpath == String.Empty)
			{
				using (var d = new InfoDialog("Conversion Error", "Failed to convert to 44.1kHz 16-bit Mono PCM-wave format."))
				{
					d.ShowDialog(FxeGeneratorF.That);
				}
			}
			return(fullpath);
		}
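
		/// <summary>
		/// A diagnostic sketch (not wired in), assuming the canonical 44-byte
		/// PCM header documented above: read the fmt fields in one pass and log
		/// them, so a file rejected by <see cref="deterwave"/> could be reported
		/// as eg. "48000 Hz stereo" instead of a bare "Conversion Error". A wave
		/// with extra chunks before "data" would need a proper chunk-walk
		/// instead of fixed offsets.
		/// </summary>
		/// <param name="pfe">fullpath of a WAV-file</param>
		static void LogWaveHeader(string pfe)
		{
			using (var fs = new FileStream(pfe, FileMode.Open, FileAccess.Read, FileShare.Read))
			{
				var br = new BinaryReader(fs);

				br.ReadBytes(20);                // "RIFF", filesize, "WAVE", "fmt ", fmt-length
				short format   = br.ReadInt16(); // start 20: 1 is PCM
				short channels = br.ReadInt16(); // start 22
				int   rate     = br.ReadInt32(); // start 24
				br.ReadBytes(6);                 // start 28: byte-rate + block-align
				short bits     = br.ReadInt16(); // start 34

				logfile.Log(". wave header: format= " + format
						  + " channels= "             + channels
						  + " rate= "                 + rate
						  + " bits= "                 + bits);
				br.Close();
			}
		}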