private HTS_GStreamSet m_GSS;    // set of generated parameter streams

//-----------------------------------------------------------

/// <summary>
/// Creates an engine: instantiates each sub-object held by the engine
/// (condition, model set, label, state/parameter/generated stream sets)
/// and then calls <c>Initialize()</c>.
/// </summary>
public HTS_Engine()
{
    // Sub-objects are created in the same order as before; Initialize() runs last
    // so that it sees every field already populated.
    this.m_Condition = new HTS_Condition();
    this.m_ModelSet  = new HTS_ModelSet();
    this.m_Label     = new HTS_Label();
    this.m_SSS       = new HTS_SStreamSet();
    this.m_PSS       = new HTS_PStreamSet();
    this.m_GSS       = new HTS_GStreamSet();

    Initialize();
}
/// <summary>
/// HTS_SStreamSet_create: parse the label sequence and determine state durations,
/// then fill this state stream set (per-state mean/variance/MSD, dynamic windows
/// and global-variance data) from the model set.
/// </summary>
/// <param name="ms">Model set queried for stream/state/voice counts, durations, parameters, windows and GV.</param>
/// <param name="label">Label sequence; each entry contributes <c>nstate</c> consecutive states.</param>
/// <param name="phoneme_alignment_flag">
/// When true, durations are fitted to the end frames stored in <paramref name="label"/>;
/// otherwise they are derived from the model (optionally scaled by <paramref name="speed"/>).
/// </param>
/// <param name="speed">Speaking-rate factor; only used when it differs from 1.0 and alignment is off.</param>
/// <param name="duration_iw">Per-voice duration interpolation weights. Normalized IN PLACE to sum to 1.</param>
/// <param name="parameter_iw">Parameter interpolation weights indexed [voice][stream]. Normalized IN PLACE per stream.</param>
/// <param name="gv_iw">GV interpolation weights indexed [voice][stream]. Normalized IN PLACE for streams that use GV.</param>
/// <returns>
/// <c>false</c> when the label is empty, or when the duration weights, the parameter weights of any
/// stream, or the GV weights of any GV-enabled stream sum to zero; <c>true</c> otherwise.
/// Note that weight arrays checked before the failing one have already been normalized.
/// </returns>
public bool Create(HTS_ModelSet ms, HTS_Label label, bool phoneme_alignment_flag, double speed, double[] duration_iw, double[][] parameter_iw, double[][] gv_iw)
{
    Debug.LogWarning("======= SSS Create Start : " + label.GetSize() + " / " + ms.GetNumericOfVoice());

    int labelCount = label.GetSize();
    if (labelCount == 0)
    {
        return false;
    }

    // ---- check (and normalize) interpolation weights ----
    int voiceCount = ms.GetNumericOfVoice();
    int streamCount = ms.GetNumericOfStream();

    if (!NormalizeVoiceWeights(duration_iw, voiceCount))
    {
        return false;
    }
    for (int s = 0; s < streamCount; s++)
    {
        if (!NormalizeStreamWeights(parameter_iw, s, voiceCount))
        {
            return false;
        }
        // GV weights are only validated for streams that actually use GV.
        if (ms.UseGv(s) && !NormalizeStreamWeights(gv_iw, s, voiceCount))
        {
            return false;
        }
    }

    // ---- initialize state sequence ----
    this.nstate = ms.GetNumericOfState();
    this.nstream = streamCount;
    this.total_frame = 0;
    this.total_state = labelCount * this.nstate;
    this.duration = new int[this.total_state];
    this.sstream = new HTS_SStream[this.nstream];

    for (int s = 0; s < this.nstream; s++)
    {
        HTS_SStream sst = new HTS_SStream();
        this.sstream[s] = sst;

        sst.vector_length = ms.GetVectorLength(s);
        // Each state stores one vector per dynamic window (static + deltas).
        int windowedLength = sst.vector_length * ms.GetWindowSize(s);

        sst.mean = new double[this.total_state][];
        sst.vari = new double[this.total_state][];
        sst.msd = ms.IsMsd(s) ? new double[this.total_state] : null;
        for (int t = 0; t < this.total_state; t++)
        {
            sst.mean[t] = new double[windowedLength];
            sst.vari[t] = new double[windowedLength];
        }

        if (ms.UseGv(s))
        {
            // GV starts enabled for every state; it is switched off per label further below.
            sst.gv_switch = new bool[this.total_state];
            for (int t = 0; t < this.total_state; t++)
            {
                sst.gv_switch[t] = true;
            }
        }
        else
        {
            sst.gv_switch = null;
        }
    }

    // ---- determine state duration ----
    double[] duration_mean = new double[this.total_state];
    double[] duration_vari = new double[this.total_state];
    for (int i = 0; i < labelCount; i++)
    {
        int offset = i * this.nstate;
        ms.GetDuration(label.GetString(i), duration_iw, duration_mean, offset, duration_vari, offset);
    }

    if (phoneme_alignment_flag)
    {
        // Use the durations set by the user. Labels without an end frame are merged
        // into the next label that has one, so a "pending" run of states is tracked.
        int elapsedFrames = 0;  // frames assigned so far
        int pendingStart = 0;   // first state not yet given a duration
        int labelStart = 0;     // first state of the current label
        for (int i = 0; i < labelCount; i++)
        {
            double endFrame = label.GetEndFrame(i);
            int pendingCount = labelStart + this.nstate - pendingStart;
            if (endFrame >= 0)
            {
                elapsedFrames += ( int )SetSpecifiedDuration(this.duration, pendingStart, duration_mean, pendingStart, duration_vari, pendingStart, pendingCount, endFrame - elapsedFrames);
                pendingStart = labelStart + this.nstate;
            }
            else if (i + 1 == labelCount)
            {
                // Final label has no end time: fall back to model durations for the pending run.
                Debug.LogError("HTS_SStreamSet_create: The time of final label is not specified.");
                SetDefaultDuration(this.duration, pendingStart, duration_mean, pendingStart, duration_vari, pendingStart, pendingCount);
            }
            labelStart += this.nstate;
        }
    }
    else if (speed != 1.0)
    {
        // Determine the target frame length: total mean duration scaled by the speed factor.
        double meanTotal = 0.0;
        for (int t = 0; t < this.total_state; t++)
        {
            meanTotal += duration_mean[t];
        }
        double frame_length = meanTotal / speed;
        SetSpecifiedDuration(this.duration, 0, duration_mean, 0, duration_vari, 0, this.total_state, frame_length);
    }
    else
    {
        SetDefaultDuration(this.duration, 0, duration_mean, 0, duration_vari, 0, this.total_state);
    }

    // ---- get parameter ----
    for (int i = 0, stateIndex = 0; i < labelCount; i++)
    {
        // Model state indices passed to GetParameter run from 2 to nstate + 1.
        for (int modelState = 2; modelState <= this.nstate + 1; modelState++)
        {
            this.total_frame += this.duration[stateIndex];
            for (int s = 0; s < this.nstream; s++)
            {
                HTS_SStream sst = this.sstream[s];
                // Non-MSD streams pass a null MSD buffer with offset 0.
                int msdOffset = (sst.msd != null) ? stateIndex : 0;
                ms.GetParameter(s, modelState, label.GetString(i), parameter_iw, sst.mean[stateIndex], 0, sst.vari[stateIndex], 0, sst.msd, msdOffset);
            }
            stateIndex++;
        }
    }

    // ---- copy dynamic window ----
    for (int s = 0; s < this.nstream; s++)
    {
        HTS_SStream sst = this.sstream[s];
        sst.win_size = ms.GetWindowSize(s);
        sst.win_max_width = ms.GetWindowMaxWidth(s);
        sst.win_l_width = new int[sst.win_size];
        sst.win_r_width = new int[sst.win_size];
        sst.win_coefficient = new double[sst.win_size][];
        sst.win_coefficient_offset = new int[sst.win_size];

        for (int w = 0; w < sst.win_size; w++)
        {
            int left = ms.GetWindowLeftWidth(s, w);
            int right = ms.GetWindowRightWidth(s, w);
            sst.win_l_width[w] = left;
            sst.win_r_width[w] = right;

            // The buffer must hold every shift in [left, right]. It used to be sized from
            // the left width alone (-2*left, +1 when symmetric), which is identical for
            // symmetric windows and for right == -left - 1, but too small (out-of-range
            // write) whenever right > -left.
            int span = right - left + 1;
            if (span < 0)
            {
                span = 0;
            }
            sst.win_coefficient[w] = new double[span];

            // Offset handling: the C original shifts the pointer by -left so it can be
            // indexed with negative shifts; here the shift is stored and added on access.
            sst.win_coefficient_offset[w] = -left;
            for (int shift = left; shift <= right; shift++)
            {
                sst.win_coefficient[w][sst.win_coefficient_offset[w] + shift] = ms.GetWindowCoefficient(s, w, shift);
            }
        }
    }

    // ---- determine GV ----
    for (int s = 0; s < this.nstream; s++)
    {
        HTS_SStream sst = this.sstream[s];
        if (ms.UseGv(s))
        {
            sst.gv_mean = new double[sst.vector_length];
            sst.gv_vari = new double[sst.vector_length];
            // GV statistics are looked up with the first label only.
            ms.GetGv(s, label.GetString(0), gv_iw, sst.gv_mean, sst.gv_vari);
        }
        else
        {
            sst.gv_mean = null;
            sst.gv_vari = null;
        }
    }

    // Switch GV off for every state of a label whose GV flag is false.
    for (int i = 0; i < labelCount; i++)
    {
        if (ms.GetGvFlag(label.GetString(i)))
        {
            continue;
        }
        for (int s = 0; s < this.nstream; s++)
        {
            if (!ms.UseGv(s))
            {
                continue;
            }
            for (int k = 0; k < this.nstate; k++)
            {
                this.sstream[s].gv_switch[i * this.nstate + k] = false;
            }
        }
    }

    return true;
}

// Normalizes the first voiceCount entries of weights in place so that they sum to 1.
// Returns false (leaving the array untouched) when they sum to exactly zero.
private static bool NormalizeVoiceWeights(double[] weights, int voiceCount)
{
    double sum = 0.0;
    for (int v = 0; v < voiceCount; v++)
    {
        sum += weights[v];
    }

    // Exact comparisons are intentional: 0.0 means "no weight given at all", and
    // anything other than exactly 1.0 merely triggers a (harmless) renormalization.
    if (sum == 0.0)
    {
        return false;
    }
    if (sum != 1.0)
    {
        for (int v = 0; v < voiceCount; v++)
        {
            if (weights[v] != 0.0)
            {
                weights[v] /= sum;
            }
        }
    }
    return true;
}

// Same as NormalizeVoiceWeights, but for one stream column of a [voice][stream] weight table.
private static bool NormalizeStreamWeights(double[][] weights, int stream, int voiceCount)
{
    double sum = 0.0;
    for (int v = 0; v < voiceCount; v++)
    {
        sum += weights[v][stream];
    }

    if (sum == 0.0)
    {
        return false;
    }
    if (sum != 1.0)
    {
        for (int v = 0; v < voiceCount; v++)
        {
            if (weights[v][stream] != 0.0)
            {
                weights[v][stream] /= sum;
            }
        }
    }
    return true;
}