Exemple #1
0
        private HTS_GStreamSet m_GSS;                                                   // Set of generated parameter streams.

        //-----------------------------------------------------------

        /// <summary>
        /// Builds an engine with a fresh instance of every component it owns and then
        /// runs <c>Initialize()</c> on the fully wired object.
        /// </summary>
        public HTS_Engine()
        {
            // Components are created in the same order as before; Initialize() must come last
            // because it runs after all of them exist.
            this.m_Condition = new HTS_Condition();
            this.m_ModelSet = new HTS_ModelSet();
            this.m_Label = new HTS_Label();
            this.m_SSS = new HTS_SStreamSet();
            this.m_PSS = new HTS_PStreamSet();
            this.m_GSS = new HTS_GStreamSet();

            this.Initialize();
        }
        /// <summary>
        /// HTS_SStreamSet_create: parses the label sequence, determines the state durations and
        /// fills the per-stream state sequences (mean / variance / MSD, dynamic windows, GV).
        /// </summary>
        /// <param name="ms">Model set queried for the stream layout, durations, parameters, windows and GV.</param>
        /// <param name="label">Label sequence; every label expands to <c>nstate</c> consecutive states.</param>
        /// <param name="phoneme_alignment_flag">
        /// When true, durations are fitted to the end frames stored in <paramref name="label"/>;
        /// otherwise they are derived from the model (scaled by <paramref name="speed"/>).
        /// </param>
        /// <param name="speed">
        /// Only used when <paramref name="phoneme_alignment_flag"/> is false. If it is not exactly 1.0,
        /// the requested total length is (sum of duration means) / speed.
        /// </param>
        /// <param name="duration_iw">Duration interpolation weights, one per voice. Normalised IN PLACE so they sum to 1.</param>
        /// <param name="parameter_iw">Parameter interpolation weights indexed [voice][stream]. Normalised IN PLACE per stream.</param>
        /// <param name="gv_iw">GV interpolation weights indexed [voice][stream]. Normalised IN PLACE for streams that use GV.</param>
        /// <returns>
        /// false when the label is empty or when any checked weight set sums to exactly 0.0. In that case
        /// no field of this object has been written, but weight arrays checked before the failing one
        /// may already have been normalised. true once all state sequences have been built.
        /// </returns>
        public bool Create(HTS_ModelSet ms, HTS_Label label, bool phoneme_alignment_flag, double speed, double[] duration_iw, double[][] parameter_iw, double[][] gv_iw)
        {
            // Both counts are loop bounds for everything below, so they are read once.
            int labelCount = label.GetSize();
            int voiceCount = ms.GetNumericOfVoice();

            // NOTE(review): trace output emitted at warning level; kept unchanged so the log stays the same.
            Debug.LogWarning("======= SSS Create Start : " + labelCount + " / " + voiceCount);

            if (labelCount == 0)
            {
                return false;
            }

            // Check interpolation weights. Nothing on this object is modified until all of them pass.
            if (!NormalizeWeightVector(duration_iw, voiceCount))
            {
                return false;
            }

            int streamCount = ms.GetNumericOfStream();
            for (int stream = 0; stream < streamCount; stream++)
            {
                if (!NormalizeWeightColumn(parameter_iw, stream, voiceCount))
                {
                    return false;
                }

                // GV weights are only checked for streams that actually use GV.
                if (ms.UseGv(stream) == true && !NormalizeWeightColumn(gv_iw, stream, voiceCount))
                {
                    return false;
                }
            }

            // Initialize state sequence.
            this.nstate      = ms.GetNumericOfState();
            this.nstream     = streamCount;
            this.total_frame = 0;
            this.total_state = labelCount * this.nstate;
            this.duration    = new int[this.total_state];
            this.sstream     = new HTS_SStream[this.nstream];

            AllocateStateStreams(ms);
            DetermineStateDurations(ms, label, labelCount, phoneme_alignment_flag, speed, duration_iw);
            LoadStateParameters(ms, label, labelCount, parameter_iw);
            CopyDynamicWindows(ms);
            DetermineGlobalVariance(ms, label, labelCount, gv_iw);

            return true;
        }

        // Scales weights[0 .. count-1] in place so that they sum to 1.
        // Returns false (leaving the array untouched) when the sum is exactly 0.0.
        private static bool NormalizeWeightVector(double[] weights, int count)
        {
            double sum = 0.0;
            for (int voice = 0; voice < count; voice++)
            {
                sum += weights[voice];
            }

            if (sum == 0.0)
            {
                return false;
            }

            if (sum != 1.0)
            {
                for (int voice = 0; voice < count; voice++)
                {
                    if (weights[voice] != 0.0)
                    {
                        weights[voice] /= sum;
                    }
                }
            }

            return true;
        }

        // Same as NormalizeWeightVector, but for one column of a [voice][column] jagged array.
        private static bool NormalizeWeightColumn(double[][] weights, int column, int count)
        {
            double sum = 0.0;
            for (int voice = 0; voice < count; voice++)
            {
                sum += weights[voice][column];
            }

            if (sum == 0.0)
            {
                return false;
            }

            if (sum != 1.0)
            {
                for (int voice = 0; voice < count; voice++)
                {
                    if (weights[voice][column] != 0.0)
                    {
                        weights[voice][column] /= sum;
                    }
                }
            }

            return true;
        }

        // Creates one HTS_SStream per stream and allocates its per-state mean / variance / MSD / GV-switch buffers.
        private void AllocateStateStreams(HTS_ModelSet ms)
        {
            for (int stream = 0; stream < this.nstream; stream++)
            {
                HTS_SStream sst = new HTS_SStream();
                this.sstream[stream] = sst;

                sst.vector_length = ms.GetVectorLength(stream);
                sst.mean          = new double[this.total_state][];
                sst.vari          = new double[this.total_state][];

                // The MSD buffer only exists for multi-space-distribution streams; null marks "not MSD".
                if (ms.IsMsd(stream) == true)
                {
                    sst.msd = new double[this.total_state];
                }
                else
                {
                    sst.msd = null;
                }

                // One slot per (vector component, window), identical for every state of this stream.
                int stateVectorLength = sst.vector_length * ms.GetWindowSize(stream);
                for (int state = 0; state < this.total_state; state++)
                {
                    sst.mean[state] = new double[stateVectorLength];
                    sst.vari[state] = new double[stateVectorLength];
                }

                if (ms.UseGv(stream) == true)
                {
                    // GV starts enabled for every state; DetermineGlobalVariance switches states off later.
                    sst.gv_switch = new bool[this.total_state];
                    for (int state = 0; state < this.total_state; state++)
                    {
                        sst.gv_switch[state] = true;
                    }
                }
                else
                {
                    sst.gv_switch = null;
                }
            }
        }

        // Fills this.duration, either from the end frames given in the label or from the model statistics.
        private void DetermineStateDurations(HTS_ModelSet ms, HTS_Label label, int labelCount, bool phoneme_alignment_flag, double speed, double[] duration_iw)
        {
            double[] duration_mean = new double[this.total_state];
            double[] duration_vari = new double[this.total_state];

            // Label i owns the states [i * nstate, (i + 1) * nstate).
            for (int i = 0; i < labelCount; i++)
            {
                int offset = i * this.nstate;
                ms.GetDuration(label.GetString(i), duration_iw, duration_mean, offset, duration_vari, offset);
            }

            if (phoneme_alignment_flag == true)
            {
                // Use duration set by user. Labels without an end frame (negative value) are merged
                // into the next label that has one, so a segment may span several labels.
                int next_time  = 0;
                int next_state = 0;
                int state      = 0;

                for (int i = 0; i < labelCount; i++)
                {
                    double end_frame     = label.GetEndFrame(i);
                    int    segmentStates = state + this.nstate - next_state;

                    if (end_frame >= 0)
                    {
                        next_time += ( int )SetSpecifiedDuration(this.duration, next_state, duration_mean, next_state, duration_vari, next_state, segmentStates, end_frame - next_time);
                        next_state = state + this.nstate;
                    }
                    else
                    if (i + 1 == labelCount)
                    {
                        // The trailing segment has no end time: fall back to the model's default durations.
                        Debug.LogError("HTS_SStreamSet_create: The time of final label is not specified.");
                        SetDefaultDuration(this.duration, next_state, duration_mean, next_state, duration_vari, next_state, segmentStates);
                    }

                    state += this.nstate;
                }
            }
            else
            if (speed != 1.0)
            {
                // Determine frame length: total of the mean durations, scaled by the requested speed.
                double total_mean = 0.0;
                for (int state = 0; state < this.total_state; state++)
                {
                    total_mean += duration_mean[state];
                }

                double frame_length = total_mean / speed;
                SetSpecifiedDuration(this.duration, 0, duration_mean, 0, duration_vari, 0, this.total_state, frame_length);
            }
            else
            {
                SetDefaultDuration(this.duration, 0, duration_mean, 0, duration_vari, 0, this.total_state);
            }
        }

        // Fetches mean / variance (and MSD where present) for every state of every stream
        // and accumulates this.total_frame from the state durations.
        private void LoadStateParameters(HTS_ModelSet ms, HTS_Label label, int labelCount, double[][] parameter_iw)
        {
            int state = 0;

            for (int i = 0; i < labelCount; i++)
            {
                // The model set is queried with state indices 2 .. nstate + 1.
                for (int modelState = 2; modelState <= this.nstate + 1; modelState++)
                {
                    this.total_frame += this.duration[state];

                    for (int stream = 0; stream < this.nstream; stream++)
                    {
                        HTS_SStream sst = this.sstream[stream];

                        if (sst.msd != null)
                        {
                            ms.GetParameter(stream, modelState, label.GetString(i), parameter_iw, sst.mean[state], 0, sst.vari[state], 0, sst.msd, state);
                        }
                        else
                        {
                            ms.GetParameter(stream, modelState, label.GetString(i), parameter_iw, sst.mean[state], 0, sst.vari[state], 0, null, 0);
                        }
                    }

                    state++;
                }
            }
        }

        // Copies the dynamic-window description (widths and coefficients) of every stream out of the model set.
        private void CopyDynamicWindows(HTS_ModelSet ms)
        {
            for (int stream = 0; stream < this.nstream; stream++)
            {
                HTS_SStream sst = this.sstream[stream];

                sst.win_size               = ms.GetWindowSize(stream);
                sst.win_max_width          = ms.GetWindowMaxWidth(stream);
                sst.win_l_width            = new int[sst.win_size];
                sst.win_r_width            = new int[sst.win_size];
                sst.win_coefficient        = new double[sst.win_size][];
                sst.win_coefficient_offset = new int[sst.win_size];

                for (int w = 0; w < sst.win_size; w++)
                {
                    int left  = ms.GetWindowLeftWidth(stream, w);
                    int right = ms.GetWindowRightWidth(stream, w);

                    sst.win_l_width[w] = left;
                    sst.win_r_width[w] = right;

                    // Buffer sizing is unchanged from the original code.
                    // NOTE(review): the highest index written below is right - left, so this size is
                    // only sufficient when right <= -left; confirm the model loader guarantees that.
                    int length = (left + right == 0) ? (-2 * left + 1) : (-2 * left);
                    sst.win_coefficient[w] = new double[length];

                    // The C original moved the array pointer back by the left width so it could be
                    // indexed with a negative shift; here the same shift is kept as an explicit offset.
                    int offset = -left;
                    sst.win_coefficient_offset[w] = offset;

                    for (int shift = left; shift <= right; shift++)
                    {
                        sst.win_coefficient[w][offset + shift] = ms.GetWindowCoefficient(stream, w, shift);
                    }
                }
            }
        }

        // Loads GV mean / variance for the streams that use GV and switches GV off
        // for all states of labels whose GV flag is false.
        private void DetermineGlobalVariance(HTS_ModelSet ms, HTS_Label label, int labelCount, double[][] gv_iw)
        {
            for (int stream = 0; stream < this.nstream; stream++)
            {
                HTS_SStream sst = this.sstream[stream];

                if (ms.UseGv(stream) == true)
                {
                    sst.gv_mean = new double[sst.vector_length];
                    sst.gv_vari = new double[sst.vector_length];

                    // GV statistics are looked up with the first label only.
                    ms.GetGv(stream, label.GetString(0), gv_iw, sst.gv_mean, sst.gv_vari);
                }
                else
                {
                    sst.gv_mean = null;
                    sst.gv_vari = null;
                }
            }

            for (int i = 0; i < labelCount; i++)
            {
                if (ms.GetGvFlag(label.GetString(i)) != false)
                {
                    continue;
                }

                int firstState = i * this.nstate;
                for (int stream = 0; stream < this.nstream; stream++)
                {
                    if (ms.UseGv(stream) == true)
                    {
                        for (int k = 0; k < this.nstate; k++)
                        {
                            this.sstream[stream].gv_switch[firstState + k] = false;
                        }
                    }
                }
            }
        }