/// <summary>
        /// Load a wave file for processing.
        /// </summary>
        /// <param name="filePath">Waveform file path.</param>
        public void Load(string filePath)
        {
            _filePath = filePath;
            WaveFile waveFile = new WaveFile();
            waveFile.Load(filePath);
            switch (waveFile.Format.BitsPerSample)
            {
                case (int)WaveBitsPerSample.Sixteen:
                    _waveData = ArrayHelper.BinaryConvertArray(waveFile.GetSoundData());
                    break;
                default:
                    string message = string.Format(CultureInfo.InvariantCulture,
                        "Only {0} bits per sample is supported for waveform files, but waveform file [{2}] has {1} bits per sample.",
                        (int)WaveBitsPerSample.Sixteen, waveFile.Format.BitsPerSample, filePath);
                    throw new NotSupportedException(message);
            }

            _samplesPerSecond = waveFile.Format.SamplesPerSecond;
        }
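        // Usage sketch (illustrative, not part of the original source): loads a 16-bit
        // PCM waveform through the Load method above and reports the failure for any
        // other bit depth. It assumes this helper lives on the same type as Load.
        private void LoadSketch(string filePath)
        {
            try
            {
                this.Load(filePath);
            }
            catch (NotSupportedException e)
            {
                // Load above only accepts 16 bits per sample.
                Console.Error.WriteLine(e.Message);
            }
        }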
        /// <summary>
        /// Converts the wave data into the proper format.
        /// </summary>
        /// <param name="waveFile">The given WaveFile object whose data will be converted.</param>
        /// <param name="offset">The offset, in samples, of the data to be converted.</param>
        /// <param name="count">The count, in samples, of the data to be converted.</param>
        /// <returns>Waveform data in the format supported by the voice font.</returns>
        private byte[] ConvertsWaveDataFormat(WaveFile waveFile, int offset, int count)
        {
            Debug.Assert(waveFile.Format.FormatTag == WaveFormatTag.Pcm, "The source format tag should be PCM.");
            byte[] data = new byte[count * _header.BytesPerSample];

            // Format conversion.
            if (_header.FormatCategory == WaveFormatTag.Pcm)
            {
                if (waveFile.Format.BitsPerSample == _header.BytesPerSample * 8)
                {
                    offset *= _header.BytesPerSample;
                    count *= _header.BytesPerSample;
                    Array.Copy(waveFile.GetSoundData(), offset, data, 0, count);
                }
                else if (_header.BytesPerSample == 1)
                {
                    short[] dataIn16Bits = waveFile.DataIn16Bits;
                    for (int i = 0; i < data.Length; ++i)
                    {
                        data[i] = (byte)((dataIn16Bits[i + offset] / 256) + 128); // Convert a 16-bit signed sample to an 8-bit unsigned sample.
                    }
                }
                else if (_header.BytesPerSample == 2)
                {
                    throw new NotSupportedException("It is unsupported to convert 8-bit to 16-bit");
                }
            }
            else if (_header.FormatCategory == WaveFormatTag.Mulaw)
            {
                Debug.Assert(_header.SamplesPerSecond == 8000, "Only 8 kHz is supported for Mulaw voices.");
                Debug.Assert(_header.BytesPerSample == 1, "Only 1 byte per sample is supported for Mulaw voices.");
                Debug.Assert(_header.Compression == WaveCompressCatalog.Unc, "Only uncompressed encoding is supported for Mulaw voices.");

                if (waveFile.Format.SamplesPerSecond != 8000)
                {
                    string message = Helper.NeutralFormat(
                        "Samples per second [{0}] of the source waveform file should be the same as that [{1}] of the target voice.",
                        waveFile.Format.SamplesPerSecond, _header.SamplesPerSecond);
                    throw new InvalidDataException(message);
                }

                if (waveFile.Format.BitsPerSample != 16 || waveFile.Format.BlockAlign != 2)
                {
                    string message = Helper.NeutralFormat(
                        "Only 16 bits per sample and 2-byte block alignment are supported, while the source waveform file has [{0}] and [{1}].",
                        waveFile.Format.BitsPerSample, waveFile.Format.BlockAlign);
                    throw new InvalidDataException(message);
                }

                // Convert 16-bit PCM samples to 8-bit Mulaw samples.
                short[] soundData = waveFile.DataIn16Bits;
                for (int i = 0; i < count; i++)
                {
                    data[i] = SampleConverter.LinearToUlaw(soundData[offset + i]);
                }
            }
            else
            {
                // Bug #70735 is filed to track this: currently, compression is not supported in RUS offline inventory building.
                throw new NotSupportedException(
                    Helper.NeutralFormat("Unsupported target format [{0}].", _header.FormatCategory));
            }

            return data;
        }
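        // Arithmetic sketch (illustrative, not part of the original source) of the
        // 16-bit to 8-bit mapping used in ConvertsWaveDataFormat above: the signed
        // sample is scaled down by 256 and shifted into the unsigned range centered at 128.
        private static byte To8BitSketch(short sample16)
        {
            // For example: -32768 -> 0, 0 -> 128, 32767 -> 255.
            return (byte)((sample16 / 256) + 128);
        }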
        /// <summary>
        /// Writes the right margin if possible.
        /// </summary>
        /// <param name="waveFile">The given wave file where the current candidate belongs to.</param>
        /// <param name="candidate">The current candidate.</param>
        /// <param name="candidateInfo">The candidate information of the current candidate.</param>
        private void WriteRightMargin(WaveFile waveFile, UnitCandidate candidate, WaveCandidateInfo candidateInfo)
        {
            if (_ccMarginLength + _fsMarginLength > 0)
            {
                int rightMarginLength = (_ccMarginLength / 2) + _fsMarginLength;
                int waveSampleOffsetInSentence = (int)((candidate.StartTimeInSecond * waveFile.Format.SamplesPerSecond) + 0.5f);
                int waveSampleLength = (int)(((candidate.EndTimeInSecond - candidate.StartTimeInSecond) * waveFile.Format.SamplesPerSecond) + 0.5f);

                // Right margin section.
                if (candidate.Index == candidate.Sentence.Candidates.Count - 1)
                {
                    // The candidate is the last one and there is no next candidate, so write zeros as the margin.
                    WriteZeroMargin(rightMarginLength);
                }
                else if (candidate.Sentence.Candidates[candidate.Index + 1].Id == UnitCandidate.InvalidId)
                {
                    // There is a next candidate but it is not in the inventory, so write the next candidate's samples as the margin.
                    int offset = (int)(waveSampleOffsetInSentence + waveSampleLength);
                    int count = (waveFile.GetSoundData().Length / (waveFile.Format.BitsPerSample / 8)) - offset;
                    if (count < rightMarginLength)
                    {
                        WriteIntoInventory(ConvertsWaveDataFormat(waveFile, offset, count));
                        WriteZeroMargin(rightMarginLength - count);
                    }
                    else
                    {
                        WriteIntoInventory(ConvertsWaveDataFormat(waveFile, offset, rightMarginLength));
                    }
                }
            }
        }
Example #4
        /// <summary>
        /// Extract related features from wave: zero crossing, energy, autocorrelation.
        /// </summary>
        /// <param name="args">Arguments: wave file, fea file.</param>
        /// <param name="logWriter">LogWriter to implement parallel computing interface.</param>
        /// <exception cref="ArgumentException">Exception.</exception>
        public static void ExtractRelatedFeaturesOneFile(string[] args, TextWriter logWriter)
        {
            // check arguments.
            if (args.Length < 4)
            {
                throw new ArgumentException("Arguments for ExtractRelatedFeaturesOneFile: input wave file, output fea file, frame shift, frame length");
            }

            // check input and output file.
            string wavePath = args[0];
            string feaFile = args[1];
            int frameShift = int.Parse(args[2]);
            int framelength = int.Parse(args[3]);

            // output <zeroCrossing energy autoCorrelation>.
            using (StreamWriter sw = new StreamWriter(feaFile, false))
            {
                // load wave
                WaveFile waveFile = new WaveFile();
                waveFile.Load(wavePath);
                short[] waveData = ArrayHelper.BinaryConvertArray(waveFile.GetSoundData());

                // calculate features.
                // Process frames while a full half-frame remains past the frame center.
                for (int i = 0; ((i + 1) * frameShift) + (framelength / 2) <= waveData.Length; ++i)
                {

                    int nzero = 0;
                    double energy = 0;
                    double autoCorr = 0;
                    double dsum = 0;
                    double product1 = 0;
                    double product2 = 0;

                    int pos = (i + 1) * frameShift;
                    int nbegin = pos - (framelength / 2);
                    int nend = pos + (framelength / 2);

                    if (nend <= waveData.Length && nbegin >= 0)
                    {
                        // Process each frame: count zero crossings and accumulate energy.
                        // The autocorrelation terms need waveData[j - 1], so they are
                        // skipped for the first frame, where nbegin == 0.
                        int j = nbegin;
                        for (; j < nend - 1; ++j)
                        {
                            if ((waveData[j] < 0 && waveData[j + 1] > 0)
                                || (waveData[j] > 0 && waveData[j + 1] < 0)
                                || (waveData[j] == 0 && waveData[j + 1] != 0))
                            {
                                nzero++;
                            }

                            energy += waveData[j] * waveData[j];

                            if (nbegin > 0)
                            {
                                dsum += waveData[j] * waveData[j - 1];
                                product1 += waveData[j] * waveData[j];
                                product2 += waveData[j - 1] * waveData[j - 1];
                            }
                        }

                        // Calculate energy, including the last sample of the frame.
                        energy += waveData[j] * waveData[j];
                        energy = energy / framelength;
                        energy = 10 * Math.Log(Minimum + energy);

                        // Calculate autocorrelation from the accumulated products.
                        if (nbegin > 0)
                        {
                            dsum += waveData[j] * waveData[j - 1];
                            product1 += waveData[j] * waveData[j];
                            product2 += waveData[j - 1] * waveData[j - 1];
                            autoCorr = dsum / Math.Sqrt(product1 * product2);
                        }
                    }

                    sw.WriteLine("{0} {1:F6} {2:F6}", nzero, energy, autoCorr);
                }
            }
        }
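        // Usage sketch (illustrative, not part of the original source): runs the
        // feature extraction above for a single file. The paths and the frame shift /
        // frame length values (in samples) are placeholders.
        private static void ExtractRelatedFeaturesSketch()
        {
            string[] args = new[] { @"D:\data\utt0001.wav", @"D:\data\utt0001.fea", "80", "400" };
            ExtractRelatedFeaturesOneFile(args, Console.Out);
        }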
Example #5
        /// <summary>
        /// Extract lpc residual error.
        /// </summary>
        /// <param name="args">Arguments: wave file, lpc file, lpc error file.</param>
        /// <param name="logWriter">LogWriter to implement parallel computing interface.</param>
        /// <exception cref="ArgumentException">Exception.</exception>
        public static void ExtractLpcResidualErrorOneFile(string[] args, TextWriter logWriter)
        {
            // check arguments
            if (args.Length < 5)
            {
                throw new ArgumentException("Arguments for ExtractLpcResidualErrorOneFile: input wave file, input lpc file, output lpc error file, frame shift, frame length");
            }

            // check input and output file
            string wavePath = args[0];
            string lpcFile = args[1];
            string errorFile = args[2];
            int frameShift = int.Parse(args[3]);
            int frameLength = int.Parse(args[4]);

            // load LPC coefficients, one frame per line
            List<double[]> lpcData = new List<double[]>();
            foreach (string line in Helper.FileLines(lpcFile))
            {
                string[] fields = line.Split(new[] { ' ' }, StringSplitOptions.RemoveEmptyEntries);
                double[] data = fields.Select(i => double.Parse(i, CultureInfo.InvariantCulture)).ToArray();
                lpcData.Add(data);
            }

            using (StreamWriter sw = new StreamWriter(errorFile, false))
            {
                // load wave
                WaveFile waveFile = new WaveFile();
                waveFile.Load(wavePath);
                short[] waveData = ArrayHelper.BinaryConvertArray(waveFile.GetSoundData());

                // calculate residual error
                for (int i = 0; i < lpcData.Count; i++)
                {
                    int pos = (i + 1) * frameShift;
                    int nbegin = pos - (frameLength / 2);
                    int nend = pos + (frameLength / 2);
                    double energy = 0;

                    // calculate actual value
                    if (nend <= waveData.Length && nbegin >= 0)
                    {
                        for (int j = nbegin; j < nend; j++)
                        {
                            energy += waveData[j] * waveData[j];
                        }

                        energy = energy / (double)frameLength;
                        double tempt_energy = energy;
                        energy = 10 * Math.Log(Minimum + energy);

                        // calculate prediction value
                        double prediction = 0;
                        for (int k = 0; k < LpcOrder; k++)
                        {
                            double denergy = 0;
                            for (int j = nbegin; j < nend; j++)
                            {
                                if (j - k > 0)
                                {
                                    denergy += waveData[j] * waveData[j - k];
                                }
                            }

                            prediction += lpcData[i][k] * (denergy / (double)frameLength);
                        }

                        prediction = prediction + tempt_energy;
                        prediction = 10 * Math.Log(Math.Abs(prediction) + Minimum);

                        // output residual error
                        sw.WriteLine("{0:F6} {1:F6}", lpcData[i][0], energy - prediction);
                    }
                }
            }
        }
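        // Usage sketch (illustrative, not part of the original source): computes the
        // LPC residual error for one file. The paths and the frame shift / frame
        // length values (in samples) are placeholders.
        private static void ExtractLpcResidualErrorSketch()
        {
            string[] args = new[] { @"D:\data\utt0001.wav", @"D:\data\utt0001.lpc", @"D:\data\utt0001.err", "80", "400" };
            ExtractLpcResidualErrorOneFile(args, Console.Out);
        }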
Example #6
        /// <summary>
        /// Convert the WaveFile instance into another samples per second.
        /// </summary>
        /// <param name="waveFile">Waveform instance to resample.</param>
        /// <param name="targetSamplesPerSecond">Samples per second of the target waveform file.</param>
        public static void Resample(WaveFile waveFile, int targetSamplesPerSecond)
        {
            if (waveFile == null)
            {
                throw new ArgumentNullException("waveFile");
            }

            if (waveFile.Riff == null)
            {
                string message = Helper.NeutralFormat("The Riff of wave file should not bu null.");
                throw new ArgumentNullException("waveFile", message);
            }

            if (waveFile.DataIn16Bits == null)
            {
                string message = Helper.NeutralFormat("The DataIn16Bits of wave file should not bu null.");
                throw new ArgumentNullException("waveFile", message);
            }

            if (waveFile.Format.BitsPerSample != SupportedBitsPerSample)
            {
                string message = string.Format(CultureInfo.InvariantCulture,
                    "Only {0}-bit waveform files are supported for resampling.",
                    SupportedBitsPerSample);
                throw new NotSupportedException(message);
            }

            if (waveFile.Format.Channels != SupportedChannels)
            {
                string message = string.Format(CultureInfo.InvariantCulture,
                    "Only {0}-channel waveform files are supported for resampling.",
                    SupportedChannels);
                throw new NotSupportedException(message);
            }

            // Do nothing if the samples per second already match.
            if (waveFile.Format.SamplesPerSecond != targetSamplesPerSecond)
            {
                // Validate that the cached 16-bit data is in sync with the raw sound data.
                if (waveFile.DataIn16Bits.Length != waveFile.GetSoundData().Length / sizeof(short))
                {
                    string message = string.Format(CultureInfo.InvariantCulture,
                        "The DataIn16Bits buffer is not in sync with the sound data.");
                    Debug.Assert(false, message);
                    throw new InvalidDataException(message);
                }

                ResampleFilter resample = new ResampleFilter(waveFile.Format.SamplesPerSecond,
                    targetSamplesPerSecond);

                // Re-sample
                short[] targetSamples = resample.Resample(waveFile.DataIn16Bits);

                // Write the resampled sound data back into the WaveFile instance
                RiffChunk dataChunk = waveFile.Riff.GetChunk(Riff.IdData);
                dataChunk.SetData(ArrayHelper.BinaryConvertArray(targetSamples));

                WaveFormat format = waveFile.Format;
                format.SamplesPerSecond = targetSamplesPerSecond;
                format.AverageBytesPerSecond =
                    format.SamplesPerSecond * waveFile.Format.BitsPerSample / 8;

                waveFile.Format = format;
            }
        }
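        // Usage sketch (illustrative, not part of the original source): loads a 16-bit
        // mono waveform and resamples it to 16 kHz in place. It assumes this helper
        // lives on the same type as Resample so the call can be unqualified.
        private static WaveFile ResampleTo16kSketch(string filePath)
        {
            WaveFile waveFile = new WaveFile();
            waveFile.Load(filePath);

            // Resample is a no-op when the file is already at 16000 samples per second.
            Resample(waveFile, 16000);
            return waveFile;
        }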
Example #7
        /// <summary>
        /// Append another wave file instance to this instance.
        /// </summary>
        /// <param name="wf">Wave file.</param>
        public void Append(WaveFile wf)
        {
            if (wf == null)
            {
                throw new ArgumentNullException("wf");
            }

            if (_riff == null)
            {
                Initialze();
                Format = wf.Format;
            }

            if (!Format.Equals(wf.Format))
            {
                string message = string.Format(CultureInfo.InvariantCulture,
                    "The format of the waveform file to append must match the current format.");
                throw new ArgumentException(message, "wf");
            }

            RiffChunk dataChunk = _riff.GetChunk(Riff.IdData);
            if (dataChunk == null)
            {
                dataChunk = new RiffChunk();
                dataChunk.Id = Riff.IdData;
                _riff.Chunks.Add(dataChunk);
            }

            dataChunk.Append(wf.GetSoundData());
        }
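        // Usage sketch (illustrative, not part of the original source): concatenates
        // several waveform files that share the same format by loading the first file
        // and appending the remaining ones with Append above. Assumes at least one path.
        private static WaveFile ConcatenateSketch(string[] filePaths)
        {
            WaveFile merged = new WaveFile();
            merged.Load(filePaths[0]);

            for (int i = 1; i < filePaths.Length; i++)
            {
                WaveFile part = new WaveFile();
                part.Load(filePaths[i]);
                merged.Append(part);
            }

            return merged;
        }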
Example #8
        /// <summary>
        /// Merge two waveform files into 2-channel waveform file.
        /// </summary>
        /// <param name="leftFile">Left waveform file for left channel, i.e. first channel.</param>
        /// <param name="rightFile">Right waveform file for left channel, i.e. second channel.</param>
        /// <returns>Merged waveform file.</returns>
        public static WaveFile MergeTwoChannels(WaveFile leftFile, WaveFile rightFile)
        {
            if (leftFile == null)
            {
                throw new ArgumentNullException("leftFile");
            }

            if (rightFile == null)
            {
                throw new ArgumentNullException("rightFile");
            }

            if (leftFile.Format != rightFile.Format)
            {
                string message = string.Format(CultureInfo.InvariantCulture,
                    "Both waveform files should share the same format.");
                throw new InvalidDataException(message);
            }

            if (leftFile.GetSoundData().Length != rightFile.GetSoundData().Length)
            {
                string message = string.Format(CultureInfo.InvariantCulture,
                    "Both waveform files should have the same number of samples.");
                throw new InvalidDataException(message);
            }

            if (leftFile.Format.Channels != 1)
            {
                string message = string.Format(CultureInfo.InvariantCulture,
                    "Only single-channel waveform files are supported for merging.");
                throw new InvalidDataException(message);
            }

            WaveFile targetFile = new WaveFile();
            WaveFormat format = leftFile.Format;
            format.Channels = 2;
            format.AverageBytesPerSecond *= format.Channels;
            format.BlockAlign *= format.Channels;
            targetFile.Format = format;

            byte[] data = new byte[leftFile.GetSoundData().Length * format.Channels];

            for (int i = 0; i < leftFile.GetSoundData().Length; i += leftFile.Format.BlockAlign)
            {
                Buffer.BlockCopy(leftFile.GetSoundData(), i,
                    data, i * format.Channels, leftFile.Format.BlockAlign);
                Buffer.BlockCopy(rightFile.GetSoundData(), i,
                    data, (i * format.Channels) + leftFile.Format.BlockAlign, leftFile.Format.BlockAlign);
            }

            RiffChunk chunk = targetFile.Riff.GetChunk(Riff.IdData);
            chunk.SetData(data);

            return targetFile;
        }
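        // Usage sketch (illustrative, not part of the original source): merges two mono
        // recordings with identical formats and lengths into one stereo WaveFile via
        // MergeTwoChannels above; the file paths are placeholders.
        private static WaveFile MergeStereoSketch(string leftPath, string rightPath)
        {
            WaveFile left = new WaveFile();
            left.Load(leftPath);

            WaveFile right = new WaveFile();
            right.Load(rightPath);

            // Throws InvalidDataException if the formats, lengths, or channel counts
            // do not satisfy the requirements checked above.
            return MergeTwoChannels(left, right);
        }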