Example #1
0
        /// <summary>
        /// Check data consistency between a script item and its segmentation file.
        /// </summary>
        /// <remarks>
        /// The segmentation file is loaded from <paramref name="segmentDir"/> using the
        /// name mapped to the item id in <paramref name="fileMap"/>, with a ".txt" extension.
        /// Checks run in this order and stop at the first structural failure:
        /// the file has no wave segments; the last wave segment is not a silence phone;
        /// the number of script phones (or units) differs from the number of non-silence
        /// segments. When all of these pass, every non-silence segment label is compared
        /// with the corresponding script phone (or unit) and each mismatch is reported.
        /// </remarks>
        /// <param name="script">Script file instance; its file path is used when reporting script-side errors.</param>
        /// <param name="item">Script item.</param>
        /// <param name="fileMap">File list map.</param>
        /// <param name="segmentDir">Segment file directory.</param>
        /// <param name="errorSet">Data error set that receives every error found.</param>
        /// <param name="phoneBasedSegment">
        /// True to validate against the phones of the item (phone based alignment);
        /// false to validate against its units (unit based alignment).
        /// </param>
        /// <exception cref="ArgumentNullException">
        /// Any of <paramref name="script"/>, <paramref name="item"/>, <paramref name="fileMap"/>,
        /// <paramref name="segmentDir"/> or <paramref name="errorSet"/> is null.
        /// </exception>
        public static void ValidateDataAlignment(ScriptFile script, ScriptItem item,
            FileListMap fileMap, string segmentDir, DataErrorSet errorSet, bool phoneBasedSegment)
        {
            // Fail fast on null arguments, instead of hitting a NullReferenceException
            // only when the data happens to contain an error to report.
            if (script == null)
            {
                throw new ArgumentNullException("script");
            }

            if (item == null)
            {
                throw new ArgumentNullException("item");
            }

            if (fileMap == null)
            {
                throw new ArgumentNullException("fileMap");
            }

            if (segmentDir == null)
            {
                throw new ArgumentNullException("segmentDir");
            }

            if (errorSet == null)
            {
                throw new ArgumentNullException("errorSet");
            }

            string segmentFilePath = Path.Combine(segmentDir, fileMap.Map[item.Id] + ".txt");

            SegmentFile segmentFile = new SegmentFile();
            segmentFile.Load(segmentFilePath);

            if (segmentFile.WaveSegments.Count == 0)
            {
                errorSet.Errors.Add(new DataError(segmentFilePath,
                    "There is no valid alignment data into alignment file.", item.Id));
                return;
            }

            if (!segmentFile.WaveSegments[segmentFile.WaveSegments.Count - 1].IsSilencePhone)
            {
                errorSet.Errors.Add(new DataError(segmentFilePath,
                    "The alignment file is invalid, for without silence segment at the end.", item.Id));
                return;
            }

            if (phoneBasedSegment)
            {
                // Fetch the phones once; they are needed for both the count and the label checks.
                string[] phones = item.GetPhones();
                int segmentCount = segmentFile.NonSilenceWaveSegments.Count;

                if (phones.Length != segmentCount)
                {
                    string message = string.Format(CultureInfo.InvariantCulture,
                        "script phones {0} do not match with non-silence segments {1} in segmentation file.",
                        phones.Length, segmentCount);
                    errorSet.Errors.Add(new DataError(script.FilePath, message, item.Id));
                    return;
                }

                for (int i = 0; i < segmentCount; i++)
                {
                    WaveSegment segment = segmentFile.NonSilenceWaveSegments[i];

                    // NOTE(review): the raw phone is compared here while the message (and the
                    // unit based branch below) uses WaveSegment.FormatLabel - confirm which
                    // form the segment label is expected to match.
                    if (segment.Label != phones[i])
                    {
                        string message = string.Format(CultureInfo.InvariantCulture,
                            "phone [{0}/{1}] at {2} does not match between script and segment.",
                            WaveSegment.FormatLabel(phones[i]), segment.Label, i);
                        errorSet.Errors.Add(new DataError(script.FilePath, message, item.Id));
                    }
                }
            }
            else
            {
                int unitCount = item.Units.Count;
                int segmentCount = segmentFile.NonSilenceWaveSegments.Count;

                if (unitCount != segmentCount)
                {
                    string message = string.Format(CultureInfo.InvariantCulture,
                        "script units {0} do not match with non-silence segments {1} in segmentation file.",
                        unitCount, segmentCount);
                    errorSet.Errors.Add(new DataError(script.FilePath, message, item.Id));
                    return;
                }

                for (int i = 0; i < segmentCount; i++)
                {
                    WaveSegment segment = segmentFile.NonSilenceWaveSegments[i];
                    string expectedLabel = WaveSegment.FormatLabel(item.Units[i].MetaUnit.Name);

                    if (segment.Label != expectedLabel)
                    {
                        string message = string.Format(CultureInfo.InvariantCulture,
                            "units [{0}/{1}] at {2} do not match between script and segment.",
                            expectedLabel, segment.Label, i);
                        errorSet.Errors.Add(new DataError(script.FilePath, message, item.Id));
                    }
                }
            }
        }
Example #2
0
        /// <summary>
        /// Fill a script item from one sentence line and, optionally, one pronunciation line.
        /// </summary>
        /// <param name="scriptItem">Script item to populate.</param>
        /// <param name="sentenceLine">
        /// Sentence text. When <paramref name="withSid"/> is true it must start with an
        /// alphanumeric sentence id followed by tabs or spaces and then the sentence.
        /// </param>
        /// <param name="pronunciationLine">Pronunciation text; only read when <paramref name="withPron"/> is true.</param>
        /// <param name="withPron">Whether a pronunciation line is supplied and should be stored.</param>
        /// <param name="withSid">Whether the sentence line carries a leading sentence id.</param>
        /// <param name="validate">Whether to validate the populated item.</param>
        /// <returns>A data error describing the first problem found, or null when there is none.</returns>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="scriptItem"/> is null, <paramref name="sentenceLine"/> is null or empty,
        /// or <paramref name="withPron"/> is true and <paramref name="pronunciationLine"/> is null or empty.
        /// </exception>
        public static DataError ReadOneScriptItem(ScriptItem scriptItem,
            string sentenceLine, string pronunciationLine,
            bool withPron, bool withSid, bool validate)
        {
            if (scriptItem == null)
            {
                throw new ArgumentNullException("scriptItem");
            }

            if (string.IsNullOrEmpty(sentenceLine))
            {
                throw new ArgumentNullException("sentenceLine");
            }

            if (withPron && string.IsNullOrEmpty(pronunciationLine))
            {
                throw new ArgumentNullException("pronunciationLine");
            }

            string sentence = sentenceLine.Trim();
            string pronunciation = withPron ? pronunciationLine.Trim() : pronunciationLine;

            if (!withSid)
            {
                scriptItem.Sentence = sentence;
            }
            else
            {
                // Leading id (letters/digits), a run of tabs or spaces, then the sentence text.
                Match idMatch = Regex.Match(sentence, @"^([0-9a-zA-Z]+)[\t ]+(.+)$");
                if (!idMatch.Success)
                {
                    return new DataError(string.Format(CultureInfo.InvariantCulture,
                        "Invalid format, no sentence id for sentence: '{0}', pronunciation: '{1}'.",
                        sentence, withPron ? pronunciation : "null"));
                }

                scriptItem.Id = idMatch.Groups[1].Value;
                scriptItem.Sentence = idMatch.Groups[2].Value.Trim();
            }

            if (withPron)
            {
                // Phone set is case insensitive, so the pronunciation is stored in lower case.
                scriptItem.Pronunciation = pronunciation.ToLower(CultureInfo.InvariantCulture);
            }

            if (!validate)
            {
                return null;
            }

            Phoneme phoneme = scriptItem.Language != Language.Neutral
                ? Localor.GetPhoneme(scriptItem.Language, scriptItem.Engine)
                : null;

            try
            {
                // Every phone is checked for DeDE and JaJP only.
                bool checkPhones = phoneme != null &&
                    (scriptItem.Language == Language.DeDE || scriptItem.Language == Language.JaJP);
                if (checkPhones)
                {
                    foreach (string phone in scriptItem.GetPhones())
                    {
                        phoneme.TtsPhone2Id(phone);
                    }
                }

                if (scriptItem.Language == Language.Neutral)
                {
                    return null;
                }

                if (scriptItem.NormalWords == null || scriptItem.NormalWords.Count == 0)
                {
                    return new DataError("null", "No normal word found in the sentence.", scriptItem.Id);
                }
            }
            catch (InvalidDataException ide)
            {
                // Invalid data met during validation is reported as a data error, not thrown.
                return new DataError("null", Helper.BuildExceptionMessage(ide), scriptItem.Id);
            }

            return null;
        }