Ejemplo n.º 1
0
 /// <summary>
 /// Appends every error held by <paramref name="errorSet"/> to the errors of this instance.
 /// </summary>
 /// <param name="errorSet">Source errors to copy. A null set, or a set whose
 /// error collection is null, is ignored.</param>
 public void Merge(DataErrorSet errorSet)
 {
     // Nothing to copy when the source set or its error collection is missing.
     if (errorSet == null || errorSet.Errors == null)
     {
         return;
     }

     foreach (DataError sourceError in errorSet.Errors)
     {
         _errors.Add(sourceError);
     }
 }
Ejemplo n.º 2
0
        /// <summary>
        /// Loads each segment file listed for the source directory, shifts it by the
        /// given silence duration and saves the result under the target directory.
        /// Target files that already exist are left untouched.
        /// </summary>
        /// <param name="silenceDuration">Silence duration in second.</param>
        /// <param name="sourceDir">Source segment directory.</param>
        /// <param name="targetDir">Target segment directory.</param>
        /// <returns>Data error set found; one entry per file that raised
        /// an <see cref="InvalidDataException"/>.</returns>
        public static DataErrorSet ShiftSegmentFiles(float silenceDuration,
            string sourceDir, string targetDir)
        {
            DataErrorSet errors = new DataErrorSet();
            SegmentFile segmentFile = new SegmentFile();
            Dictionary<string, string> segmentMap =
                Microsoft.Tts.Offline.FileListMap.Build(sourceDir, ".txt");

            foreach (KeyValuePair<string, string> entry in segmentMap)
            {
                string relativeName = entry.Value + ".txt";

                string targetPath = Path.Combine(targetDir, relativeName);
                if (File.Exists(targetPath))
                {
                    // An existing target file is never overwritten.
                    continue;
                }

                string sourcePath = Path.Combine(sourceDir, relativeName);

                try
                {
                    Helper.EnsureFolderExistForFile(targetPath);

                    segmentFile.Load(sourcePath);
                    segmentFile.Shift(silenceDuration);
                    segmentFile.Save(targetPath);
                }
                catch (InvalidDataException ide)
                {
                    // Record the failure against the source file and keep going.
                    errors.Errors.Add(new DataError(sourcePath,
                        Helper.BuildExceptionMessage(ide), entry.Key));
                }
            }

            return errors;
        }
Ejemplo n.º 3
0
        /// <summary>
        /// Checks that the script file and the segmentation files agree: every script
        /// item must be listed in the file map and pass the per-item alignment check,
        /// and every sentence in the file map must exist in the script.
        /// </summary>
        /// <param name="fileMap">File list map.</param>
        /// <param name="script">Script file instance.</param>
        /// <param name="segmentDir">Segment file directory.</param>
        /// <returns>Data error set found.</returns>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="segmentDir"/> is null or empty, or
        /// <paramref name="fileMap"/> or <paramref name="script"/> is null.
        /// </exception>
        /// <exception cref="ArgumentException">
        /// The map of <paramref name="fileMap"/> or the items of
        /// <paramref name="script"/> are null.
        /// </exception>
        public static DataErrorSet ValidateDataAlignment(
            FileListMap fileMap, ScriptFile script, string segmentDir)
        {
            // Argument checks; the order decides which exception wins when several are invalid.
            if (string.IsNullOrEmpty(segmentDir))
            {
                throw new ArgumentNullException("segmentDir");
            }

            if (fileMap == null)
            {
                throw new ArgumentNullException("fileMap");
            }

            if (fileMap.Map == null)
            {
                throw new ArgumentException("fileMap.Map is null");
            }

            if (fileMap.Map.Keys == null)
            {
                throw new ArgumentException("fileMap.Map.Keys is null");
            }

            if (script == null)
            {
                throw new ArgumentNullException("script");
            }

            if (script.Items == null)
            {
                throw new ArgumentException("script.Items is null");
            }

            if (script.Items.Values == null)
            {
                throw new ArgumentException("script.Items.Values is null");
            }

            DataErrorSet errors = new DataErrorSet();

            // Pass 1: each script item must be mapped, and its segment file must align.
            foreach (ScriptItem scriptItem in script.Items.Values)
            {
                try
                {
                    if (fileMap.Map.ContainsKey(scriptItem.Id))
                    {
                        ValidateDataAlignment(script, scriptItem, fileMap, segmentDir, errors, false);
                    }
                    else
                    {
                        errors.Errors.Add(new DataError(script.FilePath,
                            "File list map does not contain sentences.", scriptItem.Id));
                    }
                }
                catch (InvalidDataException ide)
                {
                    errors.Errors.Add(new DataError(script.FilePath,
                        Helper.BuildExceptionMessage(ide), scriptItem.Id));
                }
            }

            // Pass 2: each mapped sentence must be present in the script.
            foreach (string mappedId in fileMap.Map.Keys)
            {
                if (script.Items.ContainsKey(mappedId))
                {
                    continue;
                }

                errors.Errors.Add(new DataError(script.FilePath,
                    "script file does not contain the sentence.", mappedId));
            }

            return errors;
        }
Ejemplo n.º 4
0
        /// <summary>
        /// Checks, for every sentence in the file map, that its segment file and its
        /// wave file are consistent with each other.
        /// </summary>
        /// <param name="fileMap">File list map.</param>
        /// <param name="waveDir">Waveform file directory.</param>
        /// <param name="segmentDir">Segmentation file directory.</param>
        /// <returns>Data error set found.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="fileMap"/> is null.</exception>
        /// <exception cref="ArgumentException">The map of <paramref name="fileMap"/>,
        /// or its key collection, is null.</exception>
        public static DataErrorSet ValidateDataAlignment(
            FileListMap fileMap, string waveDir, string segmentDir)
        {
            if (fileMap == null)
            {
                throw new ArgumentNullException("fileMap");
            }

            if (fileMap.Map == null)
            {
                throw new ArgumentException("fileMap.Map is null");
            }

            if (fileMap.Map.Keys == null)
            {
                throw new ArgumentException("fileMap.Map.Keys is null");
            }

            DataErrorSet errors = new DataErrorSet();

            foreach (string sentenceId in fileMap.Map.Keys)
            {
                try
                {
                    // Both files share the relative name stored in the map for this sentence.
                    string segmentPath = Path.Combine(segmentDir, fileMap.Map[sentenceId] + ".txt");
                    string wavePath = Path.Combine(waveDir, fileMap.Map[sentenceId] + ".wav");

                    // The pair-wise check reports its findings through this buffer.
                    StringBuilder report = new StringBuilder();
                    ValidateDataAlignment(segmentPath, wavePath, report);

                    if (report.Length > 0)
                    {
                        errors.Errors.Add(new DataError(string.Empty, report.ToString(), sentenceId));
                    }
                }
                catch (InvalidDataException ide)
                {
                    errors.Errors.Add(new DataError(string.Empty,
                        Helper.BuildExceptionMessage(ide), sentenceId));
                }
            }

            return errors;
        }
Ejemplo n.º 5
0
        /// <summary>
        /// Checks waveform file consistency between the waveform files and the
        /// reference waveform files for every sentence listed in the file map.
        /// </summary>
        /// <remarks>
        /// For each sentence the method reports a missing reference file, a missing
        /// waveform file, a differing sample count and a differing wave format.
        /// The format comparison is only made when both files exist, so that a
        /// missing file is not additionally reported as a format mismatch.
        /// </remarks>
        /// <param name="fileMap">File list map listing the sentences to validate.</param>
        /// <param name="waveDir">Base directory of waveform file.</param>
        /// <param name="refWaveDir">Directory of reference waveform file.</param>
        /// <param name="refName">The name of the reference waveform directory,
        /// used in the error messages.</param>
        /// <returns>Data error set found.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="fileMap"/> is null, or
        /// <paramref name="refName"/> or <paramref name="refWaveDir"/> is null or empty.</exception>
        /// <exception cref="ArgumentException">The map of <paramref name="fileMap"/>,
        /// or its key collection, is null.</exception>
        public static DataErrorSet ValidateWaveAlignment(FileListMap fileMap, string waveDir,
            string refWaveDir, string refName)
        {
            if (fileMap == null)
            {
                throw new ArgumentNullException("fileMap");
            }

            if (fileMap.Map == null)
            {
                throw new ArgumentException("fileMap.Map is null");
            }

            if (fileMap.Map.Keys == null)
            {
                throw new ArgumentException("fileMap.Map.Keys is null");
            }

            if (string.IsNullOrEmpty(refName))
            {
                throw new ArgumentNullException("refName");
            }

            if (string.IsNullOrEmpty(refWaveDir))
            {
                throw new ArgumentNullException("refWaveDir");
            }

            DataErrorSet errorSet = new DataErrorSet();

            foreach (string sid in fileMap.Map.Keys)
            {
                try
                {
                    string refFile = Path.Combine(refWaveDir, fileMap.Map[sid] + ".wav");
                    string waveFile = Path.Combine(waveDir, fileMap.Map[sid] + ".wav");

                    int waveSampleCount = 0;
                    int refSampleCount = 0;
                    WaveFormat waveFormat = new WaveFormat();
                    WaveFormat refWaveFormat = new WaveFormat();

                    StringBuilder sb = new StringBuilder();

                    // validate reference file existence
                    bool refExists = File.Exists(refFile);
                    if (!refExists)
                    {
                        // {1} is the file path; the original format string repeated {0}
                        // and therefore never printed which file was missing.
                        sb.AppendFormat(CultureInfo.InvariantCulture,
                            "{0} file [{1}] does not exist.", refName, refFile);
                    }
                    else
                    {
                        refSampleCount = WaveFile.ReadSampleCount(refFile);
                        refWaveFormat = WaveFile.ReadFormat(refFile);
                    }

                    // validate waveform file existence
                    bool waveExists = File.Exists(waveFile);
                    if (!waveExists)
                    {
                        sb.AppendFormat(CultureInfo.InvariantCulture,
                            "Wave file [{0}] does not exist.", waveFile);
                    }
                    else
                    {
                        waveSampleCount = WaveFile.ReadSampleCount(waveFile);
                        waveFormat = WaveFile.ReadFormat(waveFile);
                    }

                    // validate content consistency; a zero count is not compared
                    if (waveSampleCount != 0 && refSampleCount != 0
                        && waveSampleCount != refSampleCount)
                    {
                        sb.AppendFormat(CultureInfo.InvariantCulture,
                            "The sample count is not the same between waveform file [{0}] and {1} file [{2}].",
                            waveFile, refName, refFile);
                    }

                    // Formats are only comparable when both were actually read from disk;
                    // otherwise one side is still the placeholder created above.
                    if (refExists && waveExists && !waveFormat.Equals(refWaveFormat))
                    {
                        sb.AppendFormat(CultureInfo.InvariantCulture,
                            "The waveform format is not the same between waveform file [{0}] and {1} file [{2}].",
                            waveFile, refName, refFile);
                    }

                    if (sb.Length > 0)
                    {
                        errorSet.Errors.Add(new DataError(string.Empty, sb.ToString(), sid));
                    }
                }
                catch (InvalidDataException ide)
                {
                    string message = Helper.BuildExceptionMessage(ide);
                    errorSet.Errors.Add(new DataError(string.Empty, message, sid));
                }
            }

            return errorSet;
        }
Ejemplo n.º 6
0
        /// <summary>
        /// Cross-checks the sentence ids of a file map against those of a script file
        /// and reports every id that appears in only one of the two.
        /// </summary>
        /// <param name="fileMap">File list map.</param>
        /// <param name="script">Script file instance.</param>
        /// <returns>Data error set found.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="fileMap"/> or
        /// <paramref name="script"/> is null.</exception>
        /// <exception cref="ArgumentException">A map, item collection, key collection
        /// or file path of either argument is null (or, for paths, empty).</exception>
        public static DataErrorSet ValidateDataAlignment(FileListMap fileMap,
            ScriptFile script)
        {
            // Argument checks; the order decides which exception wins when several are invalid.
            if (fileMap == null)
            {
                throw new ArgumentNullException("fileMap");
            }

            if (fileMap.Map == null)
            {
                throw new ArgumentException("fileMap.Map is null");
            }

            if (fileMap.Map.Keys == null)
            {
                throw new ArgumentException("fileMap.Map.Keys is null");
            }

            if (string.IsNullOrEmpty(fileMap.FilePath))
            {
                throw new ArgumentException("fileMap.FilePath is null");
            }

            if (script == null)
            {
                throw new ArgumentNullException("script");
            }

            if (script.Items == null)
            {
                throw new ArgumentException("script.Items is null");
            }

            if (script.Items.Keys == null)
            {
                throw new ArgumentException("script.Items.Keys is null");
            }

            if (string.IsNullOrEmpty(script.FilePath))
            {
                throw new ArgumentException("script.FilePath is null");
            }

            DataErrorSet errors = new DataErrorSet();

            // Ids known to the file map but missing from the script.
            foreach (string mappedId in fileMap.Map.Keys)
            {
                if (script.Items.ContainsKey(mappedId))
                {
                    continue;
                }

                string missingInScript = string.Format(CultureInfo.InvariantCulture,
                    "Sentence [{0}] is found in the filemap [{1}], but not listed in script file [{2}].",
                    mappedId, fileMap.FilePath, script.FilePath);
                errors.Errors.Add(new DataError(script.FilePath, missingInScript, mappedId));
            }

            // Ids known to the script but missing from the file map.
            foreach (string scriptId in script.Items.Keys)
            {
                if (fileMap.Map.ContainsKey(scriptId))
                {
                    continue;
                }

                string missingInMap = string.Format(CultureInfo.InvariantCulture,
                    "Sentence [{0}] is found in the script [{1}], but not listed in filemap [{2}].",
                    scriptId, script.FilePath, fileMap.FilePath);
                errors.Errors.Add(new DataError(script.FilePath, missingInMap, scriptId));
            }

            return errors;
        }
Ejemplo n.º 7
0
        /// <summary>
        /// Adapts raw manual alignment data to well-labeled alignment data, using the
        /// forced alignment of the same sentence as the reference.
        /// </summary>
        /// <remarks>
        /// A sentence is skipped, and an error recorded, when it has no forced
        /// alignment, when either segment file cannot be read, or when the trial
        /// adapting pass reports a mismatch.
        /// </remarks>
        /// <param name="rawManuDir">Raw manual alignment data directory.</param>
        /// <param name="forcedDir">Forced alignment data directory.</param>
        /// <param name="manuDir">Adapted result directory; created when missing.</param>
        /// <returns>Data error set found.</returns>
        public static DataErrorSet Adapt(string rawManuDir, string forcedDir, string manuDir)
        {
            if (!Directory.Exists(manuDir))
            {
                Directory.CreateDirectory(manuDir);
            }

            DataErrorSet errorSet = new DataErrorSet();

            Dictionary<string, string> rawManSegMap = FileListMap.Build(rawManuDir, ".txt");
            Dictionary<string, string> forceSegMap = FileListMap.Build(forcedDir, ".txt");

            foreach (KeyValuePair<string, string> rawEntry in rawManSegMap)
            {
                string id = rawEntry.Key;
                string rawRelativePath = rawEntry.Value;

                string forcedRelativePath;
                if (!forceSegMap.TryGetValue(id, out forcedRelativePath))
                {
                    string message = "unexpected raw manual segment id, not in forced set: " + id;
                    Console.Error.WriteLine(message);
                    errorSet.Errors.Add(new DataError(
                        Path.Combine(forcedDir, rawRelativePath + ".txt"), message, id));
                    continue;
                }

                string rawSegmentFilePath = Path.Combine(rawManuDir, rawRelativePath) + ".txt";
                string forcedSegmentFilePath = Path.Combine(forcedDir, forcedRelativePath) + ".txt";
                string adaptedSegmentFilePath = Path.Combine(manuDir, rawRelativePath) + ".txt";
                Helper.EnsureFolderExistForFile(adaptedSegmentFilePath);

                Collection<WaveSegment> rawSegments;
                if (!TryReadSegments(rawSegmentFilePath, id, errorSet, out rawSegments))
                {
                    continue;
                }

                Collection<WaveSegment> forcedSegments;
                if (!TryReadSegments(forcedSegmentFilePath, id, errorSet, out forcedSegments))
                {
                    continue;
                }

                RemoveSilenceSegment(forcedSegments);

                // First call passes false and is used as a check; only when it succeeds
                // is Adapting called again with true.
                if (Adapting(rawSegments, forcedSegments, adaptedSegmentFilePath, false))
                {
                    Adapting(rawSegments, forcedSegments, adaptedSegmentFilePath, true);
                }
                else
                {
                    Console.Error.WriteLine("unmatched segment number in sentence:" + id);
                    errorSet.Errors.Add(new DataError(rawSegmentFilePath,
                        "unmatched segment number in sentence between forced align [" + forcedSegmentFilePath + "] and raw manual alignment file [" + rawSegmentFilePath + "]",
                        id));
                }
            }

            return errorSet;
        }

        /// <summary>
        /// Reads all segments of one segment file, recording a data error instead of
        /// throwing when the file is invalid or cannot be loaded.
        /// </summary>
        /// <param name="segmentFilePath">Segment file to read.</param>
        /// <param name="sentenceId">Sentence id the file belongs to, used for error reporting.</param>
        /// <param name="errorSet">Error set that receives the failure, if any.</param>
        /// <param name="segments">The segments read; null when reading failed.</param>
        /// <returns>True when the file was read; false when an error was recorded.</returns>
        private static bool TryReadSegments(string segmentFilePath, string sentenceId,
            DataErrorSet errorSet, out Collection<WaveSegment> segments)
        {
            segments = null;

            try
            {
                segments = SegmentFile.ReadAllData(segmentFilePath);
                return true;
            }
            catch (InvalidDataException ide)
            {
                errorSet.Errors.Add(new DataError(segmentFilePath,
                    Helper.BuildExceptionMessage(ide), sentenceId));
            }
            catch (FileLoadException fle)
            {
                errorSet.Errors.Add(new DataError(segmentFilePath,
                    Helper.BuildExceptionMessage(fle), sentenceId));
            }

            return false;
        }
Ejemplo n.º 8
0
        /// <summary>
        /// Rewrites the file list map without the sentences that have errors.
        /// </summary>
        /// <remarks>
        /// Errors without a sentence id are ignored. The map file is written back
        /// even when no entry was removed.
        /// </remarks>
        /// <param name="errorSet">Data error set.</param>
        /// <param name="mapFilePath">File list map file path.</param>
        /// <exception cref="ArgumentNullException"><paramref name="errorSet"/> is null.</exception>
        /// <exception cref="ArgumentException">The error collection of
        /// <paramref name="errorSet"/> is null.</exception>
        public static void RemoveErrorSentence(DataErrorSet errorSet, string mapFilePath)
        {
            if (errorSet == null)
            {
                throw new ArgumentNullException("errorSet");
            }

            if (errorSet.Errors == null)
            {
                throw new ArgumentException("errorSet.Errors is null");
            }

            Dictionary<string, string> sentenceMap = FileListMap.ReadAllData(mapFilePath);

            foreach (DataError error in errorSet.Errors)
            {
                string sentenceId = error.SentenceId;
                if (!string.IsNullOrEmpty(sentenceId))
                {
                    // Remove is a no-op (returns false) for ids not present in the map.
                    sentenceMap.Remove(sentenceId);
                }
            }

            FileListMap.WriteAllData(sentenceMap, mapFilePath);
        }
Ejemplo n.º 9
0
        /// <summary>
        /// Initialize script file from a file.
        /// </summary>
        /// <remarks>
        /// The item collection of this instance is replaced with a new, empty one
        /// before reading starts, so items of an earlier load are discarded.
        /// </remarks>
        /// <param name="filePath">File to load script data.</param>
        /// <returns>Data error set found; the same set is also kept on this instance.</returns>
        public DataErrorSet Load(string filePath)
        {
            // Start from an empty collection; ReadAllData receives it to fill in.
            _items = new SortedDictionary<string, ScriptItem>();
            _errorSet = ReadAllData(filePath, Language, EngineType, _items);
            // Recorded only after ReadAllData has returned: if that call throws,
            // the previously stored path is left unchanged.
            _filePath = filePath;

            return _errorSet;
        }
Ejemplo n.º 10
0
        /// <summary>
        /// Rewrites the script file without the sentences that have errors.
        /// </summary>
        /// <remarks>
        /// Errors without a sentence id are ignored. The script is saved back to
        /// the same path even when no item was removed.
        /// </remarks>
        /// <param name="errorSet">Data error set.</param>
        /// <param name="scriptFilePath">Script file path.</param>
        /// <exception cref="ArgumentNullException"><paramref name="errorSet"/> is null.</exception>
        /// <exception cref="ArgumentException">The error collection of
        /// <paramref name="errorSet"/> is null.</exception>
        public static void RemoveErrorSentence(DataErrorSet errorSet, string scriptFilePath)
        {
            if (errorSet == null)
            {
                throw new ArgumentNullException("errorSet");
            }

            if (errorSet.Errors == null)
            {
                throw new ArgumentException("errorSet.Errors is null");
            }

            ScriptFile script = new ScriptFile();
            script.Load(scriptFilePath);

            foreach (DataError error in errorSet.Errors)
            {
                string sentenceId = error.SentenceId;

                // Only ids that are set and actually present in the script are removed.
                if (!string.IsNullOrEmpty(sentenceId) && script.Items.ContainsKey(sentenceId))
                {
                    script.Items.Remove(sentenceId);
                }
            }

            script.Save(scriptFilePath);
        }
Ejemplo n.º 11
0
        /// <summary>
        /// Reads a script file item by item and appends each successfully read item
        /// to <paramref name="outEntries"/>, in the order they appear in the file.
        /// </summary>
        /// <remarks>
        /// Items that fail to read are reported in the returned error set and reading
        /// continues. Reading stops at the first item that has no error and an
        /// empty sentence.
        /// </remarks>
        /// <param name="scriptFilePath">Script file to read.</param>
        /// <param name="language">Language of the script.</param>
        /// <param name="engineType">Engine of the script to support.</param>
        /// <param name="outEntries">Output of script items.</param>
        /// <param name="withPron">Whether load script with pronunciation.</param>
        /// <param name="withSid">Whether load script with SID.</param>
        /// <param name="validate">Whether validate script item.</param>
        /// <returns>Data error set found.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="outEntries"/> is null, or
        /// <paramref name="scriptFilePath"/> is null or empty.</exception>
        /// <exception cref="InvalidDataException">Reading raised an
        /// <see cref="InvalidDataException"/>; it is rethrown wrapped with the file path.</exception>
        public static DataErrorSet ReadAllData(string scriptFilePath,
            Language language, EngineType engineType,
            Collection<ScriptItem> outEntries,
            bool withPron, bool withSid, bool validate)
        {
            if (outEntries == null)
            {
                throw new ArgumentNullException("outEntries");
            }

            if (string.IsNullOrEmpty(scriptFilePath))
            {
                throw new ArgumentNullException("scriptFilePath");
            }

            if (!File.Exists(scriptFilePath))
            {
                throw Helper.CreateException(typeof(FileNotFoundException),
                    scriptFilePath);
            }

            DataErrorSet errors = new DataErrorSet();

            using (StreamReader reader = new StreamReader(scriptFilePath))
            {
                try
                {
                    for (;;)
                    {
                        ScriptItem current = Localor.CreateScriptItem(language, engineType);
                        DataError readError = ReadOneScriptItem(reader, current, withPron, withSid, validate);

                        if (readError != null)
                        {
                            // Attach the file path so the error can be traced to its source.
                            readError.FilePath = scriptFilePath;
                            errors.Errors.Add(readError);
                            continue;
                        }

                        if (string.IsNullOrEmpty(current.Sentence))
                        {
                            // No error and no sentence: end of stream reached.
                            break;
                        }

                        outEntries.Add(current);
                    }
                }
                catch (InvalidDataException ide)
                {
                    string message =
                        Helper.NeutralFormat("Failed to load script file [{0}]: {1}.", scriptFilePath, ide.Message);
                    throw new InvalidDataException(message, ide);
                }
            }

            return errors;
        }
Ejemplo n.º 12
0
        /// <summary>
        /// Writes the entries of one script file into the target script file.
        /// </summary>
        /// <remarks>
        /// When appending to an existing target, entries whose id is already in the
        /// target are reported as errors and not written. Entries for which
        /// ProcessPronunciation returns an error are reported and not written either.
        /// </remarks>
        /// <param name="subScriptFilePath">Source script file.</param>
        /// <param name="outFilePath">Target script file.</param>
        /// <param name="append">Whether appending to target script file.</param>
        /// <returns>Data error set found while reading both files and appending.</returns>
        private DataErrorSet AppendScript(string subScriptFilePath,
            string outFilePath, bool append)
        {
            DataErrorSet errors = new DataErrorSet();

            // Entries already in the target are only loaded when appending to an existing file.
            SortedDictionary<string, ScriptItem> targetEntries = new SortedDictionary<string, ScriptItem>();
            if (append && File.Exists(outFilePath))
            {
                errors = ReadAllData(outFilePath, Language, EngineType, targetEntries);
            }
            else
            {
                Helper.EnsureFolderExistForFile(outFilePath);
            }

            SortedDictionary<string, ScriptItem> sourceEntries = new SortedDictionary<string, ScriptItem>();
            DataErrorSet sourceErrors = ReadAllData(subScriptFilePath,
                 Language, EngineType, sourceEntries);
            errors.Merge(sourceErrors);

            using (StreamWriter writer = new StreamWriter(outFilePath, append, Encoding.Unicode))
            {
                foreach (KeyValuePair<string, ScriptItem> entry in sourceEntries)
                {
                    string sid = entry.Key;
                    ScriptItem scriptItem = entry.Value;

                    if (targetEntries.ContainsKey(sid))
                    {
                        errors.Errors.Add(new DataError(subScriptFilePath,
                            "Entry already exists in script file [" + outFilePath + "]", sid));
                        continue;
                    }

                    // hook handling
                    DataError pronunciationError = ProcessPronunciation(scriptItem);
                    if (pronunciationError != null)
                    {
                        errors.Errors.Add(pronunciationError);
                        continue;
                    }

                    writer.WriteLine(scriptItem.ToString(true, true, true));
                }
            }

            return errors;
        }
Ejemplo n.º 13
0
        /// <summary>
        /// Build pronunciation script from file list.
        /// </summary>
        /// <remarks>
        /// Paths that do not end with the script file extension are skipped silently;
        /// files that are not saved in Unicode are skipped and reported as errors.
        /// The first file that is actually processed replaces the output file, and
        /// every later one is appended to it.
        /// </remarks>
        /// <param name="scriptFilePaths">Hiragana script file list.</param>
        /// <param name="outFilePath">Romaji pronunciation script file.</param>
        /// <returns>Data error set found.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="scriptFilePaths"/> is null.</exception>
        /// <exception cref="InvalidDataException">An element of
        /// <paramref name="scriptFilePaths"/> is null or empty.</exception>
        public DataErrorSet BuildScript(string[] scriptFilePaths,
            string outFilePath)
        {
            if (scriptFilePaths == null)
            {
                throw new ArgumentNullException("scriptFilePaths");
            }

            DataErrorSet errorSet = new DataErrorSet();

            // Tracks whether a script has already been handed to AppendScript.
            // Deriving the append flag from the array index instead would append to a
            // stale output file whenever the first path in the list gets skipped.
            bool outputStarted = false;

            foreach (string scriptFilePath in scriptFilePaths)
            {
                if (string.IsNullOrEmpty(scriptFilePath))
                {
                    throw new InvalidDataException("scriptFilePath");
                }

                if (!scriptFilePath.EndsWith(ScriptFile.Extension, StringComparison.Ordinal))
                {
                    continue;
                }

                // all script files should be saved in unicode
                if (!Helper.IsUnicodeFile(scriptFilePath))
                {
                    DataError error = new DataError(scriptFilePath,
                        "script file should be saved in Unicode.");
                    errorSet.Errors.Add(error);
                    continue;
                }

                // do appending: overwrite for the first processed file, append afterwards
                DataErrorSet subErrorSet = AppendScript(scriptFilePath,
                    outFilePath, outputStarted);
                outputStarted = true;

                // merge error messages
                errorSet.Merge(subErrorSet);
            }

            return errorSet;
        }
Ejemplo n.º 14
0
        /// <summary>
        /// Builds the whole pronunciation script from every script file found
        /// directly in the given directory.
        /// </summary>
        /// <param name="phoneScriptFileDir">Script directory.</param>
        /// <param name="outFilePath">Pronunciation script file.</param>
        /// <returns>Data error set found.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="outFilePath"/> is null or empty.</exception>
        public DataErrorSet BuildScript(string phoneScriptFileDir,
            string outFilePath)
        {
            if (string.IsNullOrEmpty(outFilePath))
            {
                throw new ArgumentNullException("outFilePath");
            }

            if (!Directory.Exists(phoneScriptFileDir))
            {
                throw Helper.CreateException(typeof(DirectoryNotFoundException),
                    phoneScriptFileDir);
            }

            Helper.EnsureFolderExistForFile(outFilePath);

            // Collect the candidate script files by extension.
            string searchPattern = "*" + ScriptFile.Extension;
            string[] scriptPaths = Directory.GetFiles(phoneScriptFileDir, searchPattern);

            if (scriptPaths == null)
            {
                return new DataErrorSet();
            }

            return BuildScript(scriptPaths, outFilePath);
        }
Ejemplo n.º 15
0
        /// <summary>
        /// Checks one script item against its segmentation file and records every
        /// inconsistency in <paramref name="errorSet"/>.
        /// </summary>
        /// <remarks>
        /// The checks run in this order and stop at the first failing stage: the
        /// segment file must contain segments, its last segment must be silence, the
        /// number of non-silence segments must equal the number of phones or units
        /// of the item, and finally each label must match.
        /// </remarks>
        /// <param name="script">Script file instance.</param>
        /// <param name="item">Script item.</param>
        /// <param name="fileMap">File list map.</param>
        /// <param name="segmentDir">Segment file directory.</param>
        /// <param name="errorSet">Data error set that receives the errors found.</param>
        /// <param name="phoneBasedSegment">True to compare against the phones of the
        /// item; false to compare against its units.</param>
        public static void ValidateDataAlignment(ScriptFile script, ScriptItem item,
            FileListMap fileMap, string segmentDir, DataErrorSet errorSet, bool phoneBasedSegment)
        {
            string segmentFilePath = Path.Combine(segmentDir, fileMap.Map[item.Id] + ".txt");

            SegmentFile segmentFile = new SegmentFile();
            segmentFile.Load(segmentFilePath);

            if (segmentFile.WaveSegments.Count == 0)
            {
                errorSet.Errors.Add(new DataError(segmentFilePath,
                    "There is no valid alignment data into alignment file.", item.Id));
                return;
            }

            if (!segmentFile.WaveSegments[segmentFile.WaveSegments.Count - 1].IsSilencePhone)
            {
                errorSet.Errors.Add(new DataError(segmentFilePath,
                    "The alignment file is invalid, for without silence segment at the end.", item.Id));
                return;
            }

            int nonSilenceCount = segmentFile.NonSilenceWaveSegments.Count;

            if (phoneBasedSegment)
            {
                string[] phones = item.GetPhones();

                if (phones.Length != nonSilenceCount)
                {
                    string countMessage = string.Format(CultureInfo.InvariantCulture,
                        "script phones {0} do not match with non-silence segments {1} in segmentation file.",
                        phones.Length, nonSilenceCount);
                    errorSet.Errors.Add(new DataError(script.FilePath, countMessage, item.Id));
                    return;
                }

                // NOTE(review): the phone is compared unformatted but reported through
                // FormatLabel, whereas the unit branch compares the formatted label -
                // confirm this asymmetry is intended.
                for (int index = 0; index < nonSilenceCount; index++)
                {
                    WaveSegment segment = segmentFile.NonSilenceWaveSegments[index];
                    if (segment.Label == phones[index])
                    {
                        continue;
                    }

                    string labelMessage = string.Format(CultureInfo.InvariantCulture,
                        "phone [{0}/{1}] at {2} does not match between script and segment.",
                        WaveSegment.FormatLabel(phones[index]), segment.Label, index);
                    errorSet.Errors.Add(new DataError(script.FilePath, labelMessage, item.Id));
                }
            }
            else
            {
                int unitCount = item.Units.Count;

                if (unitCount != nonSilenceCount)
                {
                    string countMessage = string.Format(CultureInfo.InvariantCulture,
                        "script units {0} do not match with non-silence segments {1} in segmentation file.",
                        unitCount, nonSilenceCount);
                    errorSet.Errors.Add(new DataError(script.FilePath, countMessage, item.Id));
                    return;
                }

                for (int index = 0; index < nonSilenceCount; index++)
                {
                    WaveSegment segment = segmentFile.NonSilenceWaveSegments[index];
                    TtsUnit unit = item.Units[index];

                    if (segment.Label == WaveSegment.FormatLabel(unit.MetaUnit.Name))
                    {
                        continue;
                    }

                    string labelMessage = string.Format(CultureInfo.InvariantCulture,
                        "units [{0}/{1}] at {2} do not match between script and segment.",
                        WaveSegment.FormatLabel(unit.MetaUnit.Name), segment.Label, index);
                    errorSet.Errors.Add(new DataError(script.FilePath, labelMessage, item.Id));
                }
            }
        }
Ejemplo n.º 16
0
        /// <summary>
        /// Concatenates the script files named in a file list into one unified script file,
        /// optionally renumbering the sentence IDs.
        /// </summary>
        /// <remarks>
        /// Null or empty entries in <paramref name="fileList"/> are ignored. The errors reported
        /// by each loaded script are merged into the returned error set. When an item's ID is
        /// already present in the combined script, an error is recorded against the source file
        /// and the item is left out of the output.
        /// </remarks>
        /// <param name="language">Language of the scripts to process.</param>
        /// <param name="fileList">Paths of the script files to concatenate.</param>
        /// <param name="resetSid">
        /// True to replace every sentence ID with a running counter formatted as "{0:D10}"
        /// (starting from 1); false to keep the ID under which the item was stored.
        /// </param>
        /// <param name="targetScriptFilePath">Path of the combined script file to write.</param>
        /// <returns>Data error set containing the errors found.</returns>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="targetScriptFilePath"/> is null or empty, or
        /// <paramref name="fileList"/> is null.
        /// </exception>
        public static DataErrorSet ConcatenateFiles(Language language, IEnumerable<string> fileList, bool resetSid,
            string targetScriptFilePath)
        {
            if (string.IsNullOrEmpty(targetScriptFilePath))
            {
                throw new ArgumentNullException("targetScriptFilePath");
            }

            if (fileList == null)
            {
                throw new ArgumentNullException("fileList");
            }

            ScriptFile combined = Localor.CreateScriptFile(language);
            DataErrorSet problems = new DataErrorSet();

            // Running counter used to generate new sentence IDs when resetSid is set.
            int sequence = 0;
            foreach (string path in fileList)
            {
                if (!string.IsNullOrEmpty(path))
                {
                    ScriptFile part = Localor.CreateScriptFile(language);
                    part.Load(path);
                    problems.Merge(part.ErrorSet);

                    foreach (string originalSid in part.Items.Keys)
                    {
                        ScriptItem entry = part.Items[originalSid];

                        if (resetSid)
                        {
                            sequence++;
                            entry.Id = Helper.NeutralFormat("{0:D10}", sequence);
                        }
                        else
                        {
                            entry.Id = originalSid;
                        }

                        if (!combined.Items.ContainsKey(entry.Id))
                        {
                            combined.Items.Add(entry.Id, entry);
                        }
                        else
                        {
                            // Report the clash under the item's original ID and skip the item.
                            problems.Errors.Add(new DataError(path, "Sentence already exist", originalSid));
                        }
                    }
                }
            }

            Helper.EnsureFolderExistForFile(targetScriptFilePath);
            combined.Save(targetScriptFilePath);

            return problems;
        }
Ejemplo n.º 17
0
        /// <summary>
        /// Validates the phone sequence of every item in the script file. Each item is
        /// checked through <c>script.ProcessPronunciation</c>; per the original
        /// documentation of this method, the validation covers:
        ///  1) word alignment with the pronunciation string, i.e. each word
        ///     should have one and only one corresponding pronunciation;
        ///  2) syllabification of the pronunciation, where each syllable
        ///     has one and only one vowel and may carry one stress mark;
        ///  3) validity of each phone in the pronunciation string against
        ///     the phoneme set of that language.
        /// </summary>
        /// <remarks>
        /// An <see cref="InvalidDataException"/> or <see cref="KeyNotFoundException"/> raised
        /// while processing an item is converted into a data error for that sentence rather
        /// than propagated, so the remaining items are still validated.
        /// </remarks>
        /// <param name="script">The script object to be validated.</param>
        /// <returns>Errors/problems found in the script.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="script"/> is null.</exception>
        /// <exception cref="ArgumentException">
        /// The item collection of <paramref name="script"/>, or its key collection, is null.
        /// </exception>
        public static DataErrorSet ValidatePronunciation(ScriptFile script)
        {
            if (script == null)
            {
                throw new ArgumentNullException("script");
            }

            if (script.Items == null)
            {
                throw new ArgumentException("script.Items is null");
            }

            if (script.Items.Keys == null)
            {
                throw new ArgumentException("script.Items.Keys is null");
            }

            DataErrorSet findings = new DataErrorSet();
            foreach (string sentenceId in script.Items.Keys)
            {
                try
                {
                    DataError problem = script.ProcessPronunciation(script.Items[sentenceId]);
                    if (problem == null)
                    {
                        // Nothing to report for this sentence.
                        continue;
                    }

                    findings.Errors.Add(problem);
                }
                catch (InvalidDataException invalidData)
                {
                    DataError failure = new DataError(script.FilePath,
                        Helper.BuildExceptionMessage(invalidData), sentenceId);
                    findings.Errors.Add(failure);
                }
                catch (KeyNotFoundException missingKey)
                {
                    DataError failure = new DataError(script.FilePath,
                        Helper.BuildExceptionMessage(missingKey), sentenceId);
                    findings.Errors.Add(failure);
                }
            }

            return findings;
        }