/// <summary>
        /// Checks that the units listed in a unit feature file line up with the
        /// units produced from the matching sentences of a script file.
        /// </summary>
        /// <param name="featureFile">Path of the unit feature file.</param>
        /// <param name="scriptFile">Path of the script file.</param>
        /// <param name="language">Language used to fetch the phone set, phoneme and slice data,
        /// and which the feature file must declare.</param>
        /// <returns>The per-unit problems collected while comparing both files.</returns>
        /// <exception cref="InvalidDataException">
        /// The script file has validation errors, the feature file holds no unit,
        /// or the feature file language differs from <paramref name="language"/>.
        /// </exception>
        public static ErrorSet ValidateFeatureData(string featureFile,
            string scriptFile, Language language)
        {
            ErrorSet alignmentErrors = new ErrorSet();

            XmlScriptFile script = XmlScriptFile.LoadWithValidation(
                scriptFile,
                new XmlScriptValidateSetting(Localor.GetPhoneSet(language), null));
            if (script.ErrorSet.Count > 0)
            {
                // Any script error aborts the whole comparison.
                throw new InvalidDataException(string.Format(CultureInfo.InvariantCulture,
                    "{0} error(s) found in the script file [{1}]",
                    script.ErrorSet.Count, scriptFile));
            }

            XmlUnitFeatureFile features = new XmlUnitFeatureFile(featureFile);
            if (features.Units.Count <= 0)
            {
                string emptyMessage = string.Format(CultureInfo.InvariantCulture,
                    "Zero unit feature item in unit feature file {0}", featureFile);
                alignmentErrors.Add(VoiceFontError.OtherErrors, emptyMessage);

                throw new InvalidDataException(emptyMessage);
            }

            if (features.Language != language)
            {
                throw new InvalidDataException(string.Format(CultureInfo.InvariantCulture,
                    "Different lanuage\r\nScript File {0}: lang = {1}\r\n Feature File {2}: lang = {3}",
                    scriptFile, Localor.LanguageToString(language),
                    featureFile, Localor.LanguageToString(features.Language)));
            }

            foreach (string key in features.Units.Keys)
            {
                UnitFeature feature = features.Units[key];

                string sentenceId = feature.SentenceId;
                int index = feature.Index;
                string featureUnitName = feature.Name;

                if (index < 0)
                {
                    alignmentErrors.Add(VoiceFontError.OtherErrors, string.Format(CultureInfo.InvariantCulture,
                        "invalid unit index [{0}] found in feature file [{1}]. It should not be negative integer for unit indexing.",
                        index, featureFile));
                    continue;
                }

                try
                {
                    if (!script.ItemDic.ContainsKey(sentenceId))
                    {
                        alignmentErrors.Add(ScriptError.OtherErrors, sentenceId, string.Format(CultureInfo.InvariantCulture,
                            "sentence id {0} in feature file [{1}] is not in script file [{2}]",
                            sentenceId, featureFile, scriptFile));
                    }
                    else
                    {
                        // Units of the script sentence, as produced by the script item itself.
                        Collection<TtsUnit> scriptUnits = script.ItemDic[sentenceId].GetUnits(
                            Localor.GetPhoneme(language), Localor.GetSliceData(language));

                        if (index >= scriptUnits.Count)
                        {
                            alignmentErrors.Add(ScriptError.OtherErrors, sentenceId, string.Format(CultureInfo.InvariantCulture,
                                "the {0}th unit [{1}] in sentence {2} of feature file [{3}] is out of range for sentence {2} in script file [{4}]",
                                index, featureUnitName, sentenceId, featureFile, scriptFile));
                        }
                        else
                        {
                            // The script unit name is compared with blanks replaced by '+'.
                            string scriptUnitName = scriptUnits[index].FullName.Replace(' ', '+');
                            if (scriptUnitName != featureUnitName)
                            {
                                alignmentErrors.Add(ScriptError.OtherErrors, sentenceId, string.Format(CultureInfo.InvariantCulture,
                                    "the {0}th unit [{1}] in sentence {3} of feature file [{4}] " +
                                    "is not matched with {0}th unit [{2}] for sentence {3} in script file [{5}]",
                                    index, featureUnitName, scriptUnitName, sentenceId, featureFile, scriptFile));
                            }
                        }
                    }
                }
                catch (InvalidDataException dataError)
                {
                    // Data problems for one unit are recorded and the scan goes on.
                    alignmentErrors.Add(ScriptError.OtherErrors, sentenceId, Helper.BuildExceptionMessage(dataError));
                }
            }

            return alignmentErrors;
        }
        /// <summary>
        /// Reports sentence ids that appear in only one of the script file and
        /// the file list map file.
        /// <para>
        /// This function should be merged with that in forcedalignment into common library.
        /// </para>
        /// </summary>
        /// <param name="scriptFilePath">The location of the script file.</param>
        /// <param name="language">Language whose phone set is used to validate the script.</param>
        /// <param name="mapFilePath">The location of the file list map file.</param>
        /// <returns>
        /// The script's own validation errors plus one error per sentence id
        /// that is missing on either side.
        /// </returns>
        public static ErrorSet FindUnmatchedSentences(string scriptFilePath,
            Language language, string mapFilePath)
        {
            ErrorSet mismatches = new ErrorSet();

            XmlScriptFile script = XmlScriptFile.LoadWithValidation(
                scriptFilePath,
                new XmlScriptValidateSetting(Localor.GetPhoneSet(language), null));

            // Drop the items singled out by the script's validation errors before comparing.
            script.Remove(ScriptHelper.GetNeedDeleteItemIds(script.ErrorSet));

            Dictionary<string, string> fileMap = Microsoft.Tts.Offline.FileListMap.ReadAllData(mapFilePath);
            mismatches.Merge(script.ErrorSet);

            // Script -> map direction.
            foreach (string sid in script.ItemDic.Keys)
            {
                if (fileMap.ContainsKey(sid))
                {
                    continue;
                }

                mismatches.Add(ScriptError.OtherErrors, sid, Helper.NeutralFormat(
                    "Script item {0} in script file but not in file list map file", sid));
            }

            // Map -> script direction.
            foreach (string sid in fileMap.Keys)
            {
                if (script.ItemDic.ContainsKey(sid))
                {
                    continue;
                }

                mismatches.Add(ScriptError.OtherErrors, sid, Helper.NeutralFormat(
                    "Script item {0} in file list map file but not in script file", sid));
            }

            return mismatches;
        }
Example #3
0
        /// <summary>
        /// Validates the content of a script item; the schema is not checked here.
        /// Depending on the validation scope of <paramref name="validateSetting"/>:
        /// 1. Pronunciation: a normal word with a pronunciation has it validated
        ///    against the phone set; a normal word without one is reported.
        /// 2. POS: a non-empty POS string must be a key of the POS set.
        /// 3. SegmentSequence: the item's segments are checked.
        /// Every word is additionally passed to its own IsValid check.
        /// </summary>
        /// <param name="item">The item to be checked.</param>
        /// <param name="errors">Cleared first, then filled with the problems found.</param>
        /// <param name="validateSetting">Validation data set.</param>
        /// <returns>True when no error was recorded.</returns>
        /// <exception cref="ArgumentNullException">Any argument is null.</exception>
        public static bool IsValidItem(ScriptItem item, ErrorSet errors, XmlScriptValidateSetting validateSetting)
        {
            if (item == null)
            {
                throw new ArgumentNullException("item");
            }

            if (errors == null)
            {
                throw new ArgumentNullException("errors");
            }

            if (validateSetting == null)
            {
                throw new ArgumentNullException("validateSetting");
            }

            validateSetting.VerifySetting();

            XmlScriptValidationScope scope = validateSetting.ValidationScope;
            bool checkPronunciation =
                (scope & XmlScriptValidationScope.Pronunciation) == XmlScriptValidationScope.Pronunciation;
            bool checkPos =
                (scope & XmlScriptValidationScope.POS) == XmlScriptValidationScope.POS;
            bool checkSegments =
                (scope & XmlScriptValidationScope.SegmentSequence) == XmlScriptValidationScope.SegmentSequence;

            errors.Clear();

            int sentenceNumber = 0;
            foreach (ScriptSentence sentence in item.Sentences)
            {
                int wordNumber = 0;
                foreach (ScriptWord word in sentence.Words)
                {
                    // Only normal words take part in the pronunciation check.
                    if (checkPronunciation && word.WordType == WordType.Normal)
                    {
                        string pron = word.GetPronunciation(validateSetting.PhoneSet);
                        if (string.IsNullOrEmpty(pron))
                        {
                            // A normal word without pronunciation is reported with its own error code.
                            errors.Add(ScriptError.EmptyPronInNormalWord, item.Id, word.Grapheme);
                        }
                        else
                        {
                            ErrorSet pronErrors = Core.Pronunciation.Validate(pron, validateSetting.PhoneSet);
                            foreach (Error pronError in pronErrors.Errors)
                            {
                                errors.Add(ScriptError.PronunciationError, pronError, item.Id, word.Grapheme);
                            }
                        }
                    }

                    // A POS name is only checked when the word actually carries one.
                    if (checkPos &&
                        !string.IsNullOrEmpty(word.PosString) &&
                        !validateSetting.PosSet.Items.ContainsKey(word.PosString))
                    {
                        errors.Add(ScriptError.UnrecognizedPos, item.Id, word.Grapheme,
                            word.Pronunciation, word.PosString);
                    }

                    string nodePath = string.Format(CultureInfo.InvariantCulture, "Sentence[{0}].Word[{1}]",
                        sentenceNumber, wordNumber);
                    word.IsValid(item.Id, nodePath, scope, errors);

                    wordNumber++;
                }

                sentenceNumber++;
            }

            if (checkSegments)
            {
                CheckSegments(item, errors);
            }

            return errors.Count == 0;
        }
        /// <summary>
        /// Reads the script file path from the XML configuration and, when requested,
        /// prepares a single merged XML script under the output directory.
        /// </summary>
        /// <remarks>
        /// When <paramref name="updateScriptPath"/> is true and there are no merge items,
        /// ScriptPath is redirected to "Interm\MergedScript.xml" under OutputDir and filled
        /// from the configured path: a folder is merged (two-line *.txt scripts are first
        /// converted into a temporary sub-folder when the folder holds no *.xml file),
        /// a *.txt file is converted, and any other file is copied.
        /// </remarks>
        /// <param name="dom">XML configuration document.</param>
        /// <param name="filePath">XML configuration file path.</param>
        /// <param name="nsmgr">Namespace manager used to resolve the "tts" prefix.</param>
        /// <param name="updateScriptPath">Whether to build the merged script and update the script path.</param>
        /// <exception cref="InvalidDataException">The configuration has no scriptFile path attribute.</exception>
        /// <exception cref="FileNotFoundException">The configured path is neither a folder nor a file.</exception>
        private void ParseScriptFile(XmlDocument dom, string filePath, XmlNamespaceManager nsmgr,
            bool updateScriptPath)
        {
            // ScriptFilePath
            XmlNode node = dom.DocumentElement.SelectSingleNode(@"tts:scriptFile/@path", nsmgr);
            if (node == null)
            {
                // Fail with a descriptive error instead of a NullReferenceException.
                throw new InvalidDataException(string.Format(CultureInfo.InvariantCulture,
                    "Could not find the scriptFile path in config file [{0}]", filePath));
            }

            string configuredPath = node.InnerText;
            CheckPath(configuredPath, false, filePath, "scriptFile");
            ScriptPath = configuredPath;

            if (!updateScriptPath || MergeItems.Count != 0)
            {
                return;
            }

            // temporarily add codes to convert *.txt to *.xml
            // will delete it when all scripts are converted to xml

            // Merge script if the input is a script folder
            ScriptPath = Path.Combine(OutputDir, @"Interm\MergedScript.xml");
            Helper.EnsureFolderExistForFile(ScriptPath);
            if (Directory.Exists(configuredPath))
            {
                string inputDir = configuredPath;
                try
                {
                    if (Directory.GetFiles(inputDir, "*.xml", SearchOption.TopDirectoryOnly).Length == 0)
                    {
                        // No XML script at the top level: convert the two-line text scripts
                        // into a temporary sub-folder and merge from there.
                        inputDir = Path.Combine(configuredPath, "temp");
                        Helper.EnsureFolderExist(inputDir);
                        foreach (string input in Directory.GetFiles(
                            configuredPath, "*.txt", SearchOption.TopDirectoryOnly))
                        {
                            string output = Path.Combine(inputDir, Path.GetFileNameWithoutExtension(input) + ".xml");
                            ScriptHelper.ConvertTwoLineScriptToXmlScript(input, output, Language);
                        }
                    }

                    TtsPhoneSet phoneSet = Localor.GetPhoneSet(Language);
                    XmlScriptValidateSetting validateSetting = new XmlScriptValidateSetting(phoneSet, null);
                    ScriptHelper.MergeScripts(inputDir, ScriptPath, false, validateSetting);
                }
                finally
                {
                    // Remove the temporary folder even when conversion or merging failed,
                    // so that stale converted scripts are not picked up by a later run.
                    if (!inputDir.Equals(configuredPath) && Directory.Exists(inputDir))
                    {
                        Directory.Delete(inputDir, true);
                    }
                }
            }
            else if (File.Exists(configuredPath))
            {
                // Compare the extension ignoring case so that "Script.TXT" is converted
                // rather than copied verbatim as if it were an XML script.
                if (string.Equals(Path.GetExtension(configuredPath), ".txt",
                    System.StringComparison.OrdinalIgnoreCase))
                {
                    ScriptHelper.ConvertTwoLineScriptToXmlScript(configuredPath, ScriptPath, Language);
                }
                else
                {
                    File.Copy(configuredPath, ScriptPath, true);
                }
            }
            else
            {
                // if merge enable, the script path will the target path
                // this file will be the merge result of source script files
                string message = string.Format(CultureInfo.InvariantCulture,
                    "Could not find the scriptFile file [{0}], which is specified in config file [{1}]",
                    configuredPath, filePath);
                throw new FileNotFoundException(message);
            }
        }
        /// <summary>
        /// Loads a script file and validates it with the given setting.
        /// </summary>
        /// <param name="scriptFile">Path of the file to be loaded.</param>
        /// <param name="validateSetting">Validation data set; supplies the phone set and POS set.</param>
        /// <returns>The loaded script, after validation has run on it.</returns>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="scriptFile"/> is null or empty, or
        /// <paramref name="validateSetting"/> is null.
        /// </exception>
        public static XmlScriptFile LoadWithValidation(string scriptFile, XmlScriptValidateSetting validateSetting)
        {
            if (string.IsNullOrEmpty(scriptFile))
            {
                throw new ArgumentNullException("scriptFile");
            }

            if (validateSetting == null)
            {
                throw new ArgumentNullException("validateSetting");
            }

            validateSetting.VerifySetting();

            XmlScriptFile loaded = new XmlScriptFile();
            loaded.Load(scriptFile);

            // Attach the phone set and POS set from the setting, then validate.
            loaded.PhoneSet = validateSetting.PhoneSet;
            loaded.PosSet = validateSetting.PosSet;
            loaded.Validate(validateSetting);

            return loaded;
        }
        /// <summary>
        /// Adds one item to the script file.
        /// The item is rejected when its id is already present or, if
        /// <paramref name="validate"/> is set, when content validation reports errors.
        /// </summary>
        /// <param name="item">The item to be added.</param>
        /// <param name="errors">Cleared first, then filled with the reasons the item was rejected.</param>
        /// <param name="validate">Whether to validate the item content.</param>
        /// <param name="sort">Whether to insert the item at its sorted position (ordinal, case-insensitive id order)
        /// instead of appending it.</param>
        /// <returns>True if successfully added.</returns>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="item"/> or <paramref name="errors"/> is null.
        /// </exception>
        public bool Add(ScriptItem item, ErrorSet errors, bool validate, bool sort)
        {
            if (item == null)
            {
                throw new ArgumentNullException("item");
            }

            if (errors == null)
            {
                throw new ArgumentNullException("errors");
            }

            // check schema, should throw exception if invalid
            CheckSchema(item);

            errors.Clear();

            // content checking, should add to errors if invalid
            if (_itemDic.ContainsKey(item.Id))
            {
                errors.Add(ScriptError.DuplicateItemId, item.Id);
            }

            if (validate)
            {
                ErrorSet contentErrors = new ErrorSet();
                ScriptItem.IsValidItem(item, contentErrors, new XmlScriptValidateSetting(PhoneSet, PosSet));
                errors.Merge(contentErrors);
            }

            if (errors.Count > 0)
            {
                return false;
            }

            _itemDic.Add(item.Id, item);

            // Position of the first existing item whose id sorts after the new one;
            // stays negative when appending is the right thing to do.
            int insertAt = -1;
            if (sort)
            {
                int candidate = 0;
                while (candidate < _items.Count && insertAt < 0)
                {
                    if (string.Compare(item.Id, _items[candidate].Id, StringComparison.OrdinalIgnoreCase) < 0)
                    {
                        insertAt = candidate;
                    }

                    candidate++;
                }
            }

            if (insertAt >= 0)
            {
                _items.Insert(insertAt, item);
            }
            else
            {
                _items.Add(item);
            }

            return true;
        }
Example #7
0
        /// <summary>
        /// Reads the settings of this object from a configuration element: language,
        /// engine type, language data, voice name, token id, font path, script,
        /// file map, weight table, CART tree settings, unit feature file path and
        /// the wave16k / segment directories.
        /// </summary>
        /// <param name="config">Configuration element to read from.</param>
        /// <exception cref="ArgumentNullException"><paramref name="config"/> is null.</exception>
        /// <exception cref="DirectoryNotFoundException">
        /// One of the configured directories or files does not exist. Note that
        /// missing files are reported with this exception type as well.
        /// </exception>
        public void ParseConfig(XmlElement config)
        {
            if (config == null)
            {
                throw new ArgumentNullException("config");
            }

            string languageName = config.GetAttribute("language");
            string engineName = config.GetAttribute("engine");
            Debug.Assert(!string.IsNullOrEmpty(languageName));
            Debug.Assert(!string.IsNullOrEmpty(engineName));
            _primaryLanguage = Localor.StringToLanguage(languageName);
            _engineType = (EngineType)Enum.Parse(typeof(EngineType), engineName);

            XmlElement languageDataElement = config.SelectSingleNode("languageData") as XmlElement;
            VoiceCreationLanguageData languageData = new VoiceCreationLanguageData();
            if (languageDataElement == null)
            {
                // Without a languageData element only the question file path is read.
                languageData.CartQuestions = config.SelectSingleNode("question/@path").InnerText;
            }
            else
            {
                languageData.ParseLanguageDataFromXmlElement(true, languageDataElement);
                languageData.SetLanguageData(_primaryLanguage);
            }

            _voiceName = config.GetAttribute("voiceName");
            _tokenId = config.GetAttribute("tokenId");

            _fontPath = config.SelectSingleNode("font/@path").InnerText;

            // Load the script with validation, then drop the items flagged for deletion.
            XmlScriptValidateSetting scriptSetting =
                new XmlScriptValidateSetting(Localor.GetPhoneSet(_primaryLanguage), null);
            _scriptFile = XmlScriptFile.LoadWithValidation(
                config.SelectSingleNode("script/@path").InnerText, scriptSetting);
            _scriptFile.Remove(ScriptHelper.GetNeedDeleteItemIds(_scriptFile.ErrorSet));

            FileMap = new FileListMap();
            FileMap.Load(config.SelectSingleNode("filemap/@path").InnerText);

            _weightTable = new WeightTable(_primaryLanguage, _engineType);
            _weightTable.Load(config.SelectSingleNode("weighttable/@path").InnerText);

            _cartTreeManager = new CartTreeManager();
            _cartTreeManager.CartTreeDir = config.SelectSingleNode("treedir/@path").InnerText;
            if (!Directory.Exists(_cartTreeManager.CartTreeDir))
            {
                throw new DirectoryNotFoundException(string.Format(CultureInfo.InvariantCulture,
                    "The treeDir path does not exist at [{0}]",
                    _cartTreeManager.CartTreeDir));
            }

            _cartTreeManager.CartQuestionFile = languageData.CartQuestions;
            if (!File.Exists(_cartTreeManager.CartQuestionFile))
            {
                throw new DirectoryNotFoundException(string.Format(CultureInfo.InvariantCulture,
                    "The tree question file path does not exist at [{0}]",
                    _cartTreeManager.CartQuestionFile));
            }

            _cartTreeManager.UnitDescriptFile = config.SelectSingleNode("unitdescript/@path").InnerText;
            if (!File.Exists(_cartTreeManager.UnitDescriptFile))
            {
                throw new DirectoryNotFoundException(string.Format(CultureInfo.InvariantCulture,
                    "The unit description file path does not exist at [{0}]",
                    _cartTreeManager.UnitDescriptFile));
            }

            _unitFeatureFilePath = config.SelectSingleNode("wavesequence/@path").InnerText;
            if (!File.Exists(_unitFeatureFilePath))
            {
                throw new DirectoryNotFoundException(string.Format(CultureInfo.InvariantCulture,
                    "The wave sequence file path does not exist at [{0}]",
                    _unitFeatureFilePath));
            }

            // Every configured wave16k directory must exist.
            _wave16kDirectories.Clear();
            foreach (XmlNode waveNode in config.SelectNodes("wave16k/@path"))
            {
                string waveDir = waveNode.InnerText.Trim();
                if (!Directory.Exists(waveDir))
                {
                    throw new DirectoryNotFoundException(string.Format(CultureInfo.InvariantCulture,
                        "The wave16k path does not exist at [{0}]",
                        waveDir));
                }

                _wave16kDirectories.Add(waveDir);
            }

            // Every configured segment directory must exist.
            _segmentDirectories.Clear();
            foreach (XmlNode segmentNode in config.SelectNodes("segment/@path"))
            {
                string segmentDir = segmentNode.InnerText.Trim();
                if (!Directory.Exists(segmentDir))
                {
                    throw new DirectoryNotFoundException(string.Format(CultureInfo.InvariantCulture,
                        "The alignment path does not exist at [{0}]",
                        segmentDir));
                }

                _segmentDirectories.Add(segmentDir);
            }
        }
Example #8
0
        /// <summary>
        /// Loads a script file with validation and builds the mono MLF for each of
        /// its remaining items, optionally writing the result to a file.
        /// </summary>
        /// <param name="scriptFilePath">Path of the script file to load.</param>
        /// <param name="outFilePath">Path of the MLF file; only used when
        /// <paramref name="writeToFile"/> is true.</param>
        /// <param name="writeToFile">Whether to write the MLF to <paramref name="outFilePath"/>.</param>
        /// <param name="phoneme">Phoneme passed on to the per-item builder.</param>
        /// <param name="validateSetting">Validation setting used when loading the script.</param>
        /// <param name="sliceData">Slice data passed on to the per-item builder.</param>
        /// <returns>The script's validation errors merged with the per-item build errors.</returns>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="phoneme"/>, <paramref name="sliceData"/> or
        /// <paramref name="validateSetting"/> is null.
        /// </exception>
        /// <exception cref="InvalidDataException">No item is left in the script after
        /// removing the items flagged for deletion.</exception>
        public static ErrorSet BuildMonoMlf(string scriptFilePath, string outFilePath, bool writeToFile,
            Phoneme phoneme, XmlScriptValidateSetting validateSetting, SliceData sliceData)
        {
            if (phoneme == null)
            {
                throw new ArgumentNullException("phoneme");
            }

            if (sliceData == null)
            {
                // Report the parameter that is actually null.
                throw new ArgumentNullException("sliceData");
            }

            if (validateSetting == null)
            {
                throw new ArgumentNullException("validateSetting");
            }

            validateSetting.VerifySetting();

            ErrorSet errors = new ErrorSet();

            // The writer is owned by the using block so that it is released even
            // when writing the header fails; it stays null when no file is wanted.
            using (StreamWriter sw = writeToFile
                ? new StreamWriter(outFilePath, false, Encoding.ASCII)
                : null)
            {
                if (sw != null)
                {
                    sw.WriteLine("#!MLF!#");
                }

                XmlScriptFile script = XmlScriptFile.LoadWithValidation(scriptFilePath, validateSetting);
                script.Remove(GetNeedDeleteItemIds(script.ErrorSet));
                if (script.Items.Count == 0)
                {
                    throw new InvalidDataException(
                        Helper.NeutralFormat("No valid items in {0}.", scriptFilePath));
                }

                errors.Merge(script.ErrorSet);
                foreach (ScriptItem item in script.Items)
                {
                    errors.Merge(BuildMonoMlf(item, sw, writeToFile, phoneme, sliceData));
                }
            }

            if (writeToFile)
            {
                // Debug-only sanity check of the file that was just written.
                Debug.Assert(HtkTool.VerifyMlfFormat(outFilePath));
            }

            return errors;
        }
Example #9
0
 /// <summary>
 /// Merges the scripts found in a folder and saves the result as one
 /// Unicode-encoded script file.
 /// </summary>
 /// <param name="scriptDir">Folder containing the script files.</param>
 /// <param name="targetFile">Path of the merged file to write.</param>
 /// <param name="resetId">True means resetting item ids.</param>
 /// <param name="validateSetting">Validation setting.</param>
 /// <param name="contentController">Content controller used for loading and saving.</param>
 /// <returns>The errors collected while merging.</returns>
 public static ErrorSet MergeScripts(string scriptDir, string targetFile, bool resetId, XmlScriptValidateSetting validateSetting, object contentController)
 {
     ErrorSet mergeErrors = new ErrorSet();

     // Merge in memory first; the target folder is only created once that succeeded.
     XmlScriptFile merged = MergeScripts(scriptDir, mergeErrors, resetId, validateSetting, contentController);

     Helper.EnsureFolderExistForFile(targetFile);
     merged.Save(targetFile, Encoding.Unicode, contentController);

     return mergeErrors;
 }
Example #10
0
        /// <summary>
        /// Merges all script files found under a folder (sub-folders included)
        /// into one in-memory script file. Unless the validation scope is None,
        /// each script is validated and the items flagged for deletion are
        /// removed before its items are merged.
        /// </summary>
        /// <param name="scriptDir">Folder containing the script files.</param>
        /// <param name="errorSet">Receives the errors of each script and of each failed add.</param>
        /// <param name="resetId">True means replacing item ids with a running ten-digit number.</param>
        /// <param name="validateSetting">Validation setting.</param>
        /// <param name="contentController">Content controller used for loading the scripts.</param>
        /// <returns>The merged script file.</returns>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="scriptDir"/> is null or empty, or <paramref name="errorSet"/>
        /// or <paramref name="validateSetting"/> is null.
        /// </exception>
        /// <exception cref="DirectoryNotFoundException"><paramref name="scriptDir"/> does not exist.</exception>
        /// <exception cref="InvalidDataException">The scripts do not all share one language.</exception>
        public static XmlScriptFile MergeScripts(string scriptDir, ErrorSet errorSet,
            bool resetId, XmlScriptValidateSetting validateSetting, object contentController)
        {
            if (string.IsNullOrEmpty(scriptDir))
            {
                throw new ArgumentNullException("scriptDir");
            }

            if (errorSet == null)
            {
                throw new ArgumentNullException("errorSet");
            }

            if (validateSetting == null)
            {
                throw new ArgumentNullException("validateSetting");
            }

            if (!Directory.Exists(scriptDir))
            {
                throw new DirectoryNotFoundException(scriptDir);
            }

            validateSetting.VerifySetting();

            bool validateContent = validateSetting.ValidationScope != XmlScriptValidationScope.None;

            string[] scriptPaths = Directory.GetFiles(
                scriptDir, "*" + XmlScriptFile.Extension, SearchOption.AllDirectories);
            XmlScriptFile merged = new XmlScriptFile();

            long lastId = 0;
            for (int fileIndex = 0; fileIndex < scriptPaths.Length; fileIndex++)
            {
                string file = scriptPaths[fileIndex];

                XmlScriptFile script = new XmlScriptFile();
                script.Load(file, contentController);

                // The first script decides the language; all later ones must agree.
                if (merged.Language != Language.Neutral)
                {
                    if (merged.Language != script.Language)
                    {
                        throw new InvalidDataException(Helper.NeutralFormat("Inconsistent langage in {0}", file));
                    }
                }
                else
                {
                    merged.Language = script.Language;
                }

                if (validateContent)
                {
                    script.PosSet = validateSetting.PosSet;
                    script.PhoneSet = validateSetting.PhoneSet;

                    script.Validate(validateSetting);
                    script.Remove(GetNeedDeleteItemIds(script.ErrorSet));
                }

                errorSet.Merge(script.ErrorSet);
                foreach (ScriptItem item in script.Items)
                {
                    // The counter advances for every item when ids are reset,
                    // whether or not the item ends up being added.
                    string mergedId = resetId ? Helper.NeutralFormat("{0:D10}", ++lastId) : item.Id;
                    item.Id = mergedId;

                    ErrorSet addErrors = new ErrorSet();
                    bool added = merged.Add(item, addErrors, false);
                    if (!added)
                    {
                        // Added failed
                        errorSet.Merge(addErrors);
                    }
                }
            }

            return merged;
        }
Example #11
0
 /// <summary>
 /// Merges the scripts found in a folder into one script file, using a
 /// content controller that has comment saving switched off.
 /// </summary>
 /// <param name="scriptDir">Folder containing the script files.</param>
 /// <param name="targetFile">Path of the merged file to write.</param>
 /// <param name="resetId">True means resetting item ids.</param>
 /// <param name="validateSetting">Validation setting.</param>
 /// <returns>The errors collected while merging.</returns>
 public static ErrorSet MergeScripts(string scriptDir, string targetFile, bool resetId, XmlScriptValidateSetting validateSetting)
 {
     XmlScriptFile.ContentControler withoutComments = new XmlScriptFile.ContentControler
     {
         SaveComments = false
     };

     return MergeScripts(scriptDir, targetFile, resetId, validateSetting, withoutComments);
 }