Beispiel #1
0
        /// <summary>
        /// Check whether a script item is valid
        /// We don't check schema here
        /// Validation conditions: 
        /// 1. Normal word should have pronunciation 
        /// 2. Pronunciation should be good
        /// 3. POS should be in POS set
        /// We could use some flag to control the validation conditions
        /// When we need flexible control.
        /// </summary>
        /// <param name="item">The item to be checked.</param>
        /// <param name="errors">Errors if item is invalid.</param>
        /// <param name="validateSetting">Validation data set.</param>
        /// <returns>True is valid.</returns>
        public static bool IsValidItem(ScriptItem item, ErrorSet errors, XmlScriptValidateSetting validateSetting)
        {
            if (item == null)
            {
                throw new ArgumentNullException("item");
            }

            if (errors == null)
            {
                throw new ArgumentNullException("errors");
            }

            if (validateSetting == null)
            {
                throw new ArgumentNullException("validateSetting");
            }

            validateSetting.VerifySetting();

            XmlScriptValidationScope scope = validateSetting.ValidationScope;

            bool valid = true;
            errors.Clear();

            int sentIndex = 0;
            foreach (ScriptSentence sentence in item.Sentences)
            {
                int wordIndex = 0;
                foreach (ScriptWord word in sentence.Words)
                {
                    if ((scope & XmlScriptValidationScope.Pronunciation) == XmlScriptValidationScope.Pronunciation)
                    {
                        // check pronunciation
                        string pron = null;
                        if (word.WordType == WordType.Normal)
                        {
                            pron = word.GetPronunciation(validateSetting.PhoneSet);
                        }

                        if (!string.IsNullOrEmpty(pron))
                        {
                            ErrorSet pronErrors = Core.Pronunciation.Validate(pron, validateSetting.PhoneSet);
                            foreach (Error error in pronErrors.Errors)
                            {
                                errors.Add(ScriptError.PronunciationError, error, item.Id, word.Grapheme);
                            }
                        }
                        else if (word.WordType == WordType.Normal)
                        {
                            // Pronunciation is optional for normal word, will give warning if empty pronunciation for normal word.
                            errors.Add(ScriptError.EmptyPronInNormalWord, item.Id, word.Grapheme);
                        }
                    }

                    if ((scope & XmlScriptValidationScope.POS) == XmlScriptValidationScope.POS)
                    {
                        // check pos name
                        if (!string.IsNullOrEmpty(word.PosString) &&
                            !validateSetting.PosSet.Items.ContainsKey(word.PosString))
                        {
                            errors.Add(ScriptError.UnrecognizedPos, item.Id, word.Grapheme,
                                word.Pronunciation, word.PosString);
                        }
                    }

                    string nodePath = string.Format(CultureInfo.InvariantCulture, "Sentence[{0}].Word[{1}]",
                        sentIndex, wordIndex);
                    word.IsValid(item.Id, nodePath, scope, errors);

                    wordIndex++;
                }

                sentIndex++;
            }

            if ((scope & XmlScriptValidationScope.SegmentSequence) == XmlScriptValidationScope.SegmentSequence)
            {
                CheckSegments(item, errors);
            }

            if (errors.Count > 0)
            {
                valid = false;
            }

            return valid;
        }
        public static ErrorSet BuildMonoMlf(string scriptFilePath, string outFilePath, bool writeToFile,
            Phoneme phoneme, XmlScriptValidateSetting validateSetting, SliceData sliceData)
        {
            if (phoneme == null)
            {
                throw new ArgumentNullException("phoneme");
            }

            if (sliceData == null)
            {
                throw new ArgumentNullException("phoneme");
            }

            if (validateSetting == null)
            {
                throw new ArgumentNullException("validateSetting");
            }

            validateSetting.VerifySetting();

            ErrorSet errors = new ErrorSet();
            StreamWriter sw = null;

            if (writeToFile)
            {
                sw = new StreamWriter(outFilePath, false, Encoding.ASCII);
                sw.WriteLine("#!MLF!#");
            }

            try
            {
                XmlScriptFile script = XmlScriptFile.LoadWithValidation(scriptFilePath, validateSetting);
                script.Remove(GetNeedDeleteItemIds(script.ErrorSet));
                if (script.Items.Count == 0)
                {
                    throw new InvalidDataException(
                        Helper.NeutralFormat("No valid items in {0}.", scriptFilePath));
                }

                errors.Merge(script.ErrorSet);
                foreach (ScriptItem item in script.Items)
                {
                    errors.Merge(BuildMonoMlf(item, sw, writeToFile, phoneme, sliceData));
                }
            }
            finally
            {
                if (sw != null)
                {
                    sw.Close();
                }
            }

            if (writeToFile)
            {
                Debug.Assert(HtkTool.VerifyMlfFormat(outFilePath));
            }

            return errors;
        }
        /// <summary>
        /// Load script and check it.
        /// </summary>
        /// <param name="scriptFile">File to be loaded.</param>
        /// <param name="validateSetting">Validation data set.</param>
        /// <returns>Script loaded.</returns>
        public static XmlScriptFile LoadWithValidation(string scriptFile, XmlScriptValidateSetting validateSetting)
        {
            if (string.IsNullOrEmpty(scriptFile))
            {
                throw new ArgumentNullException("scriptFile");
            }

            if (validateSetting == null)
            {
                throw new ArgumentNullException("validateSetting");
            }

            validateSetting.VerifySetting();

            XmlScriptFile script = new XmlScriptFile();
            script.Load(scriptFile);

            script.PhoneSet = validateSetting.PhoneSet;
            script.PosSet = validateSetting.PosSet;
            script.Validate(validateSetting);

            return script;
        }
        /// <summary>
        /// Merge scripts in a folder into a script file.
        /// Error items are removed from the output file.
        /// </summary>
        /// <param name="scriptDir">Dir conataining script file.</param>
        /// <param name="errorSet">Error set.</param>
        /// <param name="resetId">True means resetting id.</param>
        /// <param name="validateSetting">Validation setting.</param>
        /// <param name="contentController">Contenct controller.</param>
        /// <returns>Xml script file.</returns>
        public static XmlScriptFile MergeScripts(string scriptDir, ErrorSet errorSet,
            bool resetId, XmlScriptValidateSetting validateSetting, object contentController)
        {
            if (string.IsNullOrEmpty(scriptDir))
            {
                throw new ArgumentNullException("scriptDir");
            }

            if (errorSet == null)
            {
                throw new ArgumentNullException("errorSet");
            }

            if (validateSetting == null)
            {
                throw new ArgumentNullException("validateSetting");
            }

            if (!Directory.Exists(scriptDir))
            {
                throw new DirectoryNotFoundException(scriptDir);
            }

            validateSetting.VerifySetting();
            
            XmlScriptValidationScope scope = validateSetting.ValidationScope;

            string[] subFiles = Directory.GetFiles(
                scriptDir, "*" + XmlScriptFile.Extension, SearchOption.AllDirectories);
            XmlScriptFile mergedScript = new XmlScriptFile();

            long id = 0;
            foreach (string file in subFiles)
            {
                XmlScriptFile script = new XmlScriptFile();
                script.Load(file, contentController);
                if (mergedScript.Language == Language.Neutral)
                {
                    mergedScript.Language = script.Language;
                }
                else if (mergedScript.Language != script.Language)
                {
                    throw new InvalidDataException(Helper.NeutralFormat("Inconsistent langage in {0}", file));
                }

                if (scope != XmlScriptValidationScope.None)
                {
                    script.PosSet = validateSetting.PosSet;
                    script.PhoneSet = validateSetting.PhoneSet;

                    script.Validate(validateSetting);
                    script.Remove(GetNeedDeleteItemIds(script.ErrorSet));
                }

                errorSet.Merge(script.ErrorSet);
                foreach (ScriptItem item in script.Items)
                {
                    item.Id = resetId ? Helper.NeutralFormat("{0:D10}", ++id) : item.Id;

                    ErrorSet addErrors = new ErrorSet();
                    if (!mergedScript.Add(item, addErrors, false))
                    {
                        // Added failed
                        errorSet.Merge(addErrors);
                    }
                }
            }

            return mergedScript;
        }