Beispiel #1
0
        /// <summary>
        /// Program entry point. Loads the configuration, parses the train set and
        /// then the test set, and finally (when a configuration environment
        /// directory is configured) loads both parsed data sets into MATLAB and
        /// saves them to that directory.
        /// </summary>
        /// <param name="args">
        /// Command-line arguments; <c>args[0]</c> is handed to the
        /// <c>ConfigurationManager</c> constructor.
        /// </param>
        static void Main(string[] args)
        {
            // Fail with a readable message instead of an IndexOutOfRangeException
            // when the configuration argument is missing.
            if (args == null || args.Length == 0 || String.IsNullOrEmpty(args[0]))
            {
                Console.Error.WriteLine("Missing argument: path to the configuration file.");
                Environment.ExitCode = 1;
                return;
            }

            // Start the Configuration Manager
            ConfigurationManager configManager = new ConfigurationManager(args[0]);

            // Start the logger
            Logger logger = new Logger(configManager);

            // Start Train Set parsing from root directory
            Parser trainParser = CreateParser(configManager.trainInputFormat, configManager, logger);
            trainParser.Parse(configManager.rootTrainDirectory, "Train", configManager.trainInputParsingMode, configManager.trainInputFormat);

            // Start Test Set parsing from root directory
            Parser testParser = CreateParser(configManager.testInputFormat, configManager, logger);
            testParser.Parse(configManager.rootTestDirectory, "Test", configManager.testInputParsingMode, configManager.testInputFormat);

            // Copy files to configuration environment if required.
            // A null directory is treated like an empty one: nothing is copied.
            if (!String.IsNullOrEmpty(configManager.configEnvDirectory))
            {
                // Both data sets are loaded into the same MATLAB session before a
                // single save into the configuration environment directory.
                MLApp.MLAppClass matlab = new MLApp.MLAppClass();
                matlab.Execute(@"load('" + configManager.rootTrainDirectory + @"\input_data');");
                matlab.Execute(@"load('" + configManager.rootTestDirectory + @"\input_data');");
                matlab.Execute(@"save('" + configManager.configEnvDirectory + @"\input_data');");
            }
        }

        /// <summary>
        /// Creates the parser matching the given input format.
        /// </summary>
        /// <param name="inputFormat">"RawTxt" selects <c>RawTxtParser</c>; "ReadyFeatures" and any other value select <c>ReadyFeaturesParser</c>.</param>
        /// <param name="configManager">Configuration passed to the parser constructor.</param>
        /// <param name="logger">Logger passed to the parser constructor.</param>
        /// <returns>The newly constructed parser.</returns>
        private static Parser CreateParser(string inputFormat, ConfigurationManager configManager, Logger logger)
        {
            switch (inputFormat)
            {
            case "RawTxt":
                return new RawTxtParser(configManager, logger);

            case "ReadyFeatures":
            default:
                // Unknown formats fall back to the ready-features parser.
                return new ReadyFeaturesParser(configManager, logger);
            }
        }
        }// end IsConformantStringLen

        /// <summary>
        /// Utility to compute the required string length in case of Raw features.
        /// </summary>
        /// <remarks>
        /// The result is the sum of the widths of the feature groups selected by
        /// <c>configManager.outputFeatures</c>. Every ID-based feature contributes
        /// (maximum ID + 1). An <c>outputFeatures</c> value that matches none of
        /// the cases below yields 0.
        /// </remarks>
        /// <param name="configManager">Configuration supplying the output feature set, the suppression table and the word encoding.</param>
        /// <returns>The computed string length.</returns>
        public static int ComputeStringLengthBitfield(ConfigurationManager configManager)
        {
            switch (configManager.outputFeatures)
            {
            case "All":
                return MrfFeaturesBitfieldLength(configManager)
                       + PosBitfieldLength()
                       + WordOnlyBitfieldLength(configManager);

            case "POSAndWord":
                return PosBitfieldLength()
                       + WordOnlyBitfieldLength(configManager);

            case "MrfAndWord":
                return MrfFeaturesBitfieldLength(configManager)
                       + WordOnlyBitfieldLength(configManager);

            case "WordOnly":
                return WordOnlyBitfieldLength(configManager);

            case "MrfAndPOS":
                return MrfFeaturesBitfieldLength(configManager)
                       + PosBitfieldLength();

            case "MrfOnly":
                // NOTE(review): the POS width is added here as well, although the
                // mode is named MrfOnly and an earlier (commented-out) formula for
                // this case summed only mrfType, p, r, f and s. Kept unchanged to
                // preserve existing behaviour - confirm against the code that
                // formats the features before removing it.
                return MrfFeaturesBitfieldLength(configManager)
                       + PosBitfieldLength();

            case "POSOnly":
                return PosBitfieldLength();

            default:
                return 0;
            } // end switch
        }     // end ComputeStringLength

        /// <summary>
        /// Tells whether the named feature is marked "Suppress" in the
        /// configuration's suppress-features table.
        /// </summary>
        private static bool IsFeatureSuppressed(ConfigurationManager configManager, string featureName)
        {
            return (String)configManager.suppressFeaturesHashTable[featureName] == "Suppress";
        }

        /// <summary>
        /// Sums (max ID + 1) over the mrfType, p, r, f and s features that are not suppressed.
        /// </summary>
        private static int MrfFeaturesBitfieldLength(ConfigurationManager configManager)
        {
            int length = 0;

            if (!IsFeatureSuppressed(configManager, "mrfType"))
            {
                length += (Parser.maxIDs.mrfType + 1);
            }

            if (!IsFeatureSuppressed(configManager, "p"))
            {
                length += (Parser.maxIDs.p + 1);
            }

            if (!IsFeatureSuppressed(configManager, "r"))
            {
                length += (Parser.maxIDs.r + 1);
            }

            if (!IsFeatureSuppressed(configManager, "f"))
            {
                length += (Parser.maxIDs.f + 1);
            }

            if (!IsFeatureSuppressed(configManager, "s"))
            {
                length += (Parser.maxIDs.s + 1);
            }

            return length;
        }

        /// <summary>
        /// Width of the POS feature: first entry of the maximum POS IDs plus one.
        /// The POS feature has no suppression check.
        /// </summary>
        private static int PosBitfieldLength()
        {
            return Parser.maxIDs.POS_IDs[0] + 1;
        }

        /// <summary>
        /// Width of the word-only ID feature, or 0 when "vocabularyWordID" is
        /// suppressed. An invalid <c>wordOnlyEncoding</c> is reported on the
        /// console and contributes 0.
        /// </summary>
        private static int WordOnlyBitfieldLength(ConfigurationManager configManager)
        {
            if (IsFeatureSuppressed(configManager, "vocabularyWordID"))
            {
                return 0;
            }

            switch (configManager.wordOnlyEncoding)
            {
            case "WordLevel":
                return Parser.maxIDs.vocabularyWordID + 1;

            case "CharacterLevel":
                return Parser.maxIDs.wordLength * FeaturesFormatter.CHAR_INCLUDING_DIACS_FEATURE_BITFIELD_LEN;

            default:
                Console.WriteLine("Incorrect WordOnlyEncoding configuration. {0} is invalid configuration. Valid configurations are: WordLevel or CharacterLevel.", configManager.wordOnlyEncoding);
                return 0;
            }    //end switch
        }