示例#1
0
        // Encodes the morphological features (mrfType, p, r, f, s) of a word as bit-field strings.
        // A feature is emitted only when its entry in suppressFeaturesHashTable is not "Suppress".
        // The mrfType segment replaces wordFeatures.features; every other segment is appended to it.
        // Both the ID and its maximum are shifted by one before being handed to ConvertToBitfieldString.
        protected override void FormatMrfWordFeatures(Word word, ref WordFeatures wordFeatures)
        {
            var suppressed = configManager.suppressFeaturesHashTable;

            if (!String.Equals((String)suppressed["mrfType"], "Suppress"))
            {
                wordFeatures.features = ConvertToBitfieldString(word.mrfType + 1, (uint)Parser.maxIDs.mrfType + 1);
            }

            if (!String.Equals((String)suppressed["p"], "Suppress"))
            {
                wordFeatures.features += ConvertToBitfieldString(word.p + 1, (uint)Parser.maxIDs.p + 1);
            }

            if (!String.Equals((String)suppressed["r"], "Suppress"))
            {
                wordFeatures.features += ConvertToBitfieldString(word.r + 1, (uint)Parser.maxIDs.r + 1);
            }

            if (!String.Equals((String)suppressed["f"], "Suppress"))
            {
                wordFeatures.features += ConvertToBitfieldString(word.f + 1, (uint)Parser.maxIDs.f + 1);
            }

            if (!String.Equals((String)suppressed["s"], "Suppress"))
            {
                wordFeatures.features += ConvertToBitfieldString(word.s + 1, (uint)Parser.maxIDs.s + 1);
            }
        }
        }     // end ContextExtract()

        // Builds the Type 1 context features for every entry of wordsFeatures.
        // For word i the resulting features string is the concatenation of:
        //   1. the features of the contextBeforeLength preceding words, farthest first;
        //   2. the central word: centralContextWordFeatures when addFeaturesToCentralContextWord
        //      is not empty, otherwise the normal word features;
        //   3. the last-characters features, when they are not empty;
        //   4. the features of the contextAfterLength following words.
        // Context positions that fall outside the array are filled with FeaturesFormatter.emptyFeatureString.
        public void ContextExtractType1()
        {
            // Loop on all words features
            for (int i = 0; i < wordsFeatures.Length; i++)
            {
                // Initialize the context features for this word
                contextFeatures[i] = new WordFeatures();

                // Set the target
                contextFeatures[i].target = wordsFeatures[i].target;

                // Fill in the BEFORE context words
                for (int j = configManager.contextBeforeLength; j > 0; j--)
                {
                    // i - j is a valid index as soon as i >= j. The former strict test (i > j)
                    // treated the first word (index 0) as missing and padded it with the empty string.
                    if (i >= j)
                    {
                        contextFeatures[i].features += wordsFeatures[i - j].features;
                    }
                    else
                    {
                        contextFeatures[i].features += FeaturesFormatter.emptyFeatureString;
                    }
                } // end for BEFORE

                // Put the concerned word in its context
                if (configManager.addFeaturesToCentralContextWord.Count != 0)
                {
                    // There exists special request for the central word
                    contextFeatures[i].features += wordsFeatures[i].centralContextWordFeatures;
                }
                else
                {
                    // Just add the normal word
                    contextFeatures[i].features += wordsFeatures[i].features;
                }

                // Insert the last characters features
                if (wordsFeatures[i].lastCharFeatures != "")
                {
                    contextFeatures[i].features += wordsFeatures[i].lastCharFeatures;
                }

                // Fill in the AFTER context words
                for (int j = 1; j <= configManager.contextAfterLength; j++)
                {
                    if ((i + j) < wordsFeatures.Length)
                    {
                        contextFeatures[i].features += wordsFeatures[i + j].features;
                    }
                    else
                    {
                        contextFeatures[i].features += FeaturesFormatter.emptyFeatureString;
                    }
                } // end for AFTER
            }     // end for wordsFeatures.Length
        }         // end ContextExtractType1()
示例#3
0
        }     // end ComputeStringLength

        // Method to format the targetString
        // Stores the bit-field form of the word target in wordFeatures.targetString.
        // The stored string is empty when "ContextTargets" is suppressed or the target type is not recognised.
        protected override void FormatTargetStringFeatures(ref WordFeatures wordFeatures)
        {
            String encodedTarget = "";
            bool targetsWanted = (String)configManager.suppressFeaturesHashTable["ContextTargets"] != "Suppress";

            if (targetsWanted)
            {
                // Both bit-field widths are computed up front, whatever the target type is
                uint diacWidth = (uint)Enum.GetValues(typeof(TargetCode)).Length;
                uint posWidth  = (uint)Parser.maxIDs.POS_IDs[0] + 1;

                String targetType = configManager.targetType;

                if (targetType == "DIAC")
                {
                    // Only the first target entry is encoded for diacritics
                    encodedTarget = FeaturesFormatter.ConvertToBitfieldString(wordFeatures.target[0], diacWidth);
                }
                else if (targetType == "POS")
                {
                    // The whole target array is handed over for POS
                    encodedTarget = FeaturesFormatter.ConvertToBitfieldString(wordFeatures.target, posWidth);
                }
                else
                {
                    Console.WriteLine("Incorrect TargetType configuration. {0} is invalid configuration. Valid configurations are: DIAC or POS.", configManager.targetType);
                }
            } // end if !Suppress("ContextTargets")

            wordFeatures.targetString = encodedTarget;
        }// end FormatTargetStringFeatures()
示例#4
0
        }// end IsConformantStringLen

        // Stores the normalised form of the word target in wordFeatures.targetString.
        // Each target value is divided by the maximum value of its kind and written followed by ",".
        //   "DIAC": only target[0] is used; the maximum is the largest TargetCode enum value.
        //   "POS":  every entry of target is used; the maximum is Parser.maxIDs.POS_IDs[1].
        // The stored string is empty when "ContextTargets" is suppressed or the target type is not recognised.
        protected override void FormatTargetStringFeatures(ref WordFeatures wordFeatures)
        {
            String targetString = "";

            if ((String)configManager.suppressFeaturesHashTable["ContextTargets"] != "Suppress")
            {
                switch (configManager.targetType)
                {
                case "DIAC":
                    int maxDiacTargetValue = (int)((TargetCode[])Enum.GetValues(typeof(TargetCode))).Max();

                    // Divide as doubles: integer division would truncate every ratio to 0 or 1
                    targetString = ((double)wordFeatures.target[0] / (double)maxDiacTargetValue).ToString() + ",";
                    break;

                case "POS":
                    int maxPOSTargetValue = Parser.maxIDs.POS_IDs[1];
                    foreach (int target in wordFeatures.target)
                    {
                        // Same floating-point division as the DIAC case
                        targetString += ((double)target / (double)maxPOSTargetValue).ToString() + ",";
                    }    // end foreach

                    break;

                default:
                    Console.WriteLine("Incorrect TargetType configuration. {0} is invalid configuration. Valid configurations are: DIAC or POS.", configManager.targetType);
                    break;
                }// end switch
            }
            wordFeatures.targetString = targetString;
        } // FormatTargetStringFeatures
示例#5
0
        // Tells whether the comma-separated features string of wordFeature holds exactly stringLength entries.
        // One is subtracted from the split count to drop the value that follows the last ',' in the features.
        protected override bool IsConformantStringLen(WordFeatures wordFeature)
        {
            int featureCount = wordFeature.features.Split(',').Length - 1;

            return featureCount == stringLength;

            // TODO: check if features without the mrf part conforms to Parser.maxIDs.POS_IDs[0] + 1 or not
        }// end IsConformantStringLen
        }         // end ContextExtractType2()

        // Forms the Type 2 context features of the word at wordPosition.
        // Format of Type 2:
        // <Concerned word>+<Context word 1><Target 1><Context word 2><Target 2>...<Context word n><Target n>
        // The <Target k> parts are added only when "ContextTargets" is not suppressed.
        private WordFeatures GetWordContextType2(WordFeatures[] contextWords, int wordPosition)
        {
            WordFeatures result = new WordFeatures();

            // The concerned word supplies the target and opens the features string
            result.target   = contextWords[wordPosition].target;
            result.features = contextWords[wordPosition].features;

            for (int idx = 0; idx < contextWords.Length; idx++)
            {
                // The concerned word is already first in the string
                if (idx == wordPosition)
                {
                    continue;
                }

                result.features += contextWords[idx].features;

                // Follow each context word with its CRF target, unless suppressed
                if ((String)configManager.suppressFeaturesHashTable["ContextTargets"] != "Suppress")
                {
                    result.features += contextWords[idx].targetString;
                }
            }// end for

            return result;
        }// end GetWordContextType2
 // Appends the bit-field encoding of the vocabulary word ID to wordFeatures.features,
 // unless "vocabularyWordID" is marked "Suppress" in suppressFeaturesHashTable.
 protected override void FormatWordIDWordFeatures(Word word, ref WordFeatures wordFeatures)
 {
     bool isSuppressed = (String)configManager.suppressFeaturesHashTable["vocabularyWordID"] == "Suppress";

     if (isSuppressed)
     {
         return;
     }

     wordFeatures.features = wordFeatures.features + ConvertToBitfieldString(word.vocabularyWordID, (uint)Parser.maxIDs.vocabularyWordID);
 }
示例#8
0
        } // end FormatMrfWordFeatures

        // Appends the binary-string encoding of the vocabulary word ID to wordFeatures.features,
        // unless "vocabularyWordID" is marked "Suppress" in suppressFeaturesHashTable.
        // The width passed to GetIntBinaryString is Parser.GetNumBits of the maximum vocabulary word ID.
        protected override void FormatWordIDWordFeatures(Word word, ref WordFeatures wordFeatures)
        {
            bool isSuppressed = (String)configManager.suppressFeaturesHashTable["vocabularyWordID"] == "Suppress";

            if (isSuppressed)
            {
                return;
            }

            wordFeatures.features = wordFeatures.features + GetIntBinaryString(word.vocabularyWordID, Parser.GetNumBits(Parser.maxIDs.vocabularyWordID));
        }
示例#9
0
        }     // end FormatFeatures

        // Appends the POS bit-field to wordFeatures.features.
        // One "0," or "1," entry is written for every position from 0 to Parser.maxIDs.POS_IDs[0] inclusive;
        // the entry is "1," when the position occurs in word.POS_IDs.
        protected virtual void FormatPOSWordFeatures(Word word, ref WordFeatures wordFeatures)
        {
            for (int position = 0; position <= Parser.maxIDs.POS_IDs[0]; position++)
            {
                // Look for the current position among the POS IDs of the word
                bool isSet = false;

                foreach (var posId in word.POS_IDs)
                {
                    if (posId == position)
                    {
                        isSet = true;
                        break;
                    }
                }// end foreach

                // Write the bit of this position once
                wordFeatures.features += (isSet ? "1" : "0") + ",";
            } // end for
        }     // end FormatPOSWordFeatures
示例#10
0
        } // end FormatMrfWordFeatures

        // Appends the word-identity features in the encoding chosen by configManager.wordOnlyEncoding:
        //   "WordLevel"      - one binary string for the vocabulary word ID;
        //   "CharacterLevel" - one binary string per character of wordNameWithProperDiacritics,
        //                      then zero-valued strings until Parser.maxIDs.wordLength entries are written.
        // Nothing is appended when "vocabularyWordID" is suppressed; an unknown encoding is reported on the console.
        protected override void FormatWordIDWordFeatures(Word word, ref WordFeatures wordFeatures)
        {
            if ((String)configManager.suppressFeaturesHashTable["vocabularyWordID"] == "Suppress")
            {
                return;
            }

            String encoding = configManager.wordOnlyEncoding;

            if (encoding == "WordLevel")
            {
                wordFeatures.features += GetIntBinaryString(word.vocabularyWordID, Parser.GetNumBits(Parser.maxIDs.vocabularyWordID));
            }
            else if (encoding == "CharacterLevel")
            {
                // One entry per character of the word
                foreach (char letter in word.wordNameWithProperDiacritics)
                {
                    int charCode = letter % 1568 + 1;
                    wordFeatures.features += GetIntBinaryString(charCode, Parser.GetNumBits(FeaturesFormatter.CHAR_INCLUDING_DIACS_FEATURE_BITFIELD_LEN));
                }    // end foreach

                // Pad the rest of the word up to the max word length
                for (int written = word.wordNameWithProperDiacritics.Length; written < Parser.maxIDs.wordLength; written++)
                {
                    wordFeatures.features += GetIntBinaryString(0, Parser.GetNumBits(FeaturesFormatter.CHAR_INCLUDING_DIACS_FEATURE_BITFIELD_LEN));
                }    // end for
            }
            else
            {
                Console.WriteLine("Incorrect WordOnlyEncoding configuration. {0} is invalid configuration. Valid configurations are: WordLevel or CharacterLevel.", configManager.wordOnlyEncoding);
            }
        }
 // Appends the vocabulary word ID, divided by the maximum vocabulary word ID, followed by ",".
 // Nothing is appended when "vocabularyWordID" is marked "Suppress" in suppressFeaturesHashTable.
 protected override void FormatWordIDWordFeatures(Word word, ref WordFeatures wordFeatures)
 {
     if ((String)configManager.suppressFeaturesHashTable["vocabularyWordID"] == "Suppress")
     {
         return;
     }

     double normalisedId = (double)word.vocabularyWordID / (double)Parser.maxIDs.vocabularyWordID;

     wordFeatures.features += normalisedId.ToString() + ",";
 }
        // Writes the morphological features (mrfType, p, r, f, s) as ratios of their maximum IDs,
        // each followed by ",". A feature is skipped when its entry in suppressFeaturesHashTable is "Suppress".
        // The mrfType entry replaces wordFeatures.features; the other entries are appended to it.
        protected override void FormatMrfWordFeatures(Word word, ref WordFeatures wordFeatures)
        {
            var suppressed = configManager.suppressFeaturesHashTable;

            if ((String)suppressed["mrfType"] != "Suppress")
            {
                double mrfTypeRatio = (double)word.mrfType / (double)Parser.maxIDs.mrfType;
                wordFeatures.features = mrfTypeRatio.ToString() + ",";
            }

            if ((String)suppressed["p"] != "Suppress")
            {
                double pRatio = (double)word.p / (double)Parser.maxIDs.p;
                wordFeatures.features += pRatio.ToString() + ",";
            }

            if ((String)suppressed["r"] != "Suppress")
            {
                double rRatio = (double)word.r / (double)Parser.maxIDs.r;
                wordFeatures.features += rRatio.ToString() + ",";
            }

            if ((String)suppressed["f"] != "Suppress")
            {
                double fRatio = (double)word.f / (double)Parser.maxIDs.f;
                wordFeatures.features += fRatio.ToString() + ",";
            }

            if ((String)suppressed["s"] != "Suppress")
            {
                double sRatio = (double)word.s / (double)Parser.maxIDs.s;
                wordFeatures.features += sRatio.ToString() + ",";
            }
        }
示例#13
0
        }     // end ComputeStringLength

        // Stores the binary-string form of the word target in wordFeatures.targetString.
        //   "DIAC": target[0] is encoded with the bit count of the largest TargetCode enum value.
        //   "POS":  every entry of target is encoded with the bit count of Parser.maxIDs.POS_IDs[1].
        // The stored string is empty when "ContextTargets" is suppressed or the target type is not recognised.
        protected override void FormatTargetStringFeatures(ref WordFeatures wordFeatures)
        {
            String encodedTarget = "";
            bool targetsWanted = (String)configManager.suppressFeaturesHashTable["ContextTargets"] != "Suppress";

            if (targetsWanted)
            {
                String targetType = configManager.targetType;

                if (targetType == "DIAC")
                {
                    TargetCode[] allCodes = (TargetCode[])Enum.GetValues(typeof(TargetCode));
                    int highestDiacCode = (int)allCodes.Max();

                    encodedTarget = GetIntBinaryString(wordFeatures.target[0], Parser.GetNumBits(highestDiacCode));
                }
                else if (targetType == "POS")
                {
                    int highestPosCode = Parser.maxIDs.POS_IDs[1];

                    foreach (int posTarget in wordFeatures.target)
                    {
                        encodedTarget += GetIntBinaryString(posTarget, Parser.GetNumBits(highestPosCode));
                    }    // end foreach
                }
                else
                {
                    Console.WriteLine("Incorrect TargetType configuration. {0} is invalid configuration. Valid configurations are: DIAC or POS.", configManager.targetType);
                }
            }

            wordFeatures.targetString = encodedTarget;
        } // FormatTargetStringFeatures
 // Sets wordFeatures.features to the bit-field encodings of mrfType, p, r, f and s, in that order.
 // Each value is encoded against the matching maximum ID held by the parser instance.
 protected override void FormatMrfWordFeatures(Word word, ref WordFeatures wordFeatures)
 {
     var mrfTypeBits = ConvertToBitfieldString(word.mrfType, (uint)parser.maxIDs.mrfType);
     var pBits       = ConvertToBitfieldString(word.p, (uint)parser.maxIDs.p);
     var rBits       = ConvertToBitfieldString(word.r, (uint)parser.maxIDs.r);
     var fBits       = ConvertToBitfieldString(word.f, (uint)parser.maxIDs.f);
     var sBits       = ConvertToBitfieldString(word.s, (uint)parser.maxIDs.s);

     wordFeatures.features = mrfTypeBits + pBits + rBits + fBits + sBits;
 }
示例#15
0
 // Pads wordFeatures.features with "0," entries, one for every feature
 // still missing between addedFeatures and stringLength.
 protected override void PadRestOfFeaturesString(ref WordFeatures wordFeatures)
 {
     for (int filled = addedFeatures; filled < stringLength; filled++)
     {
         wordFeatures.features = wordFeatures.features + "0,";
     }//end for
 }
        // Sets wordFeatures.features to the binary-string encodings of mrfType, p, r, f and s, in that order.
        // The width of each encoding is parser.GetNumBits of the matching maximum ID.
        protected override void FormatMrfWordFeatures(Word word, ref WordFeatures wordFeatures)
        {
            var mrfTypeBits = GetIntBinaryString(word.mrfType, parser.GetNumBits(parser.maxIDs.mrfType));
            var pBits       = GetIntBinaryString(word.p, parser.GetNumBits(parser.maxIDs.p));
            var rBits       = GetIntBinaryString(word.r, parser.GetNumBits(parser.maxIDs.r));
            var fBits       = GetIntBinaryString(word.f, parser.GetNumBits(parser.maxIDs.f));
            var sBits       = GetIntBinaryString(word.s, parser.GetNumBits(parser.maxIDs.s));

            wordFeatures.features = mrfTypeBits + pBits + rBits + fBits + sBits;
        } // end FormatMrfWordFeatures
示例#17
0
        // Appends the raw vocabulary word ID, shifted by the running offset and followed by ",",
        // unless "vocabularyWordID" is marked "Suppress" in suppressFeaturesHashTable.
        // When the ID is emitted, offset advances by Parser.maxIDs.vocabularyWordID and addedFeatures by one.
        protected override void FormatWordIDWordFeatures(Word word, ref WordFeatures wordFeatures)
        {
            // The former "> 47202" block only incremented a throw-away local (a debugger hook) and was removed
            if ((String)configManager.suppressFeaturesHashTable["vocabularyWordID"] != "Suppress")
            {
                wordFeatures.features += (offset + word.vocabularyWordID).ToString() + ",";
                offset        += Parser.maxIDs.vocabularyWordID;
                addedFeatures += 1;
            }
        }
示例#18
0
        }     // end ContextExtract()

        // Builds the Type 1 context features for every entry of wordsFeatures.
        // For word i the resulting features string is the concatenation of:
        //   1. the features of the contextBeforeLength preceding words, farthest first;
        //   2. the features of the word itself;
        //   3. the last-characters features, when they are not empty;
        //   4. the features of the contextAfterLength following words.
        // Context positions that fall outside the array are filled with FeaturesFormatter.emptyFeatureString.
        public void ContextExtractType1()
        {
            // Loop on all words features
            for (int i = 0; i < wordsFeatures.Length; i++)
            {
                // Initialize the context features for this word
                contextFeatures[i] = new WordFeatures();

                // Set the target
                contextFeatures[i].target = wordsFeatures[i].target;

                // Fill in the BEFORE context words
                for (int j = configManager.contextBeforeLength; j > 0; j--)
                {
                    // i - j is a valid index as soon as i >= j. The former strict test (i > j)
                    // treated the first word (index 0) as missing and padded it with the empty string.
                    if (i >= j)
                    {
                        contextFeatures[i].features += wordsFeatures[i - j].features;
                    }
                    else
                    {
                        contextFeatures[i].features += FeaturesFormatter.emptyFeatureString;
                    }
                } // end for BEFORE

                // Put the concerned word in its context
                contextFeatures[i].features += wordsFeatures[i].features;

                // Insert the last characters features
                if (wordsFeatures[i].lastCharFeatures != "")
                {
                    contextFeatures[i].features += wordsFeatures[i].lastCharFeatures;
                }

                // Fill in the AFTER context words
                for (int j = 1; j <= configManager.contextAfterLength; j++)
                {
                    if ((i + j) < wordsFeatures.Length)
                    {
                        contextFeatures[i].features += wordsFeatures[i + j].features;
                    }
                    else
                    {
                        contextFeatures[i].features += FeaturesFormatter.emptyFeatureString;
                    }
                } // end for AFTER
            }     // end for wordsFeatures.Length
        }         // end ContextExtractType1()
        // Raw POS format: writes each POS ID of the word as (offset + ID + 1) followed by ",",
        // counting every written entry in addedFeatures, then pads with "0," up to stringLength
        // and finally advances offset by Parser.maxIDs.POS_IDs[0] + 1.
        protected override void FormatPOSWordFeatures(Word word, ref WordFeatures wordFeatures)
        {
            // Add POS ID's
            foreach (int posId in word.POS_IDs)
            {
                int shiftedId = offset + posId + 1;

                wordFeatures.features = wordFeatures.features + shiftedId.ToString() + ",";
                addedFeatures++;
            }

            // Now pad the rest of stringLength with zeros
            for (int filled = addedFeatures; filled < stringLength; filled++)
            {
                wordFeatures.features = wordFeatures.features + "0,";
            }//end for

            offset = offset + Parser.maxIDs.POS_IDs[0] + 1;
        }
        }     // end ContextExtractType1()

        // Builds the Type 2 context features.
        // wordsFeatures is consumed in consecutive groups of contextLength words, up to
        // numWordsWithContextFeatures; every word of a group gets its context from GetWordContextType2,
        // which receives the whole group and the position of the word inside it.
        public void ContextExtractType2()
        {
            // Scratch buffer for the words of the current group, reused from one group to the next
            WordFeatures[] group = new WordFeatures[contextLength];

            for (int groupStart = 0; groupStart < numWordsWithContextFeatures; groupStart += contextLength)
            {
                // Collect the words of this group
                for (int slot = 0; slot < contextLength; slot++)
                {
                    group[slot] = wordsFeatures[groupStart + slot];
                } // end for contextLength

                // Form the context of each word of the group
                for (int slot = 0; slot < contextLength; slot++)
                {
                    contextFeatures[groupStart + slot] = GetWordContextType2(group, slot);
                } // end for contextLength
            }     // end for wordsFeatures.Length
        }         // end ContextExtractType2()
示例#21
0
        // Encodes the morphological features (mrfType, p, r, f, s) of a word as binary strings.
        // A feature is emitted only when its entry in suppressFeaturesHashTable is not "Suppress".
        // The mrfType segment replaces wordFeatures.features; every other segment is appended to it.
        // The width of each segment is Parser.GetNumBits of the matching maximum ID.
        protected override void FormatMrfWordFeatures(Word word, ref WordFeatures wordFeatures)
        {
            var suppressed = configManager.suppressFeaturesHashTable;

            if (!String.Equals((String)suppressed["mrfType"], "Suppress"))
            {
                wordFeatures.features = GetIntBinaryString(word.mrfType, Parser.GetNumBits(Parser.maxIDs.mrfType));
            }

            if (!String.Equals((String)suppressed["p"], "Suppress"))
            {
                wordFeatures.features += GetIntBinaryString(word.p, Parser.GetNumBits(Parser.maxIDs.p));
            }

            if (!String.Equals((String)suppressed["r"], "Suppress"))
            {
                wordFeatures.features += GetIntBinaryString(word.r, Parser.GetNumBits(Parser.maxIDs.r));
            }

            if (!String.Equals((String)suppressed["f"], "Suppress"))
            {
                wordFeatures.features += GetIntBinaryString(word.f, Parser.GetNumBits(Parser.maxIDs.f));
            }

            if (!String.Equals((String)suppressed["s"], "Suppress"))
            {
                wordFeatures.features += GetIntBinaryString(word.s, Parser.GetNumBits(Parser.maxIDs.s));
            }
        } // end FormatMrfWordFeatures
        }         // end ContextExtractType2()

        // Forms the Type 2 context features of the word at wordPosition:
        // the features of that word first, then the features of every other context word in array order.
        // The target of the result is the target of the word at wordPosition.
        private WordFeatures GetWordContextType2(WordFeatures[] contextWords, int wordPosition)
        {
            WordFeatures result = new WordFeatures();

            // The concerned word supplies the target and opens the features string
            result.target   = contextWords[wordPosition].target;
            result.features = contextWords[wordPosition].features;

            for (int idx = 0; idx < contextWords.Length; idx++)
            {
                // The concerned word is already first in the string
                if (idx == wordPosition)
                {
                    continue;
                }

                result.features += contextWords[idx].features;
            }// end for

            return result;
        }// end GetWordContextType2
示例#23
0
        // Formats the features of every entry of words and stores the accepted ones in wordsFeatures.
        // For each word:
        //   - the target and the last-characters features are extracted from the word name;
        //   - the Mrf and/or POS features are formatted according to configManager.outputFeatures
        //     ("MrfAndPOS", "MrfOnly" or "POSOnly"); POS formatting is skipped when POS_IDs is null;
        //   - the word is kept only when IsConformantStringLen accepts it, otherwise
        //     NON_CONFORMANT_FEATURE_STRING is logged and the word is dropped.
        // offset and addedFeatures are reset to 0 before and after each word.
        // Throws IndexOutOfRangeException when outputFeatures holds an unknown value.
        // An OutOfMemoryException is logged and rethrown.
        public void FormatFeatures()
        {
            logger.LogTrace("Features formatting started...");

            // Initialize the words features
            wordsFeatures = new WordFeatures[words.Length];

            ArrayList wordsFeaturesList = new ArrayList();

            int i = 0;

            try
            {
                // Traverse all words
                for (i = 0; i < words.Length; i++)
                {
                    WordFeatures wordFeaturesLocal = new WordFeatures();

                    // Extract the target
                    wordFeaturesLocal.target = GetTarget(words[i].wordName);

                    // Extract the last characters features
                    wordFeaturesLocal.lastCharFeatures = GetLastCharFeatures(words[i].wordName);

                    // Reset features offset for Raw case
                    offset = 0;

                    // Reset number of added features
                    addedFeatures = 0;

                    // Check the required features to be out
                    switch (configManager.outputFeatures)
                    {
                    case "MrfAndPOS":
                        // Format the features according to the type configured (Default, Binar or Bitfield)
                        FormatMrfWordFeatures(words[i], ref wordFeaturesLocal);

                        // Fill in the POS bit-field
                        if (words[i].POS_IDs != null)
                        {
                            FormatPOSWordFeatures(words[i], ref wordFeaturesLocal);
                        }

                        break;

                    case "MrfOnly":
                        // Format the features according to the type configured (Default, Binar or Bitfield)
                        FormatMrfWordFeatures(words[i], ref wordFeaturesLocal);

                        break;

                    case "POSOnly":
                        // Fill in the POS bit-field
                        if (words[i].POS_IDs != null)
                        {
                            FormatPOSWordFeatures(words[i], ref wordFeaturesLocal);
                        }
                        break;

                    default:
                        Console.WriteLine("Incorrect features format configuration. {0} is invalid configuration. Valid configurations are: MrfAndPOS, MrfOnly, POSOnly.", configManager.outputFeatures);
                        throw (new IndexOutOfRangeException());
                    }// end switch

                    // Check length of the formatted wordFeature
                    if (!IsConformantStringLen(wordFeaturesLocal))
                    {
                        // Log error; the word is not added to the list
                        logger.LogError("The expected feature string length is " + stringLength + " while this one length is " + wordFeaturesLocal.features.Length,
                                        ErrorCode.NON_CONFORMANT_FEATURE_STRING);
                    }
                    else
                    {
                        // Now form the central context word features
                        wordFeaturesLocal.centralContextWordFeatures = FormatCentralContextWordFeatures(words[i], wordFeaturesLocal.features);

                        // Add the word to the list
                        wordsFeaturesList.Add(wordFeaturesLocal);
                    }

                    // Reset features offset for Raw case
                    offset = 0;

                    // Reset number of added features
                    addedFeatures = 0;
                }// end for

                // Name the element type directly: reading it from wordsFeaturesList[0]
                // throws when no word was accepted (empty input or only non-conformant words)
                wordsFeatures = (WordFeatures[])wordsFeaturesList.ToArray(typeof(WordFeatures));

                logger.LogTrace("Features formatting done successfuly");
            }
            catch (OutOfMemoryException)
            {
                // i equals words.Length when the failure happens after the loop (e.g. in ToArray),
                // so the word name is reported only while i is still a valid index
                String message = "Out of memory at word number " + (i + 1).ToString();
                if (i < words.Length)
                {
                    message += " which is " + words[i].wordName;
                }

                logger.LogError(message, ErrorCode.OUT_OF_MEMORY);
                Console.WriteLine(message);

                // Rethrow without resetting the stack trace
                throw;
            } // end catch
        }     // end FormatFeatures
示例#24
0
 // Method to check if the wordFeature string conforms to the expected string length or not.
 // FormatFeatures calls this after a word's feature string has been built: when it returns
 // false the word is logged with ErrorCode.NON_CONFORMANT_FEATURE_STRING and is not added to
 // the resulting features list. Being abstract, the length rule itself is supplied by the
 // derived formatter.
 protected abstract bool IsConformantStringLen(WordFeatures wordFeature);
示例#25
0
        }     // end FormatPOSWordFeatures

        // Method to put the features in their format.
        // FormatFeatures calls this for the "All", "MrfAndWord", "MrfAndPOS" and "MrfOnly"
        // output configurations. word is the word being formatted; wordFeatures is passed by
        // reference so the derived implementation can write the formatted result into it.
        protected abstract void FormatMrfWordFeatures(Word word, ref WordFeatures wordFeatures);
 // Method to format the targetString.
 // FormatFeatures invokes this once per word, right after the target-extraction switch and
 // before the feature string is built. wordFeatures is passed by reference so the derived
 // implementation can update it.
 protected abstract void FormatTargetStringFeatures(ref WordFeatures wordFeatures);
示例#27
0
 // POS formatter override: appends the binary-string form of the word's equivalent POS ID
 // to the feature string already accumulated in wordFeatures.features.
 // The width of the binary string is whatever Parser.GetNumBits returns for
 // Parser.maxIDs.vocabularyWordID.
 // NOTE(review): the width is derived from the vocabulary word ID maximum rather than a
 // POS-specific maximum - confirm this is intended.
 protected override void FormatPOSWordFeatures(Word word, ref WordFeatures wordFeatures)
 {
     int equivalentPosId = (int)word.equivalentPOS_ID;
     var bitCount = Parser.GetNumBits(Parser.maxIDs.vocabularyWordID);
     var posBits = GetIntBinaryString(equivalentPosId, bitCount);

     wordFeatures.features = wordFeatures.features + posBits;
 }
        // Method to start features extraction.
        //
        // For every entry of words this builds a WordFeatures record: the target (per
        // configManager.targetType), the target string, the last-character features and the
        // feature string selected by configManager.outputFeatures. Records whose feature string
        // passes IsConformantStringLen also get their central-context features and are collected
        // into wordsFeatures; non-conformant words are logged and skipped, so wordsFeatures may
        // end up shorter than words (possibly empty).
        //
        // Throws IndexOutOfRangeException when configManager.outputFeatures is not a supported
        // value. An OutOfMemoryException is logged and rethrown with its original stack trace.
        public void FormatFeatures()
        {
            logger.LogTrace("Features formatting started...");

            // Initialize the words features
            wordsFeatures = new WordFeatures[words.Length];

            ArrayList wordsFeaturesList = new ArrayList();

            // Declared outside the loop so the catch block can report the failing word
            int i = 0;

            try
            {
                // Traverse all words
                for (i = 0; i < words.Length; i++)
                {
                    WordFeatures wordFeaturesLocal = new WordFeatures();

                    // Extract the target
                    switch (configManager.targetType)
                    {
                    case "DIAC":
                        wordFeaturesLocal.target    = new int[1];
                        wordFeaturesLocal.target[0] = (int)GetTarget(words[i].wordName);
                        break;

                    case "POS":
                        wordFeaturesLocal.target = (int[])GetTarget(words[i]).Clone();
                        break;

                    default:
                        // Best effort: report the bad configuration and carry on without a target
                        Console.WriteLine("Incorrect TargetType configuration. {0} is invalid configuration. Valid configurations are: DIAC or POS.", configManager.targetType);
                        break;
                    }

                    // Format the targetString
                    FormatTargetStringFeatures(ref wordFeaturesLocal);

                    // Extract the last characters features
                    wordFeaturesLocal.lastCharFeatures = GetLastCharFeatures(words[i].wordName);

                    // Reset features offset for Raw case
                    offset = 0;

                    // Reset number of added features
                    addedFeatures = 0;

                    // Decide which feature groups the configured output format asks for
                    bool includeWordID = false;
                    bool includeMrf    = false;
                    bool includePOS    = false;

                    switch (configManager.outputFeatures)
                    {
                    case "All":
                        includeWordID = true;
                        includeMrf    = true;
                        includePOS    = true;
                        break;

                    case "POSAndWord":
                        includeWordID = true;
                        includePOS    = true;
                        break;

                    case "MrfAndWord":
                        includeWordID = true;
                        includeMrf    = true;
                        break;

                    case "WordOnly":
                        includeWordID = true;
                        break;

                    case "MrfAndPOS":
                        includeMrf = true;
                        includePOS = true;
                        break;

                    case "MrfOnly":
                        includeMrf = true;
                        break;

                    case "POSOnly":
                        includePOS = true;
                        break;

                    default:
                        string configurationError = string.Format(
                            "Incorrect features format configuration. {0} is invalid configuration. Valid configurations are: All, POSAndWord, MrfAndWord, WordOnly, MrfAndPOS, MrfOnly, POSOnly.",
                            configManager.outputFeatures);
                        Console.WriteLine(configurationError);
                        // Exception type kept as-is for existing callers; it now carries the message too
                        throw new IndexOutOfRangeException(configurationError);
                    }// end switch

                    // The groups are always emitted in the same order: word ID, then Mrf, then POS

                    // Fill in word ID features
                    if (includeWordID)
                    {
                        FormatWordIDWordFeatures(words[i], ref wordFeaturesLocal);
                    }

                    // Format the features according to the type configured (Default, Binar or Bitfield)
                    if (includeMrf)
                    {
                        FormatMrfWordFeatures(words[i], ref wordFeaturesLocal);
                    }

                    // Fill in the POS bit-field (only for words that actually carry POS IDs)
                    if (includePOS && words[i].POS_IDs != null)
                    {
                        FormatPOSWordFeatures(words[i], ref wordFeaturesLocal);
                    }

                    // Check length of the formatted wordFeature
                    if (!IsConformantStringLen(wordFeaturesLocal))
                    {
                        // The feature string may never have been assigned; do not let the log call itself fail
                        int actualLength = (wordFeaturesLocal.features == null) ? 0 : wordFeaturesLocal.features.Length;

                        // Log error and skip the word
                        logger.LogError("The expected feature string length is " + stringLength + " while this one length is " + actualLength,
                                        ErrorCode.NON_CONFORMANT_FEATURE_STRING);
                    }
                    else
                    {
                        // Now form the central context word features
                        wordFeaturesLocal.centralContextWordFeatures = FormatCentralContextWordFeatures(words[i], wordFeaturesLocal.features);

                        // Add the word to the list
                        wordsFeaturesList.Add(wordFeaturesLocal);
                    }

                    // Reset features offset for Raw case
                    offset = 0;

                    // Reset number of added features
                    addedFeatures = 0;
                }// end for

                // typeof() instead of wordsFeaturesList[0].GetType(): indexing element 0 throws
                // when no word was accepted, whereas this yields an empty array in that case.
                wordsFeatures = (WordFeatures[])wordsFeaturesList.ToArray(typeof(WordFeatures));

                logger.LogTrace("Features formatting done successfuly");
            }
            catch (OutOfMemoryException)
            {
                // i equals words.Length when the failure happened after the loop; there is no word to name then
                string failingWord = (i < words.Length) ? " which is " + words[i].wordName : string.Empty;
                string outOfMemoryMessage = "Out of memory at word number " + (i + 1).ToString() + failingWord;

                logger.LogError(outOfMemoryMessage, ErrorCode.OUT_OF_MEMORY);
                Console.WriteLine(outOfMemoryMessage);

                // Bare rethrow keeps the original stack trace
                throw;
            } // end catch
        }     // end FormatFeatures
示例#29
0
        } // end GetIntBinaryString

        // Length check override: the feature string is conformant when its length is exactly
        // twice stringLength (the original author's note: the factor of 2 accounts for the ","
        // that follows each number).
        protected override bool IsConformantStringLen(WordFeatures wordFeature)
        {
            var expectedLength = stringLength * 2;
            var actualLength = wordFeature.features.Length;

            return expectedLength == actualLength;
        }// end IsConformantStringLen
 // Method to pad the string if needed.
 // Virtual hook with an empty default body: this implementation leaves wordFeatures
 // untouched, and a derived class may override it to do actual padding.
 // NOTE(review): the call sites visible in FormatFeatures are all commented out - confirm
 // whether anything still invokes this.
 protected virtual void PadRestOfFeaturesString(ref WordFeatures wordFeatures)
 {
     // Nothing to be done in the default implementation
 }