Encapsulates positional frequency data for words; that is, how often a word appears in a specific position within an identifier.
 /// <summary>
 /// Initializes a UnigramMethodRule from caller-supplied word sets and positional frequency data.
 /// </summary>
 /// <param name="specialWords">Words that indicate the method name needs special handling.</param>
 /// <param name="booleanArgumentVerbs">Verbs that indicate that the boolean arguments to a method should be included in the UnknownArguments list.</param>
 /// <param name="nounPhraseIndicators">Words that indicate the beginning of a noun phrase.</param>
 /// <param name="positionalFrequencies">Positional frequency data.</param>
 public UnigramMethodRule(
     HashSet<string> specialWords,
     HashSet<string> booleanArgumentVerbs,
     HashSet<string> nounPhraseIndicators,
     PositionalFrequencies positionalFrequencies)
 {
     // The parameterless base constructor runs implicitly before these assignments.
     // Each argument is assigned directly to the member of the matching name.
     SpecialWords = specialWords;
     BooleanArgumentVerbs = booleanArgumentVerbs;
     NounPhraseIndicators = nounPhraseIndicators;
     PositionalFrequencies = positionalFrequencies;
 }
Exemple #2
0
 /// <summary>
 /// Initializes a NonBaseVerbRule with explicit part-of-speech data, tagger, splitter, word sets and positional frequency data.
 /// </summary>
 /// <param name="posData">The part-of-speech data to use.</param>
 /// <param name="tagger">The part-of-speech tagger to use.</param>
 /// <param name="splitter">The identifier splitter to use.</param>
 /// <param name="specialWords">Words that indicate the method name needs special handling.</param>
 /// <param name="booleanArgumentVerbs">Verbs that indicate that the boolean arguments to a method should be included in the UnknownArguments list.</param>
 /// <param name="nounPhraseIndicators">Words that indicate the beginning of a noun phrase.</param>
 /// <param name="positionalFrequencies">Positional frequency data.</param>
 public NonBaseVerbRule(
     PartOfSpeechData posData,
     Tagger tagger,
     IdSplitter splitter,
     HashSet<string> specialWords,
     HashSet<string> booleanArgumentVerbs,
     HashSet<string> nounPhraseIndicators,
     PositionalFrequencies positionalFrequencies)
     : base(
         posData,
         tagger,
         splitter,
         specialWords,
         booleanArgumentVerbs,
         nounPhraseIndicators,
         positionalFrequencies)
 {
     // Nothing to do here: every argument is forwarded unchanged to the base constructor.
 }
Exemple #3
0
 /// <summary>
 /// Initializes a NonBaseVerbRule from caller-supplied word sets and positional frequency data.
 /// </summary>
 /// <param name="specialWords">Words that indicate the method name needs special handling.</param>
 /// <param name="booleanArgumentVerbs">Verbs that indicate that the boolean arguments to a method should be included in the UnknownArguments list.</param>
 /// <param name="nounPhraseIndicators">Words that indicate the beginning of a noun phrase.</param>
 /// <param name="positionalFrequencies">Positional frequency data.</param>
 public NonBaseVerbRule(
     HashSet<string> specialWords,
     HashSet<string> booleanArgumentVerbs,
     HashSet<string> nounPhraseIndicators,
     PositionalFrequencies positionalFrequencies)
     : base(
         specialWords,
         booleanArgumentVerbs,
         nounPhraseIndicators,
         positionalFrequencies)
 {
     // Nothing to do here: every argument is forwarded unchanged to the base constructor.
 }
Exemple #4
0
 /// <summary>
 /// Initializes a UnigramTagger that uses the given part-of-speech and positional frequency data sets.
 /// </summary>
 /// <param name="posData">The part-of-speech data to use.</param>
 /// <param name="frequencies">The positional frequency data to use.</param>
 public UnigramTagger(
     PartOfSpeechData posData,
     PositionalFrequencies frequencies)
 {
     // Both data sets are stored as supplied.
     pos = posData;
     positionalFrequencies = frequencies;
 }
 /// <summary>
 /// Initializes a UnigramMethodRule with explicit part-of-speech data, tagger, splitter, word sets and positional frequency data.
 /// </summary>
 /// <param name="posData">The part-of-speech data to use.</param>
 /// <param name="tagger">The part-of-speech tagger to use.</param>
 /// <param name="splitter">The identifier splitter to use.</param>
 /// <param name="specialWords">Words that indicate the method name needs special handling.</param>
 /// <param name="booleanArgumentVerbs">Verbs that indicate that the boolean arguments to a method should be included in the UnknownArguments list.</param>
 /// <param name="nounPhraseIndicators">Words that indicate the beginning of a noun phrase.</param>
 /// <param name="positionalFrequencies">Positional frequency data.</param>
 public UnigramMethodRule(
     PartOfSpeechData posData,
     Tagger tagger,
     IdSplitter splitter,
     HashSet<string> specialWords,
     HashSet<string> booleanArgumentVerbs,
     HashSet<string> nounPhraseIndicators,
     PositionalFrequencies positionalFrequencies)
     : base()
 {
     // Analysis components.
     PosData = posData;
     PosTagger = tagger;
     Splitter = splitter;

     // Word sets and frequency data consulted by this rule.
     SpecialWords = specialWords;
     BooleanArgumentVerbs = booleanArgumentVerbs;
     NounPhraseIndicators = nounPhraseIndicators;
     PositionalFrequencies = positionalFrequencies;
 }
        /// <summary>
        /// Sets the member data sets to their default states.
        /// Each word set is loaded from the file named by its configuration setting; when a
        /// setting is absent, the set is left empty and a message is written to standard error.
        /// The positional frequency data is replaced with a newly constructed instance.
        /// </summary>
        private void InitializeMembers() {
            this.SpecialWords = LoadConfiguredWordSet("UnigramMethodRule.SpecialWordsFile");
            this.BooleanArgumentVerbs = LoadConfiguredWordSet("UnigramMethodRule.BooleanArgumentVerbsFile");
            this.NounPhraseIndicators = LoadConfiguredWordSet("UnigramMethodRule.NounPhraseIndicatorsFile");

            this.PositionalFrequencies = new PositionalFrequencies();
        }

        /// <summary>
        /// Builds a case-insensitive word set from the word-list file named by a configuration setting.
        /// </summary>
        /// <param name="settingName">The configuration key whose value is the word-list file.</param>
        /// <returns>
        /// The words read from the configured file, or an empty set if the setting is not present.
        /// </returns>
        private HashSet<string> LoadConfiguredWordSet(string settingName) {
            string wordListFile = SwumConfiguration.GetFileSetting(settingName);
            if(wordListFile == null) {
                // A missing setting is not fatal: fall back to an empty set and report which key was absent.
                HashSet<string> emptySet = new HashSet<string>(StringComparer.InvariantCultureIgnoreCase);
                Console.Error.WriteLine(settingName + " not specified in config file.");
                return emptySet;
            }

            return new HashSet<string>(LibFileLoader.ReadWordList(wordListFile), StringComparer.InvariantCultureIgnoreCase);
        }