Esempio n. 1
0
        /// <summary>
        /// Prepares this splitter for use: stores the program word counts together with the
        /// base-10 logarithm of their sum, loads the global word counts and weights each one
        /// by word length, and loads the prefix and suffix word lists.
        /// </summary>
        /// <param name="programWordCount">A dictionary containing the word counts from the local program.</param>
        private void Initialize(Dictionary<string, int> programWordCount)
        {
            this.CamelSplitter = new ConservativeIdSplitter();

            // Remember the program counts and cache log10 of their total.
            this.ProgramWordCount = programWordCount;
            ulong runningTotal = 0;
            foreach (int occurrences in this.ProgramWordCount.Values)
            {
                runningTotal += (ulong)occurrences;
            }
            this.LogProgramTotalWordCount = Math.Log10(runningTotal);

            // Global counts come from the file named in the configuration.
            string globalCountPath = SwumConfiguration.GetFileSetting("SamuraiIdSplitter.GlobalWordCountFile");
            var unweightedCounts = LibFileLoader.ReadWordCount(globalCountPath, false, IncludeIdentifier);

            // Scale every global count by (word length - 1)^1.5.
            this.GlobalWordCount = new Dictionary<string, double>();
            foreach (var entry in unweightedCounts)
            {
                double lengthWeight = Math.Pow((double)entry.Key.Length - 1, 1.5);
                this.GlobalWordCount[entry.Key] = entry.Value * lengthWeight;
            }

            // Prefix and suffix lists also come from configured file locations.
            //TODO: the words must be in lowercase. Should we lowercase them on loading, or just assume/require that they're in lowercase in the file?
            string prefixListPath = SwumConfiguration.GetFileSetting("SamuraiIdSplitter.Prefixesfile");
            this.Prefixes = LibFileLoader.ReadWordList(prefixListPath);

            string suffixListPath = SwumConfiguration.GetFileSetting("SamuraiIdSplitter.Suffixesfile");
            this.Suffixes = LibFileLoader.ReadWordList(suffixListPath);
        }
Esempio n. 2
0
        /// <summary>
        /// Sets the member data sets to their default states. Each word set is loaded from the
        /// file named by its configuration setting; when a setting is absent the set is left
        /// empty and a message is written to standard error.
        /// </summary>
        private void InitializeMembers()
        {
            this.SpecialWords = LoadWordSetOrEmpty("UnigramMethodRule.SpecialWordsFile");
            this.BooleanArgumentVerbs = LoadWordSetOrEmpty("UnigramMethodRule.BooleanArgumentVerbsFile");
            this.NounPhraseIndicators = LoadWordSetOrEmpty("UnigramMethodRule.NounPhraseIndicatorsFile");

            this.PositionalFrequencies = new PositionalFrequencies();
        }

        /// <summary>
        /// Builds a case-insensitive (invariant culture) word set from the file referenced by a configuration setting.
        /// </summary>
        /// <param name="settingName">The name of the configuration setting that holds the word list file.</param>
        /// <returns>
        /// The words read from the configured file, or an empty set when
        /// <see cref="SwumConfiguration.GetFileSetting"/> returns null for <paramref name="settingName"/>.
        /// </returns>
        private static HashSet<string> LoadWordSetOrEmpty(string settingName)
        {
            string wordListFile = SwumConfiguration.GetFileSetting(settingName);
            if (wordListFile == null)
            {
                // Missing setting: fall back to an empty set and report it rather than failing.
                var emptySet = new HashSet<string>(StringComparer.InvariantCultureIgnoreCase);
                Console.Error.WriteLine(settingName + " not specified in config file.");
                return emptySet;
            }

            return new HashSet<string>(LibFileLoader.ReadWordList(wordListFile), StringComparer.InvariantCultureIgnoreCase);
        }
Esempio n. 3
0
        //[Description("The srcML file to use.")]
        //[Required]
        //public string File { get; set; }

        /// <summary>
        /// Writes the value of the "PCKimmoPartOfSpeechData.TwoDictFile" setting to standard output.
        /// </summary>
        public override void Execute()
        {
            // Disabled experiment, kept for reference: it wraps an inline srcML snippet in a
            // <unit> element, builds a FieldDeclarationNode from it and prints the result of
            // applying FieldRule.ConstructSwum.
            //
            //string srcMLFormat;

            ////construct the necessary srcML wrapper unit tags
            //XmlNamespaceManager xnm = SrcML.SrcML.NamespaceManager;

            //StringBuilder namespaceDecls = new StringBuilder();
            //foreach (string prefix in xnm)
            //{
            //    if (prefix != string.Empty && !prefix.StartsWith("xml", StringComparison.InvariantCultureIgnoreCase))
            //    {
            //        if (prefix.Equals("src", StringComparison.InvariantCultureIgnoreCase))
            //        {
            //            namespaceDecls.AppendFormat("xmlns=\"{0}\" ", xnm.LookupNamespace(prefix));
            //        }
            //        else
            //        {
            //            namespaceDecls.AppendFormat("xmlns:{0}=\"{1}\" ", prefix, xnm.LookupNamespace(prefix));
            //        }
            //    }
            //}
            //srcMLFormat = string.Format("<unit {0}>{{0}}</unit>", namespaceDecls.ToString());

            //string testSrcML = "<class>class <name>foo</name> <block>{<private type=\"default\"><decl_stmt><decl><type><name>bool</name></type> <name>MyVariable</name></decl>;</decl_stmt></private>}</block>;</class>";
            //XElement xml = XElement.Parse(string.Format(srcMLFormat, testSrcML), LoadOptions.PreserveWhitespace);
            //FieldContext fc = ContextBuilder.BuildFieldContext(xml.Descendants(SRC.Declaration).First());

            //FieldDeclarationNode fdn = new FieldDeclarationNode("myVariable", fc);
            ////var splitter = new ConservativeIdSplitter();
            ////var tagger = new UnigramTagger();
            ////var posData = new PCKimmoPartOfSpeechData();

            //FieldRule rule = new FieldRule();
            //rule.ConstructSwum(fdn);
            //Console.WriteLine(fdn.ToString());

            // Live behaviour: look the setting up, then print it.
            var twoDictSetting = SwumConfiguration.GetSetting("PCKimmoPartOfSpeechData.TwoDictFile");
            Console.WriteLine(twoDictSetting);
        }
Esempio n. 4
0
 /// <summary>
 /// Creates a new PCKimmoPartOfSpeechData object, loading every part-of-speech word collection
 /// from the file locations given by the "PCKimmoPartOfSpeechData.*" configuration settings.
 /// </summary>
 public PCKimmoPartOfSpeechData()
 {
     // Word lists read from one configured file each.
     TwoDict = LibFileLoader.ReadWordList(
         SwumConfiguration.GetFileSetting("PCKimmoPartOfSpeechData.TwoDictFile"));
     Prepositions = LibFileLoader.ReadWordList(
         SwumConfiguration.GetFileSetting("PCKimmoPartOfSpeechData.PrepositionsFile"));
     VerbsThirdPersonSingular = LibFileLoader.ReadWordList(
         SwumConfiguration.GetFileSetting("PCKimmoPartOfSpeechData.VerbsThirdPersonSingularFile"));
     VerbsThirdPersonIrregular = LibFileLoader.ReadWordList(
         SwumConfiguration.GetFileSetting("PCKimmoPartOfSpeechData.VerbsThirdPersonIrregularFile"));
     ModalVerbs = LibFileLoader.ReadWordList(
         SwumConfiguration.GetFileSetting("PCKimmoPartOfSpeechData.ModalVerbsFile"));
     IngVerbs = LibFileLoader.ReadWordList(
         SwumConfiguration.GetFileSetting("PCKimmoPartOfSpeechData.IngVerbsFile"));

     // Past-tense and past-participle verbs are each read from a regular and an irregular file.
     string pastTenseRegularFile = SwumConfiguration.GetFileSetting("PCKimmoPartOfSpeechData.PastTenseVerbsRegularFile");
     string pastTenseIrregularFile = SwumConfiguration.GetFileSetting("PCKimmoPartOfSpeechData.PastTenseVerbsIrregularFile");
     PastTenseVerbs = LibFileLoader.ReadWordList(pastTenseRegularFile, pastTenseIrregularFile);

     string pastParticipleRegularFile = SwumConfiguration.GetFileSetting("PCKimmoPartOfSpeechData.PastParticipleVerbsRegularFile");
     string pastParticipleIrregularFile = SwumConfiguration.GetFileSetting("PCKimmoPartOfSpeechData.PastParticipleVerbsIrregularFile");
     PastParticipleVerbs = LibFileLoader.ReadWordList(pastParticipleRegularFile, pastParticipleIrregularFile);

     PotentialVerbs = LibFileLoader.ReadWordList(
         SwumConfiguration.GetFileSetting("PCKimmoPartOfSpeechData.PotentialVerbsFile"));
     OnlyNouns = LibFileLoader.ReadWordList(
         SwumConfiguration.GetFileSetting("PCKimmoPartOfSpeechData.OnlyNounsFile"));
     Adjectives = LibFileLoader.ReadWordList(
         SwumConfiguration.GetFileSetting("PCKimmoPartOfSpeechData.AdjectivesFile"));
     Adverbs = LibFileLoader.ReadWordList(
         SwumConfiguration.GetFileSetting("PCKimmoPartOfSpeechData.AdverbsFile"));
     Determiners = LibFileLoader.ReadWordList(
         SwumConfiguration.GetFileSetting("PCKimmoPartOfSpeechData.DeterminersFile"));
     Pronouns = LibFileLoader.ReadWordList(
         SwumConfiguration.GetFileSetting("PCKimmoPartOfSpeechData.PronounsFile"));

     // These two are read as word-count data; only the keys are kept.
     var ignorableVerbCounts = LibFileLoader.ReadWordCount(
         SwumConfiguration.GetFileSetting("PCKimmoPartOfSpeechData.IgnorableVerbsFile"));
     IgnorableVerbs = new HashSet<string>(ignorableVerbCounts.Keys);

     var ignorableHeadWordCounts = LibFileLoader.ReadWordCount(
         SwumConfiguration.GetFileSetting("PCKimmoPartOfSpeechData.IgnorableHeadWordsFile"));
     IgnorableHeadWords = new HashSet<string>(ignorableHeadWordCounts.Keys);

     GeneralVerbs = LibFileLoader.ReadWordList(
         SwumConfiguration.GetFileSetting("PCKimmoPartOfSpeechData.GeneralVerbsFile"));
     EventWords = LibFileLoader.ReadWordList(
         SwumConfiguration.GetFileSetting("PCKimmoPartOfSpeechData.EventWordsFile"));
     SideEffectWords = LibFileLoader.ReadWordList(
         SwumConfiguration.GetFileSetting("PCKimmoPartOfSpeechData.SideEffectWordsFile"));

     // Verb particles use their own dedicated loader.
     VerbParticles = LibFileLoader.ReadVerbParticleFile(
         SwumConfiguration.GetFileSetting("PCKimmoPartOfSpeechData.VerbParticlesFile"));
 }
Esempio n. 5
0
 /// <summary>
 /// Creates a new PositionalFrequencies object from the file named by the
 /// "PositionalFrequencies.FrequenciesFile" configuration setting.
 /// </summary>
 public PositionalFrequencies()
     : this(SwumConfiguration.GetFileSetting("PositionalFrequencies.FrequenciesFile"))
 {
     // Nothing to do here; construction is delegated to the overload invoked above.
 }
 /// <summary>
 /// Verifies that GetFileSetting returns null when asked for "NonExistentSetting".
 /// </summary>
 public void TestGetFileSetting_MissingSetting()
 {
     var missingSetting = SwumConfiguration.GetFileSetting("NonExistentSetting");

     Assert.IsNull(missingSetting);
 }
 /// <summary>
 /// Prints the value GetFileSetting returns for "PCKimmoPartOfSpeechData.TwoDictFile".
 /// Makes no assertions; the output is for manual inspection.
 /// </summary>
 public void TestGetFileSetting()
 {
     var twoDictFileSetting = SwumConfiguration.GetFileSetting("PCKimmoPartOfSpeechData.TwoDictFile");

     Console.WriteLine("With assembly path: {0}", twoDictFileSetting);
 }
 /// <summary>
 /// Verifies that GetSetting returns "lib/dict.2.txt" for "PCKimmoPartOfSpeechData.TwoDictFile".
 /// </summary>
 public void TestGetSetting()
 {
     const string expectedValue = "lib/dict.2.txt";

     var actualValue = SwumConfiguration.GetSetting("PCKimmoPartOfSpeechData.TwoDictFile");

     Assert.AreEqual(expectedValue, actualValue);
 }