/// <summary>
/// Loads the word-count and affix data files and populates the member variables from them.
/// </summary>
/// <param name="programWordCount">A dictionary containing the word counts from the local program.</param>
private void Initialize(Dictionary<string, int> programWordCount) {
    this.CamelSplitter = new ConservativeIdSplitter();

    // Keep the local program's counts and cache the base-10 log of their sum.
    this.ProgramWordCount = programWordCount;
    ulong totalOccurrences = 0;
    foreach (int occurrences in this.ProgramWordCount.Values) {
        totalOccurrences += (ulong)occurrences;
    }
    this.LogProgramTotalWordCount = Math.Log10(totalOccurrences);

    // Read the raw global counts from the file named by the configuration.
    var globalCountPath = SwumConfiguration.GetFileSetting("SamuraiIdSplitter.GlobalWordCountFile");
    var rawCounts = LibFileLoader.ReadWordCount(globalCountPath, false, IncludeIdentifier);

    // Scale every raw count by (key length - 1) raised to the power 1.5.
    var weightedCounts = new Dictionary<string, double>();
    foreach (var entry in rawCounts) {
        double lengthFactor = Math.Pow((double)entry.Key.Length - 1, 1.5);
        weightedCounts[entry.Key] = entry.Value * lengthFactor;
    }
    this.GlobalWordCount = weightedCounts;

    // Read the prefix and suffix lists from the configured locations.
    //TODO: the words must be in lowercase. Should we lowercase them on loading, or just assume/require that they're in lowercase in the file?
    var prefixPath = SwumConfiguration.GetFileSetting("SamuraiIdSplitter.Prefixesfile");
    this.Prefixes = LibFileLoader.ReadWordList(prefixPath);
    var suffixPath = SwumConfiguration.GetFileSetting("SamuraiIdSplitter.Suffixesfile");
    this.Suffixes = LibFileLoader.ReadWordList(suffixPath);
}
/// <summary>
/// Puts the member data sets into their default states: the three word sets are filled from
/// the files named in the configuration, and a fresh PositionalFrequencies object is created.
/// </summary>
private void InitializeMembers() {
    this.SpecialWords = LoadConfiguredWordSet("UnigramMethodRule.SpecialWordsFile");
    this.BooleanArgumentVerbs = LoadConfiguredWordSet("UnigramMethodRule.BooleanArgumentVerbsFile");
    this.NounPhraseIndicators = LoadConfiguredWordSet("UnigramMethodRule.NounPhraseIndicatorsFile");
    this.PositionalFrequencies = new PositionalFrequencies();
}

/// <summary>
/// Builds a case-insensitive word set from the file that the given configuration key points to.
/// When the key yields no file, an empty set is returned and a message is written to standard error.
/// </summary>
/// <param name="settingKey">The configuration key whose value names the word list file.</param>
/// <returns>A set using StringComparer.InvariantCultureIgnoreCase; empty if the setting is absent.</returns>
private static HashSet<string> LoadConfiguredWordSet(string settingKey) {
    string wordListPath = SwumConfiguration.GetFileSetting(settingKey);
    if (wordListPath == null) {
        var emptySet = new HashSet<string>(StringComparer.InvariantCultureIgnoreCase);
        Console.Error.WriteLine(settingKey + " not specified in config file.");
        return emptySet;
    }

    return new HashSet<string>(LibFileLoader.ReadWordList(wordListPath), StringComparer.InvariantCultureIgnoreCase);
}
//[Description("The srcML file to use.")]
//[Required]
//public string File { get; set; }

/// <summary>
/// Prints the raw configuration value stored under "PCKimmoPartOfSpeechData.TwoDictFile".
/// The commented-out code below is a disabled experiment that built a field declaration
/// node from a srcML snippet and ran FieldRule on it; it is kept for reference only.
/// </summary>
public override void Execute() {
    //string srcMLFormat;
    ////construct the necessary srcML wrapper unit tags
    //XmlNamespaceManager xnm = SrcML.SrcML.NamespaceManager;
    //StringBuilder namespaceDecls = new StringBuilder();
    //foreach (string prefix in xnm)
    //{
    //    if (prefix != string.Empty && !prefix.StartsWith("xml", StringComparison.InvariantCultureIgnoreCase))
    //    {
    //        if (prefix.Equals("src", StringComparison.InvariantCultureIgnoreCase))
    //        {
    //            namespaceDecls.AppendFormat("xmlns=\"{0}\" ", xnm.LookupNamespace(prefix));
    //        }
    //        else
    //        {
    //            namespaceDecls.AppendFormat("xmlns:{0}=\"{1}\" ", prefix, xnm.LookupNamespace(prefix));
    //        }
    //    }
    //}
    //srcMLFormat = string.Format("<unit {0}>{{0}}</unit>", namespaceDecls.ToString());

    //string testSrcML = "<class>class <name>foo</name> <block>{<private type=\"default\"><decl_stmt><decl><type><name>bool</name></type> <name>MyVariable</name></decl>;</decl_stmt></private>}</block>;</class>";
    //XElement xml = XElement.Parse(string.Format(srcMLFormat, testSrcML), LoadOptions.PreserveWhitespace);
    //FieldContext fc = ContextBuilder.BuildFieldContext(xml.Descendants(SRC.Declaration).First());

    //FieldDeclarationNode fdn = new FieldDeclarationNode("myVariable", fc);
    ////var splitter = new ConservativeIdSplitter();
    ////var tagger = new UnigramTagger();
    ////var posData = new PCKimmoPartOfSpeechData();
    //FieldRule rule = new FieldRule();
    //rule.ConstructSwum(fdn);
    //Console.WriteLine(fdn.ToString());

    var twoDictSetting = SwumConfiguration.GetSetting("PCKimmoPartOfSpeechData.TwoDictFile");
    Console.WriteLine(twoDictSetting);
}
/// <summary>
/// Creates a new PCKimmoPartOfSpeechData object, loading every part-of-speech data set from
/// the file locations named in the configuration.
/// </summary>
public PCKimmoPartOfSpeechData() {
    // Each path is resolved immediately before its file is read, so the lookups and loads
    // happen in the same interleaved order as a direct nested call would produce.
    var twoDictPath = SwumConfiguration.GetFileSetting("PCKimmoPartOfSpeechData.TwoDictFile");
    TwoDict = LibFileLoader.ReadWordList(twoDictPath);

    var prepositionsPath = SwumConfiguration.GetFileSetting("PCKimmoPartOfSpeechData.PrepositionsFile");
    Prepositions = LibFileLoader.ReadWordList(prepositionsPath);

    // Verb forms.
    var thirdPersonSingularPath = SwumConfiguration.GetFileSetting("PCKimmoPartOfSpeechData.VerbsThirdPersonSingularFile");
    VerbsThirdPersonSingular = LibFileLoader.ReadWordList(thirdPersonSingularPath);

    var thirdPersonIrregularPath = SwumConfiguration.GetFileSetting("PCKimmoPartOfSpeechData.VerbsThirdPersonIrregularFile");
    VerbsThirdPersonIrregular = LibFileLoader.ReadWordList(thirdPersonIrregularPath);

    var modalVerbsPath = SwumConfiguration.GetFileSetting("PCKimmoPartOfSpeechData.ModalVerbsFile");
    ModalVerbs = LibFileLoader.ReadWordList(modalVerbsPath);

    var ingVerbsPath = SwumConfiguration.GetFileSetting("PCKimmoPartOfSpeechData.IngVerbsFile");
    IngVerbs = LibFileLoader.ReadWordList(ingVerbsPath);

    // Past tense and past participle verbs are each read from a regular and an irregular file.
    var pastTenseRegularPath = SwumConfiguration.GetFileSetting("PCKimmoPartOfSpeechData.PastTenseVerbsRegularFile");
    var pastTenseIrregularPath = SwumConfiguration.GetFileSetting("PCKimmoPartOfSpeechData.PastTenseVerbsIrregularFile");
    PastTenseVerbs = LibFileLoader.ReadWordList(pastTenseRegularPath, pastTenseIrregularPath);

    var pastParticipleRegularPath = SwumConfiguration.GetFileSetting("PCKimmoPartOfSpeechData.PastParticipleVerbsRegularFile");
    var pastParticipleIrregularPath = SwumConfiguration.GetFileSetting("PCKimmoPartOfSpeechData.PastParticipleVerbsIrregularFile");
    PastParticipleVerbs = LibFileLoader.ReadWordList(pastParticipleRegularPath, pastParticipleIrregularPath);

    var potentialVerbsPath = SwumConfiguration.GetFileSetting("PCKimmoPartOfSpeechData.PotentialVerbsFile");
    PotentialVerbs = LibFileLoader.ReadWordList(potentialVerbsPath);

    // Remaining word classes.
    var onlyNounsPath = SwumConfiguration.GetFileSetting("PCKimmoPartOfSpeechData.OnlyNounsFile");
    OnlyNouns = LibFileLoader.ReadWordList(onlyNounsPath);

    var adjectivesPath = SwumConfiguration.GetFileSetting("PCKimmoPartOfSpeechData.AdjectivesFile");
    Adjectives = LibFileLoader.ReadWordList(adjectivesPath);

    var adverbsPath = SwumConfiguration.GetFileSetting("PCKimmoPartOfSpeechData.AdverbsFile");
    Adverbs = LibFileLoader.ReadWordList(adverbsPath);

    var determinersPath = SwumConfiguration.GetFileSetting("PCKimmoPartOfSpeechData.DeterminersFile");
    Determiners = LibFileLoader.ReadWordList(determinersPath);

    var pronounsPath = SwumConfiguration.GetFileSetting("PCKimmoPartOfSpeechData.PronounsFile");
    Pronouns = LibFileLoader.ReadWordList(pronounsPath);

    // The ignorable sets are read as word-count data; only the keys are kept.
    var ignorableVerbsPath = SwumConfiguration.GetFileSetting("PCKimmoPartOfSpeechData.IgnorableVerbsFile");
    IgnorableVerbs = new HashSet<string>(LibFileLoader.ReadWordCount(ignorableVerbsPath).Keys);

    var ignorableHeadWordsPath = SwumConfiguration.GetFileSetting("PCKimmoPartOfSpeechData.IgnorableHeadWordsFile");
    IgnorableHeadWords = new HashSet<string>(LibFileLoader.ReadWordCount(ignorableHeadWordsPath).Keys);

    var generalVerbsPath = SwumConfiguration.GetFileSetting("PCKimmoPartOfSpeechData.GeneralVerbsFile");
    GeneralVerbs = LibFileLoader.ReadWordList(generalVerbsPath);

    var eventWordsPath = SwumConfiguration.GetFileSetting("PCKimmoPartOfSpeechData.EventWordsFile");
    EventWords = LibFileLoader.ReadWordList(eventWordsPath);

    var sideEffectWordsPath = SwumConfiguration.GetFileSetting("PCKimmoPartOfSpeechData.SideEffectWordsFile");
    SideEffectWords = LibFileLoader.ReadWordList(sideEffectWordsPath);

    var verbParticlesPath = SwumConfiguration.GetFileSetting("PCKimmoPartOfSpeechData.VerbParticlesFile");
    VerbParticles = LibFileLoader.ReadVerbParticleFile(verbParticlesPath);
}
/// <summary>
/// Creates a new PositionalFrequencies object, using the positional frequency data in the default file.
/// The default file is obtained from the "PositionalFrequencies.FrequenciesFile" configuration
/// setting via SwumConfiguration.GetFileSetting, and the result is forwarded to the
/// single-argument constructor of this class, which does all of the work.
/// </summary>
public PositionalFrequencies() : this(SwumConfiguration.GetFileSetting("PositionalFrequencies.FrequenciesFile")) { }
/// <summary>
/// Checks that GetFileSetting yields null when asked for the key "NonExistentSetting",
/// which is expected to be absent from the configuration.
/// </summary>
public void TestGetFileSetting_MissingSetting() {
    var missingValue = SwumConfiguration.GetFileSetting("NonExistentSetting");

    Assert.IsNull(missingValue);
}
/// <summary>
/// Checks that GetFileSetting produces a value for the configured key
/// "PCKimmoPartOfSpeechData.TwoDictFile", and prints that value for manual inspection.
/// </summary>
public void TestGetFileSetting() {
    var resolvedPath = SwumConfiguration.GetFileSetting("PCKimmoPartOfSpeechData.TwoDictFile");

    // Still print the value: the exact path depends on where the assembly is located.
    Console.WriteLine("With assembly path: {0}", resolvedPath);

    // Without an assertion this test could never fail. A null result is what
    // GetFileSetting reports for a missing setting, so a configured key must not be null.
    Assert.IsNotNull(resolvedPath);
}
/// <summary>
/// Checks that GetSetting returns the raw configured value "lib/dict.2.txt" for the key
/// "PCKimmoPartOfSpeechData.TwoDictFile".
/// </summary>
public void TestGetSetting() {
    const string expectedValue = "lib/dict.2.txt";

    var actualValue = SwumConfiguration.GetSetting("PCKimmoPartOfSpeechData.TwoDictFile");

    Assert.AreEqual(expectedValue, actualValue);
}