/// <summary>
/// Populates the splitter's state: the camel-case splitter, the program word counts together
/// with the base-10 log of their total, the length-weighted global word counts, and the
/// prefix and suffix lists read from the configured files.
/// </summary>
/// <param name="programWordCount">A dictionary containing the word counts from the local program.</param>
private void Initialize(Dictionary<string, int> programWordCount) {
    this.CamelSplitter = new ConservativeIdSplitter();

    // Store the program-local counts and cache log10 of their sum.
    this.ProgramWordCount = programWordCount;
    ulong totalOccurrences = 0;
    foreach (int occurrences in this.ProgramWordCount.Values) {
        totalOccurrences += (ulong)occurrences;
    }
    this.LogProgramTotalWordCount = Math.Log10(totalOccurrences);

    // Load the raw global counts from the configured default location.
    string globalCountFile = SwumConfiguration.GetFileSetting("SamuraiIdSplitter.GlobalWordCountFile");
    var unweightedCounts = LibFileLoader.ReadWordCount(globalCountFile, false, IncludeIdentifier);

    // Weight every raw count by (word length - 1)^1.5; a one-character word therefore gets weight 0.
    this.GlobalWordCount = new Dictionary<string, double>();
    foreach (var entry in unweightedCounts) {
        double lengthWeight = Math.Pow((double)entry.Key.Length - 1, 1.5);
        this.GlobalWordCount[entry.Key] = entry.Value * lengthWeight;
    }

    // Read the prefix and suffix lists from their configured default locations.
    //TODO: the words must be in lowercase. Should we lowercase them on loading, or just assume/require that they're in lowercase in the file?
    string prefixFile = SwumConfiguration.GetFileSetting("SamuraiIdSplitter.Prefixesfile");
    this.Prefixes = LibFileLoader.ReadWordList(prefixFile);

    string suffixFile = SwumConfiguration.GetFileSetting("SamuraiIdSplitter.Suffixesfile");
    this.Suffixes = LibFileLoader.ReadWordList(suffixFile);
}
/// <summary>
/// Sets the member data sets to their default states. Each word set is loaded from the file
/// named by its UnigramMethodRule.* configuration setting; when a setting is missing the set
/// is left empty and a message is written to standard error.
/// </summary>
private void InitializeMembers() {
    this.SpecialWords = LoadWordSetOrEmpty("UnigramMethodRule.SpecialWordsFile");
    this.BooleanArgumentVerbs = LoadWordSetOrEmpty("UnigramMethodRule.BooleanArgumentVerbsFile");
    this.NounPhraseIndicators = LoadWordSetOrEmpty("UnigramMethodRule.NounPhraseIndicatorsFile");

    this.PositionalFrequencies = new PositionalFrequencies();
}

/// <summary>
/// Builds a case-insensitive word set from the file that the given configuration setting points to.
/// </summary>
/// <param name="settingName">The full name of the file setting to look up.</param>
/// <returns>
/// The words read from the configured file, or an empty set when the setting lookup returns null.
/// Both use <see cref="StringComparer.InvariantCultureIgnoreCase"/>.
/// </returns>
private static HashSet<string> LoadWordSetOrEmpty(string settingName) {
    string wordListFile = SwumConfiguration.GetFileSetting(settingName);
    if (wordListFile != null) {
        return new HashSet<string>(LibFileLoader.ReadWordList(wordListFile), StringComparer.InvariantCultureIgnoreCase);
    }

    // A missing setting is not fatal: fall back to an empty set and report it on stderr.
    var emptySet = new HashSet<string>(StringComparer.InvariantCultureIgnoreCase);
    Console.Error.WriteLine(settingName + " not specified in config file.");
    return emptySet;
}
/// <summary>
/// Creates a new PCKimmoPartOfSpeechData object, loading every part-of-speech word collection
/// from the file named by its PCKimmoPartOfSpeechData.* setting in the SWUM configuration.
/// </summary>
public PCKimmoPartOfSpeechData() {
    // Each collection is loaded in two steps: resolve the configured path, then read the file.
    string twoDictFile = SwumConfiguration.GetFileSetting("PCKimmoPartOfSpeechData.TwoDictFile");
    TwoDict = LibFileLoader.ReadWordList(twoDictFile);

    string prepositionsFile = SwumConfiguration.GetFileSetting("PCKimmoPartOfSpeechData.PrepositionsFile");
    Prepositions = LibFileLoader.ReadWordList(prepositionsFile);

    string thirdPersonSingularFile = SwumConfiguration.GetFileSetting("PCKimmoPartOfSpeechData.VerbsThirdPersonSingularFile");
    VerbsThirdPersonSingular = LibFileLoader.ReadWordList(thirdPersonSingularFile);

    string thirdPersonIrregularFile = SwumConfiguration.GetFileSetting("PCKimmoPartOfSpeechData.VerbsThirdPersonIrregularFile");
    VerbsThirdPersonIrregular = LibFileLoader.ReadWordList(thirdPersonIrregularFile);

    string modalVerbsFile = SwumConfiguration.GetFileSetting("PCKimmoPartOfSpeechData.ModalVerbsFile");
    ModalVerbs = LibFileLoader.ReadWordList(modalVerbsFile);

    string ingVerbsFile = SwumConfiguration.GetFileSetting("PCKimmoPartOfSpeechData.IngVerbsFile");
    IngVerbs = LibFileLoader.ReadWordList(ingVerbsFile);

    // Past-tense and past-participle verbs are each read from a regular and an irregular file.
    string pastTenseRegularFile = SwumConfiguration.GetFileSetting("PCKimmoPartOfSpeechData.PastTenseVerbsRegularFile");
    string pastTenseIrregularFile = SwumConfiguration.GetFileSetting("PCKimmoPartOfSpeechData.PastTenseVerbsIrregularFile");
    PastTenseVerbs = LibFileLoader.ReadWordList(pastTenseRegularFile, pastTenseIrregularFile);

    string pastParticipleRegularFile = SwumConfiguration.GetFileSetting("PCKimmoPartOfSpeechData.PastParticipleVerbsRegularFile");
    string pastParticipleIrregularFile = SwumConfiguration.GetFileSetting("PCKimmoPartOfSpeechData.PastParticipleVerbsIrregularFile");
    PastParticipleVerbs = LibFileLoader.ReadWordList(pastParticipleRegularFile, pastParticipleIrregularFile);

    string potentialVerbsFile = SwumConfiguration.GetFileSetting("PCKimmoPartOfSpeechData.PotentialVerbsFile");
    PotentialVerbs = LibFileLoader.ReadWordList(potentialVerbsFile);

    string onlyNounsFile = SwumConfiguration.GetFileSetting("PCKimmoPartOfSpeechData.OnlyNounsFile");
    OnlyNouns = LibFileLoader.ReadWordList(onlyNounsFile);

    string adjectivesFile = SwumConfiguration.GetFileSetting("PCKimmoPartOfSpeechData.AdjectivesFile");
    Adjectives = LibFileLoader.ReadWordList(adjectivesFile);

    string adverbsFile = SwumConfiguration.GetFileSetting("PCKimmoPartOfSpeechData.AdverbsFile");
    Adverbs = LibFileLoader.ReadWordList(adverbsFile);

    string determinersFile = SwumConfiguration.GetFileSetting("PCKimmoPartOfSpeechData.DeterminersFile");
    Determiners = LibFileLoader.ReadWordList(determinersFile);

    string pronounsFile = SwumConfiguration.GetFileSetting("PCKimmoPartOfSpeechData.PronounsFile");
    Pronouns = LibFileLoader.ReadWordList(pronounsFile);

    // These two files are read as word counts; only the keys of the result are kept.
    string ignorableVerbsFile = SwumConfiguration.GetFileSetting("PCKimmoPartOfSpeechData.IgnorableVerbsFile");
    IgnorableVerbs = new HashSet<string>(LibFileLoader.ReadWordCount(ignorableVerbsFile).Keys);

    string ignorableHeadWordsFile = SwumConfiguration.GetFileSetting("PCKimmoPartOfSpeechData.IgnorableHeadWordsFile");
    IgnorableHeadWords = new HashSet<string>(LibFileLoader.ReadWordCount(ignorableHeadWordsFile).Keys);

    string generalVerbsFile = SwumConfiguration.GetFileSetting("PCKimmoPartOfSpeechData.GeneralVerbsFile");
    GeneralVerbs = LibFileLoader.ReadWordList(generalVerbsFile);

    string eventWordsFile = SwumConfiguration.GetFileSetting("PCKimmoPartOfSpeechData.EventWordsFile");
    EventWords = LibFileLoader.ReadWordList(eventWordsFile);

    string sideEffectWordsFile = SwumConfiguration.GetFileSetting("PCKimmoPartOfSpeechData.SideEffectWordsFile");
    SideEffectWords = LibFileLoader.ReadWordList(sideEffectWordsFile);

    // Verb particles use their own dedicated loader rather than the plain word-list reader.
    string verbParticlesFile = SwumConfiguration.GetFileSetting("PCKimmoPartOfSpeechData.VerbParticlesFile");
    VerbParticles = LibFileLoader.ReadVerbParticleFile(verbParticlesFile);
}
/// <summary>
/// Creates a new PositionalFrequencies object by delegating to the single-argument constructor
/// with the file configured under the PositionalFrequencies.FrequenciesFile setting.
/// </summary>
public PositionalFrequencies()
    : this(SwumConfiguration.GetFileSetting("PositionalFrequencies.FrequenciesFile")) {
}
// Checks that looking up the setting name "NonExistentSetting" yields null.
public void TestGetFileSetting_MissingSetting() {
    string missingValue = SwumConfiguration.GetFileSetting("NonExistentSetting");

    Assert.IsNull(missingValue);
}
// Prints the value returned for the PCKimmoPartOfSpeechData.TwoDictFile setting to standard output.
// NOTE(review): no assertion is made here, so the result has to be inspected by hand.
public void TestGetFileSetting() {
    string resolvedFile = SwumConfiguration.GetFileSetting("PCKimmoPartOfSpeechData.TwoDictFile");

    Console.WriteLine("With assembly path: {0}", resolvedFile);
}