/// <summary>
/// Counts the number of occurrences of words within the identifiers in the given srcml files.
/// Each identifier is split into words, and every word is lower-cased using the invariant
/// culture before it is counted, so the resulting keys do not depend on the current culture.
/// </summary>
/// <param name="archive">An archive containing the srcml files to analyze.</param>
/// <returns>A dictionary mapping lower-cased words to the number of occurrences within identifiers.</returns>
/// <exception cref="ArgumentNullException"><paramref name="archive"/> is null.</exception>
public static Dictionary<string, int> CountProgramWords(ISrcMLArchive archive)
{
    if (archive == null)
    {
        throw new ArgumentNullException("archive");
    }

    var splitter = new ConservativeIdSplitter();
    var observations = new Dictionary<string, int>();

    foreach (var fileUnit in archive.FileUnits)
    {
        // Query for all the identifiers: only name elements without child elements are used.
        // NOTE(review): presumably this skips compound names whose parts are nested name elements - confirm.
        var identifiers = from id in fileUnit.Descendants(SRC.Name)
                          where !id.Elements().Any()
                          select id.Value;

        foreach (var id in identifiers)
        {
            foreach (string word in splitter.Split(id))
            {
                // Identifiers are not linguistic text, so use culture-invariant casing.
                // ToLower() would produce different keys under cultures such as tr-TR.
                string lowWord = word.ToLowerInvariant();

                // Gets the number of observations for the word. If it is new, obs is set to 0.
                int obs;
                observations.TryGetValue(lowWord, out obs);
                observations[lowWord] = obs + 1;
            }
        }
    }

    return observations;
}
/// <summary>
/// Breaks a program identifier into its constituent words by applying three splitting
/// passes in sequence: camel-case splitting, uppercase-to-lowercase splitting, and
/// same-case splitting.
/// </summary>
/// <param name="identifier">The identifier to split.</param>
/// <param name="printSplitTrace">
/// When true, the identifier and each intermediate piece are written to the console,
/// indented by one additional tab per splitting pass.
/// </param>
/// <returns>An array of the words resulting from the split.</returns>
public string[] Split(string identifier, bool printSplitTrace)
{
    var result = new List<string>();

    if (printSplitTrace)
    {
        Console.WriteLine(identifier);
    }

    // Pass 1: camel-case boundaries.
    foreach (string camelPart in CamelSplitter.Split(identifier))
    {
        if (printSplitTrace)
        {
            Console.WriteLine("\t{0}", camelPart);
        }

        // Pass 2: uppercase-to-lowercase transitions within each camel-case piece.
        foreach (string casePart in SplitOnUppercaseToLowercase(camelPart))
        {
            if (printSplitTrace)
            {
                Console.WriteLine("\t\t{0}", casePart);
            }

            // Pass 3: same-case runs; these are the final words.
            foreach (string finalWord in SplitSameCase(casePart))
            {
                if (printSplitTrace)
                {
                    Console.WriteLine("\t\t\t{0}", finalWord);
                }

                result.Add(finalWord);
            }
        }
    }

    return result.ToArray();
}
/// <summary>
/// Counts the number of occurrences of words within the identifiers in the given srcml files.
/// Each identifier is split into words, and every word is lower-cased using the invariant
/// culture before it is counted, so the resulting keys do not depend on the current culture.
/// </summary>
/// <param name="archive">An archive containing the srcml files to analyze.</param>
/// <returns>A dictionary mapping lower-cased words to the number of occurrences within identifiers.</returns>
/// <exception cref="ArgumentNullException"><paramref name="archive"/> is null.</exception>
public static Dictionary<string, int> CountProgramWords(ISrcMLArchive archive)
{
    if (archive == null)
    {
        throw new ArgumentNullException("archive");
    }

    var splitter = new ConservativeIdSplitter();
    var observations = new Dictionary<string, int>();

    foreach (var fileUnit in archive.FileUnits)
    {
        // Query for all the identifiers: only name elements without child elements are used.
        // NOTE(review): presumably this skips compound names whose parts are nested name elements - confirm.
        var identifiers = from id in fileUnit.Descendants(SRC.Name)
                          where !id.Elements().Any()
                          select id.Value;

        foreach (var id in identifiers)
        {
            string[] words = splitter.Split(id);
            foreach (string word in words)
            {
                // Identifiers are not linguistic text, so use culture-invariant casing.
                // ToLower() would produce different keys under cultures such as tr-TR.
                string lowWord = word.ToLowerInvariant();

                // Gets the number of observations for the word. If it is new, obs is set to 0.
                int obs;
                observations.TryGetValue(lowWord, out obs);
                observations[lowWord] = obs + 1;
            }
        }
    }

    return observations;
}