/// <summary>
/// Computes a salience score for a single line from four frequency sums:
/// individual words, individual identifiers, word sequences and identifier
/// sequences extracted from <paramref name="line"/>.
/// </summary>
/// <param name="n">Sequence length forwarded to the counter's sequence extractors.</param>
/// <param name="line">The line of text to score.</param>
/// <returns>
/// <c>0.0</c> when all four sums are zero; otherwise the smallest positive sum
/// divided by the largest sum, scaled by <c>0.01</c> when the line contains
/// exactly one word and exactly one identifier.
/// </returns>
public double LineSalience(int n, string line)
{
    var counter = new CodegramCounter();

    var identifiersGrams = counter.IdentifierSequences(n, line).ToList();
    var wordGrams = counter.WordSequences(n, line).ToList();

    // Normalize lookup keys with the invariant culture so the result does not
    // depend on the current thread culture (e.g. "I" lowercases to a dotless
    // "ı" under tr-TR, which would change which keys are looked up).
    var words = counter.AllWords(line).Select(w => w.ToLowerInvariant()).ToList();
    var identifiers = counter.AllIdentifiers(line).Select(ident => ident.ToLowerInvariant()).ToList();

    var sumIdentifierGrams = 0.0;
    var sumWordGrams = 0.0;
    var sumWords = 0.0;
    var sumIdentifiers = 0.0;

    // Every term is (frequency + 1) / total, so an item with a zero frequency
    // still contributes a non-zero amount to its sum.
    // NOTE(review): assumes the *Count totals are non-zero; confirm against
    // wherever they are initialised.
    foreach (var word in words)
    {
        sumWords += (ReadCommands.LookupWordFrequency(Connection, Cache, word) + 1) / (double)WordCount;
    }

    foreach (var ident in identifiers)
    {
        sumIdentifiers += (ReadCommands.LookupIdentifierFrequency(Connection, Cache, ident) + 1) / (double)IdentifierCount;
    }

    foreach (var wordGram in wordGrams)
    {
        sumWordGrams += (SequenceWordFrequency(wordGram) + 1) / (double)WordSequenceCount;
    }

    foreach (var identGram in identifiersGrams)
    {
        sumIdentifierGrams += (SequenceIdentifierFrequency(identGram) + 1) / (double)IdentifierSequenceCount;
    }

    var vals = new double[] { sumWords, sumIdentifiers, sumWordGrams, sumIdentifierGrams };

    // Nothing was extracted from the line at all: there is no ratio to compute.
    if (vals.All(v => v == 0.0))
    {
        return 0.0;
    }

    // Lines made of exactly one word and one identifier are scaled down by 100x.
    var multiplier = 1.0;
    if (words.Count == 1 && identifiers.Count == 1)
    {
        multiplier = 0.01;
    }

    // Ratio of the smallest non-zero sum to the largest sum; sums that are
    // zero (empty categories) are excluded from the minimum.
    var salience = (vals.Where(s => s > 0.0).Min() / vals.Max()) * multiplier;
    return salience;
}
/// <summary>
/// Looks up the stored frequency of an identifier sequence.
/// </summary>
/// <param name="gram">The parts of the sequence; they are joined with "." before encoding.</param>
/// <returns>
/// The value returned by <c>ReadCommands.LookupIdentifierSequenceFrequency</c> for
/// the encoded key, or <c>0</c> when the encoder yields no key for the sequence.
/// </returns>
public int SequenceIdentifierFrequency(IEnumerable<string> gram)
{
    string joined = string.Join(".", gram);

    // NOTE(review): the per-token id resolver is LookupWordId even though this
    // method deals with identifier sequences - confirm this is intentional.
    string encodedKey = UnicodeEncoder.GetUnicodeKeyFromString(
        joined,
        token => ReadCommands.LookupWordId(Connection, Cache, token));

    if (encodedKey != null)
    {
        return ReadCommands.LookupIdentifierSequenceFrequency(Connection, Cache, encodedKey);
    }

    // The encoder returned no key for this sequence, so report a frequency of zero.
    return 0;
}