示例#1
0
        /// <summary>
        /// Scores a line from four smoothed frequency sums: single words, single
        /// identifiers, word sequences and identifier sequences extracted from it.
        /// </summary>
        /// <param name="n">
        /// Forwarded to the counter's sequence extraction; presumably the sequence
        /// (n-gram) length - confirm against <c>CodegramCounter</c>.
        /// </param>
        /// <param name="line">The line of text to score.</param>
        /// <returns>
        /// <c>0.0</c> when all four sums are zero; otherwise the smallest positive sum
        /// divided by the largest sum, scaled by <c>0.01</c> when the line contains
        /// exactly one word and exactly one identifier.
        /// </returns>
        public double LineSalience(int n, string line)
        {
            var codegrams = new CodegramCounter();

            // Extraction happens up front, in this order, before any frequency lookup.
            var identifierSequences = codegrams.IdentifierSequences(n, line).ToList();
            var wordSequences       = codegrams.WordSequences(n, line).ToList();
            var lowerWords          = codegrams.AllWords(line).Select(w => w.ToLower()).ToList();
            var lowerIdentifiers    = codegrams.AllIdentifiers(line).Select(id => id.ToLower()).ToList();

            // Each term is (frequency + 1) divided by the matching total count.
            double wordTotal = lowerWords.Sum(
                w => (ReadCommands.LookupWordFrequency(Connection, Cache, w) + 1) / (double)WordCount);

            double identifierTotal = lowerIdentifiers.Sum(
                id => (ReadCommands.LookupIdentifierFrequency(Connection, Cache, id) + 1) / (double)IdentifierCount);

            double wordSequenceTotal = wordSequences.Sum(
                seq => (SequenceWordFrequency(seq) + 1) / (double)WordSequenceCount);

            double identifierSequenceTotal = identifierSequences.Sum(
                seq => (SequenceIdentifierFrequency(seq) + 1) / (double)IdentifierSequenceCount);

            var totals = new double[] { wordTotal, identifierTotal, wordSequenceTotal, identifierSequenceTotal };

            // Nothing contributed to any sum, so there is no ratio to compute.
            if (!totals.Any(t => t != 0.0))
            {
                return 0.0;
            }

            // A line made of exactly one word and one identifier is heavily damped.
            bool isSingleToken = lowerWords.Count == 1 && lowerIdentifiers.Count == 1;
            double damping     = isSingleToken ? 0.01 : 1.0;

            double smallestPositive = totals.Where(t => t > 0.0).Min();
            double largest          = totals.Max();

            return (smallestPositive / largest) * damping;
        }
示例#2
0
        /// <summary>
        /// Looks up the stored frequency of an identifier sequence.
        /// </summary>
        /// <param name="gram">The parts of the sequence; they are joined with <c>"."</c> before encoding.</param>
        /// <returns>
        /// <c>0</c> when no key could be produced for the joined sequence; otherwise the
        /// value returned by <c>ReadCommands.LookupIdentifierSequenceFrequency</c>.
        /// </returns>
        public int SequenceIdentifierFrequency(IEnumerable <string> gram)
        {
            string joined = string.Join(".", gram);

            // NOTE(review): the key is built through LookupWordId even though this is the
            // identifier-sequence lookup - confirm that words and identifiers share ids.
            string encodedKey = UnicodeEncoder.GetUnicodeKeyFromString(
                joined,
                token => ReadCommands.LookupWordId(Connection, Cache, token));

            if (encodedKey != null)
            {
                return ReadCommands.LookupIdentifierSequenceFrequency(Connection, Cache, encodedKey);
            }

            // A null key is reported as frequency zero.
            return 0;
        }