Ejemplo n.º 1
0
        /// <summary>
        /// Converts a word-to-value map into a <see cref="SparseVectorList"/> keyed by lexicon index.
        /// Words that are not yet in <c>Lexicon</c> are added to it; new indices start at the
        /// lexicon's size on entry and increase by one per new word, so this method can grow <c>Lexicon</c>.
        /// </summary>
        /// <param name="vec">Map from word to the value to accumulate for that word.</param>
        /// <returns>The populated vector, or <c>null</c> when the vector ends up with no entries.</returns>
        /// <exception cref="System.ArgumentNullException"><paramref name="vec"/> is <c>null</c>.</exception>
        /// <exception cref="System.InvalidOperationException"><c>Lexicon</c> is <c>null</c>.</exception>
        public SparseVectorList GetFeatureVector(Dictionary<string, int> vec)
        {
            if (vec == null)
            {
                throw new System.ArgumentNullException("vec");
            }

            // The lexicon is dereferenced immediately below, so a null lexicon can never be
            // handled later inside the loop; fail here with a descriptive error instead of
            // an anonymous NullReferenceException.
            if (Lexicon == null)
            {
                throw new System.InvalidOperationException("Lexicon must be initialized before building a feature vector.");
            }

            SparseVectorList featurevector = new SparseVectorList();

            // Next index to hand out to a word that is not in the lexicon yet.
            int lexiconindexcount = Lexicon.Count;

            foreach (var kvp in vec)
            {
                var word = kvp.Key;
                int value;
                if (!Lexicon.TryGetValue(word, out value))
                {
                    // Unknown word: register it under the next free index.
                    Lexicon.Add(word, lexiconindexcount);
                    value = lexiconindexcount;
                    lexiconindexcount++;
                }

                // Try to add to an existing entry first; insert a new entry when Increase reports failure.
                if (!featurevector.Increase(value, kvp.Value))
                {
                    featurevector.Insert(value, kvp.Value);
                }
            }

            featurevector.ListToArray();
            featurevector.count = featurevector.keyarray.Length;
            if (featurevector.count < 1)
            {
                return null;
            }

            // NOTE(review): presumably drops the list form now that the arrays are built,
            // and caches the vector norm - confirm against SparseVectorList.
            featurevector.InvalidateList();
            featurevector.GetNorm();
            return featurevector;
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Builds a <see cref="SparseVectorList"/> of term counts for a document. The document content is
        /// obtained through <c>LuceneOperations.GetDocumentContent</c> and split into words by
        /// <c>NLPOperations.Tokenize</c>; every word occurrence contributes 1 to the entry for that
        /// word's lexicon index. Words that are not yet in <c>Lexicon</c> are added to it; new indices
        /// start at the lexicon's size on entry and increase by one per new word.
        /// </summary>
        /// <param name="doc">Document whose content is turned into a feature vector.</param>
        /// <returns>The populated vector, or <c>null</c> when the vector ends up with no entries.</returns>
        /// <exception cref="System.InvalidOperationException"><c>Lexicon</c> is <c>null</c>.</exception>
        public SparseVectorList GetFeatureVector(Document doc)
        {
            // The lexicon is dereferenced immediately below, so a null lexicon can never be
            // handled later inside the loop; fail here with a descriptive error instead of
            // an anonymous NullReferenceException.
            if (Lexicon == null)
            {
                throw new System.InvalidOperationException("Lexicon must be initialized before building a feature vector.");
            }

            SparseVectorList featurevector = new SparseVectorList();

            // Next index to hand out to a word that is not in the lexicon yet.
            int lexiconindexcount = Lexicon.Count;

            var content = LuceneOperations.GetDocumentContent(doc, _fieldWeightDict, _leadingSentencesCnt);
            var words   = NLPOperations.Tokenize(content, _tokenizeConfig);

            foreach (var word in words)
            {
                int value;
                if (!Lexicon.TryGetValue(word, out value))
                {
                    // Unknown word: register it under the next free index.
                    Lexicon.Add(word, lexiconindexcount);
                    value = lexiconindexcount;
                    lexiconindexcount++;
                }

                // Try to bump an existing entry first; insert a new entry when Increase reports failure.
                if (!featurevector.Increase(value, 1))
                {
                    featurevector.Insert(value, 1);
                }
            }

            featurevector.ListToArray();
            featurevector.count = featurevector.keyarray.Length;
            if (featurevector.count < 1)
            {
                return null;
            }

            // NOTE(review): presumably drops the list form now that the arrays are built,
            // and caches the vector norm - confirm against SparseVectorList.
            featurevector.InvalidateList();
            featurevector.GetNorm();
            return featurevector;
        }