/// <summary>
/// Builds a sparse feature vector from a term-to-count map. Every term that is
/// not yet present in <c>Lexicon</c> is added to it with the next free index.
/// </summary>
/// <param name="vec">
/// Map from term to count. Each count is added to the vector entry keyed by the
/// term's lexicon index.
/// </param>
/// <returns>
/// The populated vector, or <c>null</c> when the resulting vector has no entries.
/// </returns>
/// <exception cref="System.ArgumentNullException"><paramref name="vec"/> is null.</exception>
/// <exception cref="System.InvalidOperationException"><c>Lexicon</c> is null.</exception>
public SparseVectorList GetFeatureVector(Dictionary<string, int> vec)
{
    if (vec == null)
    {
        throw new System.ArgumentNullException("vec");
    }

    // The lexicon is dereferenced unconditionally below, so fail with a clear
    // message here instead of a NullReferenceException further down.
    if (Lexicon == null)
    {
        throw new System.InvalidOperationException("Lexicon must be initialized before building a feature vector.");
    }

    SparseVectorList featurevector = new SparseVectorList();

    // Index handed to the next unseen term; starts at the current lexicon size.
    int nextLexiconIndex = Lexicon.Count;

    foreach (var kvp in vec)
    {
        string word = kvp.Key;

        int index;
        if (!Lexicon.TryGetValue(word, out index))
        {
            // Unseen term: register it under the next free index.
            index = nextLexiconIndex;
            Lexicon.Add(word, index);
            nextLexiconIndex++;
        }

        // Increase reports false when it did not update an entry for this index;
        // in that case insert a new entry.
        if (!featurevector.Increase(index, kvp.Value))
        {
            featurevector.Insert(index, kvp.Value);
        }
    }

    featurevector.ListToArray();
    featurevector.count = featurevector.keyarray.Length;

    if (featurevector.count < 1)
    {
        // Callers receive null (not an empty vector) when there are no entries.
        return null;
    }

    featurevector.InvalidateList();
    featurevector.GetNorm();
    return featurevector;
}
/// <summary>
/// Builds a sparse feature vector from a document. The document content is
/// extracted via <c>LuceneOperations.GetDocumentContent</c>, tokenized via
/// <c>NLPOperations.Tokenize</c>, and each token occurrence adds 1 to the entry
/// keyed by the token's lexicon index. Tokens not yet present in <c>Lexicon</c>
/// are added to it with the next free index.
/// </summary>
/// <param name="doc">Document whose content is turned into a feature vector.</param>
/// <returns>
/// The populated vector, or <c>null</c> when the resulting vector has no entries.
/// </returns>
/// <exception cref="System.InvalidOperationException"><c>Lexicon</c> is null.</exception>
public SparseVectorList GetFeatureVector(Document doc)
{
    // The lexicon is dereferenced unconditionally below, so fail with a clear
    // message here instead of a NullReferenceException further down.
    if (Lexicon == null)
    {
        throw new System.InvalidOperationException("Lexicon must be initialized before building a feature vector.");
    }

    SparseVectorList featurevector = new SparseVectorList();

    // Index handed to the next unseen token; starts at the current lexicon size.
    int nextLexiconIndex = Lexicon.Count;

    var content = LuceneOperations.GetDocumentContent(doc, _fieldWeightDict, _leadingSentencesCnt);
    var words = NLPOperations.Tokenize(content, _tokenizeConfig);

    foreach (var word in words)
    {
        int index;
        if (!Lexicon.TryGetValue(word, out index))
        {
            // Unseen token: register it under the next free index.
            index = nextLexiconIndex;
            Lexicon.Add(word, index);
            nextLexiconIndex++;
        }

        // Increase reports false when it did not update an entry for this index;
        // in that case insert a new entry with a count of 1.
        if (!featurevector.Increase(index, 1))
        {
            featurevector.Insert(index, 1);
        }
    }

    featurevector.ListToArray();
    featurevector.count = featurevector.keyarray.Length;

    // NOTE: a call to featurevector.SumUpValueArray() was disabled at this point
    // in the original code and remains disabled.

    if (featurevector.count < 1)
    {
        // Callers receive null (not an empty vector) when there are no entries.
        return null;
    }

    featurevector.InvalidateList();
    featurevector.GetNorm();
    return featurevector;
}