Ejemplo n.º 1
0
        /// <summary>
        /// Scores candidate documents against the query using TF-IDF vectors and returns them
        /// ordered by descending score, optionally narrowing the candidates beforehand.
        /// </summary>
        /// <param name="windowSize">
        /// When set, the candidate document ids are passed through <c>ProximityFilter</c> with this
        /// value before scoring.
        /// </param>
        /// <param name="filterByClass">
        /// When set, the candidates are classified and only documents whose <c>ClassifiedTag</c>
        /// equals this value are returned.
        /// </param>
        /// <returns>
        /// One <c>SearchResult</c> (document id, highlight, score) per surviving document,
        /// highest score first.
        /// </returns>
        public List<SearchResult> ExecuteTfIdfSearch(int? windowSize = null, byte? filterByClass = null)
        {
            // Shared by both return paths: order by score (descending) and project to results.
            List<SearchResult> ToRankedResults(IEnumerable<KeyValuePair<int, double>> scores)
            {
                return scores
                    .OrderByDescending(x => x.Value)
                    .Select(x => new SearchResult(x.Key, Index.GetHighlight(x.Key), x.Value))
                    .ToList();
            }

            // NOTE(review): ToDictionary (here) and Dictionary.Add (in the scoring loop) throw on
            // duplicate keys, so this assumes Query yields each token only once - confirm.
            var idfs        = Query.Select(x => (token: x, idf: Index.Idf(x))).ToDictionary(x => x.token, x => x.idf);
            var queryVector = new TfIdfVector(idfs, QueryTokenCounts);

            // Candidate set: every document id the index returns for any query token.
            var docIds = new HashSet<int>();

            foreach (var token in Query)
            {
                foreach (var docId in Index.SearchByToken(token))
                {
                    docIds.Add(docId);
                }
            }

            if (windowSize.HasValue)
            {
                docIds = ProximityFilter(docIds, windowSize.Value);
            }

            var scoresByDocId = new Dictionary<int, double>();

            foreach (var docId in docIds)
            {
                // Term frequency of each query token in this document = number of recorded positions.
                var tfByToken = new Dictionary<string, int>();

                foreach (var token in Query)
                {
                    tfByToken.Add(token, Index.GetOccurrence(token, docId).Positions.Count);
                }

                var docVector = new TfIdfVector(idfs, tfByToken);
                scoresByDocId.Add(docId, docVector.Multiply(queryVector));
            }

            if (!filterByClass.HasValue)
            {
                return ToRankedResults(scoresByDocId);
            }

            var documents       = docIds.Select(x => new DocumentWrapper(Index.PureDocumentsById[x])).ToDictionary(x => x.Document.Id);
            var vectorGenerator = new VectorGenerator(documents, true);

            vectorGenerator.Process();

            var tokenMapper = new TokenMapper();
            var classifier  = new RandomForestClassifierClient();

            // Snapshot the wrappers once so that the vectors sent to the classifier and the tags
            // written back pair up by position. Indexing the list is O(1), whereas
            // documents.Values.ElementAt(i) re-enumerates the value collection on every call.
            var orderedDocuments = documents.Values.ToList();
            var result           = classifier.Classify(orderedDocuments.Select(x => x.CreateClassificationVector(tokenMapper)).ToList());

            for (int i = 0; i < result.Count; i++)
            {
                orderedDocuments[i].ClassifiedTag = result[i];
            }

            var filteredResult = scoresByDocId.Where(x => documents[x.Key].ClassifiedTag == filterByClass).ToList();

            // filteredResult holds the documents that were KEPT; the removed count is the difference.
            var removedCount = scoresByDocId.Count - filteredResult.Count;

            Console.WriteLine($"Removed {removedCount} items with other classes");

            return ToRankedResults(filteredResult);
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Rebuilds <c>IntVector</c> from <c>FinalVector</c>, replacing each entry's key with the
        /// id returned by <paramref name="mapper"/> and keeping the entry's value unchanged.
        /// </summary>
        /// <param name="mapper">Supplies the integer id for every key via <c>GetOrCreateId</c>.</param>
        internal void ConvertTokenToId(TokenMapper mapper)
        {
            // Start from an empty vector; any previous contents are discarded.
            IntVector = new Dictionary<int, double>();

            foreach (var entry in FinalVector)
            {
                var tokenId = mapper.GetOrCreateId(entry.Key);

                IntVector.Add(tokenId, entry.Value);
            }
        }