Beispiel #1
0
        /// <summary>
        /// Scores a posting as the square root of its <c>Count</c> multiplied by <c>_idf</c>.
        /// </summary>
        /// <param name="posting">Posting that supplies the document id and the count.</param>
        /// <returns>
        /// A <see cref="DocumentScore"/> built from the posting's document id and the computed value.
        /// </returns>
        public DocumentScore Score(DocumentPosting posting)
        {
            var dampedCount = Math.Sqrt(posting.Count);
            var weighted    = dampedCount * _idf;

            return new DocumentScore(posting.DocumentId, weighted);
        }
Beispiel #2
0
        /// <summary>
        /// Inserts a posting as a new leaf in the binary tree rooted at this node.
        /// </summary>
        /// <remarks>
        /// Postings are ordered by <c>DocumentId</c> first. When the ids are neither smaller nor
        /// greater, <c>Data</c> decides: a smaller value goes left, anything else goes right.
        /// </remarks>
        /// <param name="data">Posting to insert.</param>
        public void Add(DocumentPosting data)
        {
            var current = this;

            for (;;)
            {
                // Step 1: decide the direction for this level.
                bool descendLeft;

                if (data.DocumentId < current.Data.DocumentId)
                {
                    descendLeft = true;
                }
                else if (data.DocumentId > current.Data.DocumentId)
                {
                    descendLeft = false;
                }
                else
                {
                    // Ids tie: fall back to comparing the Data member.
                    descendLeft = data.Data < current.Data.Data;
                }

                // Step 2: attach the new leaf if the slot is free, otherwise keep walking down.
                if (descendLeft)
                {
                    if (current.Left == null)
                    {
                        current.Left = new DocumentPostingNode(data);
                        return;
                    }

                    current = current.Left;
                }
                else
                {
                    if (current.Right == null)
                    {
                        current.Right = new DocumentPostingNode(data);
                        return;
                    }

                    current = current.Right;
                }
            }
        }
Beispiel #3
0
        /// <summary>
        /// Converts the indexed fields of a document into a flat list of terms with postings.
        /// </summary>
        /// <remarks>
        /// Fields with <c>Index</c> unset are skipped. Fields with both <c>Analyze</c> and
        /// <c>Index</c> set are run through <c>Analyze</c> and produce one entry per distinct
        /// token, whose posting carries how many times the token occurred in that field.
        /// Fields with only <c>Index</c> set produce a single entry for the whole value with a
        /// count of 1.
        /// </remarks>
        /// <param name="document">Document whose fields are processed.</param>
        /// <returns>The terms and postings produced for the document.</returns>
        public IList<AnalyzedTerm> AnalyzeDocument(Document document)
        {
            var analyzedTerms = new List<AnalyzedTerm>();

            foreach (var field in document.Fields.Values)
            {
                if (!field.Index)
                {
                    continue;
                }

                if (field.Analyze)
                {
                    var tokenCounts = new Dictionary<string, int>();

                    foreach (var token in Analyze(field.Value))
                    {
                        // Single lookup: TryGetValue leaves 'seen' at 0 for a token not yet counted.
                        int seen;
                        tokenCounts.TryGetValue(token, out seen);
                        tokenCounts[token] = seen + 1;
                    }

                    foreach (var tokenGroup in tokenCounts)
                    {
                        var word    = new Word(tokenGroup.Key);
                        var term    = new Term(field.Key, word);
                        var posting = new DocumentPosting(document.Id, tokenGroup.Value);

                        analyzedTerms.Add(new AnalyzedTerm(term, posting));
                    }
                }
                else
                {
                    // Indexed but not analyzed: the raw value is stored as one term.
                    var term    = new Term(field.Key, new Word(field.Value));
                    var posting = new DocumentPosting(document.Id, 1);

                    analyzedTerms.Add(new AnalyzedTerm(term, posting));
                }
            }

            return analyzedTerms;
        }
Beispiel #4
0
        /// <summary>
        /// Builds the analyzed form of a document from its fields.
        /// </summary>
        /// <remarks>
        /// A field whose key starts with an underscore is not tokenized: its whole value becomes
        /// one term with a count of 1. Every other field (including one with an empty key) is run
        /// through <c>Analyze</c> and produces one entry per distinct token, whose posting carries
        /// how many times the token occurred in that field.
        /// </remarks>
        /// <param name="document">Document whose fields are processed.</param>
        /// <returns>An <see cref="AnalyzedDocument"/> holding the document id and the produced terms.</returns>
        public virtual AnalyzedDocument AnalyzeDocument(Document document)
        {
            var words = new List<AnalyzedTerm>();

            foreach (var field in document.Fields)
            {
                // Length check first: indexing [0] on an empty key would throw IndexOutOfRangeException.
                var storedVerbatim = field.Key.Length > 0 && field.Key[0] == '_';

                if (storedVerbatim)
                {
                    var term    = new Term(field.Key, new Word(field.Value));
                    var posting = new DocumentPosting(document.Id, 1);

                    words.Add(new AnalyzedTerm(term, posting));
                    continue;
                }

                var tokenCounts = new Dictionary<string, int>();

                foreach (var token in Analyze(field.Value))
                {
                    // Single lookup: TryGetValue leaves 'seen' at 0 for a token not yet counted.
                    int seen;
                    tokenCounts.TryGetValue(token, out seen);
                    tokenCounts[token] = seen + 1;
                }

                foreach (var tokenGroup in tokenCounts)
                {
                    var word    = new Word(tokenGroup.Key);
                    var term    = new Term(field.Key, word);
                    var posting = new DocumentPosting(document.Id, tokenGroup.Value);

                    words.Add(new AnalyzedTerm(term, posting));
                }
            }

            return new AnalyzedDocument(document.Id, words);
        }
Beispiel #5
0
        /// <summary>
        /// Lazily produces the terms and postings for every field of a document.
        /// </summary>
        /// <remarks>
        /// This is an iterator: nothing is evaluated until the result is enumerated. Fields with
        /// <c>Analyze</c> set are run through <c>Analyze</c> and yield one entry per distinct
        /// token, whose posting carries how many times the token occurred in that field. All
        /// other fields yield a single entry for the whole value with a count of 1.
        /// </remarks>
        /// <param name="document">Document whose fields are processed.</param>
        /// <returns>A deferred sequence of terms with their postings.</returns>
        public IEnumerable<AnalyzedTerm> AnalyzeDocumentInternal(Document document)
        {
            foreach (var field in document.Fields.Values)
            {
                if (field.Analyze)
                {
                    var tokenCounts = new Dictionary<string, int>();

                    foreach (var token in Analyze(field.Value))
                    {
                        // Single lookup: TryGetValue leaves 'seen' at 0 for a token not yet counted.
                        int seen;
                        tokenCounts.TryGetValue(token, out seen);
                        tokenCounts[token] = seen + 1;
                    }

                    foreach (var tokenGroup in tokenCounts)
                    {
                        var word    = new Word(tokenGroup.Key);
                        var term    = new Term(field.Key, word);
                        var posting = new DocumentPosting(document.Id, tokenGroup.Value);

                        yield return new AnalyzedTerm(term, posting);
                    }
                }
                else
                {
                    // Not analyzed: the raw value is emitted as one term.
                    var term    = new Term(field.Key, new Word(field.Value));
                    var posting = new DocumentPosting(document.Id, 1);

                    yield return new AnalyzedTerm(term, posting);
                }
            }
        }
Beispiel #6
0
 /// <summary>
 /// Scores a posting as the square root of its <c>Count</c> multiplied by <c>_idf</c>.
 /// </summary>
 /// <param name="posting">Posting that supplies the count.</param>
 /// <returns>The computed score.</returns>
 public double Score(DocumentPosting posting)
 {
     var rootOfCount = Math.Sqrt(posting.Count);

     return rootOfCount * _idf;
 }
Beispiel #7
0
 /// <summary>
 /// Scores a posting as its log-normalized term frequency multiplied by <c>_idf</c>:
 /// <c>(1 + log10(Count)) * _idf</c>.
 /// </summary>
 /// <param name="posting">Posting that supplies the count.</param>
 /// <returns>
 /// The computed score, or 0 when the count is not positive (the logarithm is undefined there).
 /// </returns>
 public double Score(DocumentPosting posting)
 {
     // Sublinear tf scaling assigns weight 0 to a non-occurring term; this also keeps
     // Log10(0) = -Infinity (and NaN for negative input) out of the result.
     if (posting.Count <= 0)
     {
         return 0;
     }

     // log-normalized term frequency; computed on its own so the "1 +" is part of the
     // tf weight and gets scaled by _idf, instead of being added after the multiplication.
     var tf = 1 + Math.Log10(posting.Count);

     return tf * _idf;
 }
Beispiel #8
0
 /// <summary>
 /// Creates an analyzed term that pairs a term with its posting.
 /// </summary>
 /// <param name="term">Value stored in <c>Term</c>.</param>
 /// <param name="posting">Value stored in <c>Posting</c>.</param>
 public AnalyzedTerm(Term term, DocumentPosting posting)
 {
     this.Term = term;
     this.Posting = posting;
 }
Beispiel #9
0
 /// <summary>
 /// Creates a node that holds the given posting.
 /// </summary>
 /// <param name="data">Value stored in <c>Data</c>.</param>
 public DocumentPostingNode(DocumentPosting data)
 {
     this.Data = data;
 }