Example #1
0
            /// <summary>
            /// Compares this instance with <paramref name="obj"/> member by member.
            /// </summary>
            /// <param name="obj">The object to compare with; may be <c>null</c>.</param>
            /// <returns>
            /// <c>true</c> when <paramref name="obj"/> is this very instance, or is an object of
            /// exactly the same runtime type whose field name, minimum similarity, prefix length
            /// and query string all match; otherwise <c>false</c>.
            /// </returns>
            public override bool Equals(object obj)
            {
                if (ReferenceEquals(this, obj))
                {
                    return true;
                }
                if (obj is null || GetType() != obj.GetType())
                {
                    return false;
                }

                FieldVals that = (FieldVals)obj;

                // Strings are compared ordinally; the static string.Equals treats two nulls as
                // equal and a single null as unequal. The similarity is compared through the
                // integer value produced by SingleToInt32Bits instead of the float == operator.
                return string.Equals(fieldName, that.fieldName, StringComparison.Ordinal)
                    && J2N.BitConversion.SingleToInt32Bits(minSimilarity) == J2N.BitConversion.SingleToInt32Bits(that.minSimilarity)
                    && prefixLength == that.prefixLength
                    && string.Equals(queryString, that.queryString, StringComparison.Ordinal);
            }
Example #2
0
            /// <summary>
            /// Compares this instance with <paramref name="obj"/> member by member.
            /// </summary>
            /// <param name="obj">The object to compare with; may be <c>null</c>.</param>
            /// <returns>
            /// <c>true</c> when <paramref name="obj"/> is this very instance, or has exactly the
            /// same runtime type and equal field name, minimum similarity (by bit pattern),
            /// prefix length and query string; otherwise <c>false</c>.
            /// </returns>
            public override bool Equals(Object obj)
            {
                if (Object.ReferenceEquals(this, obj))
                {
                    return(true);
                }
                if (obj == null || GetType() != obj.GetType())
                {
                    return(false);
                }

                FieldVals rhs = (FieldVals)obj;

                // A null field name only matches another null field name.
                bool fieldNameDiffers = fieldName == null
                    ? rhs.fieldName != null
                    : !fieldName.Equals(rhs.fieldName);
                if (fieldNameDiffers)
                {
                    return(false);
                }

                // Stands in for the Java original's
                // Float.floatToIntBits(minSimilarity) != Float.floatToIntBits(other.minSimilarity)
                int ownSimilarityBits = BitConverter.ToInt32(BitConverter.GetBytes(minSimilarity), 0);
                int rhsSimilarityBits = BitConverter.ToInt32(BitConverter.GetBytes(rhs.minSimilarity), 0);
                if (ownSimilarityBits != rhsSimilarityBits)
                {
                    return(false);
                }

                if (prefixLength != rhs.prefixLength)
                {
                    return(false);
                }

                // Same null handling as for the field name.
                bool queryStringDiffers = queryString == null
                    ? rhs.queryString != null
                    : !queryString.Equals(rhs.queryString);
                return(!queryStringDiffers);
            }
Example #3
0
            /// <summary>
            /// Compares this instance with <paramref name="obj"/> member by member.
            /// </summary>
            /// <param name="obj">The object to compare with; may be <c>null</c>.</param>
            /// <returns>
            /// <c>true</c> when <paramref name="obj"/> is this very instance, or has exactly the
            /// same runtime type and equal field name, minimum similarity, prefix length and
            /// query string; otherwise <c>false</c>.
            /// </returns>
            public override bool Equals(object obj)
            {
                if (object.ReferenceEquals(this, obj))
                {
                    return true;
                }
                if (obj == null)
                {
                    return false;
                }
                if (GetType() != obj.GetType())
                {
                    return false;
                }

                FieldVals candidate = (FieldVals)obj;

                // Ordinal comparison; the static overload handles null on either side.
                if (!string.Equals(fieldName, candidate.fieldName, StringComparison.Ordinal))
                {
                    return false;
                }
                // Compare the similarity through Number.FloatToIntBits rather than float ==.
                if (Number.FloatToIntBits(minSimilarity) != Number.FloatToIntBits(candidate.minSimilarity))
                {
                    return false;
                }
                if (prefixLength != candidate.prefixLength)
                {
                    return false;
                }

                return string.Equals(queryString, candidate.queryString, StringComparison.Ordinal);
            }
Example #4
0
        /// <summary>
        /// Analyzes the query string of <paramref name="f"/> and, for every distinct token,
        /// enumerates fuzzy variants from the index, keeps the best-scoring ones and pushes
        /// them (re-scored with an IDF factor) onto the shared queue <c>q</c>.
        /// </summary>
        /// <param name="reader">Index reader supplying terms, document frequencies and the document count.</param>
        /// <param name="f">Field name, query text, minimum similarity and prefix length to expand.</param>
        private void AddTerms(IndexReader reader, FieldVals f)
        {
            // Nothing to expand without query text.
            if (f.queryString is null)
            {
                return;
            }

            // Nothing to expand when no terms are available for the field.
            Terms fieldTerms = MultiFields.GetTerms(reader, f.fieldName);
            if (fieldTerms is null)
            {
                return;
            }

            TokenStream stream = analyzer.GetTokenStream(f.fieldName, f.queryString);
            try
            {
                ICharTermAttribute charTerm = stream.AddAttribute <ICharTermAttribute>();

                int           docCount = reader.NumDocs;
                ISet <string> seen     = new JCG.HashSet <string>();
                stream.Reset();
                while (stream.IncrementToken())
                {
                    string tokenText = charTerm.ToString();

                    // Each distinct token is expanded only once; Add returns false for repeats.
                    if (!seen.Add(tokenText))
                    {
                        continue;
                    }

                    // Holds at most MAX_VARIANTS_PER_TERM variants for this token.
                    ScoreTermQueue  perTermQueue = new ScoreTermQueue(MAX_VARIANTS_PER_TERM);
                    float           lowestKept   = 0;
                    Term            sourceTerm   = new Term(f.fieldName, tokenText);
                    AttributeSource attributes   = new AttributeSource();
                    IMaxNonCompetitiveBoostAttribute nonCompetitive =
                        attributes.AddAttribute <IMaxNonCompetitiveBoostAttribute>();
#pragma warning disable 612, 618
                    SlowFuzzyTermsEnum fuzzyEnum = new SlowFuzzyTermsEnum(fieldTerms, attributes, sourceTerm, f.minSimilarity, f.prefixLength);
#pragma warning restore 612, 618
                    // Remember the df of the source term so every variant shares one idf.
                    int             docFreq      = reader.DocFreq(sourceTerm);
                    int             variantCount = 0;
                    int             docFreqSum   = 0;
                    IBoostAttribute boost        =
                        fuzzyEnum.Attributes.AddAttribute <IBoostAttribute>();

                    while (fuzzyEnum.MoveNext())
                    {
                        BytesRef candidate = fuzzyEnum.Term;
                        variantCount++;
                        docFreqSum += fuzzyEnum.DocFreq;
                        float candidateScore = boost.Boost;

                        bool queueHasRoom = perTermQueue.Count < MAX_VARIANTS_PER_TERM;
                        if (queueHasRoom || candidateScore > lowestKept)
                        {
                            // Deep-copy the bytes before storing them in the queued term.
                            ScoreTerm kept = new ScoreTerm(new Term(sourceTerm.Field, BytesRef.DeepCopyOf(candidate)), candidateScore, sourceTerm);
                            perTermQueue.InsertWithOverflow(kept);
                            lowestKept = perTermQueue.Top.Score; // track the current minimum
                        }

                        // Once the queue is full, report the lowest kept score to the enum as
                        // the non-competitive bound; until then there is no bound.
                        if (perTermQueue.Count >= MAX_VARIANTS_PER_TERM)
                        {
                            nonCompetitive.MaxNonCompetitiveBoost = lowestKept;
                        }
                        else
                        {
                            nonCompetitive.MaxNonCompetitiveBoost = float.NegativeInfinity;
                        }
                    }

                    if (variantCount == 0)
                    {
                        continue;
                    }

                    // No direct match for the source term: fall back to the average df
                    // (integer division) of all variants.
                    if (docFreq == 0)
                    {
                        docFreq = docFreqSum / variantCount;
                    }

                    // Drain the per-term queue, folding an IDF factor into the squared
                    // edit-distance score, and hand each entry to the global queue that ranks
                    // the overall top query terms.
                    for (int remaining = perTermQueue.Count; remaining > 0; remaining--)
                    {
                        ScoreTerm ranked = perTermQueue.Pop();
                        ranked.Score = (ranked.Score * ranked.Score) * sim.Idf(docFreq, docCount);
                        q.InsertWithOverflow(ranked);
                    }
                }
                stream.End();
            }
            finally
            {
                IOUtils.DisposeWhileHandlingException(stream);
            }
        }
Example #5
0
        /// <summary>
        /// Analyzes the query string of <paramref name="f"/> and, for every distinct token,
        /// enumerates fuzzy variants from the index, keeps the best-scoring ones and inserts
        /// them (re-scored with an IDF factor) into the shared queue <c>q</c>.
        /// </summary>
        /// <remarks>
        /// The token stream and both term enumerators opened here are closed in
        /// <c>finally</c> blocks so they are released even when analysis or enumeration throws.
        /// </remarks>
        /// <param name="reader">Index reader supplying terms, document frequencies and the document count.</param>
        /// <param name="f">Field name, query text, minimum similarity and prefix length to expand.</param>
        private void AddTerms(IndexReader reader, FieldVals f)
        {
            if (f.queryString == null)
            {
                return;
            }
            TokenStream ts = analyzer.TokenStream(f.fieldName, new System.IO.StringReader(f.queryString));

            try
            {
                TermAttribute termAtt = (TermAttribute)ts.AddAttribute(typeof(TermAttribute));

                int       corpusNumDocs            = reader.NumDocs();
                Term      internSavingTemplateTerm = new Term(f.fieldName); //optimization to avoid constructing new Term() objects
                Hashtable processedTerms           = new Hashtable();       //used as a set: key and value are the same string

                while (ts.IncrementToken())
                {
                    String term = termAtt.Term();
                    if (processedTerms.Contains(term))
                    {
                        continue; // each distinct token is expanded only once
                    }
                    processedTerms.Add(term, term);

                    ScoreTermQueue variantsQ = new ScoreTermQueue(MAX_VARIANTS_PER_TERM); //maxNum variants considered for any one term
                    float          minScore  = 0;
                    Term           startTerm = internSavingTemplateTerm.CreateTerm(term);
                    int            df                   = 0;
                    int            numVariants          = 0;
                    int            totalVariantDocFreqs = 0;

                    FuzzyTermEnum fe = new FuzzyTermEnum(reader, startTerm, f.minSimilarity, f.prefixLength);
                    try
                    {
                        TermEnum origEnum = reader.Terms(startTerm);
                        try
                        {
                            if (startTerm.Equals(origEnum.Term()))
                            {
                                df = origEnum.DocFreq(); //store the df so all variants use same idf
                            }
                        }
                        finally
                        {
                            origEnum.Close();
                        }

                        do
                        {
                            Term possibleMatch = fe.Term();
                            if (possibleMatch != null)
                            {
                                numVariants++;
                                totalVariantDocFreqs += fe.DocFreq();
                                float score = fe.Difference();
                                if (variantsQ.Size() < MAX_VARIANTS_PER_TERM || score > minScore)
                                {
                                    ScoreTerm st = new ScoreTerm(possibleMatch, score, startTerm);
                                    variantsQ.Insert(st);
                                    minScore = ((ScoreTerm)variantsQ.Top()).score; // maintain minScore
                                }
                            }
                        }while (fe.Next());
                    }
                    finally
                    {
                        fe.Close();
                    }

                    if (numVariants > 0)
                    {
                        int avgDf = totalVariantDocFreqs / numVariants;
                        if (df == 0)    //no direct match we can use as df for all variants
                        {
                            df = avgDf; //use avg df of all variants
                        }

                        // take the top variants (scored by edit distance) and reset the score
                        // to include an IDF factor then add to the global queue for ranking
                        // overall top query terms
                        int size = variantsQ.Size();
                        for (int i = 0; i < size; i++)
                        {
                            ScoreTerm st = (ScoreTerm)variantsQ.Pop();
                            st.score = (st.score * st.score) * sim.Idf(df, corpusNumDocs);
                            q.Insert(st);
                        }
                    }
                }
            }
            finally
            {
                ts.Close();
            }
        }
Example #6
0
        /// <summary>
        /// Expands the configured field values into a <see cref="BooleanQuery"/> of term
        /// queries, one SHOULD clause per source term, and caches the result.
        /// </summary>
        /// <param name="reader">Index reader passed on to <c>AddTerms</c> for term expansion.</param>
        /// <returns>The cached rewritten query if one exists; otherwise the newly built query.</returns>
        public override Query Rewrite(IndexReader reader)
        {
            // A previous rewrite is reused as-is.
            if (rewrittenQuery != null)
            {
                return rewrittenQuery;
            }

            // Load up the candidate terms for every field, then drop the field list.
            foreach (FieldVals f in fieldVals)
            {
                AddTerms(reader, f);
            }
            fieldVals.Clear();

            BooleanQuery bq = new BooleanQuery();

            // Step 1: group the queued score terms by the source term they were derived from.
            IDictionary <Term, List <ScoreTerm> > variantQueries = new Dictionary <Term, List <ScoreTerm> >();
            for (int remaining = q.Size(); remaining > 0; remaining--)
            {
                ScoreTerm        popped = q.Pop();
                List <ScoreTerm> bucket;
                bool             haveBucket = variantQueries.TryGetValue(popped.fuzziedSourceTerm, out bucket) && bucket != null;
                if (!haveBucket)
                {
                    bucket = new List <ScoreTerm>();
                    variantQueries[popped.fuzziedSourceTerm] = bucket;
                }
                bucket.Add(popped);
            }

            // Step 2: turn every group into clauses. A group with a single variant is added
            // straight to the outer query; larger groups are wrapped in their own
            // BooleanQuery constructed with 'true' (disables coord and IDF for the variants).
            foreach (List <ScoreTerm> variants in variantQueries.Values)
            {
                bool         singleVariant = variants.Count == 1;
                BooleanQuery target        = singleVariant ? bq : new BooleanQuery(true);

                foreach (ScoreTerm st in variants)
                {
                    Query tq;
                    if (ignoreTF)
                    {
                        tq = new ConstantScoreQuery(new TermQuery(st.term));
                    }
                    else
                    {
                        tq = new TermQuery(st.term, 1);
                    }
                    tq.Boost = st.score; // boost carries the ScoreTerm's (IDF-mixed) score
                    target.Add(tq, BooleanClause.Occur.SHOULD);
                }

                if (!singleVariant)
                {
                    bq.Add(target, BooleanClause.Occur.SHOULD);
                }
            }

            //TODO possible alternative step 3 - organize above booleans into a new layer of field-based
            // booleans with a minimum-should-match of NumFields-1?
            bq.Boost            = Boost;
            this.rewrittenQuery = bq;
            return bq;
        }
Example #7
0
 /// <summary>
 /// Builds a <c>Field</c> from the bound form values, writes it as JSON into
 /// <c>wwwroot/database/fields</c> under the content root, and refreshes the option lists.
 /// </summary>
 /// <remarks>
 /// Any exception is caught and not rethrown; only its message is written to
 /// <c>result1.txt</c>.
 /// </remarks>
 public void OnPostAdd()
 {
     try
     {
         Field created = new Field(FieldName, FieldDesc);

         // Cardinality.
         if (Multiply == "Multiple")
         {
             created.Multiply();
         }
         else
         {
             created.MakeSingle();
         }

         // The i-th entry of SetValues() selects the i-th type name; no match leaves the type untouched.
         string[] typeNames = { "Document", "Text", "Clause", "Realization", "Grapheme" };
         for (int i = 0; i < typeNames.Length; i++)
         {
             if (Type == SetValues()[i])
             {
                 created.changeType(typeNames[i]);
                 break;
             }
         }

         created.MakeUserFilled();
         if (Filled == SetFullfillment()[0])
         {
             // Restricted field: one allowed value per line, with carriage returns stripped.
             created.MakeRestricted();
             foreach (var line in FieldVals.Split('\n'))
             {
                 created.AddValue(Regex.Replace(line, @"\r", ""));
             }
         }

         // Ensure the storage directories exist.
         var databaseDir = Path.Combine(_environment.ContentRootPath, "wwwroot", "database");
         Directory.CreateDirectory(databaseDir);
         var fieldsDir = Path.Combine(databaseDir, "fields");
         Directory.CreateDirectory(fieldsDir);

         // The file name is the field name with the listed punctuation characters removed.
         string json       = created.Jsonize();
         string safeName   = Regex.Replace(created.name, @"[*""><:/\\|?\.,]", "");
         var    targetPath = Path.Combine(fieldsDir, safeName + ".json");
         using (StreamWriter writer = new StreamWriter(new FileStream(targetPath, FileMode.Create)))
         {
             writer.Write(json);
         }

         // Refresh the lists bound to the page.
         FieldList         = getFields();
         MultiplyOptions   = SetOptions();
         ValueTypeOptions  = SetValues();
         UserFilledOptions = SetFullfillment();
     }
     catch (Exception e)
     {
         using (StreamWriter errorWriter = new StreamWriter(new FileStream("result1.txt", FileMode.Create)))
         {
             errorWriter.Write(e.Message);
         }
     }
 }