/// <summary>
/// Value equality for <see cref="FieldVals"/>: two instances are equal when
/// field name, minimum similarity (compared by bit pattern), prefix length
/// and query string all match.
/// </summary>
public override bool Equals(object obj)
{
    // Identity implies equality; null or a different runtime type rules it out.
    if (ReferenceEquals(this, obj))
    {
        return true;
    }
    if (obj is null || GetType() != obj.GetType())
    {
        return false;
    }

    FieldVals other = (FieldVals)obj;

    // string.Equals covers the null/null and null/non-null cases ordinally,
    // matching the original explicit null checks.
    if (!string.Equals(fieldName, other.fieldName, StringComparison.Ordinal))
    {
        return false;
    }

    // Compare raw bit patterns (Java Float.floatToIntBits semantics via J2N)
    // rather than float ==, so equality is exact and NaN-safe.
    if (J2N.BitConversion.SingleToInt32Bits(minSimilarity)
        != J2N.BitConversion.SingleToInt32Bits(other.minSimilarity))
    {
        return false;
    }

    if (prefixLength != other.prefixLength)
    {
        return false;
    }

    return string.Equals(queryString, other.queryString, StringComparison.Ordinal);
}
/// <summary>
/// Value equality for <see cref="FieldVals"/> (legacy variant): compares
/// field name, minimum similarity (by bit pattern), prefix length and
/// query string.
/// </summary>
public override bool Equals(object obj)
{
    if (ReferenceEquals(this, obj))
    {
        return true;
    }
    if (obj is null || GetType() != obj.GetType())
    {
        return false;
    }

    FieldVals other = (FieldVals)obj;

    // Explicit ordinal comparison for clarity and consistency with the other
    // Equals overloads in this file; string.Equals(string) is ordinal anyway,
    // and string.Equals(a, b, ...) also handles the null cases.
    if (!string.Equals(fieldName, other.fieldName, StringComparison.Ordinal))
    {
        return false;
    }

    // Emulates Java's Float.floatToIntBits via a byte round-trip so equality
    // is exact bit-pattern equality rather than float ==.
    // NOTE(review): unlike floatToIntBits this does not canonicalize NaN
    // payloads, so two different NaN encodings compare unequal — confirm
    // whether that matters to callers.
    if (BitConverter.ToInt32(BitConverter.GetBytes(minSimilarity), 0)
        != BitConverter.ToInt32(BitConverter.GetBytes(other.minSimilarity), 0))
    {
        return false;
    }

    if (prefixLength != other.prefixLength)
    {
        return false;
    }

    return string.Equals(queryString, other.queryString, StringComparison.Ordinal);
}
/// <summary>
/// Value equality for <see cref="FieldVals"/>: equal when field name,
/// minimum similarity (compared by bit pattern), prefix length and query
/// string all match.
/// </summary>
public override bool Equals(object obj)
{
    if (this == obj)
    {
        return true;
    }
    if (obj == null || GetType() != obj.GetType())
    {
        return false;
    }

    var that = (FieldVals)obj;

    // string.Equals(a, b, Ordinal) subsumes the explicit null/null and
    // null/non-null checks; bit-pattern comparison keeps float equality exact.
    bool sameField = string.Equals(fieldName, that.fieldName, StringComparison.Ordinal);
    bool sameSimilarity = Number.FloatToIntBits(minSimilarity) == Number.FloatToIntBits(that.minSimilarity);
    bool samePrefix = prefixLength == that.prefixLength;
    bool sameQuery = string.Equals(queryString, that.queryString, StringComparison.Ordinal);

    return sameField && sameSimilarity && samePrefix && sameQuery;
}
/// <summary>
/// Analyzes <c>f.queryString</c> with the field's analyzer, expands each
/// distinct token into its best fuzzy variants via
/// <see cref="SlowFuzzyTermsEnum"/>, rescales variant scores by an IDF
/// factor, and feeds the resulting <see cref="ScoreTerm"/>s into the global
/// queue <c>q</c> for later query construction in Rewrite.
/// </summary>
private void AddTerms(IndexReader reader, FieldVals f)
{
    // Nothing to add when this field carries no query string.
    if (f.queryString is null)
    {
        return;
    }
    Terms terms = MultiFields.GetTerms(reader, f.fieldName);
    if (terms is null)
    {
        return;
    }
    TokenStream ts = analyzer.GetTokenStream(f.fieldName, f.queryString);
    try
    {
        ICharTermAttribute termAtt = ts.AddAttribute<ICharTermAttribute>();
        int corpusNumDocs = reader.NumDocs;
        // Tracks tokens already expanded so duplicate tokens in the query
        // string are only processed once.
        ISet<string> processedTerms = new JCG.HashSet<string>();
        ts.Reset();
        while (ts.IncrementToken())
        {
            string term = termAtt.ToString();
            if (!processedTerms.Contains(term))
            {
                processedTerms.Add(term);
                ScoreTermQueue variantsQ = new ScoreTermQueue(MAX_VARIANTS_PER_TERM); //maxNum variants considered for any one term
                float minScore = 0;
                Term startTerm = new Term(f.fieldName, term);
                AttributeSource atts = new AttributeSource();
                IMaxNonCompetitiveBoostAttribute maxBoostAtt = atts.AddAttribute<IMaxNonCompetitiveBoostAttribute>();
#pragma warning disable 612, 618
                SlowFuzzyTermsEnum fe = new SlowFuzzyTermsEnum(terms, atts, startTerm, f.minSimilarity, f.prefixLength);
#pragma warning restore 612, 618
                //store the df so all variants use same idf
                int df = reader.DocFreq(startTerm);
                int numVariants = 0;
                int totalVariantDocFreqs = 0;
                BytesRef possibleMatch;
                IBoostAttribute boostAtt = fe.Attributes.AddAttribute<IBoostAttribute>();
                while (fe.MoveNext())
                {
                    possibleMatch = fe.Term;
                    numVariants++;
                    totalVariantDocFreqs += fe.DocFreq;
                    float score = boostAtt.Boost;
                    // Retain only the best MAX_VARIANTS_PER_TERM variants by boost;
                    // the BytesRef is deep-copied because the enum may reuse it.
                    if (variantsQ.Count < MAX_VARIANTS_PER_TERM || score > minScore)
                    {
                        ScoreTerm st = new ScoreTerm(new Term(startTerm.Field, BytesRef.DeepCopyOf(possibleMatch)), score, startTerm);
                        variantsQ.InsertWithOverflow(st);
                        minScore = variantsQ.Top.Score; // maintain minScore
                    }
                    // Once the queue is full, let the enum skip terms that cannot
                    // beat the current worst retained score; until then accept all.
                    maxBoostAtt.MaxNonCompetitiveBoost = variantsQ.Count >= MAX_VARIANTS_PER_TERM ? minScore : float.NegativeInfinity;
                }
                if (numVariants > 0)
                {
                    int avgDf = totalVariantDocFreqs / numVariants;
                    if (df == 0) //no direct match we can use as df for all variants
                    {
                        df = avgDf; //use avg df of all variants
                    }
                    // take the top variants (scored by edit distance) and reset the score
                    // to include an IDF factor then add to the global queue for ranking
                    // overall top query terms
                    int size = variantsQ.Count;
                    for (int i = 0; i < size; i++)
                    {
                        ScoreTerm st = variantsQ.Pop();
                        st.Score = (st.Score * st.Score) * sim.Idf(df, corpusNumDocs);
                        q.InsertWithOverflow(st);
                    }
                }
            }
        }
        ts.End();
    }
    finally
    {
        // Dispose the token stream even when analysis throws mid-iteration.
        IOUtils.DisposeWhileHandlingException(ts);
    }
}
/// <summary>
/// Legacy (Lucene 2.x API) counterpart of AddTerms: tokenizes
/// <c>f.queryString</c>, expands each distinct token with
/// <see cref="FuzzyTermEnum"/>, and pushes IDF-weighted
/// <see cref="ScoreTerm"/>s onto the global queue <c>q</c>.
/// </summary>
private void AddTerms(IndexReader reader, FieldVals f)
{
    // Nothing to add when this field carries no query string.
    if (f.queryString == null)
    {
        return;
    }
    // NOTE(review): ts, fe and origEnum are never closed/disposed here —
    // looks like a resource leak; confirm against this Lucene version's
    // TokenStream/TermEnum contract.
    TokenStream ts = analyzer.TokenStream(f.fieldName, new System.IO.StringReader(f.queryString));
    TermAttribute termAtt = (TermAttribute)ts.AddAttribute(typeof(TermAttribute));
    int corpusNumDocs = reader.NumDocs();
    Term internSavingTemplateTerm = new Term(f.fieldName); //optimization to avoid constructing new Term() objects
    // Tracks tokens already expanded so duplicates are only processed once.
    Hashtable processedTerms = new Hashtable();
    while (ts.IncrementToken())
    {
        String term = termAtt.Term();
        if (!processedTerms.Contains(term))
        {
            processedTerms.Add(term, term);
            ScoreTermQueue variantsQ = new ScoreTermQueue(MAX_VARIANTS_PER_TERM); //maxNum variants considered for any one term
            float minScore = 0;
            Term startTerm = internSavingTemplateTerm.CreateTerm(term);
            FuzzyTermEnum fe = new FuzzyTermEnum(reader, startTerm, f.minSimilarity, f.prefixLength);
            // Probe for an exact match of the source term to reuse its df.
            TermEnum origEnum = reader.Terms(startTerm);
            int df = 0;
            if (startTerm.Equals(origEnum.Term()))
            {
                df = origEnum.DocFreq(); //store the df so all variants use same idf
            }
            int numVariants = 0;
            int totalVariantDocFreqs = 0;
            // TermEnum protocol: the enum is positioned on its first entry when
            // constructed, hence do/while with Next() advancing at the end.
            do
            {
                Term possibleMatch = fe.Term();
                if (possibleMatch != null)
                {
                    numVariants++;
                    totalVariantDocFreqs += fe.DocFreq();
                    float score = fe.Difference();
                    // Retain only the best MAX_VARIANTS_PER_TERM variants by score.
                    if (variantsQ.Size() < MAX_VARIANTS_PER_TERM || score > minScore)
                    {
                        ScoreTerm st = new ScoreTerm(possibleMatch, score, startTerm);
                        variantsQ.Insert(st);
                        minScore = ((ScoreTerm)variantsQ.Top()).score; // maintain minScore
                    }
                }
            } while (fe.Next());
            if (numVariants > 0)
            {
                int avgDf = totalVariantDocFreqs / numVariants;
                if (df == 0) //no direct match we can use as df for all variants
                {
                    df = avgDf; //use avg df of all variants
                }
                // take the top variants (scored by edit distance) and reset the score
                // to include an IDF factor then add to the global queue for ranking
                // overall top query terms
                int size = variantsQ.Size();
                for (int i = 0; i < size; i++)
                {
                    ScoreTerm st = (ScoreTerm)variantsQ.Pop();
                    st.score = (st.score * st.score) * sim.Idf(df, corpusNumDocs);
                    q.Insert(st);
                }
            }
        }
    }
}
/// <summary>
/// Rewrites this query into a <see cref="BooleanQuery"/> of the top fuzzy
/// variants collected by AddTerms. Variants of the same source term are
/// grouped into a coord-disabled sub-query so they score as alternatives
/// rather than accumulating. The rewritten query is cached.
/// </summary>
public override Query Rewrite(IndexReader reader)
{
    // Rewrite is cached: the expensive variant expansion happens only once.
    if (rewrittenQuery != null)
    {
        return rewrittenQuery;
    }

    // Load up the list of possible terms.
    // (foreach replaces the manual IEnumerator loop: same iteration order,
    // and the enumerator is properly disposed.)
    foreach (FieldVals f in fieldVals)
    {
        AddTerms(reader, f);
    }
    // Clear the list of fields.
    fieldVals.Clear();

    BooleanQuery bq = new BooleanQuery();

    // Step 1: sort the term queries by term/field — group every fuzzy variant
    // under the source term it was derived from.
    IDictionary<Term, List<ScoreTerm>> variantQueries = new Dictionary<Term, List<ScoreTerm>>();
    int size = q.Size();
    for (int i = 0; i < size; i++)
    {
        ScoreTerm st = q.Pop();
        // TryGetValue alone suffices: this map only ever stores non-null lists.
        if (!variantQueries.TryGetValue(st.fuzziedSourceTerm, out List<ScoreTerm> l))
        {
            l = new List<ScoreTerm>();
            variantQueries[st.fuzziedSourceTerm] = l;
        }
        l.Add(st);
    }

    // Step 2: organize the sorted term queries into zero-coord scoring boolean queries.
    foreach (List<ScoreTerm> variants in variantQueries.Values)
    {
        if (variants.Count == 1)
        {
            // Optimize the common case where only one variant was selected.
            ScoreTerm st = variants[0];
            Query tq = ignoreTF
                ? (Query)new ConstantScoreQuery(new TermQuery(st.term))
                : new TermQuery(st.term, 1);
            tq.Boost = st.score; // set the boost to a mix of IDF and score
            bq.Add(tq, BooleanClause.Occur.SHOULD);
        }
        else
        {
            // Disable coord and IDF for these term variants.
            BooleanQuery termVariants = new BooleanQuery(true);
            foreach (ScoreTerm st in variants)
            {
                Query tq = ignoreTF
                    ? (Query)new ConstantScoreQuery(new TermQuery(st.term))
                    : new TermQuery(st.term, 1);
                tq.Boost = st.score; // set the boost using the ScoreTerm's score
                termVariants.Add(tq, BooleanClause.Occur.SHOULD); // add to query
            }
            bq.Add(termVariants, BooleanClause.Occur.SHOULD); // add to query
        }
    }

    //TODO possible alternative step 3 - organize above booleans into a new layer of field-based
    // booleans with a minimum-should-match of NumFields-1?
    bq.Boost = Boost;
    this.rewrittenQuery = bq;
    return bq;
}
/// <summary>
/// Page handler: builds a <c>Field</c> from the bound form properties,
/// serializes it to JSON under <c>wwwroot/database/fields</c>, and refreshes
/// the option lists. On failure, writes the error message to result1.txt
/// as a best-effort diagnostic.
/// </summary>
public void OnPostAdd()
{
    try
    {
        Field field = new Field(FieldName, FieldDesc);

        // Cardinality: anything other than "Multiple" is treated as single-valued.
        if (Multiply == "Multiple")
        {
            field.Multiply();
        }
        else
        {
            field.MakeSingle();
        }

        // Map the selected option (by its position in SetValues()) to a type name.
        if (Type == SetValues()[0])
        {
            field.changeType("Document");
        }
        else if (Type == SetValues()[1])
        {
            field.changeType("Text");
        }
        else if (Type == SetValues()[2])
        {
            field.changeType("Clause");
        }
        else if (Type == SetValues()[3])
        {
            field.changeType("Realization");
        }
        else if (Type == SetValues()[4])
        {
            field.changeType("Grapheme");
        }

        field.MakeUserFilled();

        if (Filled == SetFullfillment()[0])
        {
            field.MakeRestricted();
            // Split on LF and strip stray CRs so CRLF input yields clean values.
            string[] values = FieldVals.Split('\n');
            foreach (var value in values)
            {
                field.AddValue(Regex.Replace(value, @"\r", ""));
            }
        }

        var dirData = Path.Combine(_environment.ContentRootPath, "wwwroot", "database");
        Directory.CreateDirectory(dirData);
        var dirFields = Path.Combine(dirData, "fields");
        Directory.CreateDirectory(dirFields);

        string fieldInJSON = field.Jsonize();
        // Strip characters that are illegal or undesirable in file names.
        var FieldDBfile = Path.Combine(dirFields,
            Regex.Replace(field.name, @"[*""><:/\\|?\.,]", "") + ".json");

        // FIX: the FileStream is now inside the using chain, so the handle is
        // released even if the StreamWriter constructor throws (previously the
        // stream leaked in that case).
        using (FileStream fs = new FileStream(FieldDBfile, FileMode.Create))
        using (StreamWriter w = new StreamWriter(fs))
        {
            w.Write(fieldInJSON);
        }

        FieldList = getFields();
        MultiplyOptions = SetOptions();
        ValueTypeOptions = SetValues();
        UserFilledOptions = SetFullfillment();
    }
    catch (Exception e)
    {
        // Best-effort error report; swallowing keeps the page handler alive.
        // NOTE(review): consider routing this through ILogger instead of a
        // scratch file in the working directory.
        using (FileStream fs = new FileStream("result1.txt", FileMode.Create))
        using (StreamWriter w = new StreamWriter(fs))
        {
            w.Write(e.Message);
        }
    }
}