/// <summary>
/// Resolves the shared <c>TokenWrapper</c> used to bridge the old (pre-3.0) token API.
/// Returns <c>null</c> when <c>onlyUseNewAPI</c> is set (no wrapper needed); otherwise
/// reuses the wrapper of the input <see cref="TokenStream"/> if one exists, or verifies
/// that a single <c>TokenWrapper</c> instance backs all six basic attributes.
/// </summary>
/// <exception cref="System.NotSupportedException">
/// if the basic attributes are not all implemented by one <c>TokenWrapper</c> instance
/// </exception>
private TokenWrapper InitTokenWrapper(AttributeSource input)
{
    if (onlyUseNewAPI)
    {
        // no wrapper needed
        return (null);
    }
    else
    {
        // if possible get the wrapper from the filter's input stream
        if (input is TokenStream && ((TokenStream)input).tokenWrapper != null)
        {
            return (((TokenStream)input).tokenWrapper);
        }
        // check that all attributes are implemented by the same TokenWrapper instance
        // (the first AddAttribute registers the wrapper; the remaining calls must all
        // resolve to that same instance)
        IAttribute att = AddAttribute(typeof(TermAttribute));
        if (att is TokenWrapper && AddAttribute(typeof(TypeAttribute)) == att && AddAttribute(typeof(PositionIncrementAttribute)) == att && AddAttribute(typeof(FlagsAttribute)) == att && AddAttribute(typeof(OffsetAttribute)) == att && AddAttribute(typeof(PayloadAttribute)) == att)
        {
            return ((TokenWrapper)att);
        }
        else
        {
            throw new System.NotSupportedException("If onlyUseNewAPI is disabled, all basic Attributes must be implemented by the internal class " + "TokenWrapper. Please make sure, that all TokenStreams/TokenFilters in this chain have been " + "instantiated with this flag disabled and do not add any custom instances for the basic Attributes!");
        }
    }
}
/// <summary>
/// Verifies that CloneAttributes() produces a copy exposing the same attribute
/// interfaces in registration order, backed by distinct yet value-equal
/// implementation instances.
/// </summary>
public virtual void TestCloneAttributes()
{
    AttributeSource src = new AttributeSource();
    ITermAttribute termAtt = src.AddAttribute<ITermAttribute>();
    ITypeAttribute typeAtt = src.AddAttribute<ITypeAttribute>();
    termAtt.SetTermBuffer("TestTerm");
    typeAtt.Type = "TestType";
    AttributeSource clone = src.CloneAttributes();
    // attribute interfaces must enumerate in the order they were added
    System.Collections.Generic.IEnumerator<Type> it = clone.GetAttributeTypesIterator().GetEnumerator();
    Assert.IsTrue(it.MoveNext());
    Assert.AreEqual(typeof(ITermAttribute), it.Current, "TermAttribute must be the first attribute");
    Assert.IsTrue(it.MoveNext());
    Assert.AreEqual(typeof(ITypeAttribute), it.Current, "TypeAttribute must be the second attribute");
    Assert.IsFalse(it.MoveNext(), "No more attributes");
    // clones are separate instances but compare equal to the originals
    ITermAttribute termAtt2 = clone.GetAttribute<ITermAttribute>();
    ITypeAttribute typeAtt2 = clone.GetAttribute<ITypeAttribute>();
    Assert.IsFalse(ReferenceEquals(termAtt2, termAtt), "TermAttribute of original and clone must be different instances");
    Assert.IsFalse(ReferenceEquals(typeAtt2, typeAtt), "TypeAttribute of original and clone must be different instances");
    Assert.AreEqual(termAtt2, termAtt, "TermAttribute of original and clone must be equal");
    Assert.AreEqual(typeAtt2, typeAtt, "TypeAttribute of original and clone must be equal");
}
/// <summary>
/// Performs a clone of all <see cref="Attribute"/> instances returned in a new
/// <see cref="AttributeSource"/> instance. This method can be used to e.g. create another <see cref="Analysis.TokenStream"/>
/// with exactly the same attributes (using <see cref="AttributeSource(AttributeSource)"/>).
/// You can also use it as a (non-performant) replacement for <see cref="CaptureState()"/>, if you need to look
/// into / modify the captured state.
/// </summary>
public AttributeSource CloneAttributes()
{
    var target = new AttributeSource(this.factory);
    if (HasAttributes)
    {
        // Clone each distinct implementation exactly once; a single impl may
        // back several attribute interfaces.
        for (State s = GetCurrentState(); s != null; s = s.next)
        {
            var copy = (Attribute)s.attribute.Clone();
            Type implType = copy.GetType();
            if (!target.attributeImpls.ContainsKey(implType))
            {
                target.attributeImpls.Add(implType, copy);
            }
        }
        // Re-bind every interface entry to the cloned impl of the same type.
        foreach (var pair in this.attributes)
        {
            target.attributes.Add(pair.Key, target.attributeImpls[pair.Value.GetType()]);
        }
    }
    return target;
}
/// <summary>
/// Verifies the ToString() format "(att1,att2)" and that a single implementation
/// (Token) backing several attribute interfaces is registered only once.
/// </summary>
public virtual void TestToStringAndMultiAttributeImplementations()
{
    AttributeSource src = new AttributeSource();
    ITermAttribute termAtt = src.AddAttribute<ITermAttribute>();
    ITypeAttribute typeAtt = src.AddAttribute<ITypeAttribute>();
    termAtt.SetTermBuffer("TestTerm");
    typeAtt.Type = "TestType";
    Assert.AreEqual("(" + termAtt.ToString() + "," + typeAtt.ToString() + ")", src.ToString(), "Attributes should appear in original order");
    System.Collections.Generic.IEnumerator<Attribute> it = src.GetAttributeImplsIterator().GetEnumerator();
    Assert.IsTrue(it.MoveNext(), "Iterator should have 2 attributes left");
    Assert.AreSame(termAtt, it.Current, "First AttributeImpl from iterator should be termAtt");
    Assert.IsTrue(it.MoveNext(), "Iterator should have 1 attributes left");
    Assert.AreSame(typeAtt, it.Current, "Second AttributeImpl from iterator should be typeAtt");
    Assert.IsFalse(it.MoveNext(), "Iterator should have 0 attributes left");
    src = new AttributeSource();
    src.AddAttributeImpl(new Token());
    // this should not add a new attribute as Token implements TermAttribute, too
    termAtt = src.AddAttribute<ITermAttribute>();
    Assert.IsTrue(termAtt is Token, "TermAttribute should be implemented by Token");
    // get the Token attribute and check, that it is the only one
    it = src.GetAttributeImplsIterator().GetEnumerator();
    Assert.IsTrue(it.MoveNext());
    Token tok = (Token)it.Current;
    Assert.IsFalse(it.MoveNext(), "There should be only one attribute implementation instance");
    termAtt.SetTermBuffer("TestTerm");
    Assert.AreEqual("(" + tok.ToString() + ")", src.ToString(), "Token should only printed once");
}
/// <summary> Performs a clone of all {@link AttributeImpl} instances returned in a new
/// AttributeSource instance. This method can be used to e.g. create another TokenStream
/// with exactly the same attributes (using {@link #AttributeSource(AttributeSource)})
/// </summary>
public virtual AttributeSource CloneAttributes()
{
    AttributeSource clone = new AttributeSource(this.factory);
    // first clone the impls
    if (HasAttributes())
    {
        for (State state = GetCurrentState(); state != null; state = state.next)
        {
            AttributeImpl impl = (AttributeImpl)state.attribute.Clone();
            // a single impl can back several attribute interfaces (e.g. Token),
            // so clone each distinct implementation only once
            if (!clone.attributeImpls.ContainsKey(impl.GetType()))
            {
                clone.attributeImpls.Add(new Support.AttributeImplItem(impl.GetType(), impl));
            }
        }
    }
    // now the interfaces: point each interface entry at the cloned impl of the same type
    foreach (Support.AttributeImplItem att in this.attributes)
    {
        clone.attributes.Add(new Support.AttributeImplItem(att.Key, clone.attributeImpls[att.Value.GetType()].Value));
    }
    return (clone);
}
/// <summary>
/// This method is for introspection of attributes, it should simply
/// add the key/values this attribute holds to the given <see cref="IAttributeReflector"/>.
///
/// <para/>The default implementation calls <see cref="IAttributeReflector.Reflect(Type, string, object)"/> for all
/// non-static fields from the implementing class, using the field name as key
/// and the field value as value. The <see cref="IAttribute"/> class is also determined by Reflection.
/// Please note that the default implementation can only handle single-Attribute
/// implementations.
///
/// <para/>Custom implementations look like this (e.g. for a combined attribute implementation):
/// <code>
/// public void ReflectWith(IAttributeReflector reflector)
/// {
///     reflector.Reflect(typeof(ICharTermAttribute), "term", GetTerm());
///     reflector.Reflect(typeof(IPositionIncrementAttribute), "positionIncrement", GetPositionIncrement());
/// }
/// </code>
///
/// <para/>If you implement this method, make sure that for each invocation, the same set of <see cref="IAttribute"/>
/// interfaces and keys are passed to <see cref="IAttributeReflector.Reflect(Type, string, object)"/> in the same order, but possibly
/// different values. So don't automatically exclude e.g. <c>null</c> properties!
/// </summary>
/// <seealso cref="ReflectAsString(bool)"/>
public virtual void ReflectWith(IAttributeReflector reflector) // LUCENENET NOTE: This method was abstract in Lucene
{
    Type clazz = this.GetType();
    LinkedList<WeakReference<Type>> interfaces = AttributeSource.GetAttributeInterfaces(clazz);
    if (interfaces.Count != 1)
    {
        throw UnsupportedOperationException.Create(clazz.Name + " implements more than one Attribute interface, the default ReflectWith() implementation cannot handle this.");
    }
    interfaces.First.Value.TryGetTarget(out Type interf);
    //problem: the interfaces list has weak references that could have expired already
    // NOTE(review): when the weak reference has expired, `interf` is null here and
    // is passed to Reflect() as-is — confirm whether reflector implementations tolerate that.
    FieldInfo[] fields = clazz.GetFields(BindingFlags.Instance | BindingFlags.Public | BindingFlags.NonPublic | BindingFlags.DeclaredOnly);
    try
    {
        // reflect every declared instance field (public and non-public) of the concrete class
        for (int i = 0; i < fields.Length; i++)
        {
            FieldInfo f = fields[i];
            if (f.IsStatic)
            {
                continue;
            }
            reflector.Reflect(interf, f.Name, f.GetValue(this));
        }
    }
    catch (Exception e) when (e.IsIllegalAccessException())
    {
        // this should never happen, because we're just accessing fields
        // from 'this'
        throw RuntimeException.Create(e);
    }
}
/// <summary>
/// Verifies CaptureState()/RestoreState(): restoring reinstates the captured
/// values, leaves attributes absent from the captured state untouched, and
/// throws when the target source is missing an attribute present in the state.
/// </summary>
public virtual void TestCaptureState()
{
    // init a first instance
    AttributeSource src = new AttributeSource();
    TermAttribute termAtt = (TermAttribute)src.AddAttribute(typeof(TermAttribute));
    TypeAttribute typeAtt = (TypeAttribute)src.AddAttribute(typeof(TypeAttribute));
    termAtt.SetTermBuffer("TestTerm");
    typeAtt.SetType("TestType");
    int hashCode = src.GetHashCode();
    AttributeSource.State state = src.CaptureState();

    // modify the attributes
    termAtt.SetTermBuffer("AnotherTestTerm");
    typeAtt.SetType("AnotherTestType");
    Assert.IsTrue(hashCode != src.GetHashCode(), "Hash code should be different");

    src.RestoreState(state);
    Assert.AreEqual("TestTerm", termAtt.Term());
    Assert.AreEqual("TestType", typeAtt.Type());
    Assert.AreEqual(hashCode, src.GetHashCode(), "Hash code should be equal after restore");

    // restore into an exact configured copy
    AttributeSource copy = new AttributeSource();
    copy.AddAttribute(typeof(TermAttribute));
    copy.AddAttribute(typeof(TypeAttribute));
    copy.RestoreState(state);
    Assert.AreEqual(src.GetHashCode(), copy.GetHashCode(), "Both AttributeSources should have same hashCode after restore");
    Assert.AreEqual(src, copy, "Both AttributeSources should be equal after restore");

    // init a second instance (with attributes in different order and one additional attribute)
    AttributeSource src2 = new AttributeSource();
    typeAtt = (TypeAttribute)src2.AddAttribute(typeof(TypeAttribute));
    Lucene.Net.Analysis.Tokenattributes.FlagsAttribute flagsAtt = (Lucene.Net.Analysis.Tokenattributes.FlagsAttribute)src2.AddAttribute(typeof(Lucene.Net.Analysis.Tokenattributes.FlagsAttribute));
    termAtt = (TermAttribute)src2.AddAttribute(typeof(TermAttribute));
    flagsAtt.SetFlags(12345);
    src2.RestoreState(state);
    Assert.AreEqual("TestTerm", termAtt.Term());
    Assert.AreEqual("TestType", typeAtt.Type());
    Assert.AreEqual(12345, flagsAtt.GetFlags(), "FlagsAttribute should not be touched");

    // init a third instance missing one Attribute
    AttributeSource src3 = new AttributeSource();
    termAtt = (TermAttribute)src3.AddAttribute(typeof(TermAttribute));
    try
    {
        src3.RestoreState(state);
        Assert.Fail("The third instance is missing the TypeAttribute, so restoreState() should throw IllegalArgumentException");
    }
    catch (System.ArgumentException) // LUCENENET: dropped the unused exception variable (CS0168)
    {
        // pass
    }
}
/// <summary>
/// Returns a seeking enum over the configured term set, or the empty enum when
/// no terms are present.
/// </summary>
public override TermsEnum GetTermsEnum(Terms terms, AttributeSource atts)
{
    return _terms.Size() == 0
        ? TermsEnum.EMPTY
        : new SeekingTermSetTermsEnum(terms.Iterator(null), _terms, _ords);
}
/// <summary>
/// Accepts the current token when its type equals <c>typeToMatch</c>.
/// </summary>
public override bool Accept(AttributeSource source)
{
    // lazily resolve the type attribute from the stream on first call
    if (typeAtt == null)
    {
        typeAtt = source.AddAttribute<ITypeAttribute>();
    }
    return typeToMatch.Equals(typeAtt.Type);
}
/// <summary>
/// Creates a KeywordTokenizer with the given attribute factory, input reader,
/// and initial term buffer size.
/// </summary>
/// <exception cref="System.ArgumentException">if <paramref name="bufferSize"/> is not positive</exception>
public KeywordTokenizer(AttributeSource.AttributeFactory factory, Reader input, int bufferSize)
    : base(factory, input)
{
    if (bufferSize <= 0)
    {
        throw new System.ArgumentException("bufferSize must be > 0");
    }
    // pre-size the term buffer to the requested capacity
    termAtt.ResizeBuffer(bufferSize);
}
/// <summary>
/// Creates a LowerCaseKeywordTokenizer that reuses the attributes of the
/// supplied <see cref="AttributeSource"/>.
/// </summary>
protected LowerCaseKeywordTokenizer(AttributeSource source, System.IO.TextReader input)
    : base(source, input)
{
    // NOTE: registration order of attributes is observable (iteration/ToString order)
    offsetAtt = AddAttribute<IOffsetAttribute>();
    termAtt = AddAttribute<ITermAttribute>();
    // true when the invariant culture compares the ASCII alphabet case-insensitively equal;
    // presumably used to select an ASCII casing fast path — confirm at usage sites
    isAsciiCasingSameAsInvariant = CultureInfo.InvariantCulture.CompareInfo.Compare("abcdefghijklmnopqrstuvwxyz", "ABCDEFGHIJKLMNOPQRSTUVWXYZ", CompareOptions.IgnoreCase) == 0;
    invariantTextInfo = CultureInfo.InvariantCulture.TextInfo;
}
/// <summary> An AttributeSource that uses the same attributes as the supplied one.</summary>
public AttributeSource(AttributeSource input)
{
    if (input == null)
    {
        throw new System.ArgumentException("input AttributeSource must not be null");
    }
    // the attribute maps are shared (not copied), so attributes added through
    // either instance are visible to both
    this.attributes = input.attributes;
    this.attributeImpls = input.attributeImpls;
    // NOTE(review): unlike later versions of this constructor, the cached
    // currentState is NOT shared here, so cached-state invalidation may not
    // propagate between the two sources (cf. LUCENE-3042) — confirm against
    // the version of AttributeSource actually in use.
    this.factory = input.factory;
}
#pragma warning restore 618
/// <summary> A <see cref="TokenStream"/> that uses the same attributes as the supplied one.</summary>
protected internal TokenStream(AttributeSource input)
    : base(input)
{
    InitBlock();
    // REMOVE: in 3.0
    // legacy old-token-API support: resolve the shared TokenWrapper and verify
    // the stream's API configuration
#pragma warning disable 618
    tokenWrapper = InitTokenWrapper(input);
    Check();
#pragma warning restore 618
}
/// <summary>
/// Accepts the current token when its type equals <c>typeToMatch</c>.
/// </summary>
public override bool Accept(AttributeSource source)
{
    // lazily resolve the type attribute from the stream on first call
    if (typeAtt == null)
    {
        typeAtt = source.AddAttribute<ITypeAttribute>();
    }
    //check to see if this is a Category
    return (typeToMatch.Equals(typeAtt.Type));
}
/// <summary>
/// Verifies CaptureState()/RestoreState(): restoring reinstates the captured
/// values, leaves attributes absent from the captured state untouched, and
/// throws when the target source is missing an attribute present in the state.
/// </summary>
public virtual void TestCaptureState()
{
    // init a first instance
    AttributeSource src = new AttributeSource();
    ICharTermAttribute termAtt = src.AddAttribute<ICharTermAttribute>();
    ITypeAttribute typeAtt = src.AddAttribute<ITypeAttribute>();
    termAtt.Append("TestTerm");
    typeAtt.Type = "TestType";
    int hashCode = src.GetHashCode();
    AttributeSource.State state = src.CaptureState();

    // modify the attributes
    termAtt.SetEmpty().Append("AnotherTestTerm");
    typeAtt.Type = "AnotherTestType";
    Assert.IsTrue(hashCode != src.GetHashCode(), "Hash code should be different");

    src.RestoreState(state);
    Assert.AreEqual(termAtt.ToString(), "TestTerm");
    Assert.AreEqual(typeAtt.Type, "TestType");
    Assert.AreEqual(hashCode, src.GetHashCode(), "Hash code should be equal after restore");

    // restore into an exact configured copy
    AttributeSource copy = new AttributeSource();
    copy.AddAttribute<ICharTermAttribute>();
    copy.AddAttribute<ITypeAttribute>();
    copy.RestoreState(state);
    Assert.AreEqual(src.GetHashCode(), copy.GetHashCode(), "Both AttributeSources should have same hashCode after restore");
    Assert.AreEqual(src, copy, "Both AttributeSources should be equal after restore");

    // init a second instance (with attributes in different order and one additional attribute)
    AttributeSource src2 = new AttributeSource();
    typeAtt = src2.AddAttribute<ITypeAttribute>();
    IFlagsAttribute flagsAtt = src2.AddAttribute<IFlagsAttribute>();
    termAtt = src2.AddAttribute<ICharTermAttribute>();
    flagsAtt.Flags = 12345;
    src2.RestoreState(state);
    Assert.AreEqual(termAtt.ToString(), "TestTerm");
    Assert.AreEqual(typeAtt.Type, "TestType");
    Assert.AreEqual(12345, flagsAtt.Flags, "FlagsAttribute should not be touched");

    // init a third instance missing one Attribute
    AttributeSource src3 = new AttributeSource();
    termAtt = src3.AddAttribute<ICharTermAttribute>();
    try
    {
        src3.RestoreState(state);
        Assert.Fail("The third instance is missing the TypeAttribute, so restoreState() should throw IllegalArgumentException");
    }
    catch (System.ArgumentException) // LUCENENET: dropped the unused exception variable (CS0168)
    {
        // pass
    }
}
/// <summary>
/// Two AttributeSources are equal when they hold the same number of attribute
/// implementations, in the same order, each pair comparing equal.
/// </summary>
public override bool Equals(System.Object obj)
{
    if (obj == this)
    {
        return (true);
    }
    if (obj is AttributeSource)
    {
        AttributeSource other = (AttributeSource)obj;
        if (HasAttributes())
        {
            if (!other.HasAttributes())
            {
                return (false);
            }
            if (this.attributeImpls.Count != other.attributeImpls.Count)
            {
                return (false);
            }
            // it is only equal if all attribute impls are the same in the same order
            // (lazily (re)compute the cached state lists before walking them)
            if (this.currentState == null)
            {
                this.ComputeCurrentState();
            }
            State thisState = this.currentState;
            if (other.currentState == null)
            {
                other.ComputeCurrentState();
            }
            State otherState = other.currentState;
            while (thisState != null && otherState != null)
            {
                if (otherState.attribute.GetType() != thisState.attribute.GetType() || !otherState.attribute.Equals(thisState.attribute))
                {
                    return (false);
                }
                thisState = thisState.next;
                otherState = otherState.next;
            }
            // both lists have the same length (checked via Count above), so
            // reaching the end of either means both are exhausted
            return (true);
        }
        else
        {
            // an attribute-less source only equals another attribute-less source
            return (!other.HasAttributes());
        }
    }
    else
    {
        return (false);
    }
}
/// <summary>
/// Verifies CaptureState()/RestoreState(): restoring reinstates the captured
/// values, leaves attributes absent from the captured state untouched, and
/// throws when the target source is missing an attribute present in the state.
/// </summary>
public virtual void TestCaptureState()
{
    // init a first instance
    AttributeSource src = new AttributeSource();
    ITermAttribute termAtt = src.AddAttribute<ITermAttribute>();
    ITypeAttribute typeAtt = src.AddAttribute<ITypeAttribute>();
    termAtt.SetTermBuffer("TestTerm");
    typeAtt.Type = "TestType";
    int hashCode = src.GetHashCode();
    AttributeSource.State state = src.CaptureState();
    // modify the attributes
    termAtt.SetTermBuffer("AnotherTestTerm");
    typeAtt.Type = "AnotherTestType";
    Assert.IsTrue(hashCode != src.GetHashCode(), "Hash code should be different");
    src.RestoreState(state);
    Assert.AreEqual("TestTerm", termAtt.Term);
    Assert.AreEqual("TestType", typeAtt.Type);
    Assert.AreEqual(hashCode, src.GetHashCode(), "Hash code should be equal after restore");
    // restore into an exact configured copy
    AttributeSource copy = new AttributeSource();
    copy.AddAttribute<ITermAttribute>();
    copy.AddAttribute<ITypeAttribute>();
    copy.RestoreState(state);
    Assert.AreEqual(src.GetHashCode(), copy.GetHashCode(), "Both AttributeSources should have same hashCode after restore");
    Assert.AreEqual(src, copy, "Both AttributeSources should be equal after restore");
    // init a second instance (with attributes in different order and one additional attribute)
    AttributeSource src2 = new AttributeSource();
    typeAtt = src2.AddAttribute<ITypeAttribute>();
    IFlagsAttribute flagsAtt = src2.AddAttribute<IFlagsAttribute>();
    termAtt = src2.AddAttribute<ITermAttribute>();
    flagsAtt.Flags = 12345;
    src2.RestoreState(state);
    Assert.AreEqual("TestTerm", termAtt.Term);
    Assert.AreEqual("TestType", typeAtt.Type);
    Assert.AreEqual(12345, flagsAtt.Flags, "FlagsAttribute should not be touched");
    // init a third instance missing one Attribute
    AttributeSource src3 = new AttributeSource();
    termAtt = src3.AddAttribute<ITermAttribute>();
    Assert.Throws<ArgumentException>(() => src3.RestoreState(state), "The third instance is missing the TypeAttribute, so restoreState() should throw IllegalArgumentException");
}
/// <summary>
/// An AttributeSource that uses the same attributes as the supplied one.
/// </summary>
public AttributeSource(AttributeSource input)
{
    if (input == null)
    {
        throw new System.ArgumentException("input AttributeSource must not be null");
    }
    // share (not copy) the attribute maps and the cached state reference, so
    // attributes added through either source are visible to the other
    this.Attributes = input.Attributes;
    this.AttributeImpls = input.AttributeImpls;
    this.CurrentState_Renamed = input.CurrentState_Renamed;
    this.Factory = input.Factory;
}
/// <summary>
/// Creates the <seealso cref="TokenStream"/> of n-grams from the given <seealso cref="Reader"/> and <seealso cref="AttributeSource.AttributeFactory"/>. </summary>
public override Tokenizer Create(AttributeSource.AttributeFactory factory, Reader input)
{
    // 4.4+ uses the rewritten tokenizer; earlier match versions keep the 4.3 behavior
    return luceneMatchVersion.OnOrAfter(Version.LUCENE_44)
        ? (Tokenizer)new NGramTokenizer(luceneMatchVersion, factory, input, minGramSize, maxGramSize)
        : new Lucene43NGramTokenizer(factory, input, minGramSize, maxGramSize);
}
/// <summary>
/// Verifies that the default attribute factory maps every basic attribute
/// interface to its standard implementation class.
/// </summary>
public virtual void TestDefaultAttributeFactory()
{
    AttributeSource src = new AttributeSource();
    Assert.IsTrue(src.AddAttribute<ICharTermAttribute>() is CharTermAttribute, "CharTermAttribute is not implemented by CharTermAttributeImpl");
    Assert.IsTrue(src.AddAttribute<IOffsetAttribute>() is OffsetAttribute, "OffsetAttribute is not implemented by OffsetAttributeImpl");
    Assert.IsTrue(src.AddAttribute<IFlagsAttribute>() is FlagsAttribute, "FlagsAttribute is not implemented by FlagsAttributeImpl");
    Assert.IsTrue(src.AddAttribute<IPayloadAttribute>() is PayloadAttribute, "PayloadAttribute is not implemented by PayloadAttributeImpl");
    Assert.IsTrue(src.AddAttribute<IPositionIncrementAttribute>() is PositionIncrementAttribute, "PositionIncrementAttribute is not implemented by PositionIncrementAttributeImpl");
    Assert.IsTrue(src.AddAttribute<ITypeAttribute>() is TypeAttribute, "TypeAttribute is not implemented by TypeAttributeImpl");
}
/// <summary>
/// An <see cref="AttributeSource"/> that uses the same attributes as the supplied one.
/// </summary>
public AttributeSource(AttributeSource input)
{
    if (input is null)
    {
        throw new ArgumentNullException(nameof(input), "input AttributeSource must not be null"); // LUCENENET specific - changed from IllegalArgumentException to ArgumentNullException (.NET convention)
    }
    // share (not copy) the attribute maps and the cached state reference, so
    // attributes added through either source are visible to the other
    this.attributes = input.attributes;
    this.attributeImpls = input.attributeImpls;
    this.currentState = input.currentState;
    this.factory = input.factory;
}
/// <summary>
/// Regression test for LUCENE-3042: adding an attribute through a source that
/// shares its attributes must invalidate the other source's cached state.
/// </summary>
public virtual void TestLUCENE_3042()
{
    // populate a source and take its hash code, which caches the current state
    AttributeSource original = new AttributeSource();
    original.AddAttribute<ICharTermAttribute>().Append("foo");
    int originalHash = original.GetHashCode(); // this triggers a cached state

    // a second source sharing the same attributes; adding to it must also
    // invalidate the first source's cached state
    AttributeSource shared = new AttributeSource(original);
    shared.AddAttribute<ITypeAttribute>().Type = "bar";

    Assert.IsTrue(originalHash != original.GetHashCode(), "The hashCode is identical, so the captured state was preserved.");
    Assert.AreEqual(shared.GetHashCode(), original.GetHashCode());
}
/// <summary>
/// Copies the contents of this <see cref="AttributeSource"/> to the given target <see cref="AttributeSource"/>.
/// The given instance has to provide all <see cref="IAttribute"/>s this instance contains.
/// The actual attribute implementations must be identical in both <see cref="AttributeSource"/> instances;
/// ideally both <see cref="AttributeSource"/> instances should use the same <see cref="AttributeFactory"/>.
/// You can use this method as a replacement for <see cref="RestoreState(State)"/>, if you use
/// <see cref="CloneAttributes()"/> instead of <see cref="CaptureState()"/>.
/// </summary>
/// <exception cref="ArgumentException">if the target is missing an implementation for one of this source's attributes</exception>
public void CopyTo(AttributeSource target)
{
    for (State state = GetCurrentState(); state != null; state = state.next)
    {
        // LUCENENET fix: the .NET Dictionary indexer throws KeyNotFoundException
        // for a missing key (unlike Java's HashMap.get, which returns null), so
        // the original `targetImpl == null` check was unreachable. Use
        // TryGetValue so the documented ArgumentException is thrown instead.
        if (!target.attributeImpls.TryGetValue(state.attribute.GetType(), out Attribute targetImpl))
        {
            throw new ArgumentException("this AttributeSource contains Attribute of type " + state.attribute.GetType().Name + " that is not in the target");
        }
        state.attribute.CopyTo(targetImpl);
    }
}
/// <summary>
/// Accepts tokens whose ordinal position falls within [lower, upper);
/// increments the position counter on every call.
/// </summary>
public override bool Accept(AttributeSource source)
{
    try
    {
        return count >= lower && count < upper;
    }
    finally
    {
        // always advance the counter, whether or not the token was accepted
        count++;
    }
}
/// <summary>
/// Creates the <seealso cref="TokenStream"/> of n-grams from the given <seealso cref="TextReader"/> and <seealso cref="AttributeSource.AttributeFactory"/>. </summary>
public override Tokenizer Create(AttributeSource.AttributeFactory factory, TextReader input)
{
    // 4.4+ uses the rewritten tokenizer; earlier match versions keep the 4.3 behavior
#pragma warning disable 612, 618
    bool atLeast44 = luceneMatchVersion.OnOrAfter(LuceneVersion.LUCENE_44);
#pragma warning restore 612, 618
    if (!atLeast44)
    {
#pragma warning disable 612, 618
        return new Lucene43NGramTokenizer(factory, input, minGramSize, maxGramSize);
#pragma warning restore 612, 618
    }
    return new NGramTokenizer(luceneMatchVersion, factory, input, minGramSize, maxGramSize);
}
/// <summary>
/// Creates the edge-n-gram tokenizer. As of Lucene 4.4 only front n-grams are
/// supported; any other configured side is rejected.
/// </summary>
/// <exception cref="System.ArgumentException">if <c>side</c> is not FRONT and the match version is 4.4 or later</exception>
public override Tokenizer Create(AttributeSource.AttributeFactory factory, TextReader input)
{
    if (luceneMatchVersion.OnOrAfter(LuceneVersion.LUCENE_44))
    {
        if (!EdgeNGramTokenFilter.Side.FRONT.Label.Equals(side))
        {
            // LUCENENET fix: System.Type has no SimpleName member (a Java-ism);
            // use Name, consistent with the other factory in this codebase.
            throw new System.ArgumentException(typeof(EdgeNGramTokenizer).Name + " does not support backward n-grams as of Lucene 4.4");
        }
        return new EdgeNGramTokenizer(luceneMatchVersion, input, minGramSize, maxGramSize);
    }
    else
    {
        return new Lucene43EdgeNGramTokenizer(luceneMatchVersion, input, side, minGramSize, maxGramSize);
    }
}
/// <summary>
/// AddAttribute must reject types that are concrete implementations rather
/// than attribute interfaces.
/// </summary>
public void TestInvalidArguments()
{
    // Token is an implementation class, not an attribute interface
    var src = new AttributeSource();
    Assert.Throws<ArgumentException>(() => src.AddAttribute<Token>(), "Should throw ArgumentException");

    // a fresh source must behave identically
    src = new AttributeSource();
    Assert.Throws<ArgumentException>(() => src.AddAttribute<Token>(), "Should throw ArgumentException");

    // NOTE: passing a non-attribute type such as IEnumerator does not compile
    // in .NET thanks to the generic constraint, so that Java test has no port here.
}
/// <summary>
/// AddAttribute must reject types that are not attribute interfaces: a
/// concrete implementation (Token) and an unrelated interface (IEnumerator).
/// </summary>
public void TestInvalidArguments()
{
    try
    {
        AttributeSource src = new AttributeSource();
        src.AddAttribute(typeof(Token));
        Assert.Fail("Should throw IllegalArgumentException");
    }
    catch (ArgumentException) // LUCENENET: dropped the unused exception variable (CS0168)
    {
        // expected
    }
    try
    {
        AttributeSource src = new AttributeSource();
        src.AddAttribute(typeof(System.Collections.IEnumerator));
        Assert.Fail("Should throw IllegalArgumentException");
    }
    catch (ArgumentException) // LUCENENET: dropped the unused exception variable (CS0168)
    {
        // expected
    }
}
/// <summary>
/// Creates the edge-n-gram tokenizer. As of Lucene 4.4 only front n-grams are
/// supported; any other configured side is rejected.
/// </summary>
/// <exception cref="System.ArgumentException">if <c>side</c> is not FRONT and the match version is 4.4 or later</exception>
public override Tokenizer Create(AttributeSource.AttributeFactory factory, TextReader input)
{
#pragma warning disable 612, 618
    if (luceneMatchVersion.OnOrAfter(LuceneVersion.LUCENE_44))
#pragma warning restore 612, 618
    {
        // LUCENENET fix: the original only verified that `side` PARSES as a
        // Side value, so a successfully parsed "back" slipped through without
        // the documented exception. Also reject any parsed side other than FRONT.
        EdgeNGramTokenFilter.Side sideEnum;
        if (!Enum.TryParse(this.side, true, out sideEnum) || sideEnum != EdgeNGramTokenFilter.Side.FRONT)
        {
            throw new System.ArgumentException(typeof(EdgeNGramTokenizer).Name + " does not support backward n-grams as of Lucene 4.4");
        }
        return new EdgeNGramTokenizer(luceneMatchVersion, input, minGramSize, maxGramSize);
    }
    else
    {
#pragma warning disable 612, 618
        return new Lucene43EdgeNGramTokenizer(luceneMatchVersion, input, side, minGramSize, maxGramSize);
#pragma warning restore 612, 618
    }
}
/// <summary>
/// AddAttribute must reject a concrete implementation type (Token), with both
/// the default factory and the token attribute factory.
/// </summary>
public virtual void TestInvalidArguments()
{
    try
    {
        AttributeSource src = new AttributeSource();
        src.AddAttribute<Token>();
        Assert.Fail("Should throw IllegalArgumentException");
    }
#pragma warning disable 168
    catch (System.ArgumentException iae)
#pragma warning restore 168
    {
    }
    try
    {
        AttributeSource src = new AttributeSource(Token.TOKEN_ATTRIBUTE_FACTORY);
        src.AddAttribute<Token>();
        Assert.Fail("Should throw IllegalArgumentException");
    }
#pragma warning disable 168
    catch (System.ArgumentException iae)
#pragma warning restore 168
    {
    }
    // LUCENENET NOTE: Invalid type won't compile because
    // of the generic constraint, so this test is not necessary in .NET.
    /*try
     * {
     * AttributeSource src = new AttributeSource();
     * // break this by unsafe cast
     * src.AddAttribute<typeof((Type)IEnumerator)>();
     * Assert.Fail("Should throw IllegalArgumentException");
     * }
     * catch (System.ArgumentException iae)
     * {
     * }*/
}
/// <summary>
/// Accepts the current token when its term text parses as a date using
/// <c>dateFormat</c>. We don't care about the date value, only parseability.
/// </summary>
public override bool Accept(AttributeSource source)
{
    // lazily resolve the term attribute from the stream on first call
    if (termAtt == null)
    {
        termAtt = source.AddAttribute<ITermAttribute>();
    }
    // LUCENENET fix: the original called DateTime.Parse inside try/catch and
    // then tested `date != null`, which is always true for the non-nullable
    // DateTime struct; TryParse avoids both the dead check and using
    // exceptions for control flow.
    DateTime date;
    return DateTime.TryParse(termAtt.Term, dateFormat, System.Globalization.DateTimeStyles.None, out date);
}
/// <summary>
/// Default reflection-based introspection: reports every declared instance
/// field (public and non-public) of the concrete class to the given
/// <see cref="IAttributeReflector"/>, keyed by the single implemented
/// attribute interface.
/// </summary>
/// <exception cref="NotSupportedException">if the class implements more than one attribute interface</exception>
public virtual void ReflectWith(IAttributeReflector reflector) // LUCENENET NOTE: This method was abstract in Lucene
{
    Type clazz = this.GetType();
    LinkedList<WeakReference> interfaces = AttributeSource.GetAttributeInterfaces(clazz);
    if (interfaces.Count != 1)
    {
        throw new NotSupportedException(clazz.Name + " implements more than one Attribute interface, the default ReflectWith() implementation cannot handle this.");
    }
    Type interf = (System.Type)interfaces.First().Target;
    /*object target = interfaces.First.Value;
     *
     * if (target == null)
     * return;
     *
     * Type interf = target.GetType();// as Type;*/
    //problem: the interfaces list has weak references that could have expired already
    // NOTE(review): an expired WeakReference yields a null Target, so `interf`
    // can be null when passed to Reflect() — confirm callers tolerate that.
    FieldInfo[] fields = clazz.GetFields(BindingFlags.Instance | BindingFlags.Public | BindingFlags.NonPublic | BindingFlags.DeclaredOnly);
    try
    {
        for (int i = 0; i < fields.Length; i++)
        {
            FieldInfo f = fields[i];
            if (f.IsStatic)
            {
                continue;
            }
            reflector.Reflect(interf, f.Name, f.GetValue(this));
        }
    }
    catch (MemberAccessException e)
    {
        // this should never happen, because we're just accessing fields from 'this'
        throw new Exception(e.ToString(), e);
    }
}
/// <summary>
/// Verifies CloneAttributes(): the clone exposes the same interfaces in
/// registration order with distinct but value-equal impl instances, and
/// CopyTo() copies values back into the original.
/// </summary>
public virtual void TestCloneAttributes()
{
    AttributeSource src = new AttributeSource();
    IFlagsAttribute flagsAtt = src.AddAttribute<IFlagsAttribute>();
    ITypeAttribute typeAtt = src.AddAttribute<ITypeAttribute>();
    flagsAtt.Flags = 1234;
    typeAtt.Type = "TestType";
    AttributeSource clone = src.CloneAttributes();
    // attribute interfaces must enumerate in the order they were added
    IEnumerator<Type> it = clone.GetAttributeClassesEnumerator();
    it.MoveNext();
    Assert.AreEqual(typeof(IFlagsAttribute), it.Current, "FlagsAttribute must be the first attribute");
    it.MoveNext();
    Assert.AreEqual(typeof(ITypeAttribute), it.Current, "TypeAttribute must be the second attribute");
    Assert.IsFalse(it.MoveNext(), "No more attributes");
    // clones are separate instances but compare equal to the originals
    IFlagsAttribute flagsAtt2 = clone.GetAttribute<IFlagsAttribute>();
    ITypeAttribute typeAtt2 = clone.GetAttribute<ITypeAttribute>();
    Assert.AreNotSame(flagsAtt2, flagsAtt, "FlagsAttribute of original and clone must be different instances");
    Assert.AreNotSame(typeAtt2, typeAtt, "TypeAttribute of original and clone must be different instances");
    Assert.AreEqual(flagsAtt2, flagsAtt, "FlagsAttribute of original and clone must be equal");
    Assert.AreEqual(typeAtt2, typeAtt, "TypeAttribute of original and clone must be equal");
    // test copy back
    flagsAtt2.Flags = 4711;
    typeAtt2.Type = "OtherType";
    clone.CopyTo(src);
    Assert.AreEqual(4711, flagsAtt.Flags, "FlagsAttribute of original must now contain updated term");
    Assert.AreEqual(typeAtt.Type, "OtherType", "TypeAttribute of original must now contain updated type");
    // verify again:
    Assert.AreNotSame(flagsAtt2, flagsAtt, "FlagsAttribute of original and clone must be different instances");
    Assert.AreNotSame(typeAtt2, typeAtt, "TypeAttribute of original and clone must be different instances");
    Assert.AreEqual(flagsAtt2, flagsAtt, "FlagsAttribute of original and clone must be equal");
    Assert.AreEqual(typeAtt2, typeAtt, "TypeAttribute of original and clone must be equal");
}
/// <summary>
/// Creates a LowerCaseKeywordTokenizer that reuses the attributes of the
/// supplied <see cref="AttributeSource"/>.
/// </summary>
protected LowerCaseKeywordTokenizer(AttributeSource source, System.IO.TextReader input)
    : base(source, input)
{
    // NOTE: registration order of attributes is observable (iteration/ToString order)
    offsetAtt = AddAttribute<IOffsetAttribute>();
    termAtt = AddAttribute<ITermAttribute>();
}
/// <summary>
/// Creates a path-hierarchy tokenizer, reversed or forward depending on the
/// factory's <c>reverse</c> setting.
/// </summary>
public override Tokenizer Create(AttributeSource.AttributeFactory factory, TextReader input)
{
    return reverse
        ? (Tokenizer)new ReversePathHierarchyTokenizer(factory, input, delimiter, replacement, skip)
        : new PathHierarchyTokenizer(factory, input, delimiter, replacement, skip);
}
/// <summary>
/// Verifies CaptureState()/RestoreState(): restoring reinstates the captured
/// values, leaves attributes absent from the captured state untouched, and
/// throws when the target source is missing an attribute present in the state.
/// </summary>
public virtual void TestCaptureState()
{
    // init a first instance
    AttributeSource src = new AttributeSource();
    ICharTermAttribute termAtt = src.AddAttribute<ICharTermAttribute>();
    ITypeAttribute typeAtt = src.AddAttribute<ITypeAttribute>();
    termAtt.Append("TestTerm");
    typeAtt.Type = "TestType";
    int hashCode = src.GetHashCode();
    AttributeSource.State state = src.CaptureState();
    // modify the attributes
    termAtt.SetEmpty().Append("AnotherTestTerm");
    typeAtt.Type = "AnotherTestType";
    Assert.IsTrue(hashCode != src.GetHashCode(), "Hash code should be different");
    src.RestoreState(state);
    Assert.AreEqual(termAtt.ToString(), "TestTerm");
    Assert.AreEqual(typeAtt.Type, "TestType");
    Assert.AreEqual(hashCode, src.GetHashCode(), "Hash code should be equal after restore");
    // restore into an exact configured copy
    AttributeSource copy = new AttributeSource();
    copy.AddAttribute<ICharTermAttribute>();
    copy.AddAttribute<ITypeAttribute>();
    copy.RestoreState(state);
    Assert.AreEqual(src.GetHashCode(), copy.GetHashCode(), "Both AttributeSources should have same hashCode after restore");
    Assert.AreEqual(src, copy, "Both AttributeSources should be equal after restore");
    // init a second instance (with attributes in different order and one additional attribute)
    AttributeSource src2 = new AttributeSource();
    typeAtt = src2.AddAttribute<ITypeAttribute>();
    IFlagsAttribute flagsAtt = src2.AddAttribute<IFlagsAttribute>();
    termAtt = src2.AddAttribute<ICharTermAttribute>();
    flagsAtt.Flags = 12345;
    src2.RestoreState(state);
    Assert.AreEqual(termAtt.ToString(), "TestTerm");
    Assert.AreEqual(typeAtt.Type, "TestType");
    Assert.AreEqual(12345, flagsAtt.Flags, "FlagsAttribute should not be touched");
    // init a third instance missing one Attribute
    AttributeSource src3 = new AttributeSource();
    termAtt = src3.AddAttribute<ICharTermAttribute>();
    try
    {
        src3.RestoreState(state);
        Assert.Fail("The third instance is missing the TypeAttribute, so restoreState() should throw IllegalArgumentException");
    }
#pragma warning disable 168
    catch (System.ArgumentException iae)
#pragma warning restore 168
    {
        // pass
    }
}
/// <summary>
/// Creates a RussianLetterTokenizer reading from <paramref name="_in"/>,
/// reusing the attributes of the supplied <see cref="AttributeSource"/>.
/// </summary>
public RussianLetterTokenizer(AttributeSource source, TextReader _in)
    : base(source, _in)
{
}
/// <summary>
/// Creates NGramTokenizer with given min and max n-grams.
/// </summary>
/// <param name="source"><see cref="AttributeSource"/> to use</param>
/// <param name="input"><see cref="TextReader"/> holding the input to be tokenized</param>
/// <param name="minGram">the smallest n-gram to generate</param>
/// <param name="maxGram">the largest n-gram to generate</param>
public NGramTokenizer(AttributeSource source, TextReader input, int minGram, int maxGram)
    : base(source, input)
{
    init(minGram, maxGram);
}
/// <summary>
/// Pushes a token's attribute source onto the front of the buffer, creating
/// the buffer lazily on first use.
/// </summary>
private void pushTok(AttributeSource t)
{
    if (buffer == null)
    {
        // LUCENENET fix: C# has no diamond operator (`new LinkedList<>()` is a
        // Java-ism); the element type must be spelled out.
        buffer = new LinkedList<AttributeSource>();
    }
    buffer.AddFirst(t);
}
/// <summary>
/// Copies all attribute values from <paramref name="source"/> into
/// <paramref name="target"/>, unless both are the same instance.
/// </summary>
private void copy(AttributeSource target, AttributeSource source)
{
    if (target != source)
    {
        // LUCENENET fix: the AttributeSource method is CopyTo (PascalCase);
        // lowercase `copyTo` is a Java-ism that does not exist in the port.
        source.CopyTo(target);
    }
}
/// <summary>
/// Creates EdgeNGramTokenizer that can generate n-grams in the sizes of the given range.
/// </summary>
/// <param name="version"> the Lucene match version (see <a href="#version">above</a>) </param>
/// <param name="factory"> the <see cref="AttributeSource.AttributeFactory"/> to use </param>
/// <param name="input"> the <see cref="TextReader"/> holding the input to be tokenized </param>
/// <param name="minGram"> the smallest n-gram to generate </param>
/// <param name="maxGram"> the largest n-gram to generate </param>
public EdgeNGramTokenizer(LuceneVersion version, AttributeSource.AttributeFactory factory, TextReader input, int minGram, int maxGram)
    : base(version, factory, input, minGram, maxGram, true) // trailing `true` — presumably "edges only"; confirm against base ctor
{
}
/// <summary>
/// Accepts the current token when its term text can be parsed as a date
/// (using the configured culture, style, and optional explicit formats).
/// </summary>
/// <param name="source">the stream's <see cref="AttributeSource"/>, used to lazily bind the term attribute</param>
/// <returns>true if the term text parses as a date, false otherwise</returns>
public override bool Accept(AttributeSource source)
{
    // bind the term attribute on first use
    if (termAtt == null)
    {
        termAtt = source.AddAttribute<ICharTermAttribute>();
    }

    string text = termAtt.ToString();

    // the parsed value itself is irrelevant — only parseability matters
    DateTime parsed;
    return formats == null
        ? DateTime.TryParse(text, culture, style, out parsed)
        : DateTime.TryParseExact(text, formats, culture, style, out parsed);
}
/// <summary>
/// Creates a new CJKTokenizer that reuses the attributes of the given
/// <see cref="AttributeSource"/>.
/// </summary>
/// <param name="source">the <see cref="AttributeSource"/> to use</param>
/// <param name="_in">the input to split up into tokens</param>
public CJKTokenizer(AttributeSource source, TextReader _in)
    : base(source, _in)
{
    Init();
}
/// <summary>
/// Creates the iterator helper, remembering its owning <see cref="AttributeSource"/>
/// and seeding the iteration state from <paramref name="initState"/>.
/// </summary>
/// <param name="outerInstance">the enclosing <see cref="AttributeSource"/></param>
/// <param name="initState">the initial <see cref="AttributeSource.State"/> to iterate from</param>
public IteratorAnonymousInnerClassHelper(AttributeSource outerInstance, AttributeSource.State initState)
{
    // the current cursor starts at the initial state
    state = initState;
    this.initState = initState;
    this.outerInstance = outerInstance;
}
/// <summary>
/// Constructor for enumeration of all terms from specified <code>reader</code> which share a prefix of
/// length <code>prefixLength</code> with <code>term</code> and which have a fuzzy similarity &gt;
/// <code>minSimilarity</code>.
/// <p>
/// After calling the constructor the enumeration is already pointing to the first
/// valid term if such a term exists.
/// </summary>
/// <param name="terms"> Delivers terms. </param>
/// <param name="atts"> <seealso cref="AttributeSource"/> created by the rewrite method of <seealso cref="MultiTermQuery"/>
///        thats contains information about competitive boosts during rewrite. It is also used
///        to cache DFAs between segment transitions. </param>
/// <param name="term"> Pattern term. </param>
/// <param name="minSimilarity"> Minimum required similarity for terms from the reader. Pass an integer value
///        representing edit distance. Passing a fraction is deprecated. </param>
/// <param name="prefixLength"> Length of required common prefix. Default value is 0. </param>
/// <param name="transpositions"> whether a transposition counts as a single edit </param>
/// <exception cref="IOException"> if there is a low-level IO error </exception>
public FuzzyTermsEnum(Terms terms, AttributeSource atts, Term term, float minSimilarity, int prefixLength, bool transpositions)
{
    if (!InstanceFieldsInitialized)
    {
        InitializeInstanceFields();
        InstanceFieldsInitialized = true;
    }
    // values >= 1 are edit distances and must be whole numbers;
    // fractional values below 1 are the (deprecated) similarity form
    if (minSimilarity >= 1.0f && minSimilarity != (int)minSimilarity)
    {
        throw new System.ArgumentException("fractional edit distances are not allowed");
    }
    if (minSimilarity < 0.0f)
    {
        throw new System.ArgumentException("minimumSimilarity cannot be less than 0");
    }
    if (prefixLength < 0)
    {
        throw new System.ArgumentException("prefixLength cannot be less than 0");
    }
    this.Terms = terms;
    this.Term_Renamed = term;

    // convert the string into a utf32 int[] representation for fast comparisons
    string utf16 = term.Text();
    //LUCENE TO-DO
    //this.TermText = new int[utf16.codePointCount(0, utf16.Length)];
    this.TermText = new int[utf16.Length];
    // NOTE: `cp` is assigned inside the loop body and then read by the
    // increment expression (advance by the code point's char count) — the
    // unusual for-header is intentional; do not "simplify" it.
    for (int cp, i = 0, j = 0; i < utf16.Length; i += Character.CharCount(cp))
    {
        TermText[j++] = cp = Character.CodePointAt(utf16, i);
    }
    this.TermLength = TermText.Length;
    this.DfaAtt = atts.AddAttribute<ILevenshteinAutomataAttribute>();

    //The prefix could be longer than the word.
    //It's kind of silly though.  It means we must match the entire word.
    this.RealPrefixLength = prefixLength > TermLength ? TermLength : prefixLength;
    // if minSimilarity >= 1, we treat it as number of edits
    if (minSimilarity >= 1f)
    {
        this.MinSimilarity_Renamed = 0; // just driven by number of edits
        MaxEdits = (int)minSimilarity;
        Raw = true;
    }
    else
    {
        this.MinSimilarity_Renamed = minSimilarity;
        // calculate the maximum k edits for this similarity
        MaxEdits = InitialMaxDistance(this.MinSimilarity_Renamed, TermLength);
        Raw = false;
    }
    if (transpositions && MaxEdits > LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE)
    {
        throw new System.NotSupportedException("with transpositions enabled, distances > " + LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE + " are not supported ");
    }
    this.Transpositions = transpositions;
    this.Scale_factor = 1.0f / (1.0f - this.MinSimilarity_Renamed);
    // wire up the competitive-boost attribute and seed the current bottom
    this.MaxBoostAtt = atts.AddAttribute<IMaxNonCompetitiveBoostAttribute>();
    Bottom = MaxBoostAtt.MaxNonCompetitiveBoost;
    BottomTerm = MaxBoostAtt.CompetitiveTerm;
    BottomChanged(null, true);
}
/// <summary> A TokenStream that uses the same attributes as the supplied one.</summary>
/// <param name="input">the <see cref="AttributeSource"/> whose attributes are shared by this stream</param>
protected internal TokenStream(AttributeSource input)
    : base(input)
{
}
/// <summary>
/// Creates a new UrlTokenizer that reuses the attributes of the given
/// <see cref="AttributeSource"/>.
/// </summary>
/// <param name="source">the <see cref="AttributeSource"/> to use</param>
/// <param name="in">the input to split up into tokens</param>
public UrlTokenizer(AttributeSource source, System.IO.TextReader @in)
    : base(source, @in)
{
}
/// <summary> Creates a new StandardTokenizer with a given <see cref="AttributeSource" />.</summary>
/// <param name="matchVersion">Lucene version to match; controls version-dependent tokenization behavior</param>
/// <param name="source">the <see cref="AttributeSource"/> to use</param>
/// <param name="input">the input to split up into tokens</param>
public StandardTokenizer(Version matchVersion, AttributeSource source, System.IO.TextReader input)
    : base(source)
{
    InitBlock();
    this.scanner = new StandardTokenizerImpl(input);
    Init(input, matchVersion);
}
/// <summary>
/// Creates a new RussianLetterTokenizer using the given
/// <see cref="AttributeSource.AttributeFactory"/>.
/// </summary>
/// <param name="factory">the attribute factory to use</param>
/// <param name="__in">the input to split up into tokens</param>
public RussianLetterTokenizer(AttributeSource.AttributeFactory factory, TextReader __in)
    : base(factory, __in)
{
}
/// <summary>
/// Construct a new LetterTokenizer using a given
/// <see cref="AttributeSource.AttributeFactory"/>.
/// </summary>
/// <param name="matchVersion">
///          Lucene version to match (see <a href="#version">above</a>) </param>
/// <param name="factory">
///          the attribute factory to use for this <see cref="Tokenizer"/> </param>
/// <param name="in">
///          the input to split up into tokens </param>
public LetterTokenizer(LuceneVersion matchVersion, AttributeSource.AttributeFactory factory, TextReader @in)
    : base(matchVersion, factory, @in)
{
}
/// <summary>
/// Creates EdgeNGramTokenizer that can generate n-grams in the sizes of the given range.
/// </summary>
/// <param name="source"><see cref="AttributeSource"/> to use</param>
/// <param name="input"><see cref="TextReader"/> holding the input to be tokenized</param>
/// <param name="sideLabel">the name of the <see cref="Side"/> from which to chop off an n-gram</param>
/// <param name="minGram">the smallest n-gram to generate</param>
/// <param name="maxGram">the largest n-gram to generate</param>
public EdgeNGramTokenizer(AttributeSource source, TextReader input, string sideLabel, int minGram, int maxGram)
    : this(source, input, SideExtensions.GetSide(sideLabel), minGram, maxGram)
{
}
/// <summary>
/// Creates a new ArabicLetterTokenizer that reuses the attributes of the given
/// <see cref="AttributeSource"/>.
/// </summary>
/// <param name="source">the <see cref="AttributeSource"/> to use</param>
/// <param name="in">the input to split up into tokens</param>
public ArabicLetterTokenizer(AttributeSource source, TextReader @in)
    : base(source, @in)
{
}
/// <summary>
/// Creates EdgeNGramTokenizer that can generate n-grams in the sizes of the given range.
/// </summary>
/// <param name="source"><see cref="AttributeSource"/> to use</param>
/// <param name="input"><see cref="TextReader"/> holding the input to be tokenized</param>
/// <param name="side">the <see cref="Side"/> from which to chop off an n-gram</param>
/// <param name="minGram">the smallest n-gram to generate</param>
/// <param name="maxGram">the largest n-gram to generate</param>
public EdgeNGramTokenizer(AttributeSource source, TextReader input, Side side, int minGram, int maxGram)
    : base(source, input)
{
    init(side, minGram, maxGram);
}
/// <summary> Creates a new StandardTokenizer with a given <see cref="AttributeSource" />.</summary>
/// <param name="matchVersion">Lucene version to match; controls version-dependent tokenization behavior</param>
/// <param name="source">the <see cref="AttributeSource"/> to use</param>
/// <param name="input">the input to split up into tokens</param>
public StandardTokenizer(Version matchVersion, AttributeSource source, System.IO.TextReader input)
    : base(source)
{
    InitBlock();
    this.scanner = new StandardTokenizerImpl(input);
    Init(input, matchVersion);
}
/// <summary>
/// Creates the reflector helper, capturing its owning <see cref="AttributeSource"/>,
/// the output <paramref name="buffer"/>, and whether attribute class names are
/// prepended to reflected keys.
/// </summary>
/// <param name="outerInstance">the enclosing <see cref="AttributeSource"/></param>
/// <param name="prependAttClass">true to prefix reflected keys with the attribute class</param>
/// <param name="buffer">the <see cref="StringBuilder"/> the reflection output is appended to</param>
public AttributeReflectorAnonymousInnerClassHelper(AttributeSource outerInstance, bool prependAttClass, StringBuilder buffer)
{
    this.buffer = buffer;
    this.prependAttClass = prependAttClass;
    this.outerInstance = outerInstance;
}