Example #1
protected LowerCaseKeywordTokenizer(AttributeFactory factory, System.IO.TextReader input)
    : base(factory, input)
{
    offsetAtt = AddAttribute<IOffsetAttribute>();
    termAtt = AddAttribute<ITermAttribute>();

    isAsciiCasingSameAsInvariant = CultureInfo.InvariantCulture.CompareInfo.Compare(
        "abcdefghijklmnopqrstuvwxyz", "ABCDEFGHIJKLMNOPQRSTUVWXYZ", CompareOptions.IgnoreCase) == 0;
    invariantTextInfo = CultureInfo.InvariantCulture.TextInfo;
}
Example #2
 /// <summary>
 /// Construct a token stream processing the given input using the given AttributeFactory. </summary>
 protected internal Tokenizer(AttributeFactory factory, TextReader input)
     : base(factory)
 {
     if (input == null)
     {
         throw new System.ArgumentNullException("input", "input must not be null");
     }
     this.InputPending = input;
 }
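Most constructors on this page follow the same pattern: forward the AttributeFactory and the TextReader to the base constructor, then register the attributes the tokenizer will populate. Below is a minimal sketch of that pattern, assuming a Lucene.Net 4.8-style API (Tokenizer, AddAttribute<T>(), and the protected m_input field visible in Example #47); the class name WholeInputTokenizer is ours, not taken from any project above.

using System.IO;
using Lucene.Net.Analysis;
using Lucene.Net.Analysis.TokenAttributes;
using Lucene.Net.Util;

public sealed class WholeInputTokenizer : Tokenizer
{
    private readonly ICharTermAttribute termAtt;
    private bool done;

    // Same base-constructor chaining as the examples on this page.
    public WholeInputTokenizer(AttributeFactory factory, TextReader input)
        : base(factory, input)
    {
        termAtt = AddAttribute<ICharTermAttribute>();
    }

    public override bool IncrementToken()
    {
        if (done) return false;
        ClearAttributes();
        termAtt.SetEmpty().Append(m_input.ReadToEnd()); // emit the entire input as one token
        done = true;
        return true;
    }

    public override void Reset()
    {
        base.Reset();
        done = false;
    }
}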
Example #3
 /// <summary>
 /// Creates a new ThaiTokenizer, supplying the AttributeFactory </summary>
 public ThaiTokenizer(AttributeFactory factory, Reader reader)
     : base(factory, reader, (BreakIterator)sentenceProto.clone())
 {
     if (!DBBI_AVAILABLE)
     {
       throw new System.NotSupportedException("This JRE does not have support for Thai segmentation");
     }
     wordBreaker = (BreakIterator)proto.clone();
 }
Example #4
 /// <summary> An AttributeSource that uses the same attributes as the supplied one.</summary>
 public AttributeSource(AttributeSource input)
 {
     if (input == null)
     {
         throw new System.ArgumentException("input AttributeSource must not be null");
     }
     this.attributes = input.attributes;
     this.attributeImpls = input.attributeImpls;
     this.factory = input.factory;
 }
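Note the sharing semantics in Example #4: the copying constructor reuses the supplied source's attribute maps instead of cloning them, so both sources resolve to the very same attribute instances. A small demonstration, again assuming the Lucene.Net 4.8-style API:

using System;
using Lucene.Net.Analysis.TokenAttributes;
using Lucene.Net.Util;

var src = new AttributeSource();
var term = src.AddAttribute<ICharTermAttribute>();

var mirror = new AttributeSource(src);              // shares attributes with src
var sameTerm = mirror.AddAttribute<ICharTermAttribute>();

term.SetEmpty().Append("hello");
Console.WriteLine(sameTerm.ToString());             // hello
Console.WriteLine(ReferenceEquals(term, sameTerm)); // True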
Example #5
	  /// <summary>
	  /// Creates the <seealso cref="TokenStream"/> of n-grams from the given <seealso cref="Reader"/> and <seealso cref="AttributeFactory"/>. </summary>
	  public override Tokenizer create(AttributeFactory factory, Reader input)
	  {
		if (luceneMatchVersion.onOrAfter(Version.LUCENE_44))
		{
		  return new NGramTokenizer(luceneMatchVersion, factory, input, minGramSize, maxGramSize);
		}
		else
		{
		  return new Lucene43NGramTokenizer(factory, input, minGramSize, maxGramSize);
		}
	  }
Example #6
 /// <summary>
 /// Creates a new ThaiTokenizer, supplying the AttributeFactory </summary>
 public ThaiTokenizer(AttributeFactory factory, TextReader reader)
       : base(factory, reader, BreakIterator.CreateSentenceInstance(Locale.GetUS()))
 {
     if (!DBBI_AVAILABLE)
     {
         throw new System.NotSupportedException("This JRE does not have support for Thai segmentation");
     }
     wordBreaker = new ThaiWordBreaker(BreakIterator.CreateWordInstance(Locale.GetUS()));
     termAtt = AddAttribute<ICharTermAttribute>();
     offsetAtt = AddAttribute<IOffsetAttribute>();
 }
Example #7
        /// <summary>
        /// creates a new PatternTokenizer returning tokens from group (-1 for split functionality) </summary>
        public PatternTokenizer(AttributeFactory factory, Reader input, Pattern pattern, int group)
            : base(factory, input)
        {
            this.group = group;

            // Use "" instead of str so we don't consume chars
            // (fillBuffer) from the input when throwing the IAE below:
            matcher = pattern.matcher("");

            // Confusingly, the group count depends entirely on the pattern, but is only accessible via the matcher.
            if (group >= 0 && group > matcher.groupCount())
            {
              throw new System.ArgumentException("invalid group specified: pattern only has: " + matcher.groupCount() + " capturing groups");
            }
        }
Example #8
	  public PathHierarchyTokenizer(AttributeFactory factory, Reader input, int bufferSize, char delimiter, char replacement, int skip) : base(factory, input)
	  {
		if (bufferSize < 0)
		{
		  throw new System.ArgumentException("bufferSize cannot be negative");
		}
		if (skip < 0)
		{
		  throw new System.ArgumentException("skip cannot be negative");
		}
		termAtt.resizeBuffer(bufferSize);

		this.delimiter = delimiter;
		this.replacement = replacement;
		this.skip = skip;
		resultToken = new StringBuilder(bufferSize);
	  }
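For a sense of what Example #8 produces: PathHierarchyTokenizer emits one token per path prefix. A usage sketch, assuming Lucene.Net 4.8 namespaces and the convenience constructor that defaults the delimiter to '/':

using System;
using System.IO;
using Lucene.Net.Analysis.Path;
using Lucene.Net.Analysis.TokenAttributes;

var tok = new PathHierarchyTokenizer(new StringReader("/usr/local/bin"));
var term = tok.AddAttribute<ICharTermAttribute>();
tok.Reset();
while (tok.IncrementToken())
    Console.WriteLine(term.ToString()); // /usr, /usr/local, /usr/local/bin
tok.End();
tok.Dispose();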
Example #9
        /// <summary>
        /// creates a new PatternTokenizer returning tokens from group (-1 for split functionality) </summary>
        public PatternTokenizer(AttributeFactory factory, TextReader input, Regex pattern, int group)
              : base(factory, input)
        {
            this.termAtt = AddAttribute<ICharTermAttribute>();
            this.offsetAtt = AddAttribute<IOffsetAttribute>();
            this.group = group;

            // Use "" instead of str so we don't consume chars
            // (fillBuffer) from the input when throwing the IAE below:
            this.matcher = pattern.Match("");
            this.pattern = pattern;

            // Confusingly, the group count depends entirely on the pattern, but is only accessible via the matcher.
            var groupCount = pattern.GetGroupNumbers().Length;
            if (group >= 0 && group > groupCount)
            {
                throw new System.ArgumentException("invalid group specified: pattern only has: " + groupCount + " capturing groups");
            }

        }
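One subtlety in Example #9's group check: in .NET, Regex.GetGroupNumbers() includes the implicit group 0 (the whole match), so its Length is one more than the number of explicit capturing groups. A quick check against the standard System.Text.RegularExpressions API:

using System;
using System.Text.RegularExpressions;

var pattern = new Regex(@"(\w+)=(\w+)");                  // two explicit capturing groups
Console.WriteLine(pattern.GetGroupNumbers().Length);      // 3 (groups 0, 1, 2)
Console.WriteLine(pattern.GetGroupNumbers().Length - 1);  // 2 explicit groups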
Example #10
 /// <summary>
 /// Creates a new instance of the <seealso cref="org.apache.lucene.analysis.wikipedia.WikipediaTokenizer"/>.  Attaches the
 /// <code>input</code> to the newly created JFlex scanner. Uses the given <seealso cref="org.apache.lucene.util.AttributeSource.AttributeFactory"/>.
 /// </summary>
 /// <param name="input"> The input </param>
 /// <param name="tokenOutput"> One of <seealso cref="#TOKENS_ONLY"/>, <seealso cref="#UNTOKENIZED_ONLY"/>, <seealso cref="#BOTH"/> </param>
 public WikipediaTokenizer(AttributeFactory factory, Reader input, int tokenOutput, HashSet<string> untokenizedTypes)
     : base(factory, input)
 {
     this.scanner = new WikipediaTokenizerImpl(this.input);
     init(tokenOutput, untokenizedTypes);
 }
Example #11
	  /// <summary>
	  /// Construct a new SegmenterBase, also supplying the AttributeFactory
	  /// </summary>
	  public SegmentingTokenizerBase(AttributeFactory factory, Reader reader, BreakIterator iterator) : base(factory, reader)
	  {
		this.iterator = iterator;
	  }
Example #12
        /// <summary>
        /// Cleans up products, attributes, product types, and categories created since the tenant was provisioned.
        /// </summary>
        /// <param name="tenantId"></param>
        /// <param name="masterCatalogId"></param>
        /// <param name="siteId"></param>
        public static void CleanUpProducts(int tenantId, int masterCatalogId, int? catalogId = null, int? siteId = null)
        {
            var tenant            = TenantFactory.GetTenant(ServiceClientMessageFactory.GetTestClientMessage(), tenantId);
            var ApiMessageHandler = ServiceClientMessageFactory.GetTestClientMessage(tenantId: tenantId, masterCatalogId: masterCatalogId, catalogId: catalogId, siteId: siteId);
            var products          = ProductFactory.GetProducts(ApiMessageHandler, noCount: null, pageSize: null, q: null, qLimit: null, sortBy: null, startIndex: null, filter: "createdate gt " + SinceWhen(DateTime.Now.AddDays(-1)));

            foreach (var pro in products.Items)
            {
                ProductFactory.DeleteProduct(ApiMessageHandler, pro.ProductCode);
            }
            var productTypes = ProductTypeFactory.GetProductTypes(handler: ApiMessageHandler, successCode: HttpStatusCode.OK, expectedCode: HttpStatusCode.OK);

            foreach (var pt in productTypes.Items)
            {
                if (pt.AuditInfo.CreateDate.Value > DateTime.Now.AddDays(-1))
                {
                    try
                    {
                        ProductTypeFactory.DeleteProductType(ApiMessageHandler, pt.Id.Value);
                    }
                    catch (TestFailException e)   // get around the base product type
                    {
                        // ToDo: e.ActualReturnCode
                    }
                }
            }
            var attributes = AttributeFactory.GetAttributes(handler: ApiMessageHandler, successCode: HttpStatusCode.OK, expectedCode: HttpStatusCode.OK);

            if (attributes.TotalCount != 0)
            {
                foreach (var attr in attributes.Items)
                {
                    if (attr.AuditInfo.CreateDate.Value > DateTime.Now.AddDays(-1))
                    {
                        //bug 18745, should return NoContent
                        try
                        {
                            AttributeFactory.DeleteAttribute(ApiMessageHandler, attr.AttributeFQN);
                        }
                        catch (TestFailException e)  //get around the bug
                        {
                            // ToDo: e.ActualReturnCode
                        }
                    }
                }
            }
            var cates = CategoryFactory.GetCategories(ApiMessageHandler, pageSize: null, sortBy: null, startIndex: null, filter: "createdate gt " + SinceWhen(DateTime.Now.AddDays(-1)));

            foreach (var cate in cates.Items)
            {
                var messageHandler1 = ServiceClientMessageFactory.GetTestClientMessage(tenantId: tenantId, masterCatalogId: masterCatalogId, catalogId: catalogId, siteId: siteId);
                try
                {
                    CategoryFactory.DeleteCategoryById(handler: messageHandler1, categoryId: (int)cate.Id, cascadeDelete: true);
                }
                catch (TestFailException e)   // work around NotFound
                {
                    // ToDo: e.ActualReturnCode
                }
            }
        }
Example #13
 public TokenizerAnonymousClass(AttributeFactory factory, TextReader reader, Token[] tokens)
     : base(factory, reader)
 {
     reusableToken = AddAttribute <ICharTermAttribute>();
     this.tokens   = tokens;
 }
Example #14
 public PipeTokenizer(LuceneVersion matchVersion, AttributeFactory factory, TextReader input) :
     base(matchVersion, factory, input)
 {
 }
Example #15
 /// <summary>
 /// Construct a new LowerCaseTokenizer using a given
 /// <seealso cref="org.apache.lucene.util.AttributeSource.AttributeFactory"/>.
 /// </summary>
 /// <param name="matchVersion">
 ///          Lucene version to match See <seealso cref="<a href="#version">above</a>"/> </param>
 /// <param name="factory">
 ///          the attribute factory to use for this <seealso cref="Tokenizer"/> </param>
 /// <param name="in">
 ///          the input to split up into tokens </param>
 public LowerCaseTokenizer(Version matchVersion, AttributeFactory factory, TextReader @in)
     : base(matchVersion, factory, @in)
 {
 }
Example #16
 public NGramTokenizer(AttributeFactory factory, TextReader input,
                       int gramSize)
     : base(factory, input)
 {
     Init(gramSize);
 }
Example #17
 /// <summary>
 /// Construct a new SegmenterBase, also supplying the <see cref="Lucene.Net.Util.AttributeSource.AttributeFactory"/>
 /// </summary>
 protected SegmentingTokenizerBase(AttributeFactory factory, TextReader reader, BreakIterator iterator) // LUCENENET: CA1012: Abstract types should not have constructors (marked protected)
     : base(factory, reader)
 {
     offsetAtt     = AddAttribute <IOffsetAttribute>();
     this.iterator = iterator;
 }
Example #18
 /// <summary> A TokenStream using the supplied AttributeFactory for creating new <see cref="IAttribute" /> instances.</summary>
 protected internal TokenStream(AttributeFactory factory)
     : base(factory)
 {
 }
Example #19
        public static ICollection <AttributeInfo> Get(OffnetServiceKey key, bool populateLists)
        {
            var config = OffnetServiceConfiguration.Get(key.Id, key.EffectiveDate);

            var list = ValidValueRuleParser.GetRules(config.ValidValueRule);

            var set = new ValidValueRuleSet();

            set.AddRules(list);

            var attributeSet = set.GetAttributes(key, SearchOptions.ALL_FALSE);
            //If we have an attribute with no valid options, clear the value and try again...
            var emptyAttributes = (from a in attributeSet where a.Values == null || a.Values.Count == 0 select a.Name).ToList();

            if (emptyAttributes.Count > 0)
            {
                foreach (var a in emptyAttributes)
                {
                    key.RemoveAttribute(a);
                }
                attributeSet = set.GetAttributes(key, SearchOptions.ALL_FALSE);
            }

            IDictionary <string, AttributeInfo> tempList = new Dictionary <string, AttributeInfo>();

            foreach (var a in attributeSet)
            {
                tempList[a.Name] = a;
            }

            key.AddMissingAttributes();
            //Next we need to check whether there are non-list items that need to be collected.
            foreach (var pair in config.Attributes)
            {
                AttributeInfo a;
                tempList.TryGetValue(pair.Key, out a); // leaves a null when the key is absent

                tempList[pair.Key] = AttributeFactory.CreateAttribute(pair.Value, pair.Key, a, key);
                if ((pair.Value.Type == AttributeType.Parent || pair.Value.Type == AttributeType.Related) &&
                    key.HasAttribute(pair.Key))
                {
                    tempList[pair.Key].SetValue(key.GetAttributeValue(pair.Key, SearchOptions.ALL_TRUE));
                }
            }


            var ruleSet = new OffnetServiceRuleSet();

            ruleSet.AddDefaults(key);  // add defaults so rules such as IsApplicable can use them

            //The key doesn't have all the data we have generated, so to use the latest values we build a ValueHolder with what we need...
            string aValue;

            foreach (var a in tempList.Values)
            {
                aValue = a.GetValue();
                if (!string.IsNullOrEmpty(aValue)) // don't add empty values
                {
                    ruleSet.AddValue(a.Name, new RuleValue(aValue));
                }
            }
            //Determine which attributes we don't need
            var finalList = tempList.Values.Where(a => config.IsConfigurableAttribute(a.Name, ruleSet)).ToDictionary(a => a.Name);


            //Last we need to try to add a description to attributes that don't have one yet,
            //and flag them as optional or not.  We will also set the default value if there is one.
            foreach (var a in finalList.Values)
            {
                a.Optional = config.IsOptional(a.Name, ruleSet).ToString();

                a.Label = config.GetLabel(a.Name);
                if (config.HasDefault(a.Name))
                {
                    try
                    {
                        a.DefaultValue = config.GetDefaultValue(a.Name, key);
                    }
                    catch (Exception) { }
                }
                a.Hidden              = config.IsHidden(a.Name, key);
                a.MaxRepeats          = config.GetMaxRepeats(a.Name);
                a.RequiresRefresh     = config.GetRequiresRefresh(a.Name);
                a.ReadOnly            = config.IsReadOnly(a.Name, key);
                a.ApplicableForChange = config.GetApplicableForChange(a.Name);
                a.AffectsChildren     = config.AffectsChildren(a.Name);
                a.DesignImpact        = config.IsDesignImpact(a.Name, key);
                a.ProvisioningImpact  = config.IsProvisioningImpact(a.Name, key);

                var la = a as ListAttribute;
                if (la != null)
                {
                    if (populateLists && la.GetValue() != null && !la.ReadOnly && !la.Hidden)
                    {
                        //Since the value has been set, the list of options is empty.  If it is asked for,
                        //we determine the options as they would be had this value not been set.
                        var myKey = key.Clone(false);

                        myKey.AddValue(la.Name, null);
                        var myAtts = set.GetAttributes(myKey, SearchOptions.ALL_FALSE);

                        foreach (var av in
                                 from myAtt in myAtts
                                 where myAtt.Name.Equals(la.Name)
                                 from av in ((ListAttribute)myAtt).GetList()
                                 select av)
                        {
                            la.AddValue(av);
                        }
                    }
                }
            }

            return(config.SortList(finalList.Values));
        }
Example #20
 protected LowerCaseKeywordTokenizer(AttributeFactory factory, System.IO.TextReader input)
     : base(factory, input)
 {
 }
Example #21
 public override ArabicLetterTokenizer create(AttributeFactory factory, Reader input)
 {
     return(new ArabicLetterTokenizer(luceneMatchVersion, factory, input));
 }
Example #22
 /// <summary>
 /// Creates NGramTokenizer with given min and max n-grams. </summary>
 /// <param name="factory"> <seealso cref="org.apache.lucene.util.AttributeSource.AttributeFactory"/> to use </param>
 /// <param name="input"> <seealso cref="Reader"/> holding the input to be tokenized </param>
 /// <param name="minGram"> the smallest n-gram to generate </param>
 /// <param name="maxGram"> the largest n-gram to generate </param>
 public Lucene43NGramTokenizer(AttributeFactory factory, Reader input, int minGram, int maxGram) : base(factory, input)
 {
     init(minGram, maxGram);
 }
Example #23
 protected LowerCaseKeywordTokenizer(AttributeFactory factory, System.IO.TextReader input)
     : base(factory, input)
 {
     offsetAtt = AddAttribute <IOffsetAttribute>();
     termAtt   = AddAttribute <ITermAttribute>();
 }
Example #24
 /// <summary>
 /// Construct a new WhitespaceTokenizer using a given
 /// <seealso cref="org.apache.lucene.util.AttributeSource.AttributeFactory"/>.
 /// </summary>
 /// <param name="matchVersion">
 ///          Lucene version to match. See <a href="#version">above</a>. </param>
 /// <param name="factory">
 ///          the attribute factory to use for this <seealso cref="Tokenizer"/> </param>
 /// <param name="in">
 ///          the input to split up into tokens </param>
 public WhitespaceTokenizer(Version matchVersion, AttributeFactory factory, TextReader @in)
     : base(matchVersion, factory, @in)
 {
 }
Example #25
        public ReversePathHierarchyTokenizer(AttributeFactory factory, TextReader input, int bufferSize, char delimiter, char replacement, int skip)
            : base(factory, input)
        {
            if (bufferSize < 0)
            {
                throw new System.ArgumentException("bufferSize cannot be negative");
            }
            if (skip < 0)
            {
                throw new System.ArgumentException("skip cannot be negative");
            }
            termAtt = AddAttribute<ICharTermAttribute>();
            offsetAtt = AddAttribute<IOffsetAttribute>();
            posAtt = AddAttribute<IPositionIncrementAttribute>();

            termAtt.ResizeBuffer(bufferSize);
            this.delimiter = delimiter;
            this.replacement = replacement;
            this.skip = skip;
            resultToken = new StringBuilder(bufferSize);
            resultTokenBuffer = new char[bufferSize];
            delimiterPositions = new List<int>(bufferSize / 10);
        }
Example #26
 public PathHierarchyTokenizer(AttributeFactory factory, TextReader input, char delimiter, char replacement, int skip)
     : this(factory, input, DEFAULT_BUFFER_SIZE, delimiter, replacement, skip)
 {
 }
Example #27
 protected LowerCaseKeywordTokenizer(AttributeFactory factory, System.IO.TextReader input)
     : base(factory, input)
 {
     offsetAtt = AddAttribute<IOffsetAttribute>();
     termAtt = AddAttribute<ITermAttribute>();
 }
Example #28
 /// <summary>Construct a new LetterTokenizer using a given {@link Mono.Lucene.Net.Util.AttributeSource.AttributeFactory}. </summary>
 public LetterTokenizer(AttributeFactory factory, System.IO.TextReader in_Renamed) : base(factory, in_Renamed)
 {
 }
Example #29
 public ArabicLetterTokenizer(AttributeFactory factory, TextReader @in) : base(factory, @in)
 {
 }
Example #30
 public ReplacerTokenizer(AttributeFactory factory, System.IO.TextReader input)
     : base(factory, input)
 {
     Init();
 }
Example #31
 /// <summary>
 /// Creates <see cref="NGramTokenizer"/> with given min and max n-grams. </summary>
 /// <param name="version"> the lucene compatibility version </param>
 /// <param name="factory"> <see cref="AttributeSource.AttributeFactory"/> to use </param>
 /// <param name="input"> <see cref="TextReader"/> holding the input to be tokenized </param>
 /// <param name="minGram"> the smallest n-gram to generate </param>
 /// <param name="maxGram"> the largest n-gram to generate </param>
 public NGramTokenizer(LuceneVersion version, AttributeFactory factory, TextReader input, int minGram, int maxGram)
     : this(version, factory, input, minGram, maxGram, false)
 {
 }
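To see what the constructor in Example #31 yields at runtime, here is a hedged usage sketch. It assumes Lucene.Net 4.8, where AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY is the stock factory and, since the 4.4 rewrite that Example #5 gates on, n-grams are emitted ordered by position first and then by length:

using System;
using System.IO;
using Lucene.Net.Analysis.NGram;
using Lucene.Net.Analysis.TokenAttributes;
using Lucene.Net.Util;

var tok = new NGramTokenizer(
    LuceneVersion.LUCENE_48,
    AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY,
    new StringReader("abcd"),
    minGram: 1,
    maxGram: 2);

var term = tok.AddAttribute<ICharTermAttribute>();
tok.Reset();
while (tok.IncrementToken())
    Console.Write(term + " ");   // a ab b bc c cd d
tok.End();
tok.Dispose();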
Example #32
 /// <summary>
 /// Creates a new StandardTokenizer with a given <seealso cref="org.apache.lucene.util.AttributeSource.AttributeFactory"/> 
 /// </summary>
 public StandardTokenizer(Version matchVersion, AttributeFactory factory, Reader input)
     : base(factory, input)
 {
     Init(matchVersion);
 }
Example #33
 public MockTokenizer(AttributeFactory factory, StreamReader input, CharacterRunAutomaton runAutomaton, bool lowerCase)
     : this(factory, input, runAutomaton, lowerCase, DEFAULT_MAX_TOKEN_LENGTH)
 {
 }
Example #34
 /// <summary>
 /// Construct a new RussianLetterTokenizer using a given
 /// <see cref="AttributeSource.AttributeFactory"/>.
 /// </summary>
 /// <param name="matchVersion"> lucene compatibility version </param>
 /// <param name="factory">
 ///          the attribute factory to use for this <see cref="Tokenizer"/> </param>
 /// <param name="in">
 ///          the input to split up into tokens </param>
 public RussianLetterTokenizer(LuceneVersion matchVersion, AttributeFactory factory, TextReader @in)
     : base(matchVersion, factory, @in)
 {
 }
Example #35
 internal NGramTokenizer(LuceneVersion version, AttributeFactory factory, TextReader input, int minGram, int maxGram, bool edgesOnly)
     : base(factory, input)
 {
     Init(version, minGram, maxGram, edgesOnly);
 }
Example #36
 /// <summary>
 /// Calls MockTokenizer(AttributeFactory, StreamReader, WHITESPACE, true).
 /// </summary>
 public MockTokenizer(AttributeFactory factory, StreamReader input)
     : this(factory, input, WHITESPACE, true)
 {
 }
Example #37
 public CharJoinAbbreviationsLowerCaseExactTokenizer(AttributeFactory factory, System.IO.TextReader input)
     : base(factory, input)
 {
     offsetAtt = AddAttribute<IOffsetAttribute>();
     termAtt = AddAttribute<ITermAttribute>();
 }
Example #38
 public override WhitespaceTokenizer create(AttributeFactory factory, Reader input)
 {
     return(new WhitespaceTokenizer(luceneMatchVersion, factory, input));
 }
Example #39
 /// <summary>
 /// Construct a new RussianLetterTokenizer using a given
 /// <seealso cref="org.apache.lucene.util.AttributeSource.AttributeFactory"/>.
 /// </summary>
 /// <param name="matchVersion">
 ///          Lucene version to match. See <a href="#version">above</a>. </param>
 /// <param name="factory">
 ///          the attribute factory to use for this <seealso cref="Tokenizer"/> </param>
 /// <param name="in">
 ///          the input to split up into tokens </param>
 public RussianLetterTokenizer(Version matchVersion, AttributeFactory factory, Reader @in)
     : base(matchVersion, factory, @in)
 {
 }
Example #40
 /// <summary>
 /// Creates a new StandardTokenizer with a given <seealso cref="org.apache.lucene.util.AttributeSource.AttributeFactory"/>
 /// </summary>
 public StandardTokenizer(Version matchVersion, AttributeFactory factory, Reader input)
     : base(factory, input)
 {
     Init(matchVersion);
 }
Example #41
 public UpperCaseKeywordTokenizer(AttributeFactory factory, TextReader @in)
     : base(factory, @in)
 {
 }
Example #42
 private Loader(Uri defaultBaseUri)
 {
     DefaultBaseUri   = defaultBaseUri;
     ElementFactory   = new ElementFactory();
     AttributeFactory = new AttributeFactory();
 }
Example #43
 /// <summary>
 /// Creates a new ClassicTokenizer with a given <seealso cref="org.apache.lucene.util.AttributeSource.AttributeFactory"/> 
 /// </summary>
 public ClassicTokenizer(LuceneVersion matchVersion, AttributeFactory factory, Reader input)
     : base(factory, input)
 {
     Init(matchVersion);
 }
Example #44
        /*
         * Creates EdgeNGramTokenizer that can generate n-grams in the sizes of the given range
         * 
         * <param name="factory"><see cref="AttributeSource.AttributeFactory"/> to use</param>
         * <param name="input"><see cref="TextReader"/> holding the input to be tokenized</param>
         * <param name="side">the <see cref="Side"/> from which to chop off an n-gram</param>
         * <param name="minGram">the smallest n-gram to generate</param>
         * <param name="maxGram">the largest n-gram to generate</param>
         */
        public EdgeNGramTokenizer(AttributeFactory factory, TextReader input, Side side, int minGram, int maxGram)
            : base(factory, input)
        {
            init(side, minGram, maxGram);
        }
Example #45
 public ReversePathHierarchyTokenizer(AttributeFactory factory, TextReader input, char delimiter, char replacement, int skip)
     : this(factory, input, DEFAULT_BUFFER_SIZE, delimiter, replacement, skip)
 {
 }
Example #46
	  /// <summary>
	  /// Creates NGramTokenizer with given min and max n-grams. </summary>
	  /// <param name="version"> the lucene compatibility <a href="#version">version</a> </param>
	  /// <param name="factory"> <seealso cref="org.apache.lucene.util.AttributeSource.AttributeFactory"/> to use </param>
	  /// <param name="input"> <seealso cref="Reader"/> holding the input to be tokenized </param>
	  /// <param name="minGram"> the smallest n-gram to generate </param>
	  /// <param name="maxGram"> the largest n-gram to generate </param>
	  public NGramTokenizer(Version version, AttributeFactory factory, Reader input, int minGram, int maxGram) : this(version, factory, input, minGram, maxGram, false)
	  {
	  }
Example #47
 /// <summary>
 /// Creates a new instance of the <see cref="WikipediaTokenizer"/>.  Attaches the
 /// <paramref name="input"/> to the newly created JFlex scanner. Uses the given <see cref="AttributeSource.AttributeFactory"/>.
 /// </summary>
 /// <param name="factory"> The <see cref="AttributeSource.AttributeFactory"/> </param>
 /// <param name="input"> The input </param>
 /// <param name="tokenOutput"> One of <see cref="TOKENS_ONLY"/>, <see cref="UNTOKENIZED_ONLY"/>, <see cref="BOTH"/> </param>
 /// <param name="untokenizedTypes"> Untokenized types </param>
 public WikipediaTokenizer(AttributeFactory factory, TextReader input, int tokenOutput, ICollection <string> untokenizedTypes)
     : base(factory, input)
 {
     this.scanner = new WikipediaTokenizerImpl(this.m_input);
     Init(tokenOutput, untokenizedTypes);
 }
Example #48
    public static System.Attribute ConvertPlatformAttribute(CustomAttributeData attribute, PlatformName platform)
    {
        var constructorArguments = new object [attribute.ConstructorArguments.Count];

        for (int i = 0; i < attribute.ConstructorArguments.Count; ++i)
        {
            constructorArguments [i] = attribute.ConstructorArguments [i].Value;
        }

        Func <string> createErrorMessage = () => {
            var b = new System.Text.StringBuilder(" Types { ");
            for (int i = 0; i < constructorArguments.Length; ++i)
            {
                b.Append(constructorArguments[i].GetType().ToString() + " ");
            }
            b.Append("}");
            return(b.ToString());
        };

        Func <string> unknownFormatError = () => $"Unknown format for old style availability attribute {attribute.AttributeType.FullName} {attribute.ConstructorArguments.Count} {createErrorMessage ()}";

        object []      ctorValues;
        System.Type [] ctorTypes;

        switch (attribute.ConstructorArguments.Count)
        {
        case 2:
            if (constructorArguments [0].GetType() == typeof(byte) &&
                constructorArguments [1].GetType() == typeof(byte))
            {
                ctorValues = new object [] { (byte)platform, (int)(byte)constructorArguments [0], (int)(byte)constructorArguments [1], (byte)0xff, null };
                ctorTypes  = new System.Type [] { AttributeFactory.PlatformEnum, typeof(int), typeof(int), AttributeFactory.PlatformArch, typeof(string) };
                break;
            }
            throw new NotImplementedException(unknownFormatError());

        case 3:
            if (constructorArguments [0].GetType() == typeof(byte) &&
                constructorArguments [1].GetType() == typeof(byte) &&
                constructorArguments [2].GetType() == typeof(byte))
            {
                ctorValues = new object [] { (byte)platform, (int)(byte)constructorArguments [0], (int)(byte)constructorArguments [1], (int)(byte)constructorArguments [2], (byte)0xff, null };
                ctorTypes  = new System.Type [] { AttributeFactory.PlatformEnum, typeof(int), typeof(int), typeof(int), AttributeFactory.PlatformArch, typeof(string) };
                break;
            }
            if (constructorArguments [0].GetType() == typeof(byte) &&
                constructorArguments [1].GetType() == typeof(byte) &&
                constructorArguments [2].GetType() == typeof(bool))
            {
                byte arch = (bool)constructorArguments [2] ? (byte)2 : (byte)0xff;
                ctorValues = new object [] { (byte)platform, (int)(byte)constructorArguments [0], (int)(byte)constructorArguments [1], arch, null };
                ctorTypes  = new System.Type [] { AttributeFactory.PlatformEnum, typeof(int), typeof(int), AttributeFactory.PlatformArch, typeof(string) };
                break;
            }
            throw new NotImplementedException(unknownFormatError());

        case 4:
            if (constructorArguments [0].GetType() == typeof(byte) &&
                constructorArguments [1].GetType() == typeof(byte) &&
                constructorArguments [2].GetType() == typeof(byte) &&
                constructorArguments [3].GetType() == typeof(bool))
            {
                byte arch = (bool)constructorArguments [3] ? (byte)2 : (byte)0xff;
                ctorValues = new object [] { (byte)platform, (int)(byte)constructorArguments [0], (int)(byte)constructorArguments [1], (int)(byte)constructorArguments [2], arch, null };
                ctorTypes  = new System.Type [] { AttributeFactory.PlatformEnum, typeof(int), typeof(int), typeof(int), AttributeFactory.PlatformArch, typeof(string) };
                break;
            }
            if (constructorArguments [0].GetType() == typeof(byte) &&
                constructorArguments [1].GetType() == typeof(byte) &&
                constructorArguments [2].GetType() == typeof(byte) &&
                constructorArguments [3].GetType() == typeof(byte) /* ObjCRuntime.PlatformArchitecture */)
            {
                ctorValues = new object [] { (byte)platform, (int)(byte)constructorArguments [0], (int)(byte)constructorArguments [1], (int)(byte)constructorArguments [2], constructorArguments [3], null };
                ctorTypes  = new System.Type [] { AttributeFactory.PlatformEnum, typeof(int), typeof(int), typeof(int), AttributeFactory.PlatformArch, typeof(string) };
                break;
            }

            throw new NotImplementedException(unknownFormatError());

        default:
            throw new NotImplementedException($"Unknown count {attribute.ConstructorArguments.Count} {createErrorMessage ()}");
        }

        return(AttributeFactory.CreateNewAttribute(AttributeFactory.IntroducedAttributeType, ctorTypes, ctorValues));
    }
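The converter above leans on the fact that CustomAttributeData exposes constructor arguments as boxed values without instantiating the attribute. A self-contained reflection demo of that raw data, using ObsoleteAttribute as a stand-in for the availability attributes handled above:

using System;
using System.Reflection;

[Obsolete("legacy", true)]
class Legacy { }

class Demo
{
    static void Main()
    {
        // ConstructorArguments carries the boxed values and their declared types.
        foreach (CustomAttributeData cad in typeof(Legacy).GetCustomAttributesData())
            for (int i = 0; i < cad.ConstructorArguments.Count; ++i)
                Console.WriteLine($"{i}: {cad.ConstructorArguments[i].ArgumentType} = {cad.ConstructorArguments[i].Value}");
        // Output:
        // 0: System.String = legacy
        // 1: System.Boolean = True
    }
}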
Example #49
 /*
  * Creates EdgeNGramTokenizer that can generate n-grams in the sizes of the given range
  * 
  * <param name="factory"><see cref="AttributeSource.AttributeFactory"/> to use</param>
  * <param name="input"><see cref="TextReader"/> holding the input to be tokenized</param>
  * <param name="sideLabel">the name of the <see cref="Side"/> from which to chop off an n-gram</param>
  * <param name="minGram">the smallest n-gram to generate</param>
  * <param name="maxGram">the largest n-gram to generate</param>
  */
 public EdgeNGramTokenizer(AttributeFactory factory, TextReader input, string sideLabel, int minGram, int maxGram) :
     this(factory, input, SideExtensions.GetSide(sideLabel), minGram, maxGram)
 {
 }
Example #50
 public CJKTokenizer(AttributeFactory factory, TextReader @in)
     : base(factory, @in)
 {
     Init();
 }
Example #51
	  internal NGramTokenizer(Version version, AttributeFactory factory, Reader input, int minGram, int maxGram, bool edgesOnly) : base(factory, input)
	  {
		init(version, minGram, maxGram, edgesOnly);
	  }
Example #52
 public Lucene43EdgeNGramTokenizer(LuceneVersion version, AttributeFactory factory, TextReader input, Side side, int minGram, int maxGram)
     : base(factory, input)
 {
     Init(version, side, minGram, maxGram);
 }
Example #53
 public CJKTokenizer(AttributeFactory factory, TextReader _in)
     : base(factory, _in)
 {
     Init();
 }
Example #54
 public Lucene43EdgeNGramTokenizer(LuceneVersion version, AttributeFactory factory, TextReader input, string sideLabel, int minGram, int maxGram)
     : this(version, factory, input, GetSide(sideLabel), minGram, maxGram)
 {
 }
Example #55
	  /// <summary>
	  /// Creates NGramTokenizer with given min and max n-grams. </summary>
	  /// <param name="factory"> <seealso cref="org.apache.lucene.util.AttributeSource.AttributeFactory"/> to use </param>
	  /// <param name="input"> <seealso cref="Reader"/> holding the input to be tokenized </param>
	  /// <param name="minGram"> the smallest n-gram to generate </param>
	  /// <param name="maxGram"> the largest n-gram to generate </param>
	  public Lucene43NGramTokenizer(AttributeFactory factory, Reader input, int minGram, int maxGram) : base(factory, input)
	  {
		init(minGram, maxGram);
	  }
Example #56
 /// <summary>
 /// Creates <see cref="Lucene43EdgeNGramTokenizer"/> that can generate n-grams in the sizes of the given range
 /// </summary>
 /// <param name="version"> the Lucene match version - See <see cref="LuceneVersion"/> </param>
 /// <param name="factory"> <see cref="AttributeSource.AttributeFactory"/> to use </param>
 /// <param name="input"> <see cref="TextReader"/> holding the input to be tokenized </param>
 /// <param name="minGram"> the smallest n-gram to generate </param>
 /// <param name="maxGram"> the largest n-gram to generate </param>
 public Lucene43EdgeNGramTokenizer(LuceneVersion version, AttributeFactory factory, TextReader input, int minGram, int maxGram)
     : this(version, factory, input, Side.FRONT, minGram, maxGram)
 {
 }
Example #57
 /// <summary>
 /// Construct a new WhitespaceTokenizer using a given
 /// <seealso cref="org.apache.lucene.util.AttributeSource.AttributeFactory"/>.
 /// </summary>
 /// <param name="matchVersion">
 ///          Lucene version to match. See <a href="#version">above</a>. </param>
 /// <param name="factory">
 ///          the attribute factory to use for this <seealso cref="Tokenizer"/> </param>
 /// <param name="in">
 ///          the input to split up into tokens </param>
 public WhitespaceTokenizer(Version matchVersion, AttributeFactory factory, TextReader @in) : base(matchVersion, factory, @in)
 {
 }
Example #58
 public MyAttributeFactory(AttributeFactory @delegate)
 {
     this.@delegate = @delegate;
 }
Example #59
		/// <summary> An AttributeSource using the supplied <see cref="AttributeFactory"/> for creating new <see cref="Attribute"/> instances.</summary>
		public AttributeSource(AttributeFactory factory)
		{
            this.attributes = new SupportClass.GeneralKeyedCollection<Type, SupportClass.AttributeImplItem>(delegate(SupportClass.AttributeImplItem att) { return att.Key; });
            this.attributeImpls = new SupportClass.GeneralKeyedCollection<Type, SupportClass.AttributeImplItem>(delegate(SupportClass.AttributeImplItem att) { return att.Key; });
			this.factory = factory;
		}
Example #60
 /// <summary>
 /// Creates a new ClassicTokenizer with a given <seealso cref="org.apache.lucene.util.AttributeSource.AttributeFactory"/>
 /// </summary>
 public ClassicTokenizer(LuceneVersion matchVersion, AttributeFactory factory, Reader input)
     : base(factory, input)
 {
     Init(matchVersion);
 }