/// <inheritdoc cref="ITranslator.Compile"/>
private void InternalCompile()
{
    // Parse the current file into a Roslyn syntax tree and validate it.
    string sourceText = File.ReadAllText(CurrentFilePath, Encoding.UTF8);
    Tree = CSharpSyntaxTree.ParseText(sourceText, CSharpParseOptions.Default);
    Root = Tree.GetRoot();
    ThrowIfNotCSharpLang();

    // Locate the namespace and the class/interface declaration within it.
    SyntaxNode namespaceNode = GetNamespaceNode();
    SyntaxNode declarationNode = GetDeclarationNode(namespaceNode);
    IEnumerable<SyntaxNode> members = GetFilteredNodes(declarationNode);

    // Build the translated tree: head node first, then each filtered member.
    CreateCSharpHeadNode(declarationNode);
    foreach (var member in members)
    {
        InsertNodeInTree(member);
    }

    // Emit the generated output file.
    ISyntaxTree generatedTree = Generator.GetSyntaxTree(Head);
    Builder.Add(new FileBuilder(CurrentOutputPath, Configuration.OverrideExistingFile));
    Builder.Last().Build(generatedTree);
}
/// <summary>Indexing a single document populates the inverted index, vectors and statistics.</summary>
public async Task BuilderBuildsInvertedIndex()
{
    // Arrange: index only the "title" field.
    var builder = new Builder();
    builder.AddField("title");
    await builder.Add(new Document
    {
        { "id", "id" },
        { "title", "test" },
        { "body", "missing" },
    });

    // Act
    Index index = builder.Build();

    // Assert: the indexed term is present, the non-indexed field's term is not.
    Assert.Empty(builder.InvertedIndex["test"]["title"]["id"]);
    Assert.IsType<Vector>(builder.FieldVectors["title/id"]);
    Assert.False(builder.InvertedIndex.ContainsKey("missing"));

    var probe = TokenSet.FromString("test");
    Assert.Contains("test", builder.TokenSet.Intersect(probe).ToEnumeration());
    Assert.Equal(1, builder.DocumentCount);
    Assert.Equal(1, builder.AverageFieldLength["title"]);
    Assert.NotNull(index);
}
/// <summary>Appends <paramref name="value"/> to the builder unless it is null.</summary>
public void AddIfNotNull(T? value)
{
    if (value is null)
    {
        return;
    }

    Builder.Add(value);
}
/// <summary>
/// Builds an immutable <see cref="WordList"/> from the given words.
/// A null word source is treated as empty; a null affix config gets a default one.
/// </summary>
public static WordList CreateFromWords(IEnumerable<string> words, AffixConfig affix)
{
    if (words == null)
    {
        words = Enumerable.Empty<string>();
    }

    var builder = new Builder(affix ?? new AffixConfig.Builder().MoveToImmutable());

    // Pre-size the root entry table when the count is known up front;
    // -1 lets the builder choose its own default capacity.
    int rootCapacity = words is IList<string> wordList ? wordList.Count : -1;
    builder.InitializeEntriesByRoot(rootCapacity);

    var detail = WordEntryDetail.Default;
    foreach (var word in words)
    {
        builder.Add(word, detail);
    }

    return builder.MoveToImmutable();
}
/// <summary>Builds a rule joining column <paramref name="c1"/> of <paramref name="t1"/> to column <paramref name="c2"/> of <paramref name="t2"/>.</summary>
public static Rule CreateSimple(TableInfo t1, string c1, TableInfo t2, string c2, string name, int priority)
{
    var ruleBuilder = new Builder(t1, t2, name, priority);
    ruleBuilder.Add(c1, c2);
    return ruleBuilder.Finish();
}
/// <summary>
/// Adding a null value must be rejected with an <see cref="ArgumentNullException"/>.
/// </summary>
public void AddNull()
{
    var b = new Builder(GetLibHoney());

    // Assert.Throws states the intent directly and fails with a clear message
    // when no exception (or a wrong exception type) is thrown, unlike the
    // original try/catch + bool-flag pattern.
    Assert.Throws<ArgumentNullException>(() => b.Add(null));
}
/// <summary>
/// Creates a new <see cref="IGraph{TShape, TMat}"/> by importing the given graphs and passing their <see cref="Shape"/>s
/// along with the <see cref="Builder{TMat}"/> to the given create function.
/// </summary>
/// <typeparam name="TShapeOut">The shape of the resulting graph.</typeparam>
/// <typeparam name="TMatOut">The materialized value type of the resulting graph.</typeparam>
/// <typeparam name="TMat0">The materialized value type of <paramref name="g0"/>.</typeparam>
/// <typeparam name="TMat1">The materialized value type of <paramref name="g1"/>.</typeparam>
/// <typeparam name="TShape0">The shape of <paramref name="g0"/>.</typeparam>
/// <typeparam name="TShape1">The shape of <paramref name="g1"/>.</typeparam>
/// <param name="g0">The first graph to import.</param>
/// <param name="g1">The second graph to import.</param>
/// <param name="combineMaterializers">Function combining both imported graphs' materialized values into the result's materialized value.</param>
/// <param name="buildBlock">Function that wires the imported shapes inside the builder and returns the overall shape.</param>
/// <returns>A graph with shape <typeparamref name="TShapeOut"/> materializing <typeparamref name="TMatOut"/>.</returns>
public static IGraph <TShapeOut, TMatOut> Create <TShapeOut, TMatOut, TMat0, TMat1, TShape0, TShape1>(
    IGraph <TShape0, TMat0> g0, IGraph <TShape1, TMat1> g1,
    Func <TMat0, TMat1, TMatOut> combineMaterializers,
    Func <Builder <TMatOut>, TShape0, TShape1, TShapeOut> buildBlock)
    where TShapeOut : Shape
    where TShape0 : Shape
    where TShape1 : Shape
{
    var builder = new Builder <TMatOut>();
    // Import g0, currying its materialized value so that importing g1 can complete the combination.
    var shape0 = builder.Add <TShape0, TMat0, Func <TMat1, TMatOut> >(g0, m0 => (m1 => combineMaterializers(m0, m1)));
    // Import g1 and apply the curried function to obtain the combined materialized value.
    var shape1 = builder.Add <TShape1, Func <TMat1, TMatOut>, TMat1, TMatOut>(g1, (f, m1) => f(m1));
    var shape = buildBlock(builder, shape0, shape1);
    var module = builder.Module.ReplaceShape(shape);
    return(new GraphImpl <TShapeOut, TMatOut>(shape, module));
}
/// <summary>Feeds each (User, Unit, Score) row of <paramref name="source"/> into a Builder and materializes the result table.</summary>
public void RunApp(DataTable source)
{
    Builder builder = new Builder();
    foreach (DataRow row in source.Rows)
    {
        builder.Add((string)row["User"], (string)row["Unit"], (double)row["Score"]);
    }

    // NOTE(review): the built table is neither used nor returned here —
    // confirm whether ToDataTable is needed for its side effects.
    DataTable result = builder.ToDataTable();
}
/// <summary>Combines the given parts into a single boolean part.</summary>
public static IBooleanPart Build(params IBooleanPart[] parts)
{
    var combined = new Builder();
    foreach (var part in parts)
    {
        combined.Add(part);
    }

    return combined.Build();
}
/// <summary>Builds a <see cref="RealmSchema"/> containing one object schema per given class.</summary>
internal static RealmSchema CreateSchemaForClasses(IEnumerable<Type> classes)
{
    var schemaBuilder = new Builder();
    foreach (var type in classes)
    {
        schemaBuilder.Add(ObjectSchema.FromType(type));
    }

    return schemaBuilder.Build();
}
/// <summary>
/// Creates a new <see cref="IGraph{TShape, TMat}"/> by importing the given graph <paramref name="g1"/>
/// and passing its <see cref="Shape"/> along with the <see cref="Builder{TMat}"/> to the given create function.
/// </summary>
/// <typeparam name="TShapeOut">The shape of the resulting graph.</typeparam>
/// <typeparam name="TMat">The materialized value type, taken from <paramref name="g1"/>.</typeparam>
/// <typeparam name="TShape1">The shape of <paramref name="g1"/>.</typeparam>
/// <param name="g1">The graph to import.</param>
/// <param name="buildBlock">Function that wires the imported shape inside the builder and returns the overall shape.</param>
/// <returns>A graph with shape <typeparamref name="TShapeOut"/> materializing <typeparamref name="TMat"/>.</returns>
public static IGraph <TShapeOut, TMat> Create <TShapeOut, TMat, TShape1>(IGraph <TShape1, TMat> g1, Func <Builder <TMat>, TShape1, TShapeOut> buildBlock)
    where TShapeOut : Shape
    where TShape1 : Shape
{
    var builder = new Builder <TMat>();
    // Keep.Right: the imported graph's materialized value becomes the result's materialized value.
    var shape1 = builder.Add <TShape1, object, TMat, TMat>(g1, Keep.Right);
    var shape = buildBlock(builder, shape1);
    var module = builder.Module.ReplaceShape(shape);
    return(new GraphImpl <TShapeOut, TMat>(shape, module));
}
/// <summary>
/// Builds the SELECT clause (column list plus FROM) for <typeparamref name="TModel"/>
/// from the properties of the anonymous object returned by <paramref name="expression"/>.
/// </summary>
/// <param name="expression">Projection selecting the properties to include.</param>
/// <returns>This query, for fluent chaining.</returns>
/// <exception cref="ArgumentNullException">Thrown when <paramref name="expression"/> is null.</exception>
/// <exception cref="NotImplementedException">Thrown when the projection selects no properties.</exception>
public IDbQuery<TModel> Select(Func<TModel, object> expression)
{
    if (expression == null)
    {
        throw new ArgumentNullException(nameof(expression));
    }

    var model = expression(new TModel());
    var properties = ReflectionHelper.AllProperty(model.GetType(), ReflectionHelper.DefBindingFlags | BindingFlags.DeclaredOnly);
    if (properties.Count == 0)
    {
        throw new NotImplementedException();
    }

    var modelType = typeof(TModel);
    var tableName = modelType.Name;
    var tableAliasName = Resolver.ResolveAliasName<TModel>(modelType);

    Builder.Dispose();
    Builder.Add("SELECT");
    Builder.Add(" ");

    var index = 0;
    foreach (var property in properties)
    {
        var aliasName = Resolver.ResolveAliasName<TModel>(property.Member);

        // Qualify the column with the table alias when one is in effect.
        if (tableName != tableAliasName)
        {
            Builder.Add(tableAliasName).Add(".");
        }

        if (aliasName != property.Name)
        {
            // BUGFIX: the original emitted only " AS [alias]" without the
            // underlying column name, producing invalid SQL like "t. AS [x]".
            // Emit the column first, then its alias.
            Builder.AddColumn(property.Name).Add(" AS [").Add(aliasName).Add("]");
        }
        else
        {
            Builder.AddColumn(property.Name);
        }

        if (index != properties.Count - 1)
        {
            Builder.Add(",");
        }

        index++;
    }

    Builder.Add(" ");
    // BUGFIX: the keyword was misspelled "FORM ", which broke every
    // generated statement.
    Builder.Add("FROM ");
    Builder.Add(tableName);
    if (tableName != tableAliasName)
    {
        Builder.Add(" AS ").Add(tableAliasName);
    }

    return this;
}
/// <summary>
/// Starts an FST-based terms-index writer for <paramref name="fieldInfo"/>.
/// </summary>
/// <param name="fieldInfo">The field whose terms index is being written.</param>
/// <param name="termsFilePointer">File pointer of the first term for this field in the terms file.</param>
/// <param name="vgtiw">Owning index writer; supplies the output stream.</param>
public FstFieldWriter(FieldInfo fieldInfo, long termsFilePointer, VariableGapTermsIndexWriter vgtiw)
{
    _vgtiw = vgtiw;
    FieldInfo = fieldInfo;
    PositiveIntOutputs fstOutputs = PositiveIntOutputs.Singleton;
    _fstBuilder = new Builder <long>(FST.INPUT_TYPE.BYTE1, fstOutputs);
    // Record where this field's index begins in the output stream.
    IndexStart = _vgtiw.Output.FilePointer;
    // Always put empty string in
    _fstBuilder.Add(new IntsRef(), termsFilePointer);
    _startTermsFilePointer = termsFilePointer;
}
/// <summary>
/// Creates a new <see cref="IGraph{TShape, TMat}"/> by importing the given graphs and passing their <see cref="Shape"/>s
/// along with the <see cref="Builder{TMat}"/> to the given create function.
/// </summary>
/// <typeparam name="TShapeOut">The shape of the resulting graph.</typeparam>
/// <typeparam name="TMatOut">The materialized value type of the resulting graph.</typeparam>
/// <typeparam name="TMat0">The materialized value type of <paramref name="g0"/>.</typeparam>
/// <typeparam name="TMat1">The materialized value type of <paramref name="g1"/>.</typeparam>
/// <typeparam name="TMat2">The materialized value type of <paramref name="g2"/>.</typeparam>
/// <typeparam name="TMat3">The materialized value type of <paramref name="g3"/>.</typeparam>
/// <typeparam name="TShape0">The shape of <paramref name="g0"/>.</typeparam>
/// <typeparam name="TShape1">The shape of <paramref name="g1"/>.</typeparam>
/// <typeparam name="TShape2">The shape of <paramref name="g2"/>.</typeparam>
/// <typeparam name="TShape3">The shape of <paramref name="g3"/>.</typeparam>
/// <param name="g0">The first graph to import.</param>
/// <param name="g1">The second graph to import.</param>
/// <param name="g2">The third graph to import.</param>
/// <param name="g3">The fourth graph to import.</param>
/// <param name="combineMaterializers">Function combining all four imported graphs' materialized values into the result's materialized value.</param>
/// <param name="buildBlock">Function that wires the imported shapes inside the builder and returns the overall shape.</param>
/// <returns>A graph with shape <typeparamref name="TShapeOut"/> materializing <typeparamref name="TMatOut"/>.</returns>
public static IGraph <TShapeOut, TMatOut> Create <TShapeOut, TMatOut, TMat0, TMat1, TMat2, TMat3, TShape0, TShape1, TShape2, TShape3>(
    IGraph <TShape0, TMat0> g0, IGraph <TShape1, TMat1> g1, IGraph <TShape2, TMat2> g2, IGraph <TShape3, TMat3> g3,
    Func <TMat0, TMat1, TMat2, TMat3, TMatOut> combineMaterializers,
    Func <Builder <TMatOut>, TShape0, TShape1, TShape2, TShape3, TShapeOut> buildBlock)
    where TShapeOut : Shape
    where TShape0 : Shape
    where TShape1 : Shape
    where TShape2 : Shape
    where TShape3 : Shape
{
    var builder = new Builder <TMatOut>();
    // Import g0 with a fully curried combiner; each subsequent import peels one argument off.
    var shape0 = builder.Add <TShape0, TMat0, Func <TMat1, Func <TMat2, Func <TMat3, TMatOut> > > >(g0, m0 => (m1 => (m2 => (m3 => combineMaterializers(m0, m1, m2, m3)))));
    var shape1 = builder.Add <TShape1, Func <TMat1, Func <TMat2, Func <TMat3, TMatOut> > >, TMat1, Func <TMat2, Func <TMat3, TMatOut> > >(g1, (f, m1) => f(m1));
    var shape2 = builder.Add <TShape2, Func <TMat2, Func <TMat3, TMatOut> >, TMat2, Func <TMat3, TMatOut> >(g2, (f, m2) => f(m2));
    // The last import applies the final curried argument, yielding TMatOut.
    var shape3 = builder.Add <TShape3, Func <TMat3, TMatOut>, TMat3, TMatOut>(g3, (f, m3) => f(m3));
    var shape = buildBlock(builder, shape0, shape1, shape2, shape3);
    var module = builder.Module.ReplaceShape(shape);
    return(new GraphImpl <TShapeOut, TMatOut>(shape, module));
}
/// <summary>Collects every outgoing data cross-reference of each instruction into a new database.</summary>
public static DataXRefDb FromDisassembly(DisassemblyView disassembly)
{
    var dbBuilder = new Builder();
    foreach (var (addr, instruction) in disassembly.EnumerateInstructions())
    {
        foreach (var xRef in instruction.DataXRefOut)
        {
            dbBuilder.Add(addr, xRef.Address, xRef.Type);
        }
    }

    return dbBuilder.Build();
}
/// <summary>
/// Returns the complement of this value set: the ranges of the domain not
/// covered by any range in <see cref="LowIndexedRanges"/>.
/// </summary>
public IValueSet Complement()
{
    var builder = new Builder(this.Type);

    // No ranges at all: the complement is the whole domain.
    if (!this.LowIndexedRanges.Any())
    {
        return builder.Add(Range.All(this.Type)).Build();
    }

    IEnumerable<Range> orderedRanges = this.LowIndexedRanges.Values;
    Range firstRange = orderedRanges.First();

    // Gap below the first range, unless it already reaches the lower bound.
    if (!firstRange.Low.IsLowerUnbounded())
    {
        builder.Add(new Range(Marker.LowerUnbounded(this.Type), firstRange.Low.LesserAdjacent()));
    }

    // Gaps between consecutive ranges.
    Range previous = firstRange;
    foreach (Range current in orderedRanges.Skip(1))
    {
        builder.Add(new Range(previous.High.GreaterAdjacent(), current.Low.LesserAdjacent()));
        previous = current;
    }

    // Gap above the last range, unless it already reaches the upper bound.
    if (!previous.High.IsUpperUnbounded())
    {
        builder.Add(new Range(previous.High.GreaterAdjacent(), Marker.UpperUnbounded(this.Type)));
    }

    return builder.Build();
}
/// <summary>
/// Creates a schema describing a RealmObject subclass in terms of its persisted members.
/// </summary>
/// <exception cref="ArgumentException">Thrown if no class Type is provided or if it doesn't descend directly from RealmObject.</exception>
/// <returns>An ObjectSchema describing the specified Type.</returns>
/// <param name="type">Type of a RealmObject descendant for which you want a schema.</param>
public static ObjectSchema FromType(Type type)
{
    if (type == null)
    {
        throw new ArgumentNullException(nameof(type));
    }
    if (type.BaseType != typeof(RealmObject))
    {
        throw new ArgumentException($"The class {type.FullName} must descend directly from RealmObject");
    }
    Contract.EndContractBlock();

    var builder = new Builder(type.Name);
    foreach (var property in type.GetProperties(BindingFlags.Instance | BindingFlags.DeclaredOnly | BindingFlags.NonPublic | BindingFlags.Public))
    {
        // Only properties rewritten by the weaver are persisted.
        if (property.GetCustomAttribute <WovenPropertyAttribute>() == null)
        {
            continue;
        }

        // A primary key is implicitly indexed.
        var isPrimaryKey = property.GetCustomAttribute <PrimaryKeyAttribute>() != null;
        var schemaProperty = new Property
        {
            // [MapTo] overrides the CLR property name in the schema.
            Name = property.GetCustomAttribute <MapToAttribute>()?.Mapping ?? property.Name,
            IsPrimaryKey = isPrimaryKey,
            IsIndexed = isPrimaryKey || property.GetCustomAttribute <IndexedAttribute>() != null,
            PropertyInfo = property
        };

        Type innerType;
        bool isNullable;
        schemaProperty.Type = property.PropertyType.ToPropertyType(out isNullable, out innerType);
        schemaProperty.ObjectType = innerType?.Name;
        schemaProperty.IsNullable = isNullable;
        // [Required] forces the property to be non-nullable regardless of its CLR type.
        if (property.GetCustomAttribute <RequiredAttribute>() != null)
        {
            schemaProperty.IsNullable = false;
        }

        builder.Add(schemaProperty);
    }

    var ret = builder.Build();
    ret.Type = type;
    return(ret);
}
/// <summary>
/// Starts an FST-based terms-index writer for <paramref name="fieldInfo"/>.
/// </summary>
/// <param name="outerInstance">Owning index writer; supplies the output stream.</param>
/// <param name="fieldInfo">The field whose terms index is being written.</param>
/// <param name="termsFilePointer">File pointer of the first term for this field in the terms file.</param>
public FSTFieldWriter(VariableGapTermsIndexWriter outerInstance, FieldInfo fieldInfo, long termsFilePointer)
{
    this.outerInstance = outerInstance;
    this.fieldInfo = fieldInfo;
    fstOutputs = PositiveInt32Outputs.Singleton;
    fstBuilder = new Builder <long?>(FST.INPUT_TYPE.BYTE1, fstOutputs);
    // Record where this field's index begins in the output stream.
    indexStart = outerInstance.m_output.Position; // LUCENENET specific: Renamed from getFilePointer() to match FileStream
    ////System.out.println("VGW: field=" + fieldInfo.name);
    // Always put empty string in
    fstBuilder.Add(new Int32sRef(), termsFilePointer);
    startTermsFilePointer = termsFilePointer;
}
/// <summary>
/// Renders a comment instruction into the output.
/// </summary>
private void Execute(InstructionComment instruction)
{
    // Comments are skipped entirely when the output is compressed.
    if (!Builder.IsCompressed)
    {
        // Open the comment at the instruction's indentation level
        Builder.Indent = instruction.Token.Indent;
        Builder.AddIndent();
        Builder.Add("<!--");
        // Write the comment text
        Builder.Add(instruction.Content);
        // Close the comment
        Builder.Add("-->");
    }
}
/// <summary>A document id equal to an Object.prototype member name must still index correctly.</summary>
public async Task DocumentRefClashesWithObjectPrototype()
{
    var builder = new Builder();
    builder.AddField("title");

    await builder.Add(new Document
    {
        { "id", "constructor" },
        { "title", "word" },
    });

    Assert.Empty(builder.InvertedIndex["word"]["title"]["constructor"]);
}
/// <summary>
/// Creates a schema describing a <see cref="RealmObject"/> or <see cref="EmbeddedObject"/> subclass in terms of its persisted members.
/// </summary>
/// <exception cref="ArgumentException">
/// Thrown if no class Type is provided or if it doesn't descend directly from <see cref="RealmObject"/>/<see cref="EmbeddedObject"/>.
/// </exception>
/// <returns>An <see cref="ObjectSchema"/> describing the specified Type.</returns>
/// <param name="type">Type of a <see cref="RealmObject"/>/<see cref="EmbeddedObject"/> descendant for which you want a schema.</param>
public static ObjectSchema FromType(TypeInfo type)
{
    Argument.NotNull(type, nameof(type));
    Argument.Ensure(type.IsRealmObject() || type.IsEmbeddedObject(), $"The class {type.FullName} must descend directly from RealmObject", nameof(type));

    var builder = new Builder(type.GetMappedOrOriginalName(), type.IsEmbeddedObject());
    // Only instance properties rewritten by the weaver are persisted.
    foreach (var property in type.DeclaredProperties.Where(p => !p.IsStatic() && p.HasCustomAttribute <WovenPropertyAttribute>()))
    {
        // A primary key is implicitly indexed.
        var isPrimaryKey = property.HasCustomAttribute <PrimaryKeyAttribute>();
        var schemaProperty = new Property
        {
            Name = property.GetMappedOrOriginalName(),
            IsPrimaryKey = isPrimaryKey,
            IsIndexed = isPrimaryKey || property.HasCustomAttribute <IndexedAttribute>(),
            PropertyInfo = property
        };

        var backlinks = property.GetCustomAttribute <BacklinkAttribute>();
        if (backlinks != null)
        {
            // [Backlink] properties become linking-objects collections pointing
            // back at the named property of the element type.
            var innerType = property.PropertyType.GenericTypeArguments.Single();
            var linkOriginProperty = innerType.GetProperty(backlinks.Property);
            schemaProperty.Type = PropertyType.LinkingObjects | PropertyType.Array;
            schemaProperty.ObjectType = innerType.GetTypeInfo().GetMappedOrOriginalName();
            schemaProperty.LinkOriginPropertyName = linkOriginProperty.GetMappedOrOriginalName();
        }
        else
        {
            schemaProperty.Type = property.PropertyType.ToPropertyType(out var objectType);
            schemaProperty.ObjectType = objectType?.GetTypeInfo().GetMappedOrOriginalName();
        }

        // [Required] strips the nullable flag regardless of the CLR type.
        if (property.HasCustomAttribute <RequiredAttribute>())
        {
            schemaProperty.Type &= ~PropertyType.Nullable;
        }

        builder.Add(schemaProperty);
    }

    var ret = builder.Build();
    ret.Type = type;
    return(ret);
}
/// <summary>
/// Builds a read-only array from <paramref name="items"/>; a null source yields <see cref="Null"/>.
/// </summary>
public static ReadOnlyArray<T> CreateFrom<U>(IEnumerable<U> items) where U : T
{
    if (items == null)
    {
        return Null;
    }

    var arrayBuilder = new Builder();
    foreach (T item in items)
    {
        arrayBuilder.Add(item);
    }

    return arrayBuilder.ToImmutable();
}
/// <summary>
/// Finishes the current term: writes its doc-freq/total-term-freq stats,
/// delegates postings finalization, delta-encodes the term metadata, and
/// registers the term in the FST keyed by its ordinal.
/// </summary>
/// <param name="text">The term's bytes.</param>
/// <param name="stats">Frequency statistics for the term.</param>
public override void FinishTerm(BytesRef text, TermStats stats)
{
    // Emit a skip entry every SKIP_INTERVAL terms.
    if (_numTerms > 0 && _numTerms % SKIP_INTERVAL == 0)
    {
        BufferSkip();
    }

    // write term meta data into fst
    var longs = new long[_longsSize];
    long delta = stats.TotalTermFreq - stats.DocFreq;
    if (stats.TotalTermFreq > 0)
    {
        // Low bit of docFreq flags whether totalTermFreq == docFreq, so the
        // delta can be omitted in that common case.
        if (delta == 0)
        {
            _statsOut.WriteVInt32(stats.DocFreq << 1 | 1);
        }
        else
        {
            _statsOut.WriteVInt32(stats.DocFreq << 1 | 0);
            _statsOut.WriteVInt64(stats.TotalTermFreq - stats.DocFreq);
        }
    }
    else
    {
        _statsOut.WriteVInt32(stats.DocFreq);
    }

    var state = _outerInstance.postingsWriter.NewTermState();
    state.DocFreq = stats.DocFreq;
    state.TotalTermFreq = stats.TotalTermFreq;
    _outerInstance.postingsWriter.FinishTerm(state);
    _outerInstance.postingsWriter.EncodeTerm(longs, _metaBytesOut, _fieldInfo, state, true);

    // Metadata longs are delta-encoded against the previous term's values.
    for (var i = 0; i < _longsSize; i++)
    {
        _metaLongsOut.WriteVInt64(longs[i] - _lastLongs[i]);
        _lastLongs[i] = longs[i];
    }
    _metaLongsOut.WriteVInt64(_metaBytesOut.Position - _lastMetaBytesFp); // LUCENENET specific: Renamed from getFilePointer() to match FileStream

    // FST maps the term bytes to its ordinal.
    _builder.Add(Util.ToInt32sRef(text, _scratchTerm), _numTerms);
    _numTerms++;

    _lastMetaBytesFp = _metaBytesOut.Position; // LUCENENET specific: Renamed from getFilePointer() to match FileStream
}
/// <summary>
/// Builds the suggester's FST from the (already weight-sorted) input,
/// mapping each suggestion's bytes to its cost. Duplicate suggestions keep
/// only the first (best-weighted) entry. Payloads and contexts are not
/// supported.
/// </summary>
/// <param name="enumerator">Source of suggestions and weights.</param>
public override void Build(IInputEnumerator enumerator)
{
    // LUCENENET: Added guard clause for null
    if (enumerator is null)
    {
        throw new ArgumentNullException(nameof(enumerator));
    }
    if (enumerator.HasPayloads)
    {
        throw new ArgumentException("this suggester doesn't support payloads");
    }
    if (enumerator.HasContexts)
    {
        throw new ArgumentException("this suggester doesn't support contexts");
    }
    count = 0;
    BytesRef scratch;
    IInputEnumerator iter = new WFSTInputEnumerator(enumerator);
    var scratchInts = new Int32sRef();
    BytesRef previous = null;
    var outputs = PositiveInt32Outputs.Singleton;
    var builder = new Builder <long?>(FST.INPUT_TYPE.BYTE1, outputs);
    while (iter.MoveNext())
    {
        scratch = iter.Current;
        long cost = iter.Weight;
        if (previous == null)
        {
            previous = new BytesRef();
        }
        else if (scratch.Equals(previous))
        {
            continue; // for duplicate suggestions, the best weight is actually added
        }
        Lucene.Net.Util.Fst.Util.ToInt32sRef(scratch, scratchInts);
        builder.Add(scratchInts, cost);
        previous.CopyBytes(scratch);
        count++;
    }
    fst = builder.Finish();
}
/// <summary>A field term equal to an Object.prototype member name must still index correctly.</summary>
public async Task FieldContainsTermsThatClashWithObjectPrototype()
{
    var builder = new Builder();
    builder.AddField("title");

    await builder.Add(new Document
    {
        { "id", "id" },
        { "title", "constructor" },
    });

    Assert.Empty(builder.InvertedIndex["constructor"]["title"]["id"]);
    Assert.Equal(
        1,
        builder.FieldTermFrequencies[FieldReference.FromString("title/id")][new Token("constructor")]);
}
/// <summary>
/// The inverted index must survive a serialize/deserialize round trip.
/// </summary>
public async Task Can_round_trip_inverted_indexes()
{
    var builder = new Builder();
    builder.AddField("title");

    // Await the async Add directly instead of blocking on it with
    // ConfigureAwait(false).GetAwaiter().GetResult(), which risks deadlocks
    // and obscures failures; test frameworks support async Task tests.
    await builder.Add(new Document { { "id", "id" }, { "title", "test" }, { "body", "missing" } });

    Index index = builder.Build();
    var original = index.InvertedIndex;

    var deserialized = original.Serialize().DeserializeInvertedIndex();

    AssertInvertedIndex(original, deserialized);
}
/// <summary>
/// Returns a <see cref="StemmerOverrideMap"/> to be used with the <see cref="StemmerOverrideFilter"/> </summary>
/// <returns> a <see cref="StemmerOverrideMap"/> to be used with the <see cref="StemmerOverrideFilter"/> </returns>
/// <exception cref="IOException"> if an <see cref="IOException"/> occurs; </exception>
public virtual StemmerOverrideMap Build()
{
    ByteSequenceOutputs outputs = ByteSequenceOutputs.Singleton;
    Builder <BytesRef> builder = new Builder <BytesRef>(FST.INPUT_TYPE.BYTE4, outputs);
    // FST input must be added in sorted order; sort the hashed keys first.
    int[] sort = hash.Sort(BytesRef.UTF8SortedAsUnicodeComparer);
    Int32sRef intsSpare = new Int32sRef();
    int size = hash.Count;
    for (int i = 0; i < size; i++)
    {
        int id = sort[i];
        BytesRef bytesRef = hash.Get(id, spare);
        // FST keys are code points (BYTE4), so convert UTF-8 bytes to UTF-32.
        UnicodeUtil.UTF8toUTF32(bytesRef, intsSpare);
        builder.Add(intsSpare, new BytesRef(outputValues[id]));
    }
    return(new StemmerOverrideMap(builder.Finish(), ignoreCase));
}
/// <summary>
/// Intersects this sorted range set with <paramref name="other"/> by merging
/// their ordered ranges pairwise.
/// </summary>
public IValueSet Intersect(IValueSet other)
{
    SortedRangeSet otherSet = this.CheckCompatibility(other);
    var builder = new Builder(this.Type);

    IEnumerator<Range> left = this.GetOrderedRanges().GetEnumerator();
    IEnumerator<Range> right = otherSet.GetOrderedRanges().GetEnumerator();

    if (left.MoveNext() && right.MoveNext())
    {
        Range leftRange = left.Current;
        Range rightRange = right.Current;

        // Two-pointer sweep: emit each overlap, then advance the side whose
        // current range ends first.
        while (true)
        {
            if (leftRange.Overlaps(rightRange))
            {
                builder.Add(leftRange.Intersect(rightRange));
            }

            if (leftRange.High.CompareTo(rightRange.High) <= 0)
            {
                if (!left.MoveNext())
                {
                    break;
                }

                leftRange = left.Current;
            }
            else
            {
                if (!right.MoveNext())
                {
                    break;
                }

                rightRange = right.Current;
            }
        }
    }

    return builder.Build();
}
/// <summary>
/// Lazily loads the terms-index FST from disk on first use. When an index
/// divisor greater than 1 is configured, the loaded FST is subsampled into
/// a smaller one keeping every divisor'th entry.
/// </summary>
private void LoadTermsIndex()
{
    // Already loaded.
    if (Fst != null)
    {
        return;
    }

    // Clone the input so seeking does not disturb other readers.
    var clone = (IndexInput)_vgtir._input.Clone();
    clone.Seek(_indexStart);
    Fst = new FST <long?>(clone, _vgtir._fstOutputs);
    clone.Dispose();

    /*
     * final String dotFileName = segment + "_" + fieldInfo.name + ".dot";
     * Writer w = new OutputStreamWriter(new FileOutputStream(dotFileName));
     * Util.toDot(fst, w, false, false);
     * System.out.println("FST INDEX: SAVED to " + dotFileName);
     * w.close();
     */

    if (_vgtir._indexDivisor > 1)
    {
        // subsample: keep every _indexDivisor'th entry of the full FST.
        var scratchIntsRef = new IntsRef();
        var outputs = PositiveIntOutputs.Singleton;
        var builder = new Builder <long?>(FST.INPUT_TYPE.BYTE1, outputs);
        var fstEnum = new BytesRefFSTEnum <long?>(Fst);
        var count = _vgtir._indexDivisor;
        BytesRefFSTEnum.InputOutput <long?> result;
        while ((result = fstEnum.Next()) != null)
        {
            if (count == _vgtir._indexDivisor)
            {
                builder.Add(Util.ToIntsRef(result.Input, scratchIntsRef), result.Output);
                count = 0;
            }
            count++;
        }
        Fst = builder.Finish();
    }
}
/// <summary>
/// The token set must survive a serialize/deserialize round trip.
/// </summary>
public async Task Can_round_trip_token_set()
{
    var builder = new Builder();
    builder.AddField("title");

    // Await the async Add directly instead of blocking on it with
    // ConfigureAwait(false).GetAwaiter().GetResult(), which risks deadlocks
    // and obscures failures; test frameworks support async Task tests.
    await builder.Add(new Document { { "id", "id" }, { "title", "test" }, { "body", "missing" } });

    Index index = builder.Build();
    var original = index.TokenSet;

    var deserialized = original.Serialize().DeserializeTokenSet();

    Assert.NotSame(original, deserialized);
    Assert.Equal(original.ToEnumeration(), deserialized.ToEnumeration());
}
/// <summary>
/// Builds a small replacement FST (a->b, ab->c, c->de, def->gh) and checks
/// that <c>Dictionary.ApplyMappings</c> applies the longest-matching
/// replacements in place on several inputs.
/// </summary>
public virtual void TestReplacements()
{
    Outputs<CharsRef> outputs = CharSequenceOutputs.Singleton;
    Builder<CharsRef> builder = new Builder<CharsRef>(FST.INPUT_TYPE.BYTE2, outputs);
    IntsRef scratchInts = new IntsRef();
    // a -> b
    Lucene.Net.Util.Fst.Util.ToUTF16("a", scratchInts);
    builder.Add(scratchInts, new CharsRef("b"));
    // ab -> c
    Lucene.Net.Util.Fst.Util.ToUTF16("ab", scratchInts);
    builder.Add(scratchInts, new CharsRef("c"));
    // c -> de
    Lucene.Net.Util.Fst.Util.ToUTF16("c", scratchInts);
    builder.Add(scratchInts, new CharsRef("de"));
    // def -> gh
    Lucene.Net.Util.Fst.Util.ToUTF16("def", scratchInts);
    builder.Add(scratchInts, new CharsRef("gh"));
    FST<CharsRef> fst = builder.Finish();

    // "a" maps when not followed by "b" (longest match wins).
    StringBuilder sb = new StringBuilder("atestanother");
    Dictionary.ApplyMappings(fst, sb);
    assertEquals("btestbnother", sb.ToString());
    sb = new StringBuilder("abtestanother");
    Dictionary.ApplyMappings(fst, sb);
    assertEquals("ctestbnother", sb.ToString());
    sb = new StringBuilder("atestabnother");
    Dictionary.ApplyMappings(fst, sb);
    assertEquals("btestcnother", sb.ToString());
    sb = new StringBuilder("abtestabnother");
    Dictionary.ApplyMappings(fst, sb);
    assertEquals("ctestcnother", sb.ToString());
    sb = new StringBuilder("abtestabcnother");
    Dictionary.ApplyMappings(fst, sb);
    assertEquals("ctestcdenother", sb.ToString());
    // Replacement output is not re-scanned: "def"->"gh" but trailing "c"->"de".
    sb = new StringBuilder("defdefdefc");
    Dictionary.ApplyMappings(fst, sb);
    assertEquals("ghghghde", sb.ToString());
}
/// <summary>
/// Starts an FST-based terms-index writer for <paramref name="fieldInfo"/>.
/// </summary>
/// <param name="fieldInfo">The field whose terms index is being written.</param>
/// <param name="termsFilePointer">File pointer of the first term for this field in the terms file.</param>
public FSTFieldWriter(FieldInfo fieldInfo, long termsFilePointer)
{
    this.fieldInfo = fieldInfo;
    fstOutputs = PositiveIntOutputs.Singleton;
    // BUGFIX: C# has no diamond operator — "new Builder<>(...)" does not
    // compile. PositiveIntOutputs produces long? outputs, so type the
    // builder accordingly (matching the sibling FstFieldWriter constructor).
    fstBuilder = new Builder<long?>(FST.INPUT_TYPE.BYTE1, fstOutputs);
    indexStart = output.FilePointer;
    ////System.out.println("VGW: field=" + fieldInfo.name);
    // Always put empty string in
    fstBuilder.Add(new IntsRef(), termsFilePointer);
    startTermsFilePointer = termsFilePointer;
}
/// <summary>
/// Starts an FST-based terms-index writer for <paramref name="fieldInfo"/>.
/// </summary>
/// <param name="fieldInfo">The field whose terms index is being written.</param>
/// <param name="termsFilePointer">File pointer of the first term for this field in the terms file.</param>
/// <param name="vgtiw">Owning index writer; supplies the output stream.</param>
public FstFieldWriter(FieldInfo fieldInfo, long termsFilePointer, VariableGapTermsIndexWriter vgtiw)
{
    _vgtiw = vgtiw;
    FieldInfo = fieldInfo;
    PositiveIntOutputs fstOutputs = PositiveIntOutputs.Singleton;
    _fstBuilder = new Builder<long?>(FST.INPUT_TYPE.BYTE1, fstOutputs);
    // Record where this field's index begins in the output stream.
    IndexStart = _vgtiw.Output.FilePointer;
    // Always put empty string in
    _fstBuilder.Add(new IntsRef(), termsFilePointer);
    _startTermsFilePointer = termsFilePointer;
}
/// <summary>
/// Stress test: grows FSTs past 32-bit node-count / multi-gigabyte size
/// limits, once unpacked and once packed, for three output types (no
/// outputs, byte-sequence outputs, positive-int outputs). Each FST is then
/// verified by direct lookup, full enumeration, and a save/load round trip.
/// </summary>
public virtual void Test()
{
    int[] ints = new int[7];
    IntsRef input = new IntsRef(ints, 0, ints.Length);
    // The same seed drives both the build and every verify pass, so the
    // verification loops can regenerate exactly the inputs that were added.
    int seed = Random().Next();
    Directory dir = new MMapDirectory(CreateTempDir("2BFST"));
    for (int doPackIter = 0; doPackIter < 2; doPackIter++)
    {
        bool doPack = doPackIter == 1;
        // Build FST w/ NoOutputs and stop when nodeCount > 2.2B
        if (!doPack)
        {
            Console.WriteLine("\nTEST: 3B nodes; doPack=false output=NO_OUTPUTS");
            Outputs<object> outputs = NoOutputs.Singleton;
            object NO_OUTPUT = outputs.NoOutput;
            Builder<object> b = new Builder<object>(FST.INPUT_TYPE.BYTE1, 0, 0, true, true, int.MaxValue, outputs, null, doPack, PackedInts.COMPACT, true, 15);
            int count = 0;
            Random r = new Random(seed);
            int[] ints2 = new int[200];
            IntsRef input2 = new IntsRef(ints2, 0, ints2.Length);
            while (true)
            {
                //System.out.println("add: " + input + " -> " + output);
                // Randomize the tail of the input; the leading slots are
                // advanced deterministically by NextInput.
                for (int i = 10; i < ints2.Length; i++)
                {
                    ints2[i] = r.Next(256);
                }
                b.Add(input2, NO_OUTPUT);
                count++;
                if (count % 100000 == 0)
                {
                    Console.WriteLine(count + ": " + b.FstSizeInBytes() + " bytes; " + b.TotStateCount + " nodes");
                }
                if (b.TotStateCount > int.MaxValue + 100L * 1024 * 1024)
                {
                    break;
                }
                NextInput(r, ints2);
            }
            FST<object> fst = b.Finish();
            // Pass 0 verifies the in-memory FST then saves/reloads it;
            // pass 1 verifies the reloaded copy.
            for (int verify = 0; verify < 2; verify++)
            {
                Console.WriteLine("\nTEST: now verify [fst size=" + fst.SizeInBytes() + "; nodeCount=" + fst.NodeCount + "; arcCount=" + fst.ArcCount + "]");
                Arrays.Fill(ints2, 0);
                r = new Random(seed);
                for (int i = 0; i < count; i++)
                {
                    if (i % 1000000 == 0)
                    {
                        Console.WriteLine(i + "...: ");
                    }
                    for (int j = 10; j < ints2.Length; j++)
                    {
                        ints2[j] = r.Next(256);
                    }
                    Assert.AreEqual(NO_OUTPUT, Util.Get(fst, input2));
                    NextInput(r, ints2);
                }
                Console.WriteLine("\nTEST: enum all input/outputs");
                IntsRefFSTEnum<object> fstEnum = new IntsRefFSTEnum<object>(fst);
                Arrays.Fill(ints2, 0);
                r = new Random(seed);
                int upto = 0;
                while (true)
                {
                    IntsRefFSTEnum<object>.InputOutput<object> pair = fstEnum.Next();
                    if (pair == null)
                    {
                        break;
                    }
                    for (int j = 10; j < ints2.Length; j++)
                    {
                        ints2[j] = r.Next(256);
                    }
                    Assert.AreEqual(input2, pair.Input);
                    Assert.AreEqual(NO_OUTPUT, pair.Output);
                    upto++;
                    NextInput(r, ints2);
                }
                Assert.AreEqual(count, upto);
                if (verify == 0)
                {
                    Console.WriteLine("\nTEST: save/load FST and re-verify");
                    IndexOutput @out = dir.CreateOutput("fst", IOContext.DEFAULT);
                    fst.Save(@out);
                    @out.Dispose();
                    IndexInput @in = dir.OpenInput("fst", IOContext.DEFAULT);
                    fst = new FST<object>(@in, outputs);
                    @in.Dispose();
                }
                else
                {
                    dir.DeleteFile("fst");
                }
            }
        }
        // Build FST w/ ByteSequenceOutputs and stop when FST
        // size = 3GB
        {
            Console.WriteLine("\nTEST: 3 GB size; doPack=" + doPack + " outputs=bytes");
            Outputs<BytesRef> outputs = ByteSequenceOutputs.Singleton;
            Builder<BytesRef> b = new Builder<BytesRef>(FST.INPUT_TYPE.BYTE1, 0, 0, true, true, int.MaxValue, outputs, null, doPack, PackedInts.COMPACT, true, 15);
            var outputBytes = new byte[20];
            BytesRef output = new BytesRef(outputBytes);
            Arrays.Fill(ints, 0);
            int count = 0;
            Random r = new Random(seed);
            while (true)
            {
                r.NextBytes(outputBytes);
                //System.out.println("add: " + input + " -> " + output);
                b.Add(input, BytesRef.DeepCopyOf(output));
                count++;
                if (count % 1000000 == 0)
                {
                    Console.WriteLine(count + "...: " + b.FstSizeInBytes() + " bytes");
                }
                if (b.FstSizeInBytes() > LIMIT)
                {
                    break;
                }
                NextInput(r, ints);
            }
            FST<BytesRef> fst = b.Finish();
            for (int verify = 0; verify < 2; verify++)
            {
                Console.WriteLine("\nTEST: now verify [fst size=" + fst.SizeInBytes() + "; nodeCount=" + fst.NodeCount + "; arcCount=" + fst.ArcCount + "]");
                r = new Random(seed);
                Arrays.Fill(ints, 0);
                for (int i = 0; i < count; i++)
                {
                    if (i % 1000000 == 0)
                    {
                        Console.WriteLine(i + "...: ");
                    }
                    // Regenerate the expected output bytes from the seeded RNG.
                    r.NextBytes((byte[])(Array)outputBytes);
                    Assert.AreEqual(output, Util.Get(fst, input));
                    NextInput(r, ints);
                }
                Console.WriteLine("\nTEST: enum all input/outputs");
                IntsRefFSTEnum<BytesRef> fstEnum = new IntsRefFSTEnum<BytesRef>(fst);
                Arrays.Fill(ints, 0);
                r = new Random(seed);
                int upto = 0;
                while (true)
                {
                    IntsRefFSTEnum<BytesRef>.InputOutput<BytesRef> pair = fstEnum.Next();
                    if (pair == null)
                    {
                        break;
                    }
                    Assert.AreEqual(input, pair.Input);
                    r.NextBytes((byte[])(Array)outputBytes);
                    Assert.AreEqual(output, pair.Output);
                    upto++;
                    NextInput(r, ints);
                }
                Assert.AreEqual(count, upto);
                if (verify == 0)
                {
                    Console.WriteLine("\nTEST: save/load FST and re-verify");
                    IndexOutput @out = dir.CreateOutput("fst", IOContext.DEFAULT);
                    fst.Save(@out);
                    @out.Dispose();
                    IndexInput @in = dir.OpenInput("fst", IOContext.DEFAULT);
                    fst = new FST<BytesRef>(@in, outputs);
                    @in.Dispose();
                }
                else
                {
                    dir.DeleteFile("fst");
                }
            }
        }
        // Build FST w/ PositiveIntOutputs and stop when FST
        // size = 3GB
        {
            Console.WriteLine("\nTEST: 3 GB size; doPack=" + doPack + " outputs=long");
            Outputs<long?> outputs = PositiveIntOutputs.Singleton;
            Builder<long?> b = new Builder<long?>(FST.INPUT_TYPE.BYTE1, 0, 0, true, true, int.MaxValue, outputs, null, doPack, PackedInts.COMPACT, true, 15);
            long output = 1;
            Arrays.Fill(ints, 0);
            int count = 0;
            Random r = new Random(seed);
            while (true)
            {
                //System.out.println("add: " + input + " -> " + output);
                b.Add(input, output);
                // Outputs must be strictly increasing positive longs.
                output += 1 + r.Next(10);
                count++;
                if (count % 1000000 == 0)
                {
                    Console.WriteLine(count + "...: " + b.FstSizeInBytes() + " bytes");
                }
                if (b.FstSizeInBytes() > LIMIT)
                {
                    break;
                }
                NextInput(r, ints);
            }
            FST<long?> fst = b.Finish();
            for (int verify = 0; verify < 2; verify++)
            {
                Console.WriteLine("\nTEST: now verify [fst size=" + fst.SizeInBytes() + "; nodeCount=" + fst.NodeCount + "; arcCount=" + fst.ArcCount + "]");
                Arrays.Fill(ints, 0);
                output = 1;
                r = new Random(seed);
                for (int i = 0; i < count; i++)
                {
                    if (i % 1000000 == 0)
                    {
                        Console.WriteLine(i + "...: ");
                    }
                    // forward lookup:
                    Assert.AreEqual(output, (long)Util.Get(fst, input));
                    // reverse lookup:
                    Assert.AreEqual(input, Util.GetByOutput(fst, output));
                    output += 1 + r.Next(10);
                    NextInput(r, ints);
                }
                Console.WriteLine("\nTEST: enum all input/outputs");
                IntsRefFSTEnum<long?> fstEnum = new IntsRefFSTEnum<long?>(fst);
                Arrays.Fill(ints, 0);
                r = new Random(seed);
                int upto = 0;
                output = 1;
                while (true)
                {
                    IntsRefFSTEnum<long?>.InputOutput<long?> pair = fstEnum.Next();
                    if (pair == null)
                    {
                        break;
                    }
                    Assert.AreEqual(input, pair.Input);
                    Assert.AreEqual(output, pair.Output.Value);
                    output += 1 + r.Next(10);
                    upto++;
                    NextInput(r, ints);
                }
                Assert.AreEqual(count, upto);
                if (verify == 0)
                {
                    Console.WriteLine("\nTEST: save/load FST and re-verify");
                    IndexOutput @out = dir.CreateOutput("fst", IOContext.DEFAULT);
                    fst.Save(@out);
                    @out.Dispose();
                    IndexInput @in = dir.OpenInput("fst", IOContext.DEFAULT);
                    fst = new FST<long?>(@in, outputs);
                    @in.Dispose();
                }
                else
                {
                    dir.DeleteFile("fst");
                }
            }
        }
    }
    dir.Dispose();
}
/// <summary>
/// Builds the NormalizeCharMap; call this once you
/// are done calling <seealso cref="#add"/>.
/// </summary>
public virtual NormalizeCharMap Build()
{
    FST<CharsRef> map;
    try
    {
        Outputs<CharsRef> outputs = CharSequenceOutputs.Singleton;
        Builder<CharsRef> builder = new Builder<CharsRef>(FST.INPUT_TYPE.BYTE2, outputs);
        IntsRef scratch = new IntsRef();
        // pendingPairs is sorted, satisfying the FST's sorted-input requirement.
        foreach (var ent in pendingPairs)
        {
            builder.Add(Lucene.Net.Util.Fst.Util.ToUTF16(ent.Key, scratch), new CharsRef(ent.Value));
        }
        map = builder.Finish();
        // The pairs are consumed; further adds start a fresh map.
        pendingPairs.Clear();
    }
    catch (IOException ioe)
    {
        // Bogus FST IOExceptions!! (will never happen)
        throw new Exception("Should never happen", ioe);
    }
    return new NormalizeCharMap(map);
}
/// <summary>
/// Parses the plain-text terms section and builds an FST mapping each term
/// to (docs-start file pointer, (docFreq, totalTermFreq)). Also accumulates
/// per-field statistics: sumDocFreq, sumTotalTermFreq, docCount, termCount.
/// </summary>
private void LoadTerms()
{
    var posIntOutputs = PositiveIntOutputs.Singleton;
    // Nested pair outputs: outer pair = (docsStart, inner), inner pair = (docFreq, totalTermFreq).
    var outputsInner = new PairOutputs<long?, long?>(posIntOutputs, posIntOutputs);
    var outputs = new PairOutputs<long?, PairOutputs<long?,long?>.Pair>(posIntOutputs, outputsInner);
    var b = new Builder<PairOutputs<long?, PairOutputs<long?,long?>.Pair>.Pair>(FST.INPUT_TYPE.BYTE1, outputs);
    var input = (IndexInput) _outerInstance._input.Clone();
    input.Seek(_termsStart);
    var lastTerm = new BytesRef(10);
    long lastDocsStart = -1;
    int docFreq = 0;
    long totalTermFreq = 0;
    var visitedDocs = new FixedBitSet(_maxDoc);
    var scratchIntsRef = new IntsRef();
    while (true)
    {
        SimpleTextUtil.ReadLine(input, _scratch);
        if (_scratch.Equals(SimpleTextFieldsWriter.END) || StringHelper.StartsWith(_scratch, SimpleTextFieldsWriter.FIELD))
        {
            // End of this field's terms: flush the pending term, then stop.
            if (lastDocsStart != -1)
            {
                b.Add(Util.ToIntsRef(lastTerm, scratchIntsRef), outputs.NewPair(lastDocsStart, outputsInner.NewPair(docFreq, totalTermFreq)));
                _sumTotalTermFreq += totalTermFreq;
            }
            break;
        }
        if (StringHelper.StartsWith(_scratch, SimpleTextFieldsWriter.DOC))
        {
            // DOC line: count the posting and mark the doc as visited.
            docFreq++;
            _sumDocFreq++;
            UnicodeUtil.UTF8toUTF16(_scratch.Bytes, _scratch.Offset + SimpleTextFieldsWriter.DOC.Length, _scratch.Length - SimpleTextFieldsWriter.DOC.Length, _scratchUtf16);
            int docId = ArrayUtil.ParseInt(_scratchUtf16.Chars, 0, _scratchUtf16.Length);
            visitedDocs.Set(docId);
        }
        else if (StringHelper.StartsWith(_scratch, SimpleTextFieldsWriter.FREQ))
        {
            // FREQ line: accumulate the term's total frequency.
            UnicodeUtil.UTF8toUTF16(_scratch.Bytes, _scratch.Offset + SimpleTextFieldsWriter.FREQ.Length, _scratch.Length - SimpleTextFieldsWriter.FREQ.Length, _scratchUtf16);
            totalTermFreq += ArrayUtil.ParseInt(_scratchUtf16.Chars, 0, _scratchUtf16.Length);
        }
        else if (StringHelper.StartsWith(_scratch, SimpleTextFieldsWriter.TERM))
        {
            // TERM line: flush the previous term (if any) into the FST…
            if (lastDocsStart != -1)
            {
                b.Add(Util.ToIntsRef(lastTerm, scratchIntsRef), outputs.NewPair(lastDocsStart, outputsInner.NewPair(docFreq, totalTermFreq)));
            }
            // …then start tracking the new term; its postings begin here.
            lastDocsStart = input.FilePointer;
            int len = _scratch.Length - SimpleTextFieldsWriter.TERM.Length;
            if (len > lastTerm.Length)
            {
                lastTerm.Grow(len);
            }
            Array.Copy(_scratch.Bytes, SimpleTextFieldsWriter.TERM.Length, lastTerm.Bytes, 0, len);
            lastTerm.Length = len;
            docFreq = 0;
            _sumTotalTermFreq += totalTermFreq;
            totalTermFreq = 0;
            _termCount++;
        }
    }
    _docCount = visitedDocs.Cardinality();
    _fst = b.Finish();
}
/// <summary>
/// Lazily loads the terms-index FST from <c>input</c> on first use; when
/// <c>indexDivisor</c> &gt; 1, rebuilds it keeping only every
/// <c>indexDivisor</c>-th term (subsampling), matching the behavior of the
/// corrected sibling implementation.
/// </summary>
private void loadTermsIndex()
{
    if (Fst == null)
    {
        // Clone so our seek doesn't disturb other users of the shared input.
        // BUGFIX: Clone() needs the explicit IndexInput cast, as elsewhere in this file.
        var clone = (IndexInput) input.Clone();
        clone.Seek(indexStart);
        // BUGFIX: 'new FST<>' (Java diamond operator) is not valid C#; with
        // PositiveIntOutputs the FST carries long? outputs.
        Fst = new FST<long?>(clone, fstOutputs);
        // BUGFIX: IndexInput has no Close() in the .NET port; it is IDisposable.
        clone.Dispose();

        /*
        final String dotFileName = segment + "_" + fieldInfo.name + ".dot";
        Writer w = new OutputStreamWriter(new FileOutputStream(dotFileName));
        Util.toDot(fst, w, false, false);
        System.out.println("FST INDEX: SAVED to " + dotFileName);
        w.close();
        */

        if (indexDivisor > 1)
        {
            // Subsample: keep every indexDivisor-th term of the loaded FST.
            var scratchIntsRef = new IntsRef();
            // BUGFIX: the .NET port exposes a Singleton property, not GetSingleton().
            var outputs = PositiveIntOutputs.Singleton;
            // BUGFIX: generic arguments must be long? to match FST<long?>.
            var builder = new Builder<long?>(FST.INPUT_TYPE.BYTE1, outputs);
            // BUGFIX: enumerate the Fst field (lowercase 'fst' is undefined here).
            var fstEnum = new BytesRefFSTEnum<long?>(Fst);
            BytesRefFSTEnum<long?>.InputOutput<long?> result;
            int count = indexDivisor;
            while ((result = fstEnum.Next()) != null)
            {
                // Starting count at indexDivisor guarantees the first term is kept.
                if (count == indexDivisor)
                {
                    builder.Add(Util.ToIntsRef(result.Input, scratchIntsRef), result.Output);
                    count = 0;
                }
                count++;
            }
            Fst = builder.Finish();
        }
    }
}
/// <summary>
/// Builds an <seealso cref="SynonymMap"/> and returns it.
/// </summary>
public virtual SynonymMap Build()
{
    ByteSequenceOutputs outputs = ByteSequenceOutputs.Singleton;
    // TODO: are we using the best sharing options?
    var builder = new Builder<BytesRef>(FST.INPUT_TYPE.BYTE4, outputs);
    BytesRef scratch = new BytesRef(64);
    ByteArrayDataOutput scratchOutput = new ByteArrayDataOutput();
    // When dedup is enabled, dedupSet filters repeated ords within one entry.
    HashSet<int?> dedupSet;
    if (dedup)
    {
        dedupSet = new HashSet<int?>();
    }
    else
    {
        dedupSet = null;
    }
    // Scratch for relocating the header vInt (max vInt length is 5 bytes).
    var spare = new sbyte[5];
    Dictionary<CharsRef, MapEntry>.KeyCollection keys = workingSet.Keys;
    CharsRef[] sortedKeys = keys.ToArray();
    // FST inputs must be added in UTF-8 sorted order.
    Arrays.Sort(sortedKeys, CharsRef.UTF16SortedAsUTF8Comparator);
    IntsRef scratchIntsRef = new IntsRef();
    for (int keyIdx = 0; keyIdx < sortedKeys.Length; keyIdx++)
    {
        CharsRef input = sortedKeys[keyIdx];
        MapEntry output = workingSet[input];
        int numEntries = output.ords.Count;
        // Worst-case output size: header vInt + one vInt (<=5 bytes) per ord.
        int estimatedSize = 5 + numEntries * 5; // numEntries + one ord for each entry
        scratch.Grow(estimatedSize);
        scratchOutput.Reset(scratch.Bytes, scratch.Offset, scratch.Bytes.Length);
        Debug.Assert(scratch.Offset == 0);
        // Write the ord list first; the header is appended afterwards and then
        // rotated to the front (its length isn't known until count is final).
        int count = 0;
        for (int i = 0; i < numEntries; i++)
        {
            if (dedupSet != null)
            {
                // box once
                int? ent = output.ords[i];
                if (dedupSet.Contains(ent))
                {
                    continue;
                }
                dedupSet.Add(ent);
            }
            scratchOutput.WriteVInt(output.ords[i]);
            count++;
        }
        int pos = scratchOutput.Position;
        // Header vInt: count shifted left one bit; low bit 0 means includeOrig.
        scratchOutput.WriteVInt(count << 1 | (output.includeOrig ? 0 : 1));
        int pos2 = scratchOutput.Position;
        int vIntLen = pos2 - pos;
        // Move the count + includeOrig to the front of the byte[]:
        Array.Copy(scratch.Bytes, pos, spare, 0, vIntLen);
        Array.Copy(scratch.Bytes, 0, scratch.Bytes, vIntLen, pos);
        Array.Copy(spare, 0, scratch.Bytes, 0, vIntLen);
        if (dedupSet != null)
        {
            dedupSet.Clear();
        }
        scratch.Length = scratchOutput.Position - scratch.Offset;
        // Deep-copy: scratch is reused across iterations, builder keeps the value.
        builder.Add(Util.ToUTF32(input, scratchIntsRef), BytesRef.DeepCopyOf(scratch));
    }
    FST<BytesRef> fst = builder.Finish();
    return new SynonymMap(fst, words, maxHorizontalContext);
}
/// <summary>
/// Builds the final automaton from a list of entries.
/// </summary>
/// <param name="sorter">Supplies the (sorted) entries to encode.</param>
/// <returns>The finished FST, or <c>null</c> when the sorter yielded no entries.</returns>
private FST<object> BuildAutomaton(BytesRefSorter sorter)
{
    // No per-arc outputs are needed: the automaton only records key membership.
    Outputs<object> noOutputs = NoOutputs.Singleton;
    object emptyOutput = noOutputs.NoOutput;
    var fstBuilder = new Builder<object>(FST.INPUT_TYPE.BYTE1, 0, 0, true, true, shareMaxTailLength, noOutputs, null, false, PackedInts.DEFAULT, true, 15);
    var previous = new BytesRef();     // last entry added; used to drop adjacent duplicates
    var intsScratch = new IntsRef();
    int seen = 0;
    BytesRefIterator entries = sorter.GetEnumerator();
    BytesRef current;
    while ((current = entries.Next()) != null)
    {
        seen++;
        // Input is sorted, so equal entries are adjacent; only add when changed.
        if (previous.CompareTo(current) != 0)
        {
            fstBuilder.Add(Util.Fst.Util.ToIntsRef(current, intsScratch), emptyOutput);
            previous.CopyBytes(current);
        }
    }
    return seen == 0 ? null : fstBuilder.Finish();
}
/// <summary>
/// Lazily loads the terms-index FST on first use; when the index divisor is
/// greater than 1, rebuilds the FST keeping only every divisor-th term.
/// </summary>
private void LoadTermsIndex()
{
    // Already loaded — nothing to do.
    if (Fst != null) return;

    // Read the serialized FST from a private clone so the shared input's
    // position is left untouched.
    var indexClone = (IndexInput) _vgtir._input.Clone();
    indexClone.Seek(_indexStart);
    Fst = new FST<long?>(indexClone, _vgtir._fstOutputs);
    indexClone.Dispose();

    /*
    final String dotFileName = segment + "_" + fieldInfo.name + ".dot";
    Writer w = new OutputStreamWriter(new FileOutputStream(dotFileName));
    Util.toDot(fst, w, false, false);
    System.out.println("FST INDEX: SAVED to " + dotFileName);
    w.close();
    */

    if (_vgtir._indexDivisor > 1)
    {
        // subsample: rebuild the FST keeping every indexDivisor-th term.
        var intsScratch = new IntsRef();
        var positiveOutputs = PositiveIntOutputs.Singleton;
        var subsampled = new Builder<long?>(FST.INPUT_TYPE.BYTE1, positiveOutputs);
        var termEnum = new BytesRefFSTEnum<long?>(Fst);
        var remaining = _vgtir._indexDivisor;
        BytesRefFSTEnum<long?>.InputOutput<long?> term;
        while ((term = termEnum.Next()) != null)
        {
            // Starting the counter at the divisor guarantees the first term is kept.
            if (remaining == _vgtir._indexDivisor)
            {
                subsampled.Add(Util.ToIntsRef(term.Input, intsScratch), term.Output);
                remaining = 0;
            }
            remaining++;
        }
        Fst = subsampled.Finish();
    }
}