// Connection cost table with the backward ID as the first index: Get is called using the
// same backward ID consecutively, so those rows come first (this may not matter).
private readonly short[][] costs;

/// <summary>
/// Loads the connection cost table from the dictionary resource identified by
/// <c>FILENAME_SUFFIX</c>.
/// </summary>
/// <remarks>
/// After the codec header, the stream holds the forward size and the backward size as
/// VInt32 values, followed by one VInt32 per table cell. Each cell value is a zig-zag
/// encoded delta that is added to a running total; the running total, cast to
/// <c>short</c>, is the stored cost.
/// </remarks>
private ConnectionCosts()
{
    short[][] table;
    using (Stream @is = BinaryDictionary.GetTypeResource(GetType(), FILENAME_SUFFIX))
    {
        DataInput @in = new InputStreamDataInput(@is);
        CodecUtil.CheckHeader(@in, HEADER, VERSION, VERSION);
        int forwardSize = @in.ReadVInt32();
        int backwardSize = @in.ReadVInt32();
        table = RectangularArrays.ReturnRectangularArray<short>(backwardSize, forwardSize);

        int accum = 0;
        foreach (short[] row in table)
        {
            for (int i = 0; i < row.Length; i++)
            {
                int raw = @in.ReadVInt32();
                // Zig-zag decode. The shift must be applied while the value is still a
                // uint: a cast binds tighter than >>, so "(int)((uint)raw) >> 1" would
                // shift the signed value arithmetically and carry the sign bit along.
                accum += (int)((uint)raw >> 1) ^ -(raw & 1);
                row[i] = (short)accum;
            }
        }
    }
    this.costs = table;
}
/// <summary>
/// Indexes documents whose four fields are each picked at random from a pool of field
/// numbers, calls <c>ForceMerge(1)</c>, then reads the field infos of every segment and
/// checks that the indexed and term-vector flags match the field that
/// <c>GetField</c> builds for the same number.
/// </summary>
public virtual void TestManyFields()
{
    int docCount = AtLeast(200);
    int fieldPoolSize = AtLeast(50);

    // Choose the field numbers for every document up front.
    int[][] fieldNumbers = RectangularArrays.ReturnRectangularArray<int>(docCount, 4);
    foreach (int[] row in fieldNumbers)
    {
        for (int slot = 0; slot < row.Length; slot++)
        {
            row[slot] = Random.Next(fieldPoolSize);
        }
    }

    Directory dir = NewDirectory();
    IndexWriter writer = new IndexWriter(
        dir,
        NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)));

    for (int docIndex = 0; docIndex < docCount; docIndex++)
    {
        Document doc = new Document();
        foreach (int fieldNumber in fieldNumbers[docIndex])
        {
            doc.Add(GetField(fieldNumber));
        }
        writer.AddDocument(doc);
    }
    writer.ForceMerge(1);
    writer.Dispose();

    SegmentInfos segmentInfos = new SegmentInfos();
    segmentInfos.Read(dir);
    foreach (SegmentCommitInfo segment in segmentInfos.Segments)
    {
        foreach (FieldInfo actual in SegmentReader.ReadFieldInfos(segment))
        {
            // The field name is parsed back into the number the field was built from.
            Field expected = GetField(Convert.ToInt32(actual.Name));
            Assert.AreEqual(expected.FieldType.IsIndexed, actual.IsIndexed);
            Assert.AreEqual(expected.FieldType.StoreTermVectors, actual.HasVectors);
        }
    }

    dir.Dispose();
}
/// <summary>
/// Loads the string values for each field X docID to be
/// highlighted. By default this loads from stored
/// fields, but a subclass can change the source. This
/// method should allocate the string[fields.length][docids.length]
/// and fill all values. The returned strings must be
/// identical to what was indexed.
/// </summary>
/// <param name="searcher">searcher used to visit each document</param>
/// <param name="fields">names of the fields to load</param>
/// <param name="docids">IDs of the documents to load</param>
/// <param name="maxLength">length limit handed to the stored field visitor</param>
/// <returns>one row per field, each row holding one value per document ID</returns>
protected virtual IList<string[]> LoadFieldValues(IndexSearcher searcher, string[] fields, int[] docids, int maxLength)
{
    int fieldCount = fields.Length;
    int docCount = docids.Length;

    // One multi-value separator per requested field.
    char[] separators = new char[fieldCount];
    for (int f = 0; f < fieldCount; f++)
    {
        separators[f] = GetMultiValuedSeparator(fields[f]);
    }

    string[][] values = RectangularArrays.ReturnRectangularArray<string>(fieldCount, docCount);
    LimitedStoredFieldVisitor visitor = new LimitedStoredFieldVisitor(fields, separators, maxLength);

    for (int d = 0; d < docCount; d++)
    {
        searcher.Doc(docids[d], visitor);
        for (int f = 0; f < fieldCount; f++)
        {
            // LUCENENET: GetValue already yields a string, so no ToString() is needed
            values[f][d] = visitor.GetValue(f);
        }
        // The same visitor is reused for the next document.
        visitor.Reset();
    }

    return values;
}
// INTERVAL-sized buffer of total term frequencies; the constructor fills it with -1 when
// the owning reader reports that it has no frequencies.
private readonly long[] totalTermFreq; // LUCENENET: marked readonly

/// <summary>
/// Creates the enum for the given reader: obtains a fresh term state from the postings
/// reader, allocates the INTERVAL-sized buffers, points the stats and metadata readers at
/// the reader's blocks, and marks both block ordinals as unset (-1).
/// </summary>
/// <param name="outerInstance">the terms reader this enum iterates over</param>
internal BaseTermsEnum(TermsReader outerInstance)
{
    this.outerInstance = outerInstance;
    state = outerInstance.outerInstance.postingsReader.NewTermState();
    term = null;

    // Buffers sized by INTERVAL.
    longs = RectangularArrays.ReturnRectangularArray<long>(INTERVAL, outerInstance.longsSize);
    bytesStart = new int[INTERVAL];
    bytesLength = new int[INTERVAL];
    docFreq = new int[INTERVAL];
    totalTermFreq = new long[INTERVAL];

    // Readers over the blocks held by the terms reader.
    statsReader.Reset(outerInstance.statsBlock);
    metaLongsReader.Reset(outerInstance.metaLongsBlock);
    metaBytesReader.Reset(outerInstance.metaBytesBlock);

    statsBlockOrd = -1;
    metaBlockOrd = -1;

    if (!outerInstance.HasFreqs)
    {
        Arrays.Fill(totalTermFreq, -1);
    }
}
/// <summary>
/// Randomized test of discrete multi-value highlighting. Builds documents that each hold
/// several values of field <c>F</c> made of random terms, then repeatedly picks a term,
/// takes the first document recorded for it, and checks that the fragments produced by
/// <c>SimpleFragmentsBuilder</c> equal the field values containing the term, with every
/// occurrence wrapped in &lt;b&gt; tags.
/// </summary>
public void TestRandomDiscreteMultiValueHighlighting()
{
    // Pool of non-empty random terms.
    string[] randomValues = new string[3 + Random.nextInt(10 * RANDOM_MULTIPLIER)];
    for (int slot = 0; slot < randomValues.Length; slot++)
    {
        string candidate = TestUtil.RandomSimpleString(Random);
        while ("".Equals(candidate, StringComparison.Ordinal))
        {
            candidate = TestUtil.RandomSimpleString(Random);
        }
        randomValues[slot] = candidate;
    }

    Directory dir = NewDirectory();
    RandomIndexWriter writer = new RandomIndexWriter(
        Random,
        dir,
        NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)).SetMergePolicy(NewLogMergePolicy()));

    FieldType customType = new FieldType(TextField.TYPE_STORED)
    {
        StoreTermVectors = true,
        StoreTermVectorOffsets = true,
        StoreTermVectorPositions = true
    };

    int numDocs = randomValues.Length * 5;
    int numFields = 2 + Random.nextInt(5);
    int numTerms = 2 + Random.nextInt(3);

    List<Doc> docs = new List<Doc>(numDocs);
    List<Document> documents = new List<Document>(numDocs);
    IDictionary<string, ISet<int>> valueToDocId = new HashMap<string, ISet<int>>();

    for (int docId = 0; docId < numDocs; docId++)
    {
        Document document = new Document();
        string[][] fields = RectangularArrays.ReturnRectangularArray<string>(numFields, numTerms);
        for (int fieldIndex = 0; fieldIndex < numFields; fieldIndex++)
        {
            // Draw the terms of this field value and join them with single spaces.
            string[] terms = new string[numTerms];
            StringBuilder text = new StringBuilder();
            for (int termIndex = 0; termIndex < numTerms; termIndex++)
            {
                terms[termIndex] = getRandomValue(randomValues, valueToDocId, docId);
                if (termIndex > 0)
                {
                    text.Append(' ');
                }
                text.Append(terms[termIndex]);
            }
            document.Add(new Field(F, text.ToString(), customType));
            fields[fieldIndex] = terms;
        }
        docs.Add(new Doc(fields));
        documents.Add(document);
    }
    writer.AddDocuments(documents);
    writer.Dispose();

    IndexReader reader = DirectoryReader.Open(dir);
    try
    {
        int highlightIters = 1 + Random.nextInt(120 * RANDOM_MULTIPLIER);
        for (int highlightIter = 0; highlightIter < highlightIters; highlightIter++)
        {
            Console.WriteLine($"Highlighter iter: {highlightIter}");

            string queryTerm = randomValues[Random.nextInt(randomValues.Length)];
            int randomHit = valueToDocId[queryTerm].First();

            // Expected output: one highlighted string per field value that contains the term.
            List<StringBuilder> expected = new List<StringBuilder>();
            foreach (string[] valueTerms in docs[randomHit].fieldValues)
            {
                StringBuilder highlighted = new StringBuilder();
                bool containsTerm = false;
                int lastIndex = valueTerms.Length - 1;
                for (int i = 0; i < valueTerms.Length; i++)
                {
                    string current = valueTerms[i];
                    if (queryTerm.Equals(current, StringComparison.Ordinal))
                    {
                        highlighted.Append("<b>").Append(queryTerm).Append("</b>");
                        containsTerm = true;
                    }
                    else
                    {
                        highlighted.Append(current);
                    }

                    if (i != lastIndex)
                    {
                        highlighted.Append(' ');
                    }
                }

                if (containsTerm)
                {
                    expected.Add(highlighted);
                }
            }

            FieldQuery fieldQuery = new FieldQuery(tq(queryTerm), true, true);
            FieldTermStack termStack = new FieldTermStack(reader, randomHit, F, fieldQuery);
            FieldPhraseList phraseList = new FieldPhraseList(termStack, fieldQuery);
            SimpleFragListBuilder fragListBuilder = new SimpleFragListBuilder(100);
            FieldFragList fragList = fragListBuilder.CreateFieldFragList(phraseList, 300);
            SimpleFragmentsBuilder fragmentsBuilder = new SimpleFragmentsBuilder
            {
                IsDiscreteMultiValueHighlighting = true
            };
            string[] actualFragments = fragmentsBuilder.CreateFragments(reader, randomHit, F, fragList, numFields);

            assertEquals(expected.Count, actualFragments.Length);
            for (int i = 0; i < actualFragments.Length; i++)
            {
                assertEquals(expected[i].ToString(), actualFragments[i]);
            }
        }
    }
    finally
    {
        reader.Dispose();
        dir.Dispose();
    }
}
/// <summary>
/// Builds the shared index for this test class. Every document carries six string fields
/// holding values counted down from each type's maximum. Even-numbered documents also get
/// the "sparse" and "numInt" fields, and the random unicode single- and multi-valued
/// fields are each left out of a random subset of documents. The generated values are
/// kept in <c>UnicodeStrings</c> and <c>MultiValued</c>.
/// </summary>
public override void BeforeClass()
{
    base.BeforeClass();
    NUM_DOCS = AtLeast(500);
    NUM_ORDS = AtLeast(2);
    Directory = NewDirectory();
    RandomIndexWriter writer = new RandomIndexWriter(
        Random,
        Directory,
        NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)).SetMergePolicy(NewLogMergePolicy()));

    // Counters that start at each type's maximum and step down once per document.
    long longValue = long.MaxValue;
    double doubleValue = double.MaxValue;
    sbyte byteValue = sbyte.MaxValue;
    short shortValue = short.MaxValue;
    int intValue = int.MaxValue;
    float floatValue = float.MaxValue;

    UnicodeStrings = new string[NUM_DOCS];
    MultiValued = RectangularArrays.ReturnRectangularArray<BytesRef>(NUM_DOCS, NUM_ORDS);

    if (VERBOSE)
    {
        Console.WriteLine("TEST: setUp");
    }

    CultureInfo invariant = CultureInfo.InvariantCulture;
    for (int i = 0; i < NUM_DOCS; i++)
    {
        Document doc = new Document();

        doc.Add(NewStringField("theLong", longValue.ToString(invariant), Field.Store.NO));
        longValue--;
        doc.Add(NewStringField("theDouble", doubleValue.ToString("R", invariant), Field.Store.NO));
        doubleValue--;
        doc.Add(NewStringField("theByte", byteValue.ToString(invariant), Field.Store.NO));
        byteValue--;
        doc.Add(NewStringField("theShort", shortValue.ToString(invariant), Field.Store.NO));
        shortValue--;
        doc.Add(NewStringField("theInt", intValue.ToString(invariant), Field.Store.NO));
        intValue--;
        doc.Add(NewStringField("theFloat", floatValue.ToString("R", invariant), Field.Store.NO));
        floatValue--;

        // Only even-numbered documents get the sparse fields.
        if (i % 2 == 0)
        {
            doc.Add(NewStringField("sparse", i.ToString(invariant), Field.Store.NO));
            doc.Add(new Int32Field("numInt", i, Field.Store.NO));
        }

        // sometimes skip the field:
        if (Random.Next(40) != 17)
        {
            string unicode = GenerateString(i);
            UnicodeStrings[i] = unicode;
            doc.Add(NewStringField("theRandomUnicodeString", unicode, Field.Store.YES));
        }

        // sometimes skip the field:
        if (Random.Next(10) != 8)
        {
            BytesRef[] ords = MultiValued[i];
            for (int j = 0; j < NUM_ORDS; j++)
            {
                string newValue = GenerateString(i);
                ords[j] = new BytesRef(newValue);
                doc.Add(NewStringField("theRandomUnicodeMultiValuedField", newValue, Field.Store.YES));
            }
            Array.Sort(ords);
        }

        writer.AddDocument(doc);
    }

    IndexReader r = writer.GetReader();
    Reader = SlowCompositeReaderWrapper.Wrap(r);
    writer.Dispose();
}
/// <summary>
/// Scores how close <paramref name="target"/> and <paramref name="other"/> are, using an
/// edit distance in which an adjacent transposition is considered alongside insertion,
/// deletion and substitution.
/// </summary>
/// <param name="target">first string</param>
/// <param name="other">second string</param>
/// <returns>
/// <c>1 - distance / min(n, m)</c>, where n and m are the lengths of the two converted
/// inputs. If either length is zero the result is instead 0 when both are zero, or the
/// larger length otherwise.
/// </returns>
public float GetDistance(string target, string other)
{
    // NOTE: if we cared, we could use 3*m space instead of m*n space, similar to
    // what LevenshteinDistance does, except cycling through a ring of three
    // horizontal cost arrays... but this comparer is never actually used by
    // DirectSpellChecker, it's only used for merging results from multiple shards
    // in "distributed spellcheck", and it's inefficient in other ways too...

    // cheaper to do this up front once
    Int32sRef targetPoints = ToInt32sRef(target);
    Int32sRef otherPoints = ToInt32sRef(other);
    int n = targetPoints.Length;
    int m = otherPoints.Length;

    if (n == 0 || m == 0)
    {
        // NOTE(review): this path returns a raw length rather than a normalized score;
        // kept as-is because callers may rely on it.
        return n == m ? 0 : Math.Max(n, m);
    }

    // Allocate the cost array only once we know it is needed.
    int[][] d = RectangularArrays.ReturnRectangularArray<int>(n + 1, m + 1);
    var s = targetPoints.Int32s;
    var t = otherPoints.Int32s;

    // First column and first row: distance from the empty prefix.
    for (int i = 0; i <= n; i++)
    {
        d[i][0] = i;
    }
    for (int j = 0; j <= m; j++)
    {
        d[0][j] = j;
    }

    for (int j = 1; j <= m; j++)
    {
        int t_j = t[j - 1]; // jth element of other
        for (int i = 1; i <= n; i++)
        {
            int cost = s[i - 1] == t_j ? 0 : 1;
            // minimum of cell to the left+1, to the top+1, diagonally left and up +cost
            d[i][j] = Math.Min(Math.Min(d[i - 1][j] + 1, d[i][j - 1] + 1), d[i - 1][j - 1] + cost);
            // transposition
            if (i > 1 && j > 1 && s[i - 1] == t[j - 2] && s[i - 2] == t[j - 1])
            {
                d[i][j] = Math.Min(d[i][j], d[i - 2][j - 2] + cost);
            }
        }
    }

    return 1.0f - ((float)d[n][m] / Math.Min(m, n));
}
/// <summary>
/// Computes the edit distance between <paramref name="target"/> and
/// <paramref name="other"/>, comparing <c>char</c> by <c>char</c>, where an adjacent
/// transposition is considered alongside insertion, deletion and substitution.
/// </summary>
/// <param name="target">first string</param>
/// <param name="other">second string</param>
/// <returns>
/// the distance; when either string is empty this is the length of the other string
/// (0 when both are empty)
/// </returns>
private int GetTDistance(string target, string other)
{
    int n = target.Length;
    int m = other.Length;

    if (n == 0 || m == 0)
    {
        return n == m ? 0 : Math.Max(n, m);
    }

    // Allocate the cost array only once we know it is needed.
    int[][] d = RectangularArrays.ReturnRectangularArray<int>(n + 1, m + 1);

    // First column and first row: distance from the empty prefix.
    for (int i = 0; i <= n; i++)
    {
        d[i][0] = i;
    }
    for (int j = 0; j <= m; j++)
    {
        d[0][j] = j;
    }

    for (int j = 1; j <= m; j++)
    {
        char t_j = other[j - 1]; // jth character of other
        for (int i = 1; i <= n; i++)
        {
            int cost = target[i - 1] == t_j ? 0 : 1;
            // minimum of cell to the left+1, to the top+1, diagonally left and up +cost
            d[i][j] = Math.Min(Math.Min(d[i - 1][j] + 1, d[i][j - 1] + 1), d[i - 1][j - 1] + cost);
            // transposition
            if (i > 1 && j > 1 && target[i - 1] == other[j - 2] && target[i - 2] == other[j - 1])
            {
                d[i][j] = Math.Min(d[i][j], d[i - 2][j - 2] + cost);
            }
        }
    }

    // Every cell is a minimum over non-negative values, so no Math.Abs is needed here.
    return d[n][m];
}
/// <summary>
/// Construct a patch string that transforms a to b.
/// </summary>
/// <remarks>
/// The patch is produced by walking the cost grid from the ends of both strings back to
/// their starts. It is made of two-character commands: <c>-</c> (run of unchanged
/// characters) and <c>D</c> (run of deletions), each followed by a count character where
/// 'a' stands for 1, and <c>I</c> (insert) and <c>R</c> (replace), each followed by the
/// character taken from <paramref name="b"/>.
/// </remarks>
/// <param name="a">1st string</param>
/// <param name="b">2nd string</param>
/// <returns>the patch string, or <c>null</c> if either argument is <c>null</c></returns>
public string Exec(string a, string b)
{
    if (a is null || b is null)
    {
        return null;
    }

    // Move codes stored in the "way" grid.
    const int D = 0; // diagonal, no change
    const int X = 1; // step on x: delete
    const int Y = 2; // step on y: insert
    const int R = 3; // diagonal, replace

    int rows = a.Length + 1;
    int cols = b.Length + 1;

    /*
     * setup memory if needed => processing speed up
     */
    if (rows >= sizex || cols >= sizey)
    {
        sizex = rows + 8;
        sizey = cols + 8;
        net = RectangularArrays.ReturnRectangularArray<int>(sizex, sizey);
        way = RectangularArrays.ReturnRectangularArray<int>(sizex, sizey);
    }

    /*
     * clear the network
     */
    for (int x = 0; x < rows; x++)
    {
        for (int y = 0; y < cols; y++)
        {
            net[x][y] = 0;
        }
    }

    /*
     * set known persistent values
     */
    for (int x = 1; x < rows; x++)
    {
        net[x][0] = x;
        way[x][0] = X;
    }
    for (int y = 1; y < cols; y++)
    {
        net[0][y] = y;
        way[0][y] = Y;
    }

    int[] go = new int[4];
    for (int x = 1; x < rows; x++)
    {
        for (int y = 1; y < cols; y++)
        {
            int diagonal = net[x - 1][y - 1];
            go[X] = net[x - 1][y] + DELETE; // way on x costs 1 unit
            go[Y] = net[x][y - 1] + INSERT; // way on y costs 1 unit
            go[R] = diagonal + REPLACE;
            go[D] = diagonal + ((a[x - 1] == b[y - 1]) ? NOOP : 100); // diagonal costs 0, when no change

            // Pick the cheapest move. X wins a tie against D; Y and R must be strictly cheaper.
            int best = D;
            if (go[best] >= go[X])
            {
                best = X;
            }
            if (go[best] > go[Y])
            {
                best = Y;
            }
            if (go[best] > go[R])
            {
                best = R;
            }

            way[x][y] = best;
            net[x][y] = (ushort)go[best];
        }
    }

    // read the patch string
    StringBuilder patch = new StringBuilder();
    char @base = (char)('a' - 1);
    char deletes = @base; // pending run of deletions
    char equals = @base;  // pending run of unchanged characters
    int px = rows - 1;
    int py = cols - 1;
    while (px + py != 0)
    {
        switch (way[px][py])
        {
            case X: // delete
                if (equals != @base)
                {
                    patch.Append('-').Append(equals);
                    equals = @base;
                }
                deletes++;
                px--;
                break;

            case Y: // insert
                if (deletes != @base)
                {
                    patch.Append('D').Append(deletes);
                    deletes = @base;
                }
                if (equals != @base)
                {
                    patch.Append('-').Append(equals);
                    equals = @base;
                }
                patch.Append('I').Append(b[--py]);
                break;

            case R: // replace
                if (deletes != @base)
                {
                    patch.Append('D').Append(deletes);
                    deletes = @base;
                }
                if (equals != @base)
                {
                    patch.Append('-').Append(equals);
                    equals = @base;
                }
                patch.Append('R').Append(b[--py]);
                px--;
                break;

            case D: // no change
                if (deletes != @base)
                {
                    patch.Append('D').Append(deletes);
                    deletes = @base;
                }
                equals++;
                px--;
                py--;
                break;
        }
    }

    // Flush a trailing run of deletions; a trailing run of unchanged characters is not written.
    if (deletes != @base)
    {
        patch.Append('D').Append(deletes);
    }

    return patch.ToString();
}