Exemple #1
0
        // Connection cost matrix, indexed as costs[backwardId][forwardId]. Backward IDs come
        // first since get is called using the same backward ID consecutively (maybe doesn't matter).
        private readonly short[][] costs;

        /// <summary>
        /// Loads the connection cost matrix from the dictionary resource identified by
        /// <c>FILENAME_SUFFIX</c>.
        /// <para/>
        /// After the codec header, the resource holds the forward size, the backward size and then
        /// one VInt per matrix cell. Each VInt is a zig-zag encoded delta relative to the previously
        /// decoded cell; the running total is carried across row boundaries.
        /// </summary>
        private ConnectionCosts()
        {
            using (Stream @is = BinaryDictionary.GetTypeResource(GetType(), FILENAME_SUFFIX))
            {
                DataInput @in = new InputStreamDataInput(@is);
                CodecUtil.CheckHeader(@in, HEADER, VERSION, VERSION);

                int forwardSize = @in.ReadVInt32();
                int backwardSize = @in.ReadVInt32();
                short[][] table = RectangularArrays.ReturnRectangularArray<short>(backwardSize, forwardSize);

                int accum = 0;
                foreach (short[] row in table)
                {
                    for (int i = 0; i < row.Length; i++)
                    {
                        int raw = @in.ReadVInt32();

                        // Zig-zag decode. The shift must be applied to the unsigned value so that it
                        // is a logical (zero-filling) shift; casting back to int *before* shifting
                        // would turn it into an arithmetic shift and corrupt values with the top bit set.
                        accum += (int)((uint)raw >> 1) ^ -(raw & 1);
                        row[i] = (short)accum;
                    }
                }

                this.costs = table;
            }
        }
        /// <summary>
        /// Indexes documents that each carry four randomly numbered fields, merges down to a single
        /// segment and then checks that the field infos read back from every segment agree with the
        /// field type produced by <c>GetField</c> for the same field number.
        /// </summary>
        public virtual void TestManyFields()
        {
            int docCount = AtLeast(200);
            int fieldLimit = AtLeast(50);

            // Draw the field numbers for all documents up front (four per document).
            int[][] fieldNumbers = RectangularArrays.ReturnRectangularArray<int>(docCount, 4);
            foreach (int[] row in fieldNumbers)
            {
                for (int slot = 0; slot < row.Length; slot++)
                {
                    row[slot] = Random.Next(fieldLimit);
                }
            }

            Directory dir = NewDirectory();
            IndexWriter writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)));

            // One document per row of field numbers.
            foreach (int[] row in fieldNumbers)
            {
                Document document = new Document();
                foreach (int fieldNumber in row)
                {
                    document.Add(GetField(fieldNumber));
                }

                writer.AddDocument(document);
            }

            writer.ForceMerge(1);
            writer.Dispose();

            SegmentInfos segmentInfos = new SegmentInfos();
            segmentInfos.Read(dir);

            foreach (SegmentCommitInfo segment in segmentInfos.Segments)
            {
                FieldInfos fieldInfos = SegmentReader.ReadFieldInfos(segment);

                foreach (FieldInfo info in fieldInfos)
                {
                    // The field name is parsed back into the number that is handed to GetField.
                    Field expected = GetField(Convert.ToInt32(info.Name));
                    Assert.AreEqual(expected.FieldType.IsIndexed, info.IsIndexed);
                    Assert.AreEqual(expected.FieldType.StoreTermVectors, info.HasVectors);
                }
            }

            dir.Dispose();
        }
Exemple #3
0
        /// <summary>
        /// Loads the string values for each field X docID to be
        /// highlighted. By default the values come from stored
        /// fields, but a subclass can change the source. An
        /// implementation should allocate string[fields.Length][docids.Length]
        /// and fill every entry. The returned strings must be
        /// identical to what was indexed.
        /// </summary>
        /// <param name="searcher">searcher used to visit the stored fields of each document</param>
        /// <param name="fields">names of the fields to load</param>
        /// <param name="docids">documents to load the values for</param>
        /// <param name="maxLength">length limit handed to the stored field visitor</param>
        /// <returns>one array per field, each holding one value per entry of <paramref name="docids"/></returns>
        protected virtual IList<string[]> LoadFieldValues(IndexSearcher searcher, string[] fields, int[] docids, int maxLength)
        {
            int fieldCount = fields.Length;
            int docCount = docids.Length;

            string[][] values = RectangularArrays.ReturnRectangularArray<string>(fieldCount, docCount);

            // Look up the multi-value separator of every field once.
            char[] separators = new char[fieldCount];
            for (int f = 0; f < fieldCount; f++)
            {
                separators[f] = GetMultiValuedSeparator(fields[f]);
            }

            LimitedStoredFieldVisitor visitor = new LimitedStoredFieldVisitor(fields, separators, maxLength);

            for (int d = 0; d < docCount; d++)
            {
                searcher.Doc(docids[d], visitor);

                for (int f = 0; f < fieldCount; f++)
                {
                    values[f][d] = visitor.GetValue(f); // LUCENENET: No point in doing ToString() on a string
                }

                // The same visitor instance is reused for the next document.
                visitor.Reset();
            }

            return values;
        }
Exemple #4
0
                private readonly long[] totalTermFreq; // LUCENENET: marked readonly

                /// <summary>
                /// Creates the enumerator for <paramref name="outerInstance"/>: obtains a fresh term
                /// state from the postings reader, resets the stats/meta readers onto the reader's
                /// blocks and allocates the per-interval buffers (<c>INTERVAL</c> entries each).
                /// </summary>
                /// <param name="outerInstance">the terms reader this enumerator belongs to</param>
                internal BaseTermsEnum(TermsReader outerInstance)
                {
                    this.outerInstance = outerInstance;
                    state = outerInstance.outerInstance.postingsReader.NewTermState();
                    term = null;

                    // Reset the block readers onto the owning reader's data.
                    statsReader.Reset(outerInstance.statsBlock);
                    metaLongsReader.Reset(outerInstance.metaLongsBlock);
                    metaBytesReader.Reset(outerInstance.metaBytesBlock);

                    // Per-interval buffers.
                    longs = RectangularArrays.ReturnRectangularArray<long>(INTERVAL, outerInstance.longsSize);
                    bytesStart = new int[INTERVAL];
                    bytesLength = new int[INTERVAL];
                    docFreq = new int[INTERVAL];
                    totalTermFreq = new long[INTERVAL];

                    // Both block ordinals start at -1.
                    statsBlockOrd = -1;
                    metaBlockOrd = -1;

                    // When the reader has no frequencies, every total term frequency slot is pre-filled with -1.
                    if (!outerInstance.HasFreqs)
                    {
                        Arrays.Fill(totalTermFreq, -1);
                    }
                }
Exemple #5
0
        /// <summary>
        /// Randomized test of discrete multi-value highlighting: indexes documents whose multi-valued
        /// field <c>F</c> is built from a random pool of terms, then repeatedly picks a term, builds
        /// the expected highlighted text for every field value of a matching document, and compares
        /// it with the fragments produced by <c>SimpleFragmentsBuilder</c>.
        /// </summary>
        public void TestRandomDiscreteMultiValueHighlighting()
        {
            // Pool of non-empty random terms.
            string[] randomValues = new string[3 + Random.nextInt(10 * RANDOM_MULTIPLIER)];
            for (int v = 0; v < randomValues.Length; v++)
            {
                string candidate = TestUtil.RandomSimpleString(Random);
                while ("".Equals(candidate, StringComparison.Ordinal))
                {
                    candidate = TestUtil.RandomSimpleString(Random);
                }
                randomValues[v] = candidate;
            }

            Directory dir = NewDirectory();
            RandomIndexWriter writer = new RandomIndexWriter(
                Random,
                dir,
                NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)).SetMergePolicy(NewLogMergePolicy()));

            // Stored text field with term vectors, offsets and positions.
            FieldType customType = new FieldType(TextField.TYPE_STORED);
            customType.StoreTermVectors = true;
            customType.StoreTermVectorOffsets = true;
            customType.StoreTermVectorPositions = true;

            int numDocs = randomValues.Length * 5;
            int numFields = 2 + Random.nextInt(5);
            int numTerms = 2 + Random.nextInt(3);
            List<Doc> docs = new List<Doc>(numDocs);
            List<Document> documents = new List<Document>(numDocs);
            IDictionary<string, ISet<int>> valueToDocId = new HashMap<string, ISet<int>>();

            for (int docId = 0; docId < numDocs; docId++)
            {
                Document document = new Document();
                string[][] fields = RectangularArrays.ReturnRectangularArray<string>(numFields, numTerms);

                // Fill each row in place; the indexed text is the row's terms joined by single spaces.
                foreach (string[] fieldValues in fields)
                {
                    StringBuilder text = new StringBuilder();
                    for (int k = 0; k < numTerms; k++)
                    {
                        fieldValues[k] = getRandomValue(randomValues, valueToDocId, docId);
                        if (k > 0)
                        {
                            text.Append(' ');
                        }
                        text.Append(fieldValues[k]);
                    }
                    document.Add(new Field(F, text.ToString(), customType));
                }

                docs.Add(new Doc(fields));
                documents.Add(document);
            }

            writer.AddDocuments(documents);
            writer.Dispose();
            IndexReader reader = DirectoryReader.Open(dir);

            try
            {
                int highlightIters = 1 + Random.nextInt(120 * RANDOM_MULTIPLIER);
                for (int highlightIter = 0; highlightIter < highlightIters; highlightIter++)
                {
                    Console.WriteLine($"Highlighter iter: {highlightIter}");

                    string queryTerm = randomValues[Random.nextInt(randomValues.Length)];
                    int randomHit = valueToDocId[queryTerm].First();

                    // Expected output: one entry per field value that contains the query term,
                    // with every occurrence of the term wrapped in <b>...</b>.
                    List<StringBuilder> expectedFragments = new List<StringBuilder>();
                    foreach (string[] fieldValues in docs[randomHit].fieldValues)
                    {
                        StringBuilder expected = new StringBuilder();
                        bool hit = false;
                        for (int t = 0; t < fieldValues.Length; t++)
                        {
                            if (t > 0)
                            {
                                expected.Append(' ');
                            }

                            string term = fieldValues[t];
                            if (queryTerm.Equals(term, StringComparison.Ordinal))
                            {
                                expected.Append("<b>").Append(queryTerm).Append("</b>");
                                hit = true;
                            }
                            else
                            {
                                expected.Append(term);
                            }
                        }

                        if (hit)
                        {
                            expectedFragments.Add(expected);
                        }
                    }

                    FieldQuery fq = new FieldQuery(tq(queryTerm), true, true);
                    FieldTermStack stack = new FieldTermStack(reader, randomHit, F, fq);

                    FieldPhraseList fpl = new FieldPhraseList(stack, fq);
                    SimpleFragListBuilder sflb = new SimpleFragListBuilder(100);
                    FieldFragList ffl = sflb.CreateFieldFragList(fpl, 300);

                    SimpleFragmentsBuilder sfb = new SimpleFragmentsBuilder();
                    sfb.IsDiscreteMultiValueHighlighting = true;
                    string[] actualFragments = sfb.CreateFragments(reader, randomHit, F, ffl, numFields);

                    assertEquals(expectedFragments.Count, actualFragments.Length);
                    for (int f = 0; f < actualFragments.Length; f++)
                    {
                        assertEquals(expectedFragments[f].ToString(), actualFragments[f]);
                    }
                }
            }
            finally
            {
                reader.Dispose();
                dir.Dispose();
            }
        }
        /// <summary>
        /// Builds the shared test index: picks the document and ordinal counts, writes one document
        /// per index with numeric-as-string fields plus optional sparse, unicode and multi-valued
        /// fields, and finally opens <c>Reader</c> as a slow composite wrapper over the writer's reader.
        /// </summary>
        public override void BeforeClass()
        {
            base.BeforeClass();

            NUM_DOCS = AtLeast(500);
            NUM_ORDS = AtLeast(2);
            Directory = NewDirectory();
            RandomIndexWriter writer = new RandomIndexWriter(
                Random,
                Directory,
                NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)).SetMergePolicy(NewLogMergePolicy()));

            // Each counter starts at its type's maximum and is post-decremented once per document.
            long theLong = long.MaxValue;
            double theDouble = double.MaxValue;
            sbyte theByte = sbyte.MaxValue;
            short theShort = short.MaxValue;
            int theInt = int.MaxValue;
            float theFloat = float.MaxValue;

            UnicodeStrings = new string[NUM_DOCS];
            MultiValued = RectangularArrays.ReturnRectangularArray<BytesRef>(NUM_DOCS, NUM_ORDS);

            if (VERBOSE)
            {
                Console.WriteLine("TEST: setUp");
            }

            for (int i = 0; i < NUM_DOCS; i++)
            {
                Document doc = new Document();
                doc.Add(NewStringField("theLong", (theLong--).ToString(CultureInfo.InvariantCulture), Field.Store.NO));
                doc.Add(NewStringField("theDouble", (theDouble--).ToString("R", CultureInfo.InvariantCulture), Field.Store.NO));
                doc.Add(NewStringField("theByte", (theByte--).ToString(CultureInfo.InvariantCulture), Field.Store.NO));
                doc.Add(NewStringField("theShort", (theShort--).ToString(CultureInfo.InvariantCulture), Field.Store.NO));
                doc.Add(NewStringField("theInt", (theInt--).ToString(CultureInfo.InvariantCulture), Field.Store.NO));
                doc.Add(NewStringField("theFloat", (theFloat--).ToString("R", CultureInfo.InvariantCulture), Field.Store.NO));

                // Only even-numbered documents get the "sparse" and "numInt" fields.
                if (i % 2 == 0)
                {
                    doc.Add(NewStringField("sparse", i.ToString(CultureInfo.InvariantCulture), Field.Store.NO));
                    doc.Add(new Int32Field("numInt", i, Field.Store.NO));
                }

                // sometimes skip the field:
                if (Random.Next(40) != 17)
                {
                    string unicode = GenerateString(i);
                    UnicodeStrings[i] = unicode;
                    doc.Add(NewStringField("theRandomUnicodeString", unicode, Field.Store.YES));
                }

                // sometimes skip the field:
                if (Random.Next(10) != 8)
                {
                    BytesRef[] ords = MultiValued[i];
                    for (int j = 0; j < NUM_ORDS; j++)
                    {
                        string newValue = GenerateString(i);
                        ords[j] = new BytesRef(newValue);
                        doc.Add(NewStringField("theRandomUnicodeMultiValuedField", newValue, Field.Store.YES));
                    }

                    // Keep the expected values for this document in sorted order.
                    Array.Sort(ords);
                }

                writer.AddDocument(doc);
            }

            IndexReader r = writer.GetReader();
            Reader = SlowCompositeReaderWrapper.Wrap(r);
            writer.Dispose();
        }
        /// <summary>
        /// Compares two strings on the values produced by <c>ToInt32sRef</c>, using an edit distance
        /// that counts insertions, deletions, substitutions and swaps of two adjacent elements.
        /// </summary>
        /// <param name="target">first string</param>
        /// <param name="other">second string</param>
        /// <returns>
        /// <c>1 - distance / min(n, m)</c> where <c>n</c> and <c>m</c> are the converted lengths.
        /// If either length is zero: <c>0</c> when both are zero, otherwise the larger length.
        /// </returns>
        public float GetDistance(string target, string other)
        {
            // NOTE: if we cared, we could use 3*m space instead of m*n space, similar to
            // what LevenshteinDistance does, except cycling thru a ring of three
            // horizontal cost arrays... but this comparer is never actually used by
            // DirectSpellChecker, it's only used for merging results from multiple shards
            // in "distributed spellcheck", and it's inefficient in other ways too...

            // cheaper to do the conversion up front once
            Int32sRef targetPoints = ToInt32sRef(target);
            Int32sRef otherPoints = ToInt32sRef(other);
            int n = targetPoints.Length;
            int m = otherPoints.Length;

            if (n == 0 || m == 0)
            {
                return n == m ? 0 : Math.Max(n, m);
            }

            // cost table: d[i][j] covers the first i target elements and the first j other elements
            int[][] d = RectangularArrays.ReturnRectangularArray<int>(n + 1, m + 1);

            for (int row = 0; row <= n; row++)
            {
                d[row][0] = row;
            }

            for (int col = 0; col <= m; col++)
            {
                d[0][col] = col;
            }

            for (int j = 1; j <= m; j++)
            {
                int otherPoint = otherPoints.Int32s[j - 1];

                for (int i = 1; i <= n; i++)
                {
                    int cost = targetPoints.Int32s[i - 1] == otherPoint ? 0 : 1;

                    // minimum of cell to the left+1, to the top+1, diagonally left and up +cost
                    int best = Math.Min(Math.Min(d[i - 1][j] + 1, d[i][j - 1] + 1), d[i - 1][j - 1] + cost);

                    // transposition of the last two elements
                    bool swapped = i > 1
                        && j > 1
                        && targetPoints.Int32s[i - 1] == otherPoints.Int32s[j - 2]
                        && targetPoints.Int32s[i - 2] == otherPoints.Int32s[j - 1];
                    if (swapped)
                    {
                        best = Math.Min(best, d[i - 2][j - 2] + cost);
                    }

                    d[i][j] = best;
                }
            }

            return 1.0f - ((float)d[n][m] / Math.Min(m, n));
        }
        /// <summary>
        /// Computes the edit distance between two strings on their <see cref="char"/> values, counting
        /// insertions, deletions, substitutions and swaps of two adjacent characters.
        /// </summary>
        /// <param name="target">first string</param>
        /// <param name="other">second string</param>
        /// <returns>
        /// the distance; if either string is empty, <c>0</c> when both are empty and otherwise the
        /// length of the longer one
        /// </returns>
        private int GetTDistance(string target, string other)
        {
            int n = target.Length;
            int m = other.Length;

            if (n == 0 || m == 0)
            {
                return n == m ? 0 : Math.Max(n, m);
            }

            // cost table: d[i][j] covers the first i chars of target and the first j chars of other
            int[][] d = RectangularArrays.ReturnRectangularArray<int>(n + 1, m + 1);

            for (int row = 0; row <= n; row++)
            {
                d[row][0] = row;
            }

            for (int col = 0; col <= m; col++)
            {
                d[0][col] = col;
            }

            for (int j = 1; j <= m; j++)
            {
                char otherChar = other[j - 1];

                for (int i = 1; i <= n; i++)
                {
                    int cost = target[i - 1] == otherChar ? 0 : 1;

                    // minimum of cell to the left+1, to the top+1, diagonally left and up +cost
                    int best = Math.Min(Math.Min(d[i - 1][j] + 1, d[i][j - 1] + 1), d[i - 1][j - 1] + cost);

                    // transposition of the last two characters
                    bool swapped = i > 1
                        && j > 1
                        && target[i - 1] == other[j - 2]
                        && target[i - 2] == other[j - 1];
                    if (swapped)
                    {
                        best = Math.Min(best, d[i - 2][j - 2] + cost);
                    }

                    d[i][j] = best;
                }
            }

            // the bottom-right cell holds the cost for the two complete strings
            return Math.Abs(d[n][m]);
        }
Exemple #9
0
        /// <summary>
        /// Construct a patch string that transforms a to b.
        /// <para/>
        /// The patch is assembled while walking back from the ends of both strings. It consists of
        /// two-character commands: <c>D</c> + count (delete), <c>-</c> + count (skip unchanged
        /// characters), <c>I</c> + char (insert) and <c>R</c> + char (replace). Counts are encoded as
        /// a character, where <c>'a'</c> stands for one.
        /// </summary>
        /// <param name="a">1st string</param>
        /// <param name="b">2nd string</param>
        /// <returns>the patch string, or <c>null</c> if either argument is <c>null</c></returns>
        public string Exec(string a, string b)
        {
            if (a is null || b is null)
            {
                return null;
            }

            // Step codes stored in "way"; they are also the indexes into the candidate cost array.
            const int D = 0; // diagonal step, no change
            const int X = 1; // step along a: delete
            const int Y = 2; // step along b: insert
            const int R = 3; // diagonal step: replace

            int maxx = a.Length + 1;
            int maxy = b.Length + 1;

            /*
             * grow the shared tables if needed => processing speed up on later calls
             */
            if (maxx >= sizex || maxy >= sizey)
            {
                sizex = maxx + 8;
                sizey = maxy + 8;
                net = RectangularArrays.ReturnRectangularArray<int>(sizex, sizey);
                way = RectangularArrays.ReturnRectangularArray<int>(sizex, sizey);
            }

            /*
             * clear the part of the cost network used by this call
             */
            for (int x = 0; x < maxx; x++)
            {
                int[] netRow = net[x];
                for (int y = 0; y < maxy; y++)
                {
                    netRow[y] = 0;
                }
            }

            /*
             * borders: the first column can only be reached by deletes, the first row only by inserts
             */
            for (int x = 1; x < maxx; x++)
            {
                net[x][0] = x;
                way[x][0] = X;
            }
            for (int y = 1; y < maxy; y++)
            {
                net[0][y] = y;
                way[0][y] = Y;
            }

            /*
             * fill the interior: remember the cheapest step into each cell and its cost
             */
            int[] go = new int[4];
            for (int x = 1; x < maxx; x++)
            {
                for (int y = 1; y < maxy; y++)
                {
                    int diagonal = net[x - 1][y - 1];

                    go[X] = net[x - 1][y] + DELETE;
                    go[Y] = net[x][y - 1] + INSERT;
                    go[R] = diagonal + REPLACE;
                    // the plain diagonal costs NOOP when the characters match, 100 otherwise
                    go[D] = diagonal + ((a[x - 1] == b[y - 1]) ? NOOP : 100);

                    // Order of the comparisons matters for ties: X wins a tie against D,
                    // while Y and R must be strictly cheaper to win.
                    int min = D;
                    if (go[min] >= go[X])
                    {
                        min = X;
                    }
                    if (go[min] > go[Y])
                    {
                        min = Y;
                    }
                    if (go[min] > go[R])
                    {
                        min = R;
                    }

                    way[x][y] = min;
                    net[x][y] = (ushort)go[min];
                }
            }

            // read the patch string
            StringBuilder result = new StringBuilder();
            char none = (char)('a' - 1); // counter value meaning "nothing pending"
            char deletes = none;
            char equals = none;

            // Emits the pending delete count, if any, and resets it.
            void FlushDeletes()
            {
                if (deletes != none)
                {
                    result.Append('D').Append(deletes);
                    deletes = none;
                }
            }

            // Emits the pending unchanged-character count, if any, and resets it.
            void FlushEquals()
            {
                if (equals != none)
                {
                    result.Append('-').Append(equals);
                    equals = none;
                }
            }

            int i = maxx - 1;
            int j = maxy - 1;
            while (i + j != 0)
            {
                switch (way[i][j])
                {
                    case X: // delete
                        FlushEquals();
                        deletes++;
                        i--;
                        break;

                    case Y: // insert
                        FlushDeletes();
                        FlushEquals();
                        j--;
                        result.Append('I');
                        result.Append(b[j]);
                        break;

                    case R: // replace
                        FlushDeletes();
                        FlushEquals();
                        j--;
                        result.Append('R');
                        result.Append(b[j]);
                        i--;
                        break;

                    case D: // no change
                        FlushDeletes();
                        equals++;
                        i--;
                        j--;
                        break;
                }
            }

            // Only pending deletes are written at the end; a pending unchanged count is not emitted.
            FlushDeletes();

            return result.ToString();
        }