Exemple #1
0
        /// <summary>
        /// Randomized test: generates a random document and a random set of synonym
        /// rules, registers the rules with a <c>SynonymMap.Builder</c>, computes the
        /// expected output with <c>SlowSynMatcher</c> and hands both to <c>Verify</c>.
        /// </summary>
        public virtual void TestRandom()
        {
            int alphabetSize = TestUtil.NextInt32(Random, 2, 7);
            int docLen = AtLeast(3000);
            string document = GetRandomString('a', alphabetSize, docLen);

            if (Verbose)
            {
                Console.WriteLine("TEST: doc=" + document);
            }

            int numSyn = AtLeast(5);

            // synMap indexes the rules by input string so that a repeated input adds
            // another output to the existing rule; syns holds the same rules in
            // creation order.
            IDictionary<string, OneSyn> synMap = new Dictionary<string, OneSyn>();
            IList<OneSyn> syns = new JCG.List<OneSyn>();
            bool dedup = Random.nextBoolean();

            if (Verbose)
            {
                Console.WriteLine("  dedup=" + dedup);
            }

            b = new SynonymMap.Builder(dedup);
            for (int synIDX = 0; synIDX < numSyn; synIDX++)
            {
                string synIn = GetRandomString('a', alphabetSize, TestUtil.NextInt32(Random, 1, 5)).Trim();
                if (!synMap.TryGetValue(synIn, out OneSyn s) || s is null)
                {
                    // First time this input is seen: create its rule.
                    s = new OneSyn();
                    s.@in = synIn;
                    s.@out = new JCG.List<string>();
                    s.keepOrig = Random.nextBoolean();
                    syns.Add(s);
                    synMap[synIn] = s;
                }

                string synOut = GetRandomString('0', 10, TestUtil.NextInt32(Random, 1, 5)).Trim();
                // Restored statement: this line was corrupted in the scraped source.
                s.@out.Add(synOut);
                Add(synIn, synOut, s.keepOrig);

                if (Verbose)
                {
                    Console.WriteLine("  syns[" + synIDX + "] = " + s.@in + " -> " + s.@out + " keepOrig=" + s.keepOrig);
                }
            }

            // Run a one-token tokenizer to completion and dispose it before wrapping it
            // in the filter under test.
            tokensIn = new MockTokenizer(new StringReader("a"), MockTokenizer.WHITESPACE, true);
            tokensIn.Reset();
            assertTrue(tokensIn.IncrementToken());
            assertFalse(tokensIn.IncrementToken());
            tokensIn.End();
            tokensIn.Dispose();

            tokensOut = new SynonymFilter(tokensIn, b.Build(), true);
            termAtt = tokensOut.AddAttribute<ICharTermAttribute>();
            posIncrAtt = tokensOut.AddAttribute<IPositionIncrementAttribute>();
            posLenAtt = tokensOut.AddAttribute<IPositionLengthAttribute>();
            offsetAtt = tokensOut.AddAttribute<IOffsetAttribute>();

            if (dedup)
            {
                // The builder was created with dedup enabled, so the reference rules
                // are pruned before computing the expected output.
                PruneDups(syns);
            }

            string expected = SlowSynMatcher(document, syns, 5);

            if (Verbose)
            {
                Console.WriteLine("TEST: expected=" + expected);
            }

            Verify(document, expected);
        }
Exemple #2
0
        /// <summary>
        /// Brute-force reference matcher that builds the expected output string for
        /// <paramref name="doc"/> under the given synonym rules.
        /// </summary>
        /// <param name="doc">
        /// Input text. Its length is asserted to be even and every match is asserted to
        /// start at an even offset (presumably single-character tokens each followed by
        /// one space; confirm against the generator).
        /// </param>
        /// <param name="syns">Synonym rules; each rule's input is asserted to have odd length.</param>
        /// <param name="maxOutputLength">Number of extra output slots reserved past the last input token.</param>
        /// <returns>
        /// Positions separated by a single space. Within a position, the kept original
        /// token and every synonym output are separated by '/'. Synonym outputs that
        /// fall inside the input range carry a ":endOffset_posLen" suffix.
        /// </returns>
        protected virtual string SlowSynMatcher(string doc, IList<OneSyn> syns, int maxOutputLength)
        {
            assertTrue(doc.Length % 2 == 0);
            int numInputs = doc.Length / 2;

            bool[] keepOrigs = new bool[numInputs];
            bool[] hasMatch = new bool[numInputs];
            Arrays.Fill(keepOrigs, false);
            string[] outputs = new string[numInputs + maxOutputLength];
            OneSyn[] matches = new OneSyn[numInputs];

            // Pass 1: for every start position remember the longest rule matching there.
            foreach (OneSyn syn in syns)
            {
                int idx = -1;
                while (true)
                {
                    idx = doc.IndexOf(syn.@in, 1 + idx, StringComparison.Ordinal);
                    if (idx == -1)
                    {
                        break;
                    }
                    assertTrue(idx % 2 == 0);
                    int matchIDX = idx / 2;
                    assertTrue(syn.@in.Length % 2 == 1);
                    if (matches[matchIDX] is null)
                    {
                        matches[matchIDX] = syn;
                    }
                    else if (syn.@in.Length > matches[matchIDX].@in.Length)
                    {
                        // Greedy conflict resolution: longer match wins:
                        matches[matchIDX] = syn;
                    }
                    else
                    {
                        // Two distinct rules can never have equally long inputs at one position.
                        assertTrue(syn.@in.Length < matches[matchIDX].@in.Length);
                    }
                }
            }

            // Greedy conflict resolution: if syn matches a range of inputs,
            // it prevents other syns from matching that range
            for (int inputIDX = 0; inputIDX < numInputs; inputIDX++)
            {
                OneSyn match = matches[inputIDX];
                if (match != null)
                {
                    // Number of input tokens covered by the match.
                    int synInLength = (1 + match.@in.Length) / 2;
                    for (int nextInputIDX = inputIDX + 1; nextInputIDX < numInputs && nextInputIDX < (inputIDX + synInLength); nextInputIDX++)
                    {
                        matches[nextInputIDX] = null;
                    }
                }
            }

            // Fill overlapping outputs:
            for (int inputIDX = 0; inputIDX < numInputs; inputIDX++)
            {
                OneSyn syn = matches[inputIDX];
                if (syn is null)
                {
                    continue;
                }

                int synInTokens = (1 + syn.@in.Length) / 2;
                for (int idx = 0; idx < synInTokens; idx++)
                {
                    hasMatch[inputIDX + idx] = true;
                    keepOrigs[inputIDX + idx] |= syn.keepOrig;
                }

                foreach (string synOut in syn.@out)
                {
                    string[] synOutputs = synOut.Split(' ').TrimEnd();
                    assertEquals(synOutputs.Length, (1 + synOut.Length) / 2);
                    int matchEnd = inputIDX + synOutputs.Length;
                    int synUpto = 0;
                    for (int matchIDX = inputIDX; matchIDX < matchEnd; matchIDX++)
                    {
                        if (outputs[matchIDX] is null)
                        {
                            outputs[matchIDX] = synOutputs[synUpto++];
                        }
                        else
                        {
                            outputs[matchIDX] = outputs[matchIDX] + "/" + synOutputs[synUpto++];
                        }

                        // Output slots past the end of the input get no offset/posLen suffix.
                        if (matchIDX < numInputs)
                        {
                            int endOffset;
                            int posLen;
                            if (synOutputs.Length == 1)
                            {
                                // Add full endOffset
                                endOffset = (inputIDX * 2) + syn.@in.Length;
                                posLen = syn.keepOrig ? synInTokens : 1;
                            }
                            else
                            {
                                // Add endOffset matching input token's
                                endOffset = (matchIDX * 2) + 1;
                                posLen = 1;
                            }
                            outputs[matchIDX] = outputs[matchIDX] + ":" + endOffset + "_" + posLen;
                        }
                    }
                }
            }

            StringBuilder sb = new StringBuilder();

            string[] inputTokens = doc.Split(' ').TrimEnd();
            int limit = inputTokens.Length + maxOutputLength;

            for (int inputIDX = 0; inputIDX < limit; inputIDX++)
            {
                bool posHasOutput = false;
                if (inputIDX >= numInputs && outputs[inputIDX] is null)
                {
                    // Past the input and nothing spilled over: done.
                    break;
                }
                if (inputIDX < numInputs && (!hasMatch[inputIDX] || keepOrigs[inputIDX]))
                {
                    // The original token survives when unmatched or when keepOrig is set.
                    assertTrue(inputTokens[inputIDX].Length != 0);
                    sb.Append(inputTokens[inputIDX]);
                    posHasOutput = true;
                }

                if (outputs[inputIDX] != null)
                {
                    if (posHasOutput)
                    {
                        sb.Append('/');
                    }
                    sb.Append(outputs[inputIDX]);
                }
                else if (!posHasOutput)
                {
                    // Position produced nothing at all; emit no separator for it.
                    continue;
                }
                if (inputIDX < limit - 1)
                {
                    sb.Append(' ');
                }
            }

            return sb.ToString();
        }
        /// <summary>
        /// Brute-force reference matcher that builds the expected output string for
        /// <paramref name="doc"/> under the given synonym rules.
        /// </summary>
        /// <remarks>
        /// NOTE(review): <c>Arrays.fill</c> and <c>Split(" ", true)</c> are left exactly
        /// as the Java-to-C# converter emitted them; they must resolve to helpers
        /// declared outside this method. Confirm they exist.
        /// </remarks>
        /// <param name="doc">
        /// Input text. Its length is asserted to be even and every match is asserted to
        /// start at an even offset.
        /// </param>
        /// <param name="syns">Synonym rules; each rule's input is asserted to have odd length.</param>
        /// <param name="maxOutputLength">Number of extra output slots reserved past the last input token.</param>
        /// <returns>
        /// Positions separated by a single space. Within a position, the kept original
        /// token and every synonym output are separated by '/'. Synonym outputs that
        /// fall inside the input range carry a ":endOffset_posLen" suffix.
        /// </returns>
        public virtual string slowSynMatcher(string doc, IList<OneSyn> syns, int maxOutputLength)
        {
            assertTrue(doc.Length % 2 == 0);
            int numInputs = doc.Length / 2;
            bool[] keepOrigs = new bool[numInputs];
            bool[] hasMatch = new bool[numInputs];
            Arrays.fill(keepOrigs, false);
            string[] outputs = new string[numInputs + maxOutputLength];
            OneSyn[] matches = new OneSyn[numInputs];

            // Pass 1: for every start position remember the longest rule matching there.
            foreach (OneSyn syn in syns)
            {
                int idx = -1;
                while (true)
                {
                    idx = doc.IndexOf(syn.@in, 1 + idx, StringComparison.Ordinal);
                    if (idx == -1)
                    {
                        break;
                    }
                    assertTrue(idx % 2 == 0);
                    int matchIDX = idx / 2;
                    assertTrue(syn.@in.Length % 2 == 1);
                    if (matches[matchIDX] == null)
                    {
                        matches[matchIDX] = syn;
                    }
                    else if (syn.@in.Length > matches[matchIDX].@in.Length)
                    {
                        // Greedy conflict resolution: longer match wins:
                        matches[matchIDX] = syn;
                    }
                    else
                    {
                        assertTrue(syn.@in.Length < matches[matchIDX].@in.Length);
                    }
                }
            }

            // Greedy conflict resolution: if syn matches a range of inputs,
            // it prevents other syns from matching that range
            for (int inputIDX = 0; inputIDX < numInputs; inputIDX++)
            {
                OneSyn match = matches[inputIDX];
                if (match != null)
                {
                    // Number of input tokens covered by the match.
                    int synInLength = (1 + match.@in.Length) / 2;
                    for (int nextInputIDX = inputIDX + 1; nextInputIDX < numInputs && nextInputIDX < (inputIDX + synInLength); nextInputIDX++)
                    {
                        matches[nextInputIDX] = null;
                    }
                }
            }

            // Fill overlapping outputs:
            for (int inputIDX = 0; inputIDX < numInputs; inputIDX++)
            {
                OneSyn syn = matches[inputIDX];
                if (syn == null)
                {
                    continue;
                }
                for (int idx = 0; idx < (1 + syn.@in.Length) / 2; idx++)
                {
                    hasMatch[inputIDX + idx] = true;
                    keepOrigs[inputIDX + idx] |= syn.keepOrig;
                }
                foreach (string synOut in syn.@out)
                {
                    string[] synOutputs = synOut.Split(" ", true);
                    assertEquals(synOutputs.Length, (1 + synOut.Length) / 2);
                    int matchEnd = inputIDX + synOutputs.Length;
                    int synUpto = 0;
                    for (int matchIDX = inputIDX; matchIDX < matchEnd; matchIDX++)
                    {
                        if (outputs[matchIDX] == null)
                        {
                            outputs[matchIDX] = synOutputs[synUpto++];
                        }
                        else
                        {
                            outputs[matchIDX] = outputs[matchIDX] + "/" + synOutputs[synUpto++];
                        }

                        // Output slots past the end of the input get no offset/posLen suffix.
                        int endOffset;
                        if (matchIDX < numInputs)
                        {
                            int posLen;
                            if (synOutputs.Length == 1)
                            {
                                // Add full endOffset
                                endOffset = (inputIDX * 2) + syn.@in.Length;
                                posLen = syn.keepOrig ? (1 + syn.@in.Length) / 2 : 1;
                            }
                            else
                            {
                                // Add endOffset matching input token's
                                endOffset = (matchIDX * 2) + 1;
                                posLen = 1;
                            }
                            outputs[matchIDX] = outputs[matchIDX] + ":" + endOffset + "_" + posLen;
                        }
                    }
                }
            }

            StringBuilder sb = new StringBuilder();
            string[] inputTokens = doc.Split(" ", true);
            int limit = inputTokens.Length + maxOutputLength;
            for (int inputIDX = 0; inputIDX < limit; inputIDX++)
            {
                bool posHasOutput = false;
                if (inputIDX >= numInputs && outputs[inputIDX] == null)
                {
                    // Past the input and nothing spilled over: done.
                    break;
                }
                if (inputIDX < numInputs && (!hasMatch[inputIDX] || keepOrigs[inputIDX]))
                {
                    // The original token survives when unmatched or when keepOrig is set.
                    assertTrue(inputTokens[inputIDX].Length != 0);
                    sb.Append(inputTokens[inputIDX]);
                    posHasOutput = true;
                }

                if (outputs[inputIDX] != null)
                {
                    if (posHasOutput)
                    {
                        sb.Append('/');
                    }
                    sb.Append(outputs[inputIDX]);
                }
                else if (!posHasOutput)
                {
                    // Position produced nothing at all; emit no separator for it.
                    continue;
                }
                if (inputIDX < limit - 1)
                {
                    sb.Append(' ');
                }
            }

            return sb.ToString();
        }
        /// <summary>
        /// Randomized test: generates a random document and a random set of synonym
        /// rules, registers the rules with a <c>SynonymMap.Builder</c>, computes the
        /// expected output with <c>slowSynMatcher</c> and hands both to <c>verify</c>.
        /// </summary>
        /// <remarks>
        /// NOTE(review): the lower-case member names (<c>random()</c>, <c>atLeast</c>,
        /// <c>reset</c>, <c>addAttribute(typeof(...))</c>, ...) are left as the
        /// Java-to-C# converter emitted them; they must resolve to members declared
        /// outside this method.
        /// </remarks>
        public virtual void testRandom()
        {
            int alphabetSize = TestUtil.Next(random(), 2, 7);
            int docLen = atLeast(3000);
            string document = getRandomString('a', alphabetSize, docLen);

            if (VERBOSE)
            {
                Console.WriteLine("TEST: doc=" + document);
            }

            int numSyn = atLeast(5);

            // synMap indexes the rules by input string so that a repeated input adds
            // another output to the existing rule; syns holds the same rules in
            // creation order.
            IDictionary<string, OneSyn> synMap = new Dictionary<string, OneSyn>();
            IList<OneSyn> syns = new List<OneSyn>();
            bool dedup = random().nextBoolean();
            if (VERBOSE)
            {
                Console.WriteLine("  dedup=" + dedup);
            }

            b = new SynonymMap.Builder(dedup);
            for (int synIDX = 0; synIDX < numSyn; synIDX++)
            {
                string synIn = getRandomString('a', alphabetSize, TestUtil.Next(random(), 1, 5)).Trim();

                // Unlike java.util.Map.get, the C# dictionary indexer throws
                // KeyNotFoundException for a missing key, so probe with TryGetValue.
                OneSyn s;
                if (!synMap.TryGetValue(synIn, out s) || s == null)
                {
                    s = new OneSyn();
                    s.@in = synIn;
                    syns.Add(s);
                    s.@out = new List<string>();
                    synMap[synIn] = s;
                    s.keepOrig = random().nextBoolean();
                }

                string synOut = getRandomString('0', 10, TestUtil.Next(random(), 1, 5)).Trim();
                // Restored statement: this line was corrupted in the scraped source.
                s.@out.Add(synOut);
                add(synIn, synOut, s.keepOrig);
                if (VERBOSE)
                {
                    // List<T>.ToString() yields only the type name, so join the elements explicitly.
                    Console.WriteLine("  syns[" + synIDX + "] = " + s.@in + " -> [" + string.Join(", ", s.@out) + "] keepOrig=" + s.keepOrig);
                }
            }

            // Run a one-token tokenizer to completion and close it before wrapping it
            // in the filter under test.
            tokensIn = new MockTokenizer(new StringReader("a"), MockTokenizer.WHITESPACE, true);
            tokensIn.reset();
            assertTrue(tokensIn.incrementToken());
            assertFalse(tokensIn.incrementToken());
            tokensIn.end();
            tokensIn.close();

            tokensOut = new SynonymFilter(tokensIn, b.build(), true);
            termAtt = tokensOut.addAttribute(typeof(CharTermAttribute));
            posIncrAtt = tokensOut.addAttribute(typeof(PositionIncrementAttribute));
            posLenAtt = tokensOut.addAttribute(typeof(PositionLengthAttribute));
            offsetAtt = tokensOut.addAttribute(typeof(OffsetAttribute));

            if (dedup)
            {
                // The builder was created with dedup enabled, so the reference rules
                // are pruned before computing the expected output.
                pruneDups(syns);
            }

            string expected = slowSynMatcher(document, syns, 5);

            if (VERBOSE)
            {
                Console.WriteLine("TEST: expected=" + expected);
            }

            verify(document, expected);
        }
        /// <summary>
        /// Randomized test: generates a random document and a random set of synonym
        /// rules, registers the rules with a <c>SynonymMap.Builder</c>, computes the
        /// expected output with <c>SlowSynMatcher</c> and hands both to <c>Verify</c>.
        /// </summary>
        public virtual void TestRandom()
        {
            int alphabetSize = TestUtil.NextInt(Random(), 2, 7);
            int docLen = AtLeast(3000);
            string document = GetRandomString('a', alphabetSize, docLen);

            if (VERBOSE)
            {
                Console.WriteLine("TEST: doc=" + document);
            }

            int numSyn = AtLeast(5);

            // synMap indexes the rules by input string so that a repeated input adds
            // another output to the existing rule; syns holds the same rules in
            // creation order.
            IDictionary<string, OneSyn> synMap = new Dictionary<string, OneSyn>();
            IList<OneSyn> syns = new List<OneSyn>();
            bool dedup = Random().nextBoolean();
            if (VERBOSE)
            {
                Console.WriteLine("  dedup=" + dedup);
            }

            b = new SynonymMap.Builder(dedup);
            for (int synIDX = 0; synIDX < numSyn; synIDX++)
            {
                string synIn = GetRandomString('a', alphabetSize, TestUtil.NextInt(Random(), 1, 5)).Trim();

                // Single lookup instead of ContainsKey followed by the indexer.
                OneSyn s;
                if (!synMap.TryGetValue(synIn, out s) || s == null)
                {
                    s = new OneSyn();
                    s.@in = synIn;
                    syns.Add(s);
                    s.@out = new List<string>();
                    synMap[synIn] = s;
                    s.keepOrig = Random().nextBoolean();
                }

                string synOut = GetRandomString('0', 10, TestUtil.NextInt(Random(), 1, 5)).Trim();
                // Restored statement: this line was corrupted in the scraped source.
                s.@out.Add(synOut);
                Add(synIn, synOut, s.keepOrig);
                if (VERBOSE)
                {
                    // List<T>.ToString() yields only the type name, so join the elements explicitly.
                    Console.WriteLine("  syns[" + synIDX + "] = " + s.@in + " -> [" + string.Join(", ", s.@out) + "] keepOrig=" + s.keepOrig);
                }
            }

            // Run a one-token tokenizer to completion and dispose it before wrapping it
            // in the filter under test.
            tokensIn = new MockTokenizer(new StringReader("a"), MockTokenizer.WHITESPACE, true);
            tokensIn.Reset();
            assertTrue(tokensIn.IncrementToken());
            assertFalse(tokensIn.IncrementToken());
            tokensIn.End();
            tokensIn.Dispose();

            tokensOut = new SynonymFilter(tokensIn, b.Build(), true);
            termAtt = tokensOut.AddAttribute<ICharTermAttribute>();
            posIncrAtt = tokensOut.AddAttribute<IPositionIncrementAttribute>();
            posLenAtt = tokensOut.AddAttribute<IPositionLengthAttribute>();
            offsetAtt = tokensOut.AddAttribute<IOffsetAttribute>();

            if (dedup)
            {
                // The builder was created with dedup enabled, so the reference rules
                // are pruned before computing the expected output.
                PruneDups(syns);
            }

            string expected = SlowSynMatcher(document, syns, 5);

            if (VERBOSE)
            {
                Console.WriteLine("TEST: expected=" + expected);
            }

            Verify(document, expected);
        }