/// <summary>
/// Randomized test of the synonym filter.
/// <para/>
/// Generates a random document and a random set of synonym rules, registers each rule
/// through <c>Add</c> on a fresh <c>SynonymMap.Builder</c>, computes the expected output
/// with <c>SlowSynMatcher</c> and passes document and expectation to <c>Verify</c>.
/// <para/>
/// Assigns the fixture members <c>b</c>, <c>tokensIn</c>, <c>tokensOut</c>, <c>termAtt</c>,
/// <c>posIncrAtt</c>, <c>posLenAtt</c> and <c>offsetAtt</c>.
/// </summary>
public virtual void TestRandom()
{
    int alphabetSize = TestUtil.NextInt32(Random, 2, 7);

    int docLen = AtLeast(3000);
    // Alternative kept from the original source for local debugging: docLen = 50

    string document = GetRandomString('a', alphabetSize, docLen);

    if (Verbose)
    {
        Console.WriteLine("TEST: doc=" + document);
    }

    int numSyn = AtLeast(5);
    // Alternative kept from the original source for local debugging: numSyn = 2

    // synMap indexes the rules by input string; syns keeps them in creation order.
    IDictionary<string, OneSyn> synMap = new Dictionary<string, OneSyn>();
    IList<OneSyn> syns = new JCG.List<OneSyn>();
    bool dedup = Random.nextBoolean();
    if (Verbose)
    {
        Console.WriteLine("  dedup=" + dedup);
    }

    b = new SynonymMap.Builder(dedup);
    for (int synIDX = 0; synIDX < numSyn; synIDX++)
    {
        string synIn = GetRandomString('a', alphabetSize, TestUtil.NextInt32(Random, 1, 5)).Trim();

        // The same random input may be drawn more than once; in that case the new
        // output is appended to the rule that already exists for that input.
        if (!synMap.TryGetValue(synIn, out OneSyn s) || s is null)
        {
            s = new OneSyn();
            s.@in = synIn;
            syns.Add(s);
            s.@out = new JCG.List<string>();
            synMap[synIn] = s;
            s.keepOrig = Random.nextBoolean();
        }

        string synOut = GetRandomString('0', 10, TestUtil.NextInt32(Random, 1, 5)).Trim();
        s.@out.Add(synOut);
        Add(synIn, synOut, s.keepOrig);

        if (Verbose)
        {
            Console.WriteLine("  syns[" + synIDX + "] = " + s.@in + " -> " + s.@out + " keepOrig=" + s.keepOrig);
        }
    }

    // Run a one-token tokenizer through its full Reset/IncrementToken/End/Dispose cycle
    // before it is wrapped by the filter.
    tokensIn = new MockTokenizer(new StringReader("a"), MockTokenizer.WHITESPACE, true);
    tokensIn.Reset();
    assertTrue(tokensIn.IncrementToken());
    assertFalse(tokensIn.IncrementToken());
    tokensIn.End();
    tokensIn.Dispose();

    tokensOut = new SynonymFilter(tokensIn, b.Build(), true);
    termAtt = tokensOut.AddAttribute<ICharTermAttribute>();
    posIncrAtt = tokensOut.AddAttribute<IPositionIncrementAttribute>();
    posLenAtt = tokensOut.AddAttribute<IPositionLengthAttribute>();
    offsetAtt = tokensOut.AddAttribute<IOffsetAttribute>();

    if (dedup)
    {
        // NOTE(review): presumably mirrors the builder's dedup flag on the recorded
        // rules so the expectation matches - confirm against PruneDups.
        PruneDups(syns);
    }

    string expected = SlowSynMatcher(document, syns, 5);

    if (Verbose)
    {
        Console.WriteLine("TEST: expected=" + expected);
    }

    Verify(document, expected);
}
/// <summary>
/// Brute-force computation of the expected output for <paramref name="doc"/> under the
/// given synonym rules, used as the reference for the randomized test.
/// <para/>
/// The assertions in the body require an even <paramref name="doc"/> length, matches at
/// even offsets and odd-length rule inputs. NOTE(review): this is consistent with
/// single-character tokens each followed by one space - confirm against the generator.
/// </summary>
/// <param name="doc">The input document text.</param>
/// <param name="syns">The synonym rules; each rule's <c>@in</c> is searched for in <paramref name="doc"/>.</param>
/// <param name="maxOutputLength">Number of extra output positions allowed past the last input token.</param>
/// <returns>
/// Space-separated positions. Each position holds the original token (when unmatched or
/// when a matching rule has <c>keepOrig</c>) and the rule outputs, joined by <c>/</c>.
/// Outputs at positions inside the input carry a <c>:endOffset_posLen</c> suffix.
/// </returns>
protected virtual string SlowSynMatcher(string doc, IList<OneSyn> syns, int maxOutputLength)
{
    assertTrue(doc.Length % 2 == 0);
    int numInputs = doc.Length / 2;

    // Newly allocated bool arrays are already all-false, so no explicit fill is required.
    bool[] keepOrigs = new bool[numInputs];
    bool[] hasMatch = new bool[numInputs];
    string[] outputs = new string[numInputs + maxOutputLength];
    OneSyn[] matches = new OneSyn[numInputs];

    // Record, for every input position, the rule whose input starts there.
    foreach (OneSyn syn in syns)
    {
        int idx = -1;
        while (true)
        {
            idx = doc.IndexOf(syn.@in, 1 + idx, StringComparison.Ordinal);
            if (idx == -1)
            {
                break;
            }
            assertTrue(idx % 2 == 0);
            int matchIDX = idx / 2;
            assertTrue(syn.@in.Length % 2 == 1);
            if (matches[matchIDX] is null)
            {
                matches[matchIDX] = syn;
            }
            else if (syn.@in.Length > matches[matchIDX].@in.Length)
            {
                // Greedy conflict resolution: longer match wins:
                matches[matchIDX] = syn;
            }
            else
            {
                // Two distinct rules starting at the same position never have equal length.
                assertTrue(syn.@in.Length < matches[matchIDX].@in.Length);
            }
        }
    }

    // Greedy conflict resolution: if syn matches a range of inputs,
    // it prevents other syns from matching that range
    for (int inputIDX = 0; inputIDX < numInputs; inputIDX++)
    {
        OneSyn match = matches[inputIDX];
        if (match != null)
        {
            // Number of input tokens covered by the rule input.
            int synInLength = (1 + match.@in.Length) / 2;
            for (int nextInputIDX = inputIDX + 1; nextInputIDX < numInputs && nextInputIDX < (inputIDX + synInLength); nextInputIDX++)
            {
                matches[nextInputIDX] = null;
            }
        }
    }

    // Fill overlapping outputs:
    for (int inputIDX = 0; inputIDX < numInputs; inputIDX++)
    {
        OneSyn syn = matches[inputIDX];
        if (syn is null)
        {
            continue;
        }

        for (int idx = 0; idx < (1 + syn.@in.Length) / 2; idx++)
        {
            hasMatch[inputIDX + idx] = true;
            keepOrigs[inputIDX + idx] |= syn.keepOrig;
        }

        foreach (string synOut in syn.@out)
        {
            string[] synOutputs = synOut.Split(' ').TrimEnd();
            assertEquals(synOutputs.Length, (1 + synOut.Length) / 2);
            int matchEnd = inputIDX + synOutputs.Length;
            int synUpto = 0;
            for (int matchIDX = inputIDX; matchIDX < matchEnd; matchIDX++)
            {
                if (outputs[matchIDX] is null)
                {
                    outputs[matchIDX] = synOutputs[synUpto++];
                }
                else
                {
                    outputs[matchIDX] = outputs[matchIDX] + "/" + synOutputs[synUpto++];
                }

                // Only positions inside the input get the ":endOffset_posLen" suffix.
                if (matchIDX < numInputs)
                {
                    int endOffset;
                    int posLen;
                    if (synOutputs.Length == 1)
                    {
                        // Add full endOffset
                        endOffset = (inputIDX * 2) + syn.@in.Length;
                        posLen = syn.keepOrig ? (1 + syn.@in.Length) / 2 : 1;
                    }
                    else
                    {
                        // Add endOffset matching input token's
                        endOffset = (matchIDX * 2) + 1;
                        posLen = 1;
                    }
                    outputs[matchIDX] = outputs[matchIDX] + ":" + endOffset + "_" + posLen;
                }
            }
        }
    }

    StringBuilder sb = new StringBuilder();
    string[] inputTokens = doc.Split(' ').TrimEnd();
    int limit = inputTokens.Length + maxOutputLength;
    for (int inputIDX = 0; inputIDX < limit; inputIDX++)
    {
        bool posHasOutput = false;

        // Past the input: stop at the first position without a spilled-over output.
        if (inputIDX >= numInputs && outputs[inputIDX] is null)
        {
            break;
        }

        if (inputIDX < numInputs && (!hasMatch[inputIDX] || keepOrigs[inputIDX]))
        {
            assertTrue(inputTokens[inputIDX].Length != 0);
            sb.Append(inputTokens[inputIDX]);
            posHasOutput = true;
        }

        if (outputs[inputIDX] != null)
        {
            if (posHasOutput)
            {
                sb.Append('/');
            }
            sb.Append(outputs[inputIDX]);
        }
        else if (!posHasOutput)
        {
            // Nothing was emitted at this position, so no separator either.
            continue;
        }

        if (inputIDX < limit - 1)
        {
            sb.Append(' ');
        }
    }

    return sb.ToString();
}
/// <summary>
/// Brute-force computation of the expected output for <paramref name="doc"/> under the
/// given synonym rules (machine-converted from the Java <c>slowSynMatcher</c>).
/// <para/>
/// The assertions in the body require an even <paramref name="doc"/> length, matches at
/// even offsets and odd-length rule inputs. NOTE(review): this is consistent with
/// single-character tokens each followed by one space - confirm against the generator.
/// </summary>
/// <param name="doc">The input document text.</param>
/// <param name="syns">The synonym rules; each rule's <c>@in</c> is searched for in <paramref name="doc"/>.</param>
/// <param name="maxOutputLength">Number of extra output positions allowed past the last input token.</param>
/// <returns>
/// Space-separated positions. Each position holds the original token (when unmatched or
/// when a matching rule has <c>keepOrig</c>) and the rule outputs, joined by <c>/</c>.
/// Outputs at positions inside the input carry a <c>:endOffset_posLen</c> suffix.
/// </returns>
public virtual string slowSynMatcher(string doc, IList<OneSyn> syns, int maxOutputLength)
{
    assertTrue(doc.Length % 2 == 0);
    int numInputs = doc.Length / 2;

    // Newly allocated bool arrays are already all-false; the Java Arrays.fill call
    // emitted by the converter is therefore unnecessary in C#.
    bool[] keepOrigs = new bool[numInputs];
    bool[] hasMatch = new bool[numInputs];
    string[] outputs = new string[numInputs + maxOutputLength];
    OneSyn[] matches = new OneSyn[numInputs];

    // Record, for every input position, the rule whose input starts there.
    foreach (OneSyn syn in syns)
    {
        int idx = -1;
        while (true)
        {
            idx = doc.IndexOf(syn.@in, 1 + idx, StringComparison.Ordinal);
            if (idx == -1)
            {
                break;
            }
            assertTrue(idx % 2 == 0);
            int matchIDX = idx / 2;
            assertTrue(syn.@in.Length % 2 == 1);
            if (matches[matchIDX] == null)
            {
                matches[matchIDX] = syn;
            }
            else if (syn.@in.Length > matches[matchIDX].@in.Length)
            {
                // Greedy conflict resolution: longer match wins:
                matches[matchIDX] = syn;
            }
            else
            {
                // Two distinct rules starting at the same position never have equal length.
                assertTrue(syn.@in.Length < matches[matchIDX].@in.Length);
            }
        }
    }

    // Greedy conflict resolution: if syn matches a range of inputs,
    // it prevents other syns from matching that range
    for (int inputIDX = 0; inputIDX < numInputs; inputIDX++)
    {
        OneSyn match = matches[inputIDX];
        if (match != null)
        {
            // Number of input tokens covered by the rule input.
            int synInLength = (1 + match.@in.Length) / 2;
            for (int nextInputIDX = inputIDX + 1; nextInputIDX < numInputs && nextInputIDX < (inputIDX + synInLength); nextInputIDX++)
            {
                matches[nextInputIDX] = null;
            }
        }
    }

    // Fill overlapping outputs:
    for (int inputIDX = 0; inputIDX < numInputs; inputIDX++)
    {
        OneSyn syn = matches[inputIDX];
        if (syn == null)
        {
            continue;
        }

        for (int idx = 0; idx < (1 + syn.@in.Length) / 2; idx++)
        {
            hasMatch[inputIDX + idx] = true;
            keepOrigs[inputIDX + idx] |= syn.keepOrig;
        }

        foreach (string synOut in syn.@out)
        {
            // NOTE(review): Split(string, bool) is not a BCL overload; presumably the
            // converter's helper that drops trailing empty entries like Java's
            // String.split - confirm it is in scope.
            string[] synOutputs = synOut.Split(" ", true);
            assertEquals(synOutputs.Length, (1 + synOut.Length) / 2);
            int matchEnd = inputIDX + synOutputs.Length;
            int synUpto = 0;
            for (int matchIDX = inputIDX; matchIDX < matchEnd; matchIDX++)
            {
                if (outputs[matchIDX] == null)
                {
                    outputs[matchIDX] = synOutputs[synUpto++];
                }
                else
                {
                    outputs[matchIDX] = outputs[matchIDX] + "/" + synOutputs[synUpto++];
                }

                // Only positions inside the input get the ":endOffset_posLen" suffix.
                if (matchIDX < numInputs)
                {
                    int endOffset;
                    int posLen;
                    if (synOutputs.Length == 1)
                    {
                        // Add full endOffset
                        endOffset = (inputIDX * 2) + syn.@in.Length;
                        posLen = syn.keepOrig ? (1 + syn.@in.Length) / 2 : 1;
                    }
                    else
                    {
                        // Add endOffset matching input token's
                        endOffset = (matchIDX * 2) + 1;
                        posLen = 1;
                    }
                    outputs[matchIDX] = outputs[matchIDX] + ":" + endOffset + "_" + posLen;
                }
            }
        }
    }

    StringBuilder sb = new StringBuilder();
    // NOTE(review): same converter helper overload as above - confirm.
    string[] inputTokens = doc.Split(" ", true);
    int limit = inputTokens.Length + maxOutputLength;
    for (int inputIDX = 0; inputIDX < limit; inputIDX++)
    {
        bool posHasOutput = false;

        // Past the input: stop at the first position without a spilled-over output.
        if (inputIDX >= numInputs && outputs[inputIDX] == null)
        {
            break;
        }

        if (inputIDX < numInputs && (!hasMatch[inputIDX] || keepOrigs[inputIDX]))
        {
            assertTrue(inputTokens[inputIDX].Length != 0);
            sb.Append(inputTokens[inputIDX]);
            posHasOutput = true;
        }

        if (outputs[inputIDX] != null)
        {
            if (posHasOutput)
            {
                sb.Append('/');
            }
            sb.Append(outputs[inputIDX]);
        }
        else if (!posHasOutput)
        {
            // Nothing was emitted at this position, so no separator either.
            continue;
        }

        if (inputIDX < limit - 1)
        {
            sb.Append(' ');
        }
    }

    return sb.ToString();
}
/// <summary>
/// Randomized test of the synonym filter (machine-converted from the Java
/// <c>public void testRandom() throws Exception</c>; .NET has no <c>throws</c> clause).
/// <para/>
/// Generates a random document and a random set of synonym rules, registers each rule
/// through <c>add</c> on a fresh <c>SynonymMap.Builder</c>, computes the expected output
/// with <c>slowSynMatcher</c> and passes document and expectation to <c>verify</c>.
/// <para/>
/// NOTE(review): the Java-style member names used below (<c>random()</c>, <c>atLeast</c>,
/// <c>reset</c>, <c>incrementToken</c>, <c>addAttribute(typeof(...))</c>, ...) are kept
/// exactly as the converter emitted them - confirm they match the ported API.
/// </summary>
public virtual void testRandom()
{
    int alphabetSize = TestUtil.Next(random(), 2, 7);

    int docLen = atLeast(3000);
    // Alternative kept from the original source for local debugging: docLen = 50

    string document = getRandomString('a', alphabetSize, docLen);

    if (VERBOSE)
    {
        Console.WriteLine("TEST: doc=" + document);
    }

    int numSyn = atLeast(5);
    // Alternative kept from the original source for local debugging: numSyn = 2

    // synMap indexes the rules by input string; syns keeps them in creation order.
    IDictionary<string, OneSyn> synMap = new Dictionary<string, OneSyn>();
    IList<OneSyn> syns = new List<OneSyn>();
    bool dedup = random().nextBoolean();
    if (VERBOSE)
    {
        Console.WriteLine("  dedup=" + dedup);
    }

    b = new SynonymMap.Builder(dedup);
    for (int synIDX = 0; synIDX < numSyn; synIDX++)
    {
        string synIn = getRandomString('a', alphabetSize, TestUtil.Next(random(), 1, 5)).Trim();

        // Java's Map.get returns null for a missing key, but the .NET dictionary indexer
        // throws KeyNotFoundException; TryGetValue reproduces the Java behavior.
        OneSyn s;
        if (!synMap.TryGetValue(synIn, out s) || s == null)
        {
            s = new OneSyn();
            s.@in = synIn;
            syns.Add(s);
            s.@out = new List<string>();
            synMap[synIn] = s;
            s.keepOrig = random().nextBoolean();
        }

        string synOut = getRandomString('0', 10, TestUtil.Next(random(), 1, 5)).Trim();
        s.@out.Add(synOut);
        add(synIn, synOut, s.keepOrig);

        if (VERBOSE)
        {
            Console.WriteLine("  syns[" + synIDX + "] = " + s.@in + " -> " + s.@out + " keepOrig=" + s.keepOrig);
        }
    }

    // Run a one-token tokenizer through its full reset/incrementToken/end/close cycle
    // before it is wrapped by the filter.
    tokensIn = new MockTokenizer(new StringReader("a"), MockTokenizer.WHITESPACE, true);
    tokensIn.reset();
    assertTrue(tokensIn.incrementToken());
    assertFalse(tokensIn.incrementToken());
    tokensIn.end();
    tokensIn.close();

    tokensOut = new SynonymFilter(tokensIn, b.build(), true);
    termAtt = tokensOut.addAttribute(typeof(CharTermAttribute));
    posIncrAtt = tokensOut.addAttribute(typeof(PositionIncrementAttribute));
    posLenAtt = tokensOut.addAttribute(typeof(PositionLengthAttribute));
    offsetAtt = tokensOut.addAttribute(typeof(OffsetAttribute));

    if (dedup)
    {
        // NOTE(review): presumably mirrors the builder's dedup flag on the recorded
        // rules so the expectation matches - confirm against pruneDups.
        pruneDups(syns);
    }

    string expected = slowSynMatcher(document, syns, 5);

    if (VERBOSE)
    {
        Console.WriteLine("TEST: expected=" + expected);
    }

    verify(document, expected);
}
/// <summary>
/// Randomized test of the synonym filter.
/// <para/>
/// Generates a random document and a random set of synonym rules, registers each rule
/// through <c>Add</c> on a fresh <c>SynonymMap.Builder</c>, computes the expected output
/// with <c>SlowSynMatcher</c> and passes document and expectation to <c>Verify</c>.
/// <para/>
/// Assigns the fixture members <c>b</c>, <c>tokensIn</c>, <c>tokensOut</c>, <c>termAtt</c>,
/// <c>posIncrAtt</c>, <c>posLenAtt</c> and <c>offsetAtt</c>.
/// </summary>
public virtual void TestRandom()
{
    int alphabetSize = TestUtil.NextInt(Random(), 2, 7);

    int docLen = AtLeast(3000);
    // Alternative kept from the original source for local debugging: docLen = 50

    string document = GetRandomString('a', alphabetSize, docLen);

    if (VERBOSE)
    {
        Console.WriteLine("TEST: doc=" + document);
    }

    int numSyn = AtLeast(5);
    // Alternative kept from the original source for local debugging: numSyn = 2

    // synMap indexes the rules by input string; syns keeps them in creation order.
    IDictionary<string, OneSyn> synMap = new Dictionary<string, OneSyn>();
    IList<OneSyn> syns = new List<OneSyn>();
    bool dedup = Random().nextBoolean();
    if (VERBOSE)
    {
        Console.WriteLine("  dedup=" + dedup);
    }

    b = new SynonymMap.Builder(dedup);
    for (int synIDX = 0; synIDX < numSyn; synIDX++)
    {
        string synIn = GetRandomString('a', alphabetSize, TestUtil.NextInt(Random(), 1, 5)).Trim();

        // Single lookup instead of ContainsKey followed by the indexer. The same random
        // input may be drawn more than once; the new output is then appended to the
        // rule that already exists for that input.
        OneSyn s;
        if (!synMap.TryGetValue(synIn, out s) || s == null)
        {
            s = new OneSyn();
            s.@in = synIn;
            syns.Add(s);
            s.@out = new List<string>();
            synMap[synIn] = s;
            s.keepOrig = Random().nextBoolean();
        }

        string synOut = GetRandomString('0', 10, TestUtil.NextInt(Random(), 1, 5)).Trim();
        s.@out.Add(synOut);
        Add(synIn, synOut, s.keepOrig);

        if (VERBOSE)
        {
            Console.WriteLine("  syns[" + synIDX + "] = " + s.@in + " -> " + s.@out + " keepOrig=" + s.keepOrig);
        }
    }

    // Run a one-token tokenizer through its full Reset/IncrementToken/End/Dispose cycle
    // before it is wrapped by the filter.
    tokensIn = new MockTokenizer(new StringReader("a"), MockTokenizer.WHITESPACE, true);
    tokensIn.Reset();
    assertTrue(tokensIn.IncrementToken());
    assertFalse(tokensIn.IncrementToken());
    tokensIn.End();
    tokensIn.Dispose();

    tokensOut = new SynonymFilter(tokensIn, b.Build(), true);
    termAtt = tokensOut.AddAttribute<ICharTermAttribute>();
    posIncrAtt = tokensOut.AddAttribute<IPositionIncrementAttribute>();
    posLenAtt = tokensOut.AddAttribute<IPositionLengthAttribute>();
    offsetAtt = tokensOut.AddAttribute<IOffsetAttribute>();

    if (dedup)
    {
        // NOTE(review): presumably mirrors the builder's dedup flag on the recorded
        // rules so the expectation matches - confirm against PruneDups.
        PruneDups(syns);
    }

    string expected = SlowSynMatcher(document, syns, 5);

    if (VERBOSE)
    {
        Console.WriteLine("TEST: expected=" + expected);
    }

    Verify(document, expected);
}