/// <summary>
/// Deletes <paramref name="name"/> from <c>cache</c> when the cache reports that it
/// holds the file, and from <c>@delegate</c> otherwise. The existence check and the
/// delete both run while this instance's monitor is held.
/// </summary>
/// <param name="name">Name of the file to delete.</param>
public override void DeleteFile(string name)
{
    UninterruptableMonitor.Enter(this);
    try
    {
        if (VERBOSE)
        {
            Console.WriteLine("nrtdir.deleteFile name=" + name);
        }

        // FileExists is flagged obsolete/deprecated, hence the warning suppression.
#pragma warning disable 612, 618
        bool heldByCache = cache.FileExists(name);
#pragma warning restore 612, 618

        if (heldByCache)
        {
            cache.DeleteFile(name);
        }
        else
        {
            @delegate.DeleteFile(name);
        }
    }
    finally
    {
        UninterruptableMonitor.Exit(this);
    }
}
/// <summary>
/// Creates an output for <paramref name="name"/> in either <c>cache</c> or
/// <c>@delegate</c>, as decided by <c>DoCacheWrite</c>. Before creating the output,
/// a best-effort delete of the same name is issued against the other directory.
/// </summary>
/// <param name="name">Name of the file to create.</param>
/// <param name="context">I/O context forwarded to the chosen directory.</param>
/// <returns>The output created by the chosen directory.</returns>
public override IndexOutput CreateOutput(string name, IOContext context)
{
    if (VERBOSE)
    {
        Console.WriteLine("nrtdir.createOutput name=" + name);
    }

    if (!DoCacheWrite(name, context))
    {
        // Writing straight to the delegate: drop any copy the cache may hold.
        try
        {
            cache.DeleteFile(name);
        }
        catch (IOException) // LUCENENET: IDE0059: Remove unnecessary value assignment
        {
            // this is fine: file may not exist
        }

        return @delegate.CreateOutput(name, context);
    }

    if (VERBOSE)
    {
        Console.WriteLine(" to cache");
    }

    // Writing to the cache: drop any copy the delegate may hold.
    try
    {
        @delegate.DeleteFile(name);
    }
    catch (IOException) // LUCENENET: IDE0059: Remove unnecessary value assignment
    {
        // this is fine: file may not exist
    }

    return cache.CreateOutput(name, context);
}
/// <summary>
/// Opens <paramref name="name"/> for reading from <c>cache</c> when the cache reports
/// that it holds the file, and from <c>@delegate</c> otherwise. Runs while this
/// instance's monitor is held.
/// </summary>
/// <param name="name">Name of the file to open.</param>
/// <param name="context">I/O context forwarded to the chosen directory.</param>
/// <returns>The input opened by the chosen directory.</returns>
public override IndexInput OpenInput(string name, IOContext context)
{
    UninterruptableMonitor.Enter(this);
    try
    {
        if (VERBOSE)
        {
            Console.WriteLine("nrtdir.openInput name=" + name);
        }

#pragma warning disable 612, 618
        bool heldByCache = cache.FileExists(name);
#pragma warning restore 612, 618

        if (!heldByCache)
        {
            return @delegate.OpenInput(name, context);
        }

        if (VERBOSE)
        {
            Console.WriteLine(" from cache");
        }

        return cache.OpenInput(name, context);
    }
    finally
    {
        UninterruptableMonitor.Exit(this);
    }
}
/// <summary>
/// Creates a slicer for <paramref name="name"/> from <c>cache</c> when the cache
/// reports that it holds the file, and from <c>@delegate</c> otherwise. Calls
/// <c>EnsureOpen()</c> first and runs under a lock on this instance.
/// </summary>
/// <param name="name">Name of the file to slice.</param>
/// <param name="context">I/O context forwarded to the chosen directory.</param>
/// <returns>The slicer created by the chosen directory.</returns>
public override IndexInputSlicer CreateSlicer(string name, IOContext context)
{
    lock (this)
    {
        EnsureOpen();

        if (VERBOSE)
        {
            // Previously logged "nrtdir.openInput", which misattributed this call
            // in verbose traces.
            Console.WriteLine("nrtdir.createSlicer name=" + name);
        }

#pragma warning disable 612, 618
        bool heldByCache = cache.FileExists(name);
#pragma warning restore 612, 618

        if (heldByCache)
        {
            if (VERBOSE)
            {
                Console.WriteLine(" from cache");
            }

            return cache.CreateSlicer(name, context);
        }

        return @delegate.CreateSlicer(name, context);
    }
}
// Sets newSuffixStart for the pair (br1, br2): walks the bytes both refs have in
// common and, at the first position where they differ, records the most recent
// position in br1 whose byte is either ASCII (high bit clear) or has both top
// bits set (0xc0 mask). If no byte differs within the common length,
// newSuffixStart becomes that common length.
private void SetNewSuffixStart(BytesRef br1, BytesRef br2)
{
    int commonLength = Math.Min(br1.Length, br2.Length);
    int charStart = 0;
    int suffixStart = commonLength;

    for (int i = 0; i < commonLength; i++)
    {
        byte b1 = br1.Bytes[br1.Offset + i];

        // Track the latest byte that is not a continuation byte (10xxxxxx).
        if ((b1 & 0xc0) == 0xc0 || (b1 & 0x80) == 0)
        {
            charStart = i;
        }

        if (b1 != br2.Bytes[br2.Offset + i])
        {
            suffixStart = charStart;
            break;
        }
    }

    newSuffixStart = suffixStart;
    if (DEBUG_SURROGATES)
    {
        Console.WriteLine(" set newSuffixStart=" + newSuffixStart);
    }
}
/// <summary>
/// Creates an output for <paramref name="name"/> in either <c>cache</c> or
/// <c>@delegate</c>, as decided by <c>DoCacheWrite</c>. Before creating the output,
/// a best-effort delete of the same name is issued against the other directory;
/// exceptions for which <c>IsIOException()</c> is true are ignored there.
/// </summary>
/// <param name="name">Name of the file to create.</param>
/// <param name="context">I/O context forwarded to the chosen directory.</param>
/// <returns>The output created by the chosen directory.</returns>
public override IndexOutput CreateOutput(string name, IOContext context)
{
    if (VERBOSE)
    {
        Console.WriteLine("nrtdir.createOutput name=" + name);
    }

    if (DoCacheWrite(name, context))
    {
        if (VERBOSE)
        {
            Console.WriteLine(" to cache");
        }

        try
        {
            @delegate.DeleteFile(name);
        }
        catch (Exception ex) when (ex.IsIOException())
        {
            // this is fine: file may not exist
        }

        return cache.CreateOutput(name, context);
    }

    try
    {
        cache.DeleteFile(name);
    }
    catch (Exception ex) when (ex.IsIOException())
    {
        // this is fine: file may not exist
    }

    return @delegate.CreateOutput(name, context);
}
// Seek type 2 "continue" (back to the start of the surrogates): scan the
// stripped suffix from the prior term, backwards. If there was an E in that
// part, try to seek back to S. If that seek finds a matching term, go there.
// Returns true when such a seek was performed.
private bool DoContinue()
{
    if (DEBUG_SURROGATES)
    {
        Console.WriteLine(" try cont");
    }

    int limit = Math.Min(newSuffixStart, scratchTerm.Length - 1);

    for (int pos = prevTerm.Length - 1; pos > limit; pos--)
    {
        if (IsHighBMPChar(prevTerm.Bytes, pos))
        {
            if (DEBUG_SURROGATES)
            {
                Console.WriteLine(" found E pos=" + pos + " vs len=" + prevTerm.Length);
            }

            if (SeekToNonBMP(seekTermEnum, prevTerm, pos))
            {
                // TODO: more efficient seek?
                outerInstance.TermsDict.SeekEnum(termEnum, seekTermEnum.Term(), true);
                //newSuffixStart = pos+4;
                newSuffixStart = pos;
                scratchTerm.CopyBytes(termEnum.Term().Bytes);
                if (DEBUG_SURROGATES)
                {
                    Console.WriteLine(" seek!");
                }

                return true;
            }

            if (DEBUG_SURROGATES)
            {
                Console.WriteLine(" no seek");
            }
        }

        // Shorten prevTerm in place so that we don't redo this loop if we
        // come back here. Only cut at a byte that is not a continuation byte.
        byte current = prevTerm.Bytes[pos];
        if ((current & 0xc0) == 0xc0 || (current & 0x80) == 0)
        {
            prevTerm.Length = pos;
        }
    }

    return false;
}
/// <summary>
/// Calls <c>UnCache</c> for each of the given files and then forwards the sync
/// request to <c>@delegate</c>.
/// </summary>
/// <param name="fileNames">Names of the files to sync.</param>
public override void Sync(ICollection<string> fileNames)
{
    if (VERBOSE)
    {
        // Concatenating the collection itself would print its type name in .NET,
        // so the names are joined explicitly.
        Console.WriteLine("nrtdir.sync files=[" + string.Join(", ", fileNames) + "]");
    }

    foreach (string fileName in fileNames)
    {
        UnCache(fileName);
    }

    @delegate.Sync(fileNames);
}
// Copies fileName from cache into @delegate and then deletes it from cache.
// Does nothing when the cache no longer reports the file as present.
private void UnCache(string fileName)
{
    // Only let one thread uncache at a time; this only
    // happens during commit() or close():
    UninterruptableMonitor.Enter(uncacheLock);
    try
    {
        if (VERBOSE)
        {
            Console.WriteLine("nrtdir.unCache name=" + fileName);
        }

#pragma warning disable 612, 618
        bool stillCached = cache.FileExists(fileName);
#pragma warning restore 612, 618

        if (!stillCached)
        {
            // Another thread beat us...
            return;
        }

        IOContext ioContext = IOContext.DEFAULT;
        IndexOutput destination = @delegate.CreateOutput(fileName, ioContext);
        IndexInput source = null;
        try
        {
            source = cache.OpenInput(fileName, ioContext);
            destination.CopyBytes(source, source.Length);
        }
        finally
        {
            // Both ends are disposed whether or not the copy succeeded.
            IOUtils.Dispose(source, destination);
        }

        // Lock order: uncacheLock -> this
        UninterruptableMonitor.Enter(this);
        try
        {
            // Must sync here because other sync methods have
            // if (cache.fileExists(name)) { ... } else { ... }:
            cache.DeleteFile(fileName);
        }
        finally
        {
            UninterruptableMonitor.Exit(this);
        }
    }
    finally
    {
        UninterruptableMonitor.Exit(uncacheLock);
    }
}
/// <summary>
/// Deletes <paramref name="name"/> from <c>cache</c> when the cache reports that it
/// holds the file, and from <c>@delegate</c> otherwise. Runs under a lock on this
/// instance.
/// </summary>
/// <param name="name">Name of the file to delete.</param>
public override void DeleteFile(string name)
{
    lock (this)
    {
        if (VERBOSE)
        {
            Console.WriteLine("nrtdir.deleteFile name=" + name);
        }

#pragma warning disable 612, 618
        bool heldByCache = cache.FileExists(name);
#pragma warning restore 612, 618

        if (!heldByCache)
        {
            @delegate.DeleteFile(name);
            return;
        }

        cache.DeleteFile(name);
    }
}
/// <summary>
/// Creates an output for <paramref name="name"/> in either <c>cache</c> or
/// <c>@delegate</c>, as decided by <c>DoCacheWrite</c>. Before creating the output,
/// a best-effort delete of the same name is issued against the other directory.
/// </summary>
/// <param name="name">Name of the file to create.</param>
/// <param name="context">I/O context forwarded to the chosen directory.</param>
/// <returns>The output created by the chosen directory.</returns>
public override IndexOutput CreateOutput(string name, IOContext context)
{
    if (VERBOSE)
    {
        Console.WriteLine("nrtdir.createOutput name=" + name);
    }

    if (DoCacheWrite(name, context))
    {
        if (VERBOSE)
        {
            Console.WriteLine(" to cache");
        }

        try
        {
            @delegate.DeleteFile(name);
        }
        catch (IOException) // no variable: the exception is intentionally unused
        {
            // this is fine: file may not exist
        }

        return cache.CreateOutput(name, context);
    }
    else
    {
        try
        {
            cache.DeleteFile(name);
        }
        catch (IOException) // no variable: the exception is intentionally unused
        {
            // this is fine: file may not exist
        }

        return @delegate.CreateOutput(name, context);
    }
}
/// <summary>
/// Lock stress-test client. Expects exactly seven arguments, in this order:
/// <list type="number">
///   <item><description>myID: int from 0 .. 255 (should be unique for the test process)</description></item>
///   <item><description>verifierHost: hostname that LockVerifyServer is listening on</description></item>
///   <item><description>verifierPort: port that LockVerifyServer is listening on</description></item>
///   <item><description>lockFactoryClassName: primary LockFactory class to use</description></item>
///   <item><description>lockDirName: path to the lock directory (only applied to an FSLockFactory)</description></item>
///   <item><description>sleepTimeMS: milliseconds to pause between each lock obtain/release</description></item>
///   <item><description>count: number of locking tries</description></item>
/// </list>
/// Run multiple instances of this process, each with its own unique ID and each
/// pointing to the same lock directory, to verify that locking is working
/// correctly. Make sure LockVerifyServer is running first.
/// </summary>
/// <param name="args">The seven command-line arguments described above.</param>
/// <exception cref="ArgumentException">
/// The argument count is not 7, or myID is outside 0..255.
/// </exception>
/// <exception cref="IOException">
/// The lock factory type cannot be found or instantiated, or the server's start
/// signal is not the expected value.
/// </exception>
public static void Main(string[] args)
{
    if (args.Length != 7)
    {
        // LUCENENET specific - our wrapper console shows the correct usage
        throw new ArgumentException();
    }

    int arg = 0;
    int myID = Convert.ToInt32(args[arg++], CultureInfo.InvariantCulture);

    if (myID < 0 || myID > 255)
    {
        throw new ArgumentException("ID must be a unique int 0..255");
    }

    string verifierHost = args[arg++];
    int verifierPort = Convert.ToInt32(args[arg++], CultureInfo.InvariantCulture);
    string lockFactoryClassName = args[arg++];
    string lockDirName = args[arg++];
    int sleepTimeMS = Convert.ToInt32(args[arg++], CultureInfo.InvariantCulture);
    int count = Convert.ToInt32(args[arg++], CultureInfo.InvariantCulture);

    // addr is only used for the progress message below; the socket connects by host name.
    IPAddress[] addresses = Dns.GetHostAddressesAsync(verifierHost).Result;
    IPAddress addr = addresses.Length > 0 ? addresses[0] : null;

    Type c;
    try
    {
        c = Type.GetType(lockFactoryClassName);
        if (c == null)
        {
            // LUCENENET: try again, this time with the Store namespace
            c = Type.GetType("Lucene.Net.Store." + lockFactoryClassName);
        }
    }
    catch (Exception e)
    {
        throw new IOException("unable to find LockClass " + lockFactoryClassName, e);
    }

    // Type.GetType returns null (it does not throw) when the type is not found.
    // Without this check the failure surfaced later from Activator.CreateInstance
    // with a misleading "InstantiationException" message.
    if (c == null)
    {
        throw new IOException("unable to find LockClass " + lockFactoryClassName);
    }

    LockFactory lockFactory;
    try
    {
        lockFactory = (LockFactory)Activator.CreateInstance(c);
    }
    catch (UnauthorizedAccessException e)
    {
        throw new IOException("Cannot instantiate lock factory " + lockFactoryClassName, e);
    }
    catch (InvalidCastException e)
    {
        throw new IOException("unable to cast LockClass " + lockFactoryClassName + " instance to a LockFactory", e);
    }
    catch (Exception e)
    {
        throw new IOException("InstantiationException when instantiating LockClass " + lockFactoryClassName, e);
    }

    DirectoryInfo lockDir = new DirectoryInfo(lockDirName);

    if (lockFactory is FSLockFactory)
    {
        ((FSLockFactory)lockFactory).SetLockDir(lockDir);
    }

    Console.WriteLine("Connecting to server " + addr + " and registering as client " + myID + "...");
    using (Socket socket = new Socket(AddressFamily.InterNetwork, SocketType.Stream, ProtocolType.Tcp))
    {
        socket.SetSocketOption(SocketOptionLevel.Socket, SocketOptionName.ReuseAddress, 1);
        socket.Connect(verifierHost, verifierPort);

        using (Stream stream = new NetworkStream(socket))
        {
            BinaryReader intReader = new BinaryReader(stream);
            BinaryWriter intWriter = new BinaryWriter(stream);

            // Register with the server by sending our ID.
            intWriter.Write(myID);
            stream.Flush();

            lockFactory.LockPrefix = "test";
            LockFactory verifyLF = new VerifyingLockFactory(lockFactory, stream);
            Lock l = verifyLF.MakeLock("test.lock");
            Random rnd = new Random();

            // wait for starting gun
            if (intReader.ReadInt32() != 43)
            {
                throw new IOException("Protocol violation");
            }

            for (int i = 0; i < count; i++)
            {
                bool obtained = false;

                try
                {
                    obtained = l.Obtain(rnd.Next(100) + 10);
                }
                catch (LockObtainFailedException)
                {
                    // Treated the same as not obtaining the lock: skip to the next try.
                }

                if (obtained)
                {
                    Thread.Sleep(sleepTimeMS);
                    l.Dispose();
                }

                if (i % 500 == 0)
                {
                    Console.WriteLine((i * 100.0 / count) + "% done.");
                }

                Thread.Sleep(sleepTimeMS);
            }
        }
    }

    Console.WriteLine("Finished " + count + " tries.");
}
/// <summary>
/// Advances to the next term of the current field and returns its bytes, or
/// <c>null</c> when the underlying enum has no further term in this field.
/// When <c>skipNext</c> is set, the enum is not advanced; the term it is already
/// positioned on is returned instead and the flag is cleared.
/// </summary>
public override BytesRef Next()
{
    if (DEBUG_SURROGATES)
    {
        Console.WriteLine("TE.next()");
    }

    if (skipNext)
    {
        if (DEBUG_SURROGATES)
        {
            Console.WriteLine(" skipNext=true");
        }

        skipNext = false;

        // PreFlex codec interns field names:
        if (termEnum.Term() == null || termEnum.Term().Field != internedFieldName)
        {
            return null;
        }

        return current = termEnum.Term().Bytes;
    }

    // TODO: can we use STE's prevBuffer here?
    prevTerm.CopyBytes(termEnum.Term().Bytes);

    if (termEnum.Next() && termEnum.Term().Field == internedFieldName)
    {
        newSuffixStart = termEnum.newSuffixStart;
        if (DEBUG_SURROGATES)
        {
            Console.WriteLine(" newSuffixStart=" + newSuffixStart);
        }

        SurrogateDance();

        Term danced = termEnum.Term();
        if (danced != null && danced.Field == internedFieldName)
        {
            current = danced.Bytes;
        }
        else
        {
            // PreFlex codec interns field names; verify:
            if (Debugging.AssertsEnabled)
            {
                Debugging.Assert(danced == null || !danced.Field.Equals(internedFieldName, StringComparison.Ordinal));
            }

            current = null;
        }

        return current;
    }

    // this field is exhausted, but we have to give
    // surrogateDance a chance to seek back:
    if (DEBUG_SURROGATES)
    {
        Console.WriteLine(" force cont");
    }

    //newSuffixStart = prevTerm.length;
    newSuffixStart = 0;
    SurrogateDance();

    Term last = termEnum.Term();
    if (last != null && last.Field == internedFieldName)
    {
        current = last.Bytes;
        return current;
    }

    // PreFlex codec interns field names; verify:
    if (Debugging.AssertsEnabled)
    {
        Debugging.Assert(last == null || !last.Field.Equals(internedFieldName, StringComparison.Ordinal));
    }

    // Note: 'current' is deliberately left untouched on this path.
    return null;
}
/// <summary>
/// Seeks the underlying enum to <paramref name="term"/> and reports the outcome:
/// <c>FOUND</c> on an exact match in this field, <c>NOT_FOUND</c> when positioned
/// on some other term of this field, and <c>END</c> when no term of this field
/// could be reached. <c>current</c> is updated to match.
/// </summary>
/// <param name="term">The target term bytes.</param>
public override SeekStatus SeekCeil(BytesRef term)
{
    if (DEBUG_SURROGATES)
    {
        Console.WriteLine("TE.seek target=" + UnicodeUtil.ToHexString(term.Utf8ToString()));
    }

    skipNext = false;
    TermInfosReader termsDict = outerInstance.TermsDict;
    Term target = new Term(fieldInfo.Name, term);

    if (Debugging.AssertsEnabled)
    {
        Debugging.Assert(termEnum != null);
    }

    termsDict.SeekEnum(termEnum, target, false);

    Term found = termEnum.Term();
    bool inField = found != null && found.Field == internedFieldName;

    if (inField && term.BytesEquals(found.Bytes))
    {
        // If we found an exact match, no need to do the
        // surrogate dance
        if (DEBUG_SURROGATES)
        {
            Console.WriteLine(" seek exact match");
        }

        current = found.Bytes;
        return SeekStatus.FOUND;
    }

    if (!inField)
    {
        // TODO: maybe we can handle this like the next()
        // into null? set term as prevTerm then dance?
        if (DEBUG_SURROGATES)
        {
            Console.WriteLine(" seek hit EOF");
        }

        // We hit EOF; try end-case surrogate dance: if we
        // find an E, try swapping in S, backwards:
        scratchTerm.CopyBytes(term);

        if (Debugging.AssertsEnabled)
        {
            Debugging.Assert(scratchTerm.Offset == 0);
        }

        for (int i = scratchTerm.Length - 1; i >= 0; i--)
        {
            if (!IsHighBMPChar(scratchTerm.Bytes, i))
            {
                continue;
            }

            if (DEBUG_SURROGATES)
            {
                Console.WriteLine(" found E pos=" + i + "; try seek");
            }

            if (!SeekToNonBMP(seekTermEnum, scratchTerm, i))
            {
                continue;
            }

            scratchTerm.CopyBytes(seekTermEnum.Term().Bytes);
            outerInstance.TermsDict.SeekEnum(termEnum, seekTermEnum.Term(), false);

            newSuffixStart = 1 + i;

            DoPushes();

            // Found a match
            // TODO: faster seek?
            current = termEnum.Term().Bytes;
            return SeekStatus.NOT_FOUND;
        }

        if (DEBUG_SURROGATES)
        {
            Console.WriteLine(" seek END");
        }

        current = null;
        return SeekStatus.END;
    }

    // We found a non-exact but non-null term; this one
    // is fun -- just treat it like next, by pretending
    // requested term was prev:
    prevTerm.CopyBytes(term);

    if (DEBUG_SURROGATES)
    {
        Console.WriteLine(" seek hit non-exact term=" + UnicodeUtil.ToHexString(found.Text()));
    }

    BytesRef foundBytes = found.Bytes;
    if (Debugging.AssertsEnabled)
    {
        Debugging.Assert(foundBytes.Offset == 0);
    }

    SetNewSuffixStart(term, foundBytes);

    SurrogateDance();

    Term danced = termEnum.Term();
    if (danced == null || danced.Field != internedFieldName)
    {
        // PreFlex codec interns field names; verify:
        if (Debugging.AssertsEnabled)
        {
            Debugging.Assert(danced == null || !danced.Field.Equals(internedFieldName, StringComparison.Ordinal));
        }

        current = null;
        return SeekStatus.END;
    }

    current = danced.Bytes;
    if (Debugging.AssertsEnabled)
    {
        Debugging.Assert(!unicodeSortOrder || term.CompareTo(current) < 0, () => "term=" + UnicodeUtil.ToHexString(term.Utf8ToString()) + " vs current=" + UnicodeUtil.ToHexString(current.Utf8ToString()));
    }

    return SeekStatus.NOT_FOUND;
}
// Look for seek type 1 ("push"): if the newly added suffix contains any S, we
// must try to seek to the corresponding E. If we find a match, we go there;
// else we keep looking for additional S's in the new suffix. This "starts" the
// dance, at this character position.
private void DoPushes()
{
    int upTo = newSuffixStart;
    if (DEBUG_SURROGATES)
    {
        Console.WriteLine(" try push newSuffixStart=" + newSuffixStart + " scratchLen=" + scratchTerm.Length);
    }

    while (upTo < scratchTerm.Length)
    {
        // Evaluation order matters here: prevTerm.Bytes[upTo] is only inspected
        // once upTo is known to be inside prevTerm.
        bool pushHere = IsNonBMPChar(scratchTerm.Bytes, upTo)
            && (upTo > newSuffixStart
                || upTo >= prevTerm.Length
                || (!IsNonBMPChar(prevTerm.Bytes, upTo) && !IsHighBMPChar(prevTerm.Bytes, upTo)));

        if (!pushHere)
        {
            upTo++;
            continue;
        }

        // A non-BMP char (4 bytes UTF8) starts here:
        if (Debugging.AssertsEnabled)
        {
            Debugging.Assert(scratchTerm.Length >= upTo + 4);
        }

        // Temporarily overwrite the char at upTo with a 3-byte probe sequence,
        // remembering the original bytes and length so they can be restored.
        int savedLength = scratchTerm.Length;
        scratch[0] = (sbyte)scratchTerm.Bytes[upTo];
        scratch[1] = (sbyte)scratchTerm.Bytes[upTo + 1];
        scratch[2] = (sbyte)scratchTerm.Bytes[upTo + 2];

        scratchTerm.Bytes[upTo] = (byte)UTF8_HIGH_BMP_LEAD;
        scratchTerm.Bytes[upTo + 1] = 0x80;
        scratchTerm.Bytes[upTo + 2] = 0x80;
        scratchTerm.Length = upTo + 3;

        if (DEBUG_SURROGATES)
        {
            Console.WriteLine(" try seek 1 pos=" + upTo + " term=" + UnicodeUtil.ToHexString(scratchTerm.Utf8ToString()) + " " + scratchTerm.ToString() + " len=" + scratchTerm.Length);
        }

        // Seek "forward":
        // TODO: more efficient seek?
        outerInstance.TermsDict.SeekEnum(seekTermEnum, new Term(fieldInfo.Name, scratchTerm), true);

        scratchTerm.Bytes[upTo] = (byte)scratch[0];
        scratchTerm.Bytes[upTo + 1] = (byte)scratch[1];
        scratchTerm.Bytes[upTo + 2] = (byte)scratch[2];
        scratchTerm.Length = savedLength;

        // Did we find a match?
        Term hit = seekTermEnum.Term();

        if (DEBUG_SURROGATES)
        {
            if (hit == null)
            {
                Console.WriteLine(" hit term=null");
            }
            else
            {
                Console.WriteLine(" hit term=" + UnicodeUtil.ToHexString(hit.Text()) + " " + hit.Bytes);
            }
        }

        // Since this was a seek "forward", we could hit
        // EOF or a different field:
        bool matches = false;

        if (hit != null && hit.Field == internedFieldName)
        {
            BytesRef hitBytes = hit.Bytes;
            if (Debugging.AssertsEnabled)
            {
                Debugging.Assert(hitBytes.Offset == 0);
            }

            if (hitBytes.Length >= upTo + 3 && IsHighBMPChar(hitBytes.Bytes, upTo))
            {
                // The hit must also share our prefix [0, upTo).
                matches = true;
                for (int i = 0; i < upTo; i++)
                {
                    if (scratchTerm.Bytes[i] != hitBytes.Bytes[i])
                    {
                        matches = false;
                        break;
                    }
                }
            }
        }

        if (!matches)
        {
            upTo++;
            continue;
        }

        if (DEBUG_SURROGATES)
        {
            Console.WriteLine(" matches!");
        }

        // OK seek "back"
        // TODO: more efficient seek?
        outerInstance.TermsDict.SeekEnum(termEnum, seekTermEnum.Term(), true);

        scratchTerm.CopyBytes(seekTermEnum.Term().Bytes);

        // +3 because we don't need to check the char
        // at upTo: we know it's > BMP
        upTo += 3;

        // NOTE: we keep iterating, now, since this
        // can easily "recurse". Ie, after seeking
        // forward at a certain char position, we may
        // find another surrogate in our [new] suffix
        // and must then do another seek (recurse)
    }
}
// Pre-flex indices store terms in UTF16 sort order, but certain queries
// require Unicode codepoint order; this method carefully seeks around
// surrogates to handle this impedance mismatch.
private void SurrogateDance()
{
    if (!unicodeSortOrder)
    {
        return;
    }

    // We are invoked after TIS.next() (by UTF16 order) to possibly seek to a
    // different "next" (by unicode order) term.
    //
    // We scan only the "delta" from the last term to the current term, in UTF8
    // bytes. We look at 1) the bytes stripped from the prior term, and then
    // 2) the bytes appended to that prior term's prefix.
    //
    // We don't care about specific UTF8 sequences, just the "category" of the
    // UTF16 character. Category S is a high/low surrogate pair (i.e. non-BMP).
    // Category E is any BMP char > UNI_SUR_LOW_END (and < U+FFFF). Category A
    // is the rest (any unicode char <= UNI_SUR_HIGH_START).
    //
    // The core issue is that pre-flex indices sort the characters as ASE, while
    // flex must sort as AES. So when scanning, when we hit S, we must 1) seek
    // forward to E and enum the terms there, then 2) seek back to S and enum
    // all terms there, then 3) seek to after E. Three different seek points
    // (1, 2, 3).
    //
    // We can easily detect S in UTF8: if a byte has prefix 11110 (0xf0), then
    // that byte and the following 3 bytes encode a single unicode codepoint in
    // S. Similarly, we can detect E: if a byte has prefix 1110111 (0xee), then
    // that byte and the following 2 bytes encode a single unicode codepoint
    // in E.
    //
    // Note that this is really a recursive process -- maybe the char at pos 2
    // needs to dance, but any point in its dance, suddenly pos 4 needs to dance
    // so you must finish pos 4 before returning to pos 2. But then during pos
    // 4's dance maybe pos 7 needs to dance, etc. However, despite being
    // recursive, we don't need to hold any state because the state can always
    // be derived by looking at prior term & current term.

    // TODO: can we avoid this copy?
    if (termEnum.Term() != null && termEnum.Term().Field == internedFieldName)
    {
        scratchTerm.CopyBytes(termEnum.Term().Bytes);
    }
    else
    {
        scratchTerm.Length = 0;
    }

    if (DEBUG_SURROGATES)
    {
        Console.WriteLine(" dance");
        Console.WriteLine(" prev=" + UnicodeUtil.ToHexString(prevTerm.Utf8ToString()));
        Console.WriteLine(" " + prevTerm.ToString());
        Console.WriteLine(" term=" + UnicodeUtil.ToHexString(scratchTerm.Utf8ToString()));
        Console.WriteLine(" " + scratchTerm.ToString());
    }

    // this code assumes TermInfosReader/SegmentTermEnum
    // always use BytesRef.offset == 0
    if (Debugging.AssertsEnabled)
    {
        Debugging.Assert(prevTerm.Offset == 0);
        Debugging.Assert(scratchTerm.Offset == 0);
    }

    // Need to loop here because we may need to do multiple
    // pops, and possibly a continue in the end, ie:
    //
    //  cont
    //  pop, cont
    //  pop, pop, cont
    //  <nothing>
    //
    // The loop ends as soon as a continue succeeds or a pop fails.
    while (!DoContinue() && DoPop())
    {
    }

    if (DEBUG_SURROGATES)
    {
        Console.WriteLine(" finish bmp ends");
    }

    DoPushes();
}
// Look for seek type 3 ("pop"): if the delta from prev -> current was
// replacing an S with an E, we must now seek to beyond that E. This seek
// "finishes" the dance at this character position.
// Returns true when a pop was performed.
private bool DoPop()
{
    if (DEBUG_SURROGATES)
    {
        Console.WriteLine(" try pop");
    }

    if (Debugging.AssertsEnabled)
    {
        Debugging.Assert(newSuffixStart <= prevTerm.Length);
        Debugging.Assert(newSuffixStart < scratchTerm.Length || newSuffixStart == 0);
    }

    bool replacedSWithE = prevTerm.Length > newSuffixStart
        && IsNonBMPChar(prevTerm.Bytes, newSuffixStart)
        && IsHighBMPChar(scratchTerm.Bytes, newSuffixStart);

    if (!replacedSWithE)
    {
        return false;
    }

    // Put 0xFF at this position and truncate after it, so the seek below
    // targets a position beyond the E:
    scratchTerm.Bytes[newSuffixStart] = 0xff;
    scratchTerm.Length = newSuffixStart + 1;

    if (DEBUG_SURROGATES)
    {
        Console.WriteLine(" seek to term=" + UnicodeUtil.ToHexString(scratchTerm.Utf8ToString()) + " " + scratchTerm.ToString());
    }

    // TODO: more efficient seek? can we simply swap
    // the enums?
    outerInstance.TermsDict.SeekEnum(termEnum, new Term(fieldInfo.Name, scratchTerm), true);

    Term landed = termEnum.Term();

    // We could hit EOF or different field since this
    // was a seek "forward":
    if (landed != null && landed.Field == internedFieldName)
    {
        if (DEBUG_SURROGATES)
        {
            Console.WriteLine(" got term=" + UnicodeUtil.ToHexString(landed.Text()) + " " + landed.Bytes);
        }

        BytesRef landedBytes = landed.Bytes;
        if (Debugging.AssertsEnabled)
        {
            Debugging.Assert(landedBytes.Offset == 0);
        }

        // Set newSuffixStart -- we can't use
        // termEnum's since the above seek may have
        // done no scanning (eg, term was precisely
        // an index term, or, was in the term seek
        // cache):
        scratchTerm.CopyBytes(landedBytes);
        SetNewSuffixStart(prevTerm, scratchTerm);

        return true;
    }

    if (newSuffixStart != 0 || scratchTerm.Length != 0)
    {
        if (DEBUG_SURROGATES)
        {
            Console.WriteLine(" got term=null (or next field)");
        }

        newSuffixStart = 0;
        scratchTerm.Length = 0;
        return true;
    }

    return false;
}
// Swap in S, in place of E: temporarily replaces the E char at 'pos' in 'term'
// with the 4-byte sequence f0 90 80 80, seeks 'te' to it, and reports whether
// the term landed on has a non-BMP char at 'pos' with an identical prefix.
private bool SeekToNonBMP(SegmentTermEnum te, BytesRef term, int pos)
{
    int originalLength = term.Length;

    if (Debugging.AssertsEnabled)
    {
        Debugging.Assert(term.Offset == 0);
    }

    // The 3 bytes starting at pos make up 1
    // unicode character:
    if (Debugging.AssertsEnabled)
    {
        Debugging.Assert(IsHighBMPChar(term.Bytes, pos));
    }

    // NOTE: we cannot make this assert, because
    // AutomatonQuery legitimately sends us malformed UTF8
    // (eg the UTF8 bytes with just 0xee)
    // assert term.length >= pos + 3: "term.length=" + term.length + " pos+3=" + (pos+3) + " byte=" + Integer.toHexString(term.bytes[pos]) + " term=" + term.toString();

    // Save the bytes && length, since we need to
    // restore this if seek "back" finds no matching
    // terms
    int probeLength = 4 + pos;
    if (term.Bytes.Length < probeLength)
    {
        term.Grow(probeLength);
    }

    scratch[0] = (sbyte)term.Bytes[pos];
    scratch[1] = (sbyte)term.Bytes[pos + 1];
    scratch[2] = (sbyte)term.Bytes[pos + 2];

    term.Bytes[pos] = 0xf0;
    term.Bytes[pos + 1] = 0x90;
    term.Bytes[pos + 2] = 0x80;
    term.Bytes[pos + 3] = 0x80;
    term.Length = probeLength;

    if (DEBUG_SURROGATES)
    {
        Console.WriteLine(" try seek term=" + UnicodeUtil.ToHexString(term.Utf8ToString()));
    }

    // Seek "back":
    outerInstance.TermsDict.SeekEnum(te, new Term(fieldInfo.Name, term), true);

    // Test if the term we seek'd to in fact found a
    // surrogate pair at the same position as the E:
    Term landed = te.Term();

    // Cannot be null (or move to next field) because at
    // "worst" it'd seek to the same term we are on now,
    // unless we are being called from seek
    if (landed == null || landed.Field != internedFieldName)
    {
        // Note: 'term' is not restored on this path.
        return false;
    }

    if (DEBUG_SURROGATES)
    {
        Console.WriteLine(" got term=" + UnicodeUtil.ToHexString(landed.Text()));
    }

    // Now test if prefix is identical and we found
    // a non-BMP char at the same position:
    BytesRef landedBytes = landed.Bytes;
    if (Debugging.AssertsEnabled)
    {
        Debugging.Assert(landedBytes.Offset == 0);
    }

    bool matches = false;
    if (landedBytes.Length >= term.Length && IsNonBMPChar(landedBytes.Bytes, pos))
    {
        matches = true;
        for (int i = 0; i < pos; i++)
        {
            if (term.Bytes[i] != landedBytes.Bytes[i])
            {
                matches = false;
                break;
            }
        }
    }

    // Restore term:
    term.Length = originalLength;
    term.Bytes[pos] = (byte)scratch[0];
    term.Bytes[pos + 1] = (byte)scratch[1];
    term.Bytes[pos + 2] = (byte)scratch[2];

    return matches;
}