/// <summary>
/// Sort <paramref name="input"/> to <paramref name="output"/> using an external
/// merge sort: the input is read in partitions, each partition is sorted and
/// spilled to a temporary file, and the temporary files are merged into the output.
/// NOTE(review): the original summary mentioned "explicit hint for the buffer size",
/// but this overload takes no such hint — the remark appears copied from a sibling
/// overload; confirm against the enclosing class.
/// </summary>
/// <param name="input">File containing the unsorted byte sequences.</param>
/// <param name="output">Destination file for the sorted result; deleted if the sort fails.</param>
/// <returns>A <c>SortInfo</c> with statistics (line count, temp-file count, total time).</returns>
public SortInfo Sort(FileInfo input, FileInfo output)
{
    // TickCount is stored as a start marker here and converted to an elapsed
    // duration at the end of the method. (Environment.TickCount wraps after
    // ~24.9 days of uptime; a sort spanning a wrap would report a skewed time.)
    sortInfo = new SortInfo(this) { TotalTime = Environment.TickCount };

    // LUCENENET NOTE: Can't do this because another thread could recreate the file before we are done here.
    // and cause this to bomb. We use the existence of the file as an indicator that we are done using it.
    //output.Delete();

    var merges = new List<FileInfo>();
    bool success2 = false; // set only after the merged output is fully written
    try
    {
        var inputStream = new ByteSequencesReader(input);
        bool success = false; // tracks whether the read loop completed without throwing
        try
        {
            int lines = 0;
            // Read one in-memory partition at a time; each partition is sorted
            // and spilled to its own temp file, collected in 'merges'.
            while ((lines = ReadPartition(inputStream)) > 0)
            {
                merges.Add(SortPartition(lines));
                sortInfo.TempMergeFiles++;
                sortInfo.Lines += lines;

                // Handle intermediate merges: once MaxTempFiles partitions have
                // accumulated, collapse them all into a single intermediate temp
                // file so the number of simultaneously open files stays bounded.
                if (merges.Count == MaxTempFiles)
                {
                    var intermediate = new FileInfo(Path.GetTempFileName());
                    try
                    {
                        MergePartitions(merges, intermediate);
                    }
                    finally
                    {
                        // Even if the merge throws, the partial temp files are
                        // deleted and the list is reset to just the intermediate,
                        // so the outer finally won't try to re-delete stale entries.
                        foreach (var file in merges)
                        {
                            file.Delete();
                        }
                        merges.Clear();
                        merges.Add(intermediate);
                    }
                    sortInfo.TempMergeFiles++;
                }
            }
            success = true;
        }
        finally
        {
            // On the success path, close normally (propagating close errors);
            // on the failure path, close while suppressing secondary exceptions
            // so the original error is the one that propagates.
            if (success)
            {
                IOUtils.Close(inputStream);
            }
            else
            {
                IOUtils.CloseWhileHandlingException(inputStream);
            }
        }

        // One partition, try to rename or copy if unsuccessful.
        if (merges.Count == 1)
        {
            FileInfo single = merges[0];
            Copy(single, output);
            try
            {
                File.Delete(single.FullName);
            }
            catch (Exception)
            {
                // ignored — best-effort cleanup; the outer finally will attempt
                // to delete this file again via the 'merges' list.
            }
        }
        else
        {
            // otherwise merge the partitions with a priority queue.
            MergePartitions(merges, output);
        }
        success2 = true;
    }
    finally
    {
        // Remove all remaining temp partition files, and if the sort failed,
        // remove the (possibly partial) output file as well.
        foreach (FileInfo file in merges)
        {
            file.Delete();
        }
        if (!success2)
        {
            output.Delete();
        }
    }

    // Convert the start marker stored above into an elapsed duration.
    sortInfo.TotalTime = (Environment.TickCount - sortInfo.TotalTime);
    return(sortInfo);
}
/// <summary>
/// Creates a query from the analysis chain.
/// <para>
/// Expert: this is more useful for subclasses such as queryparsers.
/// If using this class directly, just use <c>CreateBooleanQuery(string, string)</c>
/// and <c>CreatePhraseQuery(string, string)</c>.
/// </para>
/// <para>
/// The token stream is consumed twice: a first pass counts tokens and positions
/// (detecting synonyms, i.e. several tokens at the same position), then the
/// cached stream is rewound and a second pass builds one of: a single
/// <c>TermQuery</c>, a <c>BooleanQuery</c> (flat or per-position), a
/// <c>MultiPhraseQuery</c> (quoted text with synonyms), or a <c>PhraseQuery</c>.
/// </para>
/// </summary>
/// <param name="analyzer"> analyzer used for this query </param>
/// <param name="operator"> default boolean operator used for this query (must be SHOULD or MUST) </param>
/// <param name="field"> field to create queries against </param>
/// <param name="queryText"> text to be passed to the analysis chain </param>
/// <param name="quoted"> true if phrases should be generated when terms occur at more than one position </param>
/// <param name="phraseSlop"> slop factor for phrase/multiphrase queries </param>
/// <returns>The constructed query, or <c>null</c> if analysis produced no tokens.</returns>
/// <exception cref="Exception">Wraps any <see cref="System.IO.IOException"/> thrown while analyzing the text.</exception>
protected Query CreateFieldQuery(Analyzer analyzer, Occur @operator, string field, string queryText, bool quoted, int phraseSlop)
{
    Debug.Assert(@operator == Occur.SHOULD || @operator == Occur.MUST);
    // Use the analyzer to get all the tokens, and then build a TermQuery,
    // PhraseQuery, or nothing based on the term count
    CachingTokenFilter buffer = null;
    ITermToBytesRefAttribute termAtt = null;
    IPositionIncrementAttribute posIncrAtt = null;
    int numTokens = 0;
    int positionCount = 0;
    // A position increment of 0 means a token stacked on the previous one
    // (e.g. a synonym); this flag switches the quoted path to MultiPhraseQuery.
    bool severalTokensAtSamePosition = false;
    bool hasMoreTokens = false;

    TokenStream source = null;
    try
    {
        source = analyzer.GetTokenStream(field, new StringReader(queryText));
        source.Reset();
        // CachingTokenFilter records tokens on the first pass so the stream
        // can be replayed below without re-running the analyzer.
        buffer = new CachingTokenFilter(source);
        buffer.Reset();

        if (buffer.HasAttribute<ITermToBytesRefAttribute>())
        {
            termAtt = buffer.GetAttribute<ITermToBytesRefAttribute>();
        }
        if (buffer.HasAttribute<IPositionIncrementAttribute>())
        {
            posIncrAtt = buffer.GetAttribute<IPositionIncrementAttribute>();
        }

        if (termAtt != null)
        {
            // First pass: count tokens and distinct positions.
            try
            {
                hasMoreTokens = buffer.IncrementToken();
                while (hasMoreTokens)
                {
                    numTokens++;
                    int positionIncrement = (posIncrAtt != null) ? posIncrAtt.PositionIncrement : 1;
                    if (positionIncrement != 0)
                    {
                        positionCount += positionIncrement;
                    }
                    else
                    {
                        severalTokensAtSamePosition = true;
                    }
                    hasMoreTokens = buffer.IncrementToken();
                }
            }
            catch (System.IO.IOException)
            {
                // ignore — the counts gathered so far are used as-is
            }
        }
    }
    catch (System.IO.IOException e)
    {
        throw new Exception("Error analyzing query text", e);
    }
    finally
    {
        // Close the underlying stream; the cached buffer remains usable after
        // the source is closed (all tokens were recorded in the first pass).
        IOUtils.CloseWhileHandlingException(source);
    }

    // rewind the buffer stream
    buffer.Reset();

    // NOTE: 'bytes' is the attribute's live term buffer — each FillBytesRef()
    // call below overwrites it, which is why every Term is built from
    // BytesRef.DeepCopyOf(bytes) rather than from 'bytes' directly.
    BytesRef bytes = termAtt == null ? null : termAtt.BytesRef;

    if (numTokens == 0)
    {
        // Analysis produced nothing (e.g. all stopwords) — no query.
        return(null);
    }
    else if (numTokens == 1)
    {
        // Single token: a plain TermQuery regardless of 'quoted'.
        try
        {
            bool hasNext = buffer.IncrementToken();
            Debug.Assert(hasNext == true);
            termAtt.FillBytesRef();
        }
        catch (System.IO.IOException)
        {
            // safe to ignore, because we know the number of tokens
        }
        return(NewTermQuery(new Term(field, BytesRef.DeepCopyOf(bytes))));
    }
    else
    {
        if (severalTokensAtSamePosition || (!quoted))
        {
            if (positionCount == 1 || (!quoted))
            {
                // no phrase query:
                if (positionCount == 1)
                {
                    // simple case: only one position, with synonyms —
                    // all tokens are alternatives, OR them together.
                    BooleanQuery q = NewBooleanQuery(true);
                    for (int i = 0; i < numTokens; i++)
                    {
                        try
                        {
                            bool hasNext = buffer.IncrementToken();
                            Debug.Assert(hasNext == true);
                            termAtt.FillBytesRef();
                        }
                        catch (System.IO.IOException)
                        {
                            // safe to ignore, because we know the number of tokens
                        }
                        Query currentQuery = NewTermQuery(new Term(field, BytesRef.DeepCopyOf(bytes)));
                        q.Add(currentQuery, Occur.SHOULD);
                    }
                    return(q);
                }
                else
                {
                    // multiple positions: one clause per position, where a
                    // position with synonyms becomes a nested OR BooleanQuery.
                    BooleanQuery q = NewBooleanQuery(false);
                    Query currentQuery = null;
                    for (int i = 0; i < numTokens; i++)
                    {
                        try
                        {
                            bool hasNext = buffer.IncrementToken();
                            Debug.Assert(hasNext == true);
                            termAtt.FillBytesRef();
                        }
                        catch (System.IO.IOException)
                        {
                            // safe to ignore, because we know the number of tokens
                        }
                        if (posIncrAtt != null && posIncrAtt.PositionIncrement == 0)
                        {
                            // Token stacked at the same position as the previous
                            // one: fold it into (or start) a synonym OR-group.
                            if (!(currentQuery is BooleanQuery))
                            {
                                Query t = currentQuery;
                                currentQuery = NewBooleanQuery(true);
                                ((BooleanQuery)currentQuery).Add(t, Occur.SHOULD);
                            }
                            ((BooleanQuery)currentQuery).Add(NewTermQuery(new Term(field, BytesRef.DeepCopyOf(bytes))), Occur.SHOULD);
                        }
                        else
                        {
                            // New position: flush the previous position's clause
                            // with the caller's operator, then start a new one.
                            if (currentQuery != null)
                            {
                                q.Add(currentQuery, @operator);
                            }
                            currentQuery = NewTermQuery(new Term(field, BytesRef.DeepCopyOf(bytes)));
                        }
                    }
                    // Flush the final position's clause.
                    q.Add(currentQuery, @operator);
                    return(q);
                }
            }
            else
            {
                // phrase query: quoted text containing synonyms — use a
                // MultiPhraseQuery with all terms at each position grouped.
                MultiPhraseQuery mpq = NewMultiPhraseQuery();
                mpq.Slop = phraseSlop;
                IList<Term> multiTerms = new List<Term>();
                int position = -1;
                for (int i = 0; i < numTokens; i++)
                {
                    int positionIncrement = 1;
                    try
                    {
                        bool hasNext = buffer.IncrementToken();
                        Debug.Assert(hasNext == true);
                        termAtt.FillBytesRef();
                        if (posIncrAtt != null)
                        {
                            positionIncrement = posIncrAtt.PositionIncrement;
                        }
                    }
                    catch (System.IO.IOException)
                    {
                        // safe to ignore, because we know the number of tokens
                    }
                    // A positive increment means we moved to a new position:
                    // flush the terms accumulated for the previous position.
                    if (positionIncrement > 0 && multiTerms.Count > 0)
                    {
                        if (enablePositionIncrements)
                        {
                            mpq.Add(multiTerms.ToArray(), position);
                        }
                        else
                        {
                            mpq.Add(multiTerms.ToArray());
                        }
                        multiTerms.Clear();
                    }
                    position += positionIncrement;
                    multiTerms.Add(new Term(field, BytesRef.DeepCopyOf(bytes)));
                }
                // Flush the terms of the final position.
                if (enablePositionIncrements)
                {
                    mpq.Add(multiTerms.ToArray(), position);
                }
                else
                {
                    mpq.Add(multiTerms.ToArray());
                }
                return(mpq);
            }
        }
        else
        {
            // Quoted text with no stacked tokens: a plain PhraseQuery, honoring
            // position increments (gaps from removed stopwords) when enabled.
            PhraseQuery pq = NewPhraseQuery();
            pq.Slop = phraseSlop;
            int position = -1;
            for (int i = 0; i < numTokens; i++)
            {
                int positionIncrement = 1;
                try
                {
                    bool hasNext = buffer.IncrementToken();
                    Debug.Assert(hasNext == true);
                    termAtt.FillBytesRef();
                    if (posIncrAtt != null)
                    {
                        positionIncrement = posIncrAtt.PositionIncrement;
                    }
                }
                catch (System.IO.IOException)
                {
                    // safe to ignore, because we know the number of tokens
                }
                if (enablePositionIncrements)
                {
                    position += positionIncrement;
                    pq.Add(new Term(field, BytesRef.DeepCopyOf(bytes)), position);
                }
                else
                {
                    pq.Add(new Term(field, BytesRef.DeepCopyOf(bytes)));
                }
            }
            return(pq);
        }
    }
}