/// <summary>
/// Builds the set of assembly names that will be searched for task classes.
/// LUCENENET specific - changing the logic a bit to add all referenced
/// assemblies by default. The alt.tasks.packages parameter still exists, but
/// it is only necessary for assemblies that are not referenced by the host assembly.
/// </summary>
private string[] InitTasksPackages(Config config)
{
    ISet<string> packages = new JCG.HashSet<string>();

    // The assembly that declares PerfTask is always searched.
    packages.Add(typeof(PerfTask).Assembly.GetName().Name);

    // Add any extra assemblies the user listed explicitly (comma-separated).
    string alternatives = config.Get("alt.tasks.packages", null);
    if (alternatives != null)
    {
        foreach (string alternative in alternatives.Split(',').TrimEnd())
        {
            packages.Add(alternative);
        }
    }

    // Every assembly referenced by the host assembly is included by default.
    packages.UnionWith(AssemblyUtils.GetReferencedAssemblies().Select(a => a.GetName().Name));

    return packages.ToArray();
}
// TODO: Remove warning after API has been finalized
/// <summary>
/// WARNING: The List is not necessarily in order of the positions. </summary>
/// <returns> Collection of <see cref="T:byte[]"/> payloads </returns>
/// <exception cref="System.IO.IOException"> if there is a low-level I/O error </exception>
public override ICollection<byte[]> GetPayload()
{
    // Gather the payloads of every cell in the chain that has one available.
    var payloads = new JCG.HashSet<byte[]>();
    var current = first;
    while (current != null)
    {
        if (current.IsPayloadAvailable)
        {
            payloads.UnionWith(current.GetPayload());
        }
        current = current.next;
    }
    return payloads;
}
/// <summary>
/// Loads the configured stop-type files through the given resource loader and
/// collects their lines into the stopTypes set. If no files are configured,
/// stopTypes is left unassigned.
/// </summary>
public virtual void Inform(IResourceLoader loader)
{
    IList<string> fileNames = SplitFileNames(stopTypesFiles);
    if (fileNames.Count == 0)
    {
        return; // nothing configured; stopTypes stays unset
    }

    stopTypes = new JCG.HashSet<string>();
    foreach (string fileName in fileNames)
    {
        stopTypes.UnionWith(GetLines(loader, fileName.Trim()));
    }
}
/// <summary>
/// Returns all files in use by this segment. </summary>
public virtual ICollection<string> GetFiles()
{
    // Start from the wrapped info's files:
    ISet<string> result = new JCG.HashSet<string>(Info.GetFiles());

    // TODO we could rely on TrackingDir.getCreatedFiles() (like we do for
    // updates) and then maybe even be able to remove LiveDocsFormat.files().

    // Must separately add any live docs files:
    Info.Codec.LiveDocsFormat.Files(this, result);

    // Must separately add any field updates files
    foreach (ISet<string> updates in genUpdatesFiles.Values)
    {
        result.UnionWith(updates);
    }

    return result;
}
/// <summary>
/// Seals a newly flushed segment: optionally packs its files into a compound
/// file (CFS), writes the per-segment info (.si), and persists any deletes
/// that arrived while the segment was being flushed. The ordering here is
/// deliberate: CFS first, then .si, then live docs (see inline comments).
/// On failure only a diagnostic message is emitted; cleanup is presumably
/// handled by the caller — TODO confirm.
/// </summary>
internal virtual void SealFlushedSegment(FlushedSegment flushedSegment)
{
    if (Debugging.AssertsEnabled)
    {
        Debugging.Assert(flushedSegment != null);
    }

    SegmentCommitInfo newSegment = flushedSegment.segmentInfo;

    // Tag the segment's diagnostics so tooling can tell it came from a flush.
    IndexWriter.SetDiagnostics(newSegment.Info, IndexWriter.SOURCE_FLUSH);

    // IOContext carries doc count / size hints for the directory implementation.
    IOContext context = new IOContext(new FlushInfo(newSegment.Info.DocCount, newSegment.GetSizeInBytes()));

    bool success = false;
    try
    {
        if (indexWriterConfig.UseCompoundFile)
        {
            // Pack the segment files into a CFS; the now-redundant originals
            // returned by CreateCompoundFile are queued for deletion.
            filesToDelete.UnionWith(IndexWriter.CreateCompoundFile(infoStream, directory, CheckAbort.NONE, newSegment.Info, context));
            newSegment.Info.UseCompoundFile = true;
        }

        // Have codec write SegmentInfo.  Must do this after
        // creating CFS so that 1) .si isn't slurped into CFS,
        // and 2) .si reflects useCompoundFile=true change
        // above:
        codec.SegmentInfoFormat.SegmentInfoWriter.Write(directory, newSegment.Info, flushedSegment.fieldInfos, context);

        // TODO: ideally we would freeze newSegment here!!
        // because any changes after writing the .si will be
        // lost...

        // Must write deleted docs after the CFS so we don't
        // slurp the del file into CFS:
        if (flushedSegment.liveDocs != null)
        {
            int delCount = flushedSegment.delCount;
            if (Debugging.AssertsEnabled)
            {
                // liveDocs is only non-null when at least one doc was deleted.
                Debugging.Assert(delCount > 0);
            }
            if (infoStream.IsEnabled("DWPT"))
            {
                infoStream.Message("DWPT", "flush: write " + delCount + " deletes gen=" + flushedSegment.segmentInfo.DelGen);
            }

            // TODO: we should prune the segment if it's 100%
            // deleted... but merge will also catch it.

            // TODO: in the NRT case it'd be better to hand
            // this del vector over to the
            // shortly-to-be-opened SegmentReader and let it
            // carry the changes; there's no reason to use
            // filesystem as intermediary here.
            SegmentCommitInfo info = flushedSegment.segmentInfo;
            Codec codec = info.Info.Codec;
            codec.LiveDocsFormat.WriteLiveDocs(flushedSegment.liveDocs, directory, info, delCount, context);
            newSegment.DelCount = delCount;
            newSegment.AdvanceDelGen();
        }

        success = true;
    }
    finally
    {
        if (!success)
        {
            // Best-effort diagnostics only; no cleanup performed here.
            if (infoStream.IsEnabled("DWPT"))
            {
                infoStream.Message("DWPT", "hit exception creating compound file for newly flushed segment " + newSegment.Info.Name);
            }
        }
    }
}
/// <summary>
/// The <see cref="SubSpans"/> are ordered in the same doc, so there is a possible match.
/// Compute the slop while making the match as short as possible by advancing
/// all <see cref="SubSpans"/> except the last one in reverse order.
/// </summary>
/// <returns><c>true</c> when the accumulated slop is within <c>allowedSlop</c>.</returns>
private bool ShrinkToAfterShortestMatch()
{
    // The last sub-span anchors the match; start/end are taken from it.
    matchStart = subSpans[subSpans.Length - 1].Start;
    matchEnd = subSpans[subSpans.Length - 1].End;

    var possibleMatchPayloads = new JCG.HashSet<byte[]>();
    if (subSpans[subSpans.Length - 1].IsPayloadAvailable)
    {
        possibleMatchPayloads.UnionWith(subSpans[subSpans.Length - 1].GetPayload());
    }

    IList<byte[]> possiblePayload = null;

    int matchSlop = 0;
    int lastStart = matchStart;
    int lastEnd = matchEnd;

    // Walk the remaining sub-spans in reverse, tightening each one as far
    // forward as possible while it still precedes (lastStart, lastEnd).
    for (int i = subSpans.Length - 2; i >= 0; i--)
    {
        Spans prevSpans = subSpans[i];

        if (collectPayloads && prevSpans.IsPayloadAvailable)
        {
            possiblePayload = new List<byte[]>(prevSpans.GetPayload()); // LUCENENET specific - using copy constructor instead of AddRange()
        }

        int prevStart = prevSpans.Start;
        int prevEnd = prevSpans.End;
        while (true) // Advance prevSpans until after (lastStart, lastEnd)
        {
            if (!prevSpans.Next())
            {
                // Exhausted this sub-span entirely: no further docs at all.
                inSameDoc = false;
                more = false;
                break; // Check remaining subSpans for final match.
            }
            else if (matchDoc != prevSpans.Doc)
            {
                inSameDoc = false; // The last subSpans is not advanced here.
                break; // Check remaining subSpans for last match in this document.
            }
            else
            {
                int ppStart = prevSpans.Start;
                int ppEnd = prevSpans.End; // Cannot avoid invoking .end()
                if (!DocSpansOrdered(ppStart, ppEnd, lastStart, lastEnd))
                {
                    break; // Check remaining subSpans.
                } // prevSpans still before (lastStart, lastEnd)
                else
                {
                    // Keep the most-advanced position (and its payload) that
                    // still precedes the later sub-span.
                    prevStart = ppStart;
                    prevEnd = ppEnd;
                    if (collectPayloads && prevSpans.IsPayloadAvailable)
                    {
                        possiblePayload = new List<byte[]>(prevSpans.GetPayload()); // LUCENENET specific - using copy constructor instead of AddRange()
                    }
                }
            }
        }

        if (collectPayloads && possiblePayload != null)
        {
            possibleMatchPayloads.UnionWith(possiblePayload);
        }

        Debug.Assert(prevStart <= matchStart);
        if (matchStart > prevEnd) // Only non overlapping spans add to slop.
        {
            matchSlop += (matchStart - prevEnd);
        }

        /* Do not break on (matchSlop > allowedSlop) here to make sure
         * that subSpans[0] is advanced after the match, if any. */
        matchStart = prevStart;
        lastStart = prevStart;
        lastEnd = prevEnd;
    }

    bool match = matchSlop <= allowedSlop;

    if (collectPayloads && match && possibleMatchPayloads.Count > 0)
    {
        matchPayload.AddRange(possibleMatchPayloads);
    }

    return (match); // ordered and allowed slop
}
/// <summary>
/// Verifies that with <c>NoDeletionPolicy</c> no index files are ever removed:
/// while background threads index documents, the test repeatedly reopens the
/// reader, records every file name seen in any commit, and asserts that all
/// previously seen files still exist on disk.
/// </summary>
public virtual void TestIndexing()
{
    DirectoryInfo tmpDir = CreateTempDir("TestNeverDelete");
    BaseDirectoryWrapper d = NewFSDirectory(tmpDir);

    // We want to "see" files removed if Lucene removed
    // them.  this is still worth running on Windows since
    // some files the IR opens and closes.
    if (d is MockDirectoryWrapper)
    {
        ((MockDirectoryWrapper)d).NoDeleteOpenFile = false;
    }

    // NoDeletionPolicy.INSTANCE is the property under test: commits must pile up.
    RandomIndexWriter w = new RandomIndexWriter(Random, d, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)).SetIndexDeletionPolicy(NoDeletionPolicy.INSTANCE));
    w.IndexWriter.Config.SetMaxBufferedDocs(TestUtil.NextInt32(Random, 5, 30));

    // Commit once before opening the reader so there is a first index commit to read.
    w.Commit();

    ThreadJob[] indexThreads = new ThreadJob[Random.Next(4)];
    long stopTime = (J2N.Time.NanoTime() / J2N.Time.MillisecondsPerNanosecond) + AtLeast(1000); // LUCENENET: Use NanoTime() rather than CurrentTimeMilliseconds() for more accurate/reliable results
    for (int x = 0; x < indexThreads.Length; x++)
    {
        indexThreads[x] = new ThreadAnonymousClass(w, stopTime, NewStringField, NewTextField);
        indexThreads[x].Name = "Thread " + x;
        indexThreads[x].Start();
    }

    // Union of every file name observed across all commits so far.
    ISet<string> allFiles = new JCG.HashSet<string>();

    DirectoryReader r = DirectoryReader.Open(d);
    while (J2N.Time.NanoTime() / J2N.Time.MillisecondsPerNanosecond < stopTime) // LUCENENET: Use NanoTime() rather than CurrentTimeMilliseconds() for more accurate/reliable results
    {
        IndexCommit ic = r.IndexCommit;
        if (Verbose)
        {
            Console.WriteLine("TEST: check files: " + ic.FileNames);
        }
        allFiles.UnionWith(ic.FileNames);
        // Make sure no old files were removed
        foreach (string fileName in allFiles)
        {
            Assert.IsTrue(SlowFileExists(d, fileName), "file " + fileName + " does not exist");
        }
        // Reopen only when the index actually changed; otherwise keep the old reader.
        DirectoryReader r2 = DirectoryReader.OpenIfChanged(r);
        if (r2 != null)
        {
            r.Dispose();
            r = r2;
        }
        Thread.Sleep(1);
    }
    r.Dispose();

    // Join worker threads before disposing the writer they share.
    foreach (ThreadJob t in indexThreads)
    {
        t.Join();
    }
    w.Dispose();
    d.Dispose();

    System.IO.Directory.Delete(tmpDir.FullName, true);
}
//-----------------------------------------------------------------------------
//
//   calcChainedFollowPos.    Modify the previously calculated followPos sets
//                            to implement rule chaining.  NOT described by Aho
//
//-----------------------------------------------------------------------------
/// <summary>
/// Augments the followPos sets computed by the standard Aho construction so
/// that a match ending at one rule can chain directly into the start of
/// another rule with the same character class.
/// </summary>
/// <param name="tree">Root of the parse tree for the combined rules.</param>
internal virtual void CalcChainedFollowPos(RBBINode tree)
{
    IList<RBBINode> endMarkerNodes = new JCG.List<RBBINode>();
    IList<RBBINode> leafNodes = new JCG.List<RBBINode>();

    // get a list of all endmarker nodes.
    tree.FindNodes(endMarkerNodes, RBBINode.endMark);

    // get a list all leaf nodes
    tree.FindNodes(leafNodes, RBBINode.leafChar);

    // Collect all leaf nodes that can start matches for rules
    // with inbound chaining enabled, which is the union of the
    // firstPosition sets from each of the rule root nodes.
    IList<RBBINode> ruleRootNodes = new JCG.List<RBBINode>();
    AddRuleRootNodes(ruleRootNodes, tree);

    ISet<RBBINode> matchStartNodes = new JCG.HashSet<RBBINode>();
    foreach (RBBINode node in ruleRootNodes)
    {
        if (node.fChainIn)
        {
            matchStartNodes.UnionWith(node.fFirstPosSet);
        }
    }

    // Iterate over all leaf nodes,
    //
    foreach (RBBINode tNode in leafNodes)
    {
        RBBINode endNode = null;

        // Identify leaf nodes that correspond to overall rule match positions.
        // These include an endMarkerNode in their followPos sets.
        foreach (RBBINode endMarkerNode in endMarkerNodes)
        {
            if (tNode.fFollowPos.Contains(endMarkerNode))
            {
                endNode = tNode;
                break;
            }
        }
        if (endNode == null)
        {
            // node wasn't an end node.  Try again with the next.
            continue;
        }

        // We've got a node that can end a match.

        // Line Break Specific hack:  If this node's val correspond to the $CM char class,
        //                            don't chain from it.
        // TODO:  Add rule syntax for this behavior, get specifics out of here and
        //        into the rule file.
        if (fRB.fLBCMNoChain)
        {
            int c = this.fRB.fSetBuilder.GetFirstChar(endNode.fVal);
            if (c != -1)
            {
                // c == -1 occurs with sets containing only the {eof} marker string.
                int cLBProp = UChar.GetIntPropertyValue(c, UProperty.Line_Break);
                if (cLBProp == LineBreak.CombiningMark)
                {
                    // Skip chaining from combining-mark end nodes.
                    continue;
                }
            }
        }

        // Now iterate over the nodes that can start a match, looking for ones
        //   with the same char class as our ending node.
        foreach (RBBINode startNode in matchStartNodes)
        {
            if (startNode.fType != RBBINode.leafChar)
            {
                continue;
            }

            if (endNode.fVal == startNode.fVal)
            {
                // The end val (character class) of one possible match is the
                //   same as the start of another.

                // Add all nodes from the followPos of the start node to the
                //  followPos set of the end node, which will have the effect of
                //  letting matches transition from a match state at endNode
                //  to the second char of a match starting with startNode.
                endNode.fFollowPos.UnionWith(startNode.fFollowPos);
            }
        }
    }
}
/// <summary>
/// Encodes an input string into an output phonetic representation, given a set of possible origin languages.
/// </summary>
/// <param name="input">String to phoneticise; a string with dashes or spaces separating each word.</param>
/// <param name="languageSet">Set of possible origin languages; selects which rule maps are applied.</param>
/// <returns>A phonetic representation of the input; a string containing '-'-separated phonetic representations of the input.</returns>
public virtual string Encode(string input, LanguageSet languageSet)
{
    // Primary rules keyed by input pattern, plus two "final" passes:
    // language-independent first, then language-specific.
    IDictionary<string, IList<Rule>> rules = Rule.GetInstanceMap(this.nameType, RuleType.RULES, languageSet);
    // rules common across many (all) languages
    IDictionary<string, IList<Rule>> finalRules1 = Rule.GetInstanceMap(this.nameType, this.ruleType, "common");
    // rules that apply to a specific language that may be ambiguous or wrong if applied to other languages
    IDictionary<string, IList<Rule>> finalRules2 = Rule.GetInstanceMap(this.nameType, this.ruleType, languageSet);

    // tidy the input
    // lower case is a locale-dependent operation
    input = input.ToLowerInvariant().Replace('-', ' ').Trim();

    if (this.nameType == NameType.GENERIC)
    {
        // "d'x" is encoded both as "x" and as "dx" — emit both alternatives.
        if (input.Length >= 2 && input.Substring(0, 2 - 0).Equals("d'", StringComparison.Ordinal)) // check for d'
        {
            string remainder = input.Substring(2);
            string combined = "d" + remainder;
            return ("(" + Encode(remainder) + ")-(" + Encode(combined) + ")");
        }
        foreach (string l in NAME_PREFIXES[this.nameType])
        {
            // handle generic prefixes
            if (input.StartsWith(l + " ", StringComparison.Ordinal))
            {
                // check for any prefix in the words list
                string remainder = input.Substring(l.Length + 1); // input without the prefix
                string combined = l + remainder; // input with prefix without space
                return ("(" + Encode(remainder) + ")-(" + Encode(combined) + ")");
            }
        }
    }

    IList<string> words = WHITESPACE.Split(input).TrimEnd();
    ISet<string> words2 = new JCG.HashSet<string>();

    // special-case handling of word prefixes based upon the name type
    switch (this.nameType)
    {
        case NameType.SEPHARDIC:
            // For Sephardic names only the part after an apostrophe is kept.
            foreach (string aWord in words)
            {
                string[] parts = aWord.Split('\'').TrimEnd();
                string lastPart = parts[parts.Length - 1];
                words2.Add(lastPart);
            }
            words2.ExceptWith(NAME_PREFIXES[this.nameType]);
            break;
        case NameType.ASHKENAZI:
            words2.UnionWith(words);
            words2.ExceptWith(NAME_PREFIXES[this.nameType]);
            break;
        case NameType.GENERIC:
            words2.UnionWith(words);
            break;
        default:
            throw new InvalidOperationException("Unreachable case: " + this.nameType);
    }

    if (this.concat)
    {
        // concat mode enabled
        input = Join(words2, " ");
    }
    else if (words2.Count == 1) // not a multi-word name
    {
        //input = words.iterator().next();
        input = words[0];
    }
    else
    {
        // encode each word in a multi-word name separately (normally used for approx matches)
        StringBuilder result = new StringBuilder();
        foreach (string word in words2)
        {
            result.Append("-").Append(Encode(word));
        }
        // return the result without the leading "-"
        return (result.ToString(1, result.Length - 1));
    }

    PhonemeBuilder phonemeBuilder = PhonemeBuilder.Empty(languageSet);

    // loop over each char in the input - we will handle the increment manually
    for (int i = 0; i < input.Length;)
    {
        RulesApplication rulesApplication = new RulesApplication(rules, input, phonemeBuilder, i, maxPhonemes).Invoke();
        i = rulesApplication.I;
        phonemeBuilder = rulesApplication.PhonemeBuilder;
    }

    // Apply the general rules
    phonemeBuilder = ApplyFinalRules(phonemeBuilder, finalRules1);
    // Apply the language-specific rules
    phonemeBuilder = ApplyFinalRules(phonemeBuilder, finalRules2);

    return (phonemeBuilder.MakeString());
}