Example #1
0
        /// <summary>
        /// Builds the array of assembly (package) names that will be searched for task
        /// implementations: the assembly declaring <see cref="PerfTask"/>, any assemblies
        /// named in the <c>alt.tasks.packages</c> config parameter, and all assemblies
        /// referenced by the host assembly. Duplicates are removed via the set.
        /// </summary>
        /// <param name="config">Configuration holding the optional <c>alt.tasks.packages</c> value.</param>
        /// <returns>Distinct assembly names to probe for task types.</returns>
        private string[] InitTasksPackages(Config config)
        {
            // LUCENENET specific - changing the logic a bit
            // to add all referenced assemblies by default.
            // The alt.tasks.packages parameter still exists, but
            // it is only necessary for assemblies that are not
            // referenced by the host assembly.

            ISet<string> result = new JCG.HashSet<string>();
            string alts = config.Get("alt.tasks.packages", null);

            // The assembly that declares PerfTask is always searched.
            result.Add(typeof(PerfTask).Assembly.GetName().Name);

            // Optional extras: comma-separated assembly names; TrimEnd() drops
            // trailing empty entries produced by a trailing comma.
            if (alts != null)
            {
                foreach (string alt in alts.Split(',').TrimEnd())
                {
                    result.Add(alt);
                }
            }

            // All assemblies referenced by the host assembly are searched by default.
            // (Previously this union + return was duplicated in both branches.)
            result.UnionWith(AssemblyUtils.GetReferencedAssemblies().Select(a => a.GetName().Name));

            return result.ToArray();
        }
        // TODO: Remove warning after API has been finalized
        /// <summary>
        /// WARNING: The collection is not necessarily in order of the positions. </summary>
        /// <returns> Collection of <see cref="T:byte[]"/> payloads </returns>
        /// <exception cref="System.IO.IOException"> if there is a low-level I/O error </exception>
        public override ICollection <byte[]> GetPayload()
        {
            var payloads = new JCG.HashSet <byte[]>();

            // Walk the linked cell list, collecting payloads from every cell
            // that has one available.
            var cell = first;
            while (cell != null)
            {
                if (cell.IsPayloadAvailable)
                {
                    payloads.UnionWith(cell.GetPayload());
                }
                cell = cell.next;
            }
            return payloads;
        }
Exemple #3
0
        /// <summary>
        /// Loads the stop-type entries from each configured file via the given
        /// resource loader into a freshly allocated <c>stopTypes</c> set.
        /// When no files are configured, <c>stopTypes</c> is left untouched.
        /// </summary>
        public virtual void Inform(IResourceLoader loader)
        {
            IList <string> fileNames = SplitFileNames(stopTypesFiles);

            if (fileNames.Count == 0)
            {
                return; // nothing configured; keep existing stopTypes as-is
            }

            stopTypes = new JCG.HashSet <string>();
            foreach (string fileName in fileNames)
            {
                stopTypes.UnionWith(GetLines(loader, fileName.Trim()));
            }
        }
        /// <summary>
        /// Returns all files in use by this segment: the wrapped info's own files,
        /// any live-docs (deletions) files, and any field-update files. </summary>
        public virtual ICollection <string> GetFiles()
        {
            // Seed the set with the wrapped SegmentInfo's files.
            ISet <string> allFiles = new JCG.HashSet <string>(Info.GetFiles());

            // TODO we could rely on TrackingDir.getCreatedFiles() (like we do for
            // updates) and then maybe even be able to remove LiveDocsFormat.files().

            // Live-docs files are tracked by the codec, not by SegmentInfo,
            // so they must be added separately.
            Info.Codec.LiveDocsFormat.Files(this, allFiles);

            // Field-update files are recorded per generation; fold each set in.
            foreach (ISet <string> updateFiles in genUpdatesFiles.Values)
            {
                allFiles.UnionWith(updateFiles);
            }

            return allFiles;
        }
        /// <summary>
        /// Seals a newly flushed segment: optionally packs its files into a compound
        /// file (CFS), writes the segment's .si metadata, and writes the live-docs
        /// (deletions) file if any docs were deleted during the flush. The write
        /// order (CFS before .si before deletes) is deliberate — see inline comments.
        /// On failure, this method only logs; cleanup is presumably handled by the
        /// caller — TODO confirm.
        /// </summary>
        internal virtual void SealFlushedSegment(FlushedSegment flushedSegment)
        {
            if (Debugging.AssertsEnabled)
            {
                Debugging.Assert(flushedSegment != null);
            }

            SegmentCommitInfo newSegment = flushedSegment.segmentInfo;

            // Record that this segment originated from a flush (vs. a merge).
            IndexWriter.SetDiagnostics(newSegment.Info, IndexWriter.SOURCE_FLUSH);

            // IOContext carries the doc count and estimated byte size of this flush
            // so directory implementations can make buffering/throttling decisions.
            IOContext context = new IOContext(new FlushInfo(newSegment.Info.DocCount, newSegment.GetSizeInBytes()));

            bool success = false;

            try
            {
                if (indexWriterConfig.UseCompoundFile)
                {
                    // Pack the segment files into a CFS; the original (now redundant)
                    // files returned by CreateCompoundFile are queued for deletion.
                    filesToDelete.UnionWith(IndexWriter.CreateCompoundFile(infoStream, directory, CheckAbort.NONE, newSegment.Info, context));
                    newSegment.Info.UseCompoundFile = true;
                }

                // Have codec write SegmentInfo.  Must do this after
                // creating CFS so that 1) .si isn't slurped into CFS,
                // and 2) .si reflects useCompoundFile=true change
                // above:
                codec.SegmentInfoFormat.SegmentInfoWriter.Write(directory, newSegment.Info, flushedSegment.fieldInfos, context);

                // TODO: ideally we would freeze newSegment here!!
                // because any changes after writing the .si will be
                // lost...

                // Must write deleted docs after the CFS so we don't
                // slurp the del file into CFS:
                if (flushedSegment.liveDocs != null)
                {
                    int delCount = flushedSegment.delCount;
                    if (Debugging.AssertsEnabled)
                    {
                        Debugging.Assert(delCount > 0);
                    }
                    if (infoStream.IsEnabled("DWPT"))
                    {
                        infoStream.Message("DWPT", "flush: write " + delCount + " deletes gen=" + flushedSegment.segmentInfo.DelGen);
                    }

                    // TODO: we should prune the segment if it's 100%
                    // deleted... but merge will also catch it.

                    // TODO: in the NRT case it'd be better to hand
                    // this del vector over to the
                    // shortly-to-be-opened SegmentReader and let it
                    // carry the changes; there's no reason to use
                    // filesystem as intermediary here.

                    SegmentCommitInfo info  = flushedSegment.segmentInfo;
                    Codec             codec = info.Info.Codec;
                    codec.LiveDocsFormat.WriteLiveDocs(flushedSegment.liveDocs, directory, info, delCount, context);
                    // Record the deletion count and bump the deletes generation so
                    // readers pick up the newly written del file.
                    newSegment.DelCount = delCount;
                    newSegment.AdvanceDelGen();
                }

                success = true;
            }
            finally
            {
                // On any exception above, emit a diagnostic; no cleanup here.
                if (!success)
                {
                    if (infoStream.IsEnabled("DWPT"))
                    {
                        infoStream.Message("DWPT", "hit exception creating compound file for newly flushed segment " + newSegment.Info.Name);
                    }
                }
            }
        }
        /// <summary>
        /// The <see cref="SubSpans"/> are ordered in the same doc, so there is a possible match.
        /// Compute the slop while making the match as short as possible by advancing
        /// all <see cref="SubSpans"/> except the last one in reverse order.
        /// </summary>
        /// <returns><c>true</c> when the accumulated slop is within <c>allowedSlop</c>.</returns>
        private bool ShrinkToAfterShortestMatch()
        {
            // The last sub-span anchors the match; its start/end seed the match window.
            matchStart = subSpans[subSpans.Length - 1].Start;
            matchEnd   = subSpans[subSpans.Length - 1].End;
            var possibleMatchPayloads = new JCG.HashSet <byte[]>();

            if (subSpans[subSpans.Length - 1].IsPayloadAvailable)
            {
                possibleMatchPayloads.UnionWith(subSpans[subSpans.Length - 1].GetPayload());
            }

            // Payload candidate from the sub-span currently being advanced;
            // re-captured each time that sub-span moves.
            IList <byte[]> possiblePayload = null;

            int matchSlop = 0;
            int lastStart = matchStart;
            int lastEnd   = matchEnd;

            // Walk the earlier sub-spans in reverse, advancing each one as far
            // forward as possible while it still precedes the next sub-span.
            for (int i = subSpans.Length - 2; i >= 0; i--)
            {
                Spans prevSpans = subSpans[i];
                if (collectPayloads && prevSpans.IsPayloadAvailable)
                {
                    possiblePayload = new List <byte[]>(prevSpans.GetPayload()); // LUCENENET specific - using copy constructor instead of AddRange()
                }

                int prevStart = prevSpans.Start;
                int prevEnd   = prevSpans.End;
                while (true) // Advance prevSpans until after (lastStart, lastEnd)
                {
                    if (!prevSpans.Next())
                    {
                        inSameDoc = false;
                        more      = false;
                        break; // Check remaining subSpans for final match.
                    }
                    else if (matchDoc != prevSpans.Doc)
                    {
                        inSameDoc = false; // The last subSpans is not advanced here.
                        break;             // Check remaining subSpans for last match in this document.
                    }
                    else
                    {
                        int ppStart = prevSpans.Start;
                        int ppEnd   = prevSpans.End; // Cannot avoid invoking .end()
                        if (!DocSpansOrdered(ppStart, ppEnd, lastStart, lastEnd))
                        {
                            break; // Check remaining subSpans.
                        } // prevSpans still before (lastStart, lastEnd)
                        else
                        {
                            // prevSpans advanced but is still ordered before the
                            // following sub-span: accept the tighter position and
                            // re-capture its payload.
                            prevStart = ppStart;
                            prevEnd   = ppEnd;
                            if (collectPayloads && prevSpans.IsPayloadAvailable)
                            {
                                possiblePayload = new List <byte[]>(prevSpans.GetPayload()); // LUCENENET specific - using copy constructor instead of AddRange()
                            }
                        }
                    }
                }

                if (collectPayloads && possiblePayload != null)
                {
                    possibleMatchPayloads.UnionWith(possiblePayload);
                }

                Debug.Assert(prevStart <= matchStart);
                if (matchStart > prevEnd) // Only non overlapping spans add to slop.
                {
                    matchSlop += (matchStart - prevEnd);
                }

                /* Do not break on (matchSlop > allowedSlop) here to make sure
                 * that subSpans[0] is advanced after the match, if any.
                 */
                matchStart = prevStart;
                lastStart  = prevStart;
                lastEnd    = prevEnd;
            }

            bool match = matchSlop <= allowedSlop;

            // Only commit collected payloads when the slop constraint is satisfied.
            if (collectPayloads && match && possibleMatchPayloads.Count > 0)
            {
                matchPayload.AddRange(possibleMatchPayloads);
            }

            return(match); // ordered and allowed slop
        }
Example #7
0
        /// <summary>
        /// With <c>NoDeletionPolicy</c> in effect, indexes concurrently from several
        /// threads while repeatedly reopening a reader, and verifies that every file
        /// ever observed in an index commit still exists on disk (i.e. nothing is
        /// deleted).
        /// </summary>
        public virtual void TestIndexing()
        {
            DirectoryInfo        tmpDir = CreateTempDir("TestNeverDelete");
            BaseDirectoryWrapper d      = NewFSDirectory(tmpDir);

            // We want to "see" files removed if Lucene removed
            // them.  this is still worth running on Windows since
            // some files the IR opens and closes.
            if (d is MockDirectoryWrapper)
            {
                ((MockDirectoryWrapper)d).NoDeleteOpenFile = false;
            }
            // NoDeletionPolicy.INSTANCE is the property under test: no commit
            // should ever have its files removed.
            RandomIndexWriter w = new RandomIndexWriter(Random, d, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)).SetIndexDeletionPolicy(NoDeletionPolicy.INSTANCE));

            // Small buffered-docs limit forces frequent flushes / new segment files.
            w.IndexWriter.Config.SetMaxBufferedDocs(TestUtil.NextInt32(Random, 5, 30));

            w.Commit();
            ThreadJob[] indexThreads = new ThreadJob[Random.Next(4)];
            long        stopTime     = (J2N.Time.NanoTime() / J2N.Time.MillisecondsPerNanosecond) + AtLeast(1000); // LUCENENET: Use NanoTime() rather than CurrentTimeMilliseconds() for more accurate/reliable results

            for (int x = 0; x < indexThreads.Length; x++)
            {
                indexThreads[x]      = new ThreadAnonymousClass(w, stopTime, NewStringField, NewTextField);
                indexThreads[x].Name = "Thread " + x;
                indexThreads[x].Start();
            }

            // Accumulates every file name seen in any commit; none may disappear.
            ISet <string> allFiles = new JCG.HashSet <string>();

            DirectoryReader r = DirectoryReader.Open(d);

            while (J2N.Time.NanoTime() / J2N.Time.MillisecondsPerNanosecond < stopTime) // LUCENENET: Use NanoTime() rather than CurrentTimeMilliseconds() for more accurate/reliable results
            {
                IndexCommit ic = r.IndexCommit;
                if (Verbose)
                {
                    Console.WriteLine("TEST: check files: " + ic.FileNames);
                }
                allFiles.UnionWith(ic.FileNames);
                // Make sure no old files were removed
                foreach (string fileName in allFiles)
                {
                    Assert.IsTrue(SlowFileExists(d, fileName), "file " + fileName + " does not exist");
                }
                DirectoryReader r2 = DirectoryReader.OpenIfChanged(r);
                if (r2 != null)
                {
                    r.Dispose();
                    r = r2;
                }
                Thread.Sleep(1);
            }
            r.Dispose();

            // Wait for all indexing threads to finish before closing resources.
            foreach (ThreadJob t in indexThreads)
            {
                t.Join();
            }
            w.Dispose();
            d.Dispose();

            System.IO.Directory.Delete(tmpDir.FullName, true);
        }
Example #8
0
        //-----------------------------------------------------------------------------
        //
        //   calcChainedFollowPos.    Modify the previously calculated followPos sets
        //                            to implement rule chaining.  NOT described by Aho
        //
        //-----------------------------------------------------------------------------
        /// <summary>
        /// Implements rule chaining for the RBBI state-machine builder: for every
        /// leaf node that can end a rule match, adds the followPos entries of any
        /// chain-in start node with the same character class, so matching can
        /// continue from the end of one rule into the start of another.
        /// </summary>
        internal virtual void CalcChainedFollowPos(RBBINode tree)
        {
            IList <RBBINode> endMarkerNodes = new JCG.List <RBBINode>();
            IList <RBBINode> leafNodes      = new JCG.List <RBBINode>();

            // get a list of all endmarker nodes.
            tree.FindNodes(endMarkerNodes, RBBINode.endMark);

            // get a list all leaf nodes
            tree.FindNodes(leafNodes, RBBINode.leafChar);

            // Collect all leaf nodes that can start matches for rules
            // with inbound chaining enabled, which is the union of the
            // firstPosition sets from each of the rule root nodes.

            IList <RBBINode> ruleRootNodes = new JCG.List <RBBINode>();

            AddRuleRootNodes(ruleRootNodes, tree);

            ISet <RBBINode> matchStartNodes = new JCG.HashSet <RBBINode>();

            foreach (RBBINode node in ruleRootNodes)
            {
                // fChainIn marks rules that allow chaining into them.
                if (node.fChainIn)
                {
                    matchStartNodes.UnionWith(node.fFirstPosSet);
                }
            }

            // Iterate over all leaf nodes,
            //
            foreach (RBBINode tNode in leafNodes)
            {
                RBBINode endNode = null;

                // Identify leaf nodes that correspond to overall rule match positions.
                //   These include an endMarkerNode in their followPos sets.
                foreach (RBBINode endMarkerNode in endMarkerNodes)
                {
                    if (tNode.fFollowPos.Contains(endMarkerNode))
                    {
                        endNode = tNode;
                        break;
                    }
                }
                if (endNode == null)
                {
                    // node wasn't an end node.  Try again with the next.
                    continue;
                }

                // We've got a node that can end a match.

                // Line Break Specific hack:  If this node's val correspond to the $CM char class,
                //                            don't chain from it.
                // TODO:  Add rule syntax for this behavior, get specifics out of here and
                //        into the rule file.
                if (fRB.fLBCMNoChain)
                {
                    int c = this.fRB.fSetBuilder.GetFirstChar(endNode.fVal);
                    if (c != -1)
                    {
                        // c == -1 occurs with sets containing only the {eof} marker string.
                        int cLBProp = UChar.GetIntPropertyValue(c, UProperty.Line_Break);
                        if (cLBProp == LineBreak.CombiningMark)
                        {
                            continue;
                        }
                    }
                }


                // Now iterate over the nodes that can start a match, looking for ones
                //   with the same char class as our ending node.
                foreach (RBBINode startNode in matchStartNodes)
                {
                    if (startNode.fType != RBBINode.leafChar)
                    {
                        continue;
                    }

                    if (endNode.fVal == startNode.fVal)
                    {
                        // The end val (character class) of one possible match is the
                        //   same as the start of another.

                        // Add all nodes from the followPos of the start node to the
                        //  followPos set of the end node, which will have the effect of
                        //  letting matches transition from a match state at endNode
                        //  to the second char of a match starting with startNode.
                        endNode.fFollowPos.UnionWith(startNode.fFollowPos);
                    }
                }
            }
        }
Example #9
0
        /// <summary>
        /// Encodes an input string into an output phonetic representation, given a set of possible origin languages.
        /// </summary>
        /// <param name="input">String to phoneticise; a string with dashes or spaces separating each word.</param>
        /// <param name="languageSet">Set of possible origin languages used to select the applicable rules.</param>
        /// <returns>A phonetic representation of the input; a string containing '-'-separated phonetic representations of the input.</returns>
        public virtual string Encode(string input, LanguageSet languageSet)
        {
            IDictionary <string, IList <Rule> > rules = Rule.GetInstanceMap(this.nameType, RuleType.RULES, languageSet);
            // rules common across many (all) languages
            IDictionary <string, IList <Rule> > finalRules1 = Rule.GetInstanceMap(this.nameType, this.ruleType, "common");
            // rules that apply to a specific language that may be ambiguous or wrong if applied to other languages
            IDictionary <string, IList <Rule> > finalRules2 = Rule.GetInstanceMap(this.nameType, this.ruleType, languageSet);

            // tidy the input
            // lower case is a locale-dependent operation
            input = input.ToLowerInvariant().Replace('-', ' ').Trim();

            if (this.nameType == NameType.GENERIC)
            {
                // "2 - 0" is an artifact of the Java port (substring(0, 2) there);
                // it evaluates to 2.
                if (input.Length >= 2 && input.Substring(0, 2 - 0).Equals("d'", StringComparison.Ordinal))
                { // check for d'
                    // Encode both with and without the d' prefix and return both variants.
                    string remainder = input.Substring(2);
                    string combined  = "d" + remainder;
                    return("(" + Encode(remainder) + ")-(" + Encode(combined) + ")");
                }
                foreach (string l in NAME_PREFIXES[this.nameType])
                {
                    // handle generic prefixes
                    if (input.StartsWith(l + " ", StringComparison.Ordinal))
                    {
                        // check for any prefix in the words list
                        string remainder = input.Substring(l.Length + 1); // input without the prefix
                        string combined  = l + remainder;                 // input with prefix without space
                        return("(" + Encode(remainder) + ")-(" + Encode(combined) + ")");
                    }
                }
            }

            IList <string> words  = WHITESPACE.Split(input).TrimEnd();
            ISet <string>  words2 = new JCG.HashSet <string>();

            // special-case handling of word prefixes based upon the name type
            switch (this.nameType)
            {
            case NameType.SEPHARDIC:
                // For Sephardic names, keep only the part after the last apostrophe
                // of each word, then drop known name prefixes.
                foreach (string aWord in words)
                {
                    string[] parts    = aWord.Split('\'').TrimEnd();
                    string   lastPart = parts[parts.Length - 1];
                    words2.Add(lastPart);
                }
                words2.ExceptWith(NAME_PREFIXES[this.nameType]);
                break;

            case NameType.ASHKENAZI:
                // Keep all words but drop known name prefixes.
                words2.UnionWith(words);
                words2.ExceptWith(NAME_PREFIXES[this.nameType]);
                break;

            case NameType.GENERIC:
                words2.UnionWith(words);
                break;

            default:
                throw new InvalidOperationException("Unreachable case: " + this.nameType);
            }

            if (this.concat)
            {
                // concat mode enabled
                input = Join(words2, " ");
            }
            else if (words2.Count == 1)
            {
                // not a multi-word name
                //input = words.iterator().next();
                input = words[0];
            }
            else
            {
                // encode each word in a multi-word name separately (normally used for approx matches)
                StringBuilder result = new StringBuilder();
                foreach (string word in words2)
                {
                    result.Append("-").Append(Encode(word));
                }
                // return the result without the leading "-"
                return(result.ToString(1, result.Length - 1));
            }

            PhonemeBuilder phonemeBuilder = PhonemeBuilder.Empty(languageSet);

            // loop over each char in the input - we will handle the increment manually
            // (RulesApplication reports how far it consumed via its I property)
            for (int i = 0; i < input.Length;)
            {
                RulesApplication rulesApplication =
                    new RulesApplication(rules, input, phonemeBuilder, i, maxPhonemes).Invoke();
                i = rulesApplication.I;
                phonemeBuilder = rulesApplication.PhonemeBuilder;
            }

            // Apply the general rules
            phonemeBuilder = ApplyFinalRules(phonemeBuilder, finalRules1);
            // Apply the language-specific rules
            phonemeBuilder = ApplyFinalRules(phonemeBuilder, finalRules2);

            return(phonemeBuilder.MakeString());
        }