public virtual void SpecialScriptPreLayoutProcessing()
        {
            // Scenario: THAI_TEXT is split in half across two text renderers that sit on one
            // line, separated by a right-floating table and followed by a regular table.
            PdfDocument pdfDoc    = new PdfDocument(new PdfWriter(new MemoryStream()));
            Document    layoutDoc = new Document(pdfDoc);
            int         splitAt   = THAI_TEXT.Length / 2;
            PdfFont     thaiFont  = PdfFontFactory.CreateFont(THAI_FONT, PdfEncodings.IDENTITY_H);

            string headText = THAI_TEXT.JSubstring(0, splitAt);
            TextRenderer headRenderer = new TextRenderer(new Text(headText));
            headRenderer.SetProperty(Property.FONT, thaiFont);
            headRenderer.SetText(headText);

            string tailText = THAI_TEXT.Substring(splitAt);
            TextRenderer tailRenderer = new TextRenderer(new Text(tailText));
            tailRenderer.SetProperty(Property.FONT, thaiFont);
            tailRenderer.SetText(tailText);

            TableRenderer floatingTable = new TableRenderer(new Table(3));
            floatingTable.SetProperty(Property.FLOAT, FloatPropertyValue.RIGHT);
            TableRenderer plainTable = new TableRenderer(new Table(3));

            LineRenderer line = new LineRenderer();
            line.SetParent(layoutDoc.GetRenderer());
            line.AddChild(headRenderer);
            line.AddChild(floatingTable);
            line.AddChild(tailRenderer);
            line.AddChild(plainTable);

            // Step 1: the sequence starting at child 0 must span both text halves and
            // report the floating table (child index 1) as the only floating element.
            LineRenderer.SpecialScriptsContainingTextRendererSequenceInfo sequenceInfo =
                line.GetSpecialScriptsContainingTextRendererSequenceInfo(0);
            int         sequentialCount = sequenceInfo.numberOfSequentialTextRenderers;
            String      sequentialText  = sequenceInfo.sequentialTextContent;
            IList<int>  floatingIndices = sequenceInfo.indicesOfFloating;

            NUnit.Framework.Assert.AreEqual(3, sequentialCount);
            NUnit.Framework.Assert.AreEqual(THAI_TEXT, sequentialText);
            NUnit.Framework.Assert.AreEqual(1, floatingIndices.Count);
            NUnit.Framework.Assert.AreEqual(1, (int)floatingIndices[0]);

            // Step 2: distribute break points expressed against the whole text.
            IList<int> allBreaks = new List<int>(JavaUtil.ArraysAsList(3, 8, 10, 12, 15, 20, 23, 26, 28, 30, 36));
            line.DistributePossibleBreakPointsOverSequentialTextRenderers(0, sequentialCount, allBreaks, floatingIndices);

            IList<int> headBreaks = headRenderer.GetSpecialScriptsWordBreakPoints();
            NUnit.Framework.Assert.IsNotNull(headBreaks);
            IList<int> tailBreaks = tailRenderer.GetSpecialScriptsWordBreakPoints();
            NUnit.Framework.Assert.IsNotNull(tailBreaks);

            // Breaks at list positions 0..4 are expected on the first renderer, the rest on the second.
            int        lastHeadBreakIndex = 4;
            IList<int> expectedHeadBreaks = allBreaks.SubList(0, lastHeadBreakIndex + 1);
            IList<int> expectedTailBreaks = allBreaks.SubList(lastHeadBreakIndex + 1, allBreaks.Count);

            NUnit.Framework.Assert.AreEqual(expectedHeadBreaks, headBreaks);

            // The second renderer's break points are relative to its own text, so shift by the split offset.
            for (int k = 0; k < expectedTailBreaks.Count; k++)
            {
                expectedTailBreaks[k] -= splitAt;
            }
            NUnit.Framework.Assert.AreEqual(expectedTailBreaks, tailBreaks);
        }
Example #2
0
        /// <summary>
        /// Builds a relation triple over the tokens "blue cats play with yarn", where every
        /// token is created with -1 as the second <c>MkWord</c> argument.
        /// </summary>
        /// <returns>A triple whose three spans are "blue cats", "play with" and "yarn", in that order.</returns>
        protected internal virtual RelationTriple BlueCatsPlayWithYarnNoIndices()
        {
            IList<CoreLabel> tokens = new List<CoreLabel>
            {
                IETestUtils.MkWord("blue", -1),
                IETestUtils.MkWord("cats", -1),
                IETestUtils.MkWord("play", -1),
                IETestUtils.MkWord("with", -1),
                IETestUtils.MkWord("yarn", -1)
            };

            var blueCats = tokens.SubList(0, 2);
            var playWith = tokens.SubList(2, 4);
            var yarn     = tokens.SubList(4, 5);

            return new RelationTriple(blueCats, playWith, yarn);
        }
Example #3
0
        /// <summary>
        /// Builds a relation triple over the tokens "yarn blue cats play with", where each
        /// token receives its position (0..4) as the second <c>MkWord</c> argument.
        /// </summary>
        /// <returns>A triple whose three spans are "blue cats", "play with" and "yarn", in that order.</returns>
        protected internal virtual RelationTriple YarnBlueCatsPlayWith()
        {
            IList<CoreLabel> tokens = new List<CoreLabel>
            {
                IETestUtils.MkWord("yarn", 0),
                IETestUtils.MkWord("blue", 1),
                IETestUtils.MkWord("cats", 2),
                IETestUtils.MkWord("play", 3),
                IETestUtils.MkWord("with", 4)
            };

            var blueCats = tokens.SubList(1, 3);
            var playWith = tokens.SubList(3, 5);
            var yarn     = tokens.SubList(0, 1);

            return new RelationTriple(blueCats, playWith, yarn);
        }
        /// <summary>
        /// Decodes consecutive diagnostic messages from <paramref name="RawData"/> and publishes each one.
        /// </summary>
        /// <param name="RawData">
        /// Raw payload. The first two bytes are skipped before decoding starts; the rest is consumed
        /// message by message for as long as at least two bytes remain.
        /// </param>
        public void ProcessJ1708Diagnostic(List<byte> RawData)
        {
            // Read for parity with the wire layout; the loop below is bounded by the
            // remaining byte count, not by this value.
            byte totalMessages = RawData[1];

            List<byte> remaining = RawData.SubList(2);

            while (remaining.Count >= 2)
            {
                var message = new J1708DiagnosticMessage(remaining);

                // Advance past the bytes this message occupied before publishing it.
                remaining = remaining.SubList(message.MessageLength);
                AddAndPublish(message);
            }
        }
Example #5
0
        /// <summary>
        /// Randomly proposes a single merge over the segments that are not already being merged.
        /// </summary>
        /// <param name="mergeTrigger">Not consulted by this implementation.</param>
        /// <param name="segmentInfos">Segments currently in the index.</param>
        /// <returns>
        /// A specification holding one merge over a random prefix of the shuffled candidates, or
        /// <c>null</c> when there are fewer than two candidates or the random draw declines to merge.
        /// </returns>
        public override MergeSpecification FindMerges(MergeTrigger mergeTrigger, SegmentInfos segmentInfos)
        {
            IList<SegmentCommitInfo>       candidates = new List<SegmentCommitInfo>();
            ICollection<SegmentCommitInfo> merging    = base.m_writer.Get().MergingSegments;

            foreach (SegmentCommitInfo sipc in segmentInfos.Segments)
            {
                if (merging.Contains(sipc))
                {
                    continue;
                }
                candidates.Add(sipc);
            }

            int candidateCount = candidates.Count;

            if (candidateCount <= 1)
            {
                return null;
            }

            // With more than 30 candidates a merge is always proposed; otherwise only on a 1-in-5 draw.
            if (candidateCount <= 30 && random.Next(5) != 3)
            {
                return null;
            }

            candidates.Shuffle(random);

            // TODO: sometimes make more than 1 merge?
            MergeSpecification mergeSpec = new MergeSpecification();
            int segsToMerge = TestUtil.NextInt32(random, 1, candidateCount);
            mergeSpec.Add(new OneMerge(candidates.SubList(0, segsToMerge)));

            return mergeSpec;
        }
Example #6
0
        /// <summary>
        /// Randomly proposes a single merge over the segments that are not already being merged.
        /// </summary>
        /// <param name="mergeTrigger">Not consulted by this implementation.</param>
        /// <param name="segmentInfos">Segments currently in the index.</param>
        /// <returns>
        /// A specification holding one merge over a random prefix of the shuffled candidates, or
        /// <c>null</c> when no merge is proposed.
        /// </returns>
        public override MergeSpecification FindMerges(MergeTrigger? mergeTrigger, SegmentInfos segmentInfos)
        {
            MergeSpecification mergeSpec = null;

            IList<SegmentCommitInfo>       segments = new List<SegmentCommitInfo>();
            ICollection<SegmentCommitInfo> merging  = Writer.Get().MergingSegments;

            foreach (SegmentCommitInfo sipc in segmentInfos.Segments)
            {
                if (!merging.Contains(sipc))
                {
                    segments.Add(sipc);
                }
            }

            // Only segments that are not already merging count; the total segment count
            // of the index is irrelevant here, so it is no longer fetched up front.
            int numSegments = segments.Count;

            if (numSegments > 1 && (numSegments > 30 || Random.Next(5) == 3))
            {
                segments = CollectionsHelper.Shuffle(segments);

                // TODO: sometimes make more than 1 merge?
                mergeSpec = new MergeSpecification();
                int segsToMerge = TestUtil.NextInt(Random, 1, numSegments);
                mergeSpec.Add(new OneMerge(segments.SubList(0, segsToMerge)));
            }

            return mergeSpec;
        }
Example #7
0
        /// <summary>
        /// Picks a random set of facet categories: 1-3 from <c>CATEGORIES_A</c>, 1-2 from
        /// <c>CATEGORIES_B</c>, plus one each from <c>CATEGORIES_C</c> and <c>CATEGORIES_D</c>.
        /// </summary>
        /// <param name="random">
        /// Generator that drives both the number of A/B categories and which ones are chosen.
        /// </param>
        /// <returns>The selected categories; no A or B category appears twice.</returns>
        private static IList<FacetField> RandomCategories(Random random)
        {
            // add random categories from the two dimensions, ensuring that the same
            // category is not added twice.
            int numFacetsA = random.Next(3) + 1; // 1-3
            int numFacetsB = random.Next(2) + 1; // 1-2

            List<FacetField> categories_a = new List<FacetField>();
            categories_a.AddRange(CATEGORIES_A);
            List<FacetField> categories_b = new List<FacetField>();
            categories_b.AddRange(CATEGORIES_B);

            // Shuffle with the caller-supplied generator (not the ambient one) so that the
            // counts drawn above and the selection itself come from the same source.
            categories_a.Shuffle(random);
            categories_b.Shuffle(random);

            List<FacetField> categories = new List<FacetField>();
            categories.AddRange(categories_a.SubList(0, numFacetsA));
            categories.AddRange(categories_b.SubList(0, numFacetsB));

            // add the NO_PARENT categories
            categories.Add(CATEGORIES_C[Util.LuceneTestCase.Random.Next(NUM_CHILDREN_CP_C)]);
            categories.Add(CATEGORIES_D[Util.LuceneTestCase.Random.Next(NUM_CHILDREN_CP_D)]);

            return categories;
        }
Example #8
0
        /// <summary>
        /// Advances to the next lemma: writes its stem into the term attribute and its
        /// split tags into the tags attribute (an empty list when the lemma has no tag).
        /// </summary>
        private void PopNextLemma()
        {
            // One tag (concatenated) per lemma.
            WordData current = lemmaList[lemmaListIndex++];
            termAtt.SetEmpty().Append(current.GetStem().ToString());

            var tag = current.GetTag();
            if (tag == null)
            {
                tagsAtt.Tags = Collections.EmptyList<StringBuilder>();
                return;
            }

            string[] parts = lemmaSplitter.Split(tag.ToString());
            int slot = 0;

            foreach (string part in parts)
            {
                // Reuse pooled builders; grow the pool only when this lemma has more tags than any before it.
                if (tagsList.Count <= slot)
                {
                    tagsList.Add(new StringBuilder());
                }

                StringBuilder reused = tagsList[slot];
                reused.Length = 0;
                reused.Append(part);
                slot++;
            }

            tagsAtt.Tags = tagsList.SubList(0, parts.Length);
        }
        /// <summary>
        /// Scores <paramref name="word"/> under the tag carried by <paramref name="iTW"/> by summing,
        /// character by character, the log probability of each symbol given its context window,
        /// then applying the configured length penalty.
        /// </summary>
        /// <param name="iTW">Tagged word whose <c>tag</c> id is looked up in <c>tagIndex</c>.</param>
        /// <param name="loc">Not used by this implementation.</param>
        /// <param name="word">The word to score; must not equal <c>LexiconConstants.Boundary</c>.</param>
        /// <param name="featureSpec">Not used by this implementation.</param>
        /// <returns>The accumulated log score, narrowed to <c>float</c>.</returns>
        public virtual float Score(IntTaggedWord iTW, int loc, string word, string featureSpec)
        {
            string tag = tagIndex.Get(iTW.tag);

            System.Diagnostics.Debug.Assert(!word.Equals(LexiconConstants.Boundary));
            char[] chars = word.ToCharArray();
            IList<ISerializable> charList = new List<ISerializable>(chars.Length + ContextLength + 1);

            // this starts of storing Symbol's and then starts storing String's. Clean this up someday!
            // charList is constructed backward
            // END_WORD char[length-1] char[length-2] ... char[0] BEGIN_WORD BEGIN_WORD
            charList.Add(ChineseCharacterBasedLexicon.Symbol.EndWord);
            for (int i = chars.Length - 1; i >= 0; i--)
            {
                ChineseCharacterBasedLexicon.Symbol ch = ChineseCharacterBasedLexicon.Symbol.CannonicalSymbol(chars[i]);
                if (knownChars.Contains(ch))
                {
                    charList.Add(ch);
                }
                else
                {
                    charList.Add(UnknownCharClass(ch));
                }
            }
            for (int i_1 = 0; i_1 < ContextLength; i_1++)
            {
                charList.Add(ChineseCharacterBasedLexicon.Symbol.BeginWord);
            }

            double score = 0.0;

            // Bind the loop bound to the list that is actually indexed. The loop only replaces
            // entries, so the count is stable, and size - ContextLength is the largest start for
            // which the window [i, i + ContextLength + 1) still fits inside charList.
            int size = charList.Count;

            for (int i_2 = 0; i_2 < size - ContextLength; i_2++)
            {
                ChineseCharacterBasedLexicon.Symbol nextChar = (ChineseCharacterBasedLexicon.Symbol)charList[i_2];

                // Overwrite the predicted symbol with the tag so the window reads: tag + following context.
                charList.Set(i_2, tag);
                double charScore = GetBackedOffDist(charList.SubList(i_2, i_2 + ContextLength + 1)).ProbabilityOf(nextChar);
                score += Math.Log(charScore);
            }

            switch (penaltyType)
            {
            case 0:
            {
                // no length penalty
                break;
            }

            case 1:
            {
                // penalty grows quadratically with the word length
                score -= (chars.Length * (chars.Length + 1)) * (lengthPenalty / 2);
                break;
            }

            case 2:
            {
                // penalty grows linearly with the word length
                score -= (chars.Length - 1) * lengthPenalty;
                break;
            }
            }
            return (float)score;
        }
Example #10
0
        /// <summary>
        /// Returns the merges necessary to merge the index, taking the max merge
        /// size or max merge docs into consideration. This method attempts to respect
        /// the <c>maxNumSegments</c> parameter, however it might be, due to size
        /// constraints, that more than that number of segments will remain in the
        /// index. Also, this method does not guarantee that exactly
        /// <c>maxNumSegments</c> will remain, but &lt;= that number.
        /// </summary>
        /// <remarks>
        /// Segments are scanned from index <c>last - 1</c> down to 0. A segment that exceeds the
        /// size or doc-count limit is skipped and closes the run of segments to its right; a run
        /// that reaches the merge factor is emitted as one merge.
        /// NOTE(review): <c>maxNumSegments</c> is not referenced anywhere in this method body.
        /// </remarks>
        /// <param name="infos">Segments of the index.</param>
        /// <param name="maxNumSegments">Requested upper bound on remaining segments (see the note above).</param>
        /// <param name="last">Upper end of the range to consider; scanning starts at <c>last - 1</c>.</param>
        /// <returns>The merges to run, or <c>null</c> when no merge was added.</returns>
        private MergeSpecification FindForcedMergesSizeLimit(SegmentInfos infos, int maxNumSegments, int last)
        {
            MergeSpecification       spec     = new MergeSpecification();
            List <SegmentCommitInfo> segments = infos.AsList();

            // 'start' walks downwards; 'last' marks the upper end of the run currently being collected.
            int start = last - 1;

            while (start >= 0)
            {
                SegmentCommitInfo info = infos.Info(start);
                if (Size(info) > MaxMergeSizeForForcedMerge || SizeDocs(info) > MaxMergeDocs_Renamed)
                {
                    if (Verbose())
                    {
                        Message("findForcedMergesSizeLimit: skip segment=" + info + ": size is > maxMergeSize (" + MaxMergeSizeForForcedMerge + ") or sizeDocs is > maxMergeDocs (" + MaxMergeDocs_Renamed + ")");
                    }
                    // need to skip that segment + add a merge for the 'right' segments,
                    // unless there is only 1 which is merged.
                    if (last - start - 1 > 1 || (start != last - 1 && !IsMerged(infos, infos.Info(start + 1))))
                    {
                        // there is more than 1 segment to the right of
                        // this one, or a mergeable single segment.
                        spec.Add(new OneMerge(segments.SubList(start + 1, last)));
                    }
                    // The oversized segment becomes the new upper end, so it is excluded from later runs.
                    last = start;
                }
                else if (last - start == MergeFactor_Renamed)
                {
                    // mergeFactor eligible segments were found, add them as a merge.
                    spec.Add(new OneMerge(segments.SubList(start, last)));
                    last = start;
                }
                --start;
            }

            // Add any left-over segments, unless there is just 1
            // already fully merged
            // The loop exits with start == -1; the pre-increment inside the condition resets it to 0.
            // It only runs when last > 0, which is also the only case in which 'start' is read afterwards.
            if (last > 0 && (++start + 1 < last || !IsMerged(infos, infos.Info(start))))
            {
                spec.Add(new OneMerge(segments.SubList(start, last)));
            }

            return(spec.Merges.Count == 0 ? null : spec);
        }
        /// <summary>
        /// Exercises <c>CorruptReplicasMap</c>: initial state, argument bounds of
        /// <c>GetCorruptReplicaBlockIds</c>, size bookkeeping on add/remove, and paged id retrieval.
        /// </summary>
        public virtual void TestCorruptReplicaInfo()
        {
            string wrongCountMessage = "Number of corrupt blocks not returning correctly";
            CorruptReplicasMap replicas = new CorruptReplicasMap();

            // Make sure initial values are returned correctly
            NUnit.Framework.Assert.AreEqual("Number of corrupt blocks must initially be 0", 0, replicas.Size());
            NUnit.Framework.Assert.IsNull("Param n cannot be less than 0", replicas.GetCorruptReplicaBlockIds(-1, null));
            NUnit.Framework.Assert.IsNull("Param n cannot be greater than 100", replicas.GetCorruptReplicaBlockIds(101, null));

            long[] noIds = replicas.GetCorruptReplicaBlockIds(0, null);
            NUnit.Framework.Assert.IsNotNull("n = 0 must return non-null", noIds);
            NUnit.Framework.Assert.AreEqual("n = 0 must return an empty list", 0, noIds.Length);

            // create a list of block_ids. A list is used to allow easy validation of the
            // output of getCorruptReplicaBlockIds
            int         blockIdCount = 140;
            IList<long> blockIds     = new List<long>();
            for (int id = 0; id < blockIdCount; id++)
            {
                blockIds.AddItem((long)id);
            }

            DatanodeDescriptor firstNode  = DFSTestUtil.GetLocalDatanodeDescriptor();
            DatanodeDescriptor secondNode = DFSTestUtil.GetLocalDatanodeDescriptor();

            // Size counts blocks, not replicas: a second node for block 1 must not change it.
            AddToCorruptReplicasMap(replicas, GetBlock(0), firstNode);
            NUnit.Framework.Assert.AreEqual(wrongCountMessage, 1, replicas.Size());
            AddToCorruptReplicasMap(replicas, GetBlock(1), firstNode);
            NUnit.Framework.Assert.AreEqual(wrongCountMessage, 2, replicas.Size());
            AddToCorruptReplicasMap(replicas, GetBlock(1), secondNode);
            NUnit.Framework.Assert.AreEqual(wrongCountMessage, 2, replicas.Size());

            replicas.RemoveFromCorruptReplicasMap(GetBlock(1));
            NUnit.Framework.Assert.AreEqual(wrongCountMessage, 1, replicas.Size());
            replicas.RemoveFromCorruptReplicasMap(GetBlock(0));
            NUnit.Framework.Assert.AreEqual(wrongCountMessage, 0, replicas.Size());

            foreach (long blockId in blockIds)
            {
                AddToCorruptReplicasMap(replicas, GetBlock(blockId), firstNode);
            }
            NUnit.Framework.Assert.AreEqual(wrongCountMessage, blockIdCount, replicas.Size());

            NUnit.Framework.Assert.IsTrue(
                "First five block ids not returned correctly ",
                Arrays.Equals(new long[] { 0, 1, 2, 3, 4 }, replicas.GetCorruptReplicaBlockIds(5, null)));

            Log.Info(replicas.GetCorruptReplicaBlockIds(10, 7L));
            Log.Info(blockIds.SubList(7, 18));

            NUnit.Framework.Assert.IsTrue(
                "10 blocks after 7 not returned correctly ",
                Arrays.Equals(new long[] { 8, 9, 10, 11, 12, 13, 14, 15, 16, 17 }, replicas.GetCorruptReplicaBlockIds(10, 7L)));
        }
Example #12
0
        /// <summary>
        /// Proposes merges for every non-merging segment whose percentage of deleted documents
        /// exceeds <c>ForceMergeDeletesPctAllowed_Renamed</c>.
        /// </summary>
        /// <param name="infos">Segments of the index.</param>
        /// <returns>
        /// Merges over the eligible segments, sorted with <c>SegmentByteSizeDescending</c> and grouped
        /// in batches of at most <c>MaxMergeAtOnceExplicit_Renamed</c>; <c>null</c> when nothing is eligible.
        /// </returns>
        public override MergeSpecification FindForcedDeletesMerges(SegmentInfos infos)
        {
            if (Verbose())
            {
                Message("findForcedDeletesMerges infos=" + Writer.Get().SegString(infos.Segments) + " forceMergeDeletesPctAllowed=" + ForceMergeDeletesPctAllowed_Renamed);
            }

            List<SegmentCommitInfo>        eligible = new List<SegmentCommitInfo>();
            ICollection<SegmentCommitInfo> merging  = Writer.Get().MergingSegments;

            foreach (SegmentCommitInfo candidate in infos.Segments)
            {
                double pctDeletes = 100.0 * ((double)Writer.Get().NumDeletedDocs(candidate)) / candidate.Info.DocCount;
                if (pctDeletes > ForceMergeDeletesPctAllowed_Renamed && !merging.Contains(candidate))
                {
                    eligible.Add(candidate);
                }
            }

            if (eligible.Count == 0)
            {
                return null;
            }

            eligible.Sort(new SegmentByteSizeDescending(this));

            if (Verbose())
            {
                Message("eligible=" + eligible);
            }

            // At least one segment is eligible here, so the loop below always adds a merge.
            MergeSpecification spec = new MergeSpecification();
            int batchStart = 0;

            while (batchStart < eligible.Count)
            {
                // Don't enforce max merged size here: app is explicitly
                // calling forceMergeDeletes, and knows this may take a
                // long time / produce big segments (like forceMerge):
                int batchEnd = Math.Min(batchStart + MaxMergeAtOnceExplicit_Renamed, eligible.Count);

                OneMerge merge = new OneMerge(eligible.SubList(batchStart, batchEnd));
                if (Verbose())
                {
                    Message("add merge=" + Writer.Get().SegString(merge.Segments));
                }
                spec.Add(merge);

                batchStart = batchEnd;
            }

            return spec;
        }
            /// <summary>
            /// Creates the initial clustering state for <paramref name="doc"/>: one singleton cluster
            /// per mention, the sorted list of candidate mention pairs (truncated by the score and
            /// early-stop thresholds), and one global-feature record per pair.
            /// </summary>
            /// <param name="doc">Document providing mentions and pairwise scores.</param>
            public State(ClustererDataLoader.ClustererDoc doc)
            {
                this.doc          = doc;
                currentDocId      = doc.id;
                hashedScores      = new Dictionary<Clusterer.MergeKey, bool>();
                hashedCosts       = new Dictionary<long, double>();
                clusters          = new List<Clusterer.Cluster>();
                mentionToCluster  = new Dictionary<int, Clusterer.Cluster>();
                hash              = 0;

                // Every mention starts in its own cluster; the state hash folds in each cluster's hash.
                foreach (int mention in doc.mentions)
                {
                    Clusterer.Cluster singleton = new Clusterer.Cluster(mention);
                    clusters.Add(singleton);
                    mentionToCluster[mention] = singleton;
                    hash ^= singleton.hash * 7;
                }

                IList<Pair<int, int>>    allPairs   = new List<Pair<int, int>>(doc.classificationScores.KeySet());
                ICounter<Pair<int, int>> pairScores = UseRanking ? doc.rankingScores : doc.classificationScores;
                allPairs.Sort(null);

                // Find the first position at which either stop rule fires; pairs before it are kept.
                int cutoff = 0;
                while (cutoff < allPairs.Count)
                {
                    double score = pairScores.GetCount(allPairs[cutoff]);
                    bool belowMinScore = score < MinPairwiseScore && cutoff > MinPairs;
                    bool earlyStop     = cutoff >= EarlyStopThreshold && cutoff / score > EarlyStopVal;
                    if (belowMinScore || earlyStop)
                    {
                        break;
                    }
                    cutoff++;
                }
                mentionPairs = allPairs.SubList(0, cutoff);

                ICounter<int> seenAnaphors    = new ClassicCounter<int>();
                ICounter<int> seenAntecedents = new ClassicCounter<int>();

                // Features are recorded for every pair in allPairs, not only for the kept prefix.
                globalFeatures = new List<Clusterer.GlobalFeatures>();
                for (int pairIndex = 0; pairIndex < allPairs.Count; pairIndex++)
                {
                    Pair<int, int> pair = allPairs[pairIndex];

                    Clusterer.GlobalFeatures features = new Clusterer.GlobalFeatures();
                    features.currentIndex = pairIndex;
                    // Evaluated before this pair's own anaphor is counted.
                    features.anaphorSeen  = seenAnaphors.ContainsKey(pair.second);
                    features.size         = mentionPairs.Count;
                    features.docSize      = doc.mentions.Count / 300.0;
                    globalFeatures.Add(features);

                    seenAnaphors.IncrementCount(pair.second);
                    seenAntecedents.IncrementCount(pair.first);
                }

                currentIndex = 0;
                SetClusters();
            }
Example #14
0
        /// <summary>
        /// Indexes random terms, then checks for random term subsets that a terms filter over
        /// match-all returns the same hits, in the same order, as a constant-score boolean
        /// SHOULD query over the same terms.
        /// </summary>
        public void TestRandom()
        {
            Directory         directory = NewDirectory();
            RandomIndexWriter writer    = new RandomIndexWriter(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
                this,
#endif
                Random, directory);
            int          docCount    = AtLeast(100);
            bool         singleField = Random.NextBoolean();
            IList<Term>  terms       = new List<Term>();

            for (int d = 0; d < docCount; d++)
            {
                string field = "field" + (singleField ? "1" : Random.Next(100).ToString(CultureInfo.InvariantCulture));
                string value = TestUtil.RandomRealisticUnicodeString(Random);
                terms.Add(new Term(field, value));

                Document doc = new Document();
                doc.Add(NewStringField(field, value, Field.Store.YES));
                writer.AddDocument(doc);
            }

            IndexReader reader = writer.GetReader();
            writer.Dispose();

            IndexSearcher searcher = NewSearcher(reader);
            int queryCount = AtLeast(10);

            for (int q = 0; q < queryCount; q++)
            {
                // The first numTerms entries of the freshly shuffled list feed both the query and the filter.
                terms.Shuffle(Random);
                int numTerms = 1 + Random.Next(Math.Min(BooleanQuery.MaxClauseCount, terms.Count));

                BooleanQuery anyTerm = new BooleanQuery();
                for (int t = 0; t < numTerms; t++)
                {
                    anyTerm.Add(new BooleanClause(new TermQuery(terms[t]), Occur.SHOULD));
                }
                TopDocs queryResult = searcher.Search(new ConstantScoreQuery(anyTerm), reader.MaxDoc);

                MatchAllDocsQuery matchAll     = new MatchAllDocsQuery();
                TermsFilter       filter       = TermsFilter(singleField, terms.SubList(0, numTerms));
                TopDocs           filterResult = searcher.Search(matchAll, filter, reader.MaxDoc);

                assertEquals(filterResult.TotalHits, queryResult.TotalHits);

                ScoreDoc[] filterDocs = filterResult.ScoreDocs;
                for (int h = 0; h < filterDocs.Length; h++)
                {
                    assertEquals(filterDocs[h].Doc, queryResult.ScoreDocs[h].Doc);
                }
            }

            reader.Dispose();
            directory.Dispose();
        }
        //public byte OccurrenceCount { get; private set; }

        /// <summary>
        /// Decodes one diagnostic message from the front of <paramref name="rawData"/>.
        /// </summary>
        /// <remarks>
        /// Byte 0 is the identifier. Byte 1 is a status byte decoded as:
        /// bit 7 (0x80) set means an occurrence count byte follows, bit 6 (0x40) clear means
        /// the fault is active, and the low nibble (0x0F) is the FMI.
        /// </remarks>
        /// <param name="rawData">
        /// Bytes starting at this message; must hold at least <c>MessageLength</c> bytes
        /// (3 when the count is included, otherwise 2).
        /// </param>
        public J1708DiagnosticMessage(List<byte> rawData)
            : base()
        {
            ID  = rawData[0];
            Mid = 0x80; //We only take engine faults from J1708

            byte status = rawData[1];

            // The masks are plain hex: 0x80 / 0x40 / 0x0F. A trailing 'b' would be read as an extra
            // hex digit (0x80b == 0x80B), and a masked high bit is never equal to 1, so test against 0.
            CountIncluded = (status & 0x80) != 0;
            IsActive      = (status & 0x40) == 0;
            Fmi           = (byte)(status & 0x0F);
            MessageLength = CountIncluded ? 3 : 2;

            var byteList = rawData.SubList(0, MessageLength);
            RawCode = byteList.ToHexString();
        }
Example #16
0
        /// <summary>
        /// Splits <paramref name="input"/> around the matches produced by this pattern's matcher.
        /// </summary>
        /// <param name="input">The text to split.</param>
        /// <param name="limit">
        /// When positive, at most <paramref name="limit"/> pieces are returned and the last piece holds
        /// the unsplit remainder. When zero, trailing empty pieces are dropped. When negative, all
        /// pieces are returned, including trailing empty ones.
        /// </param>
        /// <returns>
        /// The pieces of <paramref name="input"/>; a single-element array containing
        /// <paramref name="input"/> itself when no match advanced the scan position.
        /// </returns>
        public string[] Split(string input, int limit)
        {
            int          index        = 0;
            bool         matchLimited = limit > 0;
            List<string> matchList    = new List<string>();
            Matcher      m            = Matcher(input);

            // Add segments before each match found
            while (m.Find())
            {
                if (!matchLimited || matchList.Count < limit - 1)
                {
                    // .NET Substring takes (start, length), so convert the match start into a length.
                    matchList.Add(input.Substring(index, m.Start() - index));
                    index = m.End();
                }
                else if (matchList.Count == limit - 1)
                {
                    // last one: everything from the current position to the end of the input
                    matchList.Add(input.Substring(index));
                    index = m.End();
                }
            }

            // If no match was found, return this
            if (index == 0)
            {
                return new string[] { input };
            }

            // Add remaining segment
            if (!matchLimited || matchList.Count < limit)
            {
                matchList.Add(input.Substring(index));
            }

            // Construct result
            int resultSize = matchList.Count;

            if (limit == 0)
            {
                while (resultSize > 0 && matchList[resultSize - 1].Length == 0)
                {
                    resultSize--;
                }
            }
            return matchList.GetRange(0, resultSize).ToArray();
        }
    }
Example #17
0
        /// <summary>
        /// Demo entry point: passes the numbers 1..10 through <c>SubList(3)</c> and prints each
        /// yielded group on its own line, then waits for a key press.
        /// </summary>
        static void Main(string[] args)
        {
            var values = new List<int> { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
            var groups = values.SubList(3);

            foreach (var group in groups)
            {
                group.ForEach(value => Console.Write(value));
                Console.WriteLine();
            }

            Console.ReadKey();
        }
Example #18
0
        /// <summary>
        /// Demo entry point: passes the numbers 1..9 through <c>SubList(4)</c> and prints each
        /// yielded group on its own line, then waits for a key press.
        /// </summary>
        static void Main(string[] args)
        {
            var values = new List<int> { 1, 2, 3, 4, 5, 6, 7, 8, 9 };

            foreach (var group in values.SubList(4))
            {
                group.ForEach(value => Console.Write(value));
                Console.WriteLine();
            }

            Console.ReadKey();
        }
Example #19
0
        /// <summary>
        /// Draws <paramref name="k"/> items from a sequence of <paramref name="n"/> items without
        /// replacement, by shuffling the positions 0..n-1 and keeping the items at the first k of them.
        /// </summary>
        /// <param name="items">The sequence to sample from.</param>
        /// <param name="n">The number of items in <paramref name="items"/>.</param>
        /// <param name="k">How many items to keep.</param>
        /// <param name="random">Generator used for the shuffle.</param>
        /// <returns>The items of <paramref name="items"/> filtered down to the chosen positions.</returns>
        public static IEnumerable<T> Sample<T>(IEnumerable<T> items, int n, int k, Random random)
        {
            // Enumerate every position, then let the shuffle decide which k survive.
            IList<int> positions = new List<int>();
            int next = 0;
            while (next < n)
            {
                positions.Add(next);
                next++;
            }

            Java.Util.Collections.Shuffle(positions, random);
            ICollection<int> chosen = Generics.NewHashSet(positions.SubList(0, k));

            // Keep only the items that the predicate accepts for the chosen position set.
            return Iterables.Filter(items, new _IPredicate_614(chosen));
        }
Example #20
0
        /// <summary>
        /// Randomly partitions the segments named in <paramref name="segmentsToMerge"/> into merges.
        /// </summary>
        /// <param name="segmentInfos">Segments of the index.</param>
        /// <param name="maxSegmentCount">Not consulted by this implementation.</param>
        /// <param name="segmentsToMerge">Segments that may take part; only its keys are used.</param>
        /// <returns>
        /// Merges covering every eligible segment in randomly sized groups of at most 10, or
        /// <c>null</c> when there is nothing to merge (no eligible segment, or a single one
        /// without deletions).
        /// </returns>
        public override MergeSpecification FindForcedMerges(SegmentInfos segmentInfos, int maxSegmentCount, IDictionary<SegmentCommitInfo, bool?> segmentsToMerge)
        {
            IList<SegmentCommitInfo> eligible = new List<SegmentCommitInfo>();

            foreach (SegmentCommitInfo candidate in segmentInfos.Segments)
            {
                if (segmentsToMerge.ContainsKey(candidate))
                {
                    eligible.Add(candidate);
                }
            }

            bool worthMerging = eligible.Count > 1 || (eligible.Count == 1 && eligible[0].HasDeletions);
            if (!worthMerging)
            {
                return null;
            }

            MergeSpecification mergeSpec = new MergeSpecification();

            // Already shuffled having come out of a set but
            // shuffle again for good measure:
            eligible.Shuffle(random);

            int consumed = 0;
            while (consumed < eligible.Count)
            {
                int max = Math.Min(10, eligible.Count - consumed);

                // With two or fewer segments left take them all; otherwise pick a group size in [2, max].
                int groupSize;
                if (max <= 2)
                {
                    groupSize = max;
                }
                else
                {
                    groupSize = TestUtil.NextInt32(random, 2, max);
                }

                mergeSpec.Add(new OneMerge(eligible.SubList(consumed, consumed + groupSize)));
                consumed += groupSize;
            }

            // Sanity check: every segment placed in a merge must have been requested by the caller.
            foreach (OneMerge merge in mergeSpec.Merges)
            {
                foreach (SegmentCommitInfo merged in merge.Segments)
                {
                    if (Debugging.AssertsEnabled)
                    {
                        Debugging.Assert(segmentsToMerge.ContainsKey(merged));
                    }
                }
            }

            return mergeSpec;
        }
Example #21
0
        /// <summary>
        /// Resets the bucket bookkeeping for the given bucket budget and seeds the
        /// bucket map with a single, randomly chosen bucket.
        /// </summary>
        /// <param name="maxBuckets">Bucket budget; forwarded to <c>SetMaxBuckets</c> and halved to obtain the starting index.</param>
        /// <param name="offset">Scalar offset forwarded to <c>SetOffset</c>.</param>
        public void InitializeBucketMap(int maxBuckets, double?offset)
        {
            SetMaxBuckets(maxBuckets);

            // Both ends of the index range start at the middle bucket so the range can
            // grow in either direction. The original notes say indices must stay
            // non-negative and recommend an even maxBuckets.
            int middleIndex = maxBuckets / 2;
            SetMinIndex(middleIndex);
            SetMaxIndex(middleIndex);

            // Offset used when mapping scalar values to bucket indices. Per the
            // original notes, the index for x is
            // maxBuckets/2 + int(round((x - offset) / resolution)).
            SetOffset(offset);

            // Maps a bucket index to the bit positions that represent it.
            bucketMap = new ConcurrentDictionary <int, List <int> >();

            // Build the identity sequence 0..N-1, then shuffle it into a random permutation.
            List <int> permutation = new List <int>(GetN());
            int next = 0;
            while (next < GetN())
            {
                permutation.Add(next);
                next++;
            }
            permutation.Shuffle(rng);

            // The first bucket is stored under the current minimum index and takes
            // the leading GetW() entries of the permutation.
            bucketMap.TryAdd(GetMinIndex(), permutation.SubList(0, GetW()));

            // Reset the retry counter used while generating valid encodings.
            SetNumRetry(0);
        }
Example #22
0
        /// <summary>
        /// For every prefix (length 2 and up) of a shuffled list of all terms, and for
        /// every minimum-should-match value from 1 to the prefix length, checks that
        /// the two scorer variants agree via <c>AssertNext</c>.
        /// </summary>
        public virtual void TestNextVaryingNumberOfTerms()
        {
            int totalTerms = CommonTerms.Length + MediumTerms.Length + RareTerms.Length;
            IList<string> shuffledTerms = new List<string>(totalTerms);
            shuffledTerms.AddRange(CommonTerms);
            shuffledTerms.AddRange(MediumTerms);
            shuffledTerms.AddRange(RareTerms);
            shuffledTerms.Shuffle();

            int prefixLength = 2;
            while (prefixLength <= shuffledTerms.Count)
            {
                string[] prefix = shuffledTerms.SubList(0, prefixLength).ToArray();
                for (int required = 1; required <= prefix.Length; required++)
                {
                    // The scorer built with 'true' is the expected side, the one built with 'false' the actual side.
                    AssertNext(Scorer(prefix, required, true), Scorer(prefix, required, false));
                }
                prefixLength++;
            }
        }
        /// <summary>
        /// Builds a randomized forced-merge plan from the segments that appear as keys
        /// in <paramref name="segmentsToMerge"/>.
        /// </summary>
        /// <param name="segmentInfos">Segments to scan for merge candidates.</param>
        /// <param name="maxSegmentCount">Not consulted by this implementation.</param>
        /// <param name="segmentsToMerge">Only segments present as keys here become candidates.</param>
        /// <returns>
        /// A specification covering every candidate exactly once, or <c>null</c> when
        /// there are no candidates or the only candidate has no deletions.
        /// </returns>
        public override MergeSpecification FindForcedMerges(SegmentInfos segmentInfos, int maxSegmentCount, IDictionary<SegmentCommitInfo, bool?> segmentsToMerge)
        {
            IList<SegmentCommitInfo> pool = new List<SegmentCommitInfo>();
            foreach (SegmentCommitInfo segment in segmentInfos.Segments)
            {
                if (segmentsToMerge.ContainsKey(segment))
                {
                    pool.Add(segment);
                }
            }

            // Nothing to do without candidates, or with a single candidate that has no deletions.
            if (pool.Count == 0 || (pool.Count == 1 && !pool[0].HasDeletions()))
            {
                return null;
            }

            MergeSpecification spec = new MergeSpecification();

            // Randomize the order, then slice the list into consecutive groups.
            pool = CollectionsHelper.Shuffle(pool);
            for (int offset = 0; offset < pool.Count;)
            {
                // Never look at more than 10 of the remaining candidates for one group.
                int limit = Math.Min(10, pool.Count - offset);
                int step = limit;
                if (limit > 2)
                {
                    step = TestUtil.NextInt(Random, 2, limit);
                }
                spec.Add(new OneMerge(pool.SubList(offset, offset + step)));
                offset += step;
            }

            // Sanity check: every segment placed in a merge must have been requested.
            foreach (OneMerge merge in spec.Merges)
            {
                foreach (SegmentCommitInfo merged in merge.Segments)
                {
                    Debug.Assert(segmentsToMerge.ContainsKey(merged));
                }
            }
            return spec;
        }
Example #24
0
        /// <summary>
        /// For advance amounts 25, 50, ... 175, every prefix (length 2 and up) of a
        /// shuffled list of all terms, and every minimum-should-match value from 1 to
        /// the prefix length, checks that the two scorer variants agree via
        /// <c>AssertAdvance</c>.
        /// </summary>
        public virtual void TestAdvanceVaryingNumberOfTerms()
        {
            IList <string> shuffledTerms = new List <string>();

            shuffledTerms.AddRange(Arrays.AsList(CommonTerms));
            shuffledTerms.AddRange(Arrays.AsList(MediumTerms));
            shuffledTerms.AddRange(Arrays.AsList(RareTerms));
            shuffledTerms = CollectionsHelper.Shuffle(shuffledTerms);

            int amount = 25;
            while (amount < 200)
            {
                int prefixLength = 2;
                while (prefixLength <= shuffledTerms.Count)
                {
                    string[] prefix = shuffledTerms.SubList(0, prefixLength).ToArray();
                    for (int required = 1; required <= prefix.Length; required++)
                    {
                        // The scorer built with 'true' is the expected side, the one built with 'false' the actual side.
                        AssertAdvance(Scorer(prefix, required, true), Scorer(prefix, required, false), amount);
                    }
                    prefixLength++;
                }
                amount += 25;
            }
        }
        /// <summary>
        /// Picks a random set of facet categories: 1-3 distinct entries from
        /// <c>CATEGORIES_A</c>, 1-2 distinct entries from <c>CATEGORIES_B</c>, plus one
        /// entry each from <c>CATEGORIES_C</c> and <c>CATEGORIES_D</c>.
        /// </summary>
        /// <param name="random">Source of randomness for the A/B counts.</param>
        /// <returns>The selected categories, A entries first, then B, then C and D.</returns>
        private static IList<FacetField> RandomCategories(Random random)
        {
            int countA = random.Next(3) + 1; // 1-3
            int countB = random.Next(2) + 1; // 1-2

            // Shuffle a copy of each dimension and take a prefix, so no category is picked twice.
            List<FacetField> poolA = new List<FacetField>(Arrays.AsList(CATEGORIES_A));
            List<FacetField> poolB = new List<FacetField>(Arrays.AsList(CATEGORIES_B));
            poolA = CollectionsHelper.Shuffle(poolA).ToList();
            poolB = CollectionsHelper.Shuffle(poolB).ToList();

            List<FacetField> picked = new List<FacetField>();
            picked.AddRange(poolA.SubList(0, countA));
            picked.AddRange(poolB.SubList(0, countB));

            // add the NO_PARENT categories
            // NOTE(review): these two draws use Random() rather than the 'random'
            // parameter -- confirm whether that is intentional.
            picked.Add(CATEGORIES_C[Random().Next(NUM_CHILDREN_CP_C)]);
            picked.Add(CATEGORIES_D[Random().Next(NUM_CHILDREN_CP_D)]);

            return picked;
        }
Example #26
0
        /// <summary>
        /// Retrieve suggestions.
        /// <para>
        /// Analyzes <paramref name="key"/>, keeps the last token seen for each gram
        /// size, then walks the models from the highest order down, collecting
        /// completions from the FST. The backoff factor is multiplied by
        /// <c>ALPHA</c> each time a model is passed over or finished. A last token
        /// already produced by a higher-order model is skipped.
        /// </para>
        /// </summary>
        /// <param name="key">The text typed so far.</param>
        /// <param name="contexts">Must be <c>null</c>; contexts are not supported.</param>
        /// <param name="num">Maximum number of results to return.</param>
        /// <returns>At most <paramref name="num"/> results, sorted with the comparer built below.</returns>
        /// <exception cref="System.ArgumentException">
        /// If <paramref name="contexts"/> is not <c>null</c>, if a token's recalculated
        /// gram count disagrees with its position length, or if analysis produced no
        /// non-empty token.
        /// </exception>
        public virtual IList <LookupResult> DoLookup(string key, IEnumerable <BytesRef> contexts, int num)
        {
            if (contexts != null)
            {
                throw new System.ArgumentException("this suggester doesn't support contexts");
            }

            TokenStream ts = queryAnalyzer.GetTokenStream("", key.ToString());

            try
            {
                ITermToBytesRefAttribute    termBytesAtt = ts.AddAttribute <ITermToBytesRefAttribute>();
                IOffsetAttribute            offsetAtt    = ts.AddAttribute <IOffsetAttribute>();
                IPositionLengthAttribute    posLenAtt    = ts.AddAttribute <IPositionLengthAttribute>();
                IPositionIncrementAttribute posIncAtt    = ts.AddAttribute <IPositionIncrementAttribute>();
                ts.Reset();

                // Slot (g - 1) holds the most recent token made of g grams.
                var lastTokens = new BytesRef[grams];
                //System.out.println("lookup: key='" + key + "'");

                // Run full analysis, but save only the
                // last 1gram, last 2gram, etc.:
                BytesRef tokenBytes   = termBytesAtt.BytesRef;
                int      maxEndOffset = -1;
                bool     sawRealToken = false;
                while (ts.IncrementToken())
                {
                    termBytesAtt.FillBytesRef();
                    sawRealToken |= tokenBytes.Length > 0;
                    // TODO: this is somewhat iffy; today, ShingleFilter
                    // sets posLen to the gram count; maybe we should make
                    // a separate dedicated att for this?
                    int gramCount = posLenAtt.PositionLength;

                    Debug.Assert(gramCount <= grams);

                    // Safety: make sure the recalculated count "agrees":
                    if (CountGrams(tokenBytes) != gramCount)
                    {
                        throw new System.ArgumentException("tokens must not contain separator byte; got token=" + tokenBytes + " but gramCount=" + gramCount + " does not match recalculated count=" + CountGrams(tokenBytes));
                    }
                    maxEndOffset = Math.Max(maxEndOffset, offsetAtt.EndOffset);
                    // tokenBytes is reused by the attribute, so store a private copy.
                    lastTokens[gramCount - 1] = BytesRef.DeepCopyOf(tokenBytes);
                }
                ts.End();

                if (!sawRealToken)
                {
                    throw new System.ArgumentException("no tokens produced by analyzer, or the only tokens were empty strings");
                }

                // Carefully fill last tokens with _ tokens;
                // ShingleFilter apparently won't emit "only hole"
                // tokens:
                int endPosInc = posIncAtt.PositionIncrement;

                // Note this will also be true if input is the empty
                // string (in which case we saw no tokens and
                // maxEndOffset is still -1), which in fact works out OK
                // because we fill the unigram with an empty BytesRef
                // below:
                bool lastTokenEnded = offsetAtt.EndOffset > maxEndOffset || endPosInc > 0;
                //System.out.println("maxEndOffset=" + maxEndOffset + " vs " + offsetAtt.EndOffset);

                if (lastTokenEnded)
                {
                    //System.out.println("  lastTokenEnded");
                    // If user hit space after the last token, then
                    // "upgrade" all tokens.  This way "foo " will suggest
                    // all bigrams starting w/ foo, and not any unigrams
                    // starting with "foo":
                    // Walk from the highest slot down so each token moves up one
                    // slot (with a trailing separator) before its old slot is overwritten.
                    for (int i = grams - 1; i > 0; i--)
                    {
                        BytesRef token = lastTokens[i - 1];
                        if (token == null)
                        {
                            continue;
                        }
                        token.Grow(token.Length + 1);
                        token.Bytes[token.Length] = separator;
                        token.Length++;
                        lastTokens[i] = token;
                    }
                    lastTokens[0] = new BytesRef();
                }

                var arc = new FST.Arc <long?>();

                var bytesReader = fst.GetBytesReader();

                // Try highest order models first, and if they return
                // results, return that; else, fallback:
                double backoff = 1.0;

                List <LookupResult> results = new List <LookupResult>(num);

                // We only add a given suffix once, from the highest
                // order model that saw it; for subsequent lower order
                // models we skip it:
                var seen = new HashSet <BytesRef>();

                for (int gram = grams - 1; gram >= 0; gram--)
                {
                    BytesRef token = lastTokens[gram];
                    // Don't make unigram predictions from empty string:
                    if (token == null || (token.Length == 0 && key.Length > 0))
                    {
                        // Input didn't have enough tokens:
                        //System.out.println("  gram=" + gram + ": skip: not enough input");
                        continue;
                    }

                    if (endPosInc > 0 && gram <= endPosInc)
                    {
                        // Skip hole-only predictions; in theory we
                        // shouldn't have to do this, but we'd need to fix
                        // ShingleFilter to produce only-hole tokens:
                        //System.out.println("  break: only holes now");
                        break;
                    }

                    //System.out.println("try " + (gram+1) + " gram token=" + token.utf8ToString());

                    // TODO: we could add fuzziness here
                    // match the prefix portion exactly
                    //Pair<Long,BytesRef> prefixOutput = null;
                    long?prefixOutput = null;
                    try
                    {
                        prefixOutput = LookupPrefix(fst, bytesReader, token, arc);
                    }
                    catch (IOException bogus)
                    {
                        throw new Exception(bogus.ToString(), bogus);
                    }
                    //System.out.println("  prefixOutput=" + prefixOutput);

                    if (prefixOutput == null)
                    {
                        // This model never saw this prefix, e.g. the
                        // trigram model never saw context "purple mushroom"
                        backoff *= ALPHA;
                        continue;
                    }

                    // TODO: we could do this division at build time, and
                    // bake it into the FST?

                    // Denominator for computing scores from current
                    // model's predictions:
                    long contextCount = totTokens;

                    BytesRef lastTokenFragment = null;

                    // Scan backwards for the last separator: everything before it is
                    // the context (whose weight becomes the denominator), everything
                    // after it is the fragment of the last token typed so far.
                    for (int i = token.Length - 1; i >= 0; i--)
                    {
                        if (token.Bytes[token.Offset + i] == separator)
                        {
                            BytesRef context = new BytesRef(token.Bytes, token.Offset, i);
                            long?    output  = Lucene.Net.Util.Fst.Util.Get(fst, Lucene.Net.Util.Fst.Util.ToInt32sRef(context, new Int32sRef()));
                            Debug.Assert(output != null);
                            contextCount      = DecodeWeight(output);
                            lastTokenFragment = new BytesRef(token.Bytes, token.Offset + i + 1, token.Length - i - 1);
                            break;
                        }
                    }

                    BytesRef finalLastToken;

                    // With no separator present the whole token is the last-token fragment.
                    if (lastTokenFragment == null)
                    {
                        finalLastToken = BytesRef.DeepCopyOf(token);
                    }
                    else
                    {
                        finalLastToken = BytesRef.DeepCopyOf(lastTokenFragment);
                    }
                    Debug.Assert(finalLastToken.Offset == 0);

                    CharsRef spare = new CharsRef();

                    // complete top-N
                    Util.Fst.Util.TopResults <long?> completions = null;
                    try
                    {
                        // Because we store multiple models in one FST
                        // (1gram, 2gram, 3gram), we must restrict the
                        // search so that it only considers the current
                        // model.  For highest order model, this is not
                        // necessary since all completions in the FST
                        // must be from this model, but for lower order
                        // models we have to filter out the higher order
                        // ones:

                        // Must do num+seen.size() for queue depth because we may
                        // reject up to seen.size() paths in acceptResult():
                        Util.Fst.Util.TopNSearcher <long?> searcher = new TopNSearcherAnonymousInnerClassHelper(this, fst, num, num + seen.Count, weightComparer, seen, finalLastToken);

                        // since this search is initialized with a single start node
                        // it is okay to start with an empty input path here
                        searcher.AddStartPaths(arc, prefixOutput, true, new Int32sRef());

                        completions = searcher.Search();
                        Debug.Assert(completions.IsComplete);
                    }
                    catch (IOException bogus)
                    {
                        throw new Exception(bogus.ToString(), bogus);
                    }

                    // Remember the prefix length so 'token' can be truncated back to
                    // the prefix before each completion's suffix is appended.
                    int prefixLength = token.Length;

                    BytesRef suffix = new BytesRef(8);
                    //System.out.println("    " + completions.length + " completions");

                    foreach (Util.Fst.Util.Result <long?> completion in completions)
                    {
                        token.Length = prefixLength;
                        // append suffix
                        Util.Fst.Util.ToBytesRef(completion.Input, suffix);
                        token.Append(suffix);

                        //System.out.println("    completion " + token.utf8ToString());

                        // Skip this path if a higher-order model already
                        // saw/predicted its last token:
                        BytesRef lastToken = token;
                        for (int i = token.Length - 1; i >= 0; i--)
                        {
                            if (token.Bytes[token.Offset + i] == separator)
                            {
                                Debug.Assert(token.Length - i - 1 > 0);
                                lastToken = new BytesRef(token.Bytes, token.Offset + i + 1, token.Length - i - 1);
                                break;
                            }
                        }
                        if (seen.Contains(lastToken))
                        {
                            //System.out.println("      skip dup " + lastToken.utf8ToString());
                            goto nextCompletionContinue;
                        }
                        // lastToken may share token's byte array, so store a copy.
                        seen.Add(BytesRef.DeepCopyOf(lastToken));
                        spare.Grow(token.Length);
                        UnicodeUtil.UTF8toUTF16(token, spare);
                        LookupResult result = new LookupResult(spare.ToString(),
                                                               // LUCENENET NOTE: We need to calculate this as decimal because when using double it can sometimes
                                                               // return numbers that are greater than long.MaxValue, which results in a negative long number.
                                                               (long)(long.MaxValue * (decimal)backoff * ((decimal)DecodeWeight(completion.Output)) / contextCount));
                        results.Add(result);
                        Debug.Assert(results.Count == seen.Count);
                        //System.out.println("  add result=" + result);
                        // Label target emulating a labeled 'continue' of the completion loop.
                        nextCompletionContinue :;
                    }
                    backoff *= ALPHA;
                }

                results.Sort(new ComparerAnonymousInnerClassHelper(this));

                // Drop everything past the first 'num' sorted results.
                if (results.Count > num)
                {
                    results.SubList(num, results.Count).Clear();
                }

                return(results);
            }
            finally
            {
                // Always release the token stream, including when an exception is propagating.
                IOUtils.DisposeWhileHandlingException(ts);
            }
        }
Example #27
0
        /// <summary>
        /// Chooses the merges needed to bring the segments named in
        /// <paramref name="segmentsToMerge"/> down to at most
        /// <paramref name="maxSegmentCount"/> segments.
        /// </summary>
        /// <param name="infos">All current segments.</param>
        /// <param name="maxSegmentCount">Target number of segments after merging.</param>
        /// <param name="segmentsToMerge">
        /// Segments taking part in the forced merge. A segment that is absent, or
        /// mapped to <c>null</c>, is ignored.
        /// </param>
        /// <returns>
        /// The merges to run, or <c>null</c> when nothing is eligible, the segments are
        /// already merged, or the remaining work must wait for a running merge.
        /// </returns>
        public override MergeSpecification FindForcedMerges(SegmentInfos infos, int maxSegmentCount, IDictionary <SegmentCommitInfo, bool?> segmentsToMerge)
        {
            if (Verbose())
            {
                Message("findForcedMerges maxSegmentCount=" + maxSegmentCount + " infos=" + Writer.Get().SegString(infos.Segments) + " segmentsToMerge=" + segmentsToMerge);
            }

            List <SegmentCommitInfo> eligible       = new List <SegmentCommitInfo>();
            bool forceMergeRunning                  = false;
            ICollection <SegmentCommitInfo> merging = Writer.Get().MergingSegments;
            bool?segmentIsOriginal                  = false;

            foreach (SegmentCommitInfo info in infos.Segments)
            {
                // Use TryGetValue rather than the indexer: the IDictionary indexer
                // throws KeyNotFoundException for a segment that is not part of this
                // forced merge, whereas such segments must simply be skipped.
                bool? isOriginal;
                if (!segmentsToMerge.TryGetValue(info, out isOriginal) || isOriginal == null)
                {
                    continue;
                }

                segmentIsOriginal = isOriginal;
                if (!merging.Contains(info))
                {
                    eligible.Add(info);
                }
                else
                {
                    // The segment is already being merged; remember that so the
                    // final merge below is not scheduled prematurely.
                    forceMergeRunning = true;
                }
            }

            if (eligible.Count == 0)
            {
                return null;
            }

            if ((maxSegmentCount > 1 && eligible.Count <= maxSegmentCount) || (maxSegmentCount == 1 && eligible.Count == 1 && (segmentIsOriginal == false || IsMerged(infos, eligible[0]))))
            {
                if (Verbose())
                {
                    Message("already merged");
                }
                return null;
            }

            eligible.Sort(new SegmentByteSizeDescending(this));

            if (Verbose())
            {
                Message("eligible=" + eligible);
                Message("forceMergeRunning=" + forceMergeRunning);
            }

            int end = eligible.Count;

            MergeSpecification spec = null;

            // Do full merges, first, backwards:
            while (end >= MaxMergeAtOnceExplicit_Renamed + maxSegmentCount - 1)
            {
                if (spec == null)
                {
                    spec = new MergeSpecification();
                }
                OneMerge merge = new OneMerge(eligible.SubList(end - MaxMergeAtOnceExplicit_Renamed, end));
                if (Verbose())
                {
                    Message("add merge=" + Writer.Get().SegString(merge.Segments));
                }
                spec.Add(merge);
                end -= MaxMergeAtOnceExplicit_Renamed;
            }

            if (spec == null && !forceMergeRunning)
            {
                // Do final merge: take just enough segments from the tail so that
                // maxSegmentCount segments remain afterwards.
                int      numToMerge = end - maxSegmentCount + 1;
                OneMerge merge      = new OneMerge(eligible.SubList(end - numToMerge, end));
                if (Verbose())
                {
                    Message("add final merge=" + merge.SegString(Writer.Get().Directory));
                }
                spec = new MergeSpecification();
                spec.Add(merge);
            }

            return spec;
        }
Example #28
0
        /// <summary>
        /// Renders the first member named in <paramref name="call_stack"/> against this
        /// type, then recurses into that member's type for the rest of the chain.
        /// </summary>
        /// <remarks>
        /// Static fields/methods are searched when this type is not an instance,
        /// instance fields/methods otherwise. Fields are searched before methods. The
        /// assignment operator and its value are applied only to the last member of
        /// the chain.
        /// </remarks>
        /// <returns>The type of the last member rendered, or <c>null</c> if a single-element chain matched nothing.</returns>
        public virtual T_Type RenderView(StringBuilder builder, List <string> call_stack, StringBuilder paras, AssignOperatorInfo?op, StringBuilder opValue,
                                         PostfixPartType?postfixPartType, StringBuilder func_paras, StringBuilder index_para)
        {
            string memberName = call_stack[0];

            // Only the final link of the chain receives the assignment.
            bool isLastLink = call_stack.Count == 1;
            AssignOperatorInfo? fieldOp      = isLastLink ? op : null;
            StringBuilder       fieldOpValue = isLastLink ? opValue : null;

            T_Type memberType = null;

            if (this.IsInstance)
            {
                foreach (var field in this.Fields)
                {
                    if (field.Name != memberName)
                    {
                        continue;
                    }
                    this.RenderView(builder, field, fieldOp, fieldOpValue);
                    memberType = field.Type;
                    break;
                }
                if (memberType == null)
                {
                    foreach (var method in this.Methods)
                    {
                        if (method.Name != memberName)
                        {
                            continue;
                        }
                        this.RenderView(builder, method, paras);
                        memberType = method.Type;
                        break;
                    }
                }
            }
            else
            {
                foreach (var field in this.Static_Fields)
                {
                    if (field.Name != memberName)
                    {
                        continue;
                    }
                    this.RenderView(builder, field, fieldOp, fieldOpValue);
                    memberType = field.Type;
                    break;
                }
                if (memberType == null)
                {
                    foreach (var method in this.Static_Methods)
                    {
                        if (method.Name != memberName)
                        {
                            continue;
                        }
                        this.RenderView(builder, method, paras);
                        memberType = method.Type;
                        break;
                    }
                }
            }

            if (call_stack.Count <= 1)
            {
                return memberType;
            }

            // NOTE(review): memberType is null here when no member matched, which
            // would throw a NullReferenceException -- confirm the intended handling.
            return memberType.RenderView(builder, call_stack.SubList(1), paras, op, opValue, postfixPartType, func_paras, index_para);
        }
        /// <summary>
        /// Trains the model over <paramref name="sentences"/> in shuffled batches,
        /// periodically evaluating, checkpointing and logging progress.
        /// </summary>
        /// <param name="sentences">Training trees; a shuffled copy is made for every training iteration.</param>
        /// <param name="compressedParses">Forwarded unchanged to each training batch.</param>
        /// <param name="testTreebank">Optional treebank evaluated on every debug cycle; may be <c>null</c>.</param>
        /// <param name="modelPath">Optional path used to save checkpoint models; may be <c>null</c>.</param>
        /// <param name="resultsRecordPath">Optional file to which each checkpoint status line is appended; may be <c>null</c>.</param>
        /// <exception cref="System.IO.IOException"/>
        public virtual void Train(IList <Tree> sentences, IdentityHashMap <Tree, byte[]> compressedParses, Treebank testTreebank, string modelPath, string resultsRecordPath)
        {
            // process:
            //   we come up with a cost and a derivative for the model
            //   we always use the gold tree as the example to train towards
            //   every time through, we will look at the top N trees from
            //     the LexicalizedParser and pick the best one according to
            //     our model (at the start, this is essentially random)
            // we use QN to minimize the cost function for the model
            // to do this minimization, we turn all of the matrices in the
            //   DVModel into one big Theta, which is the set of variables to
            //   be optimized by the QN.
            Timing timing             = new Timing();
            // Multiply by a long constant so a 32-bit seconds value is widened before the multiplication.
            long   maxTrainTimeMillis = op.trainOptions.maxTrainTimeSeconds * 1000L;
            int    batchCount         = 0;
            int    debugCycle         = 0;
            double bestLabelF1        = 0.0;

            if (op.trainOptions.useContextWords)
            {
                foreach (Tree tree in sentences)
                {
                    Edu.Stanford.Nlp.Trees.Trees.ConvertToCoreLabels(tree);
                    tree.SetSpans();
                }
            }
            // for AdaGrad
            double[] sumGradSquare = new double[dvModel.TotalParamSize()];
            Arrays.Fill(sumGradSquare, 1.0);
            // NOTE(review): when sentences.Count is an exact multiple of the batch
            // size, the "+ 1" makes the last batch empty -- confirm this is acceptable.
            int numBatches = sentences.Count / op.trainOptions.batchSize + 1;

            log.Info("Training on " + sentences.Count + " trees in " + numBatches + " batches");
            log.Info("Times through each training batch: " + op.trainOptions.trainingIterations);
            log.Info("QN iterations per batch: " + op.trainOptions.qnIterationsPerBatch);
            for (int iter = 0; iter < op.trainOptions.trainingIterations; ++iter)
            {
                // Shuffle a copy so the caller's list order is left untouched.
                IList <Tree> shuffledSentences = new List <Tree>(sentences);
                Java.Util.Collections.Shuffle(shuffledSentences, dvModel.rand);
                for (int batch = 0; batch < numBatches; ++batch)
                {
                    ++batchCount;
                    // This did not help performance
                    //log.info("Setting AdaGrad's sum of squares to 1...");
                    //Arrays.fill(sumGradSquare, 1.0);
                    log.Info("======================================");
                    log.Info("Iteration " + iter + " batch " + batch);
                    // Each batch will be of the specified batch size, except the
                    // last batch will include any leftover trees at the end of
                    // the list
                    int startTree = batch * op.trainOptions.batchSize;
                    int endTree   = (batch + 1) * op.trainOptions.batchSize;
                    if (endTree > shuffledSentences.Count)
                    {
                        endTree = shuffledSentences.Count;
                    }
                    ExecuteOneTrainingBatch(shuffledSentences.SubList(startTree, endTree), compressedParses, sumGradSquare);
                    long totalElapsed = timing.Report();
                    log.Info("Finished iteration " + iter + " batch " + batch + "; total training time " + totalElapsed + " ms");
                    if (maxTrainTimeMillis > 0 && totalElapsed > maxTrainTimeMillis)
                    {
                        // no need to debug output, we're done now
                        break;
                    }
                    if (op.trainOptions.debugOutputFrequency > 0 && batchCount % op.trainOptions.debugOutputFrequency == 0)
                    {
                        log.Info("Finished " + batchCount + " total batches, running evaluation cycle");
                        // Time for debugging output!
                        double tagF1   = 0.0;
                        double labelF1 = 0.0;
                        if (testTreebank != null)
                        {
                            EvaluateTreebank evaluator = new EvaluateTreebank(AttachModelToLexicalizedParser());
                            evaluator.TestOnTreebank(testTreebank);
                            labelF1 = evaluator.GetLBScore();
                            tagF1   = evaluator.GetTagScore();
                            if (labelF1 > bestLabelF1)
                            {
                                bestLabelF1 = labelF1;
                            }
                            log.Info("Best label f1 on dev set so far: " + Nf.Format(bestLabelF1));
                        }
                        string tempName = null;
                        if (modelPath != null)
                        {
                            tempName = modelPath;
                            if (modelPath.EndsWith(".ser.gz"))
                            {
                                // Insert "-<cycle>-<labelF1>" before the ".ser.gz" suffix (7 characters).
                                tempName = Sharpen.Runtime.Substring(modelPath, 0, modelPath.Length - 7) + "-" + Filename.Format(debugCycle) + "-" + Nf.Format(labelF1) + ".ser.gz";
                            }
                            SaveModel(tempName);
                        }
                        string statusLine = ("CHECKPOINT:" + " iteration " + iter + " batch " + batch + " labelF1 " + Nf.Format(labelF1) + " tagF1 " + Nf.Format(tagF1) + " bestLabelF1 " + Nf.Format(bestLabelF1) + " model " + tempName + op.trainOptions + " word vectors: "
                                             + op.lexOptions.wordVectorFile + " numHid: " + op.lexOptions.numHid);
                        log.Info(statusLine);
                        if (resultsRecordPath != null)
                        {
                            // append
                            FileWriter fout = new FileWriter(resultsRecordPath, true);
                            try
                            {
                                fout.Write(statusLine);
                                fout.Write("\n");
                            }
                            finally
                            {
                                // Close even when a write fails so the file handle is not leaked.
                                fout.Close();
                            }
                        }
                        ++debugCycle;
                    }
                }
                // The inner break only leaves the batch loop; re-check the time budget
                // here to stop the remaining iterations as well.
                long totalElapsed_1 = timing.Report();
                if (maxTrainTimeMillis > 0 && totalElapsed_1 > maxTrainTimeMillis)
                {
                    // no need to debug output, we're done now
                    log.Info("Max training time exceeded, exiting");
                    break;
                }
            }
        }
Example #30
0
        /// <summary>
        /// Randomized test: starts <c>numNodes</c> shard nodes and, until the end time
        /// is reached, repeatedly runs either a fresh query or a follow-on of an earlier
        /// query through a randomly chosen node's shard searcher. Each result is handed
        /// to <c>AssertSame</c> together with a mock searcher over a single
        /// <c>MultiReader</c> built from the same per-node searcher versions.
        /// </summary>
        public virtual void TestSimple()
        {
            int numNodes = TestUtil.NextInt(Random(), 1, 10);

            double runTimeSec = AtLeast(3);

            int minDocsToMakeTerms = TestUtil.NextInt(Random(), 5, 20);

            int maxSearcherAgeSeconds = TestUtil.NextInt(Random(), 1, 3);

            if (VERBOSE)
            {
                Console.WriteLine("TEST: numNodes=" + numNodes + " runTimeSec=" + runTimeSec + " maxSearcherAgeSeconds=" + maxSearcherAgeSeconds);
            }

            Start(numNodes, runTimeSec, maxSearcherAgeSeconds);

            // Earlier searches that may be picked again as "follow-on" queries.
            List <PreviousSearchState> priorSearches = new List <PreviousSearchState>();
            // Terms of the "body" field; collected lazily once enough docs exist.
            List <BytesRef>            terms         = null;

            while (Time.NanoTime() < endTimeNanos)
            {
                bool doFollowon = priorSearches.Count > 0 && Random().Next(7) == 1;

                // Pick a random node; we will run the query on this node:
                int myNodeID = Random().Next(numNodes);

                NodeState.ShardIndexSearcher localShardSearcher;

                PreviousSearchState prevSearchState;

                if (doFollowon)
                {
                    // Pretend user issued a followon query:
                    prevSearchState = priorSearches[Random().Next(priorSearches.Count)];

                    if (VERBOSE)
                    {
                        Console.WriteLine("\nTEST: follow-on query age=" + ((Time.NanoTime() - prevSearchState.SearchTimeNanos) / 1000000000.0));
                    }

                    try
                    {
                        localShardSearcher = Nodes[myNodeID].Acquire(prevSearchState.Versions);
                    }
                    catch (SearcherExpiredException see)
                    {
                        // Expected, sometimes; in a "real" app we would
                        // either forward this error to the user ("too
                        // much time has passed; please re-run your
                        // search") or sneakily just switch to newest
                        // searcher w/o telling them...
                        if (VERBOSE)
                        {
                            Console.WriteLine("  searcher expired during local shard searcher init: " + see);
                        }
                        priorSearches.Remove(prevSearchState);
                        continue;
                    }
                }
                else
                {
                    if (VERBOSE)
                    {
                        Console.WriteLine("\nTEST: fresh query");
                    }
                    // Do fresh query:
                    localShardSearcher = Nodes[myNodeID].Acquire();
                    prevSearchState    = null;
                }

                // One reader per node; entries stay null until acquired so the
                // finally block below only releases what was actually taken.
                IndexReader[] subs = new IndexReader[numNodes];

                PreviousSearchState searchState = null;

                try
                {
                    // Mock: now make a single reader (MultiReader) from all node
                    // searchers.  In a real shard env you can't do this... we
                    // do it to confirm results from the shard searcher
                    // are correct:
                    int docCount = 0;
                    try
                    {
                        for (int nodeID = 0; nodeID < numNodes; nodeID++)
                        {
                            long          subVersion = localShardSearcher.NodeVersions[nodeID];
                            IndexSearcher sub        = Nodes[nodeID].Searchers.Acquire(subVersion);
                            if (sub == null)
                            {
                                // Remember the node that expired: nodeID itself is
                                // reused below to unwind and ends up at -1.
                                int expiredNodeID = nodeID;

                                // Release the readers acquired so far and clear their
                                // slots so the finally block does not release them twice.
                                nodeID--;
                                while (nodeID >= 0)
                                {
                                    subs[nodeID].DecRef();
                                    subs[nodeID] = null;
                                    nodeID--;
                                }
                                throw new SearcherExpiredException("nodeID=" + expiredNodeID + " version=" + subVersion);
                            }
                            subs[nodeID] = sub.IndexReader;
                            docCount    += subs[nodeID].MaxDoc;
                        }
                    }
                    catch (SearcherExpiredException see)
                    {
                        // Expected
                        if (VERBOSE)
                        {
                            Console.WriteLine("  searcher expired during mock reader init: " + see);
                        }
                        continue;
                    }

                    IndexReader   mockReader   = new MultiReader(subs);
                    IndexSearcher mockSearcher = new IndexSearcher(mockReader);

                    Query query;
                    Sort  sort;

                    if (prevSearchState != null)
                    {
                        // Follow-on: reuse the query and sort of the earlier search.
                        query = prevSearchState.Query;
                        sort  = prevSearchState.Sort;
                    }
                    else
                    {
                        if (terms == null && docCount > minDocsToMakeTerms)
                        {
                            // TODO: try to "focus" on high freq terms sometimes too
                            // TODO: maybe also periodically reset the terms...?
                            TermsEnum termsEnum = MultiFields.GetTerms(mockReader, "body").GetIterator(null);
                            terms = new List <BytesRef>();
                            while (termsEnum.Next() != null)
                            {
                                terms.Add(BytesRef.DeepCopyOf(termsEnum.Term));
                            }
                            if (VERBOSE)
                            {
                                Console.WriteLine("TEST: init terms: " + terms.Count + " terms");
                            }
                            if (terms.Count == 0)
                            {
                                // Nothing usable yet; retry on a later iteration.
                                terms = null;
                            }
                        }

                        if (VERBOSE)
                        {
                            Console.WriteLine("  maxDoc=" + mockReader.MaxDoc);
                        }

                        if (terms != null)
                        {
                            if (Random().NextBoolean())
                            {
                                query = new TermQuery(new Term("body", terms[Random().Next(terms.Count)]));
                            }
                            else
                            {
                                string t = terms[Random().Next(terms.Count)].Utf8ToString();
                                string prefix;
                                if (t.Length <= 1)
                                {
                                    prefix = t;
                                }
                                else
                                {
                                    prefix = t.Substring(0, TestUtil.NextInt(Random(), 1, 2));
                                }
                                query = new PrefixQuery(new Term("body", prefix));
                            }

                            if (Random().NextBoolean())
                            {
                                sort = null;
                            }
                            else
                            {
                                // TODO: sort by more than 1 field
                                int what = Random().Next(3);
                                if (what == 0)
                                {
                                    sort = new Sort(SortField.FIELD_SCORE);
                                }
                                else if (what == 1)
                                {
                                    // TODO: this sort doesn't merge
                                    // correctly... it's tricky because you
                                    // could have > 2.1B docs across all shards:
                                    //sort = new Sort(SortField.FIELD_DOC);
                                    sort = null;
                                }
                                else if (what == 2)
                                {
                                    sort = new Sort(new SortField[] { new SortField("docid", SortFieldType.INT32, Random().NextBoolean()) });
                                }
                                else
                                {
                                    // NOTE(review): unreachable while 'what' is drawn from
                                    // Next(3); left as-is because enabling the "title" sort
                                    // would change what the test exercises.
                                    sort = new Sort(new SortField[] { new SortField("title", SortFieldType.STRING, Random().NextBoolean()) });
                                }
                            }
                        }
                        else
                        {
                            query = null;
                            sort  = null;
                        }
                    }

                    if (query != null)
                    {
                        try
                        {
                            searchState = AssertSame(mockSearcher, localShardSearcher, query, sort, prevSearchState);
                        }
                        catch (SearcherExpiredException see)
                        {
                            // Expected; in a "real" app we would
                            // either forward this error to the user ("too
                            // much time has passed; please re-run your
                            // search") or sneakily just switch to newest
                            // searcher w/o telling them...
                            if (VERBOSE)
                            {
                                Console.WriteLine("  searcher expired during search: " + see);
                                Console.Out.Write(see.StackTrace);
                            }
                            // We can't do this in general: on a very slow
                            // computer it's possible the local searcher
                            // expires before we can finish our search:
                            // assert prevSearchState != null;
                            if (prevSearchState != null)
                            {
                                priorSearches.Remove(prevSearchState);
                            }
                        }
                    }
                }
                finally
                {
                    // Runs on every exit path, including the 'continue' above.
                    Nodes[myNodeID].Release(localShardSearcher);
                    foreach (IndexReader sub in subs)
                    {
                        if (sub != null)
                        {
                            sub.DecRef();
                        }
                    }
                }

                if (searchState != null && searchState.SearchAfterLocal != null && Random().Next(5) == 3)
                {
                    priorSearches.Add(searchState);
                    if (priorSearches.Count > 200)
                    {
                        Collections.Shuffle(priorSearches);
                        // Keep only the first 100 entries. RemoveRange trims the list
                        // itself; calling Clear() on the result of SubList only works
                        // if SubList returns a live view rather than a copy.
                        priorSearches.RemoveRange(100, priorSearches.Count - 100);
                    }
                }
            }

            Finish();
        }
        /// <summary>
        /// Randomly proposes at most one merge over the segments that are not
        /// already being merged.
        /// </summary>
        /// <param name="mergeTrigger">Not used by this policy.</param>
        /// <param name="segmentInfos">Segments to choose merge candidates from.</param>
        /// <returns>
        /// A specification holding a single merge, or <c>null</c> when fewer than two
        /// segments are eligible or the random draw decides not to merge.
        /// </returns>
        public override MergeSpecification FindMerges(MergeTrigger? mergeTrigger, SegmentInfos segmentInfos)
        {
            // Segments already taking part in a merge are not eligible.
            ICollection<SegmentCommitInfo> merging = Writer.Get().MergingSegments;
            IList<SegmentCommitInfo> segments = new List<SegmentCommitInfo>();

            foreach (SegmentCommitInfo sipc in segmentInfos.Segments)
            {
                if (!merging.Contains(sipc))
                {
                    segments.Add(sipc);
                }
            }

            int numSegments = segments.Count;

            // Merge always when more than 30 segments are eligible, otherwise only
            // when the random draw hits. The draw is only consumed for 2..30
            // eligible segments, exactly as before.
            if (numSegments <= 1 || (numSegments <= 30 && Random.Next(5) != 3))
            {
                return null;
            }

            segments = CollectionsHelper.Shuffle(segments);

            // TODO: sometimes make more than 1 merge?
            MergeSpecification mergeSpec = new MergeSpecification();
            int segsToMerge = TestUtil.NextInt(Random, 1, numSegments);
            mergeSpec.Add(new OneMerge(segments.SubList(0, segsToMerge)));

            return mergeSpec;
        }
Example #32
0
        /// <summary>
        /// See nicer wrapper: MannWhitneyUTestOneSided.
        /// This is a one-sided test looking for the case where the group labelled with 1
        /// is larger than the group labelled with 0. Depending on the group sizes and the
        /// flags, the p-value comes from the asymptotic approximation, from the
        /// permutations produced by <c>SpecialFunctions.Permute01Targets</c>, or from
        /// <paramref name="maxNumPermutations"/> random shuffles of the ranks.
        /// </summary>
        /// <typeparam name="T">Type of a row.</typeparam>
        /// <param name="rowList">Rows to test.</param>
        /// <param name="scoreAccessor">Returns the score of a row.</param>
        /// <param name="label01Accessor">Returns the group label of a row; rows labelled 0 and 1 are counted.</param>
        /// <param name="maxNumPermutations">
        /// Upper bound on the exact permutation count for the exact branch to be used,
        /// and the number of random shuffles in the sampled branch.
        /// </param>
        /// <param name="forceAssymptoticApprox">When true, the asymptotic approximation is used regardless of group sizes.</param>
        /// <param name="neverDoExactPermutations">When true, the exact-permutation branch is never taken.</param>
        /// <param name="parallelOptionsOrNullFor1">Parallel options; <c>null</c> means a degree of parallelism of 1.</param>
        /// <returns>
        /// Key: the z score, except in the sampled-permutation branch where it is the U
        /// statistic of the rows labelled 1. Value: the p-value.
        /// </returns>
        public static KeyValuePair <double, double> ComputeZ0AndPValue <T>(IList <T> rowList,
                                                                           Func <T, double> scoreAccessor, Func <T, int> label01Accessor, int maxNumPermutations = 10000, bool forceAssymptoticApprox = false, bool neverDoExactPermutations = false,
                                                                           ParallelOptions parallelOptionsOrNullFor1 = null)
        {
            ParallelOptions parallelOptions = parallelOptionsOrNullFor1 ?? new ParallelOptions()
            {
                MaxDegreeOfParallelism = 1
            };

            // Group sizes are counted directly (an earlier parallel implementation was
            // reported as problematic by the original author).
            int n0 = rowList.Count(elt => label01Accessor(elt) == 0);
            int n1 = rowList.Count(elt => label01Accessor(elt) == 1); // the class we think has larger values

            double z0;
            double p;

            if ((n0 > 10 && n1 > 10) || forceAssymptoticApprox)
            {
                z0 = ComputeZ0 <T>(rowList, parallelOptions, n0, n1, scoreAccessor, label01Accessor);
                p  = 1.0 - SpecialFunctions.ZScoreToOneTailedPValue(z0, 1e-10);
                SanityCheckP(z0, p);
            }
            else
            {
                // Decide whether enumerating every permutation fits within maxNumPermutations.
                double        logExactPermutationCount = SpecialFunctions.LogFactorialNMOverFactorialNFactorialMApprox(n0, n1);
                bool          useExactPermutations     = (logExactPermutationCount <= Math.Log(maxNumPermutations)) && !neverDoExactPermutations;
                List <double> zList;

                if (useExactPermutations)
                {
                    // Each permutation is scored serially; parallelism is applied across permutations.
                    ParallelOptions serialOptions = new ParallelOptions {
                        MaxDegreeOfParallelism = 1
                    };

                    z0 = ComputeZ0 <T>(rowList, parallelOptions, n0, n1, scoreAccessor, label01Accessor);

                    /*faster than this is to simply permute the ranks of the real data (including ties), rather than the real data itself, but leaving this in for when exact permutations are needed*/
                    var permutations = SpecialFunctions.Permute01Targets(rowList, scoreAccessor, label01Accessor, maxNumPermutations).AsParallel();

                    // ParallelOptions uses -1 for "no limit", which PLINQ's
                    // WithDegreeOfParallelism rejects; in that case keep PLINQ's default.
                    if (parallelOptions.MaxDegreeOfParallelism > 0)
                    {
                        permutations = permutations.WithDegreeOfParallelism(parallelOptions.MaxDegreeOfParallelism);
                    }

                    zList =
                        (from permutation in permutations
                         let z = ComputeZ0(permutation, serialOptions, n0, n1, pair => pair.Key, pair => pair.Value)
                                 orderby z
                                 select z).ToList();
                }
                else
                {
                    /*--------------------------------------------------------------------------------------------------
                     * NB there is now a dead branch in SpecialFunctions.Permute01Targets(), which formerly used to do both
                     * 'exact'/'complete' and 'inexact'/'subsampled' permutations. Now it only does the former, and the
                     * 'inexact' is here, because it is much faster this way.
                     * -------------------------------------------------------------------------------------------------*/
                    // Don't bother converting to z, just use u instead.
                    List <double> listOfAllValues = rowList.Select(elt => scoreAccessor(elt)).ToList();
                    List <double> ranksWithTies   = SpecialFunctions.RanksWithTies(listOfAllValues);

                    // Scan every row, not just the first n0 + n1: the two ranges only
                    // coincide when every label is 0 or 1.
                    List <int>    indsOfClass1        = Enumerable.Range(0, rowList.Count).Where(elt => label01Accessor(rowList[elt]) == 1).ToList();
                    List <double> ranksWithTiesClass1 = ranksWithTies.SubList(indsOfClass1);
                    double        u1 = ComputeUFromRanks(ranksWithTiesClass1);

                    //!!!not parallelized
                    List <double> uList  = new List <double>();
                    Random        myRand = new MachineInvariantRandom("123456"); // fixed seed
                    for (int perm = 0; perm < maxNumPermutations; perm++)
                    {
                        ranksWithTies.ShuffleInPlace(myRand);

                        // One-sided: only the U score of the slots standing in for the
                        // rows labelled 1 is used. (A two-sided variant would presumably
                        // take the minimum of both groups' U scores.)
                        List <double> ranksWithTies1 = ranksWithTies.SubSequence(n0, n1).ToList();
                        uList.Add(ComputeUFromRanks(ranksWithTies1));
                    }

                    // To let the rest of the code do what it should.
                    zList = uList;
                    z0    = u1;
                }

                TwoByOne twoByOne = TwoByOne.GetInstance(zList, z => z0 <= z);
                p = twoByOne.Freq;
                //Can't  SanityCheckP(z0, p) because ties mean it wont always get the right answer
            }

            ////To get two-sided, which says "are they different" use this pTwoSided = 2 * ((p < .5) ? p : (1-p));
            return(new KeyValuePair <double, double>(z0, p));
        }
Example #33
0
        /// <summary>
        /// Renders the first name in <paramref name="call_stack"/> as a member of this
        /// type into <paramref name="builder"/>, then recurses into that member's type
        /// for the remaining names. Static members are searched when this type is not
        /// an instance, instance members otherwise; fields are tried before methods.
        /// </summary>
        /// <param name="builder">Output the rendered member is written to.</param>
        /// <param name="call_stack">Member names to resolve, outermost first.</param>
        /// <param name="paras">Passed to the method renderer when a method matches.</param>
        /// <param name="op">Assignment operator; only applied to the last name in the chain.</param>
        /// <param name="opValue">Assignment value; only applied to the last name in the chain.</param>
        /// <param name="postfixPartType">Forwarded unchanged to the recursive call.</param>
        /// <param name="func_paras">Forwarded unchanged to the recursive call.</param>
        /// <param name="index_para">Forwarded unchanged to the recursive call.</param>
        /// <returns>
        /// The type of the last resolved member, or <c>null</c> when the single
        /// remaining name matches no field or method.
        /// </returns>
        /// <exception cref="System.InvalidOperationException">
        /// A name that is not the last one in the chain matches no field or method.
        /// </exception>
        public virtual T_Type RenderView(IndentStringBuilder builder, List<string> call_stack, IndentStringBuilder paras,AssignOperatorInfo? op, IndentStringBuilder opValue,
            PostfixPartType? postfixPartType, IndentStringBuilder func_paras, IndentStringBuilder index_para)
        {
            var member = call_stack[0];

            // The assignment only belongs to the final member of the chain.
            bool isLastMember = call_stack.Count == 1;
            AssignOperatorInfo? op2 = isLastMember ? op : null;
            IndentStringBuilder opValue2 = isLastMember ? opValue : null;

            T_Type nextType = null;
            if (!this.IsInstance)
            {
                foreach (var f in this.Static_Fields)
                {
                    if (f.Name == member)
                    {
                        this.RenderView(builder, f, op2, opValue2);
                        nextType = f.Type;
                        break;
                    }
                }
                if (nextType == null)
                {
                    foreach (var m in this.Static_Methods)
                    {
                        if (m.Name == member)
                        {
                            this.RenderView(builder, m, paras);
                            nextType = m.Type;
                            break;
                        }
                    }
                }
            }
            else
            {
                foreach (var f in this.Fields)
                {
                    if (f.Name == member)
                    {
                        this.RenderView(builder, f, op2, opValue2);
                        nextType = f.Type;
                        break;
                    }
                }
                if (nextType == null)
                {
                    foreach (var m in this.Methods)
                    {
                        if (m.Name == member)
                        {
                            this.RenderView(builder, m, paras);
                            nextType = m.Type;
                            break;
                        }
                    }
                }
            }

            if (call_stack.Count > 1)
            {
                // Without this check an unresolved member surfaced as a bare
                // NullReferenceException on the recursive call below.
                if (nextType == null)
                {
                    throw new System.InvalidOperationException("Cannot resolve member '" + member + "' while rendering a member access chain.");
                }

                return nextType.RenderView(builder, call_stack.SubList(1), paras, op, opValue, postfixPartType, func_paras, index_para);
            }

            return nextType;
        }