/// <summary>
/// Builds a line holding two Thai text renderers separated by a floating table and followed by a
/// regular table, then checks the sequence info reported for the line and how a list of break
/// positions is distributed between the two text renderers.
/// </summary>
public virtual void SpecialScriptPreLayoutProcessing() {
    Document document = new Document(new PdfDocument(new PdfWriter(new MemoryStream())));
    int thaiTextSplitPosition = THAI_TEXT.Length / 2;
    PdfFont font = PdfFontFactory.CreateFont(THAI_FONT, PdfEncodings.IDENTITY_H);

    // First half of the text.
    TextRenderer textRendererFirstPart = new TextRenderer(new Text(THAI_TEXT.JSubstring(0, thaiTextSplitPosition)));
    textRendererFirstPart.SetProperty(Property.FONT, font);
    textRendererFirstPart.SetText(THAI_TEXT.JSubstring(0, thaiTextSplitPosition));

    // Second half of the text.
    TextRenderer textRendererSecondPart = new TextRenderer(new Text(THAI_TEXT.Substring(thaiTextSplitPosition)));
    textRendererSecondPart.SetProperty(Property.FONT, font);
    textRendererSecondPart.SetText(THAI_TEXT.Substring(thaiTextSplitPosition));

    TableRenderer floatingNonTextRenderer = new TableRenderer(new Table(3));
    floatingNonTextRenderer.SetProperty(Property.FLOAT, FloatPropertyValue.RIGHT);
    TableRenderer regularNonTextRenderer = new TableRenderer(new Table(3));

    // Child order: text, floating table, text, regular table.
    LineRenderer lineRenderer = new LineRenderer();
    lineRenderer.SetParent(document.GetRenderer());
    lineRenderer.AddChild(textRendererFirstPart);
    lineRenderer.AddChild(floatingNonTextRenderer);
    lineRenderer.AddChild(textRendererSecondPart);
    lineRenderer.AddChild(regularNonTextRenderer);

    LineRenderer.SpecialScriptsContainingTextRendererSequenceInfo info =
        lineRenderer.GetSpecialScriptsContainingTextRendererSequenceInfo(0);
    int numberOfSequentialTextRenderers = info.numberOfSequentialTextRenderers;
    IList<int> indicesOfFloating = info.indicesOfFloating;

    NUnit.Framework.Assert.AreEqual(3, numberOfSequentialTextRenderers);
    NUnit.Framework.Assert.AreEqual(THAI_TEXT, info.sequentialTextContent);
    NUnit.Framework.Assert.AreEqual(1, indicesOfFloating.Count);
    NUnit.Framework.Assert.AreEqual(1, (int)indicesOfFloating[0]);

    IList<int> possibleBreaks = new List<int>(JavaUtil.ArraysAsList(3, 8, 10, 12, 15, 20, 23, 26, 28, 30, 36));
    lineRenderer.DistributePossibleBreakPointsOverSequentialTextRenderers(
        0, numberOfSequentialTextRenderers, possibleBreaks, indicesOfFloating);

    IList<int> possibleBreaksFirstPart = textRendererFirstPart.GetSpecialScriptsWordBreakPoints();
    NUnit.Framework.Assert.IsNotNull(possibleBreaksFirstPart);
    IList<int> possibleBreaksSecondPart = textRendererSecondPart.GetSpecialScriptsWordBreakPoints();
    NUnit.Framework.Assert.IsNotNull(possibleBreaksSecondPart);

    // The first five break positions (list indices 0..4) are expected on the first renderer.
    int indexOfLastPossibleBreakInTheFirstRenderer = 4;
    int firstPartBreakCount = indexOfLastPossibleBreakInTheFirstRenderer + 1;
    IList<int> expectedPossibleBreaksFirstPart = possibleBreaks.SubList(0, firstPartBreakCount);
    IList<int> expectedPossibleBreaksSecondPart = possibleBreaks.SubList(firstPartBreakCount, possibleBreaks.Count);
    NUnit.Framework.Assert.AreEqual(expectedPossibleBreaksFirstPart, possibleBreaksFirstPart);

    // Positions expected on the second renderer are relative to the start of its own text.
    int position = 0;
    while (position < expectedPossibleBreaksSecondPart.Count) {
        expectedPossibleBreaksSecondPart[position] = expectedPossibleBreaksSecondPart[position] - thaiTextSplitPosition;
        position++;
    }
    NUnit.Framework.Assert.AreEqual(expectedPossibleBreaksSecondPart, possibleBreaksSecondPart);
}
/// <summary>
/// Builds a relation triple from the words "blue cats" / "play with" / "yarn", where every word
/// is created with index -1.
/// </summary>
/// <returns>A triple whose three spans are sub-lists of one five-word sentence.</returns>
protected internal virtual RelationTriple BlueCatsPlayWithYarnNoIndices() {
    IList<CoreLabel> words = new List<CoreLabel> {
        IETestUtils.MkWord("blue", -1),
        IETestUtils.MkWord("cats", -1),
        IETestUtils.MkWord("play", -1),
        IETestUtils.MkWord("with", -1),
        IETestUtils.MkWord("yarn", -1)
    };
    IList<CoreLabel> blueCats = words.SubList(0, 2);
    IList<CoreLabel> playWith = words.SubList(2, 4);
    IList<CoreLabel> yarn = words.SubList(4, 5);
    return new RelationTriple(blueCats, playWith, yarn);
}
/// <summary>
/// Builds a relation triple from the sentence "yarn blue cats play with" (word indices 0..4),
/// passing the spans in the order "blue cats", "play with", "yarn".
/// </summary>
/// <returns>A triple whose three spans are sub-lists of one five-word sentence.</returns>
protected internal virtual RelationTriple YarnBlueCatsPlayWith() {
    IList<CoreLabel> words = new List<CoreLabel> {
        IETestUtils.MkWord("yarn", 0),
        IETestUtils.MkWord("blue", 1),
        IETestUtils.MkWord("cats", 2),
        IETestUtils.MkWord("play", 3),
        IETestUtils.MkWord("with", 4)
    };
    IList<CoreLabel> blueCats = words.SubList(1, 3);
    IList<CoreLabel> playWith = words.SubList(3, 5);
    IList<CoreLabel> yarn = words.SubList(0, 1);
    return new RelationTriple(blueCats, playWith, yarn);
}
/// <summary>
/// Walks a raw J1708 diagnostic payload, building one <c>J1708DiagnosticMessage</c> per step and
/// handing each to <c>AddAndPublish</c>.
/// </summary>
/// <param name="RawData">Raw payload bytes; must contain at least two bytes.</param>
public void ProcessJ1708Diagnostic(List<byte> RawData) {
    // Byte 1 is read but never consulted afterwards; the read is kept so short payloads
    // still fail at the same point.
    byte totalMessages = RawData[1];

    // NOTE(review): SubList(int) presumably returns the tail starting at the given index - confirm.
    List<byte> remaining = RawData.SubList(2);
    while (remaining.Count >= 2) {
        J1708DiagnosticMessage message = new J1708DiagnosticMessage(remaining);
        // Advance past the bytes this message reported as its own before publishing it.
        remaining = remaining.SubList(message.MessageLength);
        AddAndPublish(message);
    }
}
/// <summary>
/// Picks a random merge among the segments that are not already being merged.
/// </summary>
/// <param name="mergeTrigger">Not consulted by this implementation.</param>
/// <param name="segmentInfos">Segments currently in the index.</param>
/// <returns>
/// A specification with a single merge over a random-sized prefix of the shuffled candidates,
/// or <c>null</c> when no merge is proposed.
/// </returns>
public override MergeSpecification FindMerges(MergeTrigger mergeTrigger, SegmentInfos segmentInfos) {
    ICollection<SegmentCommitInfo> merging = base.m_writer.Get().MergingSegments;
    IList<SegmentCommitInfo> segments = new List<SegmentCommitInfo>();
    foreach (SegmentCommitInfo candidate in segmentInfos.Segments) {
        if (merging.Contains(candidate)) {
            continue;
        }
        segments.Add(candidate);
    }

    int numSegments = segments.Count;
    if (numSegments <= 1) {
        return null;
    }
    // With 30 or fewer candidates a merge is proposed only when the random draw hits 3.
    if (numSegments <= 30 && random.Next(5) != 3) {
        return null;
    }

    segments.Shuffle(random);
    // TODO: sometimes make more than 1 merge?
    MergeSpecification mergeSpec = new MergeSpecification();
    int segsToMerge = TestUtil.NextInt32(random, 1, numSegments);
    mergeSpec.Add(new OneMerge(segments.SubList(0, segsToMerge)));
    return mergeSpec;
}
/// <summary>
/// Picks a random merge among the segments that are not already being merged.
/// </summary>
/// <param name="mergeTrigger">Not consulted by this implementation.</param>
/// <param name="segmentInfos">Segments currently in the index.</param>
/// <returns>
/// A specification with a single merge over a random-sized prefix of the shuffled candidates,
/// or <c>null</c> when no merge is proposed.
/// </returns>
public override MergeSpecification FindMerges(MergeTrigger? mergeTrigger, SegmentInfos segmentInfos) {
    // This initial value is replaced by the candidate count once the filter below has run.
    int numSegments = segmentInfos.Size();

    ICollection<SegmentCommitInfo> merging = Writer.Get().MergingSegments;
    IList<SegmentCommitInfo> segments = new List<SegmentCommitInfo>();
    foreach (SegmentCommitInfo candidate in segmentInfos.Segments) {
        if (merging.Contains(candidate)) {
            continue;
        }
        segments.Add(candidate);
    }

    numSegments = segments.Count;
    if (numSegments <= 1) {
        return null;
    }
    // With 30 or fewer candidates a merge is proposed only when the random draw hits 3.
    if (numSegments <= 30 && Random.Next(5) != 3) {
        return null;
    }

    segments = CollectionsHelper.Shuffle(segments);
    // TODO: sometimes make more than 1 merge?
    MergeSpecification mergeSpec = new MergeSpecification();
    int segsToMerge = TestUtil.NextInt(Random, 1, numSegments);
    mergeSpec.Add(new OneMerge(segments.SubList(0, segsToMerge)));
    return mergeSpec;
}
/// <summary>
/// Picks a random set of facet categories: one to three from <c>CATEGORIES_A</c>, one or two
/// from <c>CATEGORIES_B</c>, plus one each from <c>CATEGORIES_C</c> and <c>CATEGORIES_D</c>.
/// </summary>
/// <param name="random">
/// Random source used for the A/B counts and for shuffling the A/B candidates, so a caller that
/// supplies its own generator controls the whole A/B selection.
/// </param>
/// <returns>The selected categories; no A or B category appears twice.</returns>
private static IList<FacetField> RandomCategories(Random random) {
    // add random categories from the two dimensions, ensuring that the same
    // category is not added twice.
    int numFacetsA = random.Next(3) + 1; // 1-3
    int numFacetsB = random.Next(2) + 1; // 1-2

    List<FacetField> categories_a = new List<FacetField>();
    categories_a.AddRange(CATEGORIES_A);
    List<FacetField> categories_b = new List<FacetField>();
    categories_b.AddRange(CATEGORIES_B);

    // Shuffle with the injected generator (previously the ambient Random property was used
    // here, which ignored the parameter for this step).
    categories_a.Shuffle(random);
    categories_b.Shuffle(random);

    List<FacetField> categories = new List<FacetField>();
    categories.AddRange(categories_a.SubList(0, numFacetsA));
    categories.AddRange(categories_b.SubList(0, numFacetsB));

    // add the NO_PARENT categories
    categories.Add(CATEGORIES_C[Util.LuceneTestCase.Random.Next(NUM_CHILDREN_CP_C)]);
    categories.Add(CATEGORIES_D[Util.LuceneTestCase.Random.Next(NUM_CHILDREN_CP_D)]);
    return categories;
}
/// <summary>
/// Advances to the next lemma: writes its stem into the term attribute and its tags (split by
/// <c>lemmaSplitter</c>) into the tags attribute, reusing the pooled <c>StringBuilder</c>
/// instances in <c>tagsList</c>.
/// </summary>
private void PopNextLemma() {
    // One tag (concatenated) per lemma.
    WordData lemma = lemmaList[lemmaListIndex++];
    termAtt.SetEmpty().Append(lemma.GetStem().ToString());

    var tag = lemma.GetTag();
    if (tag == null) {
        tagsAtt.Tags = Collections.EmptyList<StringBuilder>();
        return;
    }

    string[] tags = lemmaSplitter.Split(tag.ToString());
    int slot = 0;
    foreach (string part in tags) {
        // Grow the pool only when this slot has never been used before.
        if (tagsList.Count <= slot) {
            tagsList.Add(new StringBuilder());
        }
        StringBuilder buffer = tagsList[slot];
        buffer.Length = 0;
        buffer.Append(part);
        slot++;
    }
    // Expose only as many pooled buffers as this lemma has tags.
    tagsAtt.Tags = tagsList.SubList(0, tags.Length);
}
/// <summary>
/// Scores a word under a tag by summing, character by character, the log-probability returned by
/// <c>GetBackedOffDist</c> for each character given its context, then subtracting a length
/// penalty selected by <c>penaltyType</c>.
/// </summary>
/// <param name="iTW">Tagged word; only its <c>tag</c> is read, to look up the tag string.</param>
/// <param name="loc">Not consulted by this implementation.</param>
/// <param name="word">The word to score; asserted not to be the boundary symbol.</param>
/// <param name="featureSpec">Not consulted by this implementation.</param>
/// <returns>The accumulated log score, narrowed to <c>float</c>.</returns>
public virtual float Score(IntTaggedWord iTW, int loc, string word, string featureSpec) {
    string tag = tagIndex.Get(iTW.tag);
    System.Diagnostics.Debug.Assert(!word.Equals(LexiconConstants.Boundary));
    char[] chars = word.ToCharArray();
    IList<ISerializable> charList = new List<ISerializable>(chars.Length + ContextLength + 1);
    // this starts of storing Symbol's and then starts storing String's. Clean this up someday!
    // charList is constructed backward
    // END_WORD char[length-1] char[length-2] ... char[0] BEGIN_WORD BEGIN_WORD
    charList.Add(ChineseCharacterBasedLexicon.Symbol.EndWord);
    for (int i = chars.Length - 1; i >= 0; i--) {
        ChineseCharacterBasedLexicon.Symbol ch = ChineseCharacterBasedLexicon.Symbol.CannonicalSymbol(chars[i]);
        if (knownChars.Contains(ch)) {
            charList.Add(ch);
        }
        else {
            charList.Add(UnknownCharClass(ch));
        }
    }
    for (int i_1 = 0; i_1 < ContextLength; i_1++) {
        charList.Add(ChineseCharacterBasedLexicon.Symbol.BeginWord);
    }

    double score = 0.0;
    // The loop bound is the length of the list built above. It was previously read from an
    // identifier `size` that this method never declared; binding it to charList.Count keeps
    // every window [i, i + ContextLength + 1) inside the list.
    int size = charList.Count;
    for (int i_2 = 0; i_2 < size - ContextLength; i_2++) {
        ChineseCharacterBasedLexicon.Symbol nextChar = (ChineseCharacterBasedLexicon.Symbol)charList[i_2];
        // Replace the character being predicted with the tag so the window is (tag, context...).
        charList.Set(i_2, tag);
        double charScore = GetBackedOffDist(charList.SubList(i_2, i_2 + ContextLength + 1)).ProbabilityOf(nextChar);
        score += Math.Log(charScore);
    }

    switch (penaltyType) {
        case 0: {
            // no length penalty
            break;
        }

        case 1: {
            score -= (chars.Length * (chars.Length + 1)) * (lengthPenalty / 2);
            break;
        }

        case 2: {
            score -= (chars.Length - 1) * lengthPenalty;
            break;
        }
    }
    return (float)score;
}
/// <summary>
/// Returns the merges necessary to merge the index while taking the maximum merge size and
/// maximum merge document count into account. Segments exceeding either limit are skipped and
/// the segments to their right are merged among themselves; otherwise segments are grouped
/// <c>MergeFactor_Renamed</c> at a time, scanning from <paramref name="last"/> towards index 0.
/// Because of the size constraints more than <c>maxNumSegments</c> segments may remain; the
/// method aims for at most that number but does not guarantee exactly that number.
/// </summary>
/// <param name="infos">Segments of the index.</param>
/// <param name="maxNumSegments">Target segment count; not read by this method body.</param>
/// <param name="last">Exclusive upper bound of the segment range to consider.</param>
/// <returns>The merge specification, or <c>null</c> when no merge was added.</returns>
private MergeSpecification FindForcedMergesSizeLimit(SegmentInfos infos, int maxNumSegments, int last) {
    MergeSpecification spec = new MergeSpecification();
    List<SegmentCommitInfo> segments = infos.AsList();

    int start;
    for (start = last - 1; start >= 0; --start) {
        SegmentCommitInfo info = infos.Info(start);
        bool overLimit = Size(info) > MaxMergeSizeForForcedMerge || SizeDocs(info) > MaxMergeDocs_Renamed;
        if (overLimit) {
            if (Verbose()) {
                Message("findForcedMergesSizeLimit: skip segment=" + info + ": size is > maxMergeSize (" + MaxMergeSizeForForcedMerge + ") or sizeDocs is > maxMergeDocs (" + MaxMergeDocs_Renamed + ")");
            }
            // need to skip that segment + add a merge for the 'right' segments,
            // unless there is only 1 which is merged.
            bool severalOnTheRight = last - start - 1 > 1;
            if (severalOnTheRight || (start != last - 1 && !IsMerged(infos, infos.Info(start + 1)))) {
                // there is more than 1 segment to the right of
                // this one, or a mergeable single segment.
                spec.Add(new OneMerge(segments.SubList(start + 1, last)));
            }
            last = start;
        }
        else if (last - start == MergeFactor_Renamed) {
            // mergeFactor eligible segments were found, add them as a merge.
            spec.Add(new OneMerge(segments.SubList(start, last)));
            last = start;
        }
    }

    // Add any left-over segments, unless there is just 1
    // already fully merged
    if (last > 0) {
        // Step back onto the first segment before examining the left-over range.
        start++;
        if (start + 1 < last || !IsMerged(infos, infos.Info(start))) {
            spec.Add(new OneMerge(segments.SubList(start, last)));
        }
    }

    if (spec.Merges.Count == 0) {
        return null;
    }
    return spec;
}
/// <summary>
/// Exercises <c>CorruptReplicasMap</c>: initial size, argument validation and paging of
/// <c>GetCorruptReplicaBlockIds</c>, and size bookkeeping while replicas are added and removed.
/// </summary>
/// <remarks>
/// Assertion arguments follow NUnit's order (expected/value first, message last); the message
/// used to be passed first, which is the JUnit order.
/// </remarks>
public virtual void TestCorruptReplicaInfo() {
    const string sizeMessage = "Number of corrupt blocks not returning correctly";
    CorruptReplicasMap crm = new CorruptReplicasMap();

    // Make sure initial values are returned correctly
    NUnit.Framework.Assert.AreEqual(0, crm.Size(), "Number of corrupt blocks must initially be 0");
    NUnit.Framework.Assert.IsNull(crm.GetCorruptReplicaBlockIds(-1, null), "Param n cannot be less than 0");
    NUnit.Framework.Assert.IsNull(crm.GetCorruptReplicaBlockIds(101, null), "Param n cannot be greater than 100");
    long[] l = crm.GetCorruptReplicaBlockIds(0, null);
    NUnit.Framework.Assert.IsNotNull(l, "n = 0 must return non-null");
    NUnit.Framework.Assert.AreEqual(0, l.Length, "n = 0 must return an empty list");

    // create a list of block_ids. A list is used to allow easy validation of the
    // output of getCorruptReplicaBlockIds
    int NumBlockIds = 140;
    IList<long> block_ids = new List<long>();
    for (int i = 0; i < NumBlockIds; i++) {
        block_ids.AddItem((long)i);
    }

    DatanodeDescriptor dn1 = DFSTestUtil.GetLocalDatanodeDescriptor();
    DatanodeDescriptor dn2 = DFSTestUtil.GetLocalDatanodeDescriptor();

    AddToCorruptReplicasMap(crm, GetBlock(0), dn1);
    NUnit.Framework.Assert.AreEqual(1, crm.Size(), sizeMessage);
    AddToCorruptReplicasMap(crm, GetBlock(1), dn1);
    NUnit.Framework.Assert.AreEqual(2, crm.Size(), sizeMessage);
    // A second datanode for an already-known block must not change the block count.
    AddToCorruptReplicasMap(crm, GetBlock(1), dn2);
    NUnit.Framework.Assert.AreEqual(2, crm.Size(), sizeMessage);

    crm.RemoveFromCorruptReplicasMap(GetBlock(1));
    NUnit.Framework.Assert.AreEqual(1, crm.Size(), sizeMessage);
    crm.RemoveFromCorruptReplicasMap(GetBlock(0));
    NUnit.Framework.Assert.AreEqual(0, crm.Size(), sizeMessage);

    foreach (long block_id in block_ids) {
        AddToCorruptReplicasMap(crm, GetBlock(block_id), dn1);
    }
    NUnit.Framework.Assert.AreEqual(NumBlockIds, crm.Size(), sizeMessage);

    NUnit.Framework.Assert.IsTrue(
        Arrays.Equals(new long[] { 0, 1, 2, 3, 4 }, crm.GetCorruptReplicaBlockIds(5, null)),
        "First five block ids not returned correctly ");

    Log.Info(crm.GetCorruptReplicaBlockIds(10, 7L));
    Log.Info(block_ids.SubList(7, 18));

    NUnit.Framework.Assert.IsTrue(
        Arrays.Equals(new long[] { 8, 9, 10, 11, 12, 13, 14, 15, 16, 17 }, crm.GetCorruptReplicaBlockIds(10, 7L)),
        "10 blocks after 7 not returned correctly ");
}
/// <summary>
/// Selects segments whose percentage of deleted documents exceeds
/// <c>ForceMergeDeletesPctAllowed_Renamed</c> and that are not already merging, orders them with
/// <c>SegmentByteSizeDescending</c>, and groups them into merges of at most
/// <c>MaxMergeAtOnceExplicit_Renamed</c> segments each.
/// </summary>
/// <param name="infos">Segments of the index.</param>
/// <returns>The merges to run, or <c>null</c> when no segment qualifies.</returns>
public override MergeSpecification FindForcedDeletesMerges(SegmentInfos infos) {
    if (Verbose()) {
        Message("findForcedDeletesMerges infos=" + Writer.Get().SegString(infos.Segments) + " forceMergeDeletesPctAllowed=" + ForceMergeDeletesPctAllowed_Renamed);
    }

    List<SegmentCommitInfo> eligible = new List<SegmentCommitInfo>();
    ICollection<SegmentCommitInfo> merging = Writer.Get().MergingSegments;
    foreach (SegmentCommitInfo info in infos.Segments) {
        double pctDeletes = 100.0 * ((double)Writer.Get().NumDeletedDocs(info)) / info.Info.DocCount;
        bool overThreshold = pctDeletes > ForceMergeDeletesPctAllowed_Renamed;
        if (overThreshold && !merging.Contains(info)) {
            eligible.Add(info);
        }
    }

    if (eligible.Count == 0) {
        return null;
    }

    eligible.Sort(new SegmentByteSizeDescending(this));
    if (Verbose()) {
        Message("eligible=" + eligible);
    }

    // At least one segment is eligible here, so the specification is always populated.
    MergeSpecification spec = new MergeSpecification();
    for (int start = 0; start < eligible.Count; ) {
        // Don't enforce max merged size here: app is explicitly
        // calling forceMergeDeletes, and knows this may take a
        // long time / produce big segments (like forceMerge):
        int end = Math.Min(start + MaxMergeAtOnceExplicit_Renamed, eligible.Count);
        OneMerge merge = new OneMerge(eligible.SubList(start, end));
        if (Verbose()) {
            Message("add merge=" + Writer.Get().SegString(merge.Segments));
        }
        spec.Add(merge);
        start = end;
    }
    return spec;
}
/// <summary>
/// Creates the initial clustering state for a document: one singleton cluster per mention, the
/// list of mention pairs to consider (cut off by score thresholds), and one
/// <c>GlobalFeatures</c> record per pair.
/// </summary>
/// <param name="doc">Document supplying mentions and pairwise scores.</param>
public State(ClustererDataLoader.ClustererDoc doc) {
    this.doc = doc;
    currentDocId = doc.id;
    this.hashedScores = new Dictionary<Clusterer.MergeKey, bool>();
    this.hashedCosts = new Dictionary<long, double>();
    this.clusters = new List<Clusterer.Cluster>();
    this.hash = 0;
    mentionToCluster = new Dictionary<int, Clusterer.Cluster>();

    // Every mention starts in its own cluster; the state hash folds in each cluster hash.
    foreach (int mention in doc.mentions) {
        Clusterer.Cluster singleton = new Clusterer.Cluster(mention);
        clusters.Add(singleton);
        mentionToCluster[mention] = singleton;
        hash ^= singleton.hash * 7;
    }

    IList<Pair<int, int>> allPairs = new List<Pair<int, int>>(doc.classificationScores.KeySet());
    ICounter<Pair<int, int>> scores = UseRanking ? doc.rankingScores : doc.classificationScores;
    // NOTE(review): the cutoff loop below looks like it expects pairs ordered by score, but the
    // sort is called with a null comparer - confirm which ordering Sort(null) produces.
    allPairs.Sort(null);

    // Find the first pair index at which one of the stop conditions holds.
    int cutoff = 0;
    while (cutoff < allPairs.Count) {
        double score = scores.GetCount(allPairs[cutoff]);
        if ((score < MinPairwiseScore && cutoff > MinPairs)
            || (cutoff >= EarlyStopThreshold && cutoff / score > EarlyStopVal)) {
            break;
        }
        cutoff++;
    }
    mentionPairs = allPairs.SubList(0, cutoff);

    ICounter<int> seenAnaphors = new ClassicCounter<int>();
    ICounter<int> seenAntecedents = new ClassicCounter<int>();
    globalFeatures = new List<Clusterer.GlobalFeatures>();
    int pairIndex = 0;
    foreach (Pair<int, int> mentionPair in allPairs) {
        Clusterer.GlobalFeatures gf = new Clusterer.GlobalFeatures();
        gf.currentIndex = pairIndex;
        // Recorded before this pair's own anaphor is counted.
        gf.anaphorSeen = seenAnaphors.ContainsKey(mentionPair.second);
        gf.size = mentionPairs.Count;
        gf.docSize = doc.mentions.Count / 300.0;
        globalFeatures.Add(gf);
        seenAnaphors.IncrementCount(mentionPair.second);
        seenAntecedents.IncrementCount(mentionPair.first);
        pairIndex++;
    }

    currentIndex = 0;
    SetClusters();
}
/// <summary>
/// Indexes random single-term documents, then repeatedly checks that a <c>TermsFilter</c> over a
/// random subset of the terms matches the same documents, in the same order, as a constant-score
/// boolean OR query over that subset.
/// </summary>
public void TestRandom() {
    Directory dir = NewDirectory();
    RandomIndexWriter w = new RandomIndexWriter(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
        this,
#endif
        Random, dir);
    int num = AtLeast(100);
    bool singleField = Random.NextBoolean();
    IList<Term> terms = new List<Term>();
    for (int docIndex = 0; docIndex < num; docIndex++) {
        // Either every document uses "field1", or each gets one of 100 random field names.
        string fieldSuffix = singleField ? "1" : Random.Next(100).ToString(CultureInfo.InvariantCulture);
        string field = "field" + fieldSuffix;
        string value = TestUtil.RandomRealisticUnicodeString(Random);
        terms.Add(new Term(field, value));
        Document doc = new Document();
        doc.Add(NewStringField(field, value, Field.Store.YES));
        w.AddDocument(doc);
    }
    IndexReader reader = w.GetReader();
    w.Dispose();

    IndexSearcher searcher = NewSearcher(reader);
    int numQueries = AtLeast(10);
    for (int round = 0; round < numQueries; round++) {
        terms.Shuffle(Random);
        int numTerms = 1 + Random.Next(Math.Min(BooleanQuery.MaxClauseCount, terms.Count));

        // Reference result: OR query over the first numTerms shuffled terms.
        BooleanQuery bq = new BooleanQuery();
        for (int t = 0; t < numTerms; t++) {
            TermQuery termQuery = new TermQuery(terms[t]);
            bq.Add(new BooleanClause(termQuery, Occur.SHOULD));
        }
        TopDocs queryResult = searcher.Search(new ConstantScoreQuery(bq), reader.MaxDoc);

        // Result under test: match-all query restricted by a filter over the same terms.
        MatchAllDocsQuery matchAll = new MatchAllDocsQuery();
        TermsFilter filter = TermsFilter(singleField, terms.SubList(0, numTerms));
        TopDocs filterResult = searcher.Search(matchAll, filter, reader.MaxDoc);

        assertEquals(filterResult.TotalHits, queryResult.TotalHits);
        ScoreDoc[] scoreDocs = filterResult.ScoreDocs;
        for (int hit = 0; hit < scoreDocs.Length; hit++) {
            assertEquals(scoreDocs[hit].Doc, queryResult.ScoreDocs[hit].Doc);
        }
    }

    reader.Dispose();
    dir.Dispose();
}
//public byte OccurrenceCount { get; private set; }

/// <summary>
/// Parses one diagnostic message from the start of <paramref name="rawData"/>.
/// </summary>
/// <remarks>
/// Byte 0 is stored as <c>ID</c>. Byte 1 is a flag byte: bit 7 (0x80) set means an occurrence
/// count byte follows, bit 6 (0x40) clear means the fault is active, and the low nibble (0x0F)
/// is the FMI. The masks were previously written as <c>0x80b</c>, <c>0x40b</c> and <c>0x0Fb</c>;
/// C# reads the trailing <c>b</c> as a hex digit, so they were 0x80B, 0x40B and 0xFB and tested
/// the wrong bits, and the count flag was compared with 1, which a 0x80 mask can never yield.
/// NOTE(review): bit layout taken from the intent of the original masks - confirm against the
/// protocol specification in use.
/// </remarks>
/// <param name="rawData">
/// Bytes beginning at this message; must hold at least two bytes, or three when the count flag
/// is set.
/// </param>
public J1708DiagnosticMessage(List<byte> rawData) : base() {
    ID = rawData[0];
    Mid = 0x80; //We only take engine faults from J1708

    byte flags = rawData[1];
    CountIncluded = (flags & 0x80) != 0;
    IsActive = (flags & 0x40) == 0;
    Fmi = (byte)(flags & 0x0F);

    // The optional occurrence-count byte makes the message one byte longer.
    MessageLength = CountIncluded ? 3 : 2;
    var byteList = rawData.SubList(0, MessageLength);
    RawCode = byteList.ToHexString();
}
/// <summary>
/// Splits <paramref name="input"/> around matches of this pattern.
/// </summary>
/// <remarks>
/// Segment extraction uses <c>string.Substring(start, length)</c> with an explicit length. The
/// previous code passed an end index as the second argument (Java's begin/end convention), which
/// returned wrong segments or threw once the first match had been consumed.
/// </remarks>
/// <param name="input">Text to split.</param>
/// <param name="limit">
/// When positive, at most this many segments are returned and the last one holds the rest of the
/// input. When zero, trailing empty segments are dropped. When negative, all segments are kept.
/// </param>
/// <returns>The segments, or a single-element array holding the input when nothing was consumed.</returns>
public string[] Split(string input, int limit) {
    int index = 0;
    bool matchLimited = limit > 0;
    List<string> matchList = new List<string>();
    Matcher m = Matcher(input);

    // Add segments before each match found
    while (m.Find()) {
        if (!matchLimited || matchList.Count < limit - 1) {
            // Text between the end of the previous match and the start of this one.
            matchList.Add(input.Substring(index, m.Start() - index));
            index = m.End();
        }
        else if (matchList.Count == limit - 1) {
            // last one: everything that is left, delimiters included
            matchList.Add(input.Substring(index));
            index = m.End();
        }
    }

    // If no match was found, return this
    if (index == 0) {
        return new string[] { input };
    }

    // Add remaining segment
    if (!matchLimited || matchList.Count < limit) {
        matchList.Add(input.Substring(index));
    }

    // Construct result
    int resultSize = matchList.Count;
    if (limit == 0) {
        while (resultSize > 0 && matchList[resultSize - 1].Equals("")) {
            resultSize--;
        }
    }
    return matchList.SubList(0, resultSize).ToArray();
}
}
/// <summary>
/// Demo entry point: splits the numbers 1..10 with <c>SubList(3)</c> and prints each resulting
/// group on its own line, then waits for a key press.
/// </summary>
/// <param name="args">Command-line arguments; not used.</param>
static void Main(string[] args) {
    var numbers = new List<int>() { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
    // NOTE(review): whether 3 is the group size or the number of groups depends on the
    // SubList extension, which is defined elsewhere - confirm.
    var groups = numbers.SubList(3);
    foreach (var group in groups) {
        group.ForEach(number => Console.Write(number));
        Console.WriteLine();
    }
    Console.ReadKey();
}
/// <summary>
/// Demo entry point: splits the numbers 1..9 with <c>SubList(4)</c> and prints each resulting
/// group on its own line, then waits for a key press.
/// </summary>
/// <param name="args">Command-line arguments; not used.</param>
static void Main(string[] args) {
    var numbers = new List<int>() { 1, 2, 3, 4, 5, 6, 7, 8, 9 };
    // NOTE(review): whether 4 is the group size or the number of groups depends on the
    // SubList extension, which is defined elsewhere - confirm.
    foreach (var group in numbers.SubList(4)) {
        group.ForEach(number => Console.Write(number));
        Console.WriteLine();
    }
    Console.ReadKey();
}
/// <summary>
/// Samples k items from a sequence of n items without replacement: the indexes 0..n-1 are
/// shuffled, the first k are kept, and the sequence is filtered down to the items at those
/// positions.
/// </summary>
/// <param name="items">The items from which to sample.</param>
/// <param name="n">The total number of items in <paramref name="items"/>.</param>
/// <param name="k">The number of items to sample.</param>
/// <param name="random">The random number generator used for the shuffle.</param>
/// <returns>The filtered view of <paramref name="items"/> produced by <c>Iterables.Filter</c>.</returns>
public static IEnumerable<T> Sample<T>(IEnumerable<T> items, int n, int k, Random random) {
    // assemble a list of all indexes
    IList<int> indexes = new List<int>();
    int next = 0;
    while (next < n) {
        indexes.Add(next);
        ++next;
    }

    // shuffle the indexes and select the first k
    Java.Util.Collections.Shuffle(indexes, random);
    IList<int> chosen = indexes.SubList(0, k);
    ICollection<int> indexSet = Generics.NewHashSet(chosen);

    // filter down to only the items at the selected indexes
    _IPredicate_614 isSelected = new _IPredicate_614(indexSet);
    return Iterables.Filter(items, isSelected);
}
/// <summary>
/// Builds random forced merges over the segments listed in <paramref name="segmentsToMerge"/>:
/// the eligible segments are shuffled and cut into consecutive groups of random size.
/// </summary>
/// <param name="segmentInfos">Segments of the index.</param>
/// <param name="maxSegmentCount">Not consulted by this implementation.</param>
/// <param name="segmentsToMerge">Segments that may take part in a merge.</param>
/// <returns>
/// The merges, or <c>null</c> when there is no eligible segment or only a single one without
/// deletions.
/// </returns>
public override MergeSpecification FindForcedMerges(SegmentInfos segmentInfos, int maxSegmentCount, IDictionary<SegmentCommitInfo, bool?> segmentsToMerge) {
    IList<SegmentCommitInfo> eligibleSegments = new List<SegmentCommitInfo>();
    foreach (SegmentCommitInfo candidate in segmentInfos.Segments) {
        if (segmentsToMerge.ContainsKey(candidate)) {
            eligibleSegments.Add(candidate);
        }
    }

    bool worthMerging = eligibleSegments.Count > 1
        || (eligibleSegments.Count == 1 && eligibleSegments[0].HasDeletions);
    if (!worthMerging) {
        return null;
    }

    MergeSpecification mergeSpec = new MergeSpecification();
    // Already shuffled having come out of a set but
    // shuffle again for good measure:
    eligibleSegments.Shuffle(random);
    int upto = 0;
    while (upto < eligibleSegments.Count) {
        // Each group takes at most 10 segments; groups of 1 or 2 are taken whole.
        int max = Math.Min(10, eligibleSegments.Count - upto);
        int inc = max <= 2 ? max : TestUtil.NextInt32(random, 2, max);
        mergeSpec.Add(new OneMerge(eligibleSegments.SubList(upto, upto + inc)));
        upto += inc;
    }

    // Sanity check: every segment scheduled for merging was requested by the caller.
    foreach (OneMerge merge in mergeSpec.Merges) {
        foreach (SegmentCommitInfo info in merge.Segments) {
            if (Debugging.AssertsEnabled) {
                Debugging.Assert(segmentsToMerge.ContainsKey(info));
            }
        }
    }
    return mergeSpec;
}
/// <summary>
/// Initialize the bucket map assuming the given number of maxBuckets.
/// </summary>
/// <param name="maxBuckets">Maximum number of buckets; the original notes advise an even number.</param>
/// <param name="offset">Scalar offset used to map scalar values to bucket indices.</param>
public void InitializeBucketMap(int maxBuckets, double? offset) {
    // The first bucket index is maxBuckets / 2; indices may later grow lower or higher as long
    // as they stay non-negative. maxBuckets exists because the current CLA Classifier assumes
    // bucket indices are non-negative.
    int middleIndex = maxBuckets / 2;
    SetMaxBuckets(maxBuckets);
    SetMinIndex(middleIndex);
    SetMaxIndex(middleIndex);

    // The middle bucket corresponds to numbers in [offset - resolution/2, offset + resolution/2).
    // The bucket index for a number x is maxBuckets/2 + int(round((x - offset) / resolution)).
    SetOffset(offset);

    // Maps a bucket index to its bit representation; seeded below with a single entry keyed
    // by GetMinIndex().
    bucketMap = new ConcurrentDictionary<int, List<int>>();

    // generate the random permutation
    List<int> permutation = new List<int>(GetN());
    int bit = 0;
    while (bit < GetN()) {
        permutation.Add(bit);
        bit++;
    }
    permutation.Shuffle(rng);

    // The first GetW() entries of the shuffled permutation form the seed bucket.
    List<int> seedBucket = permutation.SubList(0, GetW());
    bucketMap.TryAdd(GetMinIndex(), seedBucket);

    // How often we need to retry when generating valid encodings
    SetNumRetry(0);
}
/// <summary>
/// For every prefix (length 2 and up) of a shuffled list of common, medium and rare terms, and
/// for every minimum-should-match value from 1 to the prefix length, compares the two scorers
/// built by <c>Scorer(terms, minNrShouldMatch, true/false)</c> via <c>AssertNext</c>.
/// </summary>
public virtual void TestNextVaryingNumberOfTerms() {
    int totalTerms = CommonTerms.Length + MediumTerms.Length + RareTerms.Length;
    IList<string> termsList = new List<string>(totalTerms);
    termsList.AddRange(CommonTerms);
    termsList.AddRange(MediumTerms);
    termsList.AddRange(RareTerms);
    termsList.Shuffle();

    int numTerms = 2;
    while (numTerms <= termsList.Count) {
        string[] terms = termsList.SubList(0, numTerms).ToArray();
        int minNrShouldMatch = 1;
        while (minNrShouldMatch <= terms.Length) {
            Scorer expected = Scorer(terms, minNrShouldMatch, true);
            Scorer actual = Scorer(terms, minNrShouldMatch, false);
            AssertNext(expected, actual);
            minNrShouldMatch++;
        }
        numTerms++;
    }
}
/// <summary>
/// Builds random forced merges over the segments listed in <paramref name="segmentsToMerge"/>:
/// the eligible segments are shuffled and cut into consecutive groups of random size.
/// </summary>
/// <param name="segmentInfos">Segments of the index.</param>
/// <param name="maxSegmentCount">Not consulted by this implementation.</param>
/// <param name="segmentsToMerge">Segments that may take part in a merge.</param>
/// <returns>
/// The merges, or <c>null</c> when there is no eligible segment or only a single one without
/// deletions.
/// </returns>
public override MergeSpecification FindForcedMerges(SegmentInfos segmentInfos, int maxSegmentCount, IDictionary<SegmentCommitInfo, bool?> segmentsToMerge) {
    IList<SegmentCommitInfo> eligibleSegments = new List<SegmentCommitInfo>();
    foreach (SegmentCommitInfo candidate in segmentInfos.Segments) {
        if (segmentsToMerge.ContainsKey(candidate)) {
            eligibleSegments.Add(candidate);
        }
    }

    bool worthMerging = eligibleSegments.Count > 1
        || (eligibleSegments.Count == 1 && eligibleSegments[0].HasDeletions());
    if (!worthMerging) {
        return null;
    }

    MergeSpecification mergeSpec = new MergeSpecification();
    // Already shuffled having come out of a set but
    // shuffle again for good measure:
    eligibleSegments = CollectionsHelper.Shuffle(eligibleSegments);
    int upto = 0;
    while (upto < eligibleSegments.Count) {
        // Each group takes at most 10 segments; groups of 1 or 2 are taken whole.
        int max = Math.Min(10, eligibleSegments.Count - upto);
        int inc = max <= 2 ? max : TestUtil.NextInt(Random, 2, max);
        mergeSpec.Add(new OneMerge(eligibleSegments.SubList(upto, upto + inc)));
        upto += inc;
    }

    // Sanity check: every segment scheduled for merging was requested by the caller.
    foreach (OneMerge merge in mergeSpec.Merges) {
        foreach (SegmentCommitInfo info in merge.Segments) {
            Debug.Assert(segmentsToMerge.ContainsKey(info));
        }
    }
    return mergeSpec;
}
/// <summary>
/// For advance amounts 25, 50, ... 175, every prefix (length 2 and up) of a shuffled list of
/// common, medium and rare terms, and every minimum-should-match value from 1 to the prefix
/// length, compares the two scorers built by <c>Scorer(terms, minNrShouldMatch, true/false)</c>
/// via <c>AssertAdvance</c>.
/// </summary>
public virtual void TestAdvanceVaryingNumberOfTerms() {
    IList<string> termsList = new List<string>();
    termsList.AddRange(Arrays.AsList(CommonTerms));
    termsList.AddRange(Arrays.AsList(MediumTerms));
    termsList.AddRange(Arrays.AsList(RareTerms));
    termsList = CollectionsHelper.Shuffle(termsList);

    int amount = 25;
    while (amount < 200) {
        int numTerms = 2;
        while (numTerms <= termsList.Count) {
            string[] terms = termsList.SubList(0, numTerms).ToArray();
            int minNrShouldMatch = 1;
            while (minNrShouldMatch <= terms.Length) {
                Scorer expected = Scorer(terms, minNrShouldMatch, true);
                Scorer actual = Scorer(terms, minNrShouldMatch, false);
                AssertAdvance(expected, actual, amount);
                minNrShouldMatch++;
            }
            numTerms++;
        }
        amount += 25;
    }
}
/// <summary>
/// Picks a random set of facet categories: one to three from <c>CATEGORIES_A</c>, one or two
/// from <c>CATEGORIES_B</c>, plus one each from <c>CATEGORIES_C</c> and <c>CATEGORIES_D</c>.
/// </summary>
/// <param name="random">Random source used to decide how many A and B categories to take.</param>
/// <returns>The selected categories; no A or B category appears twice.</returns>
private static IList<FacetField> RandomCategories(Random random) {
    // add random categories from the two dimensions, ensuring that the same
    // category is not added twice.
    int numFacetsA = random.Next(3) + 1; // 1-3
    int numFacetsB = random.Next(2) + 1; // 1-2

    List<FacetField> shuffledA = new List<FacetField>();
    shuffledA.AddRange(Arrays.AsList(CATEGORIES_A));
    List<FacetField> shuffledB = new List<FacetField>();
    shuffledB.AddRange(Arrays.AsList(CATEGORIES_B));
    shuffledA = CollectionsHelper.Shuffle(shuffledA).ToList();
    shuffledB = CollectionsHelper.Shuffle(shuffledB).ToList();

    // Taking a prefix of each shuffled list yields distinct random picks.
    List<FacetField> categories = new List<FacetField>();
    categories.AddRange(shuffledA.SubList(0, numFacetsA));
    categories.AddRange(shuffledB.SubList(0, numFacetsB));

    // add the NO_PARENT categories
    FacetField pickC = CATEGORIES_C[Random().Next(NUM_CHILDREN_CP_C)];
    categories.Add(pickC);
    FacetField pickD = CATEGORIES_D[Random().Next(NUM_CHILDREN_CP_D)];
    categories.Add(pickD);
    return categories;
}
/// <summary> /// Retrieve suggestions. /// </summary> public virtual IList <LookupResult> DoLookup(string key, IEnumerable <BytesRef> contexts, int num) { if (contexts != null) { throw new System.ArgumentException("this suggester doesn't support contexts"); } TokenStream ts = queryAnalyzer.GetTokenStream("", key.ToString()); try { ITermToBytesRefAttribute termBytesAtt = ts.AddAttribute <ITermToBytesRefAttribute>(); IOffsetAttribute offsetAtt = ts.AddAttribute <IOffsetAttribute>(); IPositionLengthAttribute posLenAtt = ts.AddAttribute <IPositionLengthAttribute>(); IPositionIncrementAttribute posIncAtt = ts.AddAttribute <IPositionIncrementAttribute>(); ts.Reset(); var lastTokens = new BytesRef[grams]; //System.out.println("lookup: key='" + key + "'"); // Run full analysis, but save only the // last 1gram, last 2gram, etc.: BytesRef tokenBytes = termBytesAtt.BytesRef; int maxEndOffset = -1; bool sawRealToken = false; while (ts.IncrementToken()) { termBytesAtt.FillBytesRef(); sawRealToken |= tokenBytes.Length > 0; // TODO: this is somewhat iffy; today, ShingleFilter // sets posLen to the gram count; maybe we should make // a separate dedicated att for this? 
int gramCount = posLenAtt.PositionLength; Debug.Assert(gramCount <= grams); // Safety: make sure the recalculated count "agrees": if (CountGrams(tokenBytes) != gramCount) { throw new System.ArgumentException("tokens must not contain separator byte; got token=" + tokenBytes + " but gramCount=" + gramCount + " does not match recalculated count=" + CountGrams(tokenBytes)); } maxEndOffset = Math.Max(maxEndOffset, offsetAtt.EndOffset); lastTokens[gramCount - 1] = BytesRef.DeepCopyOf(tokenBytes); } ts.End(); if (!sawRealToken) { throw new System.ArgumentException("no tokens produced by analyzer, or the only tokens were empty strings"); } // Carefully fill last tokens with _ tokens; // ShingleFilter appraently won't emit "only hole" // tokens: int endPosInc = posIncAtt.PositionIncrement; // Note this will also be true if input is the empty // string (in which case we saw no tokens and // maxEndOffset is still -1), which in fact works out OK // because we fill the unigram with an empty BytesRef // below: bool lastTokenEnded = offsetAtt.EndOffset > maxEndOffset || endPosInc > 0; //System.out.println("maxEndOffset=" + maxEndOffset + " vs " + offsetAtt.EndOffset); if (lastTokenEnded) { //System.out.println(" lastTokenEnded"); // If user hit space after the last token, then // "upgrade" all tokens. 
This way "foo " will suggest // all bigrams starting w/ foo, and not any unigrams // starting with "foo": for (int i = grams - 1; i > 0; i--) { BytesRef token = lastTokens[i - 1]; if (token == null) { continue; } token.Grow(token.Length + 1); token.Bytes[token.Length] = separator; token.Length++; lastTokens[i] = token; } lastTokens[0] = new BytesRef(); } var arc = new FST.Arc <long?>(); var bytesReader = fst.GetBytesReader(); // Try highest order models first, and if they return // results, return that; else, fallback: double backoff = 1.0; List <LookupResult> results = new List <LookupResult>(num); // We only add a given suffix once, from the highest // order model that saw it; for subsequent lower order // models we skip it: var seen = new HashSet <BytesRef>(); for (int gram = grams - 1; gram >= 0; gram--) { BytesRef token = lastTokens[gram]; // Don't make unigram predictions from empty string: if (token == null || (token.Length == 0 && key.Length > 0)) { // Input didn't have enough tokens: //System.out.println(" gram=" + gram + ": skip: not enough input"); continue; } if (endPosInc > 0 && gram <= endPosInc) { // Skip hole-only predictions; in theory we // shouldn't have to do this, but we'd need to fix // ShingleFilter to produce only-hole tokens: //System.out.println(" break: only holes now"); break; } //System.out.println("try " + (gram+1) + " gram token=" + token.utf8ToString()); // TODO: we could add fuzziness here // match the prefix portion exactly //Pair<Long,BytesRef> prefixOutput = null; long?prefixOutput = null; try { prefixOutput = LookupPrefix(fst, bytesReader, token, arc); } catch (IOException bogus) { throw new Exception(bogus.ToString(), bogus); } //System.out.println(" prefixOutput=" + prefixOutput); if (prefixOutput == null) { // This model never saw this prefix, e.g. the // trigram model never saw context "purple mushroom" backoff *= ALPHA; continue; } // TODO: we could do this division at build time, and // bake it into the FST? 
// Denominator for computing scores from current // model's predictions: long contextCount = totTokens; BytesRef lastTokenFragment = null; for (int i = token.Length - 1; i >= 0; i--) { if (token.Bytes[token.Offset + i] == separator) { BytesRef context = new BytesRef(token.Bytes, token.Offset, i); long? output = Lucene.Net.Util.Fst.Util.Get(fst, Lucene.Net.Util.Fst.Util.ToInt32sRef(context, new Int32sRef())); Debug.Assert(output != null); contextCount = DecodeWeight(output); lastTokenFragment = new BytesRef(token.Bytes, token.Offset + i + 1, token.Length - i - 1); break; } } BytesRef finalLastToken; if (lastTokenFragment == null) { finalLastToken = BytesRef.DeepCopyOf(token); } else { finalLastToken = BytesRef.DeepCopyOf(lastTokenFragment); } Debug.Assert(finalLastToken.Offset == 0); CharsRef spare = new CharsRef(); // complete top-N Util.Fst.Util.TopResults <long?> completions = null; try { // Because we store multiple models in one FST // (1gram, 2gram, 3gram), we must restrict the // search so that it only considers the current // model. 
For highest order model, this is not // necessary since all completions in the FST // must be from this model, but for lower order // models we have to filter out the higher order // ones: // Must do num+seen.size() for queue depth because we may // reject up to seen.size() paths in acceptResult(): Util.Fst.Util.TopNSearcher <long?> searcher = new TopNSearcherAnonymousInnerClassHelper(this, fst, num, num + seen.Count, weightComparer, seen, finalLastToken); // since this search is initialized with a single start node // it is okay to start with an empty input path here searcher.AddStartPaths(arc, prefixOutput, true, new Int32sRef()); completions = searcher.Search(); Debug.Assert(completions.IsComplete); } catch (IOException bogus) { throw new Exception(bogus.ToString(), bogus); } int prefixLength = token.Length; BytesRef suffix = new BytesRef(8); //System.out.println(" " + completions.length + " completions"); foreach (Util.Fst.Util.Result <long?> completion in completions) { token.Length = prefixLength; // append suffix Util.Fst.Util.ToBytesRef(completion.Input, suffix); token.Append(suffix); //System.out.println(" completion " + token.utf8ToString()); // Skip this path if a higher-order model already // saw/predicted its last token: BytesRef lastToken = token; for (int i = token.Length - 1; i >= 0; i--) { if (token.Bytes[token.Offset + i] == separator) { Debug.Assert(token.Length - i - 1 > 0); lastToken = new BytesRef(token.Bytes, token.Offset + i + 1, token.Length - i - 1); break; } } if (seen.Contains(lastToken)) { //System.out.println(" skip dup " + lastToken.utf8ToString()); goto nextCompletionContinue; } seen.Add(BytesRef.DeepCopyOf(lastToken)); spare.Grow(token.Length); UnicodeUtil.UTF8toUTF16(token, spare); LookupResult result = new LookupResult(spare.ToString(), // LUCENENET NOTE: We need to calculate this as decimal because when using double it can sometimes // return numbers that are greater than long.MaxValue, which results in a negative long number. 
(long)(long.MaxValue * (decimal)backoff * ((decimal)DecodeWeight(completion.Output)) / contextCount)); results.Add(result); Debug.Assert(results.Count == seen.Count); //System.out.println(" add result=" + result); nextCompletionContinue :; } backoff *= ALPHA; } results.Sort(new ComparerAnonymousInnerClassHelper(this)); if (results.Count > num) { results.SubList(num, results.Count).Clear(); } return(results); } finally { IOUtils.DisposeWhileHandlingException(ts); } }
/// <summary>
/// Chooses the merges to run for a forced merge that targets
/// <paramref name="maxSegmentCount"/> segments.
/// </summary>
/// <param name="infos">The segments currently in the index.</param>
/// <param name="maxSegmentCount">Requested number of segments to merge down to.</param>
/// <param name="segmentsToMerge">
/// Segments taking part in the forced merge. A segment of <paramref name="infos"/> that is
/// missing from this map, or mapped to <c>null</c>, is ignored.
/// </param>
/// <returns>
/// The merges selected for this round, or <c>null</c> when no eligible segment exists, the
/// segments already satisfy the request, or the only remaining work must wait for merges that
/// are still running.
/// </returns>
public override MergeSpecification FindForcedMerges(SegmentInfos infos, int maxSegmentCount, IDictionary<SegmentCommitInfo, bool?> segmentsToMerge)
{
    if (Verbose())
    {
        Message("findForcedMerges maxSegmentCount=" + maxSegmentCount + " infos=" + Writer.Get().SegString(infos.Segments) + " segmentsToMerge=" + segmentsToMerge);
    }

    List<SegmentCommitInfo> eligible = new List<SegmentCommitInfo>();
    bool forceMergeRunning = false;
    ICollection<SegmentCommitInfo> merging = Writer.Get().MergingSegments;
    bool? segmentIsOriginal = false;
    foreach (SegmentCommitInfo info in infos.Segments)
    {
        // The IDictionary indexer throws KeyNotFoundException for an absent key, so a
        // segment that is not part of the forced merge must be probed with TryGetValue
        // in order to be skipped instead of aborting the whole selection.
        bool? isOriginal;
        if (!segmentsToMerge.TryGetValue(info, out isOriginal) || isOriginal == null)
        {
            continue;
        }

        segmentIsOriginal = isOriginal;
        if (!merging.Contains(info))
        {
            eligible.Add(info);
        }
        else
        {
            // Part of the requested work is already being merged elsewhere.
            forceMergeRunning = true;
        }
    }

    if (eligible.Count == 0)
    {
        return null;
    }

    if ((maxSegmentCount > 1 && eligible.Count <= maxSegmentCount) ||
        (maxSegmentCount == 1 && eligible.Count == 1 && (segmentIsOriginal == false || IsMerged(infos, eligible[0]))))
    {
        if (Verbose())
        {
            Message("already merged");
        }
        return null;
    }

    eligible.Sort(new SegmentByteSizeDescending(this));

    if (Verbose())
    {
        Message("eligible=" + eligible);
        Message("forceMergeRunning=" + forceMergeRunning);
    }

    int end = eligible.Count;

    MergeSpecification spec = null;

    // Do full merges, first, backwards:
    while (end >= MaxMergeAtOnceExplicit_Renamed + maxSegmentCount - 1)
    {
        if (spec == null)
        {
            spec = new MergeSpecification();
        }
        OneMerge merge = new OneMerge(eligible.SubList(end - MaxMergeAtOnceExplicit_Renamed, end));
        if (Verbose())
        {
            Message("add merge=" + Writer.Get().SegString(merge.Segments));
        }
        spec.Add(merge);
        end -= MaxMergeAtOnceExplicit_Renamed;
    }

    // The final merge is only emitted when no full merge was selected above and no
    // merge of these segments is still in flight.
    if (spec == null && !forceMergeRunning)
    {
        // Do final merge
        int numToMerge = end - maxSegmentCount + 1;
        OneMerge merge = new OneMerge(eligible.SubList(end - numToMerge, end));
        if (Verbose())
        {
            Message("add final merge=" + merge.SegString(Writer.Get().Directory));
        }
        spec = new MergeSpecification();
        spec.Add(merge);
    }

    return spec;
}
/// <summary>
/// Renders the member named by the first entry of <paramref name="call_stack"/> and, when
/// more names follow, hands the remainder of the chain to that member's type.
/// </summary>
/// <param name="builder">Output buffer passed to the member renderers.</param>
/// <param name="call_stack">Member names of the access chain; must contain at least one entry.</param>
/// <param name="paras">Argument text passed when the member resolves to a method.</param>
/// <param name="op">Assignment operator; forwarded to the field renderer only for the last name in the chain.</param>
/// <param name="opValue">Assigned value text; forwarded to the field renderer only for the last name in the chain.</param>
/// <param name="postfixPartType">Passed through unchanged to the recursive call.</param>
/// <param name="func_paras">Passed through unchanged to the recursive call.</param>
/// <param name="index_para">Passed through unchanged to the recursive call.</param>
/// <returns>
/// The type produced by the recursive call when the chain has more than one name; otherwise
/// the type of the matched member, or <c>null</c> when no member matched.
/// </returns>
public virtual T_Type RenderView(StringBuilder builder, List<string> call_stack, StringBuilder paras, AssignOperatorInfo? op, StringBuilder opValue, PostfixPartType? postfixPartType, StringBuilder func_paras, StringBuilder index_para)
{
    //if(call_stack == null)
    var memberName = call_stack[0];

    // Only the final member of the chain receives the assignment operator and value.
    bool isLastMember = call_stack.Count == 1;
    AssignOperatorInfo? memberOp = isLastMember ? op : null;
    StringBuilder memberOpValue = isLastMember ? opValue : null;

    T_Type memberType = null;
    if (this.IsInstance)
    {
        foreach (var field in this.Fields)
        {
            if (field.Name == memberName)
            {
                this.RenderView(builder, field, memberOp, memberOpValue);
                memberType = field.Type;
                break;
            }
        }

        // Fall back to methods when no field supplied a type.
        if (memberType == null)
        {
            foreach (var method in this.Methods)
            {
                if (method.Name == memberName)
                {
                    this.RenderView(builder, method, paras);
                    memberType = method.Type;
                    break;
                }
            }
        }
    }
    else
    {
        foreach (var field in this.Static_Fields)
        {
            if (field.Name == memberName)
            {
                this.RenderView(builder, field, memberOp, memberOpValue);
                memberType = field.Type;
                break;
            }
        }

        // Fall back to static methods when no static field supplied a type.
        if (memberType == null)
        {
            foreach (var method in this.Static_Methods)
            {
                if (method.Name == memberName)
                {
                    this.RenderView(builder, method, paras);
                    memberType = method.Type;
                    break;
                }
            }
        }
    }

    if (call_stack.Count <= 1)
    {
        return memberType;
    }

    // Continue with the rest of the chain on the resolved member's type.
    return memberType.RenderView(builder, call_stack.SubList(1), paras, op, opValue, postfixPartType, func_paras, index_para);
}
/// <summary>
/// Trains the model by repeatedly shuffling <paramref name="sentences"/>, cutting them into
/// batches of <c>op.trainOptions.batchSize</c> trees and running one training batch on each.
/// Every <c>op.trainOptions.debugOutputFrequency</c> batches it optionally evaluates on
/// <paramref name="testTreebank"/>, saves a checkpoint and appends a status line to
/// <paramref name="resultsRecordPath"/>.
/// </summary>
/// <param name="sentences">Training trees.</param>
/// <param name="compressedParses">Passed through to each training batch.</param>
/// <param name="testTreebank">Evaluation treebank for checkpoints; <c>null</c> skips evaluation.</param>
/// <param name="modelPath">Where checkpoints are saved; <c>null</c> skips saving.</param>
/// <param name="resultsRecordPath">File that checkpoint status lines are appended to; <c>null</c> skips recording.</param>
/// <exception cref="System.IO.IOException"/>
public virtual void Train(IList<Tree> sentences, IdentityHashMap<Tree, byte[]> compressedParses, Treebank testTreebank, string modelPath, string resultsRecordPath)
{
    // process:
    //   we come up with a cost and a derivative for the model
    //   we always use the gold tree as the example to train towards
    //   every time through, we will look at the top N trees from
    //     the LexicalizedParser and pick the best one according to
    //     our model (at the start, this is essentially random)
    // we use QN to minimize the cost function for the model
    // to do this minimization, we turn all of the matrices in the
    //   DVModel into one big Theta, which is the set of variables to
    //   be optimized by the QN.
    Timing timing = new Timing();

    // 1000L forces the multiplication into 64 bits so that a large time limit cannot
    // overflow before the result is widened to long.
    long maxTrainTimeMillis = op.trainOptions.maxTrainTimeSeconds * 1000L;
    int batchCount = 0;
    int debugCycle = 0;
    double bestLabelF1 = 0.0;

    if (op.trainOptions.useContextWords)
    {
        foreach (Tree tree in sentences)
        {
            Edu.Stanford.Nlp.Trees.Trees.ConvertToCoreLabels(tree);
            tree.SetSpans();
        }
    }

    // for AdaGrad
    double[] sumGradSquare = new double[dvModel.TotalParamSize()];
    Arrays.Fill(sumGradSquare, 1.0);

    int numBatches = sentences.Count / op.trainOptions.batchSize + 1;
    log.Info("Training on " + sentences.Count + " trees in " + numBatches + " batches");
    log.Info("Times through each training batch: " + op.trainOptions.trainingIterations);
    log.Info("QN iterations per batch: " + op.trainOptions.qnIterationsPerBatch);

    for (int iter = 0; iter < op.trainOptions.trainingIterations; ++iter)
    {
        IList<Tree> shuffledSentences = new List<Tree>(sentences);
        Java.Util.Collections.Shuffle(shuffledSentences, dvModel.rand);

        for (int batch = 0; batch < numBatches; ++batch)
        {
            ++batchCount;
            // This did not help performance
            //log.info("Setting AdaGrad's sum of squares to 1...");
            //Arrays.fill(sumGradSquare, 1.0);
            log.Info("======================================");
            log.Info("Iteration " + iter + " batch " + batch);

            // Each batch will be of the specified batch size, except the
            // last batch will include any leftover trees at the end of
            // the list
            int startTree = batch * op.trainOptions.batchSize;
            int endTree = (batch + 1) * op.trainOptions.batchSize;
            if (endTree > shuffledSentences.Count)
            {
                endTree = shuffledSentences.Count;
            }

            ExecuteOneTrainingBatch(shuffledSentences.SubList(startTree, endTree), compressedParses, sumGradSquare);

            long totalElapsed = timing.Report();
            log.Info("Finished iteration " + iter + " batch " + batch + "; total training time " + totalElapsed + " ms");

            if (maxTrainTimeMillis > 0 && totalElapsed > maxTrainTimeMillis)
            {
                // no need to debug output, we're done now
                break;
            }

            if (op.trainOptions.debugOutputFrequency > 0 && batchCount % op.trainOptions.debugOutputFrequency == 0)
            {
                log.Info("Finished " + batchCount + " total batches, running evaluation cycle");
                // Time for debugging output!
                double tagF1 = 0.0;
                double labelF1 = 0.0;
                if (testTreebank != null)
                {
                    EvaluateTreebank evaluator = new EvaluateTreebank(AttachModelToLexicalizedParser());
                    evaluator.TestOnTreebank(testTreebank);
                    labelF1 = evaluator.GetLBScore();
                    tagF1 = evaluator.GetTagScore();
                    if (labelF1 > bestLabelF1)
                    {
                        bestLabelF1 = labelF1;
                    }
                    log.Info("Best label f1 on dev set so far: " + Nf.Format(bestLabelF1));
                }

                string tempName = null;
                if (modelPath != null)
                {
                    tempName = modelPath;
                    // Ordinal comparison: the suffix is a file extension, not linguistic text.
                    if (modelPath.EndsWith(".ser.gz", System.StringComparison.Ordinal))
                    {
                        tempName = Sharpen.Runtime.Substring(modelPath, 0, modelPath.Length - 7) + "-" + Filename.Format(debugCycle) + "-" + Nf.Format(labelF1) + ".ser.gz";
                    }
                    SaveModel(tempName);
                }

                string statusLine = ("CHECKPOINT:" + " iteration " + iter + " batch " + batch + " labelF1 " + Nf.Format(labelF1) + " tagF1 " + Nf.Format(tagF1) + " bestLabelF1 " + Nf.Format(bestLabelF1) + " model " + tempName + op.trainOptions + " word vectors: " + op.lexOptions.wordVectorFile + " numHid: " + op.lexOptions.numHid);
                log.Info(statusLine);
                if (resultsRecordPath != null)
                {
                    FileWriter fout = new FileWriter(resultsRecordPath, true); // append
                    try
                    {
                        fout.Write(statusLine);
                        fout.Write("\n");
                    }
                    finally
                    {
                        // Release the file handle even when a write fails.
                        fout.Close();
                    }
                }

                ++debugCycle;
            }
        }

        // The check inside the batch loop only leaves that loop; repeat it here to stop
        // iterating altogether.
        long totalElapsed_1 = timing.Report();
        if (maxTrainTimeMillis > 0 && totalElapsed_1 > maxTrainTimeMillis)
        {
            // no need to debug output, we're done now
            log.Info("Max training time exceeded, exiting");
            break;
        }
    }
}
/// <summary>
/// Randomized shard-searching test. Until <c>endTimeNanos</c> it picks a random node, runs
/// either a fresh query or a follow-on to a remembered earlier search through that node's
/// shard searcher, and passes the same query to <c>AssertSame</c> together with a searcher
/// over a <c>MultiReader</c> built from every node's reader. Expired searchers are treated
/// as an expected outcome and cause the affected prior search to be forgotten.
/// </summary>
public virtual void TestSimple()
{
    int numNodes = TestUtil.NextInt(Random(), 1, 10);

    double runTimeSec = AtLeast(3);

    int minDocsToMakeTerms = TestUtil.NextInt(Random(), 5, 20);

    int maxSearcherAgeSeconds = TestUtil.NextInt(Random(), 1, 3);

    if (VERBOSE)
    {
        Console.WriteLine("TEST: numNodes=" + numNodes + " runTimeSec=" + runTimeSec + " maxSearcherAgeSeconds=" + maxSearcherAgeSeconds);
    }

    Start(numNodes, runTimeSec, maxSearcherAgeSeconds);

    List<PreviousSearchState> priorSearches = new List<PreviousSearchState>();
    List<BytesRef> terms = null;
    while (Time.NanoTime() < endTimeNanos)
    {
        bool doFollowon = priorSearches.Count > 0 && Random().Next(7) == 1;

        // Pick a random node; we will run the query on this node:
        int myNodeID = Random().Next(numNodes);

        NodeState.ShardIndexSearcher localShardSearcher;

        PreviousSearchState prevSearchState;

        if (doFollowon)
        {
            // Pretend user issued a followon query:
            prevSearchState = priorSearches[Random().Next(priorSearches.Count)];

            if (VERBOSE)
            {
                Console.WriteLine("\nTEST: follow-on query age=" + ((Time.NanoTime() - prevSearchState.SearchTimeNanos) / 1000000000.0));
            }

            try
            {
                localShardSearcher = Nodes[myNodeID].Acquire(prevSearchState.Versions);
            }
            catch (SearcherExpiredException see)
            {
                // Expected, sometimes; in a "real" app we would
                // either forward this error to the user ("too
                // much time has passed; please re-run your
                // search") or sneakily just switch to newest
                // searcher w/o telling them...
                if (VERBOSE)
                {
                    Console.WriteLine("  searcher expired during local shard searcher init: " + see);
                }
                priorSearches.Remove(prevSearchState);
                continue;
            }
        }
        else
        {
            if (VERBOSE)
            {
                Console.WriteLine("\nTEST: fresh query");
            }
            // Do fresh query:
            localShardSearcher = Nodes[myNodeID].Acquire();
            prevSearchState = null;
        }

        IndexReader[] subs = new IndexReader[numNodes];

        PreviousSearchState searchState = null;

        try
        {
            // Mock: now make a single reader (MultiReader) from all node
            // searchers.  In a real shard env you can't do this... we
            // do it to confirm results from the shard searcher
            // are correct:
            int docCount = 0;
            try
            {
                for (int nodeID = 0; nodeID < numNodes; nodeID++)
                {
                    long subVersion = localShardSearcher.NodeVersions[nodeID];
                    IndexSearcher sub = Nodes[nodeID].Searchers.Acquire(subVersion);
                    if (sub == null)
                    {
                        // Capture the failing node before the rollback below counts
                        // nodeID down to -1; otherwise the message always says nodeID=-1.
                        string expiredMessage = "nodeID=" + nodeID + " version=" + subVersion;

                        // Roll back the readers acquired so far and clear their slots so
                        // the finally block does not release them a second time.
                        nodeID--;
                        while (nodeID >= 0)
                        {
                            subs[nodeID].DecRef();
                            subs[nodeID] = null;
                            nodeID--;
                        }
                        throw new SearcherExpiredException(expiredMessage);
                    }
                    subs[nodeID] = sub.IndexReader;
                    docCount += subs[nodeID].MaxDoc;
                }
            }
            catch (SearcherExpiredException see)
            {
                // Expected
                if (VERBOSE)
                {
                    Console.WriteLine("  searcher expired during mock reader init: " + see);
                }
                continue;
            }

            IndexReader mockReader = new MultiReader(subs);
            IndexSearcher mockSearcher = new IndexSearcher(mockReader);

            Query query;
            Sort sort;

            if (prevSearchState != null)
            {
                query = prevSearchState.Query;
                sort = prevSearchState.Sort;
            }
            else
            {
                if (terms == null && docCount > minDocsToMakeTerms)
                {
                    // TODO: try to "focus" on high freq terms sometimes too
                    // TODO: maybe also periodically reset the terms...?
                    TermsEnum termsEnum = MultiFields.GetTerms(mockReader, "body").GetIterator(null);
                    terms = new List<BytesRef>();
                    while (termsEnum.Next() != null)
                    {
                        terms.Add(BytesRef.DeepCopyOf(termsEnum.Term));
                    }
                    if (VERBOSE)
                    {
                        Console.WriteLine("TEST: init terms: " + terms.Count + " terms");
                    }
                    if (terms.Count == 0)
                    {
                        terms = null;
                    }
                }

                if (VERBOSE)
                {
                    Console.WriteLine("  maxDoc=" + mockReader.MaxDoc);
                }

                if (terms != null)
                {
                    if (Random().NextBoolean())
                    {
                        query = new TermQuery(new Term("body", terms[Random().Next(terms.Count)]));
                    }
                    else
                    {
                        string t = terms[Random().Next(terms.Count)].Utf8ToString();
                        string prefix;
                        if (t.Length <= 1)
                        {
                            prefix = t;
                        }
                        else
                        {
                            prefix = t.Substring(0, TestUtil.NextInt(Random(), 1, 2));
                        }
                        query = new PrefixQuery(new Term("body", prefix));
                    }

                    if (Random().NextBoolean())
                    {
                        sort = null;
                    }
                    else
                    {
                        // TODO: sort by more than 1 field
                        int what = Random().Next(3);
                        if (what == 0)
                        {
                            sort = new Sort(SortField.FIELD_SCORE);
                        }
                        else if (what == 1)
                        {
                            // TODO: this sort doesn't merge
                            // correctly... it's tricky because you
                            // could have > 2.1B docs across all shards:
                            //sort = new Sort(SortField.FIELD_DOC);
                            sort = null;
                        }
                        else if (what == 2)
                        {
                            sort = new Sort(new SortField[] { new SortField("docid", SortFieldType.INT32, Random().NextBoolean()) });
                        }
                        else
                        {
                            // NOTE(review): not reachable while 'what' is drawn from Next(3).
                            sort = new Sort(new SortField[] { new SortField("title", SortFieldType.STRING, Random().NextBoolean()) });
                        }
                    }
                }
                else
                {
                    query = null;
                    sort = null;
                }
            }

            if (query != null)
            {
                try
                {
                    searchState = AssertSame(mockSearcher, localShardSearcher, query, sort, prevSearchState);
                }
                catch (SearcherExpiredException see)
                {
                    // Expected; in a "real" app we would
                    // either forward this error to the user ("too
                    // much time has passed; please re-run your
                    // search") or sneakily just switch to newest
                    // searcher w/o telling them...
                    if (VERBOSE)
                    {
                        Console.WriteLine("  searcher expired during search: " + see);
                        Console.Out.Write(see.StackTrace);
                    }
                    // We can't do this in general: on a very slow
                    // computer it's possible the local searcher
                    // expires before we can finish our search:
                    // assert prevSearchState != null;
                    if (prevSearchState != null)
                    {
                        priorSearches.Remove(prevSearchState);
                    }
                }
            }
        }
        finally
        {
            // Runs on every exit from the try above, including the 'continue' paths.
            Nodes[myNodeID].Release(localShardSearcher);
            foreach (IndexReader sub in subs)
            {
                if (sub != null)
                {
                    sub.DecRef();
                }
            }
        }

        if (searchState != null && searchState.SearchAfterLocal != null && Random().Next(5) == 3)
        {
            priorSearches.Add(searchState);
            if (priorSearches.Count > 200)
            {
                // Keep a random 100 of the remembered searches.
                Collections.Shuffle(priorSearches);
                priorSearches.SubList(100, priorSearches.Count).Clear();
            }
        }
    }

    Finish();
}
/// <summary>
/// Randomly decides whether to merge, and if so proposes a single merge over a random
/// number of randomly ordered segments that are not already being merged.
/// </summary>
/// <param name="mergeTrigger">Not consulted by this policy.</param>
/// <param name="segmentInfos">The segments currently in the index.</param>
/// <returns>A specification holding one merge, or <c>null</c> when no merge is proposed.</returns>
public override MergeSpecification FindMerges(MergeTrigger? mergeTrigger, SegmentInfos segmentInfos)
{
    //System.out.println("MRMP: findMerges sis=" + segmentInfos);

    // Starts as the total segment count; replaced below by the number of candidates.
    int numSegments = segmentInfos.Size();

    // Candidates are the segments that no running merge has claimed.
    IList<SegmentCommitInfo> candidates = new List<SegmentCommitInfo>();
    ICollection<SegmentCommitInfo> alreadyMerging = Writer.Get().MergingSegments;
    foreach (SegmentCommitInfo segment in segmentInfos.Segments)
    {
        if (alreadyMerging.Contains(segment))
        {
            continue;
        }
        candidates.Add(segment);
    }

    numSegments = candidates.Count;

    // A merge needs at least two segments.
    if (numSegments <= 1)
    {
        return null;
    }

    // Above 30 segments always merge; otherwise merge only on a 1-in-5 draw.
    if (numSegments <= 30 && Random.Next(5) != 3)
    {
        return null;
    }

    candidates = CollectionsHelper.Shuffle(candidates);

    // TODO: sometimes make more than 1 merge?
    MergeSpecification result = new MergeSpecification();
    int mergeSize = TestUtil.NextInt(Random, 1, numSegments);
    result.Add(new OneMerge(candidates.SubList(0, mergeSize)));
    return result;
}
/// <summary>
/// See nicer wrapper: MannWhitneyUTestOneSided.
/// One-sided test for the case where the group labelled 1 is larger than the group labelled 0.
/// With more than 10 rows in each group (or when forced) the p-value comes from the normal
/// approximation; otherwise it is the fraction of permutation statistics that are at least as
/// large as the observed one, using exact or randomly sampled permutations.
/// </summary>
/// <typeparam name="T">Row type.</typeparam>
/// <param name="rowList">Rows to test.</param>
/// <param name="scoreAccessor">Extracts the score of a row.</param>
/// <param name="label01Accessor">Extracts the 0/1 group label of a row.</param>
/// <param name="maxNumPermutations">Number of sampled permutations, and the bound under which exact permutations are used.</param>
/// <param name="forceAssymptoticApprox">Use the normal approximation regardless of group sizes.</param>
/// <param name="neverDoExactPermutations">Always sample permutations instead of using the exact ones.</param>
/// <param name="parallelOptionsOrNullFor1">Parallelism settings; <c>null</c> means a degree of parallelism of 1.</param>
/// <returns>
/// The statistic as key and the p-value as value. The key is a z score, except in the sampled
/// permutation path where it is the U statistic of the rows labelled 1.
/// </returns>
public static KeyValuePair<double, double> ComputeZ0AndPValue<T>(IList<T> rowList, Func<T, double> scoreAccessor, Func<T, int> label01Accessor, int maxNumPermutations = 10000, bool forceAssymptoticApprox = false, bool neverDoExactPermutations = false, ParallelOptions parallelOptionsOrNullFor1 = null)
{
    ParallelOptions parallelOptions = parallelOptionsOrNullFor1;
    if (parallelOptions == null)
    {
        parallelOptions = new ParallelOptions() { MaxDegreeOfParallelism = 1 };
    }

    // Group sizes are counted sequentially rather than through the parallel options.
    int n0 = rowList.Count(row => label01Accessor(row) == 0);
    int n1 = rowList.Count(row => label01Accessor(row) == 1); // the class we think has larger values for the one-tailed test

    double z0;
    double p;

    // Large groups (or an explicit request): normal approximation.
    bool useNormalApproximation = forceAssymptoticApprox || (n0 > 10 && n1 > 10);
    if (useNormalApproximation)
    {
        z0 = ComputeZ0<T>(rowList, parallelOptions, n0, n1, scoreAccessor, label01Accessor);
        p = 1.0 - SpecialFunctions.ZScoreToOneTailedPValue(z0, 1e-10);
        SanityCheckP(z0, p);
        return new KeyValuePair<double, double>(z0, p);
    }

    // Small groups: build a permutation distribution of the statistic.
    ParallelOptions sequentialOptions = new ParallelOptions { MaxDegreeOfParallelism = 1 };

    // Decide whether enumerating every permutation fits within maxNumPermutations.
    double logExactPermutationCount = SpecialFunctions.LogFactorialNMOverFactorialNFactorialMApprox(n0, n1);
    bool fitsInBudget = logExactPermutationCount <= Math.Log(maxNumPermutations);
    bool useExactPermutations = fitsInBudget && !neverDoExactPermutations;

    List<double> permutationStatistics;
    if (useExactPermutations)
    {
        z0 = ComputeZ0<T>(rowList, parallelOptions, n0, n1, scoreAccessor, label01Accessor);

        // Permuting the ranks of the real data (ties included) would be faster than permuting
        // the data itself; this path is kept for when exact permutations are needed.
        permutationStatistics = SpecialFunctions.Permute01Targets(rowList, scoreAccessor, label01Accessor, maxNumPermutations)
            .AsParallel()
            .WithDegreeOfParallelism(parallelOptions.MaxDegreeOfParallelism)
            .Select(permutation => ComputeZ0(permutation, sequentialOptions, n0, n1, pair => pair.Key, pair => pair.Value))
            .OrderBy(statistic => statistic)
            .ToList();
    }
    else
    {
        // Sampled permutations are done here instead of in SpecialFunctions.Permute01Targets().
        // U is used directly; there is no conversion to z.
        List<double> allScores = rowList.Select(row => scoreAccessor(row)).ToList();
        List<double> ranks = SpecialFunctions.RanksWithTies(allScores);

        List<int> class1Indices = Enumerable.Range(0, n0 + n1)
            .Where(index => label01Accessor(rowList[index]) == 1)
            .ToList();
        List<double> class1Ranks = ranks.SubList(class1Indices);
        double observedU1 = ComputeUFromRanks(class1Ranks);

        //!!!not parallelized
        List<double> sampledU = new List<double>();
        Random rng = new MachineInvariantRandom("123456");
        for (int sample = 0; sample < maxNumPermutations; sample++)
        {
            ranks.ShuffleInPlace(rng);

            // The U score of the first n0 shuffled ranks is computed but not used.
            List<double> shuffledRanks0 = ranks.SubSequence(0, n0).ToList();
            ComputeUFromRanks(shuffledRanks0);

            // One-sided test: keep the score of the slice standing in for label 1.
            List<double> shuffledRanks1 = ranks.SubSequence(n0, n1).ToList();
            sampledU.Add(ComputeUFromRanks(shuffledRanks1));
        }

        //to let the rest of the code do what it should
        permutationStatistics = sampledU;
        z0 = observedU1;
    }

    // p is the frequency of permutation statistics at or above the observed one.
    TwoByOne atLeastObserved = TwoByOne.GetInstance(permutationStatistics, candidate => z0 <= candidate);
    p = atLeastObserved.Freq;
    //Can't SanityCheckP(z0, p) because ties mean it wont always get the right answer

    return new KeyValuePair<double, double>(z0, p);
}
/// <summary>
/// Renders the member named by the first entry of <paramref name="call_stack"/> and, when
/// more names follow, hands the remainder of the chain to that member's type.
/// </summary>
/// <param name="builder">Output buffer passed to the member renderers.</param>
/// <param name="call_stack">Member names of the access chain; must contain at least one entry.</param>
/// <param name="paras">Argument text passed when the member resolves to a method.</param>
/// <param name="op">Assignment operator; forwarded to the field renderer only for the last name in the chain.</param>
/// <param name="opValue">Assigned value text; forwarded to the field renderer only for the last name in the chain.</param>
/// <param name="postfixPartType">Passed through unchanged to the recursive call.</param>
/// <param name="func_paras">Passed through unchanged to the recursive call.</param>
/// <param name="index_para">Passed through unchanged to the recursive call.</param>
/// <returns>
/// The type produced by the recursive call when the chain has more than one name; otherwise
/// the type of the matched member, or <c>null</c> when no member matched.
/// </returns>
public virtual T_Type RenderView(IndentStringBuilder builder, List<string> call_stack, IndentStringBuilder paras, AssignOperatorInfo? op, IndentStringBuilder opValue, PostfixPartType? postfixPartType, IndentStringBuilder func_paras, IndentStringBuilder index_para)
{
    //if(call_stack == null)
    var memberName = call_stack[0];

    // Only the final member of the chain receives the assignment operator and value.
    bool isLastMember = call_stack.Count == 1;
    AssignOperatorInfo? memberOp = isLastMember ? op : null;
    IndentStringBuilder memberOpValue = isLastMember ? opValue : null;

    T_Type memberType = null;
    if (this.IsInstance)
    {
        foreach (var field in this.Fields)
        {
            if (field.Name == memberName)
            {
                this.RenderView(builder, field, memberOp, memberOpValue);
                memberType = field.Type;
                break;
            }
        }

        // Fall back to methods when no field supplied a type.
        if (memberType == null)
        {
            foreach (var method in this.Methods)
            {
                if (method.Name == memberName)
                {
                    this.RenderView(builder, method, paras);
                    memberType = method.Type;
                    break;
                }
            }
        }
    }
    else
    {
        foreach (var field in this.Static_Fields)
        {
            if (field.Name == memberName)
            {
                this.RenderView(builder, field, memberOp, memberOpValue);
                memberType = field.Type;
                break;
            }
        }

        // Fall back to static methods when no static field supplied a type.
        if (memberType == null)
        {
            foreach (var method in this.Static_Methods)
            {
                if (method.Name == memberName)
                {
                    this.RenderView(builder, method, paras);
                    memberType = method.Type;
                    break;
                }
            }
        }
    }

    if (call_stack.Count <= 1)
    {
        return memberType;
    }

    // Continue with the rest of the chain on the resolved member's type.
    return memberType.RenderView(builder, call_stack.SubList(1), paras, op, opValue, postfixPartType, func_paras, index_para);
}