/// <summary>
/// Builds the span helper either for every variant of
/// <see cref="Span(string, int, SpanCondition)"/> or for exactly one variant,
/// performing only the setup that the requested variants need (intended for single use).
/// </summary>
/// <param name="set">Set that is intersected into a freshly created full-range <c>spanSet</c>.</param>
/// <param name="setStrings">Strings to take into account; the list reference is stored as-is, not copied.</param>
/// <param name="which">
/// Bit mask selecting the variants to prepare. It is compared against <c>All</c> and tested
/// against <c>Contained</c>, <c>NotContained</c>, <c>Forward</c>, <c>Backward</c> and <c>WithCount</c>.
/// </param>
public UnicodeSetStringSpan(UnicodeSet set, IList<string> setStrings, int which)
{
    // Decode the request mask once so the loops below read as plain booleans.
    bool wantContained = (which & Contained) != 0;
    bool wantNotContained = (which & NotContained) != 0;
    bool wantForward = (which & Forward) != 0;
    bool wantBackward = (which & Backward) != 0;

    spanSet = new UnicodeSet(0, 0x10ffff);

    // TODO (carried over from the Java original, ICU ticket #7454): take the parent set's
    // strings as a sorted, navigable collection instead of a list, and walk it without
    // creating an iterator object in each span() call.
    strings = setStrings;
    all = (which == All);
    spanSet.RetainAll(set);
    if (wantNotContained)
    {
        // Start out sharing the same set; AddToSpanNotSet() creates a separate one if necessary.
        spanNotSet = spanSet;
    }
    offsets = new OffsetList();

    // Pass 1: find out whether the strings matter at all, and record the longest one.
    // A string is "relevant" when spanSet cannot span all of it. If any string is relevant,
    // span(longest match) needs every string, while span(while contained) needs only the
    // relevant ones.
    // TODO: possible optimization - distinguish CONTAINED from LONGEST_MATCH and skip storing
    // data for irrelevant strings when only CONTAINED is needed.
    int stringCount = strings.Count;
    int longest = 0;
    someRelevant = false;
    for (int k = 0; k < stringCount; k++)
    {
        string candidate = strings[k];
        int units = candidate.Length;
        if (spanSet.Span(candidate, SpanCondition.Contained) < units)
        {
            someRelevant = true;
        }
        if (units > longest)
        {
            longest = units;
        }
    }
    this.maxLength16 = longest;

    if (!someRelevant && (which & WithCount) == 0)
    {
        return;
    }

    // Freeze only now: freezing costs time and memory that would be wasted
    // if the early return above had been taken.
    if (all)
    {
        spanSet.Freeze();
    }

    // Metadata layout: one slot per string, or two consecutive runs (forward lengths,
    // then backward lengths) when all variants are requested. With a single variant the
    // "backward" run starts at 0, i.e. it aliases the one and only run.
    int backOffset = all ? stringCount : 0;
    spanLengths = new short[all ? stringCount * 2 : stringCount];

    // Pass 2: fill in the metadata and extend spanNotSet.
    for (int k = 0; k < stringCount; k++)
    {
        string candidate = strings[k];
        int units = candidate.Length;
        int prefixSpan = spanSet.Span(candidate, SpanCondition.Contained);

        if (prefixSpan >= units)
        {
            // Irrelevant string: spanSet already spans the whole string.
            spanLengths[k] = ALL_CP_CONTAINED;
            if (all)
            {
                spanLengths[backOffset + k] = ALL_CP_CONTAINED;
            }
            continue;
        }

        // Relevant string.
        if (wantContained)
        {
            if (wantForward)
            {
                spanLengths[k] = MakeSpanLengthByte(prefixSpan);
            }
            if (wantBackward)
            {
                int suffixSpan = units - spanSet.SpanBack(candidate, units, SpanCondition.Contained);
                spanLengths[backOffset + k] = MakeSpanLengthByte(suffixSpan);
            }
        }
        else
        {
            // Not CONTAINED (so only NOT_CONTAINED): the slot merely flags "relevant".
            spanLengths[backOffset + k] = 0;
            spanLengths[k] = 0;
        }

        if (wantNotContained)
        {
            // Add the string's first/last code point to spanNotSet so that a
            // span(while not contained) stops before any string.
            if (wantForward)
            {
                int first = candidate.CodePointAt(0);
                AddToSpanNotSet(first);
            }
            if (wantBackward)
            {
                int last = candidate.CodePointBefore(units);
                AddToSpanNotSet(last);
            }
        }
    }

    // Finish.
    if (all)
    {
        spanNotSet.Freeze();
    }
}