// private methods ------------------------------------------------

/// <summary>
/// Copies the description of one range into the caller-supplied result
/// object by overwriting its three fields.
/// </summary>
///
/// <param name="element">result object that receives the range</param>
/// <param name="start">first code point of the range</param>
/// <param name="limit">(end + 1) code point of the range</param>
/// <param name="value_ren">value common to the whole range</param>
private void SetResult(RangeValueIterator_Constants.Element element,
        int start, int limit, int value_ren)
{
    // The three stores are independent of each other.
    element.value_ren = value_ren;
    element.limit = limit;
    element.start = start;
}
// set of property starts for UnicodeSet ------------------------------- ***

/// <summary>
/// Adds to <paramref name="set"/> the code points at which the data read
/// by this method changes: the start of every range enumerated from the
/// trie, every code point taken from the bidi mirroring table (together
/// with its successor), and every position in the Joining_Group array
/// where the stored value differs from the previous one.
/// </summary>
///
/// <param name="set">set that receives the code points</param>
public void AddPropertyStarts(UnicodeSet set)
{
    /* start code point of each same-value range of the trie */
    TrieIterator trieRanges = new TrieIterator(trie);
    RangeValueIterator_Constants.Element range = new RangeValueIterator_Constants.Element();
    while (trieRanges.Next(range))
    {
        set.Add(range.start);
    }

    /* code points from the bidi mirroring table */
    int mirrorCount = indexes[IX_MIRROR_LENGTH];
    for (int m = 0; m < mirrorCount; ++m)
    {
        int mirrored = GetMirrorCodePoint(mirrors[m]);
        set.Add(mirrored, mirrored + 1);
    }

    /* code points of the Joining_Group array where the value changes */
    int jgStart = indexes[IX_JG_START];
    int jgLimit = indexes[IX_JG_LIMIT];
    int jgLength = jgLimit - jgStart;
    sbyte previous = 0;
    // k indexes the array, cp is the code point that array slot stands for.
    for (int k = 0, cp = jgStart; k < jgLength; ++k, ++cp)
    {
        sbyte current = jgArray[k];
        if (current == previous)
        {
            continue;
        }
        set.Add(cp);
        previous = current;
    }
    if (previous != 0)
    {
        /* the last value was not 0, so the limit code point starts a new run */
        set.Add(jgLimit);
    }

    /*
     * code points with hardcoded properties, plus the ones following
     * them: (none right now)
     */
}
// public methods -------------------------------------------------

/// <summary>
/// <p>
/// Advances the iteration by one range.
/// </p>
/// <p>
/// While the cursor is below the supplementary range the BMP calculation
/// is tried first; if it does not produce a range, or the cursor is
/// already in the supplementary range, the supplementary calculation is
/// used. The range found is written into <paramref name="element"/>.
/// </p>
/// </summary>
///
/// <param name="element">object that receives the next range</param>
/// <returns>false when the cursor has already moved past
/// UCharacter.MAX_VALUE, true otherwise.</returns>
/// @draft 2.1
public bool Next(RangeValueIterator_Constants.Element element)
{
    if (m_nextCodepoint_ > IBM.ICU.Lang.UCharacter.MAX_VALUE)
    {
        return false;
    }

    bool cursorInBmp = m_nextCodepoint_ < IBM.ICU.Lang.UCharacter.SUPPLEMENTARY_MIN_VALUE;
    // Short-circuit keeps the BMP calculation from running once the
    // cursor has left the BMP.
    if (!cursorInBmp || !CalculateNextBMPElement(element))
    {
        CalculateNextSupplementaryElement(element);
    }
    return true;
}
/// <summary>
/// Looks for the end of the current range inside the BMP. The range begins
/// at the cursor position on entry. When a block check reports the end of
/// the range, the range is written to <paramref name="element"/>. When the
/// cursor reaches the supplementary range first, the cursor and block
/// index are stepped back by one and the caller has to continue with the
/// supplementary calculation.
/// </summary>
///
/// <param name="element">object that receives the range</param>
/// <returns>true if a range was written to <paramref name="element"/>,
/// false if the search has to proceed into the supplementary range.</returns>
private bool CalculateNextBMPElement(RangeValueIterator_Constants.Element element)
{
    int rangeBlock = m_nextBlock_;
    int rangeValue = m_nextValue_;
    m_currentCodepoint_ = m_nextCodepoint_;
    ++m_nextCodepoint_;
    ++m_nextBlockIndex_;

    // A false return from the check helpers is what ends the range.
    bool rangeEnded = !CheckBlockDetail(rangeValue);

    // synwee check that next block index == 0 here
    // enumerate BMP - each pass of the loop examines one data block
    while (!rangeEnded
            && m_nextCodepoint_ < IBM.ICU.Lang.UCharacter.SUPPLEMENTARY_MIN_VALUE)
    {
        // because of the way the character is split to form the index,
        // the lead surrogate and trail surrogate can not be in the
        // middle of a block
        if (m_nextCodepoint_ == LEAD_SURROGATE_MIN_VALUE_)
        {
            // skip lead surrogate code units,
            // go to lead surrogate codepoints
            m_nextIndex_ = BMP_INDEX_LENGTH_;
        }
        else if (m_nextCodepoint_ == TRAIL_SURROGATE_MIN_VALUE_)
        {
            // go back to regular BMP code points
            m_nextIndex_ = m_nextCodepoint_ >> IBM.ICU.Impl.Trie.INDEX_STAGE_1_SHIFT_;
        }
        else
        {
            // ordinary case: simply move on to the following index entry
            m_nextIndex_++;
        }

        m_nextBlockIndex_ = 0;
        rangeEnded = !CheckBlock(rangeBlock, rangeValue);
    }

    if (rangeEnded)
    {
        SetResult(element, m_currentCodepoint_, m_nextCodepoint_, rangeValue);
        return true;
    }

    // step one back since this value has not been retrieved yet
    --m_nextCodepoint_;
    --m_nextBlockIndex_;
    return false;
}
/// <summary>
/// Builds a trie from <paramref name="setRanges"/> and checks it against
/// <paramref name="checkRanges"/> in several ways: values read from the
/// builder, values read from the serialized trie (Latin-1 linear, BMP,
/// lead and code point accessors), the ranges delivered by the enumerating
/// iterator, and finally <c>_testTrieIteration</c>. Every mismatch is
/// reported through <c>Errln</c>.
/// </summary>
///
/// <param name="setRanges">ranges to store in the builder</param>
/// <param name="countSetRanges">number of entries of setRanges to use</param>
/// <param name="checkRanges">expected ranges; entry 0 supplies the builder's
/// initial values, and each entry's limit is the start of the next one</param>
/// <param name="countCheckRanges">number of entries of checkRanges to use</param>
/// <param name="latin1Linear">passed to the builder and compared with what
/// the serialized trie reports</param>
private void _testTrieRanges(TrieTest.SetRange[] setRanges, int countSetRanges,
        TrieTest.CheckRange[] checkRanges, int countCheckRanges, bool latin1Linear)
{
    IntTrieBuilder newTrie = new IntTrieBuilder(null, 2000,
            checkRanges[0].value_ren, checkRanges[0].value_ren, latin1Linear);

    // set values from setRanges[]
    bool ok = true;
    for (int i = 0; i < countSetRanges; ++i)
    {
        int setStart = setRanges[i].start;
        int setLimit = setRanges[i].limit;
        int setValue = setRanges[i].value_ren;
        bool overwrite = setRanges[i].overwrite;
        if ((setLimit - setStart) == 1 && overwrite)
        {
            ok &= newTrie.SetValue(setStart, setValue);
        }
        else
        {
            ok &= newTrie.SetRange(setStart, setLimit, setValue, overwrite);
        }
    }
    if (!ok)
    {
        Errln("setting values into a trie failed");
        return;
    }

    // verify that all these values are in the new Trie
    int cp = 0;
    for (int i = 0; i < countCheckRanges; ++i)
    {
        int limit = checkRanges[i].limit;
        int expected = checkRanges[i].value_ren;
        for (; cp < limit; ++cp)
        {
            int actual = newTrie.GetValue(cp);
            if (expected != actual)
            {
                Errln("newTrie [U+"
                        + ILOG.J2CsMapping.Util.IlNumber.ToString(cp, 16)
                        + "]==0x"
                        + ILOG.J2CsMapping.Util.IlNumber.ToString(actual, 16)
                        + " instead of 0x"
                        + ILOG.J2CsMapping.Util.IlNumber.ToString(expected, 16));
            }
        }
    }

    IntTrie trie = newTrie.Serialize(new TrieTest._testFoldedValue(newTrie),
            new TrieTest._testFoldingOffset());

    // test linear Latin-1 range from utrie_getData()
    if (latin1Linear)
    {
        cp = 0;
        for (int i = 0; i < countCheckRanges && cp <= 0xff; ++i)
        {
            int limit = checkRanges[i].limit;
            int expected = checkRanges[i].value_ren;
            for (; cp < limit && cp <= 0xff; ++cp)
            {
                int actual = trie.GetLatin1LinearValue((char)cp);
                if (expected != actual)
                {
                    Errln("IntTrie.getLatin1LinearValue[U+"
                            + ILOG.J2CsMapping.Util.IlNumber.ToString(cp, 16)
                            + "]==0x"
                            + ILOG.J2CsMapping.Util.IlNumber.ToString(actual, 16)
                            + " instead of 0x"
                            + ILOG.J2CsMapping.Util.IlNumber.ToString(expected, 16));
                }
            }
        }
    }

    if (latin1Linear != trie.IsLatin1Linear())
    {
        Errln("trie serialization did not preserve " + "Latin-1-linearity");
    }

    // verify that all these values are in the serialized Trie
    cp = 0;
    for (int i = 0; i < countCheckRanges; ++i)
    {
        int limit = checkRanges[i].limit;
        int expected = checkRanges[i].value_ren;

        if (cp == 0xd800)
        {
            // skip surrogates
            cp = limit;
            continue;
        }

        for (; cp < limit; ++cp)
        {
            if (cp <= 0xffff)
            {
                int bmpValue = trie.GetBMPValue((char)cp);
                if (expected != bmpValue)
                {
                    Errln("serialized trie.getBMPValue(U+"
                            + ILOG.J2CsMapping.Util.IlNumber.ToString(cp, 16)
                            + " == 0x"
                            + ILOG.J2CsMapping.Util.IlNumber.ToString(bmpValue, 16)
                            + " instead of 0x"
                            + ILOG.J2CsMapping.Util.IlNumber.ToString(expected, 16));
                }
                if (!IBM.ICU.Text.UTF16.IsLeadSurrogate((char)cp))
                {
                    int leadValue = trie.GetLeadValue((char)cp);
                    if (expected != leadValue)
                    {
                        Errln("serialized trie.getLeadValue(U+"
                                + ILOG.J2CsMapping.Util.IlNumber.ToString(cp, 16)
                                + " == 0x"
                                + ILOG.J2CsMapping.Util.IlNumber.ToString(leadValue, 16)
                                + " instead of 0x"
                                + ILOG.J2CsMapping.Util.IlNumber.ToString(expected, 16));
                    }
                }
            }

            int codePointValue = trie.GetCodePointValue(cp);
            if (expected != codePointValue)
            {
                Errln("serialized trie.getCodePointValue(U+"
                        + ILOG.J2CsMapping.Util.IlNumber.ToString(cp, 16)
                        + ")==0x"
                        + ILOG.J2CsMapping.Util.IlNumber.ToString(codePointValue, 16)
                        + " instead of 0x"
                        + ILOG.J2CsMapping.Util.IlNumber.ToString(expected, 16));
            }
        }
    }

    // enumerate and verify all ranges; range k from the iterator is
    // expected to span checkRanges[k - 1].limit .. checkRanges[k].limit
    int enumRanges = 1;
    TrieIterator iter = new TrieTest._testEnumValue(trie);
    RangeValueIterator_Constants.Element result = new RangeValueIterator_Constants.Element();
    while (iter.Next(result))
    {
        if (enumRanges >= countCheckRanges)
        {
            // Without this guard a surplus range would index past the
            // expected entries and abort the test with an exception
            // instead of a readable failure.
            Errln("utrie_enum() delivers more ranges than expected, extra range [U+"
                    + ILOG.J2CsMapping.Util.IlNumber.ToString(result.start, 16)
                    + "..U+"
                    + ILOG.J2CsMapping.Util.IlNumber.ToString(result.limit, 16)
                    + "]");
            break;
        }

        int expectedStart = checkRanges[enumRanges - 1].limit;
        int expectedLimit = checkRanges[enumRanges].limit;
        int expectedValue = checkRanges[enumRanges].value_ren;
        // the comparison undoes an XOR with 0x5555 on the delivered value
        int deliveredValue = result.value_ren ^ 0x5555;
        if (result.start != expectedStart
                || result.limit != expectedLimit
                || deliveredValue != expectedValue)
        {
            Errln("utrie_enum() delivers wrong range [U+"
                    + ILOG.J2CsMapping.Util.IlNumber.ToString(result.start, 16)
                    + "..U+"
                    + ILOG.J2CsMapping.Util.IlNumber.ToString(result.limit, 16)
                    + "].0x"
                    + ILOG.J2CsMapping.Util.IlNumber.ToString(deliveredValue, 16)
                    + " instead of [U+"
                    + ILOG.J2CsMapping.Util.IlNumber.ToString(expectedStart, 16)
                    + "..U+"
                    + ILOG.J2CsMapping.Util.IlNumber.ToString(expectedLimit, 16)
                    + "].0x"
                    + ILOG.J2CsMapping.Util.IlNumber.ToString(expectedValue, 16));
        }
        enumRanges++;
    }

    // test linear Latin-1 range
    if (trie.IsLatin1Linear())
    {
        for (cp = 0; cp < 0x100; ++cp)
        {
            int linearValue = trie.GetLatin1LinearValue((char)cp);
            int leadUnitValue = trie.GetLeadValue((char)cp);
            if (linearValue != leadUnitValue)
            {
                Errln("trie.getLatin1LinearValue[U+"
                        + ILOG.J2CsMapping.Util.IlNumber.ToString(cp, 16)
                        + "]=0x"
                        + ILOG.J2CsMapping.Util.IlNumber.ToString(linearValue, 16)
                        + " instead of 0x"
                        + ILOG.J2CsMapping.Util.IlNumber.ToString(leadUnitValue, 16));
            }
        }
    }

    _testTrieIteration(trie, checkRanges, countCheckRanges);
}
/// <summary>
/// Finds the end of the current range among the supplementary code points
/// and writes the range to <paramref name="element"/>. Unlike the BMP
/// calculation, m_currentCodepoint_ is not reset on entry: it already holds
/// the lower bound of the range being built and is only moved to the new
/// lower bound after a range has been delivered. If no range ends before
/// the lead surrogates are exhausted, the final range up to
/// UCharacter.MAX_VALUE + 1 is delivered.
/// </summary>
///
/// <param name="element">object that receives the range</param>
/// @draft 2.1
private void CalculateNextSupplementaryElement(RangeValueIterator_Constants.Element element)
{
    int rangeValue = m_nextValue_;
    int rangeBlock = m_nextBlock_;
    m_nextCodepoint_++;
    m_nextBlockIndex_++;

    bool midLeadSurrogate = IBM.ICU.Text.UTF16.GetTrailSurrogate(m_nextCodepoint_)
            != IBM.ICU.Text.UTF16.TRAIL_SURROGATE_MIN_VALUE;
    if (midLeadSurrogate)
    {
        // this piece is only reached when we are in the middle of a lead
        // surrogate block
        bool rangeEnded = !CheckNullNextTrailIndex() && !CheckBlockDetail(rangeValue);
        if (!rangeEnded)
        {
            // we have cleared one block
            m_nextIndex_++;
            m_nextTrailIndexOffset_++;
            rangeEnded = !CheckTrailBlock(rangeBlock, rangeValue);
        }
        if (rangeEnded)
        {
            SetResult(element, m_currentCodepoint_, m_nextCodepoint_, rangeValue);
            m_currentCodepoint_ = m_nextCodepoint_;
            return;
        }
    }

    // enumerate supplementary code points, one lead surrogate at a time
    int lead = IBM.ICU.Text.UTF16.GetLeadSurrogate(m_nextCodepoint_);
    while (lead < TRAIL_SURROGATE_MIN_VALUE_)
    {
        // lead surrogate access
        int leadBlock = m_trie_.m_index_[lead >> IBM.ICU.Impl.Trie.INDEX_STAGE_1_SHIFT_]
                << IBM.ICU.Impl.Trie.INDEX_STAGE_2_SHIFT_;
        bool blockHasEntries = leadBlock != m_trie_.m_dataOffset_;

        if (blockHasEntries)
        {
            if (m_trie_.m_dataManipulate_ == null)
            {
                // NOTE(review): NullReferenceException is thrown directly;
                // the type is kept so existing callers see no change.
                throw new NullReferenceException(
                        "The field DataManipulate in this Trie is null");
            }
            // enumerate trail surrogates for this lead surrogate
            m_nextIndex_ = m_trie_.m_dataManipulate_.GetFoldingOffset(
                    m_trie_.GetValue(leadBlock
                            + (lead & IBM.ICU.Impl.Trie.INDEX_STAGE_3_MASK_)));
            if (m_nextIndex_ > 0)
            {
                m_nextTrailIndexOffset_ = 0;
                if (!CheckTrailBlock(rangeBlock, rangeValue))
                {
                    SetResult(element, m_currentCodepoint_, m_nextCodepoint_, rangeValue);
                    m_currentCodepoint_ = m_nextCodepoint_;
                    return;
                }
                lead++;
                continue;
            }
        }

        // From here on there is no data: either for a whole block of lead
        // surrogates (blockHasEntries is false) or for this single lead
        // surrogate. Both cases stand for the initial value.
        if (rangeValue != m_initialValue_)
        {
            m_nextValue_ = m_initialValue_;
            m_nextBlock_ = 0;
            m_nextBlockIndex_ = 0;
            SetResult(element, m_currentCodepoint_, m_nextCodepoint_, rangeValue);
            m_currentCodepoint_ = m_nextCodepoint_;
            return;
        }

        if (blockHasEntries)
        {
            // skip every trail surrogate of this one lead surrogate
            m_nextCodepoint_ += TRAIL_SURROGATE_COUNT_;
            lead++;
        }
        else
        {
            // skip the whole block of lead surrogates; the code point is
            // recomputed from the new lead surrogate rather than advanced
            // by a fixed amount, since the cursor may not have been at the
            // start of the block
            lead += DATA_BLOCK_LENGTH_;
            m_nextCodepoint_ = IBM.ICU.Impl.UCharacterProperty.GetRawSupplementary(
                    (char)lead, (char)IBM.ICU.Text.UTF16.TRAIL_SURROGATE_MIN_VALUE);
        }
    }

    // deliver last range
    SetResult(element, m_currentCodepoint_, IBM.ICU.Lang.UCharacter.MAX_VALUE + 1,
            rangeValue);
}