/// <summary>
/// Looks up the ICU character name and the general category for a code point and
/// asserts that both match the expected values.
/// </summary>
/// <param name="puaIndex">Unicode codepoint</param>
/// <param name="puaName">Expected correct PUA codepoint name</param>
/// <param name="puaGenCat">The expected PUA General Category</param>
public static void Check_PUA(int puaIndex, string puaName, LgGeneralCharCategory puaGenCat)
{
    string name = "";
    LgGeneralCharCategory genCategory = LgGeneralCharCategory.kccCn;

    // Read the character name through ICU and the category through the property engine.
    ILgCharacterPropertyEngine charPropEngine = LgIcuCharPropEngineClass.Create();
    try
    {
        Icu.SetDataDirectory(GetIcuDataDir());
        Icu.UErrorCode error;
        // Only the name written to the out parameter is needed; the returned length is not used.
        Icu.u_CharName(puaIndex, Icu.UCharNameChoice.U_UNICODE_CHAR_NAME, out name, out error);
        genCategory = charPropEngine.get_GeneralCategory(puaIndex);
    }
    finally
    {
        // Must release pointer to free memory-mapping before we try to restore files.
        Marshal.ReleaseComObject(charPropEngine);
        charPropEngine = null;
        Icu.Cleanup(); // clean up the ICU files / data
    }

    // The code point is shown in hexadecimal in both failure messages.
    string hexCodepoint = puaIndex.ToString("x", new System.Globalization.NumberFormatInfo());

    // Check the name.
    Assert.AreEqual(puaName, name, "PUA Character " + hexCodepoint + " is incorrect");

    // Check the general category; the message now names the property that is actually compared.
    Assert.AreEqual(puaGenCat, genCategory,
        "PUA Character " + hexCodepoint + " has an incorrect general category");
}
public void CharacterPropertyOverrides()
{
    // After the ICU data directory is initialized, the engine is expected to report
    // U+F171 as general category Mn.
    Icu.InitIcuDataDir();
    var engine = LgIcuCharPropEngineClass.Create();

    var category = engine.get_GeneralCategory('\xF171');

    Assert.That(category, Is.EqualTo(LgGeneralCharCategory.kccMn));
}
/// <summary>
/// Called once no more material fits on the current line. Decides whether the line may
/// end where it currently does, or whether too much was added and the caller must backtrack.
/// </summary>
/// <returns>true if the line can be finalized as it stands; false to trigger backtracking.</returns>
private bool FinalizeLine()
{
    Debug.Assert(m_lines.Count > 0);
    Debug.Assert(m_lines.Last().Boxes.Count() > 0);

    var trailingBox = m_lines.Last().Boxes.Last() as StringBox;
    if (trailingBox == null)
    {
        // For now it is always valid to break after a non-string box.
        return true;
    }

    switch (m_lineSegTypes.Last())
    {
        case LgEndSegmentType.kestBadBreak:
            // Known bad break: backtrack.
            return false;
        case LgEndSegmentType.kestWsBreak:
            // Writing-system break: needs the character test below.
            break;
        default:
            // Every other kind of break is accepted.
            return true;
    }

    // Locate the last character of the trailing segment.
    // Enhance JohnT: MAYBE we should check for surrogate? But new surrogate pairs we can break after are unlikely.
    int segmentStart = trailingBox.IchMin;
    int finalCharIndex = segmentStart + trailingBox.Segment.get_Lim(segmentStart) - 1;
    if (finalCharIndex < 0)
    {
        return false; // paranoia
    }

    string finalChar = Fetch(finalCharIndex, finalCharIndex + 1);
    var engine = LgIcuCharPropEngineClass.Create();
    int breakClass;
    using (var buffer = new ArrayPtr(1))
    {
        engine.GetLineBreakProps(finalChar, 1, buffer);
        // Mask with 0x1f to strip the 'is it a space' high bit.
        breakClass = Marshal.ReadByte(buffer.IntPtr) & 0x1f;
    }

    // Keep the break only after a space (or another character that provides a break
    // opportunity after it); anything else is treated as a bad break.
    return breakClass == (byte)LgLBP.klbpSP
        || breakClass == (byte)LgLBP.klbpBA
        || breakClass == (byte)LgLBP.klbpB2;
}
public void CorrelationFactor()
{
    // Each row pairs two paragraph texts with the correlation factor expected for them.
    // Rows are evaluated in order and the first mismatch fails the test.
    ILgCharacterPropertyEngine engine = LgIcuCharPropEngineClass.Create();
    var expectations = new[]
    {
        new { First = "Hello", Second = "Hello", Factor = 1.0 },
        new { First = "Hello", Second = "Hello ", Factor = 1.0 },
        new { First = " Hello", Second = "Hello", Factor = 1.0 },
        new { First = "Hello", Second = "Hello there", Factor = 0.5 },
        new { First = "Hello over there", Second = "Hello over here", Factor = 0.5 },
        new { First = "Hello there", Second = "there Hello", Factor = 1.0 },
        new { First = "I am really excited", Second = "I am really really really really excited", Factor = 0.8125 },
        new { First = string.Empty, Second = "What will happen here?", Factor = 0.0 },
        new { First = string.Empty, Second = string.Empty, Factor = 1.0 },
        new { First = (string)null, Second = (string)null, Factor = 1.0 },
        new { First = (string)null, Second = "what?", Factor = 0.0 },
        new { First = "what?", Second = (string)null, Factor = 0.0 },
    };

    foreach (var expectation in expectations)
    {
        ParagraphCorrelation correlation = new ParagraphCorrelation(expectation.First, expectation.Second, engine);
        Assert.AreEqual(expectation.Factor, correlation.CorrelationFactor);
    }
}
public void LongestUsefulSubstring()
{
    // Each row pairs two paragraph texts with the substring expected from
    // ParagraphCorrelation.LongestUsefulSubstring. Rows run in order.
    ILgCharacterPropertyEngine engine = LgIcuCharPropEngineClass.Create();
    var expectations = new[]
    {
        // two equal strings
        new { First = "Hello", Second = "Hello", Common = "Hello" },
        // LCS at the start
        new { First = "Hello over there", Second = "Hello over here", Common = "Hello over " },
        // LCS in the middle
        new { First = "I want to be over there", Second = "You want to be over here", Common = " want to be over " },
        // LCS at the end
        new { First = "Will you come to visit my relatives?", Second = "Do I ever visit my relatives?", Common = " visit my relatives?" },
        // two common strings, find the longest
        new { First = "This sentence has common words", Second = "This paragraph has common words", Common = " has common words" },
        // nothing at all in common
        new { First = "We have nothing in common", Second = "absolutely nill items", Common = string.Empty },
        // pathological cases
        new { First = string.Empty, Second = string.Empty, Common = string.Empty },
        new { First = (string)null, Second = string.Empty, Common = string.Empty },
        new { First = string.Empty, Second = "Hello there", Common = string.Empty },
    };

    foreach (var expectation in expectations)
    {
        ParagraphCorrelation correlation = new ParagraphCorrelation(expectation.First, expectation.Second, engine);
        Assert.AreEqual(expectation.Common, correlation.LongestUsefulSubstring);
    }
}
public void CorrelationFactor_WithDigitsAndPunc()
{
    // Same style of check as the plain correlation test, but the texts carry digits and
    // punctuation; the expected factors are listed per row and checked in order.
    ILgCharacterPropertyEngine engine = LgIcuCharPropEngineClass.Create();
    var expectations = new[]
    {
        new { First = "Hello!", Second = "2Hello.", Factor = 1.0 },
        new { First = "Hello", Second = "Hello, there", Factor = 0.5 },
        new { First = "3Hello over there", Second = "Hello over here", Factor = 0.5 },
        new { First = "Hello there?", Second = "4there Hello!", Factor = 1.0 },
        new { First = "5I am really excited!", Second = "6I am really really really really excited.", Factor = 0.8125 },
    };

    foreach (var expectation in expectations)
    {
        ParagraphCorrelation correlation = new ParagraphCorrelation(expectation.First, expectation.Second, engine);
        Assert.AreEqual(expectation.Factor, correlation.CorrelationFactor);
    }
}
/// ------------------------------------------------------------------------------------
/// <summary>
/// Creates the character property engine, calls Reset(), and parses the WordForming,
/// Numeric and Other strings (in that order) into their member fields. Each later parse
/// is also handed the results of the earlier ones.
/// </summary>
/// <returns>The invalid characters reported by the three parses, in parse order.</returns>
/// ------------------------------------------------------------------------------------
private List<string> Init()
{
    m_cpe = LgIcuCharPropEngineClass.Create();
    Reset();

    // Swap the space placeholder back to a real space before parsing.
    if (Other != null)
    {
        Other = Other.Replace(kSpaceReplacment, " ");
    }

    List<string> rejected;
    m_WordFormingCharacters = ParseCharString(WordForming, ksDelimiter, m_cpe, out rejected);

    List<string> rejectedNumeric;
    m_NumericCharacters = ParseCharString(Numeric, ksDelimiter, m_cpe, out rejectedNumeric,
        m_WordFormingCharacters);
    rejected.AddRange(rejectedNumeric);

    List<string> rejectedOther;
    m_OtherCharacters = ParseCharString(Other, ksDelimiter, m_cpe, out rejectedOther,
        m_WordFormingCharacters, m_NumericCharacters);
    rejected.AddRange(rejectedOther);

    return rejected;
}
/// ------------------------------------------------------------------------------------
/// <summary>
/// Creates the character property engine, remembers the language definition, calls
/// Reset(), and parses the WordForming, Numeric and Other strings (in that order) into
/// their member fields. Each later parse is also handed the results of the earlier ones.
/// </summary>
/// <param name="langDef">The language definition</param>
/// <param name="invalidChars">Receives the invalid characters reported by the three
/// parses, in parse order.</param>
/// ------------------------------------------------------------------------------------
private void Init(LanguageDefinition langDef, out List<string> invalidChars)
{
    m_cpe = LgIcuCharPropEngineClass.Create();
    m_langDef = langDef;
    Reset();

    // Swap the space placeholder back to a real space before parsing.
    if (Other != null)
    {
        Other = Other.Replace(kSpaceReplacment, " ");
    }

    m_WordFormingCharacters = ParseCharString(WordForming, ksDelimiter, m_cpe, langDef,
        out invalidChars);

    List<string> rejectedNumeric;
    m_NumericCharacters = ParseCharString(Numeric, ksDelimiter, m_cpe, langDef,
        out rejectedNumeric, m_WordFormingCharacters);
    invalidChars.AddRange(rejectedNumeric);

    List<string> rejectedOther;
    m_OtherCharacters = ParseCharString(Other, ksDelimiter, m_cpe, langDef,
        out rejectedOther, m_WordFormingCharacters, m_NumericCharacters);
    invalidChars.AddRange(rejectedOther);
}
/// <summary>
/// Joins two strings. An empty operand simply yields the other one. Otherwise a single
/// space is inserted at the join unless the first string already ends with white space or
/// the second already starts with it.
/// Enhance JohnT: this cannot consider any special white space characters unique to a particular
/// writing system, and does not allow for the possibility of white space surrogate pairs, since
/// I don't believe there are any in Unicode. Don't use where absolute consistency with a particular
/// writing system is vital.
/// </summary>
/// <param name="first">The string that comes first in the result.</param>
/// <param name="second">The string appended after <paramref name="first"/>.</param>
/// <returns>The combined string, or the non-empty operand when the other is empty.</returns>
public static ITsString ConcatenateWithSpaceIfNeeded(this ITsString first, ITsString second)
{
    if (first.Length == 0)
    {
        return second;
    }
    if (second.Length == 0)
    {
        return first;
    }

    var builder = first.GetBldr();
    var engine = LgIcuCharPropEngineClass.Create();

    // Only add a separator when neither side of the join is already white space.
    if (!IsWhite(engine, second.Text[0]) && !IsWhite(engine, first.Text.Last()))
    {
        builder.Replace(first.Length, first.Length, " ", null);
    }

    builder.ReplaceTsString(builder.Length, builder.Length, second);
    return builder.GetString();
}
/// <summary>
/// Checks all the values of a character in the UnicodeData.txt.
/// Checks: fields 1-8,11-14
/// (Skips 9 and 10, the "Bidi Mirrored" and "Unicode Version 1" fields.)
/// </summary>
/// <param name="puaIndex">Unicode codepoint to check</param>
/// <param name="puaName">Expected character name</param>
/// <param name="puaGenCat">Expected general category</param>
/// <param name="puaCombiningClass">Expected combining class</param>
/// <param name="puaBidiClass">Expected bidi category</param>
/// <param name="puaDecomposition">Expected decomposition</param>
/// <param name="puaNumeric">Whether the engine is expected to report the character as a number</param>
/// <param name="puaNumericValue">Expected numeric value; only compared when <paramref name="puaNumeric"/> is true</param>
/// <param name="puaComment">Expected ISO comment</param>
/// <param name="puaToUpper">Expected upper case mapping</param>
/// <param name="puaToLower">Expected lower case mapping</param>
/// <param name="puaToTitle">Expected title case mapping</param>
public static void Check_PUA(
    int puaIndex,
    string puaName,
    LgGeneralCharCategory puaGenCat,
    int puaCombiningClass,
    LgBidiCategory puaBidiClass,
    string puaDecomposition,
    bool puaNumeric,
    int puaNumericValue,
    string puaComment,
    int puaToUpper,
    int puaToLower,
    int puaToTitle
    )
{
    string name = "";
    LgGeneralCharCategory genCategory = LgGeneralCharCategory.kccCn;
    int combiningClass = 0;
    string decomposition = "None";
    LgBidiCategory bidiCategory = LgBidiCategory.kbicL;
    bool isNumber = false;
    int numericValue = -1;
    int upper = -1;
    int lower = -1;
    int title = -1;
    string comment = "<none>";

    // Read the character name through ICU and everything else through the property engine.
    ILgCharacterPropertyEngine charPropEngine = LgIcuCharPropEngineClass.Create();
    try
    {
        Icu.SetDataDirectory(GetIcuDataDir());
        Icu.UErrorCode error;
        // Only the name written to the out parameter is needed; the returned length is not used.
        Icu.u_CharName(puaIndex, Icu.UCharNameChoice.U_UNICODE_CHAR_NAME, out name, out error);
        genCategory = charPropEngine.get_GeneralCategory(puaIndex);
        combiningClass = charPropEngine.get_CombiningClass(puaIndex);
        bidiCategory = charPropEngine.get_BidiCategory(puaIndex);
        decomposition = charPropEngine.get_Decomposition(puaIndex);
        // The full decomposition is still requested from the engine, as before, but no
        // expected value is supplied for it, so nothing is compared against the result.
        charPropEngine.get_FullDecomp(puaIndex);
        // Note: isNumber merely checks the General category, it doesn't check to
        // see if there is a valid numeric value.
        isNumber = charPropEngine.get_IsNumber(puaIndex);
        if (isNumber)
        {
            numericValue = charPropEngine.get_NumericValue(puaIndex);
        }
        comment = charPropEngine.get_Comment(puaIndex);
        upper = charPropEngine.get_ToUpperCh(puaIndex);
        lower = charPropEngine.get_ToLowerCh(puaIndex);
        title = charPropEngine.get_ToTitleCh(puaIndex);
    }
    finally
    {
        // Must release pointer to free memory-mapping before we try to restore files.
        Marshal.ReleaseComObject(charPropEngine);
        charPropEngine = null;
        Icu.Cleanup(); // clean up the ICU files / data
    }

    string errorMessage = "PUA Character " +
        puaIndex.ToString("x", new System.Globalization.NumberFormatInfo()) +
        " has an incorrect ";

    //Check Name [1]
    Assert.AreEqual(puaName, name, errorMessage + "name.");
    //Check general category [2]
    Assert.AreEqual(puaGenCat, genCategory, errorMessage + "general category.");
    //Check combining class [3]
    Assert.AreEqual(puaCombiningClass, combiningClass, errorMessage + "combining class.");
    //Check Bidi class [4]
    Assert.AreEqual(puaBidiClass, bidiCategory, errorMessage + "bidi class value.");

    //Check Decomposition [5]
    // The message is built before the assertion runs, so an empty or null decomposition
    // must not be indexed; describe it instead of throwing.
    object firstDecompositionUnit = string.IsNullOrEmpty(decomposition)
        ? (object)"<empty>"
        : (int)decomposition[0];
    Assert.AreEqual(puaDecomposition, decomposition,
        BuildPuaFailureMessage(errorMessage + "decomposition.",
            "Decomposition, {0:x}, is incorrect", firstDecompositionUnit));

    //Check Numeric Value [6,7,8]
    Assert.AreEqual(puaNumeric, isNumber, errorMessage +
        "numeric type (i.e. does or doesn't have a numeric value when it should be the other).");
    if (puaNumeric)
    {
        Assert.AreEqual(puaNumericValue, numericValue, errorMessage + "numeric value.");
    }

    //Check ISO Comment [11]
    Assert.AreEqual(puaComment, comment, errorMessage + "ISO commment");

    //Check uppercase [12]
    // Built from a fresh writer so the decomposition text no longer leaks into this message.
    Assert.AreEqual(puaToUpper, upper,
        BuildPuaFailureMessage(errorMessage + "upper case.",
            "Found uppercase value: {0:x}", upper));
    //Check lowercase [13]
    Assert.AreEqual(puaToLower, lower, errorMessage + "lower case.");
    //Check titlecase [14]
    Assert.AreEqual(puaToTitle, title, errorMessage + "title case.");
}

/// <summary>
/// Builds a two-line failure message: a headline followed by a formatted detail line.
/// </summary>
/// <param name="headline">First line of the message</param>
/// <param name="detailFormat">Composite format string for the second line</param>
/// <param name="detail">Value substituted into <paramref name="detailFormat"/></param>
/// <returns>The complete message text</returns>
private static string BuildPuaFailureMessage(string headline, string detailFormat, object detail)
{
    using (StringWriter writer = new StringWriter(new System.Globalization.NumberFormatInfo()))
    {
        writer.WriteLine(headline);
        writer.WriteLine(detailFormat, detail);
        return writer.ToString();
    }
}
/// <summary>
/// Builds a range selection around this insertion point by scanning backward and then
/// forward over characters that are numbers or word-forming and that share the writing
/// system found at this insertion point. A single space directly after the word is
/// included in the range.
/// </summary>
/// <returns>
/// The expanded selection, or null when the forward scan starts at the end of the run and
/// the anchor found by the backward scan is at that same position.
/// </returns>
public RangeSelection ExpandToWord()
{
    var cpe = LgIcuCharPropEngineClass.Create();
    RangeSelection rangeSel = new RangeSelection(this, new InsertionPoint(Hookup, StringPosition + 1, AssociatePrevious));
    var backwardSel = new InsertionPoint(Hookup, StringPosition, AssociatePrevious);
    var forwardSel = new InsertionPoint(Hookup, StringPosition, AssociatePrevious);

    // Scan backward for the start of the word.
    while (true)
    {
        if (backwardSel.StringPosition == 0)
        {
            break;
        }
        backwardSel.StringPosition--;
        // Number of UTF-16 code units stepped over for the character under test.
        int unitsSteppedBack = 1;
        char testChar = backwardSel.ContainingRun.Text[backwardSel.StringPosition];
        int testInt = testChar;
        if (Surrogates.IsTrailSurrogate(testChar))
        {
            // Step over the lead surrogate as well and test the whole code point.
            backwardSel.StringPosition--;
            unitsSteppedBack = 2;
            testInt = Surrogates.Int32FromSurrogates(backwardSel.ContainingRun.Text[backwardSel.StringPosition], testChar);
        }
        else if (Surrogates.IsLeadSurrogate(testChar))
        {
            testInt = Surrogates.Int32FromSurrogates(testChar, backwardSel.ContainingRun.Text[backwardSel.StringPosition + 1]);
        }
        if (!cpe.get_IsNumber(testInt) && !cpe.get_IsWordForming(testInt) ||
            backwardSel.ContainingRun.WritingSystemAt(backwardSel.StringPosition) != ContainingRun.WritingSystemAt(StringPosition))
        {
            // Undo the whole step so the position lands after the rejected character,
            // not between the two halves of a surrogate pair.
            backwardSel.StringPosition += unitsSteppedBack;
            break;
        }
        rangeSel = new RangeSelection(backwardSel, this);
    }
    backwardSel = rangeSel.Anchor;

    // Scan forward for the end of the word.
    while (true)
    {
        if (forwardSel.StringPosition == forwardSel.ContainingRun.Length)
        {
            if (backwardSel.StringPosition == forwardSel.StringPosition)
            {
                return null;
            }
            break;
        }
        char testChar = forwardSel.ContainingRun.Text[forwardSel.StringPosition];
        int testInt = testChar;
        bool advancedOntoTrail = false;
        if (Surrogates.IsLeadSurrogate(testChar))
        {
            // Move onto the trail surrogate and test the whole code point.
            forwardSel.StringPosition++;
            advancedOntoTrail = true;
            testInt = Surrogates.Int32FromSurrogates(testChar, forwardSel.ContainingRun.Text[forwardSel.StringPosition]);
        }
        else if (Surrogates.IsTrailSurrogate(testChar))
        {
            testInt = Surrogates.Int32FromSurrogates(forwardSel.ContainingRun.Text[forwardSel.StringPosition - 1], testChar);
        }
        if (!cpe.get_IsNumber(testInt) && !cpe.get_IsWordForming(testInt) ||
            forwardSel.ContainingRun.WritingSystemAt(forwardSel.StringPosition) != ContainingRun.WritingSystemAt(StringPosition))
        {
            // Compare the full code point: casting a supplementary code point to char
            // truncates it and could make it look like a space.
            if (testInt == ' ')
            {
                forwardSel.StringPosition++;
                rangeSel = new RangeSelection(backwardSel, forwardSel);
            }
            else if (advancedOntoTrail)
            {
                // Put the position back in front of the rejected surrogate pair.
                forwardSel.StringPosition--;
            }
            break;
        }
        forwardSel.StringPosition++;
        rangeSel = new RangeSelection(backwardSel, forwardSel);
    }
    return rangeSel;
}
/// <summary>
/// Redo layout. Should produce the same segments as FullLayout, but assume that segments for text up to
/// details.StartChange may be reused (if not affected by changing line breaks), and segments after
/// details.StartChange+details.DeleteCount may be re-used if a line break works out (and after adjusting
/// their begin offset).
/// </summary>
/// <param name="details">Describes the change; StartChange, InsertCount and DeleteCount are read here.</param>
/// <param name="lcb">Receives the rectangle to invalidate, through InvalidateInRoot, once layout is done.</param>
internal void Relayout(SourceChangeDetails details, LayoutCallbacks lcb)
{
    // Start from the paragraph's existing lines as reuse candidates and an empty result list.
    m_reuseableLines = m_para.Lines;
    m_lines = new List <ParaLine>();
    m_renderRunIndex = 0;
    m_ichRendered = 0;
    // The layout limit is the end of the last render run.
    IRenderRun last = m_renderRuns[m_renderRuns.Count - 1];
    m_ichLim = last.RenderStart + last.RenderLength;
    m_lastRenderRunIndex = m_renderRuns.Count;
    Rectangle invalidateRect = m_para.InvalidateRect;
    // Net change in character count; used to shift the offsets of lines after the change.
    int delta = details.InsertCount - details.DeleteCount;
    // Remember the old size so any growth can be added to the invalidate rectangle at the end.
    int oldHeight = m_para.Height;
    int oldWidth = m_para.Width;
    // Make use of details.StartChange to reuse some lines at start.
    if (m_reuseableLines.Count > 0)
    {
        // As long as we have two complete lines before the change, we can certainly reuse the first of them.
        while (m_reuseableLines.Count > 2 && details.StartChange > m_reuseableLines[2].IchMin)
        {
            m_lines.Add(m_reuseableLines[0]);
            m_reuseableLines.RemoveAt(0);
        }
        // If we still have one complete line before the change, we can reuse it provided there is white
        // space after the end of the line and before the change.
        if (m_reuseableLines.Count > 1)
        {
            int startNextLine = m_reuseableLines[1].IchMin;
            if (details.StartChange > startNextLine)
            {
                bool fGotWhite = false;
                string line1Text = m_reuseableLines[1].CheckedText;
                // Only the characters of the following line that precede the change are examined.
                int lim = details.StartChange - startNextLine;
                var cpe = LgIcuCharPropEngineClass.Create();
                for (int ich = 0; ich < lim; ich++)
                {
                    // Enhance JohnT: possibly we need to consider surrogates here?
                    // Worst case is we don't reuse a line we could have, since a surrogate won't
                    // be considered white.
                    if (cpe.get_IsSeparator(Convert.ToInt32(line1Text[ich])))
                    {
                        fGotWhite = true;
                        break;
                    }
                }
                if (fGotWhite)
                {
                    m_lines.Add(m_reuseableLines[0]);
                    m_reuseableLines.RemoveAt(0);
                }
            }
        }
        // Rendering resumes at the start of the first line that was not kept.
        m_ichRendered = m_reuseableLines[0].IchMin;
        int topOfFirstDiscardedLine = m_reuseableLines[0].Top;
        // We don't need to invalidate the lines we're keeping.
        invalidateRect = new Rectangle(invalidateRect.Left, invalidateRect.Top + topOfFirstDiscardedLine, invalidateRect.Width, invalidateRect.Height - topOfFirstDiscardedLine);
    }
    // Figure out which run we need to continue from, to correspond to the start of the first line
    // we need to rebuild.
    while (m_renderRunIndex < m_renderRuns.Count && m_renderRuns[m_renderRunIndex].RenderLim <= m_ichRendered)
    {
        m_renderRunIndex++;
    }
    while (!Finished)
    {
        // Todo: I think we need to adjust available width if this is the first line.
        BuildALine();
        // Drop any initial reusable lines we now determine to be unuseable after all.
        // If we've used characters beyond the start of this potentially reusable line, we can't reuse it.
        // Also, we don't reuse empty lines. Typically an empty line is left over from a previously empty
        // paragraph, and we no longer need the empty segment, even though it doesn't have any of the same
        // characters (since it has none) as the segment that has replaced it.
        while (m_reuseableLines.Count > 0 && (m_ichRendered > m_reuseableLines[0].IchMin + delta || m_reuseableLines[0].Length == 0))
        {
            m_reuseableLines.RemoveAt(0);
        }
        if (m_reuseableLines.Count > 0)
        {
            // See if we can resync: the rebuilt text must end exactly where the next old line
            // begins once its offset is shifted by delta.
            var nextLine = m_reuseableLines[0];
            if (m_ichRendered == nextLine.IchMin + delta)
            {
                // reuse it.
                int top = m_gapTop;
                if (m_lines.Count > 0)
                {
                    // Link the box chain of the rebuilt lines to the first box of the reused line.
                    ParaLine previous = m_lines.Last();
                    previous.LastBox.Next = nextLine.FirstBox;
                    top = TopOfNextLine(previous, nextLine.Ascent);
                }
                m_lines.AddRange(m_reuseableLines);
                if (top != nextLine.Top)
                {
                    // The reused lines have moved vertically; recompute each line's top.
                    ParaLine previous = null;
                    foreach (var line in m_reuseableLines)
                    {
                        if (previous != null) // first time top has already been computed
                        {
                            top = TopOfNextLine(previous, line.Ascent);
                        }
                        line.Top = top; // BEFORE ArrangeBoxes, since it gets copied to the individual boxes
                        // NOTE(review): this arranges m_currentLine on every iteration rather than
                        // 'line', whose Top was just set - confirm whether line.ArrangeBoxes was intended.
                        m_currentLine.ArrangeBoxes(m_para.Style.ParaAlignment, m_gapLeft, m_gapRight, 0, m_layoutInfo.MaxWidth, TopDepth);
                        previous = line;
                    }
                }
                else
                {
                    // reusable lines have not moved, we don't need to invalidate them.
                    invalidateRect.Height -= (m_reuseableLines.Last().Bottom - top);
                }
                // Shift the character offset of every string box reachable through Next from
                // the first reused box.
                for (Box box = nextLine.FirstBox; box != null; box = box.Next)
                {
                    if (box is StringBox)
                    {
                        (box as StringBox).IchMin += delta;
                    }
                }
                // Resynchronized with the old layout; no further lines need building.
                break;
            }
        }
    }
    SetParaInfo();
    // if the paragraph got larger, we need to invalidate the extra area.
    // (But, don't reduce it if it got smaller; we want to invalidate all the old stuff as well as all the new.)
    if (m_para.Height > oldHeight)
    {
        invalidateRect.Height += m_para.Height - oldHeight;
    }
    if (m_para.Width > oldWidth)
    {
        invalidateRect.Width += m_para.Width - oldWidth;
    }
    lcb.InvalidateInRoot(invalidateRect);
}