Exemplo n.º 1
0
		/// ------------------------------------------------------------------------------------
		/// <summary>
		/// Initializes a new instance of the <see cref="SymbolChooserDlg"/> class, running the
		/// default constructor first and then configuring the glyph grid and font-name label.
		/// </summary>
		/// <param name="font">Font used in the glyph grid. Its name is also shown in the
		/// font-name label, so it must not be null.</param>
		/// <param name="cpe">An ILgCharacterPropertyEngine. Set this to null to use the
		/// .Net methods for determining whether or not a codepoint should be added to
		/// the grid.</param>
		/// ------------------------------------------------------------------------------------
		public SymbolChooserDlg(Font font, ILgCharacterPropertyEngine cpe)
			: this()
		{
			// The engine is handed to the grid before the font is assigned.
			charGrid.CharPropEngine = cpe;
			charGrid.Font = font;
			// Show the user which font the grid is displaying.
			lblFontName.Text = font.Name;
		}
Exemplo n.º 2
0
		/// ------------------------------------------------------------------------------------
		/// <summary>
		/// Initializes a new instance of the <see cref="SymbolChooserDlg"/> class, running the
		/// default constructor first, remembering the help topic provider and then configuring
		/// the glyph grid and font-name label.
		/// </summary>
		/// <param name="font">Font used in the glyph grid. Its name is also shown in the
		/// font-name label, so it must not be null.</param>
		/// <param name="cpe">An ILgCharacterPropertyEngine. Set this to null to use the
		/// .Net methods for determining whether or not a codepoint should be added to
		/// the grid.</param>
		/// <param name="helpTopicProvider">The help topic provider; stored for later use by
		/// the dialog.</param>
		/// ------------------------------------------------------------------------------------
		public SymbolChooserDlg(Font font, ILgCharacterPropertyEngine cpe, IHelpTopicProvider helpTopicProvider)
			: this()
		{
			m_helpTopicProvider = helpTopicProvider;
			// The engine is handed to the grid before the font is assigned.
			charGrid.CharPropEngine = cpe;
			charGrid.Font = font;
			// Show the user which font the grid is displaying.
			lblFontName.Text = font.Name;
		}
Exemplo n.º 3
0
		/// ------------------------------------------------------------------------------------
		/// <summary>
		/// Creates a converter for the given paragraph and back-translation writing system.
		/// </summary>
		/// <param name="para">The paragraph to convert. Its cache supplies the Unicode
		/// character property engine and the translated scripture object.</param>
		/// <param name="wsBt">The writing system for which to do the conversion.</param>
		/// ------------------------------------------------------------------------------------
		private BtConverter(IStTxtPara para, int wsBt)
		{
			m_wsBt = wsBt;
			m_para = para;
			m_cache = para.Cache;

			// Everything else is looked up through the paragraph's cache.
			m_scr = m_cache.LangProject.TranslatedScriptureOA;
			m_cpe = m_cache.ServiceLocator.UnicodeCharProps;
		}
Exemplo n.º 4
0
		/// <summary>
		/// Start it off analyzing a string, using a caller-supplied character property engine.
		/// </summary>
		/// <param name="tss">The string to analyze. Must not be null, but its text may be
		/// null or empty.</param>
		/// <param name="cpe">Character property engine to use.</param>
		public WordMaker(ITsString tss, ILgCharacterPropertyEngine cpe)
		{
			m_tss = tss;
			m_ich = 0;
			// get_Text() can return null (presumably for a string with no characters);
			// normalize to "" so that taking the length below cannot throw.
			m_st = tss.get_Text() ?? "";
			m_cch = m_st.Length;
			m_cpe = cpe;
		}
Exemplo n.º 5
0
		/// ------------------------------------------------------------------------------------
		/// <summary>
		/// Creates a categorizer from a set of valid characters and a character property
		/// engine.
		/// </summary>
		/// <param name="validChars">The valid characters. May be null; per the original
		/// documentation the categorizer then falls back on the character property
		/// engine.</param>
		/// <param name="charPropEngine">The character property engine. Required.</param>
		/// <exception cref="ArgumentNullException"><paramref name="charPropEngine"/> is null.
		/// </exception>
		/// ------------------------------------------------------------------------------------
		public FwCharacterCategorizer(ValidCharacters validChars,
			ILgCharacterPropertyEngine charPropEngine)
		{
			if (charPropEngine != null)
			{
				m_charPropEngine = charPropEngine;
				m_validChars = validChars;
				return;
			}

			// No engine was supplied.
			throw new ArgumentNullException("charPropEngine");
		}
Exemplo n.º 6
0
		/// <summary>
		/// Creates a converter for the given paragraph, caching the paragraph's cache, the
		/// segment-related field ids, the Unicode character property engine and the
		/// translated scripture object.
		/// </summary>
		/// <param name="para">The paragraph to convert. Must not be null.</param>
		public BtConverter(IStTxtPara para)
		{
			m_para = para;
			m_cache = para.Cache;

			// Objects reachable from the cache.
			m_scr = m_cache.LangProject.TranslatedScriptureOA;
			m_cpe = m_cache.LanguageWritingSystemFactoryAccessor.UnicodeCharProps;

			// Field ids for the free translation and segments properties.
			kflidFT = StTxtPara.SegmentFreeTranslationFlid(m_cache);
			kflidSegments = StTxtPara.SegmentsFlid(m_cache);
		}
		/// ----------------------------------------------------------------------------------------
		/// <summary>
		/// If the writing system has no valid characters yet, fills its ValidChars from
		/// GetValidCharsForLocale using the code of the writing system's language subtag.
		/// A writing system that already has valid characters is left unchanged.
		/// </summary>
		/// <param name="ws">The writing system to update.</param>
		/// <param name="cpe">A character property engine (needed for normalization).</param>
		/// ----------------------------------------------------------------------------------------
		public static void TryLoadValidCharsIfEmpty(IWritingSystem ws,
			ILgCharacterPropertyEngine cpe)
		{
			// Something has been loaded already: nothing to do.
			if (!string.IsNullOrEmpty(ws.ValidChars))
				return;

			ws.ValidChars = GetValidCharsForLocale(ws.LanguageSubtag.Code, cpe);
		}
Exemplo n.º 8
0
		/// ----------------------------------------------------------------------------------------
		/// <summary>
		/// If the language definition has no valid characters yet, fills its ValidChars from
		/// GetValidCharsForLocale. The locale used is the BaseLocale, or the LocaleAbbr when
		/// BaseLocale is null. A definition that already has valid characters is left
		/// unchanged.
		/// </summary>
		/// <param name="langDef">The language definition to update.</param>
		/// <param name="cpe">A character property engine (needed for normalization).</param>
		/// ----------------------------------------------------------------------------------------
		public static void TryLoadValidCharsIfEmpty(LanguageDefinition langDef,
			ILgCharacterPropertyEngine cpe)
		{
			// Something has been loaded already: nothing to do.
			if (!string.IsNullOrEmpty(langDef.ValidChars))
				return;

			langDef.ValidChars = GetValidCharsForLocale(
				langDef.BaseLocale ?? langDef.LocaleAbbr, cpe);
		}
        /// ------------------------------------------------------------------------------------
        /// <summary>
        /// Builds a categorizer around a set of valid characters and a character property
        /// engine.
        /// </summary>
        /// <param name="validChars">The valid characters. May be null; per the original
        /// documentation the categorizer then falls back on the character property
        /// engine.</param>
        /// <param name="charPropEngine">The character property engine. Required.</param>
        /// <exception cref="ArgumentNullException"><paramref name="charPropEngine"/> is null.
        /// </exception>
        /// ------------------------------------------------------------------------------------
        public FwCharacterCategorizer(ValidCharacters validChars,
                                      ILgCharacterPropertyEngine charPropEngine)
        {
            // Reject a missing engine before touching any state.
            if (charPropEngine == null)
                throw new ArgumentNullException("charPropEngine");

            m_charPropEngine = charPropEngine;
            m_validChars = validChars;
        }
Exemplo n.º 10
0
 /// ------------------------------------------------------------------------------------
 /// <summary>
 /// Removes the final character from the paragraph string builder when the character
 /// property engine classifies it as a separator. Intended to be called after the last
 /// AppendRun for the current paragraph and before CreateParagraph.
 /// </summary>
 /// ------------------------------------------------------------------------------------
 public void TrimTrailingSpaceInPara()
 {
     // Fetch the character property engine on first use.
     if (m_cpe == null)
         m_cpe = m_cache.ServiceLocator.UnicodeCharProps;

     // An empty builder has nothing to trim.
     if (Length == 0)
         return;

     // Only a trailing separator is dropped.
     if (!m_cpe.get_IsSeparator(FinalCharInPara))
         return;

     m_ParaStrBldr.Replace(Length - 1, Length, null, null);
 }
Exemplo n.º 11
0
		/// <summary>
		/// Start it off analyzing a string, taking the Unicode character property engine
		/// from the given writing system factory.
		/// </summary>
		/// <param name="tss">The string to analyze; null text is treated as empty.</param>
		/// <param name="encf">Writing system factory that supplies the character property
		/// engine.</param>
		public WordMaker(ITsString tss, ILgWritingSystemFactory encf)
		{
			m_tss = tss;
			m_ich = 0;

			// Null text is normalized to the empty string before its length is taken.
			m_st = tss.get_Text() ?? "";
			m_cch = m_st.Length;

			// The factory is the source of the character property engine.
			m_cpe = encf.get_UnicodeCharProps();
			Debug.Assert(m_cpe != null, "encf.get_UnicodeCharProps() returned null");
		}
Exemplo n.º 12
0
		/// <summary>
		/// Creates an importer bound to the given cache, remembering the cache's default
		/// vernacular writing system and the character property engine for it.
		/// </summary>
		/// <param name="cache">The cache to import into. Must not be null.</param>
		public WordImporter(FdoCache cache)
		{
			m_cache = cache;
			m_ws = m_cache.DefaultVernWs;

			// Character property engine for the default vernacular writing system.
			// REVIEW SteveMc(TomB): this note was carried over from the FDO Scripture class
			// (the answer may appear there). It questioned which writing system should be
			// passed when obtaining the engine and observed that 0 seemed to work.
			var wsf = m_cache.LanguageWritingSystemFactoryAccessor;
			m_lgCharPropEngineVern = wsf.get_CharPropEngine(m_cache.DefaultVernWs);
		}
Exemplo n.º 13
0
        int m_ws;         // only text in this language is checked.

        /// <summary>
        /// Creates a spell-check helper for one string.
        /// </summary>
        /// <param name="tss">The string to check; null text is treated as empty.</param>
        /// <param name="dict">The spelling engine to use.</param>
        /// <param name="ws">The writing system whose text is checked.</param>
        /// <param name="cpe">The character property engine to use.</param>
        public SpellCheckMethod(ITsString tss, ISpellEngine dict, int ws, ILgCharacterPropertyEngine cpe)
        {
            // Collaborators supplied by the caller.
            m_dict = dict;
            m_cpe = cpe;
            m_ws = ws;

            // The string itself, plus a never-null copy of its text and that text's length.
            m_tss = tss;
            m_text = tss.Text ?? "";
            m_cch = m_text.Length;
        }
Exemplo n.º 14
0
        /// <summary>
        /// Creates an importer bound to the given cache, remembering the cache's default
        /// vernacular writing system and the character property engine for it.
        /// </summary>
        /// <param name="cache">The cache to import into. Must not be null.</param>
        public WordImporter(FdoCache cache)
        {
            m_cache = cache;
            m_ws = cache.DefaultVernWs;

            // REVIEW SteveMc(TomB): this note was carried over from the FDO Scripture class
            // (the answer may appear there). It questioned which writing system should be
            // passed when obtaining the engine and observed that 0 seemed to work.
            m_lgCharPropEngineVern =
                cache.LanguageWritingSystemFactoryAccessor.get_CharPropEngine(cache.DefaultVernWs);
        }
Exemplo n.º 15
0
		/// -----------------------------------------------------------------------------------
		/// <summary>
		/// Creates a correlation between two paragraph strings. Null strings are treated as
		/// empty. Identical strings get a correlation factor of 1.0 straight away; otherwise
		/// the correlation is built with the given character property engine.
		/// </summary>
		/// <param name="para1">Text of the first paragraph (may be null).</param>
		/// <param name="para2">Text of the second paragraph (may be null).</param>
		/// <param name="charPropEngine">Character property engine stored for building the
		/// correlation; not stored when the two strings are identical.</param>
		/// -----------------------------------------------------------------------------------
		public ParagraphCorrelation(string para1, string para2,
			ILgCharacterPropertyEngine charPropEngine)
		{
			m_para1 = para1 ?? string.Empty;
			m_para2 = para2 ?? string.Empty;

			if (m_para1 != m_para2)
			{
				// Different text: set up the word list and engine, then do the real work.
				m_wordList = new Dictionary<string, int[]>();
				m_charPropEngine = charPropEngine;
				BuildCorrelation();
				return;
			}

			// Identical text correlates perfectly.
			m_correlationFactor = 1.0;
		}
Exemplo n.º 16
0
 /// <summary>
 /// Start it off analyzing a string, taking the Unicode character property engine
 /// from the given writing system factory.
 /// </summary>
 /// <param name="tss">The string to analyze; null text is treated as empty.</param>
 /// <param name="encf">Writing system factory that supplies the character property
 /// engine.</param>
 public WordMaker(ITsString tss, ILgWritingSystemFactory encf)
 {
     m_tss = tss;
     m_ich = 0;

     // Substitute the empty string when the string has no text.
     string text = tss.get_Text();
     m_st = (text == null) ? "" : text;
     m_cch = m_st.Length;

     // Get a character property engine from the writing system factory.
     m_cpe = encf.get_UnicodeCharProps();
     Debug.Assert(m_cpe != null, "encf.get_UnicodeCharProps() returned null");
 }
Exemplo n.º 17
0
        /// <summary>
        /// Checks that '#', listed under Other in the valid characters, is reported as
        /// punctuation and not as a word-forming character.
        /// </summary>
        public void SymbolPunctuationOnly()
        {
            string xml = ksXmlHeader +
                         "<ValidCharacters><WordForming>a\uFFFCb\uFFFCc\uFFFCd\uFFFCe</WordForming>" +
                         "<Numeric>1\uFFFC2\uFFFC3\uFFFC4\uFFFC5</Numeric>" +
                         "<Other>'\uFFFC-\uFFFC#</Other>" +
                         "</ValidCharacters>";
            var validChars = ValidCharacters.Load(xml, "Test WS", null, null,
                                                  FwDirectoryFinder.LegacyWordformingCharOverridesFile);
            ILgCharacterPropertyEngine engineEn = Cache.WritingSystemFactory.get_CharPropEngine(m_wsEn);

            var categorizer = new FwCharacterCategorizer(validChars, engineEn);

            Assert.IsTrue(categorizer.IsPunctuation('#'));
            Assert.IsFalse(categorizer.IsWordFormingCharacter('#'));
        }
Exemplo n.º 18
0
 /// -----------------------------------------------------------------------------------
 /// <summary>
 /// Creates a correlation between two paragraph strings. Null strings are treated as
 /// empty. Identical strings get a correlation factor of 1.0 straight away; otherwise
 /// the correlation is built with the given character property engine.
 /// </summary>
 /// <param name="para1">Text of the first paragraph (may be null).</param>
 /// <param name="para2">Text of the second paragraph (may be null).</param>
 /// <param name="charPropEngine">Character property engine stored for building the
 /// correlation; not stored when the two strings are identical.</param>
 /// -----------------------------------------------------------------------------------
 public ParagraphCorrelation(string para1, string para2,
                             ILgCharacterPropertyEngine charPropEngine)
 {
     m_para1 = para1;
     if (m_para1 == null)
         m_para1 = string.Empty;

     m_para2 = para2;
     if (m_para2 == null)
         m_para2 = string.Empty;

     // Identical text correlates perfectly; no further work is needed.
     if (m_para1 == m_para2)
     {
         m_correlationFactor = 1.0;
         return;
     }

     m_wordList = new Dictionary<string, int[]>();
     m_charPropEngine = charPropEngine;
     BuildCorrelation();
 }
Exemplo n.º 19
0
        /// <summary>
        /// Runs VerifyEquality over pairs of strings that should, or should not, compare as
        /// equal once spaces are disregarded.
        /// </summary>
        public void TestStringsEqualExceptSpace()
        {
            var engine = Cache.LanguageWritingSystemFactoryAccessor.UnicodeCharProps;

            // Pairs expected to match.
            VerifyEquality("", "", true, engine, "empty strings");
            VerifyEquality("a", "a", true, engine, "single char");
            VerifyEquality(" a", "a", true, engine, "one leading space");
            VerifyEquality("b  ", "b", true, engine, "two trailing spaces");
            VerifyEquality("  ab  c  ", "abc", true, engine, "multiple spaces several places");
            VerifyEquality(" a b c", "a  b  c  ", true, engine, "spaces both sides");

            // Pairs expected to differ.
            VerifyEquality("a", "b", false, engine, "single char different");
            VerifyEquality(" a b c", "abd", false, engine, "complex different");
            VerifyEquality("", "abd", false, engine, "empty/non-empty");

            // Empty string against space-only and spaced-out text.
            VerifyEquality("", " ", true, engine, "empty/space");
            VerifyEquality("", " a b d ", false, engine, "empty/non-empty with spaces");
        }
Exemplo n.º 20
0
        /// --------------------------------------------------------------------------------
        /// <summary>
        /// Parses a delimited string of characters via TsStringUtils.ParseCharString and then
        /// drops every parsed character that IsInAnotherList finds in one of the other lists.
        /// </summary>
        /// <param name="chars">The string containing a delimited list of characters.</param>
        /// <param name="delimiter">The delimiter (passed as a string, but really just a single
        /// character).</param>
        /// <param name="cpe">The character property engine.</param>
        /// <param name="invalidChars">Receives the list of invalid characters encountered.</param>
        /// <param name="otherLists">Other lists to check, so that a character does not end up
        /// in more than one list.</param>
        /// <returns>The parsed characters that appear in none of the other lists.</returns>
        /// --------------------------------------------------------------------------------
        private static List <string> ParseCharString(string chars, string delimiter,
                                                     ILgCharacterPropertyEngine cpe, out List <string> invalidChars,
                                                     params List <string>[] otherLists)
        {
            List<string> parsed = TsStringUtils.ParseCharString(chars, delimiter, cpe,
                                                                out invalidChars);

            // Walk from the end so that removals do not shift entries still to be visited.
            int idx = parsed.Count;
            while (--idx >= 0)
            {
                if (IsInAnotherList(parsed[idx], otherLists))
                    parsed.RemoveAt(idx);
            }

            return parsed;
        }
 /// <summary>
 /// Releases the container when called from an explicit Dispose on an object that is not
 /// yet disposed, then clears the held references and marks the object as disposed.
 /// </summary>
 /// <param name="fDisposing">true when called from Dispose(); when false a debug message
 /// about the missing Dispose() call is written.</param>
 private void Dispose(bool fDisposing)
 {
     System.Diagnostics.Debug.WriteLineIf(!fDisposing, "****** Missing Dispose() call for " + GetType() + " *******");

     // The container is only disposed on the first explicit dispose.
     bool releaseContainer = fDisposing && !IsDisposed && m_container != null;
     if (releaseContainer)
         m_container.Dispose();

     // References are dropped on every call.
     m_lgpe = null;
     m_container = null;
     m_baseServiceLocator = null;
     IsDisposed = true;
 }
        /// <summary>
        /// Checks that '#', listed under Other in the valid characters, is reported as
        /// punctuation and not as a word-forming character.
        /// </summary>
        public void SymbolPunctuationOnly()
        {
            string xml = ksXmlHeader +
                         "<ValidCharacters><WordForming>a\uFFFCb\uFFFCc\uFFFCd\uFFFCe</WordForming>" +
                         "<Numeric>1\uFFFC2\uFFFC3\uFFFC4\uFFFC5</Numeric>" +
                         "<Other>'\uFFFC-\uFFFC#</Other>" +
                         "</ValidCharacters>";
            ValidCharacters valid = ValidCharacters.Load(xml, "Test WS", null);

            // Character property engine for the English writing system.
            var wsf = m_cache.Cache.LanguageWritingSystemFactoryAccessor;
            ILgCharacterPropertyEngine engineEn =
                (ILgCharacterPropertyEngine)wsf.get_CharPropEngine(InMemoryFdoCache.s_wsHvos.En);

            var categorizer = new FwCharacterCategorizer(valid, engineEn);

            Assert.IsTrue(categorizer.IsPunctuation('#'));
            Assert.IsFalse(categorizer.IsWordFormingCharacter('#'));
        }
Exemplo n.º 23
0
        /// ------------------------------------------------------------------------------------
        /// <summary>
        /// Starting one step (delta) away from ichStart, scans the string in the direction of
        /// delta and returns the index of the first character that is neither word-forming
        /// nor a number. If no such character is found, returns the index at which BeyondLim
        /// reports that the limit has been passed.
        /// For our purposes here, ORC (0xfffc) is considered word-forming.
        /// </summary>
        /// <param name="wsf">Writing system factory used to find the character property
        /// engine at each offset.</param>
        /// <param name="tss">The string being scanned.</param>
        /// <param name="ichStart">Character position to start from (itself not tested).</param>
        /// <param name="delta">Step added to the index on each iteration; documented by the
        /// original as +/- 1.</param>
        /// <param name="lim">Limit passed to BeyondLim to decide when to stop.</param>
        /// <returns>The index where the scan stopped.</returns>
        /// ------------------------------------------------------------------------------------
        private int AdjustWordBoundary(ILgWritingSystemFactory wsf, ITsString tss, int ichStart,
                                       int delta, int lim)
        {
            string text = tss.Text;
            int ich = ichStart + delta;

            while (!BeyondLim(ich, delta, lim))
            {
                // The engine can differ from offset to offset, so look it up each time.
                ILgCharacterPropertyEngine cpe = TsStringUtils.GetCharPropEngineAtOffset(tss, wsf, ich);
                char ch = text[ich];

                bool partOfWord = cpe.get_IsWordForming(ch) || cpe.get_IsNumber(ch) || ch == 0xfffc;
                if (!partOfWord)
                    break;

                ich += delta;
            }

            return ich;
        }
Exemplo n.º 24
0
        /// <summary>
        /// With '.' listed as word-forming in the valid characters, "abc.de" is expected to
        /// come back as a single word.
        /// </summary>
        public void WordAndPuncs_OverridePunc()
        {
            string xml = ksXmlHeader +
                         "<ValidCharacters><WordForming>a\uFFFCb\uFFFCc\uFFFCd\uFFFCe\uFFFC.</WordForming>" +
                         "<Numeric>1\uFFFC2\uFFFC3\uFFFC4\uFFFC5</Numeric>" +
                         "<Other>'\uFFFC-\uFFFC#</Other>" +
                         "</ValidCharacters>";
            var validChars = ValidCharacters.Load(xml, "Test WS", null, null,
                                                  FwDirectoryFinder.LegacyWordformingCharOverridesFile);
            ILgCharacterPropertyEngine engineEn = Cache.WritingSystemFactory.get_CharPropEngine(m_wsEn);

            var categorizer = new FwCharacterCategorizer(validChars, engineEn);

            List<WordAndPunct> result = categorizer.WordAndPuncts("abc.de");

            // The period does not split the text: exactly one word comes back.
            Assert.AreEqual(1, result.Count);
            Assert.AreEqual("abc.de", result[0].Word);
        }
Exemplo n.º 25
0
        /// <summary>
        /// Compares two strings character by character while skipping every character whose
        /// general category is Zs (space separator) in either string.
        /// Pulled out as internal static for testing.
        /// </summary>
        /// <param name="first">First string to compare. Must not be null.</param>
        /// <param name="second">Second string to compare. Must not be null.</param>
        /// <param name="cpe">Character property engine used to classify characters.</param>
        /// <returns>true if the non-space characters of both strings are the same, in the
        /// same order; otherwise false.</returns>
        internal static bool StringsEqualExceptSpace(string first, string second, ILgCharacterPropertyEngine cpe)
        {
            int posFirst = 0;
            int posSecond = 0;

            while (true)
            {
                // Move each cursor past any run of space separators.
                while (posFirst < first.Length &&
                       cpe.get_GeneralCategory(first[posFirst]) == LgGeneralCharCategory.kccZs)
                {
                    posFirst++;
                }
                while (posSecond < second.Length &&
                       cpe.get_GeneralCategory(second[posSecond]) == LgGeneralCharCategory.kccZs)
                {
                    posSecond++;
                }

                bool firstDone = posFirst >= first.Length;
                bool secondDone = posSecond >= second.Length;

                // Once either string is exhausted they match only if both are.
                if (firstDone || secondDone)
                    return firstDone && secondDone;

                // Both cursors are on non-space characters, which must agree.
                if (first[posFirst] != second[posSecond])
                    return false;

                posFirst++;
                posSecond++;
            }
        }
        /// <summary>
        /// Splitting an empty string into words and punctuation is expected to yield no
        /// entries.
        /// </summary>
        public void WordAndPuncs_EmptyString()
        {
            string xml = ksXmlHeader +
                         "<ValidCharacters><WordForming>a\uFFFCb\uFFFCc</WordForming>" +
                         "<Numeric>1\uFFFC2\uFFFC3\uFFFC4\uFFFC5</Numeric>" +
                         "<Other>-\uFFFCU+0020</Other>" +
                         "</ValidCharacters>";
            ValidCharacters valid = ValidCharacters.Load(xml, "Test WS", null);

            // Character property engine for the English writing system.
            var wsf = m_cache.Cache.LanguageWritingSystemFactoryAccessor;
            ILgCharacterPropertyEngine engineEn =
                (ILgCharacterPropertyEngine)wsf.get_CharPropEngine(InMemoryFdoCache.s_wsHvos.En);

            var categorizer = new FwCharacterCategorizer(valid, engineEn);

            List<WordAndPunct> result = categorizer.WordAndPuncts("");

            // We expect no words to be returned.
            Assert.AreEqual(0, result.Count);
        }
Exemplo n.º 27
0
        /// <summary>
        /// Update the nth custom part of the variant, where position = 0 for language,
        /// 1 for script, 2 for region.
        /// </summary>
        /// <param name="position">Which custom part to update (0 language, 1 script,
        /// 2 region).</param>
        /// <param name="oldValue">The old value stored in the property (not necessarily the
        /// old region code). The property used to be custom if this equals
        /// <paramref name="markerValue"/>.</param>
        /// <param name="newValue">The new value for the part.</param>
        /// <param name="markerValue">Marker stored in the property when the part is custom.</param>
        /// <param name="isNewCustom">Whether the new value is a custom one.</param>
        /// <returns>The value that should actually be stored in the appropriate property:
        /// the marker when the new value is custom, otherwise the new value itself.</returns>
        string UpdateVariantPart(int position, string oldValue, string newValue, string markerValue, bool isNewCustom)
        {
            bool isOldCustom = oldValue == markerValue;

            if (!isOldCustom && !isNewCustom)
            {
                return newValue;    // nothing to do, no change to variant.
            }

            string leadIn;
            string variant;
            string suffix = GetPartsOfVariant(position, out leadIn, out variant);

            // Remove the old custom value, if any: it is the segment before the first hyphen.
            if (isOldCustom)
            {
                // Language-tag text is not linguistic content, so search for the hyphen with
                // the ordinal char overload rather than a culture-sensitive string search.
                int hyphen = suffix.IndexOf('-');

                // Without a hyphen the old value is the only part left, so remove it all.
                suffix = hyphen >= 0 ? suffix.Substring(hyphen + 1) : "";
            }

            // Prepend the new custom value, if any.
            if (isNewCustom)
            {
                suffix = newValue + "-" + suffix;
            }

            // Now re-assemble the variant.
            var privateUse = Combine(leadIn, "-", suffix);
            string newVariant = BuildVariant(variant, privateUse);

            // Only touch Variant when it changes (ignoring case).
            if (!newVariant.Equals(Variant ?? "", StringComparison.OrdinalIgnoreCase))
            {
                // NOTE(review): the cached engine is cleared before the variant changes,
                // presumably because it depends on the old variant; confirm.
                m_cpe = null;
                Variant = newVariant;
            }

            return isNewCustom ? markerValue : newValue;
        }
        /// <summary>
        /// Checks the correlation factor computed for a range of paragraph pairs, including
        /// empty and null paragraphs.
        /// </summary>
        public void CorrelationFactor()
        {
            ILgCharacterPropertyEngine cpe = LgIcuCharPropEngineClass.Create();

            // Same word, with or without surrounding space.
            Assert.AreEqual(1.0, new ParagraphCorrelation("Hello", "Hello", cpe).CorrelationFactor);
            Assert.AreEqual(1.0, new ParagraphCorrelation("Hello", "Hello ", cpe).CorrelationFactor);
            Assert.AreEqual(1.0, new ParagraphCorrelation(" Hello", "Hello", cpe).CorrelationFactor);

            // Partly overlapping words.
            Assert.AreEqual(0.5, new ParagraphCorrelation("Hello", "Hello there", cpe).CorrelationFactor);
            Assert.AreEqual(0.5,
                            new ParagraphCorrelation("Hello over there", "Hello over here", cpe).CorrelationFactor);

            // Same words in a different order.
            Assert.AreEqual(1.0, new ParagraphCorrelation("Hello there", "there Hello", cpe).CorrelationFactor);

            // A repeated word in one paragraph only.
            Assert.AreEqual(0.8125,
                            new ParagraphCorrelation("I am really excited",
                                                     "I am really really really really excited", cpe).CorrelationFactor);

            // Empty paragraphs.
            Assert.AreEqual(0.0,
                            new ParagraphCorrelation(string.Empty, "What will happen here?", cpe).CorrelationFactor);
            Assert.AreEqual(1.0, new ParagraphCorrelation(string.Empty, string.Empty, cpe).CorrelationFactor);

            // Null paragraphs.
            Assert.AreEqual(1.0, new ParagraphCorrelation(null, null, cpe).CorrelationFactor);
            Assert.AreEqual(0.0, new ParagraphCorrelation(null, "what?", cpe).CorrelationFactor);
            Assert.AreEqual(0.0, new ParagraphCorrelation("what?", null, cpe).CorrelationFactor);
        }
        /// ------------------------------------------------------------------------------------
        /// <summary>
        /// Builds the valid-characters string for an ICU locale: the locale's exemplar
        /// characters are expanded, uppercase characters are added, the result is normalized
        /// with NormalizeD and parsed into a list, and the list is joined with spaces.
        /// </summary>
        /// <param name="icuLocale">Code for an ICU locale. When null, an empty string is
        /// returned.</param>
        /// <param name="cpe">A character property engine (needed for normalization).</param>
        /// <returns>Space-delimited set of valid characters for the given locale, prefixed
        /// with two spaces.</returns>
        /// ------------------------------------------------------------------------------------
        public static string GetValidCharsForLocale(string icuLocale,
                                                    ILgCharacterPropertyEngine cpe)
        {
            if (icuLocale == null)
                return string.Empty;

            s_ICU.Init();

            // Exemplar characters -> expanded -> with uppercase added -> normalized.
            string normalized = cpe.NormalizeD(
                AddUppercaseCharacters(
                    ExpandExemplarCharacters(s_ICU.GetExemplarCharacters(icuLocale)),
                    icuLocale));

            List<string> exemplarChars = TsStringUtils.ParseCharString(normalized, " ", cpe);

            // Start the string with two space characters so that space itself is included
            // in the set of valid characters.
            return "  " + exemplarChars.ToString(" ");
        }
Exemplo n.º 30
0
        /// -----------------------------------------------------------------------------------
        /// <summary>
        /// Releases the resources held by this object. Does nothing (beyond a debug check)
        /// when the object has already been disposed.
        /// </summary>
        /// <param name="disposing"><c>true</c> to release both managed and unmanaged
        /// resources; <c>false</c> to release only unmanaged resources. When <c>false</c> a
        /// debug message about the missing Dispose() call is written.</param>
        /// -----------------------------------------------------------------------------------
        protected override void Dispose(bool disposing)
        {
            System.Diagnostics.Debug.WriteLineIf(!disposing, "****** Missing Dispose() call for " + GetType().Name + ". ****** ");

            if (!IsDisposed)
            {
                // Managed components are released only on an explicit dispose.
                if (disposing && components != null)
                    components.Dispose();

                m_cpe = null;
                base.Dispose(disposing);
                return;
            }

            // Must not be run more than once; the engine reference should already be gone.
            Debug.Assert(m_cpe == null);
        }
Exemplo n.º 31
0
        /// <summary>
        /// Checks the longest useful substring reported for a range of paragraph pairs.
        /// </summary>
        public void LongestUsefulSubstring()
        {
            ILgCharacterPropertyEngine cpe = LgIcuCharPropEngineClass.Create();

            // Two equal strings.
            Assert.AreEqual("Hello",
                            new ParagraphCorrelation("Hello", "Hello", cpe).LongestUsefulSubstring);

            // Common substring at the start.
            Assert.AreEqual("Hello over ",
                            new ParagraphCorrelation("Hello over there", "Hello over here", cpe).LongestUsefulSubstring);

            // Common substring in the middle.
            Assert.AreEqual(" want to be over ",
                            new ParagraphCorrelation("I want to be over there",
                                                     "You want to be over here", cpe).LongestUsefulSubstring);

            // Common substring at the end.
            Assert.AreEqual(" visit my relatives?",
                            new ParagraphCorrelation("Will you come to visit my relatives?",
                                                     "Do I ever visit my relatives?", cpe).LongestUsefulSubstring);

            // Two common substrings: the longest is reported.
            Assert.AreEqual(" has common words",
                            new ParagraphCorrelation("This sentence has common words",
                                                     "This paragraph has common words", cpe).LongestUsefulSubstring);

            // Nothing at all in common.
            Assert.AreEqual(string.Empty,
                            new ParagraphCorrelation("We have nothing in common",
                                                     "absolutely nill items", cpe).LongestUsefulSubstring);

            // Pathological cases: empty and null paragraphs.
            Assert.AreEqual(string.Empty,
                            new ParagraphCorrelation(string.Empty, string.Empty, cpe).LongestUsefulSubstring);
            Assert.AreEqual(string.Empty,
                            new ParagraphCorrelation(null, string.Empty, cpe).LongestUsefulSubstring);
            Assert.AreEqual(string.Empty,
                            new ParagraphCorrelation(string.Empty, "Hello there", cpe).LongestUsefulSubstring);
        }
        /// <summary>
        /// With '.' not listed as word-forming in the valid characters, "abc.de" is expected
        /// to be split into the two words "abc" and "de".
        /// </summary>
        public void WordAndPuncs_NoOverridePunc()
        {
            string xml = ksXmlHeader +
                         "<ValidCharacters><WordForming>a\uFFFCb\uFFFCc\uFFFCd\uFFFCe</WordForming>" +
                         "<Numeric>1\uFFFC2\uFFFC3\uFFFC4\uFFFC5</Numeric>" +
                         "<Other>'\uFFFC-\uFFFC#</Other>" +
                         "</ValidCharacters>";
            ValidCharacters valid = ValidCharacters.Load(xml, "Test WS", null);

            // Character property engine for the English writing system.
            var wsf = m_cache.Cache.LanguageWritingSystemFactoryAccessor;
            ILgCharacterPropertyEngine engineEn =
                (ILgCharacterPropertyEngine)wsf.get_CharPropEngine(InMemoryFdoCache.s_wsHvos.En);

            var categorizer = new FwCharacterCategorizer(valid, engineEn);

            List<WordAndPunct> result = categorizer.WordAndPuncts("abc.de");

            // The period separates the text into two words.
            Assert.AreEqual(2, result.Count);
            Assert.AreEqual("abc", result[0].Word);
            Assert.AreEqual("de", result[1].Word);
        }
        /// <summary>
        /// Checks the correlation factor computed for pairs of paragraphs that contain digits
        /// and punctuation in addition to words.
        /// </summary>
        public void CorrelationFactor_WithDigitsAndPunc()
        {
            ILgCharacterPropertyEngine engine = LgIcuCharPropEngineClass.Create();

            // Same single word; only a leading digit and the trailing punctuation differ.
            Assert.AreEqual(1.0,
                            new ParagraphCorrelation("Hello!", "2Hello.", engine).CorrelationFactor);

            // One of the two words on the longer side is shared.
            Assert.AreEqual(0.5,
                            new ParagraphCorrelation("Hello", "Hello, there", engine).CorrelationFactor);

            Assert.AreEqual(0.5,
                            new ParagraphCorrelation("3Hello over there", "Hello over here", engine).CorrelationFactor);

            // Same words in a different order.
            Assert.AreEqual(1.0,
                            new ParagraphCorrelation("Hello there?", "4there Hello!", engine).CorrelationFactor);

            // A word repeated more often on one side than on the other.
            Assert.AreEqual(0.8125,
                            new ParagraphCorrelation("5I am really excited!",
                                                     "6I am really really really really excited.",
                                                     engine).CorrelationFactor);
        }
Exemplo n.º 34
0
        /// ------------------------------------------------------------------------------------
        /// <summary>
        /// Rewrites every chapter-number and verse-number run of the given paragraph so that
        /// each numeric character is replaced by the digit that has the same numeric value in
        /// the script whose zero is <paramref name="zeroDigit"/>.
        /// </summary>
        /// <param name="para">Paragraph whose contents are converted</param>
        /// <param name="zeroDigit">Character representing zero for chapter/verse numbers</param>
        /// <param name="charEngine">Unicode character properties engine used to recognize
        /// numbers and obtain their numeric values</param>
        /// <returns>true if the paragraph contains at least one non-empty run whose style
        /// function is Chapter or Verse (such runs are rewritten, and the paragraph contents
        /// are replaced, even when no character actually changes); otherwise false</returns>
        /// <remarks>The return value is only used for testing. The method is virtual so a
        /// test class can override it to limit the amount of processing.</remarks>
        /// ------------------------------------------------------------------------------------
        protected virtual bool ConvertChapterVerseNumbers(IScrTxtPara para, char zeroDigit,
                                                          ILgCharacterPropertyEngine charEngine)
        {
            ITsString  contents       = para.Contents;
            ITsStrBldr builder        = contents.GetBldr();
            int        runCount       = contents.RunCount;
            bool       foundNumberRun = false;

            for (int iRun = 0; iRun < runCount; iRun++)
            {
                TsRunInfo    runInfo;
                ITsTextProps runProps = contents.FetchRunInfo(iRun, out runInfo);
                IStStyle     runStyle = m_scr.FindStyle(runProps);

                // Only non-empty runs carrying a chapter or verse style are of interest.
                if (runStyle == null)
                {
                    continue;
                }
                if (runStyle.Function != FunctionValues.Verse &&
                    runStyle.Function != FunctionValues.Chapter)
                {
                    continue;
                }
                if (runInfo.ichMin >= runInfo.ichLim)
                {
                    continue;
                }

                foundNumberRun = true;
                string        runText   = contents.GetChars(runInfo.ichMin, runInfo.ichLim);
                StringBuilder converted = new StringBuilder(runText.Length);
                for (int ich = 0; ich < runText.Length; ich++)
                {
                    char ch = runText[ich];
                    // Numbers are mapped relative to the target zero digit; everything else
                    // is copied through unchanged.
                    converted.Append(charEngine.get_IsNumber(ch)
                                         ? (char)(zeroDigit + charEngine.get_NumericValue(ch))
                                         : ch);
                }

                // The replacement is built one char per source char, so later run offsets
                // stay valid.
                builder.Replace(runInfo.ichMin, runInfo.ichLim, converted.ToString(), runProps);
            }

            if (foundNumberRun)
            {
                para.Contents = builder.GetString();
            }
            return foundNumberRun;
        }
Exemplo n.º 35
0
        /// ------------------------------------------------------------------------------------
        /// <summary>
        /// Creates the character property engine, stores the language definition, resets this
        /// object and parses the word-forming, numeric and other character strings into their
        /// respective lists.
        /// </summary>
        /// <param name="langDef">The language definition</param>
        /// <param name="invalidChars">Receives the invalid characters reported while parsing
        /// all three character strings.</param>
        /// ------------------------------------------------------------------------------------
        private void Init(LanguageDefinition langDef, out List<string> invalidChars)
        {
            m_cpe     = LgIcuCharPropEngineClass.Create();
            m_langDef = langDef;

            Reset();

            // Turn the space placeholder back into a real space before parsing.
            if (Other != null)
            {
                Other = Other.Replace(kSpaceReplacment, " ");
            }

            // Word-forming characters are parsed first; their invalid list seeds the result.
            m_WordFormingCharacters = ParseCharString(WordForming, ksDelimiter, m_cpe,
                                                      langDef, out invalidChars);

            // Numeric characters must not duplicate anything in the word-forming list.
            List<string> invalidNumeric;
            m_NumericCharacters = ParseCharString(Numeric, ksDelimiter, m_cpe,
                                                  langDef, out invalidNumeric, m_WordFormingCharacters);
            invalidChars.AddRange(invalidNumeric);

            // Other characters must not duplicate anything in either earlier list.
            List<string> invalidOther;
            m_OtherCharacters = ParseCharString(Other, ksDelimiter, m_cpe, langDef,
                                                out invalidOther, m_WordFormingCharacters, m_NumericCharacters);
            invalidChars.AddRange(invalidOther);
        }
Exemplo n.º 36
0
        /// ------------------------------------------------------------------------------------
        /// <summary>
        /// Creates the character property engine, resets this object and parses the
        /// word-forming, numeric and other character strings into their respective lists.
        /// </summary>
        /// <returns>The invalid characters reported while parsing all three character
        /// strings.</returns>
        /// ------------------------------------------------------------------------------------
        private List<string> Init()
        {
            m_cpe = LgIcuCharPropEngineClass.Create();

            Reset();

            // Turn the space placeholder back into a real space before parsing.
            if (Other != null)
            {
                Other = Other.Replace(kSpaceReplacment, " ");
            }

            // Word-forming characters are parsed first; their invalid list seeds the result.
            List<string> allInvalid;
            m_WordFormingCharacters = ParseCharString(WordForming, ksDelimiter, m_cpe, out allInvalid);

            // Numeric characters must not duplicate anything in the word-forming list.
            List<string> invalidNumeric;
            m_NumericCharacters = ParseCharString(Numeric, ksDelimiter, m_cpe, out invalidNumeric,
                                                  m_WordFormingCharacters);
            allInvalid.AddRange(invalidNumeric);

            // Other characters must not duplicate anything in either earlier list.
            List<string> invalidOther;
            m_OtherCharacters = ParseCharString(Other, ksDelimiter, m_cpe, out invalidOther,
                                                m_WordFormingCharacters, m_NumericCharacters);
            allInvalid.AddRange(invalidOther);

            return allInvalid;
        }
Exemplo n.º 37
0
 /// <summary>
 /// Disposes the container (when called with <paramref name="fDisposing"/> true and not
 /// already disposed), clears the held references and marks this object as disposed.
 /// </summary>
 /// <param name="fDisposing">true when called from an explicit Dispose; when false a
 /// "Missing Dispose() call" line is written to the debug output.</param>
 private void Dispose(bool fDisposing)
 {
     System.Diagnostics.Debug.WriteLineIf(!fDisposing, "****** Missing Dispose() call for " + GetType() + " *******");

     // The container is only disposed on an explicit, first-time dispose.
     bool shouldDisposeContainer = fDisposing && !IsDisposed && m_container != null;
     if (shouldDisposeContainer)
     {
         try
         {
             m_container.Dispose();
         }
         catch (InvalidComObjectException comError)
         {
             // Disposing the container intermittently fails because a COM object has become
             // invalid. Report it but do not crash: a good-faith effort was made to dispose
             // the COM objects, and this typically happens during shutdown or at the end of
             // a unit test.
             Debug.WriteLine(String.Format(@"COM problem when disposing container in StructureMapServiceLocator: {0}", comError.Message));
         }
     }

     // References are dropped regardless of how this method was reached.
     m_container = null;
     m_lgpe      = null;
     IsDisposed  = true;
 }
Exemplo n.º 38
0
        /// <summary>
        /// Executes in two distinct scenarios.
        ///
        /// 1. If disposing is true, the method has been called directly
        /// or indirectly by a user's code via the Dispose method.
        /// Both managed and unmanaged resources can be disposed.
        ///
        /// 2. If disposing is false, the method has been called by the
        /// runtime from inside the finalizer and you should not reference (access)
        /// other managed objects, as they may already have been finalized.
        /// Only unmanaged resources can be disposed.
        /// </summary>
        /// <param name="disposing">true when called from Dispose(); false when called from
        /// the finalizer.</param>
        /// <remarks>
        /// If any exceptions are thrown, that is fine.
        /// If the method is being done in a finalizer, it will be ignored.
        /// If it is thrown by client code calling Dispose,
        /// it needs to be handled by fixing the bug.
        ///
        /// If subclasses override this method, they should call the base implementation.
        /// </remarks>
        protected virtual void Dispose(bool disposing)
        {
            // Must not be run more than once.
            if (m_isDisposed)
            {
                return;
            }

            if (disposing)
            {
                // Dispose managed resources here.
            }

            // Dispose unmanaged resources here, whether disposing is true or false.
            m_cache = null;
            if (m_lgCharPropEngineVern != null)
            {
                // ReleaseComObject throws ArgumentException for an object that is not a COM
                // wrapper (for example a managed stand-in supplied by a test), so only
                // release genuine COM objects. The reference is dropped either way.
                if (System.Runtime.InteropServices.Marshal.IsComObject(m_lgCharPropEngineVern))
                {
                    System.Runtime.InteropServices.Marshal.ReleaseComObject(m_lgCharPropEngineVern);
                }
                m_lgCharPropEngineVern = null;
            }

            m_isDisposed = true;
        }
Exemplo n.º 39
0
		/// ------------------------------------------------------------------------------------
		/// <summary>
		/// Reports whether the full character starting at <paramref name="ich"/> has the
		/// general category Zs (space separator).
		/// </summary>
		/// <param name="cpe">The character property engine used to look up the category.</param>
		/// <param name="text">The text containing the character.</param>
		/// <param name="ich">The index in <paramref name="text"/> at which the character
		/// starts.</param>
		/// <returns>
		/// 	<c>true</c> if the character's general category is kccZs;
		///		otherwise, <c>false</c>.
		/// </returns>
		/// ------------------------------------------------------------------------------------
		public static bool IsWhite(ILgCharacterPropertyEngine cpe, string text, int ich)
		{
			int fullChar = StringUtils.FullCharAt(text, ich);
			LgGeneralCharCategory category = cpe.get_GeneralCategory(fullChar);
			return category == LgGeneralCharCategory.kccZs;
		}
Exemplo n.º 40
0
 /// ------------------------------------------------------------------------------------
 /// <summary>
 /// Creates a <see cref="FwCharacterCategorizer"/> that keeps the supplied valid-character
 /// list and character property engine for later use.
 /// </summary>
 /// <param name="validChars">The valid characters; stored as given.</param>
 /// <param name="charPropEngine">The character property engine; stored as given.</param>
 /// ------------------------------------------------------------------------------------
 public FwCharacterCategorizer(ValidCharacters validChars,
                               ILgCharacterPropertyEngine charPropEngine)
 {
     // The two assignments are independent of each other.
     m_charPropEngine = charPropEngine;
     m_validChars     = validChars;
 }
Exemplo n.º 41
0
        /// <summary>
        /// Checks the properties reported for a character against the expected values
        /// from UnicodeData.txt.
        /// Checks: fields 1-8, 11-14
        /// (skips 9 and 10, "Bidi Mirrored" and "Unicode Version 1").
        /// </summary>
        /// <param name="puaIndex">Code point of the character to check.</param>
        /// <param name="puaName">Expected character name.</param>
        /// <param name="puaGenCat">Expected general category.</param>
        /// <param name="puaCombiningClass">Expected combining class.</param>
        /// <param name="puaBidiClass">Expected bidi category.</param>
        /// <param name="puaDecomposition">Expected decomposition.</param>
        /// <param name="puaNumeric">Whether the character is expected to be a number.</param>
        /// <param name="puaNumericValue">Expected numeric value; only checked when
        /// <paramref name="puaNumeric"/> is true.</param>
        /// <param name="puaComment">Expected ISO comment.</param>
        /// <param name="puaToUpper">Expected upper-case mapping.</param>
        /// <param name="puaToLower">Expected lower-case mapping.</param>
        /// <param name="puaToTitle">Expected title-case mapping.</param>
        public static void Check_PUA(
            int puaIndex,
            string puaName,
            LgGeneralCharCategory puaGenCat,
            int puaCombiningClass,
            LgBidiCategory puaBidiClass,
            string puaDecomposition,
            bool puaNumeric,
            int puaNumericValue,
            string puaComment,
            int puaToUpper,
            int puaToLower,
            int puaToTitle
            )
        {
            // Placeholder values, all overwritten inside the try block below.
            string name = "";
            LgGeneralCharCategory genCategory = LgGeneralCharCategory.kccCn;
            int            combiningClass     = 0;
            string         decomposition      = "None";
            LgBidiCategory bidiCategory       = LgBidiCategory.kbicL;
            bool           isNumber           = false;
            int            numericValue       = -1;
            int            upper   = -1;
            int            lower   = -1;
            int            title   = -1;
            string         comment = "<none>";

            // Read every property up front so the engine can be released before asserting.
            ILgCharacterPropertyEngine charPropEngine = LgIcuCharPropEngineClass.Create();

            try
            {
                string icuDataDir = GetIcuDataDir();
                Icu.SetDataDirectory(icuDataDir);
                Icu.UErrorCode      error;
                Icu.UCharNameChoice choice = Icu.UCharNameChoice.U_UNICODE_CHAR_NAME;
                Icu.u_CharName(puaIndex, choice, out name, out error);
                genCategory    = charPropEngine.get_GeneralCategory(puaIndex);
                combiningClass = charPropEngine.get_CombiningClass(puaIndex);
                bidiCategory   = charPropEngine.get_BidiCategory(puaIndex);
                decomposition  = charPropEngine.get_Decomposition(puaIndex);
                // The full decomposition is not verified; the call is kept so that a failure
                // inside the engine still surfaces here.
                charPropEngine.get_FullDecomp(puaIndex);
                // Note: isNumber merely checks the General category, it doesn't check to see if there is a valid numeric value.
                isNumber = charPropEngine.get_IsNumber(puaIndex);
                if (isNumber)
                {
                    numericValue = charPropEngine.get_NumericValue(puaIndex);
                }
                comment = charPropEngine.get_Comment(puaIndex);

                upper = charPropEngine.get_ToUpperCh(puaIndex);
                lower = charPropEngine.get_ToLowerCh(puaIndex);
                title = charPropEngine.get_ToTitleCh(puaIndex);
            }
            finally
            {
                // Must release pointer to free memory-mapping before we try to restore files.
                Marshal.ReleaseComObject(charPropEngine);
                charPropEngine = null;
                Icu.Cleanup();                          // clean up the ICU files / data
            }

            // Format provider used to print hexadecimal values in the error messages.
            System.Globalization.NumberFormatInfo hexFormat = new System.Globalization.NumberFormatInfo();

            string errorMessage = "PUA Character " +
                                  puaIndex.ToString("x", hexFormat) +
                                  " has an incorrect ";

            //Check Name [1]
            Assert.AreEqual(puaName, name, errorMessage + "name.");

            //Check general category [2]
            Assert.AreEqual(puaGenCat, genCategory, errorMessage + "general category.");

            //Check combining class [3]
            Assert.AreEqual(puaCombiningClass, combiningClass, errorMessage + "combining class.");

            //Check Bidi class [4]
            Assert.AreEqual(puaBidiClass, bidiCategory, errorMessage + "bidi class value.");

            //Check Decomposition [5]
            // The message is built before the assertion runs, so the first character may only
            // be read when there is one; otherwise an empty decomposition would crash here
            // even though it equals the expected value.
            string firstDecompChar = string.IsNullOrEmpty(decomposition)
                                         ? "<empty>"
                                         : ((int)decomposition[0]).ToString("x", hexFormat);
            using (StringWriter decompositionMessage = new StringWriter(hexFormat))
            {
                decompositionMessage.WriteLine(errorMessage + "decomposition.");
                decompositionMessage.WriteLine("Decomposition, {0}, is incorrect", firstDecompChar);
                Assert.AreEqual(puaDecomposition, decomposition, decompositionMessage.ToString());
            }

            //Check Numeric Value [6,7,8]
            Assert.AreEqual(puaNumeric, isNumber, errorMessage +
                            "numeric type (i.e. does or doesn't have a numeric value when it should be the other).");
            if (puaNumeric)
            {
                Assert.AreEqual(puaNumericValue, numericValue, errorMessage + "numeric value.");
            }
            //Check ISO Comment [11]
            Assert.AreEqual(puaComment, comment, errorMessage + "ISO commment");

            //Check uppercase [12]
            // A fresh writer is used: flushing a StringWriter does not clear what was already
            // written, so reusing one would prepend the decomposition text to this message.
            using (StringWriter upperMessage = new StringWriter(hexFormat))
            {
                upperMessage.WriteLine(errorMessage + "upper case.");
                upperMessage.WriteLine("Found uppercase value: {0:x}", upper);
                Assert.AreEqual(puaToUpper, upper, upperMessage.ToString());
            }
            //Check lowercase [13]
            Assert.AreEqual(puaToLower, lower, errorMessage + "lower case.");
            //Check titlecase [14]
            Assert.AreEqual(puaToTitle, title, errorMessage + "title case.");
        }
Exemplo n.º 42
0
		/// <summary>
		/// Asserts that BtConverter.StringsEqualExceptSpace gives the expected answer for the
		/// two strings in both argument orders, and that each string equals itself.
		/// </summary>
		/// <param name="first">One of the strings to compare.</param>
		/// <param name="second">The other string to compare.</param>
		/// <param name="equal">The result expected when comparing the two strings.</param>
		/// <param name="cpe">The character property engine passed to the comparison.</param>
		/// <param name="label">Prefix for the assertion failure messages.</param>
		void VerifyEquality(string first, string second, bool equal, ILgCharacterPropertyEngine cpe, string label)
		{
			// The comparison is expected to be symmetric.
			bool forward = BtConverter.StringsEqualExceptSpace(first, second, cpe);
			Assert.AreEqual(equal, forward, label + " - forward");

			bool backward = BtConverter.StringsEqualExceptSpace(second, first, cpe);
			Assert.AreEqual(equal, backward, label + " - backward");

			// The comparison is expected to be reflexive.
			bool firstToSelf = BtConverter.StringsEqualExceptSpace(first, first, cpe);
			Assert.AreEqual(true, firstToSelf, label + " - first to self");

			bool secondToSelf = BtConverter.StringsEqualExceptSpace(second, second, cpe);
			Assert.AreEqual(true, secondToSelf, label + " - second to self");
		}
Exemplo n.º 43
0
		/// ------------------------------------------------------------------------------------
		/// <summary>
		/// Creates a <see cref="FwCharacterCategorizer"/> that keeps the supplied
		/// valid-character list and character property engine for later use.
		/// </summary>
		/// <param name="validChars">The valid characters; stored as given.</param>
		/// <param name="charPropEngine">The character property engine; stored as given.</param>
		/// ------------------------------------------------------------------------------------
		public FwCharacterCategorizer(ValidCharacters validChars,
			ILgCharacterPropertyEngine charPropEngine)
		{
			// The two assignments are independent of each other.
			m_charPropEngine = charPropEngine;
			m_validChars = validChars;
		}
Exemplo n.º 44
0
		/// <summary>
		/// Releases the character property engine (when it is a COM object), clears the
		/// references held by this object and then calls the base implementation.
		///
		/// 1. If disposing is true, the method has been called directly
		/// or indirectly by a user's code via the Dispose method.
		/// Both managed and unmanaged resources can be disposed.
		///
		/// 2. If disposing is false, the method has been called by the
		/// runtime from inside the finalizer and other managed objects
		/// should not be referenced. Only unmanaged resources can be disposed.
		/// </summary>
		/// <param name="disposing">true when called from Dispose(); false when called from
		/// the finalizer.</param>
		/// <remarks>
		/// Exceptions are allowed to propagate: in a finalizer they are ignored, and when
		/// thrown to client code calling Dispose they indicate a bug that needs fixing.
		///
		/// If subclasses override this method, they should call the base implementation.
		/// </remarks>
		protected override void Dispose(bool disposing)
		{
			// Must not be run more than once.
			if (m_isDisposed)
			{
				return;
			}

			if (disposing)
			{
				// Dispose managed resources here.
			}

			// Dispose unmanaged resources here, whether disposing is true or false.
			// Only a genuine COM wrapper can be handed to ReleaseComObject.
			bool engineIsComObject = m_cpe != null && Marshal.IsComObject(m_cpe);
			if (engineIsComObject)
			{
				Marshal.ReleaseComObject(m_cpe);
			}
			m_cpe = null;

			// Settings, wrapper and current-segment state.
			m_settings = null;
			m_SOWrapper = null;
			m_sSegmentText = null;
			m_sMarker = null;

			// Style proxies and text properties.
			m_styleProxy = null;
			m_vernParaStyleProxy = null;
			m_vernTextProps = null;
			m_analTextProps = null;
			m_styleProxies = null;
			m_notesStyleProxies = null;
			m_lastPara = null;
			m_BookTitleParaProxy = null;
			m_DefaultFootnoteParaProxy = null;
			m_TsStringFactory = null;

			// Footnote, picture and annotation bookkeeping.
			m_BTFootnoteStrBldr = null;
			m_CurrParaPictures = null;
			m_CurrParaFootnotes = null;
			m_BTPendingPictures = null;
			m_CurrBTFootnote = null;
			m_sBtFootnoteParaStyle = null;
			m_BtFootnoteStrBldrs = null;
			m_PendingAnnotations = null;
			m_BTfootnoteIndex = null;

			// Marker strings and annotation definition.
			m_sCharStyleEndMarker = null;
			m_sFootnoteEndMarker = null;
			m_sCharStyleBeginMarker = null;
			m_sFootnoteBeginMarker = null;
			m_scrTranslatorAnnotationDef = null;

			base.Dispose(disposing);
		}
Exemplo n.º 45
0
		/// <summary>
		/// Updates the nth custom part of the variant, where position = 0 for language,
		/// 1 for script, 2 for region.
		/// </summary>
		/// <param name="position">Which custom part to update (0 language, 1 script,
		/// 2 region).</param>
		/// <param name="oldValue">The value currently stored in the property (not necessarily
		/// the old code). The property used to be custom if this equals
		/// <paramref name="markerValue"/>.</param>
		/// <param name="newValue">The new value for the part.</param>
		/// <param name="markerValue">The value stored in the property to indicate that the
		/// part is custom.</param>
		/// <param name="isNewCustom">Whether the new value is a custom one.</param>
		/// <returns>The value that should actually be stored in the appropriate property:
		/// <paramref name="markerValue"/> when the new value is custom, otherwise
		/// <paramref name="newValue"/>.</returns>
		string UpdateVariantPart(int position, string oldValue, string newValue, string markerValue, bool isNewCustom)
		{
			var isOldCustom = oldValue == markerValue;
			if (!isOldCustom && !isNewCustom)
				return newValue; // nothing to do, no change to variant.
			string leadIn;
			string variant;
			string suffix = GetPartsOfVariant(position, out leadIn, out variant);
			// remove oldValue if any
			if (isOldCustom)
			{
				// Search for the separator as a char (ordinal). The string overload performs a
				// culture-sensitive search, which is not appropriate for a language tag.
				int hyphen = suffix.IndexOf('-');
				if (hyphen >= 0)
					suffix = suffix.Substring(hyphen + 1);
				else
					suffix = ""; // remove it all, it's the only part left.
			}
			// append new value if any
			if (isNewCustom)
				suffix = newValue + "-" + suffix;
			// Now re-assemble the variant.
			var privateUse = Combine(leadIn, "-", suffix);
			string newVariant = BuildVariant(variant, privateUse);
			string valueToStore = isNewCustom ? markerValue : newValue;
			// Only touch Variant (and the cached m_cpe) when the variant really changed.
			if (!newVariant.Equals(Variant ?? "", StringComparison.OrdinalIgnoreCase))
			{
				// NOTE(review): m_cpe is cleared before Variant is assigned, presumably because
				// the cached engine depends on the variant - confirm against the rest of the class.
				m_cpe = null;
				Variant = newVariant;
			}
			return valueToStore;
		}
Exemplo n.º 46
0
		/// --------------------------------------------------------------------------------
		/// <summary>
		/// Parses a delimited string of characters via StringUtils.ParseCharString and then
		/// drops every parsed character that already appears in one of the other lists.
		/// </summary>
		/// <param name="chars">The string containing a delimited list of characters.</param>
		/// <param name="delimiter">The delimiter (passed as a string, but really just a single
		/// character).</param>
		/// <param name="cpe">The character property engine.</param>
		/// <param name="langDef">The language definition</param>
		/// <param name="invalidChars">Receives the invalid characters reported by the
		/// underlying parse.</param>
		/// <param name="otherLists">Other lists to check so that a character is not added to
		/// more than one list.</param>
		/// <returns>The parsed characters that are not in any of the other lists, in their
		/// parsed order</returns>
		/// --------------------------------------------------------------------------------
		private static List<string> ParseCharString(string chars, string delimiter,
			ILgCharacterPropertyEngine cpe, LanguageDefinition langDef, out List<string> invalidChars,
			params List<string>[] otherLists)
		{
			List<string> parsed = StringUtils.ParseCharString(chars, delimiter, langDef, cpe,
				out invalidChars);

			// Walk from the end so that removing an entry does not shift the entries that
			// are still to be examined.
			int index = parsed.Count;
			while (--index >= 0)
			{
				bool alreadyListed = IsInAnotherList(parsed[index], otherLists);
				if (alreadyListed)
				{
					parsed.RemoveAt(index);
				}
			}
			return parsed;
		}
Exemplo n.º 47
0
		/// <summary>
		/// Scans m_paraText one full character at a time (advancing with Surrogates.NextChar)
		/// and drives a small state machine that decides where segments begin and end.
		/// Segments are emitted by calling CreateSegment, and break positions are appended to
		/// m_ichMinSegBreaks. Returns immediately when m_paraText is null or empty.
		/// </summary>
		public void Run()
		{
			int ichStartSeg = 0; // First segment always starts at zero.
			SegParseState state = SegParseState.AwaitingFirstLetter;

			// This is the position where we will end a segment if we decide to make another one.
			// When we find a segment-terminating character, we set it to a position one greater.
			// If we find subsequent punctuation, we keep incrementing it till we find a space.
			// If it is equal to ichStartSeg, we haven't found a segment-terminating character.
			int ichLimSeg = 0;

			int ch = 0;
			LgGeneralCharCategory cc = 0;
			if (String.IsNullOrEmpty(m_paraText))
				return;
			m_prevCh = 0; // not numeric or period

			for (int ich = 0; ich < m_paraText.Length; ich = Surrogates.NextChar(m_paraText, ich))
			{
				// Remember the previously examined character before fetching the current one.
				m_prevCh = ch;
				ch = StringUtils.FullCharAt(m_paraText, ich);
				// The property engine is looked up per position through the tracker.
				m_cpe = m_cpeTracker.CharPropEngine(ich);
				cc = m_cpe.get_GeneralCategory(ch);

				// Don't try to deduce this from cc; it can be overridden.
				bool fIsLetter = m_cpe.get_IsWordForming(ch);// Formerly also "|| m_cpe.get_IsNumber(ch)"; numbers are now word-forming in Analysis [LT-10746]
				bool fIsLabel = IsLabelText(m_tssText, m_tssText.get_RunAt(ich), TreatOrcsAsLabel);
				if (ch == StringUtils.kChHardLB)
				{
					// Hard line break, always its own segment.
					if (ich > ichStartSeg)
					{
						// If we've already recorded an EOS for the preceding segment, don't record another.
						if (m_ichMinSegBreaks.Count <= m_csegs)
							m_ichMinSegBreaks.Add(ich);
						CreateSegment(ichStartSeg, ich);
					}
					CreateSegment(ich, ich + 1);
					m_ichMinSegBreaks.Add(ich + 1);
					ichStartSeg = ich + 1;
					state = SegParseState.AwaitingFirstLetter;
					continue;
				}
				switch (state)
				{
					case SegParseState.AwaitingFirstLetter:
						// Nothing significant seen yet: a label or a letter starts real content.
						if (fIsLabel)
							state = SegParseState.ProcessingLabel;
						else if (fIsLetter)
							state = SegParseState.BuildingSegment;
						break;
					case SegParseState.BuildingSegment:
						if (fIsLabel)
						{
							// A label ends the segment being built, right here.
							m_ichMinSegBreaks.Add(ich);
							ichLimSeg = ich;
							CreateSegment(ichStartSeg, ichLimSeg);
							ichStartSeg = ichLimSeg;
							state = SegParseState.ProcessingLabel;
							break;
						}
						if (IsEosChar(ch, cc, ich))
						{
							// Record the break position; the segment itself is created later.
							m_ichMinSegBreaks.Add(ich);
							state = SegParseState.FoundEosChar;
						}
						break;
					case SegParseState.FoundEosChar:
						if (fIsLabel)
						{
							ichLimSeg = ich;
							CreateSegment(ichStartSeg, ichLimSeg);
							ichStartSeg = ichLimSeg;
							state = SegParseState.ProcessingLabel;
							break;
						}
						if (cc == LgGeneralCharCategory.kccZs)
						{
							// We will end the segment here, provided we find valid content for
							// a following segment.
							state = SegParseState.FoundBlankAfterEos;
							ichLimSeg = ich + 1;
						}
						else if (fIsLetter)
						{
							// If a letter happens after a segment break, assume it's a new sentence
							// even if the preceding characters form an ellipsis (...)
							// This is the simplest way to handle the case where the user has
							// deleted a paragraph break resulting in two sentences separated
							// only by a segment break character (e.g. "first sentence.second sentence.")
							// as tested by TextEditingTexts.DeleteParagraphBreak().
							ichLimSeg = ich;
							CreateSegment(ichStartSeg, ichLimSeg);
							ichStartSeg = ichLimSeg;
							state = SegParseState.BuildingSegment;
						}
						break;
					case SegParseState.FoundBlankAfterEos:
					case SegParseState.FoundNonBlankAfterBlankAfterEos:
						if (fIsLabel)
						{
							ichLimSeg = ich;
							CreateSegment(ichStartSeg, ichLimSeg);
							ichStartSeg = ichLimSeg;
							state = SegParseState.ProcessingLabel;
							break;
						}
						if (fIsLetter)
						{
							// We found a segment break character, a following blank,
							// and something to make a following segment from.
							// Make the previous segment as determined.
							CreateSegment(ichStartSeg, ichLimSeg);
							ichStartSeg = ichLimSeg;
							state = SegParseState.BuildingSegment;
						}
						else if (cc == LgGeneralCharCategory.kccZs)
						{
							// Found a sequence of trailing spaces; put them all in the previous
							// segment, but only if we haven't seen a non-blank.
							if (state == SegParseState.FoundBlankAfterEos)
								ichLimSeg = ich + 1;
						}
						else
						{
							// Non-letter, non-blank: we'll stop incrementing ichLimSeg.
							state = SegParseState.FoundNonBlankAfterBlankAfterEos;
						}
						break;
					case SegParseState.ProcessingLabel:
						// A label segment is allowed to absorb following white space, but anything else non-label
						// will break it.
						if (fIsLabel || cc == LgGeneralCharCategory.kccZs)
							break;
						m_ichMinSegBreaks.Add(ich);
						ichLimSeg = ich;
						CreateSegment(ichStartSeg, ichLimSeg);
						ichStartSeg = ichLimSeg;
						state = SegParseState.BuildingSegment;
						break;
				}
			}
			// We reached the end of the loop. Make a segment out of anything left over.
			if (ichStartSeg < m_paraText.Length)
				CreateSegment(ichStartSeg, m_paraText.Length);

		}
Exemplo n.º 48
0
		/// ------------------------------------------------------------------------------------
		/// <summary>
		/// Asks the character property engine whether the specified character is a mark.
		/// </summary>
		/// <param name="chr">The character.</param>
		/// <param name="cpe">The character property engine.</param>
		/// <returns>The engine's answer for <paramref name="chr"/>.</returns>
		/// ------------------------------------------------------------------------------------
		private static bool IsMark(char chr, ILgCharacterPropertyEngine cpe)
		{
			bool engineSaysMark = cpe.get_IsMark(chr);
			return engineSaysMark;
		}
Exemplo n.º 49
0
		/// ------------------------------------------------------------------------------------
		/// <summary>
		/// Determines whether the specified string is a valid string consisting of exactly one
		/// initial base character followed by zero or more legally placed combining marks.
		/// </summary>
		/// <param name="chr">The string to check.</param>
		/// <param name="cpe">The character property engine.</param>
		/// <returns><c>true</c> if ValidateCharacterSequence returns a non-empty string whose
		/// decomposed form equals the decomposed form of <paramref name="chr"/>; otherwise
		/// <c>false</c>.</returns>
		/// ------------------------------------------------------------------------------------
		public static bool IsValidChar(string chr, ILgCharacterPropertyEngine cpe)
		{
			string validated = ValidateCharacterSequence(chr, cpe);

			// An empty result means nothing valid was found in the input.
			if (validated.Length == 0)
				return false;

			// Both sides are decomposed before comparing because multiple base letters can
			// compose into a single base letter (Korean is the only known example). In that
			// case ValidateCharacterSequence returns the composed character: the decomposed
			// sequence should not be stored as a valid character, since the characters check
			// always operates on individual base characters. (For that check to work properly
			// for Korean when the valid characters list contains composed characters, it would
			// need to try to compose the data being checked.)
			return cpe.NormalizeD(validated) == cpe.NormalizeD(chr);
		}
Exemplo n.º 50
0
		/// ------------------------------------------------------------------------------------
		/// <summary>
		/// Parses the specified string into a list of characters, ignoring any bogus characters
		/// (digraphs, undefined Unicode characters, lone diacritics, etc.). The unparsed list
		/// is a string of valid characters delimited with the specified delimiter. This
		/// convenience overload forwards to the four-argument overload, passing null as the
		/// last argument.
		/// </summary>
		/// <param name="chars">The string containing a delimited list of characters.</param>
		/// <param name="delimiter">The delimiter (passed as a string, but really just a single
		/// character).</param>
		/// <param name="cpe">The character property engine.</param>
		/// <returns>The list produced by the four-argument overload.</returns>
		/// ------------------------------------------------------------------------------------
		public static List<string> ParseCharString(string chars, string delimiter, ILgCharacterPropertyEngine cpe)
		{
			List<string> parsed = ParseCharString(chars, delimiter, cpe, null);
			return parsed;
		}
Exemplo n.º 51
0
		/// <summary>
		/// Gets a suitable character property engine for the specified character of the
		/// original string. The engine and the character range it applies to are cached, so
		/// a new lookup is only made when <paramref name="ich"/> falls outside the cached
		/// range, and the engine is only replaced when the writing system changes.
		/// </summary>
		/// <param name="ich">Index of the character in the original string.</param>
		/// <returns>The character property engine to use at <paramref name="ich"/>.</returns>
		public ILgCharacterPropertyEngine CharPropEngine(int ich)
		{
			// Fast path: still inside the range the cached engine was obtained for.
			bool inCachedRange = ich >= m_ichMinCpe && ich < m_ichLimCpe;
			if (inCachedRange)
				return m_cpe;

			int wsAtIch;
			if (m_tssText != null)
			{
				// Use the run containing ich: its bounds become the cached range.
				int runIndex = m_tssText.get_RunAt(ich);
				m_tssText.GetBoundsOfRun(runIndex, out m_ichMinCpe, out m_ichLimCpe);
				wsAtIch = m_tssText.get_WritingSystem(runIndex);
			}
			else
			{
				// No string available: pick the user writing system, valid for any index.
				wsAtIch = m_wsf.UserWs;
				m_ichMinCpe = 0;
				m_ichLimCpe = int.MaxValue;
			}

			// A different run does not necessarily mean a different writing system.
			if (wsAtIch == m_wsCpe)
				return m_cpe;

			m_wsCpe = wsAtIch;
			if (wsAtIch == -1)
			{
				// The run has no writing system specified. This happens occasionally in
				// poorly-written tests; fall back to a default engine.
				m_cpe = LgIcuCharPropEngineClass.Create();
			}
			else
			{
				m_cpe = m_wsf.get_CharPropEngine(wsAtIch);
			}
			return m_cpe;
		}
 /// ------------------------------------------------------------------------------------
 /// <summary>
 /// Creates a <see cref="DummyCharPropEngine"/> backed by a dynamic mock of
 /// <see cref="ILgCharacterPropertyEngine"/>, keeping both the mock and the instance it
 /// exposes.
 /// </summary>
 /// ------------------------------------------------------------------------------------
 public DummyCharPropEngine()
 {
     // Build the mock first, then store it and the instance it generates.
     DynamicMock mock = new DynamicMock(typeof(ILgCharacterPropertyEngine));
     m_cpe = mock;
     m_mockCPE = (ILgCharacterPropertyEngine)mock.MockInstance;
 }
Exemplo n.º 53
0
		/// ------------------------------------------------------------------------------------
		/// <summary>
		/// Splits a delimited string into a list of characters and also hands back the
		/// entries that were rejected as invalid.
		/// </summary>
		/// <param name="chars">The string containing a delimited list of characters.</param>
		/// <param name="delimiter">The delimiter (passed as a string, but really just a single
		/// character).</param>
		/// <param name="cpe">The character property engine.</param>
		/// <param name="invalidCharacters">Receives a newly created list that the parse fills
		/// with the invalid entries it encounters (never null on return).</param>
		/// <returns>List of unique characters</returns>
		/// ------------------------------------------------------------------------------------
		public static List<string> ParseCharString(string chars, string delimiter, ILgCharacterPropertyEngine cpe, out List<string> invalidCharacters)
		{
			// The same list instance is given to the caller and to the worker overload.
			List<string> rejected = new List<string>();
			invalidCharacters = rejected;
			return ParseCharString(chars, delimiter, cpe, rejected);
		}
Exemplo n.º 54
0
		/// <summary>
		/// Releases the resources held by this object. Executes in two distinct scenarios.
		///
		/// 1. If disposing is true, the method has been called directly
		/// or indirectly by a user's code via the Dispose method.
		/// Both managed and unmanaged resources can be disposed.
		///
		/// 2. If disposing is false, the method has been called by the
		/// runtime from inside the finalizer and you should not reference (access)
		/// other managed objects, as they may already have been finalized.
		/// Only unmanaged resources can be disposed.
		/// </summary>
		/// <param name="disposing"><c>true</c> when called from Dispose; <c>false</c> when
		/// called from the finalizer.</param>
		/// <remarks>
		/// If any exceptions are thrown, that is fine.
		/// If the method is being done in a finalizer, it will be ignored.
		/// If it is thrown by client code calling Dispose,
		/// it needs to be handled by fixing the bug.
		///
		/// If subclasses override this method, they should call the base implementation.
		/// </remarks>
		protected override void Dispose(bool disposing)
		{
			//Debug.WriteLineIf(!disposing, "****************** " + GetType().Name + " 'disposing' is false. ******************");
			// Must not be run more than once.
			if (IsDisposed)
			{
				// A previous run clears m_cpe, so it must already be null here.
				Debug.Assert(m_cpe == null);
				return;
			}

			if (disposing)
			{
				// Log disposing event - removed logging as part of fix for TE-6551
				//string message = "Disposing TeEditingHelper...\n" +
				//    "Stack Trace:\n" + Environment.StackTrace;
				//SIL.Utils.Logger.WriteEvent(message);

				// Dispose managed resources here.
				// Unregister the message filter from the application, if one was installed.
				if (m_InsertVerseMessageFilter != null)
					Application.RemoveMessageFilter(m_InsertVerseMessageFilter);

				if (m_syncHandler != null)
				{
					// Unhook our handlers before disposing the sync handler.
					m_syncHandler.ReferenceChanged -= ScrollToReference;
					m_syncHandler.AnnotationChanged -= ScrollToCitedText;
					m_syncHandler.Dispose();
				}

				if (m_annotationAdjuster != null)
					m_annotationAdjuster.Dispose();
			}

			// Dispose unmanaged resources here, whether disposing is true or false.
			PasteFixTssEvent -= RemoveHardFormatting;
			// Drop references so nothing is kept alive through this object.
			m_syncHandler = null;
			m_scr = null;
			m_bookFilter = null;
			m_InsertVerseMessageFilter = null;
			m_restoreCursor = null;
			m_lastFootnoteTextRepSelection = null;
			m_oldReference = null;
			if (m_cpe != null)
			{
				// Only a real COM wrapper can be released; any other implementation is
				// just dropped.
				if (Marshal.IsComObject(m_cpe))
					Marshal.ReleaseComObject(m_cpe);
				m_cpe = null;
			}
			m_annotationAdjuster = null;
			// Let the base class do its own cleanup last.
			base.Dispose(disposing);
		}
Exemplo n.º 55
0
		/// ------------------------------------------------------------------------------------
		/// <summary>
		/// Worker shared by the public overloads: splits the delimited string, keeps each
		/// valid entry once (in order of first appearance) and optionally records the
		/// entries that fail validation.
		/// </summary>
		/// <param name="chars">The string containing a delimited list of characters. A null
		/// or empty string produces an empty list.</param>
		/// <param name="delimiter">The delimiter (passed as a string, but really just a single
		/// character). A null or empty delimiter is treated as a single space.</param>
		/// <param name="cpe">The character property engine.</param>
		/// <param name="invalidCharacters">Collector for bogus characters (digraphs, undefined
		/// Unicode characters, lone diacritics, etc.) encountered. If <c>null</c>, bogus
		/// characters are ignored.</param>
		/// <returns>List of unique characters</returns>
		/// ------------------------------------------------------------------------------------
		private static List<string> ParseCharString(string chars, string delimiter, ILgCharacterPropertyEngine cpe, List<string> invalidCharacters)
		{
			if (string.IsNullOrEmpty(chars))
				return new List<string>();

			string separator = string.IsNullOrEmpty(delimiter) ? " " : delimiter;
			string[] candidates = chars.Split(separator.ToCharArray(), StringSplitOptions.RemoveEmptyEntries);

			List<string> uniqueChars = new List<string>(candidates.Length);
			bool fCollectInvalid = invalidCharacters != null;

			for (int i = 0; i < candidates.Length; i++)
			{
				string candidate = candidates[i];

				if (!IsValidChar(candidate, cpe))
				{
					if (fCollectInvalid)
						invalidCharacters.Add(candidate);
					continue;
				}

				if (!uniqueChars.Contains(candidate))
					uniqueChars.Add(candidate);
			}

			// When the separator is a space and the input began with one, that leading
			// space was itself a list entry that the Split above swallowed; restore it at
			// the front of the list.
			bool fLeadingSpaceLost = separator == " " && chars[0] == separator[0];
			if (fLeadingSpaceLost)
				uniqueChars.Insert(0, separator);

			return uniqueChars;
		}
Exemplo n.º 56
0
		/// ------------------------------------------------------------------------------------
		/// <summary>
		/// Removes the final character from the paragraph builder when that character is a
		/// separator. Call this after the last AppendRun for the current paragraph and before
		/// CreateParagraph.
		/// </summary>
		/// ------------------------------------------------------------------------------------
		public void TrimTrailingSpaceInPara()
		{
			CheckDisposed();

			// Fetch the character property engine from the cache on first use.
			if (m_cpe == null)
			{
				m_cpe = m_cache.UnicodeCharProps;
			}

			// Nothing has been sent to the builder, so there is nothing to trim.
			if (Length == 0)
				return;

			// Leave the text alone unless its last character is a separator.
			if (!m_cpe.get_IsSeparator(FinalCharInPara))
				return;

			// Replace the last character with nothing.
			m_ParaStrBldr.Replace(Length - 1, Length, null, null);
		}
Exemplo n.º 57
0
		/// ------------------------------------------------------------------------------------
		/// <summary>
		/// Finds the first base character/combining diacritic combination and removes any
		/// remaining characters. Characters before the first base character are skipped. A
		/// single space (Zs), line separator (Zl) or format character (Cf) on its own is
		/// returned as is.
		/// </summary>
		/// <param name="origChars">The original string of characters. A null or empty string
		/// yields an empty result.</param>
		/// <param name="cpe">The character property engine.</param>
		/// <returns>The first base character followed by its marks; or the NFKC-composed form
		/// of the whole input when a non-mark follows the base and that form is exactly one
		/// character long; or an empty string when no base character is found.</returns>
		/// ------------------------------------------------------------------------------------
		public static string ValidateCharacterSequence(string origChars, ILgCharacterPropertyEngine cpe)
		{
			// Nothing to examine; also keeps a null argument from crashing on .Length.
			if (string.IsNullOrEmpty(origChars))
				return string.Empty;

			// Allow spaces (Zs), hard line breaks (Zl), and other formatting characters (Cf) in
			// isolation only.
			if (origChars.Length == 1)
			{
				// Ask the engine once rather than once per category compared.
				LgGeneralCharCategory category = cpe.get_GeneralCategory(origChars[0]);
				if (category == LgGeneralCharCategory.kccZl ||
					category == LgGeneralCharCategory.kccZs ||
					category == LgGeneralCharCategory.kccCf)
				{
					return origChars;
				}
			}

			var newChars = new StringBuilder();
			bool baseFound = false;
			bool fPrecedingCharWasMark = false;

			// Extract first base character and any following diacritics.
			// NOTE(review): the scan works on UTF-16 code units, so a surrogate pair is
			// examined as two separate chars.
			for (int ich = 0; ich < origChars.Length; ich++)
			{
				char chr = origChars[ich];

				if (!baseFound)
				{
					// If this is not a valid base character (letter, number, private-use,
					// punctuation or symbol), keep looking.
					if (!cpe.get_IsLetter(chr) && !cpe.get_IsNumber(chr) &&
						cpe.get_GeneralCategory(chr) != LgGeneralCharCategory.kccCo &&
						!cpe.get_IsPunctuation(chr) && !cpe.get_IsSymbol(chr))
					{
						continue;
					}

					baseFound = true;
				}
				else if (IsMark(chr, cpe))
				{
					fPrecedingCharWasMark = true;
				}
				else if ((chr == '\u200C' || chr == '\u200D') && fPrecedingCharWasMark &&
					origChars.Length > ich + 1 && IsMark(origChars[ich + 1], cpe))
				{
					// A ZWNJ/ZWJ is kept only when it sits between two marks.
					fPrecedingCharWasMark = false;
				}
				else
				{
					// Not a diacritic and not a joiner between diacritics: discard the rest
					// of the string. This handles special situations like Korean, where
					// multiple base letters (representing phonemes) can compose into a
					// single base letter (representing a syllable).
					string composed = Icu.Normalize(origChars, Icu.UNormalizationMode.UNORM_NFKC);
					if (composed.Length == 1)
						return composed;
					break;
				}

				// Reaching this point means a base character has been found and chr
				// belongs to the sequence being kept.
				newChars.Append(chr);
			}

			return newChars.ToString();
		}
Exemplo n.º 58
0
		/// <summary>
		/// Releases what this object holds. It is reached in two ways:
		///
		/// 1. With disposing set to true, when user code called Dispose directly or
		/// indirectly. Both managed and unmanaged resources can be disposed.
		///
		/// 2. With disposing set to false, when the runtime called it from inside the
		/// finalizer. Other managed objects should not be referenced (accessed) then;
		/// only unmanaged resources can be disposed.
		/// </summary>
		/// <param name="disposing"><c>true</c> when called via Dispose, <c>false</c> when
		/// called from the finalizer.</param>
		/// <remarks>
		/// If any exceptions are thrown, that is fine.
		/// In a finalizer they are ignored; when thrown to client code calling Dispose,
		/// the underlying bug needs to be fixed.
		///
		/// Subclasses that override this method should call the base implementation.
		/// </remarks>
		protected virtual void Dispose(bool disposing)
		{
			//Debug.WriteLineIf(!disposing, "****************** " + GetType().Name + " 'disposing' is false. ******************");
			// Running a second time is a no-op.
			if (m_isDisposed)
				return;

			// There is nothing that is handled only when 'disposing' is true; everything
			// below runs in both scenarios.

			// Release the engine if it is a real COM wrapper, then forget it.
			if (m_cpe != null && Marshal.IsComObject(m_cpe))
				Marshal.ReleaseComObject(m_cpe);
			m_cpe = null;

			// Drop the remaining references.
			m_ParaProps = null;
			m_ParaStrBldr = null;
			m_ParaStyle = null;
			m_cache = null;

			m_isDisposed = true;
		}
Exemplo n.º 59
0
		/// ------------------------------------------------------------------------------------
		/// <summary>
		/// Creates a <see cref="DummyCharPropEngine"/> backed by a dynamic mock of
		/// <see cref="ILgCharacterPropertyEngine"/>, keeping both the mock and the instance it
		/// exposes.
		/// </summary>
		/// ------------------------------------------------------------------------------------
		public DummyCharPropEngine()
		{
			// Build the mock first, then store it and the instance it generates.
			DynamicMock mock = new DynamicMock(typeof(ILgCharacterPropertyEngine));
			m_cpe = mock;
			m_mockCPE = (ILgCharacterPropertyEngine)mock.MockInstance;
		}
Exemplo n.º 60
0
		/// ------------------------------------------------------------------------------------
		/// <summary>
		/// Initializes this object: creates the ICU character property engine, stores the
		/// language definition, resets the object and parses the word-forming, numeric and
		/// other character lists, in that order.
		/// </summary>
		/// <param name="langDef">The language definition</param>
		/// <param name="invalidChars">Receives the invalid characters encountered across all
		/// three lists, in parse order.</param>
		/// ------------------------------------------------------------------------------------
		private void Init(LanguageDefinition langDef, out List<string> invalidChars)
		{
			m_cpe = LgIcuCharPropEngineClass.Create();
			m_langDef = langDef;

			Reset();

			// Turn the space placeholder in the "other" list back into a real space.
			if (Other != null)
			{
				Other = Other.Replace(kSpaceReplacment, " ");
			}

			// Word-forming characters come first; their rejects start the output list.
			m_WordFormingCharacters = ParseCharString(WordForming, ksDelimiter, m_cpe, langDef, out invalidChars);

			// Each later parse is also handed the lists parsed before it.
			// NOTE(review): presumably so a character already placed in an earlier category
			// is not accepted again -- confirm against that ParseCharString overload.
			List<string> invalidNumeric;
			m_NumericCharacters = ParseCharString(Numeric, ksDelimiter, m_cpe, langDef,
				out invalidNumeric, m_WordFormingCharacters);
			invalidChars.AddRange(invalidNumeric);

			List<string> invalidOther;
			m_OtherCharacters = ParseCharString(Other, ksDelimiter, m_cpe, langDef,
				out invalidOther, m_WordFormingCharacters, m_NumericCharacters);
			invalidChars.AddRange(invalidOther);
		}