Exemplo n.º 1
0
 /// <summary>
 /// Checks that Icu.ToLower, called with the "en" locale, turns upper-case, lower-case and
 /// mixed-case spellings of "abc" into "abc" and leaves punctuation-only input unchanged.
 /// Enhance JohnT: ideally this would also cover the case where the output is more than 10
 /// characters longer than the input, but no Unicode character that actually becomes longer
 /// when converted to lower case has been found yet.
 /// </summary>
 public void TestToLower()
 {
     const string locale = "en";

     // Each row is { input, expected lower-case result }.
     string[][] cases =
     {
         new string[] { "ABC", "abc" },
         new string[] { "abc", "abc" },
         new string[] { "Abc", "abc" },
         new string[] { ";,.", ";,." }
     };

     foreach (string[] testCase in cases)
     {
         Assert.AreEqual(testCase[1], Icu.ToLower(testCase[0], locale));
     }
 }
Exemplo n.º 2
0
 /// <summary>
 /// Guess the best analysis for an occurrence, taking its position in the segment into account:
 /// for a wordform at index 0 the lower-cased form is tried first, so that a capitalized
 /// sentence-initial word can match the analysis of its lowercase alternative.
 /// </summary>
 /// <param name="occurrence">The occurrence to find a guess for.</param>
 /// <returns>
 /// The guess found for the lower-cased wordform if there is one; otherwise null when the
 /// occurrence's BaselineWs is -1; otherwise the guess for the occurrence's own analysis.
 /// </returns>
 public IAnalysis GetBestGuess(AnalysisOccurrence occurrence)
 {
     // TODO: make it look for the first word in the sentence...may not be at Index 0!
     bool isInitialWordform = occurrence.Analysis is IWfiWordform && occurrence.Index == 0;

     if (isInitialWordform)
     {
         ITsString baseline = occurrence.BaselineText;
         CoreWritingSystemDefinition wsDefinition =
             Cache.ServiceLocator.WritingSystemManager.Get(baseline.get_WritingSystemAt(0));
         string lowered = Icu.ToLower(baseline.Text, wsDefinition.IcuLocale);

         // Only look up the lower-cased wordform when lower-casing actually changed the text.
         if (lowered != baseline.Text)
         {
             int wsAtStart = TsStringUtils.GetWsAtOffset(baseline, 0);
             ITsString tssLowered = TsStringUtils.MakeString(lowered, wsAtStart);
             IWfiWordformRepository wordforms = Cache.ServiceLocator.GetInstance <IWfiWordformRepository>();
             IWfiWordform lowercaseWf;
             IAnalysis guessFromLowercase;

             if (wordforms.TryGetObject(tssLowered, out lowercaseWf) &&
                 TryGetBestGuess(lowercaseWf, occurrence.BaselineWs, out guessFromLowercase))
             {
                 return(guessFromLowercase);
             }
         }
     }

     // A BaselineWs of -1 happens with empty translation lines; there is nothing to guess then.
     return(occurrence.BaselineWs == -1
         ? null
         : GetBestGuess(occurrence.Analysis, occurrence.BaselineWs));
 }
Exemplo n.º 3
0
        /// <summary>
        /// Find the lexical entries for a wordform and show them in a SummaryDialogForm. If the
        /// wordform has no entries, the lookup is retried with the lower-cased wordform; if that
        /// also finds nothing, the find-entry dialog is shown so that a single entry can be chosen.
        /// </summary>
        /// <param name="cache">Cache used for the lookups and passed on to the dialogs.</param>
        /// <param name="owner">Owner window for the dialogs.</param>
        /// <param name="mediatorIn">Mediator handed to EnsureFlexTypeSetup.</param>
        /// <param name="helpProvider">Help topic provider passed to the summary dialog.</param>
        /// <param name="helpFileKey">Help file key passed to the summary dialog.</param>
        /// <param name="tssWfIn">The wordform to look up.</param>
        internal static void DisplayEntries(FdoCache cache, IWin32Window owner, Mediator mediatorIn,
                                            IHelpTopicProvider helpProvider, string helpFileKey, ITsString tssWfIn)
        {
            ITsString  tssWf = tssWfIn;
            List <int> rghvo = LexEntryUi.FindEntriesForWordform(cache, tssWf);

            // If we do not find a match for the word then try converting it to lowercase and see if
            // there is an entry in the lexicon for the wordform in lowercase. This is needed for
            // occurrences of words which are capitalized at the beginning of sentences.  LT-7444 RickM
            if (rghvo == null || rghvo.Count == 0)
            {
                // We need to be careful when converting to lowercase, therefore use Icu.ToLower()
                // with the locale of the writing system found at the start of the string.
                int wsWf = StringUtils.GetWsAtOffset(tssWf, 0);
                // NOTE(review): get_EngineOrNull presumably can return null for an unknown writing
                // system, which would throw here - confirm whether that can happen for this caller.
                string        wsLocale = cache.LanguageWritingSystemFactoryAccessor.get_EngineOrNull(wsWf).IcuLocale;
                string        sLower   = Icu.ToLower(tssWf.Text, wsLocale);
                ITsTextProps  ttp      = tssWf.get_PropertiesAt(0);
                ITsStrFactory tsf      = TsStrFactoryClass.Create();
                // From here on the lower-cased string (with the original properties) is the one
                // that is looked up and passed to the dialogs.
                tssWf = tsf.MakeStringWithPropsRgch(sLower, sLower.Length, ttp);
                rghvo = LexEntryUi.FindEntriesForWordform(cache, tssWf);
            }

            StringTable   stOrig;
            Mediator      mediator;
            IVwStylesheet styleSheet;
            bool          fRestore = EnsureFlexTypeSetup(cache, mediatorIn, out stOrig, out mediator, out styleSheet);

            try
            {
                if (rghvo == null || rghvo.Count == 0)
                {
                    int hvoLe = ShowFindEntryDialog(cache, mediator, tssWf, owner);
                    if (hvoLe == 0)
                    {
                        // Nothing was chosen; the finally block restores the string table.
                        return;
                    }
                    rghvo = new List <int>(1);
                    rghvo.Add(hvoLe);
                }
                using (SummaryDialogForm form =
                           new SummaryDialogForm(rghvo, tssWf, helpProvider, helpFileKey, styleSheet, cache, mediator))
                {
                    form.ShowDialog(owner);
                    if (form.ShouldLink)
                    {
                        form.LinkToLexicon();
                    }
                }
            }
            finally
            {
                // Restore the original string table in the mediator if needed. Doing this in a
                // finally block keeps the mediator consistent even when a dialog throws.
                if (fRestore)
                {
                    mediator.StringTbl = stOrig;
                }
            }
        }
Exemplo n.º 4
0
        /// <summary>
        /// Get the lead character, either a single character or a composite matching something
        /// in the sort rules.  (We need to support multi-graph letters.  See LT-9244.)
        /// The entry is lower-cased, the equivalence mappings from GetDigraphs are applied, and the
        /// longest digraph the result starts with is preferred over the plain first character.
        /// If the collating engine reports the candidate as ignored, the search continues with the
        /// text that follows it.
        /// </summary>
        /// <param name="sEntryNFD">The entry text; expected to be in NFD form.</param>
        /// <param name="sWs">Writing system identifier, used as the ICU locale and collator locale.</param>
        /// <returns>The lower-cased lead character or multigraph, or "" if there is none.</returns>
        private string GetLeadChar(string sEntryNFD, string sWs)
        {
            if (string.IsNullOrEmpty(sEntryNFD))
            {
                return("");
            }
            string sEntry = Icu.ToLower(sEntryNFD, sWs);
            Dictionary <string, string> mapChars;
            Set <string> sortChars = GetDigraphs(sWs, out mapChars);

            // Apply the equivalence mappings repeatedly until the text stops changing, so that
            // a replacement which produces another mapped graph is handled as well.
            bool fChanged;
            do
            {
                string sBefore = sEntry;
                foreach (KeyValuePair <string, string> mapping in mapChars)
                {
                    sEntry = sEntry.Replace(mapping.Key, mapping.Value);
                }
                fChanged = sBefore != sEntry;
            } while (fChanged);

            // Guard against lower-casing or mapping leaving nothing to take a lead character from.
            if (string.IsNullOrEmpty(sEntry))
            {
                return("");
            }

            // Take a whole surrogate pair when the text starts with one, so that a character
            // outside the Basic Multilingual Plane is never cut in half.
            int    cchFirst = (sEntry.Length > 1 && char.IsSurrogatePair(sEntry[0], sEntry[1])) ? 2 : 1;
            string sFirst   = sEntry.Substring(0, cchFirst);

            // Prefer the longest digraph the entry starts with. The comparison is ordinal so that
            // it agrees with the ordinal Replace above and the Substring(sFirst.Length) below.
            foreach (string sChar in sortChars)
            {
                if (sFirst.Length < sChar.Length && sEntry.StartsWith(sChar, StringComparison.Ordinal))
                {
                    sFirst = sChar;
                }
            }

            // We don't want sFirst for an ignored first character or digraph.
            // NOTE(review): the collating engine is opened here on every call and never explicitly
            // closed - confirm whether it needs to be released.
            ILgCollatingEngine lce = LgIcuCollatorClass.Create();

            lce.Open(sWs);
            byte[] ka = (byte[])lce.get_SortKeyVariant(sFirst, LgCollatingOptions.fcoDefault);
            // A sort key whose first byte is 1 is treated as "ignored by the collator"; in that
            // case look for the lead character in the text following sFirst.
            if (ka[0] == 1)
            {
                string sRest = sEntry.Substring(sFirst.Length);
                return(GetLeadChar(sRest, sWs));
            }
            return(sFirst);
        }
Exemplo n.º 5
0
 /// <summary>
 /// Convert a string to its lower case equivalent by delegating to Icu.ToLower with the
 /// locale held in m_icuLocale.
 /// </summary>
 /// <param name="input">The text to convert.</param>
 /// <returns>The result of Icu.ToLower for the input and m_icuLocale.</returns>
 public string ToLower(string input)
 {
     string lowered = Icu.ToLower(input, m_icuLocale);

     return(lowered);
 }
Exemplo n.º 6
0
        /// <summary>
        /// Get the set of significant digraphs (multigraphs) for the writing system, together with
        /// a map from equivalent graphs to their primary graph.  At the moment, these are derived
        /// from the ICU sorting rules of the writing system's first collation.  Results are cached
        /// per writing system in m_mapWsDigraphs and m_mapWsMapChars.
        /// </summary>
        /// <param name="sWs">
        /// Writing system identifier; used as the cache key, to look up the writing system, to load
        /// ICU rules, and as the locale for lower-casing.
        /// </param>
        /// <param name="mapChars">
        /// Receives the map from a lower-cased NFD graph to the lower-cased primary graph that the
        /// rules declare it equivalent to.
        /// </param>
        /// <returns>
        /// The lower-cased NFD graphs longer than one character found in the rules; empty when no
        /// usable rules are available.
        /// </returns>
        private Set <string> GetDigraphs(string sWs, out Dictionary <string, string> mapChars)
        {
            Set <string> digraphs = null;

            if (m_mapWsDigraphs.TryGetValue(sWs, out digraphs))
            {
                mapChars = m_mapWsMapChars[sWs];
                return(digraphs);
            }
            digraphs = new Set <string>();
            mapChars = new Dictionary <string, string>();
            int    ws        = m_cache.LanguageWritingSystemFactoryAccessor.GetWsFromStr(sWs);
            string sIcuRules = null;

            if (ws > 0)
            {
                IWritingSystem wsX = m_cache.LanguageWritingSystemFactoryAccessor.get_EngineOrNull(ws);
                // get_EngineOrNull may hand back null; treat that like "no collation rules".
                if (wsX != null && wsX.CollationCount > 0)
                {
                    ICollation coll = wsX.get_Collation(0);
                    sIcuRules = coll.IcuRules;
                    if (String.IsNullOrEmpty(sIcuRules))
                    {
                        // The ICU rules may not be loaded for built-in languages, but are
                        // still helpful for our purposes here.
                        string sIcuOrig = sIcuRules;
                        coll.LoadIcuRules(sWs);
                        sIcuRules     = coll.IcuRules;
                        coll.IcuRules = sIcuOrig;                               // but we don't want to actually change anything!
                    }
                }
            }
            if (!String.IsNullOrEmpty(sIcuRules) && sIcuRules.Contains("&"))
            {
                string[] rgsRules = sIcuRules.Split(new char[] { '&' }, StringSplitOptions.RemoveEmptyEntries);
                for (int i = 0; i < rgsRules.Length; ++i)
                {
                    string sRule = rgsRules[i];
                    // This is a valid rule that specifies that the digraph aa should be ignored
                    // [last tertiary ignorable] = \u02bc = aa
                    // but the code here will ignore this. YAGNI the chances of a user specifying a digraph
                    // as ignorable may never happen.
                    int ichBracket = sRule.IndexOf('[');
                    if (ichBracket >= 0)
                    {
                        // Drop everything from the first '[' onwards.
                        sRule = sRule.Substring(0, ichBracket);
                    }
                    if (String.IsNullOrEmpty(sRule.Trim()))
                    {
                        continue;
                    }
                    // Secondary and tertiary differences are treated as equivalences.
                    sRule = sRule.Replace("<<<", "=");
                    sRule = sRule.Replace("<<", "=");
                    if (sRule.Contains("<"))
                    {
                        // "&N<ng<<<Ng<ny<<<Ny" => "&N<ng=Ng<ny=Ny"
                        // "&N<ñ<<<Ñ" => "&N<ñ=Ñ"
                        // There are other issues we are not handling properly such as the next line
                        // &N<\u006e\u0067
                        string[] rgsPieces = sRule.Split(new char[] { '<', '=' }, StringSplitOptions.RemoveEmptyEntries);
                        for (int j = 0; j < rgsPieces.Length; ++j)
                        {
                            string sGraph = rgsPieces[j];
                            sGraph = sGraph.Trim();
                            if (String.IsNullOrEmpty(sGraph))
                            {
                                continue;
                            }
                            sGraph = Icu.Normalize(sGraph, Icu.UNormalizationMode.UNORM_NFD);
                            // Only graphs that are still longer than one character in NFD form
                            // are recorded as multigraphs.
                            if (sGraph.Length > 1)
                            {
                                sGraph = Icu.ToLower(sGraph, sWs);
                                if (!digraphs.Contains(sGraph))
                                {
                                    digraphs.Add(sGraph);
                                }
                            }
                        }
                    }
                    else if (sRule.Contains("="))
                    {
                        // "&ae<<æ<<<Æ" => "&ae=æ=Æ"
                        string[] rgsPieces = sRule.Split(new char[] { '=' }, StringSplitOptions.RemoveEmptyEntries);
                        if (rgsPieces.Length == 0)
                        {
                            // The rule consisted of nothing but '=' characters; there is no
                            // primary graph to map to.
                            continue;
                        }
                        string sGraphPrimary = rgsPieces[0].Trim();
                        Debug.Assert(!String.IsNullOrEmpty(sGraphPrimary));
                        // NOTE(review): unlike the other graphs, the primary graph is lower-cased
                        // but not normalized to NFD - confirm that this is intended.
                        sGraphPrimary = Icu.ToLower(sGraphPrimary, sWs);
                        for (int j = 1; j < rgsPieces.Length; ++j)
                        {
                            string sGraph = rgsPieces[j];
                            sGraph = sGraph.Trim();
                            if (String.IsNullOrEmpty(sGraph))
                            {
                                continue;
                            }
                            sGraph = Icu.Normalize(sGraph, Icu.UNormalizationMode.UNORM_NFD);
                            sGraph = Icu.ToLower(sGraph, sWs);
                            // Map each distinct equivalent to the primary graph; the first
                            // mapping recorded for a graph wins.
                            if (sGraph != sGraphPrimary)
                            {
                                if (!mapChars.ContainsKey(sGraph))
                                {
                                    mapChars.Add(sGraph, sGraphPrimary);
                                }
                            }
                        }
                    }
                }
            }
            // Cache the (possibly empty) results so the rules are parsed only once per writing system.
            m_mapWsDigraphs.Add(sWs, digraphs);
            m_mapWsMapChars.Add(sWs, mapChars);
            return(digraphs);
        }
Exemplo n.º 7
0
        /// <summary>
        /// Find the lexical entry for a wordform and show its summary dialog. If the wordform has
        /// no entry, the lookup is retried with the lower-cased wordform; if that also finds
        /// nothing, the find-entry dialog is shown so that an entry can be chosen.
        /// </summary>
        /// <param name="cache">Cache used for the lookups and stored in the mediator's property table.</param>
        /// <param name="owner">Owner window for the dialogs.</param>
        /// <param name="mediatorIn">Mediator handed to EnsureValidMediator.</param>
        /// <param name="helpProvider">Help topic provider passed to the summary dialog.</param>
        /// <param name="helpFileKey">Help file key passed to the summary dialog.</param>
        /// <param name="tssWfIn">The wordform to look up.</param>
        internal static void DisplayEntry(FdoCache cache, IWin32Window owner, Mediator mediatorIn,
                                          IHelpTopicProvider helpProvider, string helpFileKey, ITsString tssWfIn)
        {
            ITsString   tssWf    = tssWfIn;
            LexEntryUi  leui     = null;
            Mediator    mediator = null;
            // Declared outside the try block so that the finally block can restore the string table.
            bool        fRestore = false;
            StringTable stOrig   = default(StringTable);

            try
            {
                leui = FindEntryForWordform(cache, tssWf);

                // If we do not find a match for the word then try converting it to lowercase and see
                // if there is an entry in the lexicon for the wordform in lowercase. This is needed for
                // occurrences of words which are capitalized at the beginning of sentences.  LT-7444 RickM
                if (leui == null)
                {
                    // We need to be careful when converting to lowercase, therefore use Icu.ToLower()
                    // with the locale of the writing system found at the start of the string.
                    int          wsWf     = TsStringUtils.GetWsAtOffset(tssWf, 0);
                    string       wsLocale = cache.ServiceLocator.WritingSystemManager.Get(wsWf).IcuLocale;
                    string       sLower   = Icu.ToLower(tssWf.Text, wsLocale);
                    ITsTextProps ttp      = tssWf.get_PropertiesAt(0);
                    // From here on the lower-cased string (with the original properties) is the one
                    // that is looked up and passed to the dialogs.
                    tssWf = cache.TsStrFactory.MakeStringWithPropsRgch(sLower, sLower.Length, ttp);
                    leui  = FindEntryForWordform(cache, tssWf);
                }

                // Ensure that we have a valid mediator with the proper string table.
                mediator = EnsureValidMediator(mediatorIn, out fRestore, out stOrig);
                FdoCache cache2 = (FdoCache)mediator.PropertyTable.GetValue("cache");
                if (cache2 != cache)
                {
                    mediator.PropertyTable.SetProperty("cache", cache);
                }
                EnsureWindowConfiguration(mediator);
                IVwStylesheet styleSheet = GetStyleSheet(cache, mediator);
                if (leui == null)
                {
                    ILexEntry entry = ShowFindEntryDialog(cache, mediator, tssWf, owner);
                    if (entry == null)
                    {
                        // Nothing was chosen; the finally block restores the string table.
                        return;
                    }
                    leui = new LexEntryUi(entry);
                }
                // The mediator has already been dereferenced above, so it cannot be null here.
                leui.Mediator = mediator;
                leui.ShowSummaryDialog(owner, tssWf, helpProvider, helpFileKey, styleSheet);
            }
            finally
            {
                // Restore the original string table in the mediator if needed, also when one of
                // the calls above threw.
                if (fRestore && mediator != null)
                {
                    mediator.StringTbl = stOrig;
                }
                if (leui != null)
                {
                    leui.Dispose();
                }
                // The mediator is still null if an exception occurred before EnsureValidMediator
                // returned; without the null check the Dispose call would throw and hide that
                // original exception.
                if (mediator != null && mediator != mediatorIn)
                {
                    mediator.Dispose();
                }
            }
        }