A 32-bit representation of a Unicode character. Treats all characters as a 32-bit value. Gives access to a character's code point as a string. [!] Only handles the Basic Multilingual Plane (BMP). Code points > U+FFFF will throw exceptions. [!] Does NOT support surrogate pairs.
Ejemplo n.º 1
0
        /// <summary>
        /// Appends one <c>BestFitMapping</c> row to <paramref name="lBestFit"/> for every
        /// "Mapping" element of the best-fit document whose "Ascii" element equals the code
        /// point of <paramref name="cAscii"/>.
        /// </summary>
        /// <param name="cAscii">The ASCII character to query on.</param>
        /// <param name="lBestFit">The list to append results to. It is only added to, never
        /// reassigned; the <c>ref</c> modifier is kept for caller compatibility.</param>
        /// <param name="sCharset">An optional charset to filter results by. Null or empty
        /// means mappings for every charset are returned.</param>
        public void BuildBestfitTable(char cAscii, 
            ref List<BestFitMapping> lBestFit,
            string sCharset = "")
        {
            UniChar uc = new UniChar();
            uc.CodePoint = uc.GetCodePoint(cAscii);

            bool filterByCharset = !String.IsNullOrEmpty(sCharset);

            // The (string) cast yields null for a missing element, so mappings without an
            // "Ascii" (or "Charset") child are simply skipped by the filter.
            IEnumerable<XElement> query =
                from mapping in XDocBestfit.Descendants("Mapping")
                where (string)mapping.Element("Ascii") == uc.CodePoint &&
                      (!filterByCharset || (string)mapping.Element("Charset") == sCharset)
                select mapping;

            // NOTE: Distinct() uses the default comparer, which for XElement is reference
            // equality, so it does not collapse mappings that merely have identical content.
            foreach (XElement item in query.Distinct())
            {
                BestFitMapping bf = new BestFitMapping();
                UniChar uc2 = new UniChar();
                bf.Ascii = item.Element("Ascii").Value;
                bf.Unicode = item.Element("Unicode").Value;
                // Render the code point string as the actual character for display.
                bf.Character = uc2.ConvertCodePointToString(bf.Unicode);
                bf.Charset = item.Element("Charset").Value;
                bf.Name = item.Element("Name").Value;
                lBestFit.Add(bf);
            }
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Get the distinct "Unicode" values of all normalization mappings for an ASCII
        /// character, optionally restricted to a single transformation.
        /// </summary>
        /// <param name="cAscii">A single ASCII character.</param>
        /// <param name="sTransform">Optional transformation filter. A value that is not in
        /// <c>UnicodeMapping.transformations</c> is treated as "no filter". Valid
        /// transformations are specified as:
        ///
        /// Simple_Lowercase_Mapping
        /// Lowercase_Mapping
        /// Simple_Case_Folding
        /// cf
        /// Simple_Uppercase_Mapping
        /// Simple_Titlecase_Mapping
        /// Uppercase_Mapping
        /// Titlecase_Mapping
        /// Decomposition_Mapping
        /// FC_NFKC
        /// </param>
        /// <returns>The distinct "Unicode" element values of the matching mappings; empty
        /// when nothing matches.</returns>
        public List <String> GetNormalizationMappings(char cAscii, string sTransform = "")
        {
            UnicodeMapping um = new UnicodeMapping();

            // If something is passed in we don't know about then set transform to a wildcard
            if (!um.transformations.Contains(sTransform))
            {
                sTransform = "";
            }
            bool filterByTransform = !String.IsNullOrEmpty(sTransform);

            UniChar uc = new UniChar();
            uc.CodePoint = uc.GetCodePoint(cAscii);

            // Always match on the requested character's code point. The transform-filtered
            // path previously compared against the literal "0043" (the letter 'C'), which
            // ignored cAscii whenever a transform was supplied.
            IEnumerable<string> query =
                from mapping in XDocUnicode.Descendants("Mapping")
                where (string)mapping.Element("Ascii") == uc.CodePoint &&
                      (!filterByTransform || (string)mapping.Element("Transform") == sTransform)
                select mapping.Element("Unicode").Value;

            return query.Distinct().ToList();
        }
Ejemplo n.º 3
0
        /// <summary>
        /// Send me an ASCII character and I'll return you a list of Unicode characters that
        /// best fit map to it.  If you don't tell me a specific charset you're interested
        /// in, I'm going to send you data for all of them.
        /// </summary>
        /// <param name="cAscii">The ASCII character to query on.</param>
        /// <param name="sCharset">An optional charset name to filter by. A value that is not
        /// in <c>BestFitMapping.charsets</c> is treated as "no filter". Valid values include:
        /// APL-ISO-IR-68
        /// CP424
        /// IBMGRAPH
        /// US-ASCII-QUOTES
        /// windows-1250
        /// windows-1251
        /// windows-1252
        /// windows-1253
        /// windows-1254
        /// windows-1255
        /// windows-1256
        /// windows-1257
        /// windows-1258
        /// windows-874
        /// CP864
        /// CP037
        /// CP1026
        /// CP500
        /// CP875
        /// DINGBATS
        /// KEYBOARD
        /// SYMBOL
        /// symbol
        /// zdingbat
        /// JAPANESE
        /// GSM0338
        /// </param>
        /// <returns>The distinct "Unicode" element values of the matching mappings; empty
        /// when nothing matches.</returns>
        public List <String> GetBestfitMappings(char cAscii, string sCharset = "")
        {
            BestFitMapping bm = new BestFitMapping();

            // If an invalid charset was entered then set it to the wildcard
            if (!bm.charsets.Contains(sCharset))
            {
                sCharset = "";
            }
            bool filterByCharset = !String.IsNullOrEmpty(sCharset);

            UniChar uc = new UniChar();
            uc.CodePoint = uc.GetCodePoint(cAscii);

            // Always match on the requested character's code point. The charset-filtered
            // path previously compared against the literal "0043" (the letter 'C'), which
            // ignored cAscii whenever a charset was supplied.
            IEnumerable<string> query =
                from mapping in XDocBestfit.Descendants("Mapping")
                where (string)mapping.Element("Ascii") == uc.CodePoint &&
                      (!filterByCharset || (string)mapping.Element("Charset") == sCharset)
                select mapping.Element("Unicode").Value;

            return query.Distinct().ToList();
        }
        /// <summary>
        /// Picks one "Unicode" value at random from the "Mapping" elements of the
        /// expanded-Unicode document.
        /// </summary>
        /// <param name="ascii">Converted to a code point, but the lookup below does not
        /// filter on it.</param>
        /// <returns>A randomly chosen distinct "Unicode" element value, or null when the
        /// document contains no mappings.</returns>
        public String GetExpandedUnicodeCharacter(char ascii)
        {
            // NOTE(review): the code point is computed but never applied to the query, so
            // every mapping is a candidate regardless of the input character. Confirm
            // whether a filter on "Ascii" was intended.
            UniChar converter = new UniChar();
            converter.CodePoint = converter.GetCodePoint(ascii);

            List<String> candidates = XDocExpandedUnicode
                .Descendants("Mapping")
                .Select(mapping => mapping.Element("Unicode").Value)
                .Distinct()
                .ToList();

            // Next(0) yields 0 and ElementAtOrDefault returns null for an empty list.
            Random picker = new Random();
            int index = picker.Next(candidates.Count);

            return candidates.ElementAtOrDefault(index);
        }
Ejemplo n.º 5
0
        /// <summary>
        /// Appends one <c>UnicodeMapping</c> row to <paramref name="lTransformations"/> for
        /// every "Mapping" element of the Unicode document whose "Ascii" element equals the
        /// code point of <paramref name="cAscii"/>.
        /// </summary>
        /// <param name="cAscii">The ASCII character to query on.</param>
        /// <param name="lTransformations">The list to append results to. It is only added to,
        /// never reassigned; the <c>ref</c> modifier is kept for caller compatibility.</param>
        /// <param name="sTransform">An optional transformation name to filter results by.
        /// Null or empty means mappings for every transformation are returned.</param>
        public void BuildTransformationsTable(char cAscii,
                                              ref List <UnicodeMapping> lTransformations,
                                              string sTransform = "")
        {
            UniChar uc = new UniChar();

            uc.CodePoint = uc.GetCodePoint(cAscii);

            bool filterByTransform = !String.IsNullOrEmpty(sTransform);

            // Both paths read the document through XDocUnicode, like the other query
            // methods. NOTE(review): the unfiltered path used to read the lowercase
            // xDocUnicode member directly; presumably that is the backing field of the
            // same document. Confirm.
            IEnumerable <XElement> query =
                from mapping in XDocUnicode.Descendants("Mapping")
                where (string)mapping.Element("Ascii") == uc.CodePoint &&
                      (!filterByTransform || (string)mapping.Element("Transform") == sTransform)
                select mapping;

            // NOTE: Distinct() uses the default comparer, which for XElement is reference
            // equality, so it does not collapse mappings that merely have identical content.
            foreach (XElement item in query.Distinct())
            {
                UnicodeMapping um  = new UnicodeMapping();
                UniChar        uc2 = new UniChar();
                um.Ascii     = item.Element("Ascii").Value;
                um.Unicode   = item.Element("Unicode").Value;
                // Render the code point string as the actual character for display.
                um.Character = uc2.ConvertCodePointToString(um.Unicode);
                um.Transform = item.Element("Transform").Value;
                um.Name      = item.Element("Name").Value;
                lTransformations.Add(um);
            }
        }
Ejemplo n.º 6
0
        /// <summary>
        /// Appends one <c>BestFitMapping</c> row to <paramref name="lBestFit"/> for every
        /// "Mapping" element of the best-fit document whose "Ascii" element equals the code
        /// point of <paramref name="cAscii"/>.
        /// </summary>
        /// <param name="cAscii">The ASCII character to query on.</param>
        /// <param name="lBestFit">The list to append results to. It is only added to, never
        /// reassigned; the <c>ref</c> modifier is kept for caller compatibility.</param>
        /// <param name="sCharset">An optional charset to filter results by. Null or empty
        /// means mappings for every charset are returned.</param>
        public void BuildBestfitTable(char cAscii,
                                      ref List <BestFitMapping> lBestFit,
                                      string sCharset = "")
        {
            UniChar uc = new UniChar();

            uc.CodePoint = uc.GetCodePoint(cAscii);

            bool filterByCharset = !String.IsNullOrEmpty(sCharset);

            // The (string) cast yields null for a missing element, so mappings without an
            // "Ascii" (or "Charset") child are simply skipped by the filter.
            IEnumerable <XElement> query =
                from mapping in XDocBestfit.Descendants("Mapping")
                where (string)mapping.Element("Ascii") == uc.CodePoint &&
                      (!filterByCharset || (string)mapping.Element("Charset") == sCharset)
                select mapping;

            // NOTE: Distinct() uses the default comparer, which for XElement is reference
            // equality, so it does not collapse mappings that merely have identical content.
            foreach (XElement item in query.Distinct())
            {
                BestFitMapping bf  = new BestFitMapping();
                UniChar        uc2 = new UniChar();
                bf.Ascii     = item.Element("Ascii").Value;
                bf.Unicode   = item.Element("Unicode").Value;
                // Render the code point string as the actual character for display.
                bf.Character = uc2.ConvertCodePointToString(bf.Unicode);
                bf.Charset   = item.Element("Charset").Value;
                bf.Name      = item.Element("Name").Value;
                lBestFit.Add(bf);
            }
        }
Ejemplo n.º 7
0
        /// <summary>
        /// Get the distinct "Unicode" values of all normalization mappings for an ASCII
        /// character, optionally restricted to a single transformation.
        /// </summary>
        /// <param name="cAscii">A single ASCII character.</param>
        /// <param name="sTransform">Optional transformation filter. A value that is not in
        /// <c>UnicodeMapping.transformations</c> is treated as "no filter". Valid
        /// transformations are specified as:
        /// 
        /// Simple_Lowercase_Mapping
        /// Lowercase_Mapping
        /// Simple_Case_Folding
        /// cf
        /// Simple_Uppercase_Mapping
        /// Simple_Titlecase_Mapping
        /// Uppercase_Mapping
        /// Titlecase_Mapping
        /// Decomposition_Mapping
        /// FC_NFKC
        /// </param>
        /// <returns>The distinct "Unicode" element values of the matching mappings; empty
        /// when nothing matches.</returns>
        public List<String> GetNormalizationMappings(char cAscii, string sTransform = "")
        {
            UnicodeMapping um = new UnicodeMapping();

            // If something is passed in we don't know about then set transform to a wildcard
            if (!um.transformations.Contains(sTransform))
            {
                sTransform = "";
            }
            bool filterByTransform = !String.IsNullOrEmpty(sTransform);

            UniChar uc = new UniChar();
            uc.CodePoint = uc.GetCodePoint(cAscii);

            // Always match on the requested character's code point. The transform-filtered
            // path previously compared against the literal "0043" (the letter 'C'), which
            // ignored cAscii whenever a transform was supplied.
            IEnumerable<string> query =
                from mapping in XDocUnicode.Descendants("Mapping")
                where (string)mapping.Element("Ascii") == uc.CodePoint &&
                      (!filterByTransform || (string)mapping.Element("Transform") == sTransform)
                select mapping.Element("Unicode").Value;

            return query.Distinct().ToList();
        }
Ejemplo n.º 8
0
        /// <summary>
        /// Send me an ASCII character and I'll return you a list of Unicode characters that
        /// best fit map to it.  If you don't tell me a specific charset you're interested
        /// in, I'm going to send you data for all of them.
        /// </summary>
        /// <param name="cAscii">The ASCII character to query on.</param>
        /// <param name="sCharset">An optional charset name to filter by. A value that is not
        /// in <c>BestFitMapping.charsets</c> is treated as "no filter". Valid values include:
        /// APL-ISO-IR-68
        /// CP424
        /// IBMGRAPH
        /// US-ASCII-QUOTES
        /// windows-1250
        /// windows-1251
        /// windows-1252
        /// windows-1253
        /// windows-1254
        /// windows-1255
        /// windows-1256
        /// windows-1257
        /// windows-1258
        /// windows-874
        /// CP864
        /// CP037
        /// CP1026
        /// CP500
        /// CP875
        /// DINGBATS
        /// KEYBOARD
        /// SYMBOL
        /// symbol
        /// zdingbat
        /// JAPANESE
        /// GSM0338
        /// </param>
        /// <returns>The distinct "Unicode" element values of the matching mappings; empty
        /// when nothing matches.</returns>
        public List<String> GetBestfitMappings(char cAscii, string sCharset = "")
        {
            BestFitMapping bm = new BestFitMapping();
            // If an invalid charset was entered then set it to the wildcard
            if (!bm.charsets.Contains(sCharset))
            {
                sCharset = "";
            }
            bool filterByCharset = !String.IsNullOrEmpty(sCharset);

            UniChar uc = new UniChar();
            uc.CodePoint = uc.GetCodePoint(cAscii);

            // Always match on the requested character's code point. The charset-filtered
            // path previously compared against the literal "0043" (the letter 'C'), which
            // ignored cAscii whenever a charset was supplied.
            IEnumerable<string> query =
                from mapping in XDocBestfit.Descendants("Mapping")
                where (string)mapping.Element("Ascii") == uc.CodePoint &&
                      (!filterByCharset || (string)mapping.Element("Charset") == sCharset)
                select mapping.Element("Unicode").Value;

            return query.Distinct().ToList();
        }
Ejemplo n.º 9
0
        /// <summary>
        /// Appends one <c>UnicodeMapping</c> row to <paramref name="lTransformations"/> for
        /// every "Mapping" element of the Unicode document whose "Ascii" element equals the
        /// code point of <paramref name="cAscii"/>.
        /// </summary>
        /// <param name="cAscii">The ASCII character to query on.</param>
        /// <param name="lTransformations">The list to append results to. It is only added to,
        /// never reassigned; the <c>ref</c> modifier is kept for caller compatibility.</param>
        /// <param name="sTransform">An optional transformation name to filter results by.
        /// Null or empty means mappings for every transformation are returned.</param>
        public void BuildTransformationsTable(char cAscii,
            ref List<UnicodeMapping> lTransformations,
            string sTransform = "")
        {
            UniChar uc = new UniChar();
            uc.CodePoint = uc.GetCodePoint(cAscii);

            bool filterByTransform = !String.IsNullOrEmpty(sTransform);

            // Both paths read the document through XDocUnicode, like the other query
            // methods. NOTE(review): the unfiltered path used to read the lowercase
            // xDocUnicode member directly; presumably that is the backing field of the
            // same document. Confirm.
            IEnumerable<XElement> query =
                from mapping in XDocUnicode.Descendants("Mapping")
                where (string)mapping.Element("Ascii") == uc.CodePoint &&
                      (!filterByTransform || (string)mapping.Element("Transform") == sTransform)
                select mapping;

            // NOTE: Distinct() uses the default comparer, which for XElement is reference
            // equality, so it does not collapse mappings that merely have identical content.
            foreach (XElement item in query.Distinct())
            {
                UnicodeMapping um = new UnicodeMapping();
                UniChar uc2 = new UniChar();
                um.Ascii = item.Element("Ascii").Value;
                um.Unicode = item.Element("Unicode").Value;
                // Render the code point string as the actual character for display.
                um.Character = uc2.ConvertCodePointToString(um.Unicode);
                um.Transform = item.Element("Transform").Value;
                um.Name = item.Element("Name").Value;
                lTransformations.Add(um);
            }
        }
Ejemplo n.º 10
0
        /// <summary>
        /// Click handler: writes the normalization mappings of <c>Input</c> to the output
        /// text box, one converted character per line, then binds the detailed mapping rows
        /// to the grid.
        /// </summary>
        /// <param name="sender">Event source (not used).</param>
        /// <param name="e">Event data (not used).</param>
        private void buttonGetUnicode_Click(object sender, EventArgs e)
        {
            textBoxOutput.Text = "";

            // NOTE(review): the text box is filled without the Transform filter while the
            // grid below passes Transform. Confirm this difference is intended.
            List<String> transforms = Data.GetNormalizationMappings(Input);

            // Accumulate in a builder rather than re-allocating the string per mapping.
            System.Text.StringBuilder output = new System.Text.StringBuilder();

            foreach (string transform in transforms)
            {
                if (!String.IsNullOrEmpty(transform))
                {
                    UniChar uc = new UniChar();
                    output.Append(uc.ConvertCodePointToString(transform)).Append("\r\n");
                }
            }

            textBoxOutput.Text = output.ToString();

            // Fill DataGrid; the binding is cleared first, presumably so the grid refreshes
            // against the new list.
            List<UnicodeMapping> lTransformations = new List<UnicodeMapping>();
            dataGridViewBestFit.DataSource = null;
            Data.BuildTransformationsTable(Input, ref lTransformations, Transform);
            dataGridViewBestFit.DataSource = lTransformations;
        }
Ejemplo n.º 11
0
        /// <summary>
        /// Click handler: writes the best-fit mappings of <c>Input</c> to the output text
        /// box, one converted character per line, then binds the detailed mapping rows to
        /// the grid.
        /// </summary>
        /// <param name="sender">Event source (not used).</param>
        /// <param name="e">Event data (not used).</param>
        private void buttonGetBestfit_Click(object sender, EventArgs e)
        {
            textBoxOutput.Text = "";

            // NOTE(review): the text box is filled without the Charset filter while the
            // grid below passes Charset. Confirm this difference is intended.
            List<String> bestfits = Data.GetBestfitMappings(Input);

            // Accumulate in a builder rather than re-allocating the string per mapping.
            System.Text.StringBuilder output = new System.Text.StringBuilder();

            foreach (string bestfit in bestfits)
            {
                if (!String.IsNullOrEmpty(bestfit))
                {
                    UniChar uc = new UniChar();
                    output.Append(uc.ConvertCodePointToString(bestfit)).Append("\r\n");
                }
            }

            textBoxOutput.Text = output.ToString();

            // Fill DataGrid; the binding is cleared first, presumably so the grid refreshes
            // against the new list.
            List<BestFitMapping> lBestfits = new List<BestFitMapping>();
            dataGridViewBestFit.DataSource = null;
            Data.BuildBestfitTable(Input,ref lBestfits, Charset);
            dataGridViewBestFit.DataSource = lBestfits;
        }