/// ------------------------------------------------------------------------------------
/// <summary>
/// Builds a <c>BidiCharacter</c> from <c>puaChar</c> and hands it back through the
/// <c>IUcdCharacter</c> interface.
/// </summary>
/// <param name="puaChar">The character passed to the BidiCharacter constructor.</param>
/// <returns>The newly constructed BidiCharacter.</returns>
/// ------------------------------------------------------------------------------------
public IUcdCharacter Create(IPuaCharacter puaChar)
{
	IUcdCharacter bidiChar = new BidiCharacter(puaChar);
	return bidiChar;
}
/// ------------------------------------------------------------------------------------
/// <summary>
/// Builds a <c>NormalizationCharacter</c> from <c>puaChar</c> that carries the given
/// normalization property.
/// </summary>
/// <param name="puaChar">The character passed to the NormalizationCharacter
/// constructor.</param>
/// <param name="property">The normalization property passed to the constructor.</param>
/// <returns>The newly constructed NormalizationCharacter.</returns>
/// ------------------------------------------------------------------------------------
public IUcdCharacter Create(IPuaCharacter puaChar, string property)
{
	IUcdCharacter normalizationChar = new NormalizationCharacter(puaChar, property);
	return normalizationChar;
}
/// ------------------------------------------------------------------------------------
/// <summary>
/// Factory method: wraps <c>puaChar</c> and <c>property</c> in a new
/// <c>NormalizationCharacter</c>.
/// </summary>
/// <param name="puaChar">The character passed to the NormalizationCharacter
/// constructor.</param>
/// <param name="property">The normalization property passed to the constructor.</param>
/// <returns>The new NormalizationCharacter, typed as IUcdCharacter.</returns>
/// ------------------------------------------------------------------------------------
public IUcdCharacter Create(IPuaCharacter puaChar, string property)
{
	IUcdCharacter result = new NormalizationCharacter(puaChar, property);
	return result;
}
/// <summary>
/// Writes one line to <c>tw</c> made of the formatted character, a space, a '#' and the
/// value of <c>m_comment</c>.
/// </summary>
/// <param name="puaChar">The character to write; it is formatted by the writer's
/// composite formatting (slot {0}).</param>
/// <param name="tw">The writer that receives the line.</param>
private void WriteUnicodeDataLine(IPuaCharacter puaChar, TextWriter tw)
{
	// Same composite format string as before; the arguments are just gathered first.
	object[] lineParts = { puaChar, m_comment };
	tw.WriteLine("{0} #{1}", lineParts);
}
/// ------------------------------------------------------------------------------------
/// <summary>
/// Factory method: wraps <c>puaChar</c> in a new <c>BidiCharacter</c>.
/// </summary>
/// <param name="puaChar">The character passed to the BidiCharacter constructor.</param>
/// <returns>The new BidiCharacter, typed as IUcdCharacter.</returns>
/// ------------------------------------------------------------------------------------
public IUcdCharacter Create(IPuaCharacter puaChar)
{
	IUcdCharacter created = new BidiCharacter(puaChar);
	return created;
}
/// <summary>
/// Merges the given character definitions into a UnicodeData.txt style file, reading
/// <c>originalOverrides</c> and writing the merged result to <c>customOverrides</c>.
///
/// Each data line of the original is expected to start with a hexadecimal code point
/// followed by ';'. A definition is written in front of the first original line whose
/// code point is greater than or equal to its own; when the code points are equal the
/// original line is replaced rather than kept. Definitions that were not placed while
/// reading the original are appended at the end of the output.
///
/// <list type="number">
/// <listheader>Assumptions made about the input</listheader>
/// <item>The code points in the original file are in ascending order.</item>
/// <item>NOTE(review): <c>puaDefinitions</c> is presumably sorted ascending by code point
/// as well - confirm against the callers.</item>
/// <item>Every non-blank line that does not start with "Code" or "block" contains a ';';
/// otherwise <c>Substring</c> throws an ArgumentOutOfRangeException.</item>
/// </list>
/// </summary>
/// <remarks>
/// Behavior worth knowing when reading the output:
///   - Lines starting with "Code" or "block" are copied through untouched.
///   - Entirely blank lines are dropped while definitions are still pending; once the last
///     definition has been written, the rest of the original is copied verbatim.
///   - This method contains no special handling for range ("first"/"last") lines; they are
///     treated like any other data line.
///   - An empty <c>puaDefinitions</c> array copies the original through (minus blank lines)
///     instead of failing on the first element.
/// </remarks>
/// <param name="puaDefinitions">The definitions to insert; may be empty.</param>
/// <param name="originalOverrides">Path of the original file to merge into.</param>
/// <param name="customOverrides">Path of the file the merged output is written to
/// (overwritten, not appended).</param>
private void InsertCharacters(IPuaCharacter[] puaDefinitions, string originalOverrides,
	string customOverrides)
{
	LogFile.AddVerboseLine("StreamReader on <" + originalOverrides + ">");

	// The using blocks flush and close both streams on every exit path, so no explicit
	// try/finally with Flush/Close is needed.
	using (var reader = new StreamReader(originalOverrides, Encoding.ASCII))
	{
		reader.Peek(); // force autodetection of encoding.
		using (var writer = new StreamWriter(customOverrides, false, Encoding.ASCII))
		{
			string line;
			// Code point of the most recently processed line (original or inserted).
			var lastCode = 0;
			// Index of the next definition that still has to be written.
			var codeIndex = 0;
			// Code point of that definition; unused when there are no definitions.
			var newCode = puaDefinitions.Length > 0
				? Convert.ToInt32(puaDefinitions[codeIndex].CodePoint, 16)
				: 0;

			while ((line = reader.ReadLine()) != null)
			{
				// Skip entirely blank lines.
				if (line.Length <= 0)
				{
					continue;
				}

				// Header line or special instruction: copy through unchanged.
				if (line.StartsWith("Code", StringComparison.Ordinal) ||
					line.StartsWith("block", StringComparison.Ordinal))
				{
					writer.WriteLine(line);
					continue;
				}

				// The code point is everything in front of the first ';'.
				var strFileCode = line.Substring(0, line.IndexOf(';')).Trim();
				var fileCode = Convert.ToInt32(strFileCode, 16);

				// The pending definition belongs here if it lies after the last processed
				// code point and at or before the code point of the current line.
				if (codeIndex < puaDefinitions.Length && newCode > lastCode && newCode <= fileCode)
				{
					while (newCode <= fileCode)
					{
						LogCodepoint(puaDefinitions[codeIndex].CodePoint);
						// Insert the definition (this replaces the original line when the
						// code points turn out to be equal).
						writer.WriteLine("{0} #{1}", puaDefinitions[codeIndex], m_comment);
						lastCode = newCode;

						// Advance to the next definition. If that was the last one, push
						// the rest of the original file through verbatim.
						if (++codeIndex >= puaDefinitions.Length)
						{
							// Keep the current original line unless it was just replaced.
							if (fileCode != lastCode)
							{
								writer.WriteLine(line);
							}
							while ((line = reader.ReadLine()) != null)
							{
								writer.WriteLine(line);
							}
							break;
						}
						newCode = Convert.ToInt32(puaDefinitions[codeIndex].CodePoint, 16);
					}

					// Everything has been written, including the tail of the original.
					if (codeIndex >= puaDefinitions.Length)
					{
						break;
					}

					// Keep the current original line unless it was just replaced.
					if (fileCode != lastCode)
					{
						writer.WriteLine(line);
					}
				}
				else
				{
					// Nothing to insert in front of this line.
					writer.WriteLine(line);
				}
				lastCode = fileCode;
			}

			// Output any definitions that were not placed while reading the original.
			while (codeIndex < puaDefinitions.Length)
			{
				LogCodepoint(puaDefinitions[codeIndex].CodePoint);
				writer.WriteLine("{0} #{1}", puaDefinitions[codeIndex], m_comment);
				codeIndex++;
			}
		}
	}
}
/// <summary>
/// Compares the bidi and decomposition fields of an existing data line with the values of
/// the character that replaces it, and records the differences in the supplied lists.
/// </summary>
/// <remarks>
/// NOTE(review): <c>GetField(line, n)</c> presumably returns the n-th field of the line -
/// confirm against its definition.
/// </remarks>
/// <param name="line">The line of the UnicodeData.txt that will be replaced.
/// Where a field already matches the new character, nothing is added for it.</param>
/// <param name="puaDefinition">The character that is being inserted.</param>
/// <param name="addToBidi">Receives a bidi character for <c>puaDefinition</c> when the
/// bidi values differ.</param>
/// <param name="removeFromBidi">Receives a bidi character built from <c>line</c> when the
/// bidi values differ.</param>
/// <param name="addToNorm">Receives normalization characters for
/// <c>puaDefinition</c> when the decompositions differ and the new one is not empty.</param>
/// <param name="removeFromNorm">Receives normalization characters built from
/// <c>line</c> when the decompositions differ and the old one is not blank.</param>
private static void AddToLists(string line, IPuaCharacter puaDefinition,
	List<IUcdCharacter> addToBidi, List<IUcdCharacter> removeFromBidi,
	List<IUcdCharacter> addToNorm, List<IUcdCharacter> removeFromNorm)
{
#if DEBUGGING_SOMETHING
	int temp = line.IndexOf("F16F"); // junk for a debugging breakpoint...
	temp++;
#endif
	// Bidi: when the existing value differs, schedule the old entry for removal and the
	// new one for addition.
	var existingBidi = GetField(line, kiBidi + 1);
	if (!puaDefinition.Bidi.Equals(existingBidi))
	{
		var bidiFactory = new BidiCharacterFactory();
		removeFromBidi.Add(new BidiCharacter(line));
		addToBidi.Add(bidiFactory.Create(puaDefinition));
	}

	// Decomposition: nothing further to do when the raw field is unchanged.
	string existingDecomp = GetField(line, 5);
	string replacementDecomp = puaDefinition.Data[5 - 1];
	if (existingDecomp == replacementDecomp)
	{
		return;
	}

	var normFactory = new NormalizationCharacterFactory();

	// Quick attempt at removing basic decompositions.
	// TODO: Extend this to actually remove more complicated entries?
	// Currently this will remove anything that we have added.
	if (existingDecomp.Trim() != string.Empty)
	{
		// A '>' in the decomposition field marks a compatibility decomposition.
		bool hasCompatibilityTag = existingDecomp.IndexOf(">") != -1;
		if (hasCompatibilityTag)
		{
			removeFromNorm.Add(normFactory.Create(line, "NFKD_QC; N"));
		}
		removeFromNorm.Add(normFactory.Create(line, "NFD_QC; N"));
	}

	// Add the normalization entries for the new character, if it has a decomposition.
	if (puaDefinition.Decomposition != string.Empty)
	{
		// A canonical decomposition (no decomposition type) gets an NFD entry...
		if (puaDefinition.DecompositionType == string.Empty)
		{
			addToNorm.Add(normFactory.Create(puaDefinition, "NFD_QC; N"));
		}
		// ...and every decomposition gets an NFKD entry (canonical decompositions are
		// apparently also compatibility decompositions, but not vice versa).
		addToNorm.Add(normFactory.Create(puaDefinition, "NFKD_QC; N"));
	}
}
/// <summary>
/// Creates a UCDCharacter from <c>puaChar</c> by forwarding it to the base class
/// constructor; this constructor adds nothing of its own.
/// </summary>
/// <param name="puaChar">The character handed to the base constructor.</param>
public UCDCharacter(IPuaCharacter puaChar)
	: base(puaChar)
{
}
/// <summary>
/// Copies the values of the given character into this instance.
/// </summary>
/// <remarks>
/// The argument is cast directly to <c>PUACharacter</c>, so passing any other
/// implementation of the interface fails with an InvalidCastException.
/// The code point and name are assigned through the <c>CodePoint</c> and <c>Name</c>
/// properties; all remaining values are copied field to field.
/// </remarks>
/// <param name="sourceIPuaChar">The character to copy.</param>
public void Copy(IPuaCharacter sourceIPuaChar)
{
	var source = (PUACharacter)sourceIPuaChar;

	// Assigned through the property setters.
	CodePoint = source.m_codepoint;
	Name = source.m_name;

	// Plain field copies.
	m_generalCategory = source.m_generalCategory;
	m_canonicalCombiningClass = source.m_canonicalCombiningClass;
	m_bidiClass = source.m_bidiClass;
	m_decompositionType = source.m_decompositionType;
	m_decomposition = source.m_decomposition;
	m_numericType = source.m_numericType;
	m_numericValue = source.m_numericValue;
	m_bidiMirrored = source.m_bidiMirrored;
	m_unicode1Name = source.m_unicode1Name;
	m_isoComment = source.m_isoComment;
	m_upper = source.m_upper;
	m_lower = source.m_lower;
	m_title = source.m_title;
}
/// <summary>
/// Creates a PUACharacter whose values are taken from another character via
/// <c>Copy</c>.
/// Derived classes can chain to this constructor so that all the important data is
/// copied internally.
/// </summary>
/// <param name="puaChar">The character that is being copied.</param>
public PUACharacter(IPuaCharacter puaChar)
{
	Copy(puaChar);
}
/// <summary>
/// Creates a NormalizationCharacter: <c>puaChar</c> is forwarded to the base class
/// constructor and <c>property</c> is stored on this instance.
/// </summary>
/// <param name="puaChar">The character handed to the base constructor.</param>
/// <param name="property">The normalization property to remember.</param>
public NormalizationCharacter(IPuaCharacter puaChar, string property)
	: base(puaChar)
{
	// "this." is required: the parameter has the same name as the member.
	this.property = property;
}
/// <summary>
/// Creates a BidiCharacter from <c>puaChar</c> by forwarding it to the base class
/// constructor; this constructor adds nothing of its own.
/// </summary>
/// <param name="puaChar">The character handed to the base constructor.</param>
public BidiCharacter(IPuaCharacter puaChar)
	: base(puaChar)
{
}