Ejemplo n.º 1
0
		/// ------------------------------------------------------------------------------------
		/// <summary>
		/// Builds a <c>BidiCharacter</c> by handing <c>puaChar</c> to the
		/// <c>BidiCharacter</c> constructor.
		/// </summary>
		/// <param name="puaChar">The character passed to the new <c>BidiCharacter</c>.</param>
		/// <returns>The newly constructed <c>BidiCharacter</c>.</returns>
		/// ------------------------------------------------------------------------------------
		public IUcdCharacter Create(IPuaCharacter puaChar)
		{
			IUcdCharacter bidiChar = new BidiCharacter(puaChar);
			return bidiChar;
		}
Ejemplo n.º 2
0
 /// ------------------------------------------------------------------------------------
 /// <summary>
 /// Builds a <c>NormalizationCharacter</c> from <c>puaChar</c> and the given
 /// normalization property.
 /// </summary>
 /// <param name="puaChar">The character passed to the new <c>NormalizationCharacter</c>.</param>
 /// <param name="property">The normalization property passed to the constructor.</param>
 /// <returns>The newly constructed <c>NormalizationCharacter</c>.</returns>
 /// ------------------------------------------------------------------------------------
 public IUcdCharacter Create(IPuaCharacter puaChar, string property)
 {
     IUcdCharacter normChar = new NormalizationCharacter(puaChar, property);
     return normChar;
 }
Ejemplo n.º 3
0
		/// ------------------------------------------------------------------------------------
		/// <summary>
		/// Builds a <c>NormalizationCharacter</c> from <c>puaChar</c> and the given
		/// normalization property.
		/// </summary>
		/// <param name="puaChar">The character passed to the new <c>NormalizationCharacter</c>.</param>
		/// <param name="property">The normalization property passed to the constructor.</param>
		/// <returns>The newly constructed <c>NormalizationCharacter</c>.</returns>
		/// ------------------------------------------------------------------------------------
		public IUcdCharacter Create(IPuaCharacter puaChar, string property)
		{
			IUcdCharacter normChar = new NormalizationCharacter(puaChar, property);
			return normChar;
		}
Ejemplo n.º 4
0
		/// <summary>
		/// Emits one line to <paramref name="tw"/>: the character, then " #", then the
		/// value of <c>m_comment</c>.
		/// </summary>
		/// <param name="puaChar">The character to write; it is formatted as the first
		/// placeholder of the line.</param>
		/// <param name="tw">The writer that receives the line.</param>
		private void WriteUnicodeDataLine(IPuaCharacter puaChar, TextWriter tw)
		{
			const string lineFormat = "{0} #{1}";
			tw.WriteLine(lineFormat, puaChar, m_comment);
		}
Ejemplo n.º 5
0
 /// ------------------------------------------------------------------------------------
 /// <summary>
 /// Builds a <c>BidiCharacter</c> by handing <c>puaChar</c> to the
 /// <c>BidiCharacter</c> constructor.
 /// </summary>
 /// <param name="puaChar">The character passed to the new <c>BidiCharacter</c>.</param>
 /// <returns>The newly constructed <c>BidiCharacter</c>.</returns>
 /// ------------------------------------------------------------------------------------
 public IUcdCharacter Create(IPuaCharacter puaChar)
 {
     IUcdCharacter bidiChar = new BidiCharacter(puaChar);
     return bidiChar;
 }
Ejemplo n.º 6
0
		/// <summary>
		/// Merges the given PUA definitions into a copy of an overrides file.
		///
		/// Reads <paramref name="originalOverrides"/> line by line and writes the merged result
		/// to <paramref name="customOverrides"/>. Each inserted definition is written followed
		/// by " #" and the value of <c>m_comment</c>.
		///
		/// <list type="number">
		/// <listheader>Assumptions made about the format</listheader>
		/// <item>The codepoints in the file and in <paramref name="puaDefinitions"/> are in
		/// ascending order.</item>
		/// <item>Every line that is neither blank nor a header ("Code...") or special
		/// instruction ("block...") starts with a hexadecimal codepoint followed by ';'.</item>
		/// </list>
		///
		/// </summary>
		/// <remarks>
		/// Per line of the original file:
		///	if the line is blank
		///		drop it
		///	else if the line starts with "Code" or "block"
		///		copy it unchanged
		///	else if the next definition is greater than the previous file codepoint and less
		///	than or equal to this line's codepoint
		///		write every pending definition up to and including this line's codepoint
		///		copy the line itself unless the last definition written has the same codepoint
		///		once the definitions run out, copy the remainder of the file verbatim
		///	else
		///		copy the line unchanged
		/// Definitions still pending when the file ends are appended after the last line.
		///
		/// Lines are handled purely by their leading codepoint; this method gives no special
		/// treatment to range ("first"/"last") entries.
		/// </remarks>
		/// <param name="puaDefinitions">An array of PUADefinitions to insert. May be empty, in
		/// which case the file is copied with no insertions.</param>
		/// <param name="originalOverrides">original to merge into</param>
		/// <param name="customOverrides">where to write output</param>
		private void InsertCharacters(IPuaCharacter[] puaDefinitions, string originalOverrides, string customOverrides)
		{
			// Open the file for reading and writing
			LogFile.AddVerboseLine("StreamReader on <" + originalOverrides + ">");
			// The using blocks flush and close both streams on every exit path, so no explicit
			// try/finally is needed.
			using (var reader = new StreamReader(originalOverrides, Encoding.ASCII))
			{
				reader.Peek();	// force autodetection of encoding.
				using (var writer = new StreamWriter(customOverrides, false, Encoding.ASCII))
				{
					// Insert the PUA via their codepoints

					string line;
					var lastCode = 0;
					// Start looking at the first codepoint
					var codeIndex = 0;
					// An empty array has no first codepoint to parse. The codeIndex guard on the
					// insertion branch below then keeps that branch from ever running, so the
					// file is simply copied through.
					var newCode = puaDefinitions.Length > 0
						? Convert.ToInt32(puaDefinitions[codeIndex].CodePoint, 16)
						: 0;

					//While there is a line to be read in the file
					while ((line = reader.ReadLine()) != null)
					{
						// skip entirely blank lines
						if (line.Length <= 0)
							continue;
						if (line.StartsWith("Code") || line.StartsWith("block")) // header line or special instruction
						{
							writer.WriteLine(line);
							continue;
						}

						//Grab codepoint
						var strFileCode = line.Substring(0, line.IndexOf(';')).Trim(); // current code in file
						var fileCode = Convert.ToInt32(strFileCode, 16);

						// If the new codepoint is greater than the last one processed in the file, but
						// less than or equal to the current codepoint in the file.
						if (codeIndex < puaDefinitions.Length && newCode > lastCode && newCode <= fileCode)
						{
							while (newCode <= fileCode)
							{
								LogCodepoint(puaDefinitions[codeIndex].CodePoint);

								// Replace the line with the new PuaDefinition
								writer.WriteLine("{0} #{1}", puaDefinitions[codeIndex], m_comment);
								lastCode = newCode;

								// Look for the next PUA codepoint that we wish to insert, we are done
								// with this one. If we are all done, push through the rest of the file.
								if (++codeIndex >= puaDefinitions.Length)
								{
									// Write out the original top of the section if it hasn't been replaced.
									if (fileCode != lastCode)
									{
										writer.WriteLine(line);
									}
									while ((line = reader.ReadLine()) != null)
										writer.WriteLine(line);
									break;
								}
								newCode = Convert.ToInt32(puaDefinitions[codeIndex].CodePoint, 16);
							}
							// The inner loop already copied the rest of the file when it ran out of
							// definitions, so there is nothing left to read.
							if (codeIndex >= puaDefinitions.Length)
								break;
							// Write out the original top of the section if it hasn't been replaced.
							if (fileCode != lastCode)
							{
								writer.WriteLine(line);
							}
						}
						// No definition belongs in front of this line; keep it as is.
						else
						{
							writer.WriteLine(line);
						}
						lastCode = fileCode;
					}
					// Output any codepoints after the old end
					while (codeIndex < puaDefinitions.Length)
					{
						LogCodepoint(puaDefinitions[codeIndex].CodePoint);

						// Add a line with the new PuaDefinition
						writer.WriteLine("{0} #{1}", puaDefinitions[codeIndex], m_comment);
						codeIndex++;
					}
				}
			}
		}
Ejemplo n.º 7
0
		/// <summary>
		/// Compares an existing data line with the character about to replace it and queues
		/// bidi and normalization entries on the supplied lists wherever the two differ.
		/// </summary>
		/// <param name="line">The line of the UnicodeData.txt that will be replaced.
		///		Its bidi field and its decomposition field (field 5) are compared with
		///		<paramref name="puaDefinition"/>; nothing is added for a field that matches.</param>
		/// <param name="puaDefinition">The puaCharacter that is being inserted.</param>
		/// <param name="addToBidi">Receives a bidi entry built from
		///		<paramref name="puaDefinition"/> when the bidi values differ.</param>
		/// <param name="removeFromBidi">Receives a bidi entry built from
		///		<paramref name="line"/> when the bidi values differ.</param>
		/// <param name="addToNorm">Receives normalization entries built from
		///		<paramref name="puaDefinition"/> when the decompositions differ and the new
		///		decomposition is not the empty string.</param>
		/// <param name="removeFromNorm">Receives normalization entries built from
		///		<paramref name="line"/> when the decompositions differ and the old
		///		decomposition is not blank.</param>
		private static void AddToLists(string line, IPuaCharacter puaDefinition,
			List<IUcdCharacter> addToBidi, List<IUcdCharacter> removeFromBidi, List<IUcdCharacter> addToNorm,
			List<IUcdCharacter> removeFromNorm)
		{
#if DEBUGGING_SOMETHING
			int temp = line.IndexOf("F16F");	// junk for a debugging breakpoint...
			temp++;
#endif
			// Field number of the decomposition within the line; the Data array index is one less.
			const int decompositionField = 5;

			// Bidi: a mismatch queues the old line for removal and the new character for addition.
			var existingBidi = GetField(line, kiBidi + 1);
			if (puaDefinition.Bidi.Equals(existingBidi) == false)
			{
				var bidiFactory = new BidiCharacterFactory();
				removeFromBidi.Add(new BidiCharacter(line));
				addToBidi.Add(bidiFactory.Create(puaDefinition));
			}

			// Decomposition: nothing more to do when the raw fields are identical.
			string existingDecomp = GetField(line, decompositionField);
			string newRawDecomp = puaDefinition.Data[decompositionField - 1];
			if (existingDecomp == newRawDecomp)
			{
				return;
			}

			var normFactory = new NormalizationCharacterFactory();

			// Perform a quick attempt to remove basic decompositions.
			// TODO: Extend this to actually remove more complicated entries?
			// Currently this will remove anything that we have added.
			if (existingDecomp.Trim().Length > 0)
			{
				// A '>' in the decomposition field marks a compatibility decomposition.
				if (existingDecomp.IndexOf(">") >= 0)
				{
					removeFromNorm.Add(normFactory.Create(line, "NFKD_QC; N"));
				}
				removeFromNorm.Add(normFactory.Create(line, "NFD_QC; N"));
			}

			// Queue the new character's normalization entries, if it has a decomposition.
			if (puaDefinition.Decomposition != string.Empty)
			{
				// An empty decomposition type gets the canonical (NFD) entry as well.
				if (puaDefinition.DecompositionType == string.Empty)
				{
					addToNorm.Add(normFactory.Create(puaDefinition, "NFD_QC; N"));
				}
				// The compatibility (NFKD) entry is always added: canonical decompositions
				// count as compatibility decompositions, but not vice versa.
				addToNorm.Add(normFactory.Create(puaDefinition, "NFKD_QC; N"));
			}
		}
Ejemplo n.º 8
0
		/// <summary>
		/// Creates a UCDCharacter from <paramref name="puaChar"/>; all of the work is
		/// delegated to the base-class constructor.
		/// </summary>
		/// <param name="puaChar">The character handed to the base constructor.</param>
		public UCDCharacter(IPuaCharacter puaChar)
			: base(puaChar)
		{
		}
Ejemplo n.º 9
0
		/// <summary>
		/// Overwrites this character's data with the data of the given character.
		/// The codepoint and name are assigned through the <c>CodePoint</c> and <c>Name</c>
		/// properties; every other value is copied field to field.
		/// </summary>
		/// <param name="sourceIPuaChar">The character to copy. It is cast to
		/// <c>PUACharacter</c>, so it must be an instance of that type.</param>
		public void Copy(IPuaCharacter sourceIPuaChar)
		{
			var other = (PUACharacter)sourceIPuaChar;

			// Identity (assigned through the property setters)
			CodePoint = other.m_codepoint;
			Name = other.m_name;

			// Remaining data, copied straight into the backing fields
			m_generalCategory = other.m_generalCategory;
			m_canonicalCombiningClass = other.m_canonicalCombiningClass;
			m_bidiClass = other.m_bidiClass;
			m_decompositionType = other.m_decompositionType;
			m_decomposition = other.m_decomposition;
			m_numericType = other.m_numericType;
			m_numericValue = other.m_numericValue;
			m_bidiMirrored = other.m_bidiMirrored;
			m_unicode1Name = other.m_unicode1Name;
			m_isoComment = other.m_isoComment;
			m_upper = other.m_upper;
			m_lower = other.m_lower;
			m_title = other.m_title;
		}
Ejemplo n.º 10
0
		/// <summary>
		/// Creates a PUACharacter whose data is taken from another character by calling
		/// <c>Copy</c>.
		/// Derived classes can chain to this constructor so that all of the data is copied
		/// in one place.
		/// </summary>
		/// <param name="puaChar">The character whose data is copied into the new instance.</param>
		public PUACharacter(IPuaCharacter puaChar)
		{
			Copy(puaChar);
		}
Ejemplo n.º 11
0
		/// <summary>
		/// Creates a Normalization character: <paramref name="puaChar"/> goes to the base
		/// constructor and the normalization property is stored on this instance.
		/// </summary>
		/// <param name="puaChar">The character handed to the base constructor.</param>
		/// <param name="property">The normalization property to store.</param>
		public NormalizationCharacter(IPuaCharacter puaChar, string property)
			: base(puaChar)
		{
			this.property = property;
		}
Ejemplo n.º 12
0
		/// <summary>
		/// Creates a BidiCharacter from <c>puaChar</c>; all of the work is delegated to the
		/// base-class constructor.
		/// </summary>
		/// <param name="puaChar">The character handed to the base constructor.</param>
		public BidiCharacter(IPuaCharacter puaChar)
			: base(puaChar)
		{
		}