/**
 * Normalizes the text behind an iterator by stepping the <c>normalizer</c>
 * member one value at a time, either front-to-back or back-to-front.
 *
 * @param str     iterator over the text to normalize
 * @param mode    normalization mode installed on the normalizer before iterating
 * @param buf     scratch buffer; emptied on entry and used to assemble the result
 * @param dir     a positive value walks forward from the first element; any
 *                other value walks backward from the last element
 * @param options option bits switched on after every option has been cleared
 * @return the text accumulated in <c>buf</c>
 */
private String iterativeNorm(StringCharacterIterator str, NormalizerMode mode,
                             StringBuffer buf, int dir, int options)
{
    normalizer.SetText(str);
    normalizer.SetMode(mode);
    buf.Length = 0;

    // Clear every option first so only the requested ones are active.
    normalizer.SetOption(-1, false);
    normalizer.SetOption(options, true);

    if (dir > 0)
    {
        // Forward pass: pieces arrive in order, so append them.
        int codePoint = normalizer.First();
        while (codePoint != Normalizer.Done)
        {
            buf.Append(UTF16.ValueOf(codePoint));
            codePoint = normalizer.Next();
        }
    }
    else
    {
        // Backward pass: pieces arrive last-to-first, so prepend them.
        int codePoint = normalizer.Last();
        while (codePoint != Normalizer.Done)
        {
            buf.Insert(0, UTF16.ValueOf(codePoint));
            codePoint = normalizer.Previous();
        }
    }

    return buf.ToString();
}
/**
 * Normalizes <c>s1</c> with the given mode and reports a test error when the
 * result is not equal to <c>s2</c>.
 *
 * @param s1   text to normalize
 * @param s2   expected normalized text
 * @param mode normalization mode passed to <c>Normalizer.Normalize</c>
 */
private void cross(String s1, String s2, NormalizerMode mode)
{
    String normalized = Normalizer.Normalize(s1, mode);
    if (normalized.Equals(s2))
    {
        return;
    }

    Errln("cross test failed s1: " + Utility.Hex(s1) + " s2: " + Utility.Hex(s2));
}
/// <summary>
/// Creates the column description that corresponds to <paramref name="mode"/>.
/// </summary>
/// <param name="input">First argument forwarded to the column constructor.</param>
/// <param name="output">Second argument forwarded to the column constructor.</param>
/// <param name="mode">Selects which column type is instantiated.</param>
/// <returns>A new column instance for the requested mode.</returns>
/// <remarks>
/// Throws the exception built by <c>Contracts.ExceptParam</c> when
/// <paramref name="mode"/> is not one of the modes handled here.
/// </remarks>
internal static ColumnBase Create(string input, string output, NormalizerMode mode)
{
    if (mode == NormalizerMode.MinMax)
    {
        return new MinMaxColumn(input, output);
    }
    if (mode == NormalizerMode.MeanVariance)
    {
        return new MeanVarColumn(input, output);
    }
    if (mode == NormalizerMode.LogMeanVariance)
    {
        return new LogMeanVarColumn(input, output);
    }
    if (mode == NormalizerMode.Binning)
    {
        return new BinningColumn(input, output);
    }

    throw Contracts.ExceptParam(nameof(mode), "Unknown normalizer mode");
}
/**
 * Maps a normalization mode to a small index:
 * NFD = 0, NFKD = 1, NFC = 2, NFKC = 3.
 *
 * @param mode the mode to look up
 * @return the index for the mode, or -1 when the mode is none of the four above
 */
private static int getModeNumber(NormalizerMode mode)
{
    int index = -1;

    if (mode == NormalizerMode.NFD)
    {
        index = 0;
    }
    else if (mode == NormalizerMode.NFKD)
    {
        index = 1;
    }
    else if (mode == NormalizerMode.NFC)
    {
        index = 2;
    }
    else if (mode == NormalizerMode.NFKC)
    {
        index = 3;
    }

    return index;
}
/**
 * Normalizes <c>s</c> through several code paths and checks each result
 * against <c>exp</c>, stopping at the first mismatch. The paths, in order:
 * the one-shot <c>Normalizer.Normalize</c> call, iterative normalization of
 * the string forward and backward, and iterative normalization of a
 * <c>StringCharacterIterator</c> over the string forward and backward.
 *
 * @param mode    normalization mode under test
 * @param options option bits passed to every normalization call
 * @param s       input text
 * @param exp     expected normalized text
 * @param field   value substituted into the per-mode message template
 * @return true when every check passes, false as soon as one fails
 */
bool checkNorm(NormalizerMode mode, int options,
               // Normalizer2 norm2,
               String s, String exp, int field)
{
    // Both lookup tables are indexed by the same per-mode number.
    int modeIndex = getModeNumber(mode);
    String modeName = kModeStrings[modeIndex];
    String message = String.Format(kMessages[modeIndex], field);
    StringBuffer scratch = new StringBuffer();

    // One-shot API.
    String actual = Normalizer.Normalize(s, mode, options.AsFlagsToEnum<NormalizerUnicodeVersion>());
    if (!assertEqual(modeName, "", s, actual, exp, message))
    {
        return false;
    }

    // Iterative API over the string itself, forward then backward.
    actual = iterativeNorm(s, mode, scratch, +1, options);
    if (!assertEqual(modeName, "(+1)", s, actual, exp, message))
    {
        return false;
    }

    actual = iterativeNorm(s, mode, scratch, -1, options);
    if (!assertEqual(modeName, "(-1)", s, actual, exp, message))
    {
        return false;
    }

    // Iterative API over a character iterator, forward then backward.
    actual = iterativeNorm(new StringCharacterIterator(s), mode, scratch, +1, options);
    if (!assertEqual(modeName, "(+1)", s, actual, exp, message))
    {
        return false;
    }

    actual = iterativeNorm(new StringCharacterIterator(s), mode, scratch, -1, options);
    return assertEqual(modeName, "(-1)", s, actual, exp, message);
}
public Normalizer(IHostEnvironment env, NormalizerMode mode, params (string inputColumn, string outputColumn)[] columns)
/// <summary>
/// Initializes a new instance of <see cref="Normalizer"/> for a single column,
/// forwarding <paramref name="columnName"/> as both elements of the column pair.
/// </summary>
/// <param name="env">Host environment.</param>
/// <param name="columnName">Column name used for both elements of the pair.</param>
/// <param name="mode">Normalization mode; defaults to <see cref="NormalizerMode.MinMax"/>.</param>
public Normalizer(
    IHostEnvironment env,
    string columnName,
    NormalizerMode mode = NormalizerMode.MinMax)
    : this(env, mode, (columnName, columnName))
{
}
/// <summary>
/// Initializes a new instance of <see cref="NormalizingEstimator"/>.
/// </summary>
/// <param name="env">Host Environment.</param>
/// <param name="inputColumn">Name of the column to be transformed.</param>
/// <param name="outputColumn">Name of the output column. If this is null '<paramref name="inputColumn"/>' will be used.</param>
/// <param name="mode">The <see cref="NormalizerMode"/> indicating how the old values are mapped to the new values.</param>
public NormalizingEstimator(IHostEnvironment env, string inputColumn, string outputColumn = null, NormalizerMode mode = NormalizerMode.MinMax) : this(env, mode, (inputColumn, outputColumn ?? inputColumn)) {
/// <summary>
/// Initializes a new instance of <see cref="NormalizingEstimator"/>.
/// </summary>
/// <param name="env">Host Environment.</param>
/// <param name="outputColumnName">Name of the column resulting from the transformation of <paramref name="inputColumnName"/>.</param>
/// <param name="inputColumnName">Name of the column to transform.
/// If set to <see langword="null"/>, the value of the <paramref name="outputColumnName"/> will be used as source.</param>
/// <param name="mode">The <see cref="NormalizerMode"/> indicating how the old values are mapped to the new values.</param>
public NormalizingEstimator(IHostEnvironment env, string outputColumnName, string inputColumnName = null, NormalizerMode mode = NormalizerMode.MinMax) : this(env, mode, (outputColumnName, inputColumnName ?? outputColumnName)) {
/// <summary>
/// Creates the column options object that corresponds to <paramref name="mode"/>.
/// </summary>
/// <param name="outputColumnName">First argument forwarded to the options constructor.</param>
/// <param name="inputColumnName">Second argument forwarded to the options constructor.</param>
/// <param name="mode">Selects which options type is instantiated.</param>
/// <returns>A new options instance for the requested mode.</returns>
/// <remarks>
/// Throws the exception built by <c>Contracts.ExceptParam</c> when
/// <paramref name="mode"/> is not one of the modes handled here.
/// </remarks>
internal static ColumnOptionsBase Create(string outputColumnName, string inputColumnName, NormalizerMode mode)
{
    ColumnOptionsBase created;

    switch (mode)
    {
        case NormalizerMode.MinMax:
            created = new MinMaxColumnOptions(outputColumnName, inputColumnName);
            break;

        case NormalizerMode.MeanVariance:
            created = new MeanVarColumnOptions(outputColumnName, inputColumnName);
            break;

        case NormalizerMode.LogMeanVariance:
            created = new LogMeanVarColumnOptions(outputColumnName, inputColumnName);
            break;

        case NormalizerMode.Binning:
            created = new BinningColumnOptions(outputColumnName, inputColumnName);
            break;

        case NormalizerMode.SupervisedBinning:
            created = new SupervisedBinningColumOptions(outputColumnName, inputColumnName);
            break;

        default:
            throw Contracts.ExceptParam(nameof(mode), "Unknown normalizer mode");
    }

    return created;
}