/// <summary>
/// Helper for get_NormalizedFormAndFixOffsets. Pairs every character index of a segment of the
/// original string with the character index it lands on in the corresponding normalized segment.
/// Each original codepoint is decomposed on its own, and every codepoint of that decomposition is
/// searched for in the normalized segment. Each mapping also records whether the matched codepoint
/// is the *first* one of its decomposition, so that an offset which pointed at (say)
/// LATIN SMALL LETTER U WITH HOOK can be moved to the decomposed LATIN SMALL LETTER U and never
/// to COMBINING HOOK ABOVE.
/// </summary>
/// <param name="segment">Segment of the original string.</param>
/// <param name="normalizedSegment">The corresponding segment of the normalized string.</param>
/// <param name="icuNormalizer">ICU normalizer that produced <paramref name="normalizedSegment"/>.</param>
/// <returns>
/// A lazily produced sequence with one mapping (original index, normalized index, is-first flag)
/// for every decomposed codepoint that was found in the normalized segment.
/// </returns>
private IEnumerable<RearrangedIndexMapping> MatchUpIndexesAfterNormalization(string segment, string normalizedSegment, Normalizer2 icuNormalizer)
{
    // Work with (character index, codepoint) pairs: character indexes differ from codepoint
    // positions as soon as a surrogate pair is involved, and callers need character indexes.
    List<KeyValuePair<int, int>> sourcePairs = CodepointsByIndex(segment);
    List<KeyValuePair<int, int>> targetPairs = CodepointsByIndex(normalizedSegment);

    // Marker written over consumed entries; -1 can never equal a real index or codepoint.
    var consumed = new KeyValuePair<int, int>(-1, -1);

    foreach (KeyValuePair<int, int> sourcePair in sourcePairs)
    {
        // A null decomposition means the codepoint maps to itself.
        var decomposition = icuNormalizer.GetDecomposition(sourcePair.Value)
                            ?? char.ConvertFromUtf32(sourcePair.Value);

        foreach (KeyValuePair<int, int> piece in CodepointsByIndex(decomposition))
        {
            int wantedCodePoint = piece.Value;
            int slot = targetPairs.FindIndex(candidate => candidate.Value == wantedCodePoint);

            // A miss should never happen, but is skipped rather than treated as an error.
            if (slot >= 0)
            {
                // slot counts codepoints; the stored key is the character index we need.
                int normalizedIdx = targetPairs[slot].Key;

                // Make sure this position can never be matched a second time.
                targetPairs[slot] = consumed;

                yield return new RearrangedIndexMapping(sourcePair.Key, normalizedIdx, piece.Key == 0);
            }
        }
    }
}
/// <summary>
/// Un-normalizes the three corners of every face in <paramref name="meshData"/>, converts them to 3d,
/// stores the resulting faces in <c>this.meshData</c> and displays them.
/// </summary>
/// <param name="meshData">Normalized 2d half-edge mesh; it is only read, never modified.</param>
/// <param name="normalizer">Normalizer used to map each corner back from normalized space.</param>
public void DisplayMeshMain(HalfEdgeData2 meshData, Normalizer2 normalizer)
{
    // Collect the result in a fresh 3d structure so the incoming data stays untouched
    HalfEdgeData3 mesh3d = new HalfEdgeData3();

    foreach (HalfEdgeFace2 face in meshData.faces)
    {
        // Walk the three half-edges that make up this face
        var firstEdge = face.edge;
        var secondEdge = firstEdge.nextEdge;
        var thirdEdge = secondEdge.nextEdge;

        MyVector2 cornerA = normalizer.UnNormalize(firstEdge.v.position);
        MyVector2 cornerB = normalizer.UnNormalize(secondEdge.v.position);
        MyVector2 cornerC = normalizer.UnNormalize(thirdEdge.v.position);

        mesh3d.AddTriangle(
            cornerA.ToMyVector3_Yis3D(),
            cornerB.ToMyVector3_Yis3D(),
            cornerC.ToMyVector3_Yis3D());
    }

    this.meshData = mesh3d.faces;

    DisplayMesh(mesh3d.faces, displayMeshHere);
}
/// <summary>
/// Initializes a new <see cref="ICUNormalizer2FilterFactory"/> from its configuration arguments.
/// </summary>
/// <param name="args">
/// Factory arguments. Reads "name" (default "nfkc_cf"), "mode" ("compose" or "decompose",
/// default "compose") and the optional "filter" (a <see cref="UnicodeSet"/> pattern).
/// </param>
/// <exception cref="ArgumentException">
/// Thrown when <paramref name="args"/> still contains entries after the known ones were read.
/// </exception>
public ICUNormalizer2FilterFactory(IDictionary<string, string> args)
    : base(args)
{
    string normalizerName = Get(args, "name", "nfkc_cf");
    string modeName = Get(args, "mode", new string[] { "compose", "decompose" }, "compose");

    Normalizer2Mode normalizerMode = Normalizer2Mode.Decompose;
    if ("compose".Equals(modeName, StringComparison.Ordinal))
    {
        normalizerMode = Normalizer2Mode.Compose;
    }

    Normalizer2 configured = Normalizer2.GetInstance(null, normalizerName, normalizerMode);

    string filterPattern = Get(args, "filter");
    if (filterPattern != null)
    {
        UnicodeSet filterSet = new UnicodeSet(filterPattern);

        // An empty set leaves the unfiltered normalizer in place.
        if (filterSet.Any())
        {
            filterSet.Freeze();
            configured = new FilteredNormalizer2(configured, filterSet);
        }
    }

    if (args.Count > 0)
    {
        throw new ArgumentException(string.Format(J2N.Text.StringFormatter.CurrentCulture, "Unknown parameters: {0}", args));
    }

    this.normalizer = configured;
}
/// <summary>
/// Feeds random Unicode strings through an <see cref="ICUNormalizer2CharFilter"/> followed by a
/// keyword tokenizer and checks that the single resulting term equals what
/// <paramref name="normalizer"/> produces for the whole string.
/// </summary>
/// <param name="normalizer">Normalizer under test.</param>
/// <param name="maxLength">Length limit passed to the random string generator.</param>
/// <param name="iterations">Number of random strings to try.</param>
/// <param name="bufferSize">Buffer size handed to the char filter.</param>
public void DoTestMode(Normalizer2 normalizer, int maxLength, int iterations, int bufferSize)
{
    using (Analyzer analyzer = Analyzer.NewAnonymous(
        createComponents: (fieldName, reader) =>
            new TokenStreamComponents(new MockTokenizer(reader, MockTokenizer.KEYWORD, false)),
        initReader: (fieldName, reader) =>
            new ICUNormalizer2CharFilter(reader, normalizer, bufferSize)))
    {
        for (int remaining = iterations; remaining > 0; remaining--)
        {
            string input = TestUtil.RandomUnicodeString(Random, maxLength);
            if (input.Length > 0)
            {
                string expected = normalizer.Normalize(input);

                // MockTokenizer does not tokenize the empty string, so such results are skipped.
                if (expected.Length > 0)
                {
                    CheckOneTerm(analyzer, input, expected);
                }
            }
        }
    }
}
/// <summary>
/// Clips <paramref name="poly"/> against <paramref name="clipPoly"/> with Sutherland-Hodgman
/// (working in normalized coordinates) and displays the clipped polygon in red.
/// </summary>
private void TestSutherlandHodgman(List<MyVector2> poly, List<MyVector2> clipPoly)
{
    //The normalizer has to be built from the points of both polygons
    List<MyVector2> everyPoint = new List<MyVector2>(poly);

    everyPoint.AddRange(clipPoly);

    Normalizer2 normalizer = new Normalizer2(everyPoint);

    List<MyVector2> normalizedPoly = normalizer.Normalize(poly);
    List<MyVector2> normalizedClipPoly = normalizer.Normalize(clipPoly);

    //Clip in normalized space, then map the result back
    List<MyVector2> clipped = normalizer.UnNormalize(
        SutherlandHodgman.ClipPolygon(normalizedPoly, normalizedClipPoly));

    //2d to 3d
    List<Vector3> clipped3D = clipped.ConvertAll(point => point.ToVector3());

    //Display
    DisplayPolygon(clipped3D, Color.red);
}
/// <summary>
/// Runs the random-data consistency check through the char filter, first with the "nfkc_cf"
/// normalizer in compose mode and then with "nfkc" in decompose mode (NFKD), each time with
/// the default call and with a second call that passes 8192.
/// </summary>
public void TestRandomStrings()
{
    // nfkc_cf
    using (Analyzer casefolding = Analyzer.NewAnonymous(
        createComponents: (fieldName, reader) =>
            new TokenStreamComponents(new MockTokenizer(reader, MockTokenizer.WHITESPACE, false)),
        initReader: (fieldName, reader) =>
            new ICUNormalizer2CharFilter(reader, Normalizer2.GetInstance(null, "nfkc_cf", Normalizer2Mode.Compose))))
    {
        CheckRandomData(Random, casefolding, 1000 * RandomMultiplier);

        // huge strings
        CheckRandomData(Random, casefolding, 100 * RandomMultiplier, 8192);
    }

    // nfkd
    using (Analyzer decomposing = Analyzer.NewAnonymous(
        createComponents: (fieldName, reader) =>
            new TokenStreamComponents(new MockTokenizer(reader, MockTokenizer.WHITESPACE, false)),
        initReader: (fieldName, reader) =>
            new ICUNormalizer2CharFilter(reader, Normalizer2.GetInstance(null, "nfkc", Normalizer2Mode.Decompose))))
    {
        CheckRandomData(Random, decomposing, 1000 * RandomMultiplier);

        // huge strings
        CheckRandomData(Random, decomposing, 100 * RandomMultiplier, 8192);
    }
}
/// <summary>Creates a new <see cref="ICUNormalizer2CharFilterFactory"/>.</summary>
/// <param name="args">
/// Factory arguments. Reads "name" (default "nfkc_cf"), "mode" ("compose" or "decompose",
/// default "compose") and the optional "filter" (a <see cref="UnicodeSet"/> pattern).
/// </param>
/// <exception cref="ArgumentException">
/// Thrown when <paramref name="args"/> still contains entries after the known ones were read;
/// the message lists those entries.
/// </exception>
public ICUNormalizer2CharFilterFactory(IDictionary<string, string> args)
    : base(args)
{
    string name = Get(args, "name", "nfkc_cf");
    string mode = Get(args, "mode", new string[] { "compose", "decompose" }, "compose");

    // Ordinal comparison is spelled out: the mode is a fixed keyword, not linguistic text.
    Normalizer2 normalizer = Normalizer2.GetInstance(
        null,
        name,
        "compose".Equals(mode, StringComparison.Ordinal) ? Normalizer2Mode.Compose : Normalizer2Mode.Decompose);

    string filter = Get(args, "filter");
    if (filter != null)
    {
        UnicodeSet set = new UnicodeSet(filter);

        // An empty set leaves the unfiltered normalizer in place.
        if (set.Any())
        {
            set.Freeze();
            normalizer = new FilteredNormalizer2(normalizer, set);
        }
    }

    if (args.Count > 0)
    {
        // Concatenating the dictionary itself would only print its type name, so the
        // leftover entries are rendered explicitly as {key=value, ...}.
        List<string> leftovers = new List<string>(args.Count);
        foreach (KeyValuePair<string, string> entry in args)
        {
            leftovers.Add(entry.Key + "=" + entry.Value);
        }

        throw new ArgumentException("Unknown parameters: {" + string.Join(", ", leftovers) + "}");
    }

    this.normalizer = normalizer;
}
/// <summary>
/// Click handler: normalizes the content of <c>uxText</c> with the NFKC-casefold normalizer and
/// shows the result in a message box.
/// </summary>
private void UxNormalizedClick(object sender, EventArgs e)
{
    using (Normalizer2 nfkcCasefold = Normalizer2.GetNFKCCasefoldInstance())
    {
        MessageBox.Show(nfkcCasefold.Normalize(this.uxText.Text));
    }
}
/// <summary>
/// Loads the "utr30" normalizer (compose mode) from the embedded "utr30.nrm" resource and
/// stores it in the static <c>normalizer</c> field.
/// </summary>
static ICUFoldingFilter()
{
    // TODO: if the wrong version of the ICU jar is used, loading these data files may give a strange error.
    // maybe add an explicit check? http://icu-project.org/apiref/icu4j/com/ibm/icu/util/VersionInfo.html
    var owner = typeof(ICUFoldingFilter);
    var utr30Data = owner.GetTypeInfo().Assembly.FindAndGetManifestResourceStream(owner, "utr30.nrm");

    normalizer = Normalizer2.GetInstance(utr30Data, "utr30", Normalizer2Mode.Compose);
}
/// <summary>
/// Initializes an <see cref="ICUNormalizer2CharFilter"/> that normalizes <paramref name="input"/>
/// with the given <see cref="Normalizer2"/>, delegating to the three-argument constructor with
/// a buffer size of 128.
/// </summary>
/// <param name="input">Input text.</param>
/// <param name="normalizer">Normalizer to use.</param>
/// <exception cref="ArgumentNullException"><paramref name="normalizer"/> is <c>null</c>.</exception>
public ICUNormalizer2CharFilter(TextReader input, Normalizer2 normalizer)
    : this(input, normalizer, 128)
{
    this.normalizer = normalizer ?? throw new ArgumentNullException(nameof(normalizer));
}
// for testing ONLY
/// <summary>
/// Initializes the filter with an explicit size for its temporary character buffer.
/// </summary>
/// <param name="input">Input text.</param>
/// <param name="normalizer">Normalizer to use.</param>
/// <param name="bufferSize">Size passed to <c>CharacterUtils.NewCharacterBuffer</c>.</param>
/// <exception cref="ArgumentNullException"><paramref name="normalizer"/> is <c>null</c>.</exception>
internal ICUNormalizer2CharFilter(TextReader input, Normalizer2 normalizer, int bufferSize)
    : base(input)
{
    // The null check runs before the buffer is allocated.
    this.normalizer = normalizer ?? throw new ArgumentNullException(nameof(normalizer));
    this.tmpBuffer = CharacterUtils.NewCharacterBuffer(bufferSize);
}
/// <summary>
/// Maps a <see cref="UNormalizationMode"/> to a <see cref="Normalizer2"/> instance: NFC and NFD
/// use the "nfc" data, every other mode uses "nfkc"; NFC and NFKC compose, every other mode
/// decomposes.
/// </summary>
private static Normalizer2 GetNormalizer(UNormalizationMode mode)
{
    bool usesCanonicalData = mode == UNormalizationMode.UNORM_NFC || mode == UNormalizationMode.UNORM_NFD;
    bool composes = mode == UNormalizationMode.UNORM_NFC || mode == UNormalizationMode.UNORM_NFKC;

    string dataName = usesCanonicalData ? "nfc" : "nfkc";
    var direction = composes ? Normalizer2.Mode.COMPOSE : Normalizer2.Mode.DECOMPOSE;

    return Normalizer2.GetInstance(null, dataName, direction);
}
/// <summary>
/// Checks that an <see cref="ICUNormalizer2Filter"/> built from the "nfc" data in decompose mode
/// (i.e. NFD) splits e-acute into "e" plus the combining acute accent.
/// </summary>
public void TestAlternate()
{
    using Analyzer analyzer = Analyzer.NewAnonymous(createComponents: (fieldName, reader) =>
    {
        Tokenizer source = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);

        /* specify nfc with decompose to get nfd */
        var nfdFilter = new ICUNormalizer2Filter(
            source,
            Normalizer2.GetInstance(null, "nfc", Normalizer2Mode.Decompose));

        return new TokenStreamComponents(source, nfdFilter);
    });

    // decompose EAcute into E + combining Acute
    AssertAnalyzesTo(analyzer, "\u00E9", new string[] { "\u0065\u0301" });
}
/// <summary>
/// Runs the analysis consistency check on a string of one million 'a' characters passed
/// through the "nfkc_cf" char filter and a keyword tokenizer.
/// </summary>
public void TestVeryLargeInputOfNonInertChars()
{
    string hugeInput = new string('a', 1000000);

    using Analyzer analyzer = Analyzer.NewAnonymous(
        createComponents: (fieldName, reader) =>
            new TokenStreamComponents(new KeywordTokenizer(reader)),
        initReader: (fieldName, reader) =>
            new ICUNormalizer2CharFilter(reader, Normalizer2.GetInstance(null, "nfkc_cf", Normalizer2Mode.Compose)));

    CheckAnalysisConsistency(Random, analyzer, false, hugeInput);
}
/// <summary>
/// Builds a Delaunay triangulation of <paramref name="points_2d"/> (computed in normalized
/// space with the point-by-point method), converts it to clockwise 3d triangles and displays
/// the mesh edges in black.
/// </summary>
private void GenerateDelaunay(HashSet<MyVector2> points_2d)
{
    //Normalize
    Normalizer2 normalizer = new Normalizer2(new List<MyVector2>(points_2d));

    HashSet<MyVector2> normalizedPoints = normalizer.Normalize(points_2d);

    //Triangulate in normalized space, then map the half-edge data back
    HalfEdgeData2 normalizedDelaunay = _Delaunay.PointByPoint(normalizedPoints, new HalfEdgeData2());

    HalfEdgeData2 delaunay = normalizer.UnNormalize(normalizedDelaunay);

    //From half-edge to triangles, all with the same (clockwise) orientation
    HashSet<Triangle2> orientedTriangles = HelpMethods.OrientTrianglesClockwise(
        _TransformBetweenDataStructures.HalfEdge2ToTriangle2(delaunay));

    //2d to 3d
    HashSet<Triangle3> triangles3d = new HashSet<Triangle3>();

    foreach (Triangle2 triangle in orientedTriangles)
    {
        Triangle3 lifted = new Triangle3(
            triangle.p1.ToMyVector3_Yis3D(),
            triangle.p2.ToMyVector3_Yis3D(),
            triangle.p3.ToMyVector3_Yis3D());

        triangles3d.Add(lifted);
    }

    Mesh delaunayMesh = _TransformBetweenDataStructures.Triangle3ToCompressedMesh(triangles3d);

    //Display the delaunay triangles
    TestAlgorithmsHelpMethods.DisplayMeshEdges(delaunayMesh, Color.black);
}
/// <summary>
/// Checks terms and offsets produced by a whitespace tokenizer reading through an NFKC
/// <see cref="ICUNormalizer2CharFilter"/>.
/// </summary>
public void TestTokenStream()
{
    // Seven whitespace-separated items: DEGREE CELSIUS, NUMERO SIGN, PARENTHESIZED IDEOGRAPH STOCK,
    // SQUARE GURAMU, then KATAKANA SA, KATAKANA SO and SQUARE PIKO, each followed by the
    // combining voiced sound mark. Written with escapes so the combining marks stay explicit;
    // the expected offsets below depend on them being separate characters.
    string input = "\u2103 \u2116 \u3231 \u3318 \u30B5\u3099 \u30BD\u3099 \u3330\u3099";

    Normalizer2 nfkc = Normalizer2.GetInstance(null, "nfkc", Normalizer2Mode.Compose);
    CharFilter filteredInput = new ICUNormalizer2CharFilter(new StringReader(input), nfkc);

    Tokenizer tokenizer = new MockTokenizer(filteredInput, MockTokenizer.WHITESPACE, false);

    string[] expectedTerms = { "°C", "No", "(株)", "グラム", "ザ", "ゾ", "ピゴ" };
    int[] expectedStarts = { 0, 2, 4, 6, 8, 11, 14 };
    int[] expectedEnds = { 1, 3, 5, 7, 10, 13, 16 };

    AssertTokenStreamContents(tokenizer, expectedTerms, expectedStarts, expectedEnds, input.Length);
}
/// <summary>
/// Runs the analysis consistency check 1000 times on one fixed string, using the "nfkc_cf"
/// char filter and a whitespace tokenizer.
/// </summary>
public void TestCuriousString()
{
    string curious = "\udb40\udc3d\uf273\ue960\u06c8\ud955\udc13\ub7fc\u0692 \u2089\u207b\u2073\u2075";

    using Analyzer analyzer = Analyzer.NewAnonymous(
        createComponents: (fieldName, reader) =>
            new TokenStreamComponents(new MockTokenizer(reader, MockTokenizer.WHITESPACE, false)),
        initReader: (fieldName, reader) =>
            new ICUNormalizer2CharFilter(reader, Normalizer2.GetInstance(null, "nfkc_cf", Normalizer2Mode.Compose)));

    for (int round = 1; round <= 1000; round++)
    {
        CheckAnalysisConsistency(Random, analyzer, false, curious);
    }
}
/// <summary>
/// Checks that the single character U+FDFA is expanded by the "nfkc_cf" char filter into four
/// whitespace-separated terms, and that their offsets all map back onto that one character.
/// </summary>
public void TestMassiveLigature()
{
    string ligature = "\uFDFA";

    Normalizer2 nfkcCasefold = Normalizer2.GetInstance(null, "nfkc_cf", Normalizer2Mode.Compose);
    CharFilter filteredInput = new ICUNormalizer2CharFilter(new StringReader(ligature), nfkcCasefold);

    Tokenizer tokenizer = new MockTokenizer(filteredInput, MockTokenizer.WHITESPACE, false);

    string[] expectedTerms = { "صلى", "الله", "عليه", "وسلم" };
    int[] expectedStarts = { 0, 0, 0, 0 };
    int[] expectedEnds = { 0, 0, 0, 1 };

    AssertTokenStreamContents(tokenizer, expectedTerms, expectedStarts, expectedEnds, ligature.Length);
}
/// <summary>
/// Checks terms and offsets produced by a 1-gram tokenizer reading through an "nfkc_cf"
/// <see cref="ICUNormalizer2CharFilter"/>.
/// </summary>
public void TestTokenStream2()
{
    // SQUARE PIKO + combining voiced sound mark, '5', DEGREE CELSIUS, NUMERO SIGN,
    // PARENTHESIZED IDEOGRAPH STOCK, SQUARE GURAMU, then KATAKANA SA and KATAKANA SO each
    // followed by the combining voiced sound mark. Written with escapes so the combining
    // marks stay explicit; the expected offsets below depend on them being separate characters.
    string input = "\u3330\u3099" + "5" + "\u2103\u2116\u3231\u3318" + "\u30B5\u3099\u30BD\u3099";

    Normalizer2 nfkcCasefold = Normalizer2.GetInstance(null, "nfkc_cf", Normalizer2Mode.Compose);
    CharFilter filteredInput = new ICUNormalizer2CharFilter(new StringReader(input), nfkcCasefold);

    Tokenizer tokenizer = new NGramTokenizer(TEST_VERSION_CURRENT, filteredInput, 1, 1);

    string[] expectedTerms = { "ピ", "ゴ", "5", "°", "c", "n", "o", "(", "株", ")", "グ", "ラ", "ム", "ザ", "ゾ" };
    int[] expectedStarts = { 0, 1, 2, 3, 3, 4, 4, 5, 5, 5, 6, 6, 6, 7, 9 };
    int[] expectedEnds = { 1, 2, 3, 3, 4, 4, 5, 5, 5, 6, 6, 6, 7, 9, 11 };

    AssertTokenStreamContents(tokenizer, expectedTerms, expectedStarts, expectedEnds, input.Length);
}
/// <summary>
/// Intersects <paramref name="poly"/> with <paramref name="clipPoly"/> using Greiner-Hormann
/// (working in normalized coordinates), logs the polygon and vertex counts, and displays every
/// resulting polygon in red.
/// </summary>
private void TestGreinerHormann(List<MyVector2> poly, List<MyVector2> clipPoly)
{
    //The normalizer has to be built from the points of both polygons
    List<MyVector2> everyPoint = new List<MyVector2>(poly);

    everyPoint.AddRange(clipPoly);

    Normalizer2 normalizer = new Normalizer2(everyPoint);

    List<MyVector2> normalizedPoly = normalizer.Normalize(poly);
    List<MyVector2> normalizedClipPoly = normalizer.Normalize(clipPoly);

    //Several polygons can come back because one of the inputs may be non-convex.
    //With the intersection operation the result should match Sutherland-Hodgman.
    List<List<MyVector2>> normalizedParts = GreinerHormann.ClipPolygons(normalizedPoly, normalizedClipPoly, BooleanOperation.Intersection);

    Debug.Log("Total polygons: " + normalizedParts.Count);

    foreach (List<MyVector2> normalizedPart in normalizedParts)
    {
        Debug.Log("Vertices in this polygon: " + normalizedPart.Count);

        //Back to the original coordinates, then 2d to 3d
        List<MyVector2> part = normalizer.UnNormalize(normalizedPart);

        List<Vector3> part3D = part.ConvertAll(point => point.ToVector3());

        //Display
        DisplayPolygon(part3D, Color.red);
    }
}
/// <summary>
/// Enumerates the limit indexes of the normalization "segments" of this string for a given ICU
/// normalizer. A segment is a run of characters that interact under that normalization and so
/// cannot be split and normalized separately without changing the result. Under NFC, for
/// example, LATIN SMALL LETTER C (U+0063) followed by COMBINING CEDILLA (U+0327) followed by
/// LATIN SMALL LETTER D (U+0064) has "c + cedilla" as one segment, and the index of the "d" is
/// that segment's limit.
///
/// Index 0 is never produced and the last value produced is always the length of the string;
/// an empty string produces nothing. Starting with a previous index of 0, it is therefore
/// always safe to call GetChars(previousIndex, thisIndex) for each value to get the current segment.
/// </summary>
/// <param name="icuNormalizer">ICU normalizer to use (get this from CustomIcu.GetIcuNormalizer)</param>
/// <returns>
/// Indexes into this TsString at every segment boundary, suitable for GetChars(prevIdx, thisIdx).
/// </returns>
private IEnumerable<int> EnumerateSegmentLimits(Normalizer2 icuNormalizer)
{
    if (!string.IsNullOrEmpty(Text))
    {
        for (int offset = 0; offset < Text.Length;)
        {
            int codepoint = char.ConvertToUtf32(Text, offset);

            // A boundary at the very start is not a limit of anything, so it is not reported.
            if (icuNormalizer.HasBoundaryBefore(codepoint) && offset > 0)
            {
                yield return offset;
            }

            // Codepoints outside the BMP occupy two chars (a surrogate pair).
            int width = codepoint > 0xffff ? 2 : 1;
            offset += width;
        }

        yield return Text.Length;
    }
}
/// <summary>
/// Generates random sites, builds their Voronoi diagram in normalized space, and displays the
/// cells, the sites and (for comparison) the Delaunay triangulation of the same sites.
/// </summary>
private void OnDrawGizmos()
{
    //Alternative inputs: GetCustomSites() or GetCustomSites2()
    HashSet<Vector3> sites_3d = GetRandomSites();

    //3d to 2d
    HashSet<MyVector2> sites_2d = new HashSet<MyVector2>();

    foreach (Vector3 site in sites_3d)
    {
        sites_2d.Add(site.ToMyVector2());
    }

    //Normalize
    Normalizer2 normalizer = new Normalizer2(new List<MyVector2>(sites_2d));

    HashSet<MyVector2> normalizedSites = normalizer.Normalize(sites_2d);

    //Generate the voronoi in normalized space and map the cells back
    HashSet<VoronoiCell2> normalizedCells = _Voronoi.DelaunyToVoronoi(normalizedSites);

    HashSet<VoronoiCell2> cells = normalizer.UnNormalize(normalizedCells);

    //Display the voronoi diagram
    DisplayVoronoiCells(cells);

    //Display the sites
    TestAlgorithmsHelpMethods.DisplayPoints(sites_3d, 0.5f, Color.black);

    //Generate delaunay for comparisons
    GenerateDelaunay(sites_2d);
}
/// <summary>
/// Returns the ICU <see cref="Normalizer2"/> for a given normalization mode (NFC, NFD, etc.).
/// Used in several parts of the TsString normalization code. When
/// <c>HaveCustomIcuLibrary</c> is true the "_fw" variants of the data names are requested.
/// </summary>
/// <param name="normalizationMode">Mode to look up; NFSC uses the same normalizer as NFC.</param>
/// <exception cref="NotImplementedException">The mode is not one of the handled values.</exception>
public static Normalizer2 GetIcuNormalizer(FwNormalizationMode normalizationMode)
{
    bool compatibility;
    Normalizer2.Mode mode;

    switch (normalizationMode)
    {
        case FwNormalizationMode.knmNFC:
        case FwNormalizationMode.knmNFSC:
            compatibility = false;
            mode = Normalizer2.Mode.COMPOSE;
            break;
        case FwNormalizationMode.knmNFD:
            compatibility = false;
            mode = Normalizer2.Mode.DECOMPOSE;
            break;
        case FwNormalizationMode.knmNFKC:
            compatibility = true;
            mode = Normalizer2.Mode.COMPOSE;
            break;
        case FwNormalizationMode.knmNFKD:
            compatibility = true;
            mode = Normalizer2.Mode.DECOMPOSE;
            break;
        case FwNormalizationMode.knmFCD:
            compatibility = false;
            mode = Normalizer2.Mode.FCD;
            break;
        default:
            throw new NotImplementedException("Unimplemented value for FwNormalizationMode");
    }

    // The data name is the base form plus "_fw" when the custom ICU library is available.
    string baseName = compatibility ? "nfkc" : "nfc";
    string name = HaveCustomIcuLibrary ? baseName + "_fw" : baseName;

    return Normalizer2.GetInstance(null, name, mode);
}
/// <summary>
/// PCA demo: fits a normalizer and a PCA trainer on a generated data set, converts the
/// normalized vectors, de-normalizes the result and prints every stage to the console.
/// </summary>
public static void Demo()
{
    PCANetwork network = PCANetwork.Create(2, 1);
    var samples = DataGenerator.GenerateDataSet2();

    var pca = new PCA(network, 0.7);
    var trainer = new PCATrainer(network, 50, pca, 0.0000005);

    var normalizer = new Normalizer2();
    normalizer.Fit(samples.XList);
    var normalizedInput = normalizer.Normalize(samples.XList);

    trainer.Fit(normalizedInput);

    List<List<double>> projected = trainer.GetConvertedDim(normalizedInput);
    List<List<double>> restored = normalizer.DeNormalize(projected);

    network.Display();

    Console.WriteLine("OLD VECTORS===>");
    Utils.DisplayListList(samples.XList);

    Console.WriteLine("OLD VECTORS(NORMALIZED)===>");
    Utils.DisplayListList(normalizedInput);

    Console.WriteLine("NEW VECTORS(NORMALIZED)===>");
    Utils.DisplayListList(projected);

    Console.WriteLine("NEW VECTORS===>");
    Utils.DisplayListList(restored);
}
/// <summary>
/// Reads a mixed-script string through the "nfkc_cf" char filter ten characters at a time.
/// After every read it checks that the output so far equals the normalization of the input
/// prefix reported by <c>CorrectOffset</c>, and finally that the complete output equals the
/// normalization of the whole input.
/// </summary>
public void TestNormalization()
{
    string input = "ʰ㌰゙5℃№㈱㌘,バッファーの正規化のテスト.㋐㋑㋒㋓㋔カキクケコザジズゼゾg̈각/각நிเกषिchkʷक्षि";
    Normalizer2 normalizer = Normalizer2.GetInstance(null, "nfkc_cf", Normalizer2Mode.Compose);
    string expectedOutput = normalizer.Normalize(input);

    CharFilter reader = new ICUNormalizer2CharFilter(new StringReader(input), normalizer);
    char[] chunk = new char[10];
    StringBuilder collected = new StringBuilder();

    int charsRead;
    while ((charsRead = reader.read(chunk)) != -1)
    {
        collected.Append(chunk, 0, charsRead);

        // Input position that corresponds to the end of the output collected so far.
        int consumedInput = reader.CorrectOffset(collected.Length);
        assertEquals(collected.toString(), normalizer.Normalize(input.Substring(0, consumedInput)));
    }

    assertEquals(expectedOutput, collected.toString());
}
/// <summary>
/// Initializes an <see cref="ICUNormalizer2CharFilter"/> that normalizes <paramref name="input"/>
/// with the given <see cref="Normalizer2"/>, delegating to the three-argument constructor with
/// a buffer size of 128.
/// </summary>
/// <param name="input">Input text.</param>
/// <param name="normalizer">Normalizer to use.</param>
/// <exception cref="ArgumentNullException"><paramref name="normalizer"/> is <c>null</c>.</exception>
public ICUNormalizer2CharFilter(TextReader input, Normalizer2 normalizer)
    : this(input, normalizer, 128)
{
    if (normalizer == null)
    {
        throw new ArgumentNullException(nameof(normalizer));
    }

    this.normalizer = normalizer;
}
/// <summary>
/// Initializes an <see cref="ICUNormalizer2CharFilter"/> over <paramref name="input"/> using the
/// "nfkc_cf" normalizer in compose mode (NFKC_Casefold: NFKC normalization combined with case
/// folding and removal of Default Ignorables).
/// </summary>
/// <param name="input">Input text.</param>
public ICUNormalizer2CharFilter(TextReader input)
    : this(input, Normalizer2.GetInstance(null, "nfkc_cf", Normalizer2Mode.Compose))
{
}
/// <summary>
/// SOM demo: trains a self-organizing map on a generated, normalized data set, prints the
/// inputs and outputs, prints the grid coordinates of the maximum output for every sample,
/// writes a matplotlib script ("display.py" in the application base directory) that plots
/// those coordinates, and starts a Python interpreter on it.
/// </summary>
public static void Demo()
{
    SOMNetwork network = SOMNetwork.Create(9, 3);
    network.Display();

    var samples = DataGenerator.GenerateDataSet3();
    var som = new SOM(network);

    // Two training phases with a shrinking learning rate and radius
    var schedule = new List<SOMSetting>
    {
        new SOMSetting { FromEpoch = 0, ToEpoch = 500, LearningRate = 0.8, Radius = 2 },
        new SOMSetting { FromEpoch = 501, ToEpoch = 1000, LearningRate = 0.4, Radius = 1 },
    };
    var trainer = new SOMTrainer(network, 1000, som, schedule);

    var normalizer = new Normalizer2();
    normalizer.Fit(samples.XList);
    var normalizedInput = normalizer.Normalize(samples.XList);

    trainer.Fit(normalizedInput);

    List<List<double>> clustered = trainer.GetOutputs(normalizedInput);
    List<List<double>> distances = trainer.GetOutputs2(normalizedInput);

    network.Display();

    Console.WriteLine("OLD VECTORS===>");
    Utils.DisplayListList(samples.XList);

    Console.WriteLine("OLD VECTORS(NORMALIZED)===>");
    Utils.DisplayListList(normalizedInput);

    Console.WriteLine("NEW CLUSTERED VECTORS===>");
    Utils.DisplayListList(clustered);

    Console.WriteLine("NEW CLUSTERED VECTORS(DISTANCE)===>");
    Utils.DisplayListList(distances);

    StringBuilder xValues = new StringBuilder();
    StringBuilder yValues = new StringBuilder();

    foreach (var row in distances)
    {
        // Position of the largest value in this row
        var winner = row.IndexOf(row.Max());

        // Translate that flat position into (row, column) coordinates of the output grid
        for (var gridX = 0; gridX < network.OutputNeuronCountPerDim; gridX++)
        {
            for (var gridY = 0; gridY < network.OutputNeuronCountPerDim; gridY++)
            {
                if (gridX * network.OutputNeuronCountPerDim + gridY != winner)
                {
                    continue;
                }

                xValues.Append(gridX + ",");
                yValues.Append(gridY + ",");
                Console.Write("(" + gridX + "," + gridY + "),");
            }
        }

        Console.WriteLine();
    }

    // Build the plotting script; the trailing commas of both lists are trimmed off
    StringBuilder script = new StringBuilder();
    script.AppendLine("import matplotlib.pyplot as plt");
    script.AppendLine("x1=[" + xValues.ToString().TrimEnd(',') + "]");
    script.AppendLine("y1=[" + yValues.ToString().TrimEnd(',') + "]");
    script.AppendLine("plt.plot(x1,y1,'b^')");
    script.AppendLine("plt.show()");

    var scriptPath = System.IO.Path.Combine(AppDomain.CurrentDomain.BaseDirectory, "display.py");
    File.WriteAllText(scriptPath, script.ToString());
    Console.WriteLine("saved to path: " + scriptPath);

    System.Diagnostics.Process.Start("C:\\ProgramData\\Anaconda3\\envs\\keras\\python.exe", "\"" + scriptPath + "\"");
}
/// <summary>
/// Runs <c>DoTestMode</c> with the "nfkc_cf" normalizer in compose mode, passing 256,
/// <c>RandomMultiplier * 500</c> and 16 as the remaining arguments.
/// </summary>
public void TestNFKC_CFHuge()
{
    Normalizer2 nfkcCasefold = Normalizer2.GetInstance(null, "nfkc_cf", Normalizer2Mode.Compose);

    DoTestMode(nfkcCasefold, 256, RandomMultiplier * 500, 16);
}
/// <summary>
/// Feeds one fixed, very long text through an analyzer whose reader is wrapped in an
/// <c>ICUNormalizer2CharFilter</c> ("nfkc_cf", compose mode) and whose tokenizer is a
/// <c>MockTokenizer</c> in <c>WHITESPACE</c> mode, calling <c>CheckAnalysisConsistency</c>
/// on it 25 times with the test's <c>Random</c>. The analyzer is disposed by the
/// <c>using</c> block when the loop finishes.
/// </summary>
/// <remarks>
/// The text mixes ASCII words, control characters, escaped code units from many Unicode
/// blocks (including surrogate pairs), and markup- and regex-like fragments.
/// NOTE(review): it looks like a captured randomly generated input -- presumably from a past
/// failure; confirm. Keep the literal verbatim: in this view some quoted segments are broken
/// across physical lines, so reformatting or re-wrapping it may change the fixture.
/// </remarks>
public void TestCuriousMassiveString() { String text = "yqt \u0728\u0707\u0712\u0720\u0734 \u204c\u201d hyipy \u2667\u2619" + "\u26ec\u267b\u26da uboyjwfbv \u2012\u205d\u2042\u200a\u2047\u2040 gyxmmz yvv %" + "\ufb86 \n<script> hupvobbv jvsztd x rww ct{1,5} brteyi dfgyzqbm hdykd ahgeizyhv" + " kLn c#\ud8f8\udd74 fPezd ktedq \ufcea=\ud997\uddc9\u876bJ\u0231\ud98a\udce0\uf872" + " zquqah \ub9d6\u144e\uc686 3\ud93d\udfca\u1215\ud614 tnorask \u0348\u0334\u0334" + "\u0300\u033d geqdeoghh foojebut \ufb52\u227ag\ud9bd\udc3a\u07efK nyantr lksxw fs" + "zies ubzqfolksjpgk \u6fa3\ud859\udc36\u0501\ucca0\u0306\u001e\ua756\u052f \ucaf7" + "\u0247\u0009\ufdddg \ud83c\udd02\ud83c\uddaf \u05628\u2b49\u02e3\u0718\u0769\u4f1b" + "\u0496\u0766\uecaa\ufb44 \u001d \u0006hr\u00f0\ue649\u041a\uda6f\udfa5\uf31b\ue274" + " ptgjf \ud8cc\udf83M\u0013\u04c6i \u205f\u2004\u2032\u2001\u2057\u2066 \u07d0\uacdb" + "\u06a5z pqfxwgbwe \ud1bc\u2eba\u2d45\u02ee\u56df xnujtfs \u1b19\u1b17\u1b39\u1b20" + "\u1b69\u1b58\u1b03\u1b6e\u1b73\u1b20 afsl zxlzziqh ahrhckhktf \ud801\udc5b\ud801\udc61" + " bkpmeyyqobwi qnkunmpjpihezll plhhws \u37f7\u41d6\u3dca\u3e80\u4923\u36b8 \u195a\u1959" + "\u196f\u1968\u1958\u197f </p \u0006s\u019f\uda82\udc90H_\u079d\ufd6f: idpp \u217c" + "\u2168\u2185\u2179\u2156\u2159\u217c\u2154\u2158 ({1,5}( jkieylqzmb bfirnaj \uea71" + "\uf17f\u0749\u054c \ud9ea\udf83\udbea\ude91j x\u3366\u09c2\ud828\udd13~\u6fda\ueeee" + " \ud834\udcd3\ud834\udc2b\ud834\udc8b\ud834\udcd8 dl \ud802\ude3a\ud802\ude36\ud802" + "\ude23\ud802\ude56 \u20ad\u20a0\u20a0\u20a0\u20b8\u20b4\u20ad lcql \u0547\u0156]" + "\ue344V] \ud90c\udd7d\u000eS\uf965\uf15e\u008f qdn \udac3\ude3c buy m qni \u31a4\u31a3" + "\u31b2\u31a9\u31a7 \u20df\u20ee\u20d3\u20d4\u20f1\u20de \u4dc1\u4dc4\u4dde\u4dec\u4df9" + "\u4dee\u4dc5 \udb40\udc36 gstfqnfWY \u24ab\u8d69\u0301 n?]v||]i )- \udb0b\ude77\uf634" + "\u0762 tpkkjlbcntsk eebtzirw xo hktxy \n</ vxro xpr mtlp p|tjf|i?.- lxpfo \udbd7" + "\udf78\udbf5\udf57 u..b jjj]p?e jtckvhqb 
\u20eb\u20ea\u20fa\u20ef\u20e1\u20ed\u20eb vvm" + "uhbsvyi jx mqwxckf \u00d7 qqzs \u05ae\u022d\udb7c\udfb1\u070b vnhkz egnutyuat \udaa2" + "\udf20\ufa45#O\u2b61\u0d0e \u09a2\u0996\u09cd\u09b4 v \udbdb\ude9bqp owsln \ua837\ua833" + "\ua83f\ua83f\ua83f\ua831\ua83c\ua839 \u1a15\u1a1c\u1a12 \ud83c\ude20 >&pt&#x>129 \ud9f1" + "\udf8c\uecdd \ud809\udc48\ud809\udc72 wswskop \ua70d\ua70e ohcjsboriux \u2025\u2000\u206e" + "\u2039\u2036\u2002\u200e \u1741\u1741\u175e bsykot \u0696\ufab5W ifj cnosdrAxa qtv wvrdn" + " dmt (){0 udqg Z5U\ufcab\ue9ed\u0378 ts zndsksqtu uxbkAbn \u0535\"\u001b\ud923\udcc3\udae7" + "\udccf\u02ac \u0131\ua72e\u1273\u02f8\u2329\u5d83Q icakme oojxv hpec xndjzxre )e{0,5}. " + "\u0005!\u0009\u0004\u000bt\u0006N vqxnokp cdshqof \ua654\ua681\ua667\ua676\ua698 vwwp " + "\ufe4b\ufe3b\ufe4d\ufe42\ufe44\ufe38\ufe4b\ufe4c\ufe4b\ufe38 \u2ffb\u2ffa\u2ff6\u2ff7" + "\u2ffc\u2ff6\u2ff5 \u3116\u312c\u3100\u3102\u310f\u3116 agyueiij \u0764\u1178\ud866\udca1" + "\u00f2\u039d \ud802\udc12\ud802\udc1e\ud802\udc28\ud802\udc19 kygiv fxctjyj \n" + " omu \ud855\udeb1\u063c\ufd54\u9dbf\uf80a\ufc60 \u76ee\u3365\uf998\u70a8\u02d2 \u05e5" + "\u0478\u61bb \ua71c\ua708\ua70d\ua71a\ua712\ua712\ua719\ua706\ua71f\ua71c\ua71e\ua718 pgq" + "arvtzcoduk pyok \u1779\u1766\u1778\u177b \u16e5\u16a7\u16f3\u16fe\u16c8\u16ba\u16a4 \u0527" + "\u052f\u0513\u0500 iisgh hxd \u13dd\u13c6\u13db\u13ee\u13d7 \u0019\ufed4 \ud802\udd3c\ud802" + "\udd30\ud802\udd3d\ud802\udd24\ud802\udd2c jm gvylt eax xsbt mvuvnw \u0246\u5514\udb16\uddcf" + "\u1dc2\ud07b\u07af\u12e8\u8e8f\u0004 phy haduzzw \u04a1\u8334\u14b5\uf0de\udb4b\udec0\u6b69 " + "dubdl \u0cd2\u06c7\uf0297\u45efy-\u05e9\u01a3\uf46f aiafsh &# \u0003\ue5ab\uedcd] xhz vil" + "wdlytsj \uda10\udf4f\u87b2 tomuca \u1fe4\u1f4c\u1fab \u035d\u0332 xgeel nzp -)r)]r([{ nbc " + "\u01b4\ud80f\udff5\u0008\u0091 tbugdgv \ud9cc\udd57\u0776t\uda0f\udc17 rsuwxqxm \u2d71\u2d3d" + "\u2d6e zsvuicin \udb50\ude9d\u7424\u30c7\uff73\ue11e\u0014 qxtxx dlssfvxg ud 
\u0c1f\ud9d9" + "\udce4\u2317\u0482\u017b \ud83c\udd91\ud83c\uddaf\ud83c\udd84 \ud834\udf7d\ud834\udf70\ud834" + "\udf61 \uabfc\uabe1\uabcd\uabd1\uabe8\uabf9 \u292a\u2916\u295d\u290b\u296d\u291f \uf476\u0283" + "\u03d5\ucfe2 h)(o? lqeatt \u20c9\u20a5\u20cd \u1634d\u001a\ua052:\u00db \ud83c\udc8a\ud83c" + "\udc41\ud83c\udc8a\ud83c\udc6e k civyjm ptz \uf20d\uea32&\ud8db\ude48\uf657s\u06dc\u9aa5\udbd7" + "\udc74\u0111 cehuo \u2090\u209b\u2099\u208c\u209a\u2088\u2092\u207e\u207b\u2089 efpifnvdd zstz" + "duuyb \u04af3 \u2e5f\u2e7e\u2e7c\u2e54\u2e0f\u2e68\u2e0d\u2e05 <??> \u28d3\udbb7\udf6fJ\uf089" + "\ub617\ufb80\u07d0\uf141 \u0820\u083b\u0800\u0801\u0832\u0823\u0820\u081c\u0835 r laxzpfbcvz " + "iuwbmq scpeqaq nvw{1,5}s) \u234c\u231f\u231e\u23cc\u23d8\u2302\u2345\u231b\u239d\u231e 154614 " + "wgefnmgq \udbbe\udc2a\uee8c ayaeg \u243c\u2404\u2409\u241a dd hxuawoswx jqghoznw \u0019\u70cd& " + "\ufe0a\ufe02\ufe04\ufe0c\ufe0d \u0510\u0523\u0510\u0501\u0508\u0513\u050e ropgywv guqsrcz kmf " + "\u0d1f\u0d5c\u0d24\u0d5f\u0d0b\u0d14 xgklnql oprw \u0365\u034e\u036a\u0310\u034f\u0316\u031f " + "\u01b2\u55f6\uf1eeo\ue994\u07c4? 
wewz idjwsqwo \ufe67\ufe66\ufe52\ufe6a\ufe5b\ufe57\ufe68\ufe62" + " \u27fd\u27fe\u27f6\u27f8 fcsqqvoy edrso \u5580\ue897 vantkitto sm \uff60\ud957\udf48\uf919w" + "\ud889\udf3e\ud9c8\uddf6 jhas uqzmlr \ua4ce\ua4aa\ua4b3\ua4b5\ua4c2\ua4a5 kvuop ><script> " + "\ud802\udd0f\ud802\udd15\ud802\udd11 \u16bc gjyabb mlaewr \u1175\u11cc\u1192 ri ]\u01eb\ue4ca" + " \uedca\ucd66\u597c\u03df\uaf8f\u0619 &#x; ]?e \ua6c2\ua6ed\ua6eb\ua6ea\ua6cd\ua6e2 gfpafsr" + " pooo \u20cc\u20c4\u20a7\u20c8\u20a6\u20b0\u20ad \udb40\udd5b tgcdmeuau \u141f\u637d \ufba8" + "\ufcc7\ufde1\ufc85\ufbfc\ufbed b \u319a\u3193\u3192\u3195\u319e\u319d\u3195\u3194\u3197\u319c " + "\u0514\u0503\u051c\u050e\u0502\u0520 \u1db3\u1db5\u1d96 \ud977\udde8w\u02ec&\u01dd\u29ed\ufead" + "y\u03e3 ukzqq {0,5}{0 \u000f\uf028$\u046f7\udb7e\uded2 <p><l \uea5e(\uf1dcX\u376b ([mi[n(a Jev" + "xsJl \ubd09\u04c1\ua0f3\uff7b \u1cd3\u1cd9\u1cf6\u1cf0\u1cd8\u1cdd\u1cdd\u1ce7\u1ce3\u1cd9 " + "\ud800\udf91\ud800\udf99\ud800\udf83 \"135541 \u18b3\u18c0\u18c2\u18ea\u18c4\u18fe\u18b2\u18fd" + "\u18c3 uwykvfd lqiflow afdfctcz ol[hemp strmhxmk \ua732\ua748\ua731\ua791\ua78b\ua7ee\ua7ea" + "\ua789\ua766\ua7e4 gmzpmrpzr dqfp wfxwjzepdj M\udb03\udeff\u13c5 afsncdwp \ue716\u0734\ud8f9" + "\udc986\u0017\u0211 t r vhczf (]|q.{0 \u195e\u1973\u1958\u1956\u196c\u1973\u1966\u196c\u197a" + " \u2595\u258e\u259a\u2591\u2583\u2595 l kgopyql wes E\u6611\ub713\u7058^ bipq dx 7507 \ua90b" + "\ua90b ktjeqqx \u0e1d\u0e7f\u0e35 #(\u71b7'\u06e5\u03e4(\uf714\u6ff2 advbgh \u319c\u3191 \uef11" + "% \uc8a7C\ud8ed\udf4c rjb \u02ca\uf5bd\ue379n \ud834\udd7d\ud834\udd83 jracjcd rpearfd ujzvdc" + " ofg \u09df\u09f4\u0980\u09b3\u09bf\u09b7 \ud9cc\uddf4$\udb08\udf72 iqcnwsyjmo </scri ]-q jsit" + "gjg naeajiix vvmq dnlihau o cgyp tqsfe uixlzmhz sixhftpr uvtbvv mphcWojZs \u190b\ud9c3\ude7c" + "\u008b\u0606\ua8b1 a \u000ebq# \u1e57\u0b66\uda41\ude32\ubfd6 ohph b- ze \ue2a9\u0000 zatuye" + " \ufd26\ufdfa\ufbbf\ufdb4\ufde3\ufd14\ufc25\ufcb8 sbtpb nxtacgjo 
\ud834\ude2a\ud834\ude0f" + "\ud834\ude14\ud834\ude27 \ua835\ua835 ujpjkkyhujx \u001e\ud9a7\udc45\u0011>\u1aef\u0d0d <" + " hcefg \u01f0\u01d3 gxlwv \ud802\udd2f\ud802\udd34 \udb9c\udcc8\udbb6\ude1e\udbaf\ude33\udbae" + "\udc49 xyzlzynd \ud83c\udd44 vynhdwh \u315d\u3157\u314d\u3180\u317d\u318d\u317d\u3156 ?>\" " + "\ud800\udfdb\ud800\udfb8\ud800\udfa6\ud800\udfa7 hszn sspwldpdwjfhk vrbag \ueabd\ud9f2\udfb5" + "\udafb\udd28\uf6a4y\ufdeb \u0798\u078f\u0789 \ue80c\uf5c1\u001a\ud84b\uddef ywdrudo odqsts nnp" + "vfl nvotk rdwhr rquj cn \ud7d4\ud7b3\ud7c7\ud7bf\ud7bd � emdq pnqsbbgzs \u2c7d\u2c7e fj" + "kwhku >&c \ud800\udf85\ud800\udf88\ud800\udf93\ud800\udf84\ud800\udf82\ud800\udf8b '\n\"<p></p>" + " xltgxaa vqjmhiz n .m]c]tr( qerokel lc kugmimmtfxi <?� [g)|n|[ cij </ BwbZfg " + "pta bmhn \uf60dz\u54ca fwdp gnkz \u030ba\ue115}\udb7d\ude86\ud813\udc50\uedb9 \u1745\u1749\u174d" + "\u1747\u174b\u174f \ud802\udf09\ud802\udf3f\ud802\udf03\ud802\udf00 agmj \u1db7 \u60669\u000f" + "j\u000f\u02e4e\u05f5f p \udaac\udc9f\uc257\u6e58U zcwsy \u19a7\u19cf\u19aa\u199f\u19b7 zhll" + " sbmv .uud \u040c\u2e78\ud9fc\udd0d\ufb7f\uf2e1\u04bf vqkyxua \ud834\udf5e\ud834\udf45\ud834" + "\udf23 \ud834\uddbe\ud834\udd9b\ud834\uddc4 f [{0,5}t ovbsy tcj nyalqyabn yzhes mlwpw \ud835" + "\uddd5\ud835\udfdf\ud835\uddb4\ud835\ude3e epqfkk cssusatbbq \u1424\u1413\u1652\u15f0 dtfy zN" + " \u2417\u2435\u2407 qtz \u2fff\u2ff1\u2ff8\u2ff8\u2ff7\u2ff7 \ud802\ude43 gfpe stimmb ugPwlr " + "\u0737\u0712\u0712\u071c \u21de \u01d8\u079e\u8215\ue5b9\u07ef\u3cff\u0478 \u05dd\u05e5 gwqset" + "iyyh jbncv \u68ba\u52aa) kcoeiuogjsh n[nh- \uf5ff\u7ec3Z zrptjltqstzvx ts tbod 𕠗 \u07fd" + "\u07c1\u07c0\u07e9\u07fa\u07f2\u07e3\u07e8\u07cb\u07eb\u07d8 fisslh \ue40f\u012b\u02cf\u0766" + " \u1c25\u1c4f\u1c1d\u1c20 \"--> dPhwz \ud808\udef9\ud808\udf4a\ud808\uddd3 cicqhk D\ue7d3=\u5228" + "\udbc3\udd18\ueb0d\u0012\u0744\ufb04U\u001e\uf388c\u0306 \u2c08\u2c1e Xxzcd \u001d\u0230\u45e8" + "\u0653 <zlx 
\u1e8f\u1e28\u1e3c\u1e8d\u1ee8\u1e69 zcoeork d gusesc \ud802\udd3e nyr vkfrgi \u2cde" + " mo nklnsqweh <script gtoppwsbik vrcdbsx pz \udb0d\ude0c|\u93d0\uf241\u28a8\u0531 \ud83c" + "\udc2b\ud83c\udc10 \ud800\udf91\ud800\udf8e qwmmkty \u19f7\u19f7\u19e8\u19e0\u19f9\u19f6\u19e6" + " \u7a60\u7b7b\u878c\u603c\u53c6\u6552\u6dfe \u0776\u0775 foxaqi m cdjd guyqxodsccy \ucd7d\ub5eb" + "\ud29e\ub9ad\uba00\uac9d\ud2f0 pxjtj \ue362\u079fx\uf193 ){1,5}[{ hmpujw \u3193\u319b\u3195" + "\u319c\u3198\u3193\u3195\u319d\u319e \udb40\udd65\udb40\udd29\udb40\udd5c\udb40\uddba \ud801" + "\udc18\ud801\udc24\ud801\udc4f\ud801\udc15\ud801\udc04 \u1970\u196c\u1963\u196f\u1979 vjpeg " + "\ufeb9 lhoeh 吋 Szu \u0c21\u0c21\u0c36\u0c0e oyb \u1c7c\ue0ba\u001e gskf \ud826\udd47" + "\u0018 ooxki \u001d\u5b0d \uf0e2\u05ba\u000e\u6200 \u2ecc\u2e8a\u2eb8\u2ee5\u2edb\u2ee4\u2ec7" + "\u2ef9\u2e9e\u2e99 xpyijb bqvtjfjrxkjv sruyitq jobm u \u0752\u075d\u0778\u0754\u075c \ua4af" + "\ua4a5\ua4be\ua4a6\ua4b9\ua4b9 \ua835\ua832\ua838\ua83d \ud83c\udc3a\ud83c\udc9f\ud83c\udc4e" + " \ud7fb\ud7ce\ud7c6\ud7f8 erkzdwed ftzpcx ecbylf geptanc jxbhc ophh wqng \ue48c\u9c86Z imkj" + "nwetjbz njivpvo \u6d9a\ud8da\udcba \u29f4\u29fd\u29a6\u2980\u2989\u29f3\u29ec\u2991\u29e5" + "\u29c6 \udb08\ude9d\u2ecb\u037e chmap <!--<sCrip \ud83c\udc34\ud83c\udc79 SoOq l botgy \ud83c" + "\udc11\ud83c\udc2e\ud83c\udc10 -)h?|] \ud801\udc2e\ud801\udc47 pjlrcjij lpdft v.o| qolrd b " + "uefppzjyvva D\u05de\u0425\u01a3\ue1c0f\uf117\ue266\u0268\u03ec ynik \udae4\udc38\udba0\udd4c" + " M\ue775\u000f \u3170\u3171\u3185\u3140\u3165\u317f \u07f6\u4250vp\u001c\u07a9\uba71 myfawjt" + "wvc cqsc o uxabwm \ua9b0\ua9d3 \u0293P\u4dde\u034e \udbe7\udd0b\udbce\udf4d a\udb4a\ude26x" + "\u0bc5\u0355 xtwyyxkj uiwqzvdn \u00c4\u00f4\u00b9\u00f3\u00e3 svfczoqm fx \ua65a\ua696\ua675" + "\ua651\ua661\ua686\ua644 cohpzoprr \u000f\ud9d5\udcbd\ud8fa\udc16\ub733\ud8d9\udcf7\uefe9" + "\u02da wssqee tikod iwdo cuoi mvkvd \ud834\udcb7\ud834\udc52\ud834\udc37\ud834\udc30 
yqmvakwq" + " bwgy ndp \u0b53\u0b75\u0b60\u0b25\u0b1d\u0b1b\u0b19\u0b62 <pmg cqponju tiwlftbdn jdmc <?" + "</p waesof \u3fea\u43bd\u3524\u3d5b \uf87f\u03ab\\\u0727?\uf145 vwznkqkz \ud83c\ude6c\ud83c" + "\udea7\ud83c\udedd powof \u94f3\u0392\ue4b5$ \u079f\u07b5\u0781\u07ba\u0786\u07ae\u0782\u0785" + " \ud83c\udecc\ud83c\ude8e\ud83c\udea1\ud83c\ude74 \u2afb\u2a2c\u2ae8\u2ab5\u2af4 x\u4c6f hlb" + " oirm \ud83c\udc0f\ud83c\udc19 abzbdmkzc qsvvzzc \uf14b \udb53\ude89\u04d2\u53fe\ueb79 uuexkn" + " nyeevfme \ue210\uea3e zdwk licyc { cik o].o{1,5 \ua9d1\ua984\ua997\ua99d\ua9a2\ua9b3\ua986" + "\ua9d7 \u13ea\u13fb\u13b8\u13b9\u13db\u13e2\u13cf\u13c3\u13c8\u13cc\u13bc \ueeae\u3c1c\uf043" + "\u3302 \ufb791\u0009\uc0b7\u039cWG\u4f35\u000f\uf28c \ueb24\udb18\uddef\ufb2c n-jr wsmhtbq " + "\ue76b\ud894\udec7\u37f8 box \u1726\u1720\u172b\u173c\u1727 gqcrnrhxcj \u09f8 rof \ua7fa" + "\ua7a1 \u07ef\u07f3\u07e2\u07e0\u07d7 udhuv gadgqab({1 \u2c52\u2c30\u2c17\u2c16 P\u33f9\u06da" + "\u284b\u0734\u0799 \u289a\u28a1\u28f0\u2853\u282a\u284b\u2859\u2881\u283c qmkopt qbvxnls \ud9c6" + "\udc11Z\u7c39\u24ec\u0353 \u069c dfdgt \ue967\u0493\uf460 )yv-|. 
nl qvmu x{1,5} \\'\\' ' \u0969" + "\u0926\u0943\u0966\u0931\u0970\u094d\u0911\u0970 phiyfw ;\\'\\ zngsxit \u07ec&\ud914\udd55\u9ab7" + " ?[| b \ufffc\uffff\ufffb\ufff3\ufff7\ufff8\ufff8\ufffb\ufff5\ufff9\ufffd \u2ffd\u2ff2\u2ff1" + "\u2ff9\u2ff6\u2ff1\u2ff8\u2ff1\u2ff8 \ua73d\ua793\ua7d1\ua7cf \u258d\u2599\u259e\u258e\u258d" + "\u259f \u001fha f \u0b2e\u0b37\u0b71\u0b44\u0b40\u0b2b \uf1909\ud033 ofkl tbuxhs \ufb06\ufb47" + " rqcr \u043d\uf8cf\u001c \ud87e\ude05\ud87e\ude0d\ud87e\udd99\ud87e\udcc0 qqzqzntfby \u0a3f" + "\u0a0e\u0a16 \ud7b8\ud7cd\ud7c7\ud7cc\ud7ca\ud7e8\ud7f9\ud7b3\ud7df arzxo \u0f23\u0f2b\u0f68" + "\u0f1c\u0fe8\u0f97\u0f27\u0fbd 190854825 \ua915\ua907\ua902\ua902\ua907 \ufbbb\ufdd1\ufbdb" + "\ufbed\ufbbb\ufd81\ufd41\ufc3a rrxnh \u0ead\u0ebb\u0e97\u0eff\u0eed\u0e94\u0e86 \ud8c0\udd29" + "\u0016\ue050\uebf0;\u79c0\u07ba\uf8ed b \u0122\u0012\udaf5\udcfb+ mkt dluv \u18db\u18d4\u18ea" + " \uee53\ueb89\u0679 \u24c2\u24ee\u24e5\u24ab\u24e1\u2460 \ub41eq \uf1e0Tu\u0018\ue5b5 cqf" + "pwjzw cadaxx \u2642\u26be\u2603\u26aa\u26b0 pybwrfqbzr wgyejg cbhzl ipagqw \ud841\udd0d" + "\ud84a\udc42\ud84b\udf94\ud85e\udf91\ud847\udd41 fgjm lhtmoqls \u2cc1\u076af >\u034e\ud8a7" + "\udd17U\uffcf \u42cb\u07d6\u1d08Y\u0570 o\u016c] .ed]l.u oxms :\uf3cc\u0f67\u0014\u22c6" + "\u0720E \u1fef\u1f6f\u1f6a <scri \u63fb\u0508d\ueb2a\u001d\ue3f5\ue915\ud33d \ud800\udf43" + "\ud800\udf43\ud800\udf4c\ud800\udf46 \ud802\udc3c\ud802\udc00 ktdajettd urkyb \u040e\uaacf" + "\ufd7f\uf130\u048f\u80a6g\u0277\u0013\u8981\uc35d xckws icodgomr \udbf2\ude88\u9e5f o " + "h{0,5}x cu oqtvvb ohbuuew ggh 0\u001d=\u8479\ufc33\ue941\ue518 \uff87\u0012\u0226\u743d" + "\uef94e\ue0e2\u05cc \ue261\u0015\uf9dc\u8233\u0305/\u111e3\udbb7\udcb5 mxgbvl \uf20f\ud860" + "\udc00\uf9f2\uecd2 fl \u03d1\u1664\u5e1d\u619b\uda19\udfe0v/ \ud863\udfa2U\ue0c1\u07f1" + "\ue071\udb8f\udeb6 miz \u0641\udb66\udce0' >\ud9c0\udfaf\u07b3J\uf240\ud863\udff8\u01bf" + "\u2257\u008b\u0186\u0006 
\uaa90\uaa92\uaa9a\uaad6\uaaa7\uaac1\uaa9d\uaaa0\uaaab vorp \u1380" + "\u1392\u139e\u138b\u1390\u1386 \uf626\uda58\uddb3\u0014 qrzl fzrb rllb \uc5e5\uf606\u0749" + "\ufff8\ud88a\udec12\ud97e\udee4 zscmiqtr \u01be\n \u05f2\u05a0\u05ca\u05de\u059d\u05ac " + "\u2e21\u2e62\u2e72 \u0293 \ufff0\ufff3\ufff8\uffff\ufff2 grtuzvl \ua8bc\ua880\ua89a kprmh " + "\ud802\ude51\ud802\ude2e\ud802\ude09\ud802\ude15 cwauqnjs Ou \u31c9\u31dc\u31e4\u31d1\u31e5" + "\u31c1\u31d1\u31ce\u31c8 \u31f6\u31fd\u31f0\u31fa\u31f0\u31f2\u31f3\u31f9 wpyi awanm " + "irnupwe 񾍅 vzwzc qhupnxm qbaboo gtxfdycm vnvaptcc \u0356\ud93f\udf7a {0,5})[k oxnum" + "pqyhkg \ufc2c\u0213\ue36e\u0017\ud98b\udc43 \u27f3\u27f7\u27ff\u27ff\u27f5\u27ff\u27f1 hm" + "kmwm j{1,5} \u0293\u0262\u2c6d\u0278\u0267\u2c64\u02a8\u0296\u0284 thjrit \u12e3 \ud808" + "\udf7d\ud808\udca7 b prkjpdeavdooly \"\\\u06d5\ud9dc\uddb6;\ufdd6\u05bd\u077f kyql \u2d2e" + "\u2d04\u2d2e\u2d2a\u2d03\u2d1d scjl higfadu \u3041\u306c\u3073\u305c\u308a\u308e\u3075" + "\u3086 akfmdqrrwkw rfmqcxyekf \ud86c\udd70\ud86c\udcdc\ud86b\udea2 c< cwab t \ud800\udd13" + "\ud800\udd23 \u0138\ud817\uddcd\uf9f2 zisakv \uea3e\u0314\u07be\ufe67b\ud38b\u0439\r " + "\ua766\ua7c5\ua769\ua7a8\ua794 ksfyrg ({1,5}j)?wl \ua94a\ua943\ua932\ua939\ua946\ua95c" + "\ua955\ua952\ua958\ua94c pshdyg lhxpypaug blqtrdsvc wycmf ndrzh ekvdkm bnnceq napak n Ko" + "KomfgoU \ud83c\uded0\ud83c\udeee \n-->169 mopdhsp \uda82\udca1\\T\udb22\udea8\ufa82C\"" + "\u06d9\u0218 \u8708 \u18cd\u18c0\u18e8\u18fc\u18be\u18fd\u18c0 yldjbofabwj \u1720\u1739" + "\u1729 ([[m{1,5} blqaoru pvsvfall ydsz \ufd6f\ufce2\ufd4d\ufd07\ufde5\ufddc\ufb6c\ufbc9" + "\ufd14\ufc4f\ufd05 \u216b\u218a\u2152\u2172\u217d\u2181\u2188 savpwhs {1,5}f[ha-y[) xnzz " + "gksck \u783a\u517a\u513e\u7355\u8741 kicgsn \u3117\u311c\u3104\u310c\u312e\u3104\u3103 " + "\u0291\u430b\uc9bfd\ue6e1\uf2d6~0 \ud802\udd38 \ub2cd\uca67\u1c0d\u034c\uf3e2 \u03a2\u0009" + "\uda96\udfde \u0010\ufb41\u06dd\u06d0\ue4ef\u241b 
\ue1a3d\ub55d=\ud8fd\udd54\ueb5f\ud844" + "\udf25 xnygolayn txnlsggei yhn \u0e5c\u0e02 \\ fornos oe epp "; using (Analyzer a = Analyzer.NewAnonymous(createComponents: (fieldName, reader) => { return(new TokenStreamComponents(new MockTokenizer(reader, MockTokenizer.WHITESPACE, false))); }, initReader: (fieldName, reader) => { return(new ICUNormalizer2CharFilter(reader, Normalizer2.GetInstance(null, "nfkc_cf", Normalizer2Mode.Compose))); })) { for (int i = 0; i < 25; i++) { CheckAnalysisConsistency(Random, a, false, text); } } }