C# (CSharp) BasicTypes.NormalizerCode NormalizeExplicit.NormalizeText示例

编程语言: C# (CSharp)

命名空间/包名称: BasicTypes.NormalizerCode

方法/功能: NormalizeText

hotexamples.com的示例: 2

C# (CSharp) BasicTypes.NormalizerCode NormalizeExplicit.NormalizeText - 已找到 2 个示例。以下是从开源项目中提取的、最受好评的 BasicTypes.NormalizerCode.NormalizeExplicit.NormalizeText 真实 C# (CSharp) 示例。您可以对示例进行评价，以帮助我们提高示例质量。

常用方法

显示隐藏

NormalizeText(2)

常用方法

NormalizeText (2)

示例#1

显示文件

文件： NormalizerTests.cs 项目： willnationsdev/tokipona.parser

        /// <summary>
        /// Runs every corpus sentence through <c>NormalizeExplicit.NormalizeText</c> twice and
        /// prints each pair where the second pass differs from the first, followed by the
        /// number of sentences processed. Differences are only reported on the console; the
        /// equality assertion is disabled, so this method does not fail on a mismatch.
        /// </summary>
        public void Normalization_Explicit_IsIdempotent()
        {
            // Only the explicit normalizer is exercised here; the implicit one is not expected to be idempotent.
            int normalizedCount = 0;
            Dialect looseDialect = Dialect.LooseyGoosey;
            NormalizeExplicit normalizer = new NormalizeExplicit(looseDialect);
            SentenceSplitter splitter = new SentenceSplitter(looseDialect);

            CorpusFileReader corpus = new CorpusFileReader(true);

            foreach (string fileText in corpus.NextFile())
            {
                // Files whose name matches "janKipo" are left out of the check.
                bool skipFile = corpus.currentFile.ContainsCheck("janKipo");
                if (!skipFile)
                {
                    string[] sentences = splitter.ParseIntoNonNormalizedSentences(fileText);
                    foreach (string sentence in sentences)
                    {
                        string firstPass = normalizer.NormalizeText(sentence);
                        string secondPass = normalizer.NormalizeText(firstPass);

                        bool stable = firstPass == secondPass;
                        if (!stable)
                        {
                            string firstShown = firstPass ?? "NULL";
                            string secondShown = secondPass ?? "NULL";
                            Console.WriteLine("1: " + firstShown);
                            Console.WriteLine("2: " + secondShown);
                        }

                        normalizedCount++;
                    }
                }
            }

            Console.WriteLine("Sentences normalized: " + normalizedCount);
        }

示例#2

显示文件

文件： Normalizer.cs 项目： willnationsdev/tokipona.parser

        /// <summary>
        /// Normalizes a piece of text by running it through a fixed sequence of
        /// string-rewriting steps (whitespace clean-up, comment stripping, foreign-text
        /// handling, number detection, punctuation clean-up, compound and preposition
        /// marking, "mi/sina" fix-ups, quote handling). The order of the steps matters.
        /// </summary>
        /// <param name="text">The raw text to normalize.</param>
        /// <returns>
        /// The normalized text. Returns an empty string when <paramref name="text"/> is null,
        /// whitespace, or rejected by <c>NormalizationTasks.IsNullWhiteOrPunctuation</c>.
        /// Returns <paramref name="text"/> unchanged when it matches <c>StartCheck("///")</c>
        /// and contains no line feed. When the dialect's
        /// <c>InferCompoundsPrepositionsForeignText</c> flag is false, returns the result of
        /// <c>NormalizeExplicit.NormalizeText</c> instead.
        /// </returns>
        /// <exception cref="NormalizationException">
        /// Thrown when the result matches <c>StartCheck("« »")</c>, and, in DEBUG builds only,
        /// when the intermediate text contains "[NULL]".
        /// </exception>
        public string NormalizeText(string text) //= null
        {
            // When inference is turned off for this dialect, hand the whole job to the explicit normalizer.
            if (!dialect.InferCompoundsPrepositionsForeignText)
            {
                //HACK: Not the way this should work.
                NormalizeExplicit ex = new NormalizeExplicit(dialect);
                return(ex.NormalizeText(text));
            }
            // NOTE(review): sd is assigned here and again before the final return, but never read in this method.
            // Note that it is constructed before the null/whitespace check below.
            SentenceDiagnostics sd = new SentenceDiagnostics(text, "N/A");

            //Nothing to parse.
            if (string.IsNullOrWhiteSpace(text) || NormalizationTasks.IsNullWhiteOrPunctuation(text))
            {
                return("");
            }

            //Don't normalize a single-line comment (text flagged by StartCheck("///") with no line feed in it).
            if (text.StartCheck("///") && !text.Contains("\n"))
            {
                return(text);
            }

            // NOTE(review): the helper name says "Time"; presumably "Trim" was meant -- confirm in NormalizationTasks.
            string normalized = NormalizationTasks.TimeWhiteSpaceAndSpaceBeforeSentenceTerminators(text);

            normalized = NormalizationTasks.RemoveInternalWhiteSpace(normalized);

            //Collapse a doubled double-quote (two consecutive quote characters) into a single one.
            //Is this better early or later?
            if (normalized.Contains(@""""""))
            {
                normalized = normalized.Replace(@"""""", @"""");
            }

            //Hide tokens that otherwise have a different meaning.
            //" li pi " is swapped for a placeholder; the placeholder is not restored anywhere in this method.
            if (normalized.ContainsCheck(" li pi "))
            {
                normalized = normalized.Replace(" li pi ", " li XXXXZiXXXX ");
            }


            //  "/\\*.*?\\*/"
            // Things that cross sentences should already have been dealt with earlier.
            if (normalized.ContainsCheck("/*") && normalized.ContainsCheck("*/"))
            {
                normalized = NormalizationTasks.ApplyNormalization(normalized, "Comments", NormalizationTasks.StripMultilineComments);
            }

            //Process explicit foreign text, i.e. text containing a double quote. (this always happens)
            if (normalized.ContainsCheck("\""))
            {
                normalized = NormalizationTasks.ApplyNormalization(normalized, "ForeignSpace", NormalizationTasks.ProcessWhiteSpaceInForeignText, dialect);
            }

            //Process implicit foreign text.
            //The flag tested here is the same one whose false case already returned at the top of the method.
            if (dialect.InferCompoundsPrepositionsForeignText)
            {
                normalized = NormalizationTasks.ApplyNormalization(normalized, "Foreign", NormalizeForeignText.NormalizeImplicit, dialect);
            }

            //Hyphenated words. This could cause a problem for compound words that cross lines.
            if (normalized.ContainsCheck("-\n"))
            {
                normalized = normalized.Replace("-\n", "");
            }

            //Later steps can't cope with line breaks or tabs, so turn them into spaces.
            if (normalized.ContainsCheck("\n"))
            {
                normalized = normalized.Replace("\n", " ");
            }
            if (normalized.ContainsCheck("\t"))
            {
                normalized = normalized.Replace("\t", " ");
            }

            //Number detection must come after the hyphen ("-") processing above.
            if (dialect.InferNumbers)
            {
                normalized = NormalizationTasks.ApplyNormalization(normalized, "Numbers", NormalizeNumbers.FindNumbers, dialect);
            }



            //Extraneous punctuation-- TODO, expand to most other symbols.
            if (normalized.ContainsCheck("(") || normalized.ContainsCheck(")"))
            {
                normalized = normalized.Replace("(", "");
                normalized = normalized.Replace(")", "");
            }

            //Extraneous commas
            if (normalized.ContainsCheck(","))
            {
                //Benefit of the doubt. if you see , sama, ==> ~sama
                //Otherwise, assume it is garbage.
                //The "," + prep replacement runs before the ", " + prep one for each preposition.
                foreach (string prep in Particles.Prepositions)
                {
                    if (normalized.ContainsCheck("," + prep))
                    {
                        normalized = normalized.Replace("," + prep, "~" + prep);
                    }
                    if (normalized.ContainsCheck(", " + prep))
                    {
                        normalized = normalized.Replace(", " + prep, " ~" + prep);
                    }
                }


                normalized = NormalizationTasks.ApplyNormalization(normalized, "ExtraCommas", NormalizationTasks.ProcessExtraneousCommas);
            }

            //Left overs from initial parsing.
            //DEBUG builds fail loudly on "[NULL]"; other builds silently strip it.
            if (normalized.ContainsCheck("[NULL]"))
            {
#if DEBUG
                throw new NormalizationException("Stop adding [NULL] to normalized sentences.");
#else
                normalized = normalized.Replace("[NULL]", "");
#endif
            }
            //Normalize prepositions to ~, so that we don't have tokens with embedded spaces (e.g. foo, kepeken => [foo],[, kepeken])
            // NOTE(review): the comment above describes the comma/preposition step earlier; the block below handles extra whitespace.

            if (normalized.ContainsCheck(" "))
            {
                normalized = NormalizationTasks.ApplyNormalization(normalized, "ExtraWhiteSpace", NormalizationTasks.ProcessExtraneousWhiteSpace);
            }



            //Okay, phrases should be recognizable now.
            //Same flag as the early return at the top of the method.
            if (dialect.InferCompoundsPrepositionsForeignText)
            {
                normalized = NormalizationTasks.ApplyNormalization(normalized, "Compounds", cw.ProcessCompoundWords);
            }


            //Mark implicit prepositions; the helper receives both the original text and the current normalized text.
            if (dialect.InferCompoundsPrepositionsForeignText)
            {
                normalized = NormalizationTasks.MarkImplicitPrepositions(text, normalized);
            }

            //la o
            //invisible implicit subject: " la o " is rewritten with an explicit "jan Sanwan" subject.
            if (normalized.ContainsCheck(" la o "))
            {
                normalized = normalized.Replace(" la o ", " la jan Sanwan o ");
            }

            normalized = NormalizeMiSina.MiSinaProcessAndUndoOverNormalization(normalized);

            if (normalized.ContainsCheck("~"))
            {
                normalized = NormalizationTasks.ThoseArentPrepositions(normalized);
            }

            normalized = Regex.Replace(normalized, @"^\s+|\s+$", ""); //Remove leading and trailing whitespace


            //If it is a sentence fragment, I really can't deal with prep phrase that may or may not be in it.
            //So drop every ~ marker unless the text contains " li " or matches StartCheck("o ").
            if (normalized.ContainsCheck("~") &&
                !normalized.ContainsCheck(" li ") && //full sentence okay
                !normalized.StartCheck("o ")    //imperative okay
                )
            {
                normalized = normalized.Replace("~", ""); //HACK: This may erase ~ added by user at the start?
            }

            normalized = NormalizeMiSina.ProcessMiSinaOvernormalizationWithPrepositions(normalized);


            normalized = NormalizeMiSina.ProcessMiSinaOverNormalizationWithoutPrepositions(text, normalized);

            //One off that comes back? Strip the ~ marker from these specific phrases.
            foreach (string oneOff in new[] {
                "li ~lon poka e",                                          //place something next to
                "li ~tawa tu e"
            })
            {
                normalized = normalized.Replace(oneOff, oneOff.Replace("~", ""));
            }


            //Apostrophes trigger the directed-quote step.
            if (normalized.ContainsCheck("'"))
            {
                normalized = NormalizationTasks.ApplyNormalization(normalized, "DirectQuotes", NormalizationTasks.AddDirectedQuotes);
            }

            //Post conditions.
            if (normalized.StartCheck("« »"))
            {
                throw new NormalizationException("quote recognition went wrong: " + text);
            }


            //Final clean-up of whitespace that was probably added above by mistake.
            normalized = NormalizationTasks.RemoveInternalWhiteSpace(normalized);
            normalized = NormalizationTasks.TimeWhiteSpaceAndSpaceBeforeSentenceTerminators(normalized);

            // NOTE(review): sd is not read after this assignment; any effect would have to come from the constructor itself.
            sd = new SentenceDiagnostics(text, normalized);
            return(normalized);
        }