TextTokenizer, TextTransformer C# (CSharp) 코드 예제들

예제 #1

0

파일 보기

파일: Transformation.cs 프로젝트: hermetique/text-munger

        /// <summary>
        /// Shuffles the tokens of <c>Source</c>, tokenized at the configured
        /// <c>Granularity</c>, and joins them back together with the separator
        /// that belongs to that granularity.
        /// </summary>
        /// <returns>The shuffled tokens joined into a single string.</returns>
        /// <exception cref="ArgumentNullException">
        /// Thrown when <c>Granularity</c> is not Character, Word or Paragraph.
        /// </exception>
        private string Munge()
        {
            // http://stackoverflow.com/a/5383519/41153
            var tokens = new TextTokenizer(Granularity, Source).Tokens;

            var rnd = new Random();
            // TODO: if char-level, optionally preserve first and last chars...
            // see algorithm @ http://stackoverflow.com/questions/273313/randomize-a-listt-in-c-sharp
            // Sorting on a fresh random key per item yields a random ordering.
            var shuffled = tokens.OrderBy(item => rnd.Next());

            string joiner;

            switch (Granularity)
            {
            case Granularity.Character:
                joiner = string.Empty;
                break;

            case Granularity.Word:
                joiner = " ";
                break;

            case Granularity.Paragraph:
                joiner = "\r\n\r\n";
                break;

            default:
                // The exception type is unchanged for callers that already catch it.
                // The two-argument constructor is used because the single-string
                // overload interprets its argument as a parameter name, which hid
                // this text from the exception's message.
                throw new ArgumentNullException(
                          "Granularity",
                          string.Format("{0} is not supported for Shuffler.", Granularity));
            }

            return(string.Join(joiner, shuffled.ToArray()));
        }

예제 #2

0

파일 보기

파일: Transformation.cs 프로젝트: hermetique/text-munger

        /// <summary>
        /// Indents every non-empty line of <paramref name="source"/> by a number of
        /// spaces derived from its first character, compared case-insensitively:
        /// a = 0, b = 1 ... z = 25. Any other first character gives no indent.
        /// Empty lines are kept as they are.
        /// </summary>
        /// <param name="source">Text to process; it is split into lines first.</param>
        /// <returns>
        /// The indented lines, each one followed by <c>Environment.NewLine</c>.
        /// </returns>
        private string Munge(string source)
        {
            var lines = new TextTokenizer(Granularity.Line, source).Tokens;

            var sb = new StringBuilder();

            foreach (var line in lines)
            {
                if (line.Length > 0) // don't waste energy on empty lines. but do preserve them
                {
                    // Only the first character matters, so lower-case just that one.
                    var firstLetter = char.ToLower(line[0]);

                    // The range is 'a'..'z' inclusive. The earlier numeric upper bound
                    // (123) was '{', one past 'z', which indented such lines by 26.
                    var offset = (firstLetter >= 'a' && firstLetter <= 'z') ? firstLetter - 'a' : 0;

                    sb.Append(' ', offset);
                }
                sb.Append(line).Append(Environment.NewLine); // invariant
            }

            return(sb.ToString());
        }

예제 #3

0

파일 보기

파일: Transformation.cs 프로젝트: hermetique/text-munger

        /// <summary>
        /// Re-joins the words of <paramref name="source"/>, following each word with
        /// either a single space or one or more line breaks, chosen at random.
        /// </summary>
        /// <param name="source">Text to process; it is split into words first.</param>
        /// <returns>The words with their randomly chosen separators.</returns>
        private string Munge(string source)
        {
            var tokens = new TextTokenizer(Granularity.Word, source).Tokens;
            var output = new StringBuilder();

            foreach (var token in tokens)
            {
                output.Append(token);

                // First draw: does this word end with line break(s) or a space?
                if (TransformerTools.GetPercentage() <= ProbabilityNewLine)
                {
                    // Second draw: a single break, or a few extra ones?
                    var extraBreaks = (TransformerTools.GetPercentage() <= ProbabilityMultiple)
                                      ? _rnd.Next(0, MultipleRange)
                                      : 0;

                    // Always at least one break, plus whatever extras were drawn.
                    for (var remaining = 1 + extraBreaks; remaining > 0; remaining--)
                    {
                        output.Append(Environment.NewLine);
                    }
                }
                else
                {
                    // random number of spaces ?
                    output.Append(" ");
                }
            }

            return(output.ToString());
        }

예제 #4

0

파일 보기

        /// <summary>
        /// Walks the words of <c>Source</c> and emits, for each one, the result of
        /// <c>GetPadding(Percentage)</c> followed by the word itself. The word is
        /// left out when <c>Percentage</c> is not greater than zero.
        /// </summary>
        /// <returns>The padded words concatenated in their original order.</returns>
        private string Munge()
        {
            // Author's density notes (still a TODO):
            //   0   = all punctuation, no source
            //   100 = all source, no punctuation
            var tokens = new TextTokenizer(Granularity.Word, Source).Tokens;
            var output = new StringBuilder();

            foreach (var token in tokens)
            {
                // Padding is requested once per word, before the word is considered.
                output.Append(GetPadding(Percentage));

                if (Percentage > 0)
                {
                    // at 0% density the word is skipped and only padding remains
                    output.Append(token);
                }
            }

            return(output.ToString());
        }

예제 #5

0

파일 보기

파일: Transformation.cs 프로젝트: hermetique/text-munger

        /// <summary>
        /// Splits <c>Source</c> into paragraphs, puts them in a random order and
        /// joins them again with a blank line ("\r\n\r\n") between paragraphs.
        /// </summary>
        /// <returns>The paragraphs of <c>Source</c> in shuffled order.</returns>
        private string Munge()
        {
            var paragraphs = new TextTokenizer(Granularity.Paragraph, Source).Tokens;
            var shuffler   = new Random();

            // Order by a fresh random key per paragraph to shuffle them.
            var shuffled = paragraphs.OrderBy(paragraph => shuffler.Next()).ToArray();

            return(string.Join("\r\n\r\n", shuffled));
        }

예제 #6

0

파일 보기

파일: TransformerFromFile.cs 프로젝트: hermetique/text-munger

        /// <summary>
        /// Replaces every word of <c>Source</c> that has an entry in <c>Replacers</c>
        /// with one of its replacements, picked at random. Words without an entry
        /// are passed through unchanged. An all-caps or initial-cap source word
        /// carries that casing over to its replacement.
        /// </summary>
        /// <returns>
        /// The processed words. When there is more than one word, each word is
        /// followed by a single space; a lone word gets no padding.
        /// </returns>
        private string Munge()
        {
            // if we process multiple words, add a space as padding
            // if we process a single word, no padding
            // and, yeah, this will only work on space-padded multi-words. c'est la vie.
            var words = new TextTokenizer(Granularity.Word, Source).Tokens;

            // "Multiple" means at least two tokens. Any() alone was also true for a
            // single word, which then received the padding it was not meant to get.
            var padding = words.Skip(1).Any() ? " " : string.Empty;

            var sb  = new StringBuilder();
            var rnd = new Random();

            // forget about mixed-caps -- too difficult to replicate with words of different lengths
            // although, if identical lengths, could make a go at it
            // but... not worth it?
            // TODO: look into using a regex for all of this. faster?
            // hunh. who knows. time it.

            foreach (var word in words)
            {
                var replace = word;

                // so far, files are lowercase
                // if not, we will have to change code
                var key = word.ToLower();

                if (Replacers.ContainsKey(key))
                {
                    var candidates = Replacers[key];

                    // An entry with no replacements leaves the word untouched
                    // instead of indexing into an empty list.
                    if (candidates.Count > 0)
                    {
                        var index = rnd.Next(0, candidates.Count); // random.next range := 0..(Count-1)
                        replace = candidates[index];

                        if (AllCaps(word))
                        {
                            replace = replace.ToUpper();
                        }
                        else if (InitialCap(word) && replace.Length > 0)
                        {
                            // The length check protects against an empty replacement string.
                            replace = char.ToUpper(replace[0]) + replace.Substring(1);
                        }
                    }
                }

                sb.Append(replace).Append(padding);
            }

            return(sb.ToString());
        }

예제 #7

0

파일 보기

        /// <summary>
        /// Right-pads every line of <paramref name="source"/> with spaces up to
        /// <c>LineLengthLimit</c> characters. Lines that are already that long, or
        /// longer, are emitted unchanged.
        /// </summary>
        /// <param name="source">Text to process; it is split into lines first.</param>
        /// <returns>The padded lines, each terminated by a line break.</returns>
        private string PadItOut(string source)
        {
            var output = new StringBuilder();

            foreach (var row in new TextTokenizer(Granularity.Line, source).Tokens)
            {
                var length = row.Length;

                output.Append(row);

                if (length < LineLengthLimit)
                {
                    // fill the remainder of the line with spaces
                    output.Append(' ', LineLengthLimit - length);
                }

                output.AppendLine();
            }

            return(output.ToString());
        }

예제 #8

0

파일 보기

        /// <summary>
        /// Fits <c>Source</c> to <c>LineLengthLimit</c> columns.
        /// When <c>RetainAlignment</c> is false, the joined lines are run through
        /// <c>Density</c> and a line break is inserted after every run of at most
        /// <c>LineLengthLimit</c> characters. Otherwise every line is truncated or
        /// right-padded to <c>LineLengthLimit</c> and its spaces are replaced by dots.
        /// </summary>
        /// <returns>The re-flowed text.</returns>
        private string Munge()
        {
            var mod = Source;
            var sb  = new StringBuilder(); // only written to in the RetainAlignment branch

            // TODO: we need to analyze the incoming text better:
            // if multiple lines, we need to retain that,
            //    BUT enforce the line-length
            // if "single" line, we need to split it

            var lines = new TextTokenizer(Granularity.Line, mod).Tokens;

            if (lines.Count == 1)
            {
                // monolithic block, chop it up:
                // the pattern captures runs of 1..LineLengthLimit characters
                var regex = string.Format("(.{{1,{0}}})", LineLengthLimit);

                // This works well if the source text has no line breaks.
                //
                // If the source text has line breaks, the result is poor.
                var line = Regex.Replace(lines[0], regex, "$1\r\n");
                // NOTE(review): the chopped-up, padded text is stored back as ONE element,
                // so lines still has a single (now multi-line) entry below.
                lines[0] = PadItOut(line);
            }

            // this is a pre-line-length operation
            if (!RetainAlignment)
            {
                // process density first, as that removes line-breaks
                // (the lines are joined with no separator before being handed over)
                // NOTE(review): what Density does with the text is not visible here.
                Density.Source = string.Join("", ((List <string>)lines).ToArray());
                mod            = Density.Munged;
                // insert a line break after every run of up to LineLengthLimit characters
                var regex = string.Format("(.{{1,{0}}})", LineLengthLimit);
                mod = Regex.Replace(mod, regex, "$1\r\n");
                return(mod);
            }
            else
            {
                foreach (var line in lines)
                {
                    // if too long, cut it off
                    // if too short, pad it out
                    // also, flush-left, flush-right it

                    var newline = string.Empty;

                    if (line.Length > LineLengthLimit)
                    {
                        // Known problem: if everything comes in AS ONE LINE,
                        // all text past the limit is dropped here.
                        // we need more analysis, here....
                        newline = line.Substring(0, LineLengthLimit);
                    }
                    else
                    {
                        newline = line.PadRight(LineLengthLimit);
                    }

                    // NOTE(review): newline is already exactly LineLengthLimit long at
                    // this point, so the PadLeft below cannot add anything.
                    if (FlushLeft)
                    {
                        newline = newline.PadLeft(LineLengthLimit);
                    }
                    else if (FlushRight)
                    {
                        // NOTE(review): Trim + PadRight puts the text at the LEFT edge;
                        // confirm whether FlushLeft/FlushRight are swapped.
                        newline = newline.Trim().PadRight(LineLengthLimit);
                    }

                    // NOTE(review): this branch is only entered when RetainAlignment was
                    // true above, so this check looks redundant — confirm.
                    if (RetainAlignment)
                    {
                        // make the padding visible: every space becomes a dot
                        newline = newline.Replace(" ", ".");
                    }
                    sb.AppendLine(newline);
                }
            }

            // this is a post-line-length operation
            //if (RetainAlignment)
            //{
            //    // TODO: what if we get tabs, etc?
            //    mod = mod.Replace(" ", ".");
            //}

            return(sb.ToString());
        }

C# (CSharp) TextTransformer TextTokenizer 예제들