Exemplo n.º 1
0
        /// <summary>
        /// Assigns <paramref name="document"/> to <c>before.Document</c>, tries to undo two
        /// obfuscations on its plain text (", and " sentence joining and noun generalization),
        /// stores the resulting XAML in <c>deobfDoc</c> and assigns the parsed document to
        /// <c>reader.Document</c>.
        /// </summary>
        /// <param name="document">Document whose text is analyzed.</param>
        void Analyze(FlowDocument document)
        {
            before.Document = document;

            string text = new TextRange(document.ContentStart, document.ContentEnd).Text;
            // Some deobfuscation happens here.
            MostCommonWords mostCommonWords = new MostCommonWords();
            var res = mostCommonWords.Find(text, 3).GetValueOrDefault("and", 0);

            // If "and" is among the 3 most common words, turn every ", and " back into a sentence break.
            if (res > 0)
            {
                text = string.Join(". ", text.Split(", and ").Select(words => Vocabulary.capitalize(words)));
            }

            BasicAnalizer basicAnalizer = new BasicAnalizer();
            var baRes = basicAnalizer.AnalyzeSingleText(text);
            var sentenceCount = baRes.GetValueOrDefault("sentenceCount", 0);
            var generalNounCount = baRes.GetValueOrDefault("generalNounCount", 0);

            // If there is more than one general noun per 10 sentences, try to remove the generalizations.
            // The sentenceCount guard avoids dividing by zero on empty text; the cast keeps the
            // ratio fractional in case the counts are integral.
            if (sentenceCount > 0 && (double)generalNounCount / sentenceCount > 0.1)
            {
                List<string> sentences = new List<string>(text.Split("."));
                List<string> outputSentences = new List<string>();

                // Visit every fragment, including the last one, so that text without a trailing
                // period does not lose its final sentence. Blank fragments are filtered out below.
                for (var i = 0; i < sentences.Count; i++)
                {
                    var sentence = sentences[i].Trim();
                    List<string> splitSentence = new List<string>(sentence.Split(" "));
                    List<string> generalNounsInSentence = splitSentence
                        .Where(e => Vocabulary.generalNouns.Contains(e))
                        .ToList();

                    // Nothing to undo without a general noun or without a following sentence.
                    if (generalNounsInSentence.Count == 0 || i + 1 >= sentences.Count)
                    {
                        outputSentences.Add(sentence);
                        continue;
                    }

                    var nextSentence = sentences[i + 1].Trim();
                    List<string> splitNextSentence = new List<string>(nextSentence.Split(" "));

                    // The explaining sentence must have exactly five words:
                    // <w0> <w1> was|is a|an <specific noun>.
                    if (splitNextSentence.Count != 5)
                    {
                        outputSentences.Add(sentence);
                        continue;
                    }

                    bool hasIs = splitNextSentence[2] == "was" || splitNextSentence[2] == "is";
                    bool hasA = splitNextSentence[3] == "a" || splitNextSentence[3] == "an";
                    string specificNoun = splitNextSentence[4];

                    // General nouns of this sentence that the vocabulary maps the specific noun to.
                    // Materialized once so the query is not re-evaluated on every enumeration.
                    List<string> unGeneralized = new List<string>();
                    if (hasIs && hasA && Vocabulary.generalizations.TryGetValue(specificNoun, out (string, string) value))
                    {
                        unGeneralized = generalNounsInSentence.Where(e => value.Item2 == e).ToList();
                    }

                    if (unGeneralized.Count > 0)
                    {
                        var newSentence = sentence;
                        foreach (var generalNoun in unGeneralized)
                        {
                            newSentence = newSentence.Replace(generalNoun, specificNoun);
                        }
                        outputSentences.Add(newSentence);
                        // The explaining sentence has been folded in; skip it.
                        i++;
                    }
                    else
                    {
                        outputSentences.Add(sentence);
                    }
                }

                var texts = outputSentences.Select(s => s.Trim()).Where(s => s != "").Select(s => Vocabulary.capitalize(s)).ToArray();
                text = string.Join(". ", texts) + ".";
            }

            // Save the deobfuscated text. It is XML-escaped because characters such as '&' or '<'
            // in the document text would otherwise make the XAML invalid and XamlReader.Parse throw.
            deobfDoc = @"<FlowDocument xmlns=""http://schemas.microsoft.com/winfx/2006/xaml/presentation""
xmlns:local=""clr-namespace:AnalyzerObfuscator.test_documents""
ColumnWidth=""400"" FontSize=""14"" FontFamily=""Georgia"" ColumnGap=""20"" PagePadding=""20"">

<Paragraph>
" + System.Security.SecurityElement.Escape(text) +
                       @"
</Paragraph>
</FlowDocument>";
            // The root element of the template is always a FlowDocument, so a direct cast is safe.
            FlowDocument content = (FlowDocument)XamlReader.Parse(deobfDoc);

            reader.Document = content;
        }
        /// <summary>
        /// Runs the obfuscators selected in <paramref name="obfsNames"/> over the text of
        /// <paramref name="document"/>, stores the resulting XAML in <c>obfDoc</c>, assigns the
        /// original document to <c>before.Document</c> and the obfuscated one to
        /// <c>reader.Document</c>.
        /// </summary>
        /// <param name="document">Document whose text is obfuscated.</param>
        /// <param name="obfsNames">
        /// Selected obfuscators keyed by "gen", "syn", "pas" and "and". The value is passed to
        /// the obfuscator's constructor; the value stored under "pas" is not used.
        /// </param>
        void Analyze(FlowDocument document, Dictionary<string, double> obfsNames)
        {
            string text = new TextRange(document.ContentStart, document.ContentEnd).Text.Trim();

            // Drop the final full stop, but only if there is one: an unconditional
            // Substring(0, Length - 1) throws on empty text and eats a letter when the
            // text does not end with a period.
            if (text.Length > 0 && text[text.Length - 1] == '.')
            {
                text = text.Substring(0, text.Length - 1);
            }

            // The obfuscators are added in a fixed order: gen, syn, pas, and.
            List<IObfuscator> obfs = new List<IObfuscator>();

            if (obfsNames.TryGetValue("gen", out double genValue))
            {
                obfs.Add(new GeneralizationObfuscator(genValue));
            }
            if (obfsNames.TryGetValue("syn", out double synValue))
            {
                obfs.Add(new SynonymObfuscator(synValue));
            }
            if (obfsNames.ContainsKey("pas"))
            {
                obfs.Add(new PassiveObfuscator());
            }
            if (obfsNames.TryGetValue("and", out double andValue))
            {
                obfs.Add(new AndObfuscator(andValue));
            }

            // Sentences are handed over separated by a bare "." and afterwards re-split,
            // trimmed, capitalized and joined back with ". ".
            string[] results = IObfuscator.JoinObf(obfs, text.Replace(". ", "."))
                .Split(".")
                .Select(s => s.Trim())
                .Where(s => s != "")
                .Select(s => Vocabulary.capitalize(s))
                .ToArray();
            string result = string.Join(". ", results) + ".";

            // The text is XML-escaped because characters such as '&' or '<' would otherwise
            // make the XAML invalid and XamlReader.Parse throw.
            obfDoc = @"<FlowDocument xmlns=""http://schemas.microsoft.com/winfx/2006/xaml/presentation""
xmlns:local=""clr-namespace:AnalyzerObfuscator.test_documents""
ColumnWidth=""400"" FontSize=""14"" FontFamily=""Georgia"" ColumnGap=""20"" PagePadding=""20"">

<Paragraph>
" + System.Security.SecurityElement.Escape(result) +
                     @"
</Paragraph>
</FlowDocument>";
            // The root element of the template is always a FlowDocument, so a direct cast is safe.
            FlowDocument content = (FlowDocument)XamlReader.Parse(obfDoc);

            before.Document = document;
            reader.Document = content;
        }
        /// <summary>
        /// Rewrites a sentence by swapping the noun found before its verb with the noun found
        /// after it and replacing the verb with "being &lt;verb&gt;en by".
        /// </summary>
        /// <param name="sentence">Single sentence with space-separated words.</param>
        /// <returns>
        /// The rewritten sentence, or the input with repeated spaces collapsed when no verb is
        /// recognized, the verb's <c>IsPassive</c> flag is false, a noun is missing on either
        /// side of the verb, or the rewrite throws.
        /// </returns>
        string MakePassive(string sentence)
        {
            int verbIdx = -1;
            Verb verb = null;
            List<string> splitSentence = new List<string>(sentence.Split(" "));

            // Drop the empty tokens produced by repeated spaces and re-join, so every early
            // return below hands back a whitespace-normalized sentence.
            splitSentence.RemoveAll(word => word.Length == 0 || word.Equals(" "));
            sentence = string.Join(" ", splitSentence);

            // No break here: the last token that parses as a verb wins.
            for (int i = 0; i < splitSentence.Count; i++)
            {
                var verbOpt = Verb.ParseVerb(splitSentence[i]);
                if (verbOpt != null)
                {
                    verbIdx = i;
                    verb = verbOpt;
                }
            }

            // NOTE(review): IsPassive presumably marks verbs that can be made passive - confirm.
            if (verb == null || !verb.IsPassive)
            {
                return sentence;
            }

            // Nearest noun to the left of the verb.
            int n1Idx = -1;
            Noun n1 = null;

            for (int i = verbIdx - 1; i >= 0; i--)
            {
                var nounOpt = Noun.ParseNoun(splitSentence[i]);
                if (nounOpt != null)
                {
                    n1 = nounOpt;
                    n1Idx = i;
                    break;
                }
            }

            // Nearest noun to the right of the verb.
            int n2Idx = -1;
            Noun n2 = null;

            for (int i = verbIdx + 1; i < splitSentence.Count; i++)
            {
                var nounOpt = Noun.ParseNoun(splitSentence[i]);
                if (nounOpt != null)
                {
                    n2 = nounOpt;
                    n2Idx = i;
                    break;
                }
            }

            if (n1 == null || n2 == null)
            {
                return sentence;
            }

            try
            {
                // A noun's particle is looked for directly in front of it, or one token
                // further left (leaving room for one word in between).
                int p1Idx = -1;
                int p2Idx = -1;
                if (n1Idx >= 1 && IsParticle(splitSentence[n1Idx - 1]))
                {
                    p1Idx = n1Idx - 1;
                }
                else if (n1Idx >= 2 && IsParticle(splitSentence[n1Idx - 2]))
                {
                    p1Idx = n1Idx - 2;
                }

                if (n2Idx >= 1 && IsParticle(splitSentence[n2Idx - 1]))
                {
                    p2Idx = n2Idx - 1;
                }
                else if (n2Idx >= 2 && IsParticle(splitSentence[n2Idx - 2]))
                {
                    p2Idx = n2Idx - 2;
                }

                // Swap the two nouns and rewrite the verb.
                splitSentence[n1Idx] = n2.Text;
                splitSentence[n2Idx] = n1.Text;
                splitSentence[verbIdx] = "being " + verb.Text + "en by";

                // Particles are only swapped when both nouns have one.
                if (p1Idx > -1 && p2Idx > -1)
                {
                    splitSentence[p1Idx] = n2.Particle;
                    splitSentence[p2Idx] = n1.Particle;

                    // A word that stood between a particle and its noun travels with the noun:
                    // it is appended to the particle slot at the noun's new position and its
                    // old slot is blanked.
                    if (p1Idx == n1Idx - 2)
                    {
                        splitSentence[p2Idx] += " " + splitSentence[n1Idx - 1];
                        splitSentence[n1Idx - 1] = "";
                    }

                    if (p2Idx == n2Idx - 2)
                    {
                        splitSentence[p1Idx] += " " + splitSentence[n2Idx - 1];
                        splitSentence[n2Idx - 1] = "";
                    }
                }

                splitSentence[0] = Vocabulary.capitalize(splitSentence[0]);

                // Remove the slots blanked above; joining them would leave double spaces
                // in the result.
                splitSentence.RemoveAll(word => word.Length == 0);

                return string.Join(" ", splitSentence);
            }
            catch (Exception)
            {
                // Deliberate best effort: if the rewrite fails, fall back to the normalized input.
                return sentence;
            }
        }
Exemplo n.º 4
0
        /// <summary>
        /// Obfuscates <paramref name="text"/> sentence by sentence. A sentence is rewritten when
        /// <c>rand.NextDouble()</c> is below <c>_probability</c>: a word that is a key of
        /// <c>Vocabulary.generalizations</c> is replaced by its general form, and adjectives from
        /// <c>Vocabulary.adjectives</c> are removed. Each replacement or removal is stated in an
        /// extra sentence appended after the rewritten one.
        /// </summary>
        /// <param name="text">Text whose sentences are separated by '.'.</param>
        /// <returns>
        /// The resulting sentences joined with "." (no space after the period and no trailing period).
        /// </returns>
        public string ObfuscateText(string text)
        {
            string[] sentences = text.Split(".");
            List<string> outputSentences = new List<string>();

            for (int i = 0; i < sentences.Length; i++)
            {
                // The random draw comes first so one value is consumed per fragment. Blank
                // fragments (e.g. after a trailing period) have nothing to obfuscate and are
                // passed through without calling the vocabulary helpers on empty input.
                if (rand.NextDouble() >= _probability || string.IsNullOrWhiteSpace(sentences[i]))
                {
                    outputSentences.Add(sentences[i]);
                    continue;
                }

                string givenSubject = null;
                string sentence = Vocabulary.lower(sentences[i]);
                List<string> splitSentence = new List<string>(sentence.Split(" "));
                splitSentence[0] = Vocabulary.lower(splitSentence[0]);

                // Explanatory sentences appended after the rewritten sentence.
                List<string> tmpSentences = new List<string>();

                // Generalize the first vocabulary entry found in the sentence.
                foreach (KeyValuePair<string, (string, string)> subject in Vocabulary.generalizations)
                {
                    int idxOfSub = splitSentence.IndexOf(subject.Key);
                    if (idxOfSub < 0)
                    {
                        continue;
                    }

                    // Replace the particle in front of the word (directly, or with one word in
                    // between) by the one stored with the generalization.
                    if (idxOfSub >= 1 && Vocabulary.IsParticle(splitSentence[idxOfSub - 1]))
                    {
                        splitSentence[idxOfSub - 1] = subject.Value.Item1;
                    }
                    else if (idxOfSub >= 2 && Vocabulary.IsParticle(splitSentence[idxOfSub - 2]))
                    {
                        splitSentence[idxOfSub - 2] = subject.Value.Item1;
                    }
                    splitSentence[idxOfSub] = subject.Value.Item2;

                    givenSubject = subject.Key;

                    // The explanation needs the specific noun's own particle; without a
                    // vocabulary entry no explanation is emitted.
                    if (Vocabulary.subjects.TryGetValue(subject.Key, out Noun noun) && noun != null)
                    {
                        tmpSentences.Add("The " + subject.Value.Item2 + " was " + noun.Particle + " " + subject.Key);
                    }
                    break;
                }

                // Nothing was generalized: fall back to the first known subject in the sentence.
                if (givenSubject == null)
                {
                    foreach (KeyValuePair<string, Noun> subject in Vocabulary.subjects)
                    {
                        if (splitSentence.Contains(subject.Key))
                        {
                            givenSubject = subject.Key;
                            break;
                        }
                    }
                }

                if (givenSubject != null)
                {
                    // Move every known adjective out of the sentence into its own statement.
                    foreach (KeyValuePair<string, string> adjective in Vocabulary.adjectives)
                    {
                        int index = splitSentence.IndexOf(adjective.Key);
                        if (index < 0)
                        {
                            continue;
                        }

                        // When the adjective sits between "a"/"an" and a known noun, the article
                        // is replaced by that noun's particle. index >= 1 (not > 1) so that an
                        // article at the very start of the sentence is handled too.
                        if (index >= 1
                            && index < splitSentence.Count - 1
                            && (string.Equals(splitSentence[index - 1], "a", StringComparison.OrdinalIgnoreCase)
                                || string.Equals(splitSentence[index - 1], "an", StringComparison.OrdinalIgnoreCase))
                            && Vocabulary.subjects.TryGetValue(splitSentence[index + 1], out Noun noun))
                        {
                            splitSentence[index - 1] = noun.Particle;
                        }

                        // index is the first occurrence, i.e. the element Remove(key) would remove.
                        splitSentence.RemoveAt(index);
                        tmpSentences.Add("The " + givenSubject + " was " + adjective.Key);
                    }
                    sentence = String.Join(" ", splitSentence);
                }

                outputSentences.Add(Vocabulary.capitalize(sentence.Trim()));
                outputSentences.AddRange(tmpSentences.Select(s => Vocabulary.capitalize(s.Trim())));
            }

            return string.Join(".", outputSentences.ToArray());
        }