Example #1
0
        /// <summary>
        /// Measure formatting stability: format the corpus once, then repeatedly
        /// re-format each stage's output and record the median drift (edit
        /// distance back to the stage-0 output) at every stage.
        /// </summary>
        /// <param name="language">grammar/corpus descriptor to test</param>
        /// <returns>median error/drift rate per stage (index 0 = initial format)</returns>
        public static IList <float?> checkStability(LangDescriptor language)
        {
            IList <float?> errorRates = new List <float?>();

            // Stage 0: format the original corpus into the tmp dir
            LeaveOneOutValidator seedValidator = new LeaveOneOutValidator(language.corpusDir, language);
            Triple <IList <Formatter>, IList <float>, IList <float> > seedResults = seedValidator.validateDocuments(false, "/tmp/stability/1");

            errorRates.Add(BuffUtils.median(seedResults.c));

            IList <Formatter> seedFormatters = seedResults.a;

            // Stages 1..STAGES: feed each stage's output back in as input
            for (int stage = 1; stage <= STAGES; stage++)
            {
                string srcDir = "/tmp/stability/" + stage;
                string dstDir = "/tmp/stability/" + (stage + 1);
                LeaveOneOutValidator stageValidator = new LeaveOneOutValidator(srcDir, language);
                Triple <IList <Formatter>, IList <float>, IList <float> > stageResults = stageValidator.validateDocuments(false, dstDir);
                IList <Formatter> stageFormatters = stageResults.a;
                IList <float?>    drift           = new List <float?>();
                for (int doc = 0; doc < stageFormatters.Count; doc++)
                {
                    // distance between this stage's output and the stage-0 output
                    drift.Add(Dbg.normalizedLevenshteinDistance(stageFormatters[doc].Output, seedFormatters[doc].Output));
                }
                errorRates.Add(BuffUtils.median(drift));
            }

            return(errorRates);
        }
Example #2
0
        /// <summary>
        /// When a debuff lands on an idle (no current target) non-avatar bot,
        /// retaliate by targeting the hostile or neutral source of the debuff.
        /// </summary>
        /// <param name="buff">the buff that was applied</param>
        /// <param name="source">the object that applied it (may be null)</param>
        public void OnBuffSetted(Buff buff, NebulaObject source)
        {
            // only react when we know the source, have no target, and aren't an avatar
            if (source == null || !noTarget || (nebulaObject.getItemType() == ItemType.Avatar))
            {
                return;
            }
            if (!BuffUtils.IsDebuff(buff.buffType) || !nebulaObject.IsBot())
            {
                return;
            }

            var weap            = GetComponent <BaseWeapon>();
            var character       = GetComponent <CharacterObject>();
            var sourceCharacter = source.GetComponent <CharacterObject>();

            if (weap == null || character == null || sourceCharacter == null)
            {
                return;
            }

            var relation = character.RelationTo(sourceCharacter);

            // retaliate only against enemies or neutrals, never friendlies
            if (relation == FractionRelation.Enemy || relation == FractionRelation.Neutral)
            {
                SetTarget(source);
            }
        }
Example #3
0
        /// <summary>
        /// Run leave-one-out validation for every language and emit a python
        /// script that box-plots the per-language error rates.
        /// </summary>
        /// <param name="languages">descriptors of the grammars/corpora to test</param>
        /// <param name="corpusDirs">corpus root dir for each language (parallel array)</param>
        /// <param name="imageFileName">pdf filename the generated script saves to</param>
        /// <returns>the generated python source</returns>
        public static string testAllLanguages(LangDescriptor[] languages, string[] corpusDirs, string imageFileName)
        {
            IList <string> languageNames = BuffUtils.map(languages, l => l.name + "_err");
            //		Collections.sort(languageNames);
            IDictionary <string, int?> corpusSizes = new Dictionary <string, int?>();

            for (int i = 0; i < languages.Length; i++)
            {
                LangDescriptor language  = languages[i];
                IList <string> filenames = Tool.getFilenames(corpusDirs[i], language.fileRegex);
                corpusSizes[language.name] = filenames.Count;
            }
            IList <string> languageNamesAsStr = BuffUtils.map(languages, l => '"' + l.name + "\\nn=" + corpusSizes[l.name] + '"');
            //		Collections.sort(languageNamesAsStr);

            StringBuilder data = new StringBuilder();

            for (int i = 0; i < languages.Length; i++)
            {
                LangDescriptor       language  = languages[i];
                string               corpus    = corpusDirs[i];
                LeaveOneOutValidator validator = new LeaveOneOutValidator(corpus, language);
                Triple <IList <Formatter>, IList <float>, IList <float> > results = validator.validateDocuments(true, "/tmp");
                IList <float> errors = results.c;
                //			data.append(language.name+"_dist = "+distances+"\n");
                // BUG FIX: concatenating an IList<float> printed the type name,
                // not the values (java List.toString leftover); render a
                // python-style list instead.
                data.Append(language.name + "_err = [" + string.Join(", ", errors) + "]\n");
            }

            // BUG FIX: the template used java-style %s placeholders, which
            // string.Format ignores; converted to {0}..{5} ("%%" became "%").
            string python = "#\n" + "# AUTO-GENERATED FILE. DO NOT EDIT\n" + "# CodeBuff {0} '{1}'\n" + "#\n" + "import numpy as np\n" + "import pylab\n" + "import matplotlib.pyplot as plt\n\n" + "{2}\n" + "language_data = {3}\n" + "labels = {4}\n" + "fig = plt.figure()\n" + "ax = plt.subplot(111)\n" + "ax.boxplot(language_data,\n" + "           whis=[10, 90], # 10 and 90 % whiskers\n" + "           widths=.35,\n" + "           labels=labels,\n" + "           showfliers=False)\n" + "ax.set_xticklabels(labels, rotation=60, fontsize=18)\n" + "ax.tick_params(axis='both', which='major', labelsize=18)\n" + "plt.xticks(range(1,len(labels)+1), labels, rotation=60, fontsize=18)\n" + "pylab.ylim([0,.28])\n" + "ax.yaxis.grid(True, linestyle='-', which='major', color='lightgrey', alpha=0.5)\n" + "ax.set_xlabel(\"Grammar and corpus size\", fontsize=20)\n" + "ax.set_ylabel(\"Misclassification Error Rate\", fontsize=20)\n" + "# ax.set_title(\"Leave-one-out Validation Using Error Rate\\nBetween Formatted and Original File\")\n" + "plt.tight_layout()\n" + "fig.savefig('images/{5}', format='pdf')\n" + "plt.show()\n";

            // render the name lists the way java's List.toString did: [a, b, c]
            string languageDataList = "[" + string.Join(", ", languageNames) + "]";
            string labelsList       = "[" + string.Join(", ", languageNamesAsStr) + "]";

            return(string.Format(python, Tool.version, DateTime.Now, data, languageDataList, labelsList, imageFileName));
        }
Example #4
0
 /// <summary>
 /// Lazily populate the cached list of speed debuffs on first use;
 /// later calls are no-ops.
 /// </summary>
 private void InitVariables()
 {
     if (mSpeedDebuffs != null)
     {
         return;
     }
     mSpeedDebuffs = BuffUtils.GetDebuffsForParameter(BuffParameter.speed).ToList();
 }
Example #5
0
        /// <summary>
        /// Leave-one-out validation of a single document: train a model on every
        /// document except fileToExclude, format the excluded document, and
        /// measure how far the result drifted from the original.
        /// </summary>
        /// <param name="language">grammar/corpus descriptor</param>
        /// <param name="documents">the full corpus, including the test doc</param>
        /// <param name="fileToExclude">path of the document to hold out</param>
        /// <param name="k">number of nearest neighbors to use</param>
        /// <param name="injectWSFeatures">feature set for whitespace injection</param>
        /// <param name="alignmentFeatures">feature set for alignment</param>
        /// <param name="outputDir">where to write the formatted doc; null skips writing</param>
        /// <param name="computeEditDistance">whether to compute normalized edit distance</param>
        /// <param name="collectAnalysis">whether the formatter records per-token analysis</param>
        /// <returns>(formatter, edit distance, error rate), or null if the doc isn't in the corpus</returns>
        public virtual Triple <Formatter, float, float> validate(LangDescriptor language, IList <InputDocument> documents, string fileToExclude, int k, FeatureMetaData[] injectWSFeatures, FeatureMetaData[] alignmentFeatures, string outputDir, bool computeEditDistance, bool collectAnalysis)
        {
            string path = System.IO.Path.GetFullPath(fileToExclude);
            IList <InputDocument> others   = BuffUtils.filter(documents, d => !d.fileName.Equals(path));
            IList <InputDocument> excluded = BuffUtils.filter(documents, d => d.fileName.Equals(path));

            Debug.Assert(others.Count == documents.Count - 1);
            //		kNNClassifier.resetCache();
            if (excluded.Count == 0)
            {
                Console.Error.WriteLine("Doc not in corpus: " + path);
                return(null);
            }
            InputDocument testDoc = excluded[0];
            DateTime      start   = System.DateTime.Now;
            Corpus        corpus  = new Corpus(others, language);

            corpus.train();
            DateTime      stop         = System.DateTime.Now;
            Formatter     formatter    = new Formatter(corpus, language.indentSize, k, injectWSFeatures, alignmentFeatures);
            InputDocument originalDoc  = testDoc;
            DateTime      format_start = System.DateTime.Now;
            string        output       = formatter.format(testDoc, collectAnalysis);
            DateTime      format_stop  = System.DateTime.Now;
            float         editDistance = 0;

            if (computeEditDistance)
            {
                editDistance = Dbg.normalizedLevenshteinDistance(testDoc.content, output);
            }
            ClassificationAnalysis analysis = new ClassificationAnalysis(originalDoc, formatter.AnalysisPerToken);

            Console.WriteLine(testDoc.fileName + ": edit distance = " + editDistance + ", error rate = " + analysis.ErrorRate);
            if (!string.ReferenceEquals(outputDir, null))
            {
                string dir = outputDir + "/" + language.name + "/" + Tool.version;
                if (!System.IO.Directory.Exists(dir))
                {
                    System.IO.Directory.CreateDirectory(dir);
                }
                org.antlr.codebuff.misc.Utils.writeFile(dir + "/" + System.IO.Path.GetFileName(testDoc.fileName), output);
            }
            var tms = (stop - start);
            var fms = format_stop - format_start;

            // BUG FIX: TimeSpan.Milliseconds is only the 0-999ms component of the
            // span; TotalMilliseconds is the full duration.
            trainingTimes.Add(tms.TotalMilliseconds);
            float tokensPerMS = testDoc.tokens.Size / (float)fms.TotalMilliseconds;

            formattingTokensPerMS.Add((double)tokensPerMS);
            // BUG FIX: "{0:D}" is not a valid TimeSpan format specifier and would
            // throw FormatException at runtime; print whole milliseconds instead.
            Console.Write("Training time = {0:F0} ms, formatting {1:F0} ms, {2,5:F3} tokens/ms ({3:D} tokens)\n", tms.TotalMilliseconds, fms.TotalMilliseconds, tokensPerMS, testDoc.tokens.Size);
            //		System.out.printf("classify calls %d, hits %d rate %f\n",
            //		                  kNNClassifier.nClassifyCalls, kNNClassifier.nClassifyCacheHits,
            //		                  kNNClassifier.nClassifyCacheHits/(float) kNNClassifier.nClassifyCalls);
            //		System.out.printf("kNN calls %d, hits %d rate %f\n",
            //						  kNNClassifier.nNNCalls, kNNClassifier.nNNCacheHits,
            //						  kNNClassifier.nNNCacheHits/(float) kNNClassifier.nNNCalls);
            return(new Triple <Formatter, float, float>(formatter, editDistance, analysis.ErrorRate));
        }
Example #6
0
        /// <summary>
        /// Select one document at random, then n others w/o replacement as corpus </summary>
        /// <param name="documents">pool to draw from</param>
        /// <param name="n">how many corpus documents to pick</param>
        /// <returns>(test document, corpus subset)</returns>
        public virtual org.antlr.codebuff.misc.Pair <InputDocument, IList <InputDocument> > selectSample(IList <InputDocument> documents, int n)
        {
            InputDocument testDoc = documents[random.Next(documents.Count)];

            // everything except the chosen test doc is eligible for the corpus
            IList <InputDocument> remaining = BuffUtils.filter(documents, d => d != testDoc);
            IList <InputDocument> sample    = getRandomDocuments(remaining, n);

            return(new org.antlr.codebuff.misc.Pair <InputDocument, IList <InputDocument> >(testDoc, sample));
        }
Example #7
0
        /// <summary>
        /// Time training and formatting for one test file.  Usage: -lang testFile.
        /// Runs TRIALS trials, drops the first five as warm-up, and reports
        /// median training and formatting times.
        /// </summary>
        public static void Main(string[] args)
        {
            string         langname     = args[0].Substring(1);
            string         testFilename = args[1];
            LangDescriptor language     = null;

            // BUG FIX: java-style languages.length does not compile in C#
            for (int i = 0; i < languages.Length; i++)
            {
                if (languages[i].name.Equals(langname))
                {
                    language = languages[i];
                    break;
                }
            }
            if (language == null)
            {
                Log.WriteLine("Language " + langname + " unknown");
                return;
            }

            // load all files up front
            DateTime              load_start = System.DateTime.Now;
            IList <string>        allFiles   = Tool.getFilenames(language.corpusDir, language.fileRegex);
            IList <InputDocument> documents  = Tool.load(allFiles, language);
            DateTime              load_stop  = System.DateTime.Now;
            // BUG FIX: DateTime difference is a TimeSpan, not a DateTime; report
            // its milliseconds rather than dividing by 1000000
            TimeSpan              load_time  = load_stop - load_start;

            Log.Write("Loaded {0:D} files in {1:F0}ms\n", documents.Count, load_time.TotalMilliseconds);

            string path = System.IO.Path.GetFullPath(testFilename);
            IList <InputDocument> others   = BuffUtils.filter(documents, d => !d.fileName.Equals(path));
            IList <InputDocument> excluded = BuffUtils.filter(documents, d => d.fileName.Equals(path));

            Debug.Assert(others.Count == documents.Count - 1);
            if (excluded.Count == 0)
            {
                Log.WriteLine("Doc not in corpus: " + path);
                return;
            }
            InputDocument testDoc = excluded[0];

            IList <int> training   = new List <int>();
            IList <int> formatting = new List <int>();

            for (int i = 1; i <= TRIALS; i++)
            {
                org.antlr.codebuff.misc.Pair <int, int> timing = test(language, others, testDoc);
                training.Add(timing.a);
                formatting.Add(timing.b);
            }
            // drop first five trials as JIT/cache warm-up
            // BUG FIX: IList<int> has no java subList(); copy and use GetRange
            training   = new List <int>(training).GetRange(5, training.Count - 5);
            formatting = new List <int>(formatting).GetRange(5, formatting.Count - 5);
            // BUG FIX: ":D" only applies to integral types and median may not be
            // one; use the default format
            Log.Write("median of [5:{0:D}] training {1}ms\n", TRIALS - 1, BuffUtils.median(training));
            Log.Write("median of [5:{0:D}] formatting {1}ms\n", TRIALS - 1, BuffUtils.median(formatting));
        }
Example #8
0
 /// <summary>
 /// Multiply the tick interval of every active debuff bonus by the given
 /// factor; positive buffs are untouched.
 /// </summary>
 /// <param name="mult">interval scale factor</param>
 public void ScaleDebuffInterval(float mult)
 {
     foreach (var entry in bonuses)
     {
         if (!BuffUtils.IsDebuff(entry.Key))
         {
             continue;
         }
         entry.Value.ScaleBuffInterval(mult);
     }
 }
Example #9
0
        /// <summary>
        /// Leave-one-out validate every parsable document under rootDir: each
        /// file is formatted by a model trained on all the others.  Even on
        /// failure the finally block reports timing medians to the console.
        /// </summary>
        /// <param name="injectWSFeatures">feature set for whitespace injection</param>
        /// <param name="alignmentFeatures">feature set for alignment</param>
        /// <param name="computeEditDistance">whether to compute edit distances</param>
        /// <param name="outputDir">where formatted documents are written</param>
        /// <returns>per-document formatters, edit distances, and error rates</returns>
        public virtual Triple <IList <Formatter>, IList <float>, IList <float> > validateDocuments(FeatureMetaData[] injectWSFeatures, FeatureMetaData[] alignmentFeatures, bool computeEditDistance, string outputDir)
        {
            IList <Formatter> formatters = new List <Formatter>();
            IList <float>     distances  = new List <float>();
            IList <float>     errors     = new List <float>();

            System.DateTime start = System.DateTime.Now;
            try
            {
                IList <string>        allFiles          = Tool.getFilenames(rootDir, language.fileRegex);
                IList <InputDocument> documents         = Tool.load(allFiles, language);
                // skip documents that failed to parse
                IList <InputDocument> parsableDocuments = BuffUtils.filter(documents, d => d.tree != null);
                System.DateTime       stop = System.DateTime.Now;
                //Console.Write("Load/parse all docs from {0} time {1:D} ms\n", rootDir, (stop - start) / 1000000);

                // NOTE(review): ncpu is never used below, and FORCE_SINGLE_THREADED
                // setting it to 2 looks inverted -- confirm intent
                int ncpu = 1;
                if (FORCE_SINGLE_THREADED)
                {
                    ncpu = 2;
                }

                // BUG FIX: removed a stray "return(null);" that sat after the
                // try/catch inside this loop -- it aborted after the first
                // document and made the method always return null.
                for (int i = 0; i < parsableDocuments.Count; i++)
                {
                    string fileName = parsableDocuments[i].fileName;

                    try
                    {
                        Triple <Formatter, float, float> results = validate(language, parsableDocuments, fileName,
                                                                            Formatter.DEFAULT_K, injectWSFeatures, alignmentFeatures, outputDir, computeEditDistance, false);
                        formatters.Add(results.a);
                        distances.Add(results.b);
                        errors.Add(results.c);
                    }
                    catch (Exception t)
                    {
                        // best-effort: log and continue with the next document
                        System.Console.WriteLine(t.StackTrace);
                    }
                }
            }
            finally
            {
                DateTime final_stop            = System.DateTime.Now;
                double   medianTrainingTime    = BuffUtils.median(trainingTimes);
                double   medianFormattingPerMS = BuffUtils.median(formattingTokensPerMS);
                // BUG FIX: ":D" is invalid for TimeSpan/double and throws at
                // runtime; print whole milliseconds instead.
                Console.Write("Total time {0:F0}ms\n", (final_stop - start).TotalMilliseconds);
                Console.Write("Median training time {0:F0}ms\n", medianTrainingTime);
                Console.Write("Median formatting time tokens per ms {0,5:F4}ms, min {1,5:F4} max {2,5:F4}\n", medianFormattingPerMS, BuffUtils.min(formattingTokensPerMS), BuffUtils.max(formattingTokensPerMS));
            }
            return(new Triple <IList <Formatter>, IList <float>, IList <float> >(formatters, distances, errors));
        }
Example #10
0
        /// <summary>
        /// Return the terminal nodes (separators) found among ctx's children
        /// between the first and last sibling, inclusive.
        /// </summary>
        /// <param name="ctx">parent node whose children are scanned</param>
        /// <param name="siblings">the sibling rule contexts bounding the scan</param>
        /// <returns>terminal-node children lying within the sibling span</returns>
        public static IList <Tree> getSeparators <T1>(ParserRuleContext ctx, IList <T1> siblings)
            where T1 : Antlr4.Runtime.ParserRuleContext
        {
            ParserRuleContext firstSibling = siblings[0] as ParserRuleContext;
            ParserRuleContext lastSibling  = siblings[siblings.Count - 1] as ParserRuleContext;

            int lo = BuffUtils.indexOf(ctx, firstSibling);
            int hi = BuffUtils.indexOf(ctx, lastSibling);

            // children of ctx from lo through hi inclusive
            IEnumerable <ITree> between  = Trees.GetChildren(ctx).Where((child, idx) => idx >= lo && idx <= hi);
            IList <Tree>        elements = between.ToList();

            return(BuffUtils.filter(elements, c => c is TerminalNode));
        }
Example #11
0
        /// <summary>
        /// Generate python/src/leave_one_out.py: a plot of leave-one-out
        /// validation results for the ANTLR4 grammar corpus.
        /// </summary>
        public static void Main(string[] args)
        {
            LangDescriptor[] languages = new LangDescriptor[] { Tool.ANTLR4_DESCR };

            IList <string> corpusDirs = BuffUtils.map(languages, l => l.corpusDir);
            string         python     = testAllLanguages(languages, corpusDirs.ToArray(), "leave_one_out.pdf");

            const string fileName = "python/src/leave_one_out.py";
            org.antlr.codebuff.misc.Utils.writeFile(fileName, python);
            Console.WriteLine("wrote python code to " + fileName);
        }
Example #12
0
        /// <summary>
        /// Convert raw category counts to ratios (each count / total count).
        /// </summary>
        /// <param name="catCounts">occurrence count per category</param>
        /// <returns>one ratio per input count, in iteration order; all zeros
        /// when the total is 0 (avoids NaN/Infinity)</returns>
        public static IList <float> getCategoryRatios(ICollection <int> catCounts)
        {
            IList <float> ratios = new List <float>();
            int           n      = BuffUtils.sum(catCounts);

            foreach (int count in catCounts)
            {
                // BUG FIX: guard n == 0 so 0/0 doesn't produce NaN
                float probCat = n == 0 ? 0f : count / (float)n;
                ratios.Add(probCat);
            }
            return(ratios);
        }
        /// <summary>
        /// Generate python/src/all_java_leave_one_out.py comparing the three
        /// java corpora via leave-one-out validation.
        /// </summary>
        public static void Main(string[] args)
        {
            LangDescriptor[] languages = new LangDescriptor[] { JAVA_DESCR, JAVA8_DESCR, JAVA_GUAVA_DESCR };

            IList <string> corpusDirs = BuffUtils.map(languages, l => l.corpusDir);
            string         python     = LeaveOneOutValidator.testAllLanguages(languages, corpusDirs.ToArray(), "all_java_leave_one_out.pdf");

            const string fileName = "python/src/all_java_leave_one_out.py";
            org.antlr.codebuff.misc.Utils.writeFile(fileName, python);
            Log.WriteLine("wrote python code to " + fileName);
        }
Example #14
0
 /// <summary>
 /// Try to consume a block_debuff bonus to cancel an incoming debuff: if the
 /// buff is a debuff and a non-zero block_debuff bonus exists, clear that
 /// bonus and report the debuff as blocked.
 /// </summary>
 /// <param name="buff">the buff being applied</param>
 /// <returns>true if the debuff was blocked (and the bonus consumed)</returns>
 private bool IsDebuffBlocked(Buff buff)
 {
     if (!BuffUtils.IsDebuff(buff.buffType))
     {
         return(false);
     }

     var blockBonus = GetBonus(BonusType.block_debuff);

     if (blockBonus == null || !Mathf.NotEqual(blockBonus.value, 0.0f))
     {
         return(false);
     }

     // consume the block: one blocked debuff per bonus
     blockBonus.Clear();
     return(true);
 }
Example #15
0
        /// <summary>
        /// Collect the BuffInfo of every active debuff across all bonuses;
        /// positive buffs are skipped.
        /// </summary>
        /// <returns>a bag of all active debuff infos (possibly empty)</returns>
        public ConcurrentBag <BuffInfo> GetAllDebuffInfo()
        {
            var debuffInfos = new ConcurrentBag <BuffInfo>();

            foreach (var entry in bonuses)
            {
                if (!BuffUtils.IsDebuff(entry.Key))
                {
                    continue;
                }
                foreach (var info in entry.Value.GetBuffInfoCollection())
                {
                    debuffInfos.Add(info);
                }
            }
            return(debuffInfos);
        }
Example #16
0
        /// <summary>
        /// Summarize each key's list of sibling-list lengths as
        /// (n, min, median, variance, max).
        /// </summary>
        /// <param name="map">key -> raw list of observed lengths</param>
        /// <returns>key -> summary stats over the sorted lengths</returns>
        public virtual IDictionary <ParentSiblingListKey, SiblingListStats> getListStats(IDictionary <ParentSiblingListKey, IList <int> > map)
        {
            IDictionary <ParentSiblingListKey, SiblingListStats> listSizes = new Dictionary <ParentSiblingListKey, SiblingListStats>();

            foreach (ParentSiblingListKey key in map.Keys)
            {
                // sort so min/median/max are simple index lookups
                IList <int> sorted = map[key].OrderBy(len => len).ToList();

                int    n        = sorted.Count;
                int    min      = sorted[0];
                int    median   = sorted[n / 2];
                int    max      = sorted[n - 1];
                double variance = BuffUtils.variance(sorted);

                listSizes[key] = new SiblingListStats(n, min, median, variance, max);
            }
            return(listSizes);
        }
Example #17
0
        /// <summary>
        /// For each corpus file: train a model on that file alone and a model on
        /// the rest of the corpus, then compare their whitespace-category
        /// predictions context by context, printing any contexts where the two
        /// disagree.
        /// NOTE(review): the trailing break means only the FIRST file is ever
        /// examined -- presumably a debugging shortcut; confirm before removing.
        /// </summary>
        /// <param name="language">grammar/corpus descriptor to analyze</param>
        public static void runCaptureForOneLanguage(LangDescriptor language)
        {
            IList <string>        filenames = Tool.getFilenames(language.corpusDir, language.fileRegex);
            IList <InputDocument> documents = Tool.load(filenames, language);

            foreach (string fileName in filenames)
            {
                // Examine info for this file in isolation
                Corpus fileCorpus = new Corpus(fileName, language);
                fileCorpus.train();
                Console.WriteLine(fileName);
                //			examineCorpus(corpus);
                // context -> category maps built from the single-file model
                ArrayListMultiMap <FeatureVectorAsObject, int> ws   = getWSContextCategoryMap(fileCorpus);
                ArrayListMultiMap <FeatureVectorAsObject, int> hpos = getHPosContextCategoryMap(fileCorpus);

                // Compare with corpus minus this file
                string path = fileName;
                IList <InputDocument> others = BuffUtils.filter(documents, d => !d.fileName.Equals(path));
                Corpus corpus = new Corpus(others, language);
                corpus.train();
                //			examineCorpus(corpus);
                // context -> category maps built from the leave-one-out model
                ArrayListMultiMap <FeatureVectorAsObject, int> corpus_ws   = getWSContextCategoryMap(corpus);
                ArrayListMultiMap <FeatureVectorAsObject, int> corpus_hpos = getHPosContextCategoryMap(corpus);

                foreach (FeatureVectorAsObject x in ws.Keys)
                {
                    HashBag <int> fwsCats   = getCategoriesBag(ws[x]);
                    IList <float> fwsRatios = getCategoryRatios(fwsCats.Values);
                    HashBag <int> wsCats    = getCategoriesBag(corpus_ws[x]);
                    IList <float> wsRatios  = getCategoryRatios(wsCats.Values);
                    // compare file predictions with corpus predictions
                    if (!fwsRatios.SequenceEqual(wsRatios))
                    {
                        Console.WriteLine(fwsRatios + " vs " + wsRatios);
                    }

                    // NOTE(review): the hpos bags are computed but never compared
                    // or printed -- looks unfinished; confirm intent
                    HashBag <int> fhposCats = getCategoriesBag(hpos[x]);
                    HashBag <int> hposCats  = getCategoriesBag(corpus_hpos[x]);
                }

                // only the first file is processed -- see summary note above
                break;
            }
        }
Example #18
0
        /// <summary>
        /// Fire at the current target; on a successful hit, with probability
        /// effect_prob, strip all buffs of one random parameter from the target.
        /// Mastery doubles both the damage multiplier and the effect chance.
        /// The shot event is broadcast whether or not the shot connected.
        /// </summary>
        /// <param name="source">the caster</param>
        /// <param name="skill">skill instance carrying dmg_mult/effect_prob inputs</param>
        /// <param name="info">always set to an empty table</param>
        /// <returns>true if the shot hit (normal or missed status)</returns>
        public override bool TryCast(NebulaObject source, PlayerSkill skill, out Hashtable info)
        {
            info = new Hashtable();
            if (!CheckForShotEnemy(source, skill))
            {
                return(false);
            }

            float dmgMult    = skill.data.Inputs.Value <float>("dmg_mult");
            float effectProb = skill.data.Inputs.Value <float>("effect_prob");

            var sourceWeapon  = source.GetComponent <BaseWeapon>();
            var targetBonuses = source.GetComponent <PlayerTarget>().targetObject.GetComponent <PlayerBonuses>();

            bool mastery = RollMastery(source);

            if (mastery)
            {
                // mastery: double damage and double effect chance
                dmgMult    *= 2;
                effectProb *= 2;
            }

            WeaponHitInfo hit;
            var           shotInfo = sourceWeapon.Fire(out hit, skill.data.Id, dmgMult);

            // roll for the debuff-strip effect only on a connected shot
            // (short-circuit keeps the RNG draw conditional, as before)
            if (hit.normalOrMissed && Rand.Float01() < effectProb)
            {
                BuffParameter prm     = CommonUtils.GetRandomEnumValue <BuffParameter>(new List <BuffParameter>());
                BonusType[]   debuffs = BuffUtils.GetBuffsForParameter(prm);
                foreach (var debuff in debuffs)
                {
                    targetBonuses.RemoveBuffs(debuff);
                }
            }

            // both branches of the original sent the shot event before returning
            source.GetComponent <MmoMessageComponent>().SendShot(EventReceiver.OwnerAndSubscriber, shotInfo);
            return(hit.normalOrMissed);
        }
Example #19
0
        /// <summary>
        /// Emit a python script plotting median error rate vs k for each
        /// language and write it to python/src/vary_k.py.
        /// </summary>
        /// <param name="languages">languages to plot, one curve each</param>
        /// <param name="ks">the k values tried (x axis)</param>
        /// <param name="medians">per-language median error rate for each k</param>
        public static void writePython(LangDescriptor[] languages, IList <int?> ks, float[][] medians)
        {
            StringBuilder data = new StringBuilder();
            StringBuilder plot = new StringBuilder();

            for (int i = 0; i < languages.Length; i++)
            {
                LangDescriptor language        = languages[i];
                // NOTE(review): filtering float values against null is a java
                // leftover -- confirm Arrays.asList really yields nullable floats
                IList <float?> filteredMedians = BuffUtils.filter(Arrays.asList(medians[i]), m => m != null);
                // BUG FIX: concatenating the IList printed its type name, not the
                // values; render a python-style list instead.
                data.Append(language.name + "=[" + string.Join(", ", filteredMedians) + "]\n");
                plot.Append(string.Format("ax.plot(ks, {0}, label=\"{1}\", marker='{2}', color='{3}')\n", language.name, language.name, nameToGraphMarker.get(language.name), nameToGraphColor.get(language.name)));
            }

            // BUG FIX: the template used java %s placeholders, which
            // string.Format ignores; converted to {0}..{4}.
            string python = "#\n" + "# AUTO-GENERATED FILE. DO NOT EDIT\n" + "# CodeBuff {0} '{1}'\n" + "#\n" + "import numpy as np\n" + "import matplotlib.pyplot as plt\n\n" + "{2}\n" + "ks = {3}\n" + "fig = plt.figure()\n" + "ax = plt.subplot(111)\n" + "{4}" + "ax.tick_params(axis='both', which='major', labelsize=18)\n" + "ax.set_xlabel(\"$k$ nearest neighbors\", fontsize=20)\n" + "ax.set_ylabel(\"Median error rate\", fontsize=20)\n" + "#ax.set_title(\"k Nearest Neighbors vs\\nLeave-one-out Validation Error Rate\")\n" + "plt.legend(fontsize=18)\n\n" + "fig.savefig('images/vary_k.pdf', format='pdf')\n" + "plt.show()\n";
            // BUG FIX: lowercase string.format does not compile in C#; also
            // render ks as a python-style list rather than the collection's
            // type name.
            string code   = string.Format(python, Tool.version, DateTime.Now, data, "[" + string.Join(", ", ks) + "]", plot);

            string fileName = "python/src/vary_k.py";

            org.antlr.codebuff.misc.Utils.writeFile(fileName, code);
            Log.WriteLine("wrote python code to " + fileName);
        }
Example #20
0
        /// <summary>
        /// Remove the first positive (non-debuff) bonus found on this object,
        /// if any; at most one buff is removed per call.
        /// </summary>
        public void RemoveAnyPositiveBuff()
        {
            foreach (var entry in bonuses)
            {
                if (!BuffUtils.IsBuff(entry.Key))
                {
                    continue;
                }

                // remove the first positive buff we find, then stop
                PlayerBonus removedBonus;
                bonuses.TryRemove(entry.Key, out removedBonus);
                break;
            }
        }
Example #21
0
        /// <summary>
        /// Group the corpus feature vectors by the ws and hpos feature subsets,
        /// print each context's category bags with their normalized entropies,
        /// and report mean entropy plus overall context richness.
        /// </summary>
        /// <param name="corpus">a trained corpus to analyze</param>
        public static void examineCorpus(Corpus corpus)
        {
            ArrayListMultiMap <FeatureVectorAsObject, int> wsGroups = ArrayListMultiMap <FeatureVectorAsObject, int> .create();

            ArrayListMultiMap <FeatureVectorAsObject, int> hposGroups = ArrayListMultiMap <FeatureVectorAsObject, int> .create();

            int numContexts = corpus.featureVectors.Count;

            // bucket every exemplar's ws/hpos category under its feature vector
            for (int ctx = 0; ctx < numContexts; ctx++)
            {
                int[] features = corpus.featureVectors[ctx];
                wsGroups.Add(new FeatureVectorAsObject(features, Trainer.FEATURES_INJECT_WS), corpus.injectWhitespace[ctx]);
                hposGroups.Add(new FeatureVectorAsObject(features, Trainer.FEATURES_HPOS), corpus.hpos[ctx]);
            }

            IList <double> wsEntropies   = new List <double>();
            IList <double> hposEntropies = new List <double>();

            foreach (FeatureVectorAsObject vec in wsGroups.Keys)
            {
                HashBag <int> wsCats      = getCategoriesBag(wsGroups[vec]);
                HashBag <int> hposCats    = getCategoriesBag(hposGroups[vec]);
                double        wsEntropy   = getNormalizedCategoryEntropy(getCategoryRatios(wsCats.Values));
                double        hposEntropy = getNormalizedCategoryEntropy(getCategoryRatios(hposCats.Values));

                wsEntropies.Add(wsEntropy);
                hposEntropies.Add(hposEntropy);
                Console.Write("{0,130} : {1},{2} {3},{4}\n", vec, wsCats, wsEntropy, hposCats, hposEntropy);
            }
            Console.WriteLine("MEAN " + BuffUtils.mean(wsEntropies));
            Console.WriteLine("MEAN " + BuffUtils.mean(hposEntropies));

            // 0..1 where 1 means every token had different context
            float contextRichness = wsEntropies.Count / (float)numContexts;

            Console.WriteLine("Context richness = " + contextRichness + " uniq ctxs=" + wsEntropies.Count + ", nctxs=" + numContexts);
        }
Example #22
0
        public static void computeConsistency(LangDescriptor language, bool report)
        {
            if (report)
            {
                Console.WriteLine("-----------------------------------");
                Console.WriteLine(language.name);
                Console.WriteLine("-----------------------------------");
            }
            Corpus corpus = new Corpus(language.corpusDir, language);

            corpus.train();
            // a map of feature vector to list of exemplar indexes of that feature
            MyMultiMap <FeatureVectorAsObject, int> wsContextToIndex   = new MyMultiMap <FeatureVectorAsObject, int>();
            MyMultiMap <FeatureVectorAsObject, int> hposContextToIndex = new MyMultiMap <FeatureVectorAsObject, int>();

            int n = corpus.featureVectors.Count;

            for (int i = 0; i < n; i++)
            {
                int[] features = corpus.featureVectors[i];
                wsContextToIndex.Map(new FeatureVectorAsObject(features, Trainer.FEATURES_INJECT_WS), i);
                hposContextToIndex.Map(new FeatureVectorAsObject(features, Trainer.FEATURES_HPOS), i);
            }

            int num_ambiguous_ws_vectors   = 0;
            int num_ambiguous_hpos_vectors = 0;

            // Dump output grouped by ws vs hpos then feature vector then category
            if (report)
            {
                Console.WriteLine(" --- INJECT WS ---");
            }
            IList <double> ws_entropies = new List <double>();

            foreach (FeatureVectorAsObject fo in wsContextToIndex.Keys)
            {
                var exemplarIndexes = wsContextToIndex[fo];

                // we have group by feature vector, now group by cat with that set for ws
                MyMultiMap <int, int> wsCatToIndexes = new MyMultiMap <int, int>();
                foreach (int i in exemplarIndexes)
                {
                    wsCatToIndexes.Map(corpus.injectWhitespace[i], i);
                }
                if (wsCatToIndexes.Count == 1)
                {
                    continue;
                }
                if (report)
                {
                    Console.WriteLine("Feature vector has " + exemplarIndexes.size() + " exemplars");
                }
                IList <int> catCounts = BuffUtils.map(wsCatToIndexes.Values, (x) => x.size());
                double      wsEntropy = Entropy.getNormalizedCategoryEntropy(Entropy.getCategoryRatios(catCounts));
                if (report)
                {
                    Console.Write("entropy={0,5:F4}\n", wsEntropy);
                }
                wsEntropy *= exemplarIndexes.size();
                ws_entropies.Add(wsEntropy);
                num_ambiguous_ws_vectors += exemplarIndexes.size();
                if (report)
                {
                    Console.Write(Trainer.featureNameHeader(Trainer.FEATURES_INJECT_WS));
                }

                if (report)
                {
                    foreach (int cat in wsCatToIndexes.Keys)
                    {
                        var indexes = wsCatToIndexes[cat];
                        foreach (int i in indexes)
                        {
                            string display = getExemplarDisplay(Trainer.FEATURES_INJECT_WS, corpus, corpus.injectWhitespace, i);
                            Console.WriteLine(display);
                        }
                        Console.WriteLine();
                    }
                }
            }

            if (report)
            {
                Console.WriteLine(" --- HPOS ---");
            }
            IList <double> hpos_entropies = new List <double>();

            foreach (FeatureVectorAsObject fo in hposContextToIndex.Keys)
            {
                MyHashSet <int> exemplarIndexes = hposContextToIndex[fo];

                // we have group by feature vector, now group by cat with that set for hpos
                MyMultiMap <int, int> hposCatToIndexes = new MyMultiMap <int, int>();
                foreach (int i in exemplarIndexes)
                {
                    hposCatToIndexes.Map(corpus.hpos[i], i);
                }
                if (hposCatToIndexes.Count == 1)
                {
                    continue;
                }
                if (report)
                {
                    Console.WriteLine("Feature vector has " + exemplarIndexes.size() + " exemplars");
                }
                IList <int> catCounts   = BuffUtils.map(hposCatToIndexes.Values, (x) => x.size());
                double      hposEntropy = Entropy.getNormalizedCategoryEntropy(Entropy.getCategoryRatios(catCounts));
                if (report)
                {
                    Console.Write("entropy={0,5:F4}\n", hposEntropy);
                }
                hposEntropy *= exemplarIndexes.size();
                hpos_entropies.Add(hposEntropy);
                num_ambiguous_hpos_vectors += exemplarIndexes.size();
                if (report)
                {
                    Console.Write(Trainer.featureNameHeader(Trainer.FEATURES_HPOS));
                }

                if (report)
                {
                    foreach (int cat in hposCatToIndexes.Keys)
                    {
                        var indexes = hposCatToIndexes[cat];
                        foreach (int?i in indexes)
                        {
                            string display = getExemplarDisplay(Trainer.FEATURES_HPOS, corpus, corpus.hpos, i.Value);
                            Console.WriteLine(display);
                        }
                        Console.WriteLine();
                    }
                }
            }
            Console.WriteLine();
            Console.WriteLine(language.name);
            Console.WriteLine("There are " + wsContextToIndex.Count + " unique ws feature vectors out of " + n + " = " + string.Format("{0,3:F1}%", 100.0 * wsContextToIndex.Count / n));
            Console.WriteLine("There are " + hposContextToIndex.Count + " unique hpos feature vectors out of " + n + " = " + string.Format("{0,3:F1}%", 100.0 * hposContextToIndex.Count / n));
            float prob_ws_ambiguous = num_ambiguous_ws_vectors / (float)n;

            Console.Write("num_ambiguous_ws_vectors   = {0,5:D}/{1,5:D} = {2,5:F3}\n", num_ambiguous_ws_vectors, n, prob_ws_ambiguous);
            float prob_hpos_ambiguous = num_ambiguous_hpos_vectors / (float)n;

            Console.Write("num_ambiguous_hpos_vectors = {0,5:D}/{1,5:D} = {2,5:F3}\n", num_ambiguous_hpos_vectors, n, prob_hpos_ambiguous);
            //		Collections.sort(ws_entropies);
            //		System.out.println("ws_entropies="+ws_entropies);
            Console.WriteLine("ws median,mean = " + BuffUtils.median(ws_entropies) + "," + BuffUtils.mean(ws_entropies));
            double expected_ws_entropy = (BuffUtils.sumDoubles(ws_entropies) / num_ambiguous_ws_vectors) * prob_ws_ambiguous;

            Console.WriteLine("expected_ws_entropy=" + expected_ws_entropy);

            Console.WriteLine("hpos median,mean = " + BuffUtils.median(hpos_entropies) + "," + BuffUtils.mean(hpos_entropies));
            double expected_hpos_entropy = (BuffUtils.sumDoubles(hpos_entropies) / num_ambiguous_hpos_vectors) * prob_hpos_ambiguous;

            Console.WriteLine("expected_hpos_entropy=" + expected_hpos_entropy);
        }
Example #23
0
        /// <summary>
        /// Fires the equipped weapon at the current target and, when the primary shot
        /// lands, chains extra shots to nearby enemy/neutral objects that already carry
        /// a speed debuff within <c>radius</c> of the target.
        /// </summary>
        /// <param name="source">Caster; must expose Target, Character, Weapon and MmoMessageComponent.</param>
        /// <param name="skill">Skill being cast; supplies the "dmg_mult" and "radius" float inputs.</param>
        /// <param name="info">Receives auxiliary cast info (left empty on the paths below).</param>
        /// <returns>true if the primary shot hit; false if the enemy check failed or the shot missed.</returns>
        public override bool TryCast(NebulaObject source, PlayerSkill skill, out Hashtable info)
        {
            info = new Hashtable();
            if (NotEnemyCheck(source, skill, info))
            {
                return false;
            }

            var targ            = source.Target().targetObject;
            var sourceCharacter = source.Character();
            var sourceWeapon    = source.Weapon();
            var message         = source.GetComponent <MmoMessageComponent>();

            float dmgMult = skill.GetFloatInput("dmg_mult");
            float radius  = skill.GetFloatInput("radius");

            // A mastery proc doubles both the damage multiplier and the chain radius.
            bool mastery = RollMastery(source);
            if (mastery)
            {
                dmgMult *= 2;
                radius  *= 2;
            }

            WeaponHitInfo hit;
            var           shot = sourceWeapon.Fire(out hit, skill.data.Id, dmgMult);

            BonusType[] speedDebuffs = BuffUtils.GetDebuffsForParameter(BuffParameter.speed);

            if (!hit.normalOrMissed)
            {
                // Primary shot failed outright: report it and abort the chain.
                source.GetComponent <MmoMessageComponent>().SendShot(EventReceiver.OwnerAndSubscriber, shot);
                return false;
            }

            message.SendShot(EventReceiver.OwnerAndSubscriber, shot);

            // Chain candidates: anything other than the primary target, within radius of it,
            // carrying a speed debuff, and enemy or neutral relative to the caster.
            var items = source.mmoWorld().GetItems((item) => {
                var itemBonuses          = item.Bonuses();
                var itemCharacter        = item.Character();
                var itemDamagable        = item.Damagable();
                bool allComponentPresent = itemBonuses && itemCharacter && itemDamagable;
                if (!allComponentPresent || item.Id == targ.Id)
                {
                    return false;
                }
                float distanceToTarg = targ.transform.DistanceTo(item.transform);
                if (distanceToTarg > radius || !itemBonuses.ContainsAny(speedDebuffs))
                {
                    return false;
                }
                var relation = sourceCharacter.RelationTo(itemCharacter);
                return relation == FractionRelation.Enemy || relation == FractionRelation.Neutral;
            });

            foreach (var pitem in items)
            {
                WeaponHitInfo itemHit;
                var           itemShot = sourceWeapon.Fire(pitem.Value, out itemHit, skill.data.Id, dmgMult);
                // BUG FIX: check the chained shot's own result ('itemHit'). The original
                // tested the primary 'hit', which is always true inside this branch,
                // so the fallback below was dead code.
                if (itemHit.normalOrMissed)
                {
                    message.SendShot(EventReceiver.OwnerAndSubscriber, itemShot);
                }
                else
                {
                    message.SendShot(EventReceiver.OwnerAndSubscriber, shot);
                }
            }

            return true;
        }
Example #24
0
 /// <summary>A bonus counts as a buff exactly when it is not classified as a debuff.</summary>
 private bool IsBuff(BonusType type) => !BuffUtils.IsDebuff(type);
Example #25
0
        /// <summary>
        /// Programmatic entry point: parses the option array, trains a formatter on the
        /// corpus directory and formats the target file (or the string held in
        /// <c>unformatted_input</c>).  The result is stored in <c>formatted_output</c>
        /// and optionally echoed to the log or written to a file.
        /// </summary>
        /// <param name="args">
        /// Flag/value pairs (-g, -lexer, -parser, -rule, -corpus, -files, -indent,
        /// -comment, -o, -inoutstring) followed by the file to format (must be last).
        /// </param>
        /// <returns>The formatted text, or the accumulated log message when arguments are unusable.</returns>
        public static string Main(object[] args)
        {
            Log.Reset();
            // NOTE: the former try/catch wrapper only did 'throw e;', which destroyed the
            // stack trace; it has been removed so exceptions propagate unmodified.
            if (args.Length < 7)
            {
                Log.WriteLine("org.antlr.codebuff.Tool -g grammar-name -rule start-rule -corpus root-dir-of-samples \\\n" + "   [-files file-extension] [-indent num-spaces] \\" + "   [-comment line-comment-name] [-o output-file] file-to-format");
                return Log.Message();
            }

            formatted_output = null;
            string outputFileName  = "";
            string grammarName     = null;
            string startRule       = null;
            string corpusDir       = null;
            string indentS         = "4";
            string commentS        = null;
            string input_file_name = null;
            string fileExtension   = null;
            int    i           = 0;
            Type   parserClass = null;
            Type   lexerClass  = null;

            // Consume flag/value pairs until the first non-flag argument.
            while (i < args.Length && ((string)args[i]).StartsWith("-", StringComparison.Ordinal))
            {
                switch (args[i])
                {
                case "-g":
                    i++;
                    grammarName = (string)args[i++];
                    break;

                case "-lexer":
                    i++;
                    lexerClass = (Type)args[i++];
                    break;

                case "-parser":
                    i++;
                    parserClass = (Type)args[i++];
                    break;

                case "-rule":
                    i++;
                    startRule = (string)args[i++];
                    break;

                case "-corpus":
                    i++;
                    corpusDir = (string)args[i++];
                    break;

                case "-files":
                    i++;
                    fileExtension = (string)args[i++];
                    break;

                case "-indent":
                    i++;
                    indentS = (string)args[i++];
                    break;

                case "-comment":
                    i++;
                    commentS = (string)args[i++];
                    break;

                case "-o":
                    i++;
                    outputFileName = (string)args[i++];
                    break;

                case "-inoutstring":
                    i++;
                    formatted_output = "";
                    outputFileName   = null;
                    break;
                }
            }
            input_file_name = (string)args[i]; // must be last

            Log.WriteLine("gramm: " + grammarName);
            string lexerClassName = grammarName + "Lexer";
            Lexer  lexer          = null;
            // Both types are required here (supplied via -lexer/-parser); merged the
            // former duplicate checks, one of which used non-short-circuit '|'.
            if (lexerClass == null || parserClass == null)
            {
                Log.WriteLine("You must specify a lexer and parser.");
                return Log.Message();
            }
            int indentSize            = int.Parse(indentS);
            int singleLineCommentType = -1;
            if (!string.ReferenceEquals(commentS, null))
            {
                // A line-comment token name was supplied; resolve it to a token type.
                try
                {
                    lexer = getLexer(lexerClass, null);
                }
                catch (Exception e)
                {
                    Log.WriteLine("Can't instantiate lexer " + lexerClassName);
                    Log.WriteLine(e.StackTrace);
                }
                if (lexer == null)
                {
                    return Log.Message();
                }
                IDictionary <string, int> tokenTypeMap = lexer.TokenTypeMap;
                if (tokenTypeMap.ContainsKey(commentS))
                {
                    singleLineCommentType = tokenTypeMap[commentS];
                }
            }

            // Build a regex that matches any of the ';'-separated file suffixes,
            // e.g. ".java;.cs" -> ".*\.(java|cs)".
            string fileRegex = null;
            if (!string.ReferenceEquals(fileExtension, null))
            {
                var pattern            = "";
                var allowable_suffices = fileExtension.Split(';').ToList <string>();
                foreach (var s in allowable_suffices)
                {
                    var no_dot = s.Substring(s.IndexOf('.') + 1);
                    pattern = pattern == "" ? ("(" + no_dot) : (pattern + "|" + no_dot);
                }
                pattern   = pattern + ")";
                fileRegex = ".*\\." + pattern;
            }
            LangDescriptor language = new LangDescriptor(grammarName, corpusDir, fileRegex, lexerClass, parserClass, startRule, indentSize, singleLineCommentType);

            // Load all corpus files up front.
            IList <string>        allFiles  = getFilenames(language.corpusDir, language.fileRegex);
            IList <InputDocument> documents = load(allFiles, language);

            if (unformatted_input == null)
            {
                // Formatting a file on disk: exclude it from its own training corpus.
                string path = System.IO.Path.GetFullPath(input_file_name);
                IList <InputDocument> others = BuffUtils.filter(documents, d => !d.fileName.Equals(path));
                Corpus corpus = new Corpus(others, language);
                corpus.train();

                InputDocument unformatted_document = parse(input_file_name, language);

                Formatter formatter = new Formatter(corpus, language.indentSize, Formatter.DEFAULT_K, Trainer.FEATURES_INJECT_WS, Trainer.FEATURES_HPOS);
                formatted_output = formatter.format(unformatted_document, false);
            }
            else
            {
                // Formatting an in-memory string: the whole corpus is usable for training.
                Corpus corpus = new Corpus(documents, language);
                corpus.train();

                InputDocument unformatted_document = parse(input_file_name, unformatted_input, language);

                Formatter formatter = new Formatter(corpus, language.indentSize, Formatter.DEFAULT_K, Trainer.FEATURES_INJECT_WS, Trainer.FEATURES_HPOS);
                formatted_output = formatter.format(unformatted_document, false);
            }

            // "" means echo to the log; a non-empty name means write to that file;
            // null (set by -inoutstring) means the caller reads formatted_output directly.
            if (outputFileName != null && outputFileName == "")
            {
                Log.WriteLine(formatted_output);
            }
            else if (!string.IsNullOrEmpty(outputFileName))
            {
                org.antlr.codebuff.misc.Utils.writeFile(outputFileName, formatted_output);
            }

            return formatted_output;
        }
Example #26
0
        /// <summary>
        /// Build a new map from rule name to its list of (a,b) token pairs, dropping
        /// every pair in which a or b belongs to that rule's repeated-token set.
        ///
        ///  E.g., before removing repeated token ',', we see:
        ///
        ///  elementValueArrayInitializer: 4:'{',',' 1:'{','}' 4:',','}'
        ///
        ///  After removing tuples containing repeated tokens, we get:
        ///
        ///  elementValueArrayInitializer: 1:'{','}'
        /// </summary>
        protected internal virtual IDictionary<RuleAltKey, IList<org.antlr.codebuff.misc.Pair<int, int>>> stripPairsWithRepeatedTokens()
        {
            var stripped = new Dictionary<RuleAltKey, IList<org.antlr.codebuff.misc.Pair<int, int>>>();

            foreach (RuleAltKey ruleAltKey in ruleToPairsBag.Keys)
            {
                ISet<int> repeatedTokens;
                ruleToRepeatedTokensSet.TryGetValue(ruleAltKey, out repeatedTokens);
                ISet<org.antlr.codebuff.misc.Pair<int, int>> pairsBag;
                ruleToPairsBag.TryGetValue(ruleAltKey, out pairsBag);

                // No repeated tokens recorded for this rule: keep every pair as-is;
                // otherwise keep only pairs whose endpoints avoid the repeated set.
                stripped[ruleAltKey] = repeatedTokens == null
                    ? (IList<org.antlr.codebuff.misc.Pair<int, int>>) new List<org.antlr.codebuff.misc.Pair<int, int>>(pairsBag)
                    : BuffUtils.filter(pairsBag, p => !repeatedTokens.Contains(p.a) && !repeatedTokens.Contains(p.b));
            }
            return stripped;
        }
Example #27
0
        /// <summary>
        /// Debug driver: formats one test file via leave-one-out validation against its
        /// language's corpus, then prints edit-distance, whitespace-comparison and
        /// classifier cache statistics.
        /// </summary>
        /// <param name="args">[-leave-one-out] [-java|-java8|-antlr|-sqlite|-tsql] test-file</param>
        public static void Main(string[] args)
        {
            if (args.Length < 2)
            {
                Console.Error.WriteLine("Dbg [-leave-one-out] [-java|-java8|-antlr|-sqlite|-tsql] test-file");
                // BUG FIX: previously fell through and indexed past the end of args.
                return;
            }

            int    arg             = 0;
            bool   collectAnalysis = true;
            string language        = args[arg++];

            language = language.Substring(1); // strip the leading '-' from the language flag
            string testFilename = args[arg];

            // Resolve the flag to a registered language descriptor.
            LangDescriptor lang = null;
            for (int i = 0; i < Tool.languages.Length; i++)
            {
                if (Tool.languages[i].name.Equals(language))
                {
                    lang = Tool.languages[i];
                    break;
                }
            }
            if (lang == null)
            {
                // Previously this exited silently; now the user is told why nothing happened.
                Console.Error.WriteLine("Unknown language: " + language);
                return;
            }

            System.DateTime start = System.DateTime.Now;
            LeaveOneOutValidator             validator = new LeaveOneOutValidator(lang.corpusDir, lang);
            Triple <Formatter, float, float> val       = validator.validateOneDocument(testFilename, null, collectAnalysis);
            InputDocument testDoc = Tool.parse(testFilename, lang);
            System.DateTime stop  = System.DateTime.Now;
            Formatter formatter   = val.a;
            string    output      = formatter.Output;
            Console.WriteLine("output len = " + output.Length);
            float editDistance = normalizedLevenshteinDistance(testDoc.content, output);
            Console.WriteLine("normalized Levenshtein distance: " + editDistance);
            IList <TokenPositionAnalysis> analysisPerToken = formatter.AnalysisPerToken;

            // Compare only the whitespace tokens of the original vs the formatted text.
            Regex             rex             = new Regex("^\\s+$");
            CommonTokenStream original_tokens = Tool.tokenize(testDoc.content, lang.lexerClass);
            IList <Token>     wsTokens        = BuffUtils.filter(original_tokens.GetTokens(), t => rex.IsMatch(t.Text));
            string            originalWS      = tokenText(wsTokens);
            Console.WriteLine("origin ws tokens len: " + originalWS.Length);
            CommonTokenStream formatted_tokens = Tool.tokenize(output, lang.lexerClass);
            wsTokens = BuffUtils.filter(formatted_tokens.GetTokens(), t => rex.IsMatch(t.Text));
            string formattedWS = tokenText(wsTokens);
            Console.WriteLine("formatted ws tokens len: " + formattedWS.Length);
            editDistance  = levenshteinDistance(originalWS, formattedWS);
            editDistance /= Math.Max(testDoc.content.Length, output.Length);
            Console.WriteLine("Levenshtein distance of ws normalized to output len: " + editDistance);

            ClassificationAnalysis analysis = new ClassificationAnalysis(testDoc, analysisPerToken);
            Console.WriteLine(analysis);

            //            GUIController controller;
            //            controller = new GUIController(analysisPerToken, testDoc, output, lang.lexerClass);
            //controller.show();
            //			System.out.println(output);
            //Console.Write("formatting time {0:D}s\n", (stop - start) / 1000000);
            Console.Write("classify calls {0:D}, hits {1:D} rate {2:F}\n", kNNClassifier.nClassifyCalls, kNNClassifier.nClassifyCacheHits, kNNClassifier.nClassifyCacheHits / (float)kNNClassifier.nClassifyCalls);
            Console.Write("kNN calls {0:D}, hits {1:D} rate {2:F}\n", kNNClassifier.nNNCalls, kNNClassifier.nNNCacheHits, kNNClassifier.nNNCacheHits / (float)kNNClassifier.nNNCalls);
        }
Example #28
0
        /// <summary>
        /// Command-line entry point: parses options, loads the grammar's generated
        /// lexer/parser types by reflection, trains a formatter on the corpus and
        /// formats the target file (or the string held in <c>unformatted_input</c>).
        /// </summary>
        /// <param name="args">
        /// Flag/value pairs (-g, -rule, -corpus, -files, -indent, -comment, -o,
        /// -inoutstring) followed by the file to format (must be last).
        /// </param>
        public static void Main(string[] args)
        {
            if (args.Length < 7)
            {
                Console.Error.WriteLine("org.antlr.codebuff.Tool -g grammar-name -rule start-rule -corpus root-dir-of-samples \\\n" + "   [-files file-extension] [-indent num-spaces] \\" + "   [-comment line-comment-name] [-o output-file] file-to-format");
                return;
            }

            formatted_output = null;
            string outputFileName  = "";
            string grammarName     = null;
            string startRule       = null;
            string corpusDir       = null;
            string indentS         = "4";
            string commentS        = null;
            string input_file_name = null;
            string fileExtension   = null;
            int    i = 0;

            // Consume flag/value pairs until the first non-flag argument.
            while (i < args.Length && args[i].StartsWith("-", StringComparison.Ordinal))
            {
                switch (args[i])
                {
                case "-g":
                    i++;
                    grammarName = args[i++];
                    break;

                case "-rule":
                    i++;
                    startRule = args[i++];
                    break;

                case "-corpus":
                    i++;
                    corpusDir = args[i++];
                    break;

                case "-files":
                    i++;
                    fileExtension = args[i++];
                    break;

                case "-indent":
                    i++;
                    indentS = args[i++];
                    break;

                case "-comment":
                    i++;
                    commentS = args[i++];
                    break;

                case "-o":
                    i++;
                    outputFileName = args[i++];
                    break;

                case "-inoutstring":
                    i++;
                    formatted_output = "";
                    outputFileName   = null;
                    break;
                }
            }
            input_file_name = args[i];             // must be last

            Console.WriteLine("gramm: " + grammarName);
            string parserClassName = grammarName + "Parser";
            string lexerClassName  = grammarName + "Lexer";
            Type   parserClass     = null;
            Type   lexerClass      = null;
            Lexer  lexer           = null;

            // Locate the ANTLR-generated types by reflection; failures are reported
            // and handled by the null check below rather than from the catch block.
            try
            {
                parserClass = Type.GetType(parserClassName);
                lexerClass  = Type.GetType(lexerClassName);
            }
            catch (Exception e)
            {
                Console.Error.WriteLine("Can't load " + parserClassName + " or maybe " + lexerClassName);
                Console.Error.WriteLine("Make sure they are generated by ANTLR, compiled, and in CLASSPATH");
                System.Console.WriteLine(e.StackTrace);
            }
            if (parserClass == null || lexerClass == null) // was non-short-circuit '|'
            {
                return;                 // don't return from catch!
            }
            int indentSize            = int.Parse(indentS);
            int singleLineCommentType = -1;

            if (!string.ReferenceEquals(commentS, null))
            {
                // A line-comment token name was supplied; resolve it to a token type.
                try
                {
                    lexer = getLexer(lexerClass, null);
                }
                catch (Exception e)
                {
                    Console.Error.WriteLine("Can't instantiate lexer " + lexerClassName);
                    System.Console.WriteLine(e.StackTrace);
                }
                if (lexer == null)
                {
                    return;
                }
                IDictionary <string, int> tokenTypeMap = lexer.TokenTypeMap;
                if (tokenTypeMap.ContainsKey(commentS))
                {
                    singleLineCommentType = tokenTypeMap[commentS];
                }
            }
            string fileRegex = null;

            if (!string.ReferenceEquals(fileExtension, null))
            {
                fileRegex = ".*\\." + fileExtension;
            }
            LangDescriptor language = new LangDescriptor(grammarName, corpusDir, fileRegex, lexerClass, parserClass, startRule, indentSize, singleLineCommentType);

            // Load all corpus files up front.
            IList <string>        allFiles  = getFilenames(language.corpusDir, language.fileRegex);
            IList <InputDocument> documents = load(allFiles, language);

            if (unformatted_input == null)
            {
                // Formatting a file on disk: exclude it from its own training corpus.
                string path = System.IO.Path.GetFullPath(input_file_name);
                IList <InputDocument> others = BuffUtils.filter(documents, d => !d.fileName.Equals(path));
                Corpus corpus = new Corpus(others, language);
                corpus.train();

                InputDocument unformatted_document = parse(input_file_name, language);

                Formatter formatter = new Formatter(corpus, language.indentSize, Formatter.DEFAULT_K, Trainer.FEATURES_INJECT_WS, Trainer.FEATURES_HPOS);
                formatted_output = formatter.format(unformatted_document, false);
            }
            else
            {
                // Formatting an in-memory string: the whole corpus is usable for training.
                Corpus corpus = new Corpus(documents, language);
                corpus.train();

                InputDocument unformatted_document = parse(input_file_name, unformatted_input, language);

                Formatter formatter = new Formatter(corpus, language.indentSize, Formatter.DEFAULT_K, Trainer.FEATURES_INJECT_WS, Trainer.FEATURES_HPOS);
                formatted_output = formatter.format(unformatted_document, false);
            }

            // "" means echo to stdout; a non-empty name means write to that file;
            // null (set by -inoutstring) means the caller reads formatted_output directly.
            if (outputFileName != null && outputFileName == "")
            {
                System.Console.WriteLine(formatted_output);
            }
            else if (!string.IsNullOrEmpty(outputFileName))
            {
                org.antlr.codebuff.misc.Utils.writeFile(outputFileName, formatted_output);
            }
        }
Example #29
0
        /// <summary>
        /// For one language, measure how well a formatter "captures" each corpus file's
        /// style: every file is reformatted twice — trained on itself alone, then trained
        /// on the whole corpus — and the per-file edit distances are emitted as a generated
        /// python/matplotlib script that plots the difference.
        /// </summary>
        /// <param name="language">Language descriptor supplying the corpus dir, file regex, name and indent size.</param>
        public static void runCaptureForOneLanguage(LangDescriptor language)
        {
            IList <string> filenames         = Tool.getFilenames(language.corpusDir, language.fileRegex);
            IList <float>  selfEditDistances = new List <float>();

            // Pass 1: train on each single file by itself, then reformat that same file.
            foreach (string fileName in filenames)
            {
                Corpus corpus = new Corpus(fileName, language);
                corpus.train();
                InputDocument testDoc   = Tool.parse(fileName, corpus.language);
                Formatter     formatter = new Formatter(corpus, language.indentSize);
                string        output    = formatter.format(testDoc, false);
                //		System.out.println(output);
                float editDistance = Dbg.normalizedLevenshteinDistance(testDoc.content, output);
                Log.WriteLine(fileName + " edit distance " + editDistance);
                selfEditDistances.Add(editDistance);
            }

            {
                // Pass 2: train once on the entire corpus, then reformat every file with it.
                Corpus corpus = new Corpus(language.corpusDir, language);
                corpus.train();

                IList <float> corpusEditDistances = new List <float>();
                foreach (string fileName in filenames)
                {
                    InputDocument testDoc   = Tool.parse(fileName, corpus.language);
                    Formatter     formatter = new Formatter(corpus, language.indentSize);
                    string        output    = formatter.format(testDoc, false);
                    //		System.out.println(output);
                    float editDistance = Dbg.normalizedLevenshteinDistance(testDoc.content, output);
                    Log.WriteLine(fileName + "+corpus edit distance " + editDistance);
                    corpusEditDistances.Add(editDistance);
                }
                // heh this gives info on within-corpus variability. i.e., how good/consistent is my corpus?
                // those files with big difference are candidates for dropping from corpus or for cleanup.
                IList <string> labels = BuffUtils.map(filenames, f => '"' + System.IO.Path.GetFileName(f) + '"');

                // Python/matplotlib template; the <...> placeholders are filled in via
                // the StringTemplate (ST) calls below, not by C# string interpolation.
                string python = "#\n" + "# AUTO-GENERATED FILE. DO NOT EDIT\n" + "# CodeBuff <version> '<date>'\n" + "#\n" +
                                "import numpy as np\n" + "import matplotlib.pyplot as plt\n\n" + "fig = plt.figure()\n" +
                                "ax = plt.subplot(111)\n" + "labels = <labels>\n" + "N = len(labels)\n\n" +
                                "featureIndexes = range(0,N)\n" + "<lang>_self = <selfEditDistances>\n" +
                                "<lang>_corpus = <corpusEditDistances>\n" +
                                "<lang>_diff = np.abs(np.subtract(<lang>_self, <lang>_corpus))\n\n" +
                                "all = zip(<lang>_self, <lang>_corpus, <lang>_diff, labels)\n" +
                                "all = sorted(all, key=lambda x : x[2], reverse=True)\n" +
                                "<lang>_self, <lang>_corpus, <lang>_diff, labels = zip(*all)\n\n" +
                                "ax.plot(featureIndexes, <lang>_self, label=\"<lang>_self\")\n" +
                                "#ax.plot(featureIndexes, <lang>_corpus, label=\"<lang>_corpus\")\n" +
                                "ax.plot(featureIndexes, <lang>_diff, label=\"<lang>_diff\")\n" +
                                "ax.set_xticklabels(labels, rotation=60, fontsize=8)\n" +
                                "plt.xticks(featureIndexes, labels, rotation=60)\n" +
                                "ax.yaxis.grid(True, linestyle='-', which='major', color='lightgrey', alpha=0.5)\n\n" +
                                "ax.text(1, .25, 'median $f$ self distance = %5.3f, corpus+$f$ distance = %5.3f' %" +
                                "    (np.median(<lang>_self),np.median(<lang>_corpus)))\n" + "ax.set_xlabel(\"File Name\")\n" +
                                "ax.set_ylabel(\"Edit Distance\")\n" +
                                "ax.set_title(\"Difference between Formatting File <lang> $f$\\nwith Training=$f$ and Training=$f$+Corpus\")\n" +
                                "plt.legend()\n" + "plt.tight_layout()\n" + "fig.savefig(\"images/" + language.name +
                                "_one_file_capture.pdf\", format='pdf')\n" + "plt.show()\n";
                ST pythonST = new ST(python);

                pythonST.add("lang", language.name);
                pythonST.add("version", version);
                pythonST.add("date", DateTime.Now);
                pythonST.add("labels", labels.ToString());
                pythonST.add("selfEditDistances", selfEditDistances.ToString());
                pythonST.add("corpusEditDistances", corpusEditDistances.ToString());

                string code = pythonST.render();

                {
                    // Write the generated plotting script next to the other python analyses.
                    string fileName = "python/src/" + language.name + "_one_file_capture.py";
                    org.antlr.codebuff.misc.Utils.writeFile(fileName, code);
                    Log.WriteLine("wrote python code to " + fileName);
                }
            }
        }