Example #1
0
        /// <summary>
        /// Trains a corpus on <paramref name="documents"/>, formats <paramref name="testDoc"/> with the
        /// resulting formatter and reports the edit distance and classification error rate.
        /// </summary>
        /// <param name="language">Language descriptor; supplies the indent size and the output sub-directory name.</param>
        /// <param name="documents">Documents used to train the corpus.</param>
        /// <param name="testDoc">Document to format and analyse.</param>
        /// <param name="saveOutput">When true, the formatted text is written under <c>outputDir/language.name</c>.</param>
        /// <param name="computeEditDistance">When false, the edit distance is reported as 0.</param>
        /// <returns>The formatter, the normalized edit distance, and the error rate of the analysis.</returns>
        public static Triple <Formatter, float, float> validate(LangDescriptor language, IList <InputDocument> documents, InputDocument testDoc, bool saveOutput, bool computeEditDistance)
        {
            Corpus corpus = new Corpus(documents, language);

            corpus.train();
            Formatter formatter    = new Formatter(corpus, language.indentSize);
            string    output       = formatter.format(testDoc, false);
            float     editDistance = 0;

            if (computeEditDistance)
            {
                editDistance = Dbg.normalizedLevenshteinDistance(testDoc.content, output);
            }
            ClassificationAnalysis analysis = new ClassificationAnalysis(testDoc, formatter.AnalysisPerToken);

            if (saveOutput)
            {
                // The directory object is built once; the original re-tested saveOutput and rebuilt it.
                File dir = new File(outputDir + "/" + language.name);
                dir.mkdir();
                org.antlr.codebuff.misc.Utils.writeFile(dir.Path + "/" + System.IO.Path.GetFileName(testDoc.fileName), output);
            }
            // Type arguments must match the declared return type (float, not float?).
            return(new Triple <Formatter, float, float>(formatter, editDistance, analysis.ErrorRate));
        }
Example #2
0
 //public IQueryable<BookStats> SaveVerse(int id)
 /// <summary>
 /// Marks the posted <paramref name="ptext"/> entity as modified and saves it.
 /// </summary>
 /// <param name="id">Verse id; used to tell a missing verse from another save failure.</param>
 /// <param name="ptext">Entity read from the request body.</param>
 /// <returns>
 /// 200 on success, 400 when the body is missing or the save fails for an existing verse,
 /// 404 when the save fails and no verse with <paramref name="id"/> exists.
 /// </returns>
 public IActionResult SaveVerse(int id,[FromBody] Corpus ptext)
 {
     Console.WriteLine($"StatsController.SaveVerse(?): id={id} text={ptext} isnull="+(ptext==null?"true":"false"));
     if (ptext == null)
     {
         // db.Entry(null) would throw and surface as a server error; a missing body is a client error.
         return BadRequest("Request body is missing or could not be read.");
     }
     db.Entry(ptext).State = EntityState.Modified;
     try
     {
         db.SaveChanges();
     }
     catch (Exception x)
     {
         if (!VerseExists(id))
         {
             return NotFound();
         }
         else
         {
             return BadRequest(x.Message);
         }
     }
     return Ok("Save Successful!");
 }
Example #3
0
        /// <summary>
        /// Builds an entity named <paramref name="entityName"/> with one type attribute per entry in
        /// <paramref name="attributesParams"/>, places it in its own "<c>{entityName}.cdm.json</c>" document,
        /// and adds that document to the local storage root, the default manifest and the all-imports document.
        /// </summary>
        /// <param name="entityName">Name of the entity; also used to name its document.</param>
        /// <param name="attributesParams">Name, data type and purpose for each attribute to create.</param>
        /// <returns>The newly created entity.</returns>
        public CdmEntityDefinition CreateBasicEntity(string entityName, List <TypeAttributeParam> attributesParams)
        {
            CdmEntityDefinition newEntity = Corpus.MakeObject <CdmEntityDefinition>(CdmObjectType.EntityDef, entityName);

            for (int index = 0; index < attributesParams.Count; index++)
            {
                TypeAttributeParam spec = attributesParams[index];

                CdmTypeAttributeDefinition typeAttribute = Corpus.MakeObject <CdmTypeAttributeDefinition>(CdmObjectType.TypeAttributeDef, nameOrRef: spec.AttributeName, simpleNameRef: false);
                typeAttribute.DataType    = Corpus.MakeRef <CdmDataTypeReference>(CdmObjectType.DataTypeRef, refObj: spec.AttributeDataType, simpleNameRef: true);
                typeAttribute.Purpose     = Corpus.MakeRef <CdmPurposeReference>(CdmObjectType.PurposeRef, refObj: spec.AttributePurpose, simpleNameRef: true);
                typeAttribute.DisplayName = spec.AttributeName;

                newEntity.Attributes.Add(typeAttribute);
            }

            // Wrap the entity in a document of its own.
            CdmDocumentDefinition document = Corpus.MakeObject <CdmDocumentDefinition>(CdmObjectType.DocumentDef, $"{entityName}.cdm.json", false);
            document.Imports.Add(AllImportsDocName);
            document.Definitions.Add(newEntity);

            // Register the document and the entity with the shared containers.
            LocalStorageRoot.Documents.Add(document, document.Name);
            DefaultManifest.Entities.Add(newEntity);
            AllImports.Imports.Add(newEntity.InDocument.Name);

            return newEntity;
        }
Example #4
0
        /// <summary>
        /// Renders one corpus exemplar as a single display line: its feature string, its category string,
        /// and the source line with a middle dot inserted at the exemplar's character position.
        /// </summary>
        /// <param name="FEATURES">Feature metadata passed through to <c>Trainer._toString</c>.</param>
        /// <param name="corpus">Corpus holding the feature vectors and their documents.</param>
        /// <param name="Y">Category values, indexed like the corpus feature vectors.</param>
        /// <param name="corpusVectorIndex">Index of the exemplar to display.</param>
        /// <returns>The formatted display string.</returns>
        public static string getExemplarDisplay(FeatureMetaData[] FEATURES, Corpus corpus, IList <int> Y, int corpusVectorIndex)
        {
            int[]         vector      = corpus.featureVectors[corpusVectorIndex];
            InputDocument sourceDoc   = corpus.documentsPerExemplar[corpusVectorIndex];
            string        featureText = Trainer._toString(FEATURES, sourceDoc, vector);
            string        sourceLine  = sourceDoc.getLine(vector[Trainer.INDEX_INFO_LINE]);
            int           charPos     = vector[Trainer.INDEX_INFO_CHARPOS];

            if ((object)sourceLine != null)
            {
                // Mark the character position with a middle dot.
                string head = sourceLine.Substring(0, charPos);
                string tail = sourceLine.Substring(charPos);
                sourceLine = head + '\u00B7' + tail;
            }

            int  category     = Y[corpusVectorIndex];
            int  lowByte      = category & 0xFF;
            bool isWhitespace = lowByte == Trainer.CAT_INJECT_WS || lowByte == Trainer.CAT_INJECT_NL;

            string categoryText;
            if (!isWhitespace)
            {
                categoryText = Formatter.getHPosCategoryStr(category);
            }
            else
            {
                categoryText = Formatter.getWSCategoryStr(category);
            }

            return string.Format("{0} {1,9} {2}", featureText, categoryText, sourceLine);
        }
        /// <summary>
        /// Console entry point. Parses the command line into an <c>Options</c> instance and, depending on
        /// <c>options.TaskName</c>, trains a model ("train"), decodes a test file ("test"), or prints usage.
        /// </summary>
        /// <param name="args">Raw command-line arguments handed to <c>ArgParser</c>.</param>
        static void Main(string[] args)
        {
            Logger.LogFile = $"{nameof(Seq2SeqConsole)}_{GetTimeStamp(DateTime.Now)}.log";

            Options   options = new Options();
            ArgParser parser  = new ArgParser(args, options);

            if (String.Equals(options.TaskName, "train", StringComparison.InvariantCultureIgnoreCase))
            {
                Corpus corpus = new Corpus(options.TrainCorpusPath, options.SrcLang, options.TgtLang, options.ShuffleBlockSize);

                AttentionSeq2Seq model;
                if (File.Exists(options.ModelFilePath))
                {
                    // A model file already exists: load it and attach the training corpus.
                    Logger.WriteLine($"Loading model from '{options.ModelFilePath}'...");
                    model = new AttentionSeq2Seq();
                    model.Load(options.ModelFilePath);
                    model.TrainCorpus = corpus;
                }
                else
                {
                    model = new AttentionSeq2Seq(options.WordVectorSize, options.HiddenSize, options.Depth, corpus, options.SrcVocab, options.TgtVocab, options.SrcEmbeddingModelFilePath, options.TgtEmbeddingModelFilePath,
                                                 options.SparseFeature, true, options.ModelFilePath);
                }

                Logger.WriteLine($"Source Language = '{options.SrcLang}'");
                Logger.WriteLine($"Target Language = '{options.TgtLang}'");
                Logger.WriteLine($"SSE Enable = '{System.Numerics.Vector.IsHardwareAccelerated}'");
                Logger.WriteLine($"SSE Size = '{System.Numerics.Vector<float>.Count * 32}'");
                Logger.WriteLine($"Processor counter = '{Environment.ProcessorCount}'");
                Logger.WriteLine($"Hidden Size = '{model.HiddenSize}'");
                Logger.WriteLine($"Word Vector Size = '{model.WordVectorSize}'");
                Logger.WriteLine($"Learning Rate = '{options.LearningRate}'");
                Logger.WriteLine($"Network Layer = '{model.Depth}'");
                Logger.WriteLine($"Use Sparse Feature = '{options.SparseFeature}'");

                model.IterationDone += ss_IterationDone;
                model.Train(300, options.LearningRate);
                return;
            }

            if (String.Equals(options.TaskName, "test", StringComparison.InvariantCultureIgnoreCase))
            {
                AttentionSeq2Seq model = new AttentionSeq2Seq();
                model.Load(options.ModelFilePath);

                string[]      sentences = File.ReadAllLines(options.InputTestFile);
                List <string> decoded   = new List <string>();
                for (int index = 0; index < sentences.Length; index++)
                {
                    // Lower-case, trim and split on single spaces before prediction.
                    List <string> tokens    = sentences[index].ToLower().Trim().Split(' ').ToList();
                    List <string> predicted = model.Predict(tokens);
                    decoded.Add(String.Join(" ", predicted));
                }

                File.WriteAllLines(options.OutputTestFile, decoded);
                return;
            }

            // Any other task name: show usage.
            parser.Usage();
        }
Example #6
0
        /// <summary>
        /// Creates an entity attribute named <paramref name="entityAttributeName"/> whose entity is
        /// <paramref name="projectionSourceEntityRef"/>.
        /// </summary>
        /// <param name="entityAttributeName">Name given to the new entity attribute.</param>
        /// <param name="projectionSourceEntityRef">Entity reference assigned to the attribute's <c>Entity</c>.</param>
        /// <returns>The new entity attribute definition.</returns>
        public CdmEntityAttributeDefinition CreateEntityAttribute(string entityAttributeName, CdmEntityReference projectionSourceEntityRef)
        {
            CdmEntityAttributeDefinition result = Corpus.MakeObject <CdmEntityAttributeDefinition>(
                CdmObjectType.EntityAttributeDef,
                nameOrRef: entityAttributeName,
                simpleNameRef: false);

            result.Entity = projectionSourceEntityRef;
            return result;
        }
Example #7
0
        /// <summary>
        /// A freshly constructed corpus returns null from <c>GetLetter("a")</c>.
        /// </summary>
        public void Corpus_GetLetter_NullIfNoLetter()
        {
            // Arrange
            Corpus emptyCorpus = new Corpus();

            // Act
            var found = emptyCorpus.GetLetter("a");

            // Assert
            Assert.That(found, Is.Null);
        }
Example #8
0
        /// <summary>
        /// Creates an unnamed entity reference whose explicit reference is the given projection.
        /// </summary>
        /// <param name="projection">Projection to reference inline.</param>
        /// <returns>The new entity reference.</returns>
        public CdmEntityReference CreateProjectionInlineEntityReference(CdmProjection projection)
        {
            CdmEntityReference inlineRef = Corpus.MakeObject <CdmEntityReference>(CdmObjectType.EntityRef, null);
            inlineRef.ExplicitReference = projection;
            return inlineRef;
        }
Example #9
0
        /// <summary>
        /// Creates a projection whose source is a simple-named entity reference to
        /// <paramref name="projectionSourceName"/>.
        /// </summary>
        /// <param name="projectionSourceName">Name used for the source entity reference.</param>
        /// <returns>The new projection.</returns>
        public CdmProjection CreateProjection(string projectionSourceName)
        {
            CdmProjection result = Corpus.MakeObject <CdmProjection>(CdmObjectType.ProjectionDef);
            CdmEntityReference sourceRef = Corpus.MakeObject <CdmEntityReference>(CdmObjectType.EntityRef, projectionSourceName, simpleNameRef: true);

            result.Source = sourceRef;
            return result;
        }
Example #10
0
 /// <summary>
 /// Adding three tokens to an empty corpus leaves three entries in <c>Tokens</c>.
 /// </summary>
 public void Basic_Add()
 {
     Corpus corpus = new Corpus();
     foreach (string token in new string[] { "one", "two", "three" })
     {
         corpus.Add(token);
     }
     Assert.AreEqual(3, corpus.Tokens.Count);
 }
Example #11
0
 /// <summary>
 /// Adds <paramref name="corpus"/> to the context's <c>Corpuses</c> set.
 /// </summary>
 /// <param name="corpus">Corpus to add; must not be null.</param>
 /// <exception cref="ArgumentNullException"><paramref name="corpus"/> is null.</exception>
 public void CreateCorpus(Corpus corpus)
 {
     if (corpus != null)
     {
         _context.Corpuses.Add(corpus);
         return;
     }
     throw new ArgumentNullException(nameof(corpus));
 }
Example #12
0
        /// <summary>
        /// Creates a manifest named <c>ManifestName</c> and adds it to the local storage root's documents
        /// under <c>ManifestDocName</c>.
        /// </summary>
        /// <returns>The new manifest definition.</returns>
        public CdmManifestDefinition CreateDefaultManifest()
        {
            CdmManifestDefinition manifest = Corpus.MakeObject <CdmManifestDefinition>(CdmObjectType.ManifestDef, ManifestName);
            LocalStorageRoot.Documents.Add(manifest, ManifestDocName);
            return manifest;
        }
Example #13
0
        /// <summary>
        /// Console entry point. Parses the command line into an <c>Options</c> instance and, depending on
        /// <c>options.TaskName</c>, trains a model ("train"), decodes a test file ("test"), or prints usage.
        /// </summary>
        /// <param name="args">Raw command-line arguments handed to <c>ArgParser</c>.</param>
        static void Main(string[] args)
        {
            Logger.LogFile = $"{nameof(Seq2SeqConsole)}_{GetTimeStamp(DateTime.Now)}.log";

            // Command line
            Options   options = new Options();
            ArgParser parser  = new ArgParser(args, options);

            ArchTypeEnums architecture = (ArchTypeEnums)options.ArchType;

            // Comma-separated device ids from the options
            int[] devices = options.DeviceIds.Split(',').Select(id => int.Parse(id)).ToArray();

            if (String.Equals(options.TaskName, "train", StringComparison.InvariantCultureIgnoreCase))
            {
                ShowOptions(args, options);

                Corpus corpus = new Corpus(options.TrainCorpusPath, options.SrcLang, options.TgtLang, options.BatchSize * devices.Length,
                                           options.ShuffleBlockSize, options.MaxSentLength);

                AttentionSeq2Seq model;
                if (File.Exists(options.ModelFilePath))
                {
                    // Incremental training on an existing model file
                    Logger.WriteLine($"Loading model from '{options.ModelFilePath}'...");
                    model             = new AttentionSeq2Seq(options.ModelFilePath, options.BatchSize, architecture, devices);
                    model.TrainCorpus = corpus;
                }
                else
                {
                    // Training from scratch
                    model = new AttentionSeq2Seq(options.WordVectorSize, options.HiddenSize, options.Depth, corpus, options.SrcVocab, options.TgtVocab,
                                                 options.SrcEmbeddingModelFilePath, options.TgtEmbeddingModelFilePath, true, options.ModelFilePath, options.BatchSize, options.DropoutRatio,
                                                 architecture, devices);
                }

                model.IterationDone += ss_IterationDone;
                model.Train(options.MaxEpochNum, options.LearningRate, options.GradClip);
                return;
            }

            if (String.Equals(options.TaskName, "test", StringComparison.InvariantCultureIgnoreCase))
            {
                // Decode the test file with a batch size of 1
                AttentionSeq2Seq model = new AttentionSeq2Seq(options.ModelFilePath, 1, architecture, devices);

                string[]      sentences = File.ReadAllLines(options.InputTestFile);
                List <string> decoded   = new List <string>();
                for (int index = 0; index < sentences.Length; index++)
                {
                    List <string>         tokens     = sentences[index].ToLower().Trim().Split(' ').ToList();
                    List <List <string> > hypotheses = model.Predict(tokens, options.BeamSearch);
                    decoded.AddRange(hypotheses.Select(words => String.Join(" ", words)));
                }

                File.WriteAllLines(options.OutputTestFile, decoded);
                return;
            }

            // Any other task name: show usage.
            parser.Usage();
        }
Example #14
0
        /// <summary>
        /// Leave-one-out validation: trains on every document except <paramref name="fileToExclude"/>,
        /// formats the excluded document, and records training time and formatting throughput.
        /// </summary>
        /// <param name="language">Language descriptor; supplies the indent size and the output sub-directory name.</param>
        /// <param name="documents">All documents, including the one to exclude.</param>
        /// <param name="fileToExclude">Path of the document to hold out; resolved to a full path before matching.</param>
        /// <param name="k">Passed through to the <c>Formatter</c> constructor.</param>
        /// <param name="injectWSFeatures">Passed through to the <c>Formatter</c> constructor.</param>
        /// <param name="alignmentFeatures">Passed through to the <c>Formatter</c> constructor.</param>
        /// <param name="outputDir">When non-null, the formatted text is written under <c>outputDir/language.name/Tool.version</c>.</param>
        /// <param name="computeEditDistance">When false, the edit distance is reported as 0.</param>
        /// <param name="collectAnalysis">Passed through to <c>formatter.format</c>.</param>
        /// <returns>
        /// The formatter, the normalized edit distance and the error rate; null when the excluded file is not
        /// among <paramref name="documents"/>.
        /// </returns>
        public virtual Triple <Formatter, float, float> validate(LangDescriptor language, IList <InputDocument> documents, string fileToExclude, int k, FeatureMetaData[] injectWSFeatures, FeatureMetaData[] alignmentFeatures, string outputDir, bool computeEditDistance, bool collectAnalysis)
        {
            string path = System.IO.Path.GetFullPath(fileToExclude);
            IList <InputDocument> others   = BuffUtils.filter(documents, d => !d.fileName.Equals(path));
            IList <InputDocument> excluded = BuffUtils.filter(documents, d => d.fileName.Equals(path));

            // Report a missing document before asserting, otherwise the assertion fires first in debug builds.
            if (excluded.Count == 0)
            {
                Console.Error.WriteLine("Doc not in corpus: " + path);
                return(null);
            }
            Debug.Assert(others.Count == documents.Count - 1);

            InputDocument testDoc = excluded[0];
            DateTime      start   = System.DateTime.Now;
            Corpus        corpus  = new Corpus(others, language);

            corpus.train();
            DateTime      stop         = System.DateTime.Now;
            Formatter     formatter    = new Formatter(corpus, language.indentSize, k, injectWSFeatures, alignmentFeatures);
            InputDocument originalDoc  = testDoc;
            DateTime      format_start = System.DateTime.Now;
            string        output       = formatter.format(testDoc, collectAnalysis);
            DateTime      format_stop  = System.DateTime.Now;
            float         editDistance = 0;

            if (computeEditDistance)
            {
                editDistance = Dbg.normalizedLevenshteinDistance(testDoc.content, output);
            }
            ClassificationAnalysis analysis = new ClassificationAnalysis(originalDoc, formatter.AnalysisPerToken);

            Console.WriteLine(testDoc.fileName + ": edit distance = " + editDistance + ", error rate = " + analysis.ErrorRate);
            if (!string.ReferenceEquals(outputDir, null))
            {
                string dir = outputDir + "/" + language.name + "/" + Tool.version;
                if (!System.IO.Directory.Exists(dir))
                {
                    System.IO.Directory.CreateDirectory(dir);
                }
                org.antlr.codebuff.misc.Utils.writeFile(dir + "/" + System.IO.Path.GetFileName(testDoc.fileName), output);
            }

            // TotalMilliseconds is the whole elapsed time; Milliseconds is only the 0-999 component.
            double trainingMS   = (stop - start).TotalMilliseconds;
            double formattingMS = (format_stop - format_start).TotalMilliseconds;

            trainingTimes.Add(trainingMS);
            float tokensPerMS = testDoc.tokens.Size / (float)formattingMS;

            formattingTokensPerMS.Add((double)tokensPerMS);
            // "D" is an integer format specifier and is invalid for TimeSpan, so pass whole milliseconds.
            Console.Write("Training time = {0:D} ms, formatting {1:D} ms, {2,5:F3} tokens/ms ({3:D} tokens)\n", (long)trainingMS, (long)formattingMS, tokensPerMS, testDoc.tokens.Size);
            return(new Triple <Formatter, float, float>(formatter, editDistance, analysis.ErrorRate));
        }
Example #15
0
        /// <summary>
        /// Reads "sample.txt", tokenizes it, runs Brown clustering with M = 70 over the tokens, prints the
        /// clusters obtained from <c>GetClustersWithCodewordsOfLength(10)</c>, and saves the clustering
        /// as "BrownClusteringResult.xml".
        /// </summary>
        public void TestClustering()
        {
            char[] separators = new char[] { ' ', '?', ',', ':', '"', '\n', '\t' };

            List <string> sequence   = new List <string>();
            Corpus        vocabulary = new Corpus();

            using (StreamReader input = new StreamReader("sample.txt"))
            {
                string   text   = input.ReadToEnd();
                string[] pieces = text.Split(separators, StringSplitOptions.RemoveEmptyEntries);

                for (int index = 0; index < pieces.Length; index++)
                {
                    string raw   = pieces[index];
                    string token = raw.Trim();

                    if (token == ".")
                    {
                        continue;
                    }
                    if (token.EndsWith("."))
                    {
                        // Drop one trailing period.
                        token = token.Substring(0, token.Length - 1);
                    }
                    // The length test is applied to the untrimmed piece, not to the cleaned token.
                    if (string.IsNullOrEmpty(token) || raw.Length <= 1)
                    {
                        continue;
                    }
                    sequence.Add(token);
                    vocabulary.Add(token);
                }
            }

            int M = 70;

            Console.WriteLine("M: {0}", M);
            Console.WriteLine("Corpus Size: {0}", vocabulary.Count);
            Console.WriteLine("Document Size: {0}", sequence.Count);

            BrownClustering clustering = new BrownClustering(M);
            clustering.Cluster(vocabulary, sequence);

            Dictionary <string, List <string> > clusters = clustering.GetClustersWithCodewordsOfLength(10);

            foreach (KeyValuePair <string, List <string> > cluster in clusters)
            {
                Console.WriteLine("In Cluster {0}", cluster.Key);
                foreach (string member in cluster.Value)
                {
                    Console.Write("{0}, ", member);
                }
                Console.WriteLine();
            }

            XmlDocument document = new XmlDocument();
            document.AppendChild(clustering.ToXml(document));
            document.Save("BrownClusteringResult.xml");
        }
Example #16
0
        /// <summary>
        /// Reads "<c>EkmanData{startIndex}-{endIndex}.csv</c>" from the corpus location and, for each row,
        /// links the row's worker to one target annotation per sentence in the index range.
        /// </summary>
        /// <param name="corpus">Corpus whose string form is used as the directory prefix of the CSV path.</param>
        /// <param name="startIndex">Index into <paramref name="sentences"/> of the first sentence covered by the file.</param>
        /// <param name="endIndex">Index of the last sentence covered by the file (inclusive).</param>
        /// <param name="sentences">Sentence list the annotations are attached to.</param>
        /// <param name="targetWorkerList">Known workers; workers not yet present are appended.</param>
        static private void initializeEkmanAsTarget(Corpus corpus, int startIndex, int endIndex, IList <Sentence> sentences, ref IList <TargetWorker> targetWorkerList)
        {
            string[] rows = File.ReadAllLines(corpus + "/EkmanData" + startIndex + "-" + endIndex + ".csv");

            for (int rowIndex = 0; rowIndex < rows.Length; rowIndex++)
            {
                string[]     cells     = rows[rowIndex].Split(',');
                TargetWorker candidate = new TargetWorker(cells[0]);
                TargetWorker worker;

                if (targetWorkerList.Contains(candidate))
                {
                    // A worker that is already known is reused rather than added again.
                    worker = targetWorkerList.First(x => x.Equals(candidate));
                }
                else
                {
                    targetWorkerList.Add(candidate);
                    worker = candidate;
                }

                // Each sentence occupies (EkmanLabelArray.Length + 1) consecutive columns after column 0.
                int sentenceCount = endIndex - startIndex + 1;
                for (int offset = 0; offset < sentenceCount; offset++)
                {
                    int          groupSize = Constant.EkmanLabelArray.Length + 1;
                    List <Label> picked    = new List <Label>();

                    for (int slot = 1; slot <= groupSize; slot++)
                    {
                        // Cells that are not one of the six names below contribute no label.
                        switch (cells[offset * groupSize + slot])
                        {
                        case "Anger":
                            picked.Add(Label.Anger);
                            break;

                        case "Sadness":
                            picked.Add(Label.Sadness);
                            break;

                        case "Joy":
                            picked.Add(Label.Joy);
                            break;

                        case "Disgust":
                            picked.Add(Label.Disgust);
                            break;

                        case "Surprise":
                            picked.Add(Label.Surprise);
                            break;

                        case "Fear":
                            picked.Add(Label.Fear);
                            break;
                        }
                    }

                    // Take the matching Sentence out of the sentence list and link it with the worker.
                    Sentence         sentence   = sentences[startIndex + offset];
                    TargetAnnotation annotation = new TargetAnnotation(picked.ToArray());
                    worker.SentenceTargetAnnotationDic.Add(sentence, annotation);
                    sentence.TargetWorkerTargetAnnotationDic.Add(worker, annotation);
                }
            }
        }
Example #17
0
 /// <summary>
 /// Looks up <paramref name="text"/> in <paramref name="corpus"/> for the current language's Google code.
 /// </summary>
 /// <param name="corpus">Two-level lookup: text, then language code.</param>
 /// <param name="text">Text key to look up.</param>
 /// <param name="result">The stored value when found; otherwise null.</param>
 /// <returns>True when both the text and the current language code are present.</returns>
 private static bool GetFromSpecificCorpus(Corpus corpus, string text, out string result)
 {
     bool found = corpus.ContainsKey(text) && corpus[text].ContainsKey(PDDLanguage.Current.GoogleCode);

     if (!found)
     {
         result = null;
         return false;
     }

     result = corpus[text][PDDLanguage.Current.GoogleCode];
     return true;
 }
Example #18
0
 /// <summary>
 /// Clears the loading flag and, when a corpus was delivered, stores it and sets the load progress to 1.
 /// </summary>
 /// <param name="c">The loaded corpus, or null; on null only the loading flag is cleared.</param>
 private void CorpusLoaded(Corpus c)
 {
     this.CorpusBeingLoaded = false;
     if (c != null)
     {
         this.loadProgress = 1f;
         this.corpus       = c;
     }
 }
Example #19
0
        /// <summary>
        /// Command-line entry point. Recognised forms:
        /// <c>slant -dir &lt;corpus&gt; &lt;wordlist&gt;</c> runs the slant calculation,
        /// <c>slant -file</c> is not implemented yet, and <c>version</c> prints the assembly version.
        /// With no arguments the help text is printed.
        /// </summary>
        /// <param name="args">Command-line arguments; every position is scanned for a command word.</param>
        static void Main(string[] args)
        {
            //if no arguments are entered
            if (args.Length == 0)
            {
                printHelp();
            }
            for (int i = 0; i < args.Length; i++)
            {
                //wmc slant
                if (args[i] == "slant")
                {
                    // Arguments past the end of the array are treated as empty instead of
                    // throwing IndexOutOfRangeException on a truncated command line.
                    string mode         = i + 1 < args.Length ? args[i + 1] : "";
                    string corpusPath   = i + 2 < args.Length ? args[i + 2] : "";
                    string wordListPath = i + 3 < args.Length ? args[i + 3] : "";

                    //directory slant calc
                    if (mode == "-dir")
                    {
                        if (corpusPath != "")
                        {
                            Corpus c = new Corpus(corpusPath);

                            if (wordListPath != "")
                            {
                                WordList wl = new WordList(wordListPath);
                                //not printing the value bc writeKslant will be called in determining kSlant
                                Slant.determineSlant(c, wl);
                            }
                        }
                    }
                    //file slant calc
                    else if (mode == "-file")
                    {
                        //TO-DO
                    }
                }
                //wmc version
                else if (args[i] == "version")
                {
                    Console.WriteLine("wmc Version " + typeof(Program).Assembly.GetName().Version);
                }
            }

            //WORDLIST debug
            //WordList wordlist = new WordList(args[0]);
            //Console.Write(wordlist.ToString());

            //slant/Corpus debug
            //Corpus corpus = new Corpus(args[1]);
            //slant/Corpus/CorpusSegment debug
            //Console.WriteLine(corpus.CorpusSegments[0].filename + "\t" + corpus.CorpusSegments[0].contentM);

            //slant calc debug
            //Console.WriteLine(Slant.determineSlant(corpus, new WordList(args[2])));

            //only for debugging
            //Console.ReadKey();
        }
Example #20
0
 /// <summary>
 /// Posts a delayed action (1.999 times the last frame's elapsed seconds) that drops the corpus
 /// reference if the display command is no longer this object's name when the action runs.
 /// </summary>
 private void PrepateToClearCorpusOnNavigateAway()
 {
     Action clearIfNavigatedAway = () =>
     {
         if (this.os.display.command != this.name)
         {
             this.corpus = null;
         }
     };

     this.os.delayer.Post(ActionDelayer.Wait(this.os.lastGameTime.ElapsedGameTime.TotalSeconds * 1.999), clearIfNavigatedAway);
 }
Example #21
0
        /// <summary>
        /// Builds the original sentence list for a corpus and also appends it to the global sentence list.
        /// </summary>
        /// <param name="corpus">The corpus the sentences belong to.</param>
        /// <returns>The sentence list that was built.</returns>
        static public IList <Sentence> SentenceList(Corpus corpus)
        {
            string[]        speeches  = File.ReadAllLines(corpus + "/sentences.txt");
            List <Sentence> sentences = new List <Sentence>();

            // Each sentence is numbered by its position in the file.
            for (int index = 0; index < speeches.Length; index++)
            {
                sentences.Add(new Sentence(index, speeches[index]));
            }

            // This is why Sentence cannot be a struct and has to be a class.
            Constant.SentenceList.AddRange(sentences);
            return sentences;
        }
Example #22
0
 /// <summary>
 /// Deletes the row of <c>la.Corpus</c> whose Id matches <paramref name="corpus"/>.
 /// </summary>
 /// <param name="corpus">Corpus to delete; its Id is asserted not to be -1 in debug builds.</param>
 public void Delete(Corpus corpus)
 {
     Debug.Assert(corpus.Id != -1);
     using (var connection = this.Connection()) {
         // One statement removes the corpus row.
         // NOTE(review): removal of the corpus content presumably relies on cascading deletes in the schema; confirm.
         var parameters = new { corpus.Id };
         connection.Execute(@" DELETE FROM la.Corpus WHERE Id=@Id ", parameters);
     }
 }
Example #23
0
        /// <summary>
        /// Initializes the form: builds the designer components, obtains the corpus, constructs a crawler
        /// and a querier over it, and wires the suggestion and query delegates to the querier.
        /// </summary>
        public Form1()
        {
            InitializeComponent();

            _corpus = Corpus.init();

            // The crawler is constructed over the corpus but no reference to it is kept.
            new Crawler(_corpus);

            _querier = new Querier(_corpus);

            _queryMethod   = new QueryDelegate(_querier.Query);
            _suggestMethod = new Func <string, CancellationTokenSource, HashSet <string> >(_querier.AutoCompleteWord);
        }
        /// <summary>
        /// Creates a model for training: validates the device parameters, stores the configuration, obtains
        /// the vocabularies, initializes the weights, and optionally loads pre-trained embeddings for each device.
        /// </summary>
        /// <param name="inputSize">Stored as <c>WordVectorSize</c>.</param>
        /// <param name="hiddenSize">Stored as <c>HiddenSize</c>.</param>
        /// <param name="depth">Stored as <c>Depth</c>.</param>
        /// <param name="trainCorpus">Training corpus; also the vocabulary source when no vocabulary files are given.</param>
        /// <param name="srcVocabFilePath">Source vocabulary file; used only when the target path is also given.</param>
        /// <param name="tgtVocabFilePath">Target vocabulary file; used only when the source path is also given.</param>
        /// <param name="srcEmbeddingFilePath">Optional pre-trained source embedding file.</param>
        /// <param name="tgtEmbeddingFilePath">Optional pre-trained target embedding file.</param>
        /// <param name="useDropout">Not read by this constructor.</param>
        /// <param name="modelFilePath">Stored model file path.</param>
        /// <param name="batchSize">Stored batch size.</param>
        /// <param name="dropoutRatio">Stored dropout ratio.</param>
        /// <param name="archType">Architecture type; devices are initialized only for <c>GPU_CUDA</c>.</param>
        /// <param name="deviceIds">Device ids; one embedding-load pass runs per id.</param>
        public AttentionSeq2Seq(int inputSize, int hiddenSize, int depth, Corpus trainCorpus, string srcVocabFilePath, string tgtVocabFilePath, string srcEmbeddingFilePath, string tgtEmbeddingFilePath,
                                bool useDropout, string modelFilePath, int batchSize, float dropoutRatio, ArchTypeEnums archType, int[] deviceIds)
        {
            CheckParameters(batchSize, archType, deviceIds);
            if (archType == ArchTypeEnums.GPU_CUDA)
            {
                TensorAllocator.InitDevices(deviceIds);
                SetDefaultDeviceIds(deviceIds.Length);
            }

            // Configuration
            m_dropoutRatio  = dropoutRatio;
            m_batchSize     = batchSize;
            m_archType      = archType;
            m_modelFilePath = modelFilePath;
            m_deviceIds     = deviceIds;

            TrainCorpus    = trainCorpus;
            Depth          = depth;
            WordVectorSize = inputSize;
            HiddenSize     = hiddenSize;

            // Vocabulary: from files only when both paths are supplied, otherwise from the corpus.
            bool vocabFilesGiven = !(String.IsNullOrEmpty(srcVocabFilePath) || String.IsNullOrEmpty(tgtVocabFilePath));
            if (!vocabFilesGiven)
            {
                Logger.WriteLine("Building vocabulary from training corpus...");
                BuildVocab(trainCorpus);
            }
            else
            {
                Logger.WriteLine($"Loading vocabulary files from '{srcVocabFilePath}' and '{tgtVocabFilePath}'...");
                LoadVocab(srcVocabFilePath, tgtVocabFilePath);
            }

            // Encoder and decoder weights
            InitWeights();

            // Pre-trained embeddings, loaded once per device
            bool loadSrcEmbedding = !String.IsNullOrEmpty(srcEmbeddingFilePath);
            bool loadTgtEmbedding = !String.IsNullOrEmpty(tgtEmbeddingFilePath);

            for (int device = 0; device < m_deviceIds.Length; device++)
            {
                if (loadSrcEmbedding)
                {
                    Logger.WriteLine($"Loading ExtEmbedding model from '{srcEmbeddingFilePath}' for source side.");
                    LoadWordEmbedding(srcEmbeddingFilePath, m_srcEmbedding[device], m_srcWordToIndex);
                }

                if (loadTgtEmbedding)
                {
                    Logger.WriteLine($"Loading ExtEmbedding model from '{tgtEmbeddingFilePath}' for target side.");
                    LoadWordEmbedding(tgtEmbeddingFilePath, m_tgtEmbedding[device], m_tgtWordToIndex);
                }
            }
        }
Example #25
0
        /// <summary>
        /// Creates a model for training: initializes the devices, stores the configuration, obtains the
        /// vocabularies, creates the encoder/decoder embeddings, and optionally loads pre-trained
        /// embeddings for each device.
        /// </summary>
        /// <param name="embeddingDim">Stored embedding dimension.</param>
        /// <param name="hiddenDim">Stored hidden dimension.</param>
        /// <param name="encoderLayerDepth">Stored encoder layer depth.</param>
        /// <param name="decoderLayerDepth">Stored decoder layer depth.</param>
        /// <param name="trainCorpus">Training corpus; also the vocabulary source when no vocabulary files are given.</param>
        /// <param name="srcVocabFilePath">Source vocabulary file; used only when the target path is also given.</param>
        /// <param name="tgtVocabFilePath">Target vocabulary file; used only when the source path is also given.</param>
        /// <param name="srcEmbeddingFilePath">Optional pre-trained source embedding file.</param>
        /// <param name="tgtEmbeddingFilePath">Optional pre-trained target embedding file.</param>
        /// <param name="modelFilePath">Stored model file path.</param>
        /// <param name="batchSize">Stored batch size.</param>
        /// <param name="dropoutRatio">Stored dropout ratio.</param>
        /// <param name="multiHeadNum">Stored multi-head count.</param>
        /// <param name="warmupSteps">Warm-up steps; stored as this value plus one.</param>
        /// <param name="archType">Architecture type passed to device initialization.</param>
        /// <param name="encoderType">Stored encoder type.</param>
        /// <param name="deviceIds">Device ids; one embedding-load pass runs per id.</param>
        public AttentionSeq2Seq(int embeddingDim, int hiddenDim, int encoderLayerDepth, int decoderLayerDepth, Corpus trainCorpus, string srcVocabFilePath, string tgtVocabFilePath,
                                string srcEmbeddingFilePath, string tgtEmbeddingFilePath, string modelFilePath, int batchSize, float dropoutRatio, int multiHeadNum, int warmupSteps,
                                ArchTypeEnums archType, EncoderTypeEnums encoderType, int[] deviceIds)
        {
            TensorAllocator.InitDevices(archType, deviceIds);
            SetDefaultDeviceIds(deviceIds.Length);

            // Configuration
            m_dropoutRatio  = dropoutRatio;
            m_batchSize     = batchSize;
            m_modelFilePath = modelFilePath;
            m_deviceIds     = deviceIds;
            m_multiHeadNum  = multiHeadNum;
            m_encoderType   = encoderType;
            m_warmupSteps   = warmupSteps + 1;

            TrainCorpus         = trainCorpus;
            m_encoderLayerDepth = encoderLayerDepth;
            m_decoderLayerDepth = decoderLayerDepth;
            m_embeddingDim      = embeddingDim;
            m_hiddenDim         = hiddenDim;

            // Vocabulary: from files only when both paths are supplied, otherwise from the corpus.
            bool vocabFilesGiven = !(String.IsNullOrEmpty(srcVocabFilePath) || String.IsNullOrEmpty(tgtVocabFilePath));
            if (!vocabFilesGiven)
            {
                Logger.WriteLine("Building vocabulary from training corpus...");
                BuildVocab(trainCorpus);
            }
            else
            {
                Logger.WriteLine($"Loading vocabulary files from '{srcVocabFilePath}' and '{tgtVocabFilePath}'...");
                LoadVocab(srcVocabFilePath, tgtVocabFilePath);
            }

            // Encoder and decoder embeddings
            CreateEncoderDecoderEmbeddings();

            // Pre-trained embeddings, loaded once per device
            bool loadSrcEmbedding = !String.IsNullOrEmpty(srcEmbeddingFilePath);
            bool loadTgtEmbedding = !String.IsNullOrEmpty(tgtEmbeddingFilePath);

            for (int device = 0; device < m_deviceIds.Length; device++)
            {
                if (loadSrcEmbedding)
                {
                    Logger.WriteLine($"Loading ExtEmbedding model from '{srcEmbeddingFilePath}' for source side.");
                    LoadWordEmbedding(srcEmbeddingFilePath, m_srcEmbedding[device], m_srcWordToIndex);
                }

                if (loadTgtEmbedding)
                {
                    Logger.WriteLine($"Loading ExtEmbedding model from '{tgtEmbeddingFilePath}' for target side.");
                    LoadWordEmbedding(tgtEmbeddingFilePath, m_tgtEmbedding[device], m_tgtWordToIndex);
                }
            }
        }
Example #26
0
        /// <summary>
        /// Builds the natural-language helper: creates a translator, loads the spam and good word corpora
        /// from the given files, and loads both into a new spam filter.
        /// </summary>
        /// <param name="spamfile">Spam words definition text file.</param>
        /// <param name="goodfile">Good words definition text file.</param>
        public NaturalLanguage(string spamfile, string goodfile)
        {
            Corpus spamCorpus = new Corpus();
            Corpus goodCorpus = new Corpus();

            t = new Translator();

            spamCorpus.LoadFromFile(spamfile);
            goodCorpus.LoadFromFile(goodfile);

            SpamFilter spamFilter = new SpamFilter();
            filter = spamFilter;
            spamFilter.Load(goodCorpus, spamCorpus);
        }
Example #27
0
        /// <summary>
        /// Makes a type attribute definition and attaches a data type reference and a purpose reference to it.
        /// </summary>
        /// <param name="attributeName">Name handed to the corpus when making the attribute object.</param>
        /// <param name="dataType">Value handed to the corpus when making the data type reference.</param>
        /// <param name="purpose">Value handed to the corpus when making the purpose reference.</param>
        /// <returns>The attribute definition with its data type and purpose set.</returns>
        public CdmTypeAttributeDefinition CreateTypeAttribute(string attributeName, string dataType, string purpose)
        {
            // Both references are made first, then the attribute that will carry them.
            CdmDataTypeReference typeReference =
                Corpus.MakeRef<CdmDataTypeReference>(CdmObjectType.DataTypeRef, dataType, simpleNameRef: false);
            CdmPurposeReference purposeReference =
                Corpus.MakeRef<CdmPurposeReference>(CdmObjectType.PurposeRef, purpose, simpleNameRef: false);
            CdmTypeAttributeDefinition typeAttribute =
                Corpus.MakeObject<CdmTypeAttributeDefinition>(CdmObjectType.TypeAttributeDef, nameOrRef: attributeName, simpleNameRef: false);

            typeAttribute.DataType = typeReference;
            typeAttribute.Purpose = purposeReference;
            return typeAttribute;
        }
Example #28
0
        /// <summary>
        /// Registers a file with the corpus and copies it into the corpus folder under "CorporaStore".
        /// </summary>
        /// <param name="corpus">Corpus that receives the new text file entry; its title names the target folder.</param>
        /// <param name="fileName">Path of the file to copy; only its file name is kept at the destination.</param>
        public void AddFile(Corpus corpus, string fileName)
        {
            string corpusFolder = connectorString + @"CorporaStore\" + corpus.Title + @"\";
            string shortName = Path.GetFileName(fileName);
            string destination = corpusFolder + shortName;

            var entry = new TextFile();
            entry.Title = shortName;
            entry.Info = destination;
            corpus.Add(entry);

            // Copy with overwrite enabled; the source file is left in place.
            var source = new FileInfo(fileName);
            source.CopyTo(destination, true);
        }
Example #29
0
 /// <summary>
 /// Gets or sets the wearable stored at the slot identified by <paramref name="n"/>.
 /// </summary>
 /// <remarks>
 /// On set, a different non-null wearable already in the slot is passed to <c>Player.Stow</c>, and the
 /// incoming item is parented to the slot's anchor with its local position and rotation reset.
 /// </remarks>
 public invt::IWearable this[Corpus n]
 {
     get { return list[(int)n]; }
     set
     {
         int slot = (int)n;

         var previous = (invt::IWearable)list[slot];
         if (previous != null && previous != value)
         {
             Player.Stow(previous);
         }

         var itemTransform = ((invt::Item)value).transform;
         itemTransform.parent = anchors[slot].transform;
         itemTransform.localPosition = Vector3.zero;
         itemTransform.localRotation = Quaternion.identity;

         list[slot] = value;
     }
 }
Example #30
0
    /// <summary>
    /// Tokenizes the text into a corpus, runs the global POS tagger over it and returns the corpus as a string.
    /// </summary>
    /// <param name="text">Text to tokenize and tag.</param>
    /// <param name="xmlOutput">When true the "XML-MI" format is returned, otherwise "TBL".</param>
    /// <returns>The tagged corpus rendered in the selected format.</returns>
    public string Tag(string text, bool xmlOutput)
    {
        // Poll every 100 ms until the global ready flag is set.
        while (!Global.mReady)
        {
            Thread.Sleep(100);
        }

        var tagged = new Corpus();
        tagged.LoadFromTextSsjTokenizer(text);

        // The tagger reports lemma statistics through out parameters; they are not used here.
        int correct, correctLowercase, wordCount;
        Global.mPosTagger.Tag(tagged, out correct, out correctLowercase, out wordCount, /*xmlMode=*/ false);

        if (xmlOutput)
        {
            return tagged.ToString("XML-MI");
        }
        return tagged.ToString("TBL");
    }
Example #31
0
 /// <summary>
 /// Adds the corpus received in the request body to the corpus repository.
 /// </summary>
 /// <param name="corpus">Corpus bound from the request body; null when binding produced nothing.</param>
 /// <returns>"success", or a serialized error object (with status 400) when no corpus was bound.</returns>
 public string Add([FromBody] Corpus corpus)
 {
     if (corpus != null)
     {
         // The incoming id is overwritten with -1 before the corpus is handed to the repository.
         corpus.Id = -1;
         m_context.CorpusRepository.Add(corpus);
         return "success";
     }

     /* The JSON sent was not in the correct format: answer 400 Bad Request */
     Response.StatusCode = 400;
     var failure = new LexicalAnalyzer.Models.Error();
     failure.Message = "Invalid structure for Corpus object";
     return JsonConvert.SerializeObject(failure);
 }
Example #32
0
        /// <summary>
        /// Checks the per-token probability computed for tokens that occur in both corpora.
        /// </summary>
        public void Test_Token_Probability()
        {
            const double expected = 0.3333333333333333;

            var good = new Corpus();
            var bad = new Corpus();
            good.Add("the chicken jumped over the moon", 3);
            bad.Add("the cow ran threw the moon", 3);

            var calculator = new Calculator(Calculator.Defaults);

            // "the" and "moon" appear in both sentences and must score the same.
            foreach (string token in new[] { "the", "moon" })
            {
                Assert.AreEqual<double>(expected, calculator.CalculateTokenProbability(token, good, bad));
            }
            //Assert.AreEqual<double>(Calculator.Defaults.LikelySpamScore, calculator.CalculateTokenProbability("ran", good, bad));
            //Assert.AreEqual<double>(Calculator.Defaults.LikelySpamScore, calculator.CalculateTokenProbability("cow", good, bad));
        }
Example #33
0
        /// <summary>
        /// Checks the probability table computed from a good and a bad corpus for tokens present in both.
        /// </summary>
        public void Test_Calculate_Probability()
        {
            const double expected = 0.3333333333333333;

            var good = new Corpus();
            var bad = new Corpus();
            good.Add("the chicken jumped over the moon", 3);
            bad.Add("the cow ran threw the moon", 3);

            var calculator = new Calculator(Calculator.Defaults);
            Probability table = calculator.CalculateProbabilities(good, bad);

            // "the" and "moon" appear in both sentences and must score the same.
            foreach (string token in new[] { "the", "moon" })
            {
                Assert.AreEqual<double>(expected, table.Prob[token]);
            }
            //Assert.AreEqual<double>(Calculator.Defaults.LikelySpamScore, table.Prob["ran"]);
            //Assert.AreEqual<double>(Calculator.Defaults.LikelySpamScore, table.Prob["cow"]);
        }
Example #34
0
        /// <summary>
        /// Checks the scores a filter built from the two corpora gives to the exact bad sentence
        /// and to a sentence that mixes words from both corpora.
        /// </summary>
        public void Test_Matching()
        {
            var good = new Corpus();
            var bad = new Corpus();
            good.Add("the chicken jumped over the moon", 3);
            bad.Add("the cow ran threw the moon", 3);

            var calculator = new Calculator(Calculator.Defaults);
            Probability table = calculator.CalculateProbabilities(good, bad);
            var target = new Filter(table);

            // First call's result is deliberately ignored, as in the original test.
            target.Test("the cow ran over the moon", 3);

            var exactBadScore = target.Test("the cow ran threw the moon", 3);
            Assert.IsTrue(exactBadScore > 0.98);

            var mixedScore = target.Test("the cow ran over the moon", 3);
            Assert.IsTrue(mixedScore > 0.25);
        }
        /// <summary>
        /// Reads a tab-separated CoNLL file and groups its token lines into sentences.
        /// </summary>
        /// <remarks>
        /// Lines starting with '#' are skipped and an empty line closes the current sentence. Every other
        /// line is split on tabs (empty fields are dropped) into one token. For <c>Corpus.Tiger</c> and
        /// <c>Corpus.HDT</c> the ID, form, lemma and POS come from fields 0, 1, 2 and 4; for any other corpus
        /// they come from fields 2, 3, 6 and 4, with "#refl" mapped to an empty lemma and '#' characters removed.
        /// A final sentence that is not followed by a blank line is still returned.
        /// </remarks>
        /// <param name="path">Path of the file to read.</param>
        /// <param name="corpus">Corpus the file belongs to; selects the field layout.</param>
        /// <returns>The sentences in file order, with IDs counting up from 1.</returns>
        public List<CoNLLSentence> ReadFile(String path, Corpus corpus)
        {
            List<CoNLLSentence> sentences = new List<CoNLLSentence>();

            int sentenceNumber = 1;
            CoNLLSentence nextSentence = new CoNLLSentence();
            nextSentence.ID = sentenceNumber++;
            nextSentence.Tokens = new List<CoNLLToken>();

            // Dispose the reader (and its file handle) even when a malformed line throws.
            using (System.IO.StreamReader file = new System.IO.StreamReader(path))
            {
                String line;
                while ((line = file.ReadLine()) != null)
                {
                    if (line.StartsWith("#", StringComparison.Ordinal))
                    {
                        continue;
                    }
                    if (String.IsNullOrEmpty(line))
                    {
                        // Blank line: the current sentence is complete, start the next one.
                        sentences.Add(nextSentence);
                        nextSentence = new CoNLLSentence();
                        nextSentence.ID = sentenceNumber++;
                        nextSentence.Tokens = new List<CoNLLToken>();
                        continue;
                    }

                    String[] values = line.Split(new String[] { "\t" }, StringSplitOptions.RemoveEmptyEntries);
                    if (corpus == Corpus.Tiger || corpus == Corpus.HDT)
                    {
                        nextSentence.Tokens.Add(new CoNLLToken()
                        {
                            ID = values[0],
                            Form = values[1],
                            Lemma = values[2],
                            POS = values[4],
                        });
                    }
                    else
                    {
                        // "#refl" must be checked first: stripping '#' alone would leave "refl" behind.
                        String lemma = values[6];
                        lemma = lemma == "#refl" ? String.Empty : lemma.Replace("#", String.Empty);

                        nextSentence.Tokens.Add(new CoNLLToken()
                        {
                            ID = values[2],
                            Form = values[3],
                            Lemma = lemma,
                            POS = values[4],
                        });
                    }
                }
            }

            // A file without a trailing blank line leaves the last sentence open; keep its tokens.
            if (nextSentence.Tokens.Count > 0)
            {
                sentences.Add(nextSentence);
            }
            return sentences;
        }
Example #36
0
 /// <summary>
 /// Adding an array of three strings to a new corpus must leave three tokens in it.
 /// </summary>
 public void Basic_List_Add()
 {
     var corpus = new Corpus();
     string[] words = new string[] { "one", "two", "three" };

     corpus.Add(words);

     var tokenCount = corpus.Tokens.Count;
     Assert.AreEqual(3, tokenCount);
 }
Example #37
0
 /// <summary>
 /// Adding the sample text through <c>Add(text, 3)</c> to a new corpus must leave four tokens in it.
 /// </summary>
 public void Basic_Builder_Add()
 {
     const string text = "one two three a 333 3adsf a123";
     var corpus = new Corpus();

     corpus.Add(text, 3);

     var tokenCount = corpus.Tokens.Count;
     Assert.AreEqual(4, tokenCount);
 }
Example #38
0
			/// <summary>
			/// Gets or sets the wearable kept in the slot selected by <paramref name="n"/>.
			/// </summary>
			public invt::IWearable this[Corpus n] {
				get { return list[(int) n]; }
				set {
					var slot = (int) n;
					var current = (invt::IWearable) list[slot];
					// Stow whatever was in the slot before, unless the same wearable is being re-assigned.
					var replaced = current!=null && current!=value;
					if (replaced) Player.Stow(current);
					// Attach the incoming item to the slot's anchor and reset its local pose.
					var incoming = (invt::Item) value;
					incoming.transform.parent = anchors[slot].transform;
					incoming.transform.localPosition = Vector3.zero;
					incoming.transform.localRotation = Quaternion.identity;
					list[slot] = value; }}
Example #39
0
 /// <summary>
 /// Creates a classifier that keeps the two given corpora.
 /// </summary>
 /// <param name="positiveCorpus">Corpus stored as the positive corpus.</param>
 /// <param name="negativeCorpus">Corpus stored as the negative corpus.</param>
 public Classifier(Corpus positiveCorpus, Corpus negativeCorpus)
 {
     this._positiveCorpus = positiveCorpus;
     this._negativeCorpus = negativeCorpus;
 }
Example #40
0
        /// <summary>
        /// Parses one console command line and runs the matching action.
        /// </summary>
        /// <remarks>
        /// The line is split on whitespace and its first block, compared case-insensitively, selects the command:
        /// <c>exit</c> and <c>clear</c> (both reject extra parameters), <c>genas</c> (builds "WEMAS.wemas" from
        /// "WEMAS.xml"), <c>testas</c> (reloads and re-saves the annotation set) and <c>testws</c> (annotates
        /// "utf8.txt", saves "RESULT.xml" and prints every word). An empty line is ignored and anything else is
        /// reported as an unknown command.
        /// </remarks>
        /// <param name="command">Raw command line text.</param>
        static void ProcessCommand(string command)
        {
            string[] blocks = command.Split();

            // Invariant lower-casing keeps command matching independent of the current culture
            // (e.g. under Turkish casing rules "EXIT" would not lower-case to "exit").
            switch (blocks[0].ToLowerInvariant())
            {
                case "":
                    return;

                case "exit":
                    if (blocks.Length == 1)
                    {
                        _exitFlag = true;
                    }
                    else
                    {
                        Console.WriteLine("Unknown Parameters.");
                    }
                    return;

                case "clear":
                    if (blocks.Length == 1)
                    {
                        Console.Clear();
                    }
                    else
                    {
                        Console.WriteLine("Unknown Parameters.");
                    }
                    return;

                case "genas":
                    AnnotationDictionary dict = new AnnotationDictionary();
                    AnnotationSet ans = new AnnotationSet("WEMAS");
                    dict.Load("WEMAS.xml");

                    // Copy every description and sentence separator from the dictionary into the set.
                    foreach (var entry in dict._Dictionary)
                    {
                        ans.SetAnnotationDescription(entry.Key, entry.Value);
                    }

                    foreach (var sep in dict.SentenceSeparators)
                    {
                        ans.AddSentenceSeparator(sep);
                    }

                    ans.Save("WEMAS.wemas");
                    return;

                case "testas":
                    AnnotationSet ans2 = new AnnotationSet("WEMAS");
                    ans2.Load("WEMAS.wemas");
                    ans2.Description =
                        "WEB ENTITY MINER Default Annotation Set.";

                    ans2.Save("WEMAS2.wemas");
                    return;

                case "testws":
                    AnnotationSet wemas = new AnnotationSet();
                    wemas.Load("WEMAS.wemas");

                    ICTCLASAnnotator ano = new ICTCLASAnnotator(wemas, null);
                    CRFPPAnnotator crfAno = new CRFPPAnnotator(wemas,
                        new Model(
                            AppDomain.CurrentDomain.BaseDirectory
                            + "model.crfppmodel"));

                    Corpus c = new Corpus(Encoding.UTF8);
                    SentenceFactory.AnnotationSet = wemas;
                    SentenceFactory.InputLanguage = Language.SimplifiedChinese;
                    SentenceFactory.OutputEncoding = Encoding.UTF8;

                    WEMDocument doc = new WEMDocument(Encoding.UTF8);

                    // The stream stays open while the sentences are enumerated (they may be produced
                    // lazily) and is disposed right after, so the file handle is not leaked.
                    using (var input = new FileStream("utf8.txt", FileMode.Open, FileAccess.Read))
                    {
                        foreach (var sen in SentenceFactory.GetSentences(input))
                        {
                            doc.AddSentence(sen);
                        }
                    }

                    c.AddDocument(doc);

                    // Best effort: an annotator failure is reported, then whatever was produced is still saved.
                    try
                    {
                        ano.ProcessCorpus(c);
                        crfAno.ProcessCorpus(c);
                    }
                    catch (Exception ex)
                    {
                        Console.WriteLine("Unhandled Exception:\n{0}",
                            ex.Message);
                    }

                    doc.Save("RESULT.xml");

                    foreach (var sentence in doc.Sentences)
                    {
                        foreach (var word in sentence.Words)
                        {
                            if (word is Entity)
                            {
                                Console.WriteLine("{0}/ENTITY:{1}",
                                    word.Content,
                                    wemas[((Entity)word).EntityId]);
                            }
                            else
                            {
                                Console.WriteLine("{0}", word.Content);
                            }
                        }
                    }
                    return;

                default:
                    Console.WriteLine("Unknown Command: '{0}'.", blocks[0]);
                    return;
            }
        }
Example #41
0
    /// <summary>
    /// Entry point: builds the spell corrector from "big.txt" and then reads input from stdin.
    /// </summary>
    /// <param name="args">Command line arguments; not used.</param>
    public static void Main(string[] args)
    {
        const string corpusFile = "big.txt";

        if (File.Exists(corpusFile))
        {
            // The whole file is read into memory and used as the corrector's corpus.
            string contents = File.ReadAllText(corpusFile);
            corrector = new SpellCorrect(new Corpus(contents));

            ReadFromStdIn();
        }
        else
        {
            Console.Error.WriteLine("Cannot find big.txt.");
        }
    }