Пример #1
0
        /// <summary>
        /// Verifies that a simple embedding with pre-text, a markup-qualified
        /// expression embed and a post-text tail is parsed correctly.
        /// </summary>
        public void ParseEmbeddingTest()
        {
            // Parse the embedding under test.
            EmbeddingParser parser = new EmbeddingParser(Init("\"left<func1() \"text\">right\""));
            Embedding result = parser.ParseEmbedding();

            // Text before the embed.
            Assert.AreEqual("left", result.GetPreText().GetText());

            // The embed itself must be an expression embed holding "text".
            Assert.AreEqual(typeof(ExpressionEmbed), result.GetEmbed().GetType());
            ExpressionEmbed embed = (ExpressionEmbed)result.GetEmbed();
            Assert.AreEqual("\"text\"", embed.GetExpression().ToString());

            // The markup preceding the expression: func1 with no arguments.
            Markup markup = (Markup)embed.GetMarkups().Get(0);
            Assert.AreEqual("func1", markup.GetDesignator().ToString());
            Assert.AreEqual(0, markup.GetArguments().Count);

            // The tail is plain post-text.
            Assert.AreEqual(typeof(PostTextTail), result.GetTextTail().GetType());
            PostTextTail tail = (PostTextTail)result.GetTextTail();
            Assert.AreEqual("right", tail.GetPostText().GetText());
        }
Пример #2
0
        /// <summary>
        /// Verifies parsing of an embedding whose mid-text tail contains a
        /// second, nested embed.
        /// </summary>
        public void ParseNestedEmbeddingTest()
        {
            // Parse an embedding with two embeds separated by mid-text.
            EmbeddingParser parser = new EmbeddingParser(Init("\"pretext<em \"eerste\">midtext <em \"tweede\">posttexttail\""));
            Embedding result = parser.ParseEmbedding();

            // Pre-text keeps its opening quote and the '<' marker.
            Assert.AreEqual("\"pretext<", result.GetPreText().ToString());

            // First embed: markup "em" around the expression "eerste".
            Assert.AreEqual(typeof(ExpressionEmbed), result.GetEmbed().GetType());
            ExpressionEmbed firstEmbed = (ExpressionEmbed)result.GetEmbed();
            Assert.AreEqual("em", firstEmbed.GetMarkups().Get(0).ToString());
            Assert.AreEqual("\"eerste\"", firstEmbed.GetExpression().ToString());

            // The tail must be mid-text because another embed follows.
            Assert.AreEqual(typeof(MidTextTail), result.GetTextTail().GetType());
            MidTextTail midTail = (MidTextTail)result.GetTextTail();
            Assert.AreEqual(">midtext <", midTail.GetMidText().ToString());

            // Second embed: markup "em" around the expression "tweede".
            Assert.AreEqual(typeof(ExpressionEmbed), midTail.GetEmbed().GetType());
            ExpressionEmbed secondEmbed = (ExpressionEmbed)midTail.GetEmbed();
            Assert.AreEqual("em", secondEmbed.GetMarkups().Get(0).ToString());
            Assert.AreEqual("\"tweede\"", secondEmbed.GetExpression().ToString());

            // The nested tail closes the embedding with post-text.
            Assert.AreEqual(typeof(PostTextTail), midTail.GetTextTail().GetType());
            Assert.AreEqual(">posttexttail\"", midTail.GetTextTail().ToString());
        }
Пример #3
0
        /// <summary>
        /// Verifies that a markup statement whose body is an embedding is parsed
        /// into a MarkupEmbeddingStatement with the expected constituent parts.
        /// </summary>
        public void ParserMarkupEmbeddingStatementTest()
        {
            // Parse the statement under test.
            StatementParser parser = new StatementParser(Init("p p \"left<func1() \"text\">right\";"));
            Statement statement = parser.ParseMarkupStatement();

            // The parsed statement must be a markup-embedding statement.
            Assert.AreEqual(typeof(MarkupEmbeddingStatement), statement.GetType());
            MarkupEmbeddingStatement markupStatement = (MarkupEmbeddingStatement)statement;

            // Both leading markups are "p".
            Assert.AreEqual("p", markupStatement.GetMarkups().Get(0).ToString());
            Assert.AreEqual("p", markupStatement.GetMarkups().Get(1).ToString());

            // Pre-text keeps its opening quote and the '<' marker.
            Embedding embedding = markupStatement.GetEmbedding();
            Assert.AreEqual("\"left<", embedding.GetPreText().ToString());
            Assert.AreEqual(typeof(ExpressionEmbed), embedding.GetEmbed().GetType());

            // The embed holds a single markup (func1) and the text expression.
            ExpressionEmbed embed = (ExpressionEmbed)embedding.GetEmbed();
            Assert.AreEqual(1, embed.GetMarkups().Count);
            Assert.AreEqual("func1", embed.GetMarkups().Get(0).ToString());
            Assert.AreEqual(typeof(TextExpression), embed.GetExpression().GetType());
            Assert.AreEqual("\"text\"", embed.GetExpression().ToString());

            // The tail is plain post-text including the closing quote.
            Assert.AreEqual(typeof(PostTextTail), embedding.GetTextTail().GetType());
            PostTextTail tail = (PostTextTail)embedding.GetTextTail();
            Assert.AreEqual(">right\"", tail.GetPostText().ToString());
        }
Пример #4
0
 /// <summary>
 /// Executes the QuickPair action: wraps the current selection in the
 /// configured Left/Right delimiter pair, or inserts the pair at the caret
 /// when there is no selection.
 /// </summary>
 /// <param name="ea">Event arguments supplied by the action framework (unused here).</param>
 private void actQuickPair_Execute(ExecuteEventArgs ea)
 {
     // Read the delimiter pair and caret-placement option from the action parameters.
     string left = actQuickPair.Parameters["Left"].ValueAsStr;
     string right = actQuickPair.Parameters["Right"].ValueAsStr;
     bool positionCaretBetweenDelimiters = actQuickPair.Parameters["CaretBetween"].ValueAsBool;
     // NOTE(review): this branch appears to support a packed legacy format where
     // Left encodes both delimiters (a quote, a comma, then the right delimiter)
     // and Right carries "true" to request the caret between the delimiters —
     // confirm against the action's callers.
     if (left.StartsWith("\","))
     {
         if (right == "true")
             positionCaretBetweenDelimiters = true;
         right = left.Substring(2).Trim();
         left = "\"";
     }
     // Bail out early if there is no active document, view, selection, or caret.
     TextDocument activeTextDocument = CodeRush.Documents.ActiveTextDocument;
     if (activeTextDocument == null)
         return;
     TextView activeView = activeTextDocument.ActiveView;
     if (activeView == null)
         return;
     TextViewSelection selection = activeView.Selection;
     if (selection == null)
         return;
     TextViewCaret caret = activeView.Caret;
     if (caret == null)
         return;
     if (selection.Exists)
     {
         // A selection exists: embed it between the two delimiters.
         Embedding embedding = new Embedding();
         embedding.Style = EmbeddingStyle.StartEnd;
         string[] top = { left };
         string[] bottom = { right };
         embedding.Top = top;
         embedding.Bottom = bottom;
         if (positionCaretBetweenDelimiters)
             embedding.AdjustSelection = PostEmbeddingSelectionAdjustment.Leave;
         else
             embedding.AdjustSelection = PostEmbeddingSelectionAdjustment.Extend;
         // Remember the selection direction so it can be restored after embedding.
         bool needToMoveCaretToTheRight = false;
     if (selection.AnchorPosition < selection.ActivePosition)
             needToMoveCaretToTheRight = true;
         // Group the edits so undo reverts the whole embed in one step.
         using (activeTextDocument.NewCompoundAction(STR_EmbedQuickPair))
         {
             activeTextDocument.EmbedSelection(embedding);
             if (needToMoveCaretToTheRight)
             {
                 selection.Set(selection.ActivePosition, selection.AnchorPosition);
             }
         }
     }
     else
     {
         // No selection: commit any pending IntelliSense completion first so the
         // pair lands after the completed text.
         if (CodeRush.Windows.IntellisenseEngine.HasSelectedCompletionSet(activeView))
             CodeRush.Command.Execute("Edit.InsertTab");
         using (activeTextDocument.NewCompoundAction(STR_QuickPair))
             if (positionCaretBetweenDelimiters)
                 activeTextDocument.ExpandText(caret.SourcePoint, left + "«Caret»«Field()»" + right + "«FinalTarget»");
             else
                 activeTextDocument.InsertText(caret.SourcePoint, left + right);
     }
 }
Пример #5
0
        /// <summary>
        /// Clones <paramref name="node"/> into a new embedding prototype,
        /// registers it via <c>Add</c> and returns it.
        /// </summary>
        /// <param name="node">Node to clone as the embedding prototype.</param>
        /// <param name="arguments">Paths identifying the placeholder arguments.</param>
        /// <returns>The newly added embedding.</returns>
        public Embedding Prototype(Node node, IEnumerable <NodePath> arguments)
        {
            var embedding = new Embedding((Node)node.Clone(), arguments);
            Add(embedding);
            return embedding;
        }
Пример #6
0
 /// <summary>
 /// Stores a voice embedding for the given user and persists the whole
 /// store to the binary dump file.
 /// </summary>
 /// <param name="embedding">Voice embedding vector to store.</param>
 /// <param name="userInfo">User the embedding belongs to.</param>
 /// <returns>An already-completed task; the work runs synchronously.</returns>
 public Task AddVoiceVecAsync(Embedding embedding, UserInfo userInfo)
 {
     this.storedValues.Add(new StorageModel()
     {
         UserInfo = userInfo, Embedding = embedding
     });
     Serialization.WriteToBinaryFile(this.resourcePaths.DataDumpFile, this.storedValues, false);
     // Task.CompletedTask is the idiomatic zero-cost completed task;
     // Task.Delay(0) needlessly allocates and schedules a timer-free delay.
     return Task.CompletedTask;
 }
        /// <summary>
        /// Learned positional embedding: position indices map to trainable vectors.
        /// Weights are initialized N(0, embeddingDim^-0.5) and the padding row is zeroed.
        /// </summary>
        /// <param name="numEmbeddings">Number of rows in the embedding table.</param>
        /// <param name="embeddingDim">Dimensionality of each positional vector.</param>
        /// <param name="padTokenIndex">Padding token index, forwarded to the base class.</param>
        public LearnedPositionalEmbedding(int numEmbeddings, int embeddingDim, int padTokenIndex)
            : base(embeddingDim, padTokenIndex, nameof(LearnedPositionalEmbedding))
        {
            _numEmbeddings = numEmbeddings;
            Embedding      = torch.nn.Embedding(numEmbeddings, embeddingDim, PadPositionIndex);

            // Normal init with std = EmbeddingDim^-0.5; the padding position stays all-zero.
            ModelUtils.InitNormal(Embedding.weight, mean: 0, std: Math.Pow(EmbeddingDim, -0.5));
            ModelUtils.InitZeros(Embedding.weight[PadPositionIndex]);

            // Register submodules/parameters with the module system.
            RegisterComponents();
        }
Пример #8
0
 /// <summary>
 /// Builds a neural coreference model from its pre-trained components.
 /// </summary>
 /// <param name="antecedentMatrix">Antecedent weight matrix.</param>
 /// <param name="anaphorMatrix">Anaphor weight matrix.</param>
 /// <param name="pairFeaturesMatrix">Pairwise-features weight matrix.</param>
 /// <param name="pairwiseFirstLayerBias">Bias of the first pairwise layer.</param>
 /// <param name="anaphoricityModel">Layer matrices of the anaphoricity model.</param>
 /// <param name="pairwiseModel">Layer matrices of the pairwise model.</param>
 /// <param name="wordEmbeddings">Word embedding lookup.</param>
 public NeuralCorefModel(SimpleMatrix antecedentMatrix, SimpleMatrix anaphorMatrix, SimpleMatrix pairFeaturesMatrix, SimpleMatrix pairwiseFirstLayerBias, IList <SimpleMatrix> anaphoricityModel, IList <SimpleMatrix> pairwiseModel, Embedding wordEmbeddings
                         )
 {
     // Plain field assignments; no validation or copying is performed.
     this.antecedentMatrix = antecedentMatrix;
     this.anaphorMatrix = anaphorMatrix;
     this.pairFeaturesMatrix = pairFeaturesMatrix;
     this.pairwiseFirstLayerBias = pairwiseFirstLayerBias;
     this.anaphoricityModel = anaphoricityModel;
     this.pairwiseModel = pairwiseModel;
     this.wordEmbeddings = wordEmbeddings;
 }
Пример #9
0
        /// <summary>
        /// Schedules additional assemblies for embedding, including assemblies
        /// not in the reference tree. See <see cref="EmbedReferencedAssemblies"/>
        /// for referenced assemblies. This only schedules the operation; the
        /// embedding itself runs after <see cref="IContext.Run()"/> is called.
        /// </summary>
        /// <param name="ctx">Netfuser context</param>
        /// <param name="modules">Modules whose containing assemblies will be embedded</param>
        /// <returns>The same Netfuser context, for chaining</returns>
        public static IContext EmbedAssemblies(this IContext ctx, params ModuleDef[] modules)
        {
            var embedder = ctx.Embedder(NetfuserFactory.EmbedderIndexName);
            foreach (var module in modules)
            {
                // Embed the assembly from its on-disk location and flag it as
                // an assembly resource.
                var embedding = new Embedding((IContextImpl)ctx, module.Assembly.FullName, new ReadableFile(module.Location));
                embedding.Properties.Add(ResourceEntry.KeyIsAssembly, true.ToString());
                embedder.Add(embedding);
            }
            return ctx;
        }
Пример #10
0
        /// <summary>
        /// Interprets an embedding: emits its pre-text as a tag-less XHTML
        /// element, then visits the embed and its text tail.
        /// </summary>
        /// <param name="embedding">Embedding to interpret</param>
        public override void Visit(Embedding embedding)
        {
            // The pre-text becomes a plain (tag-less) element under the current node.
            var preTextElement = new XHTMLElement(embedding.GetPreText().GetText(), Current);
            preTextElement.SetTagState(false);
            AddElement(preTextElement);

            // Recurse into the embed and the remaining text tail.
            embedding.GetEmbed().AcceptVisitor(this);
            embedding.GetTextTail().AcceptVisitor(this);
        }
Пример #11
0
        /// <summary>
        /// Finds the stored users whose embeddings are closest to the given vector.
        /// </summary>
        /// <param name="vector">Query embedding.</param>
        /// <param name="count">Maximum number of neighbors to return.</param>
        /// <param name="distanceProvider">Distance metric between embeddings.</param>
        /// <returns>The nearest users, ordered by ascending distance.</returns>
        public Task <NearestUser[]> GetNearestNeighbors(Embedding vector, int count, IDistanceProvider distanceProvider)
        {
            // Score every stored embedding against the query, then keep the
            // `count` closest ones.
            var byDistance = this.storedValues
                .Select(stored => new NearestUser()
                {
                    Name     = stored.UserInfo.UserName,
                    Distance = distanceProvider.Measure(stored.Embedding, vector)
                })
                .OrderBy(neighbor => neighbor.Distance);

            var nearest = byDistance.Take(count).ToArray();
            return Task.FromResult(nearest);
        }
Пример #12
0
            /// <summary>
            /// Transformer language model: positional encoding over a token
            /// embedding, a stack of transformer encoder layers, and a linear
            /// decoder projecting back to the vocabulary.
            /// </summary>
            /// <param name="ntokens">Vocabulary size (input and output).</param>
            /// <param name="ninputs">Embedding / model dimension.</param>
            /// <param name="nheads">Attention heads per encoder layer.</param>
            /// <param name="nhidden">Feed-forward hidden size inside each encoder layer.</param>
            /// <param name="nlayers">Number of stacked encoder layers.</param>
            /// <param name="dropout">Dropout probability, passed to the positional encoding and each encoder layer (default 0.5).</param>
            public TransformerModel(long ntokens, long ninputs, long nheads, long nhidden, long nlayers, double dropout = 0.5) : base("Transformer")
            {
                this.ninputs = ninputs;

                pos_encoder = new PositionalEncoding(ninputs, dropout);
                var encoder_layers = TransformerEncoderLayer(ninputs, nheads, nhidden, dropout);

                transformer_encoder = TransformerEncoder(encoder_layers, nlayers);
                encoder             = Embedding(ntokens, ninputs);
                decoder             = Linear(ninputs, ntokens);
                InitWeights();

                // Register submodules/parameters with the module system.
                RegisterComponents();
            }
Пример #13
0
        /// <summary>
        /// Embeds native libraries exposed via .deps.json
        /// </summary>
        /// <remarks>
        /// Subscribes to <c>WillMergeModules</c>; when modules are merged, every
        /// native-library asset and every runtime-specific managed assembly
        /// listed in the deps file that exists next to the main source module is
        /// scheduled for embedding, tagged with its runtime identifier and path.
        /// </remarks>
        /// <param name="ctx">Netfuser context</param>
        /// <returns>Netfuser context</returns>
        public static IContext EmbedNativeLibraries(this IContext ctx)
        {
            var deps = ctx.Deps();

            ctx.OfType <NetfuserEvent.WillMergeModules>().Subscribe(e =>
            {
                var c  = (IContextImpl)ctx;
                var dc = deps.Deps;
                if (dc != null)
                {
                    var embedder = ctx.Embedder(NetfuserFactory.EmbedderIndexName);
                    // Assets are resolved relative to the main module's folder.
                    var bd       = Path.GetDirectoryName(c.MainSourceModule.Location);
                    // Native library assets: embed each file that exists on disk,
                    // keyed by its file name.
                    foreach (var nm in dc.RuntimeLibraries.SelectMany(l => l.NativeLibraryGroups))
                    {
                        foreach (var p in nm.AssetPaths)
                        {
                            var fp = Path.Combine(bd, p);
                            if (File.Exists(fp))
                            {
                                var emb = new Embedding(c, Path.GetFileName(p), new ReadableFile(fp));
                                emb.Properties.Add(ResourceEntry.KeyRid, nm.Runtime);
                                emb.Properties.Add(ResourceEntry.KeyPath, p);
                                emb.Properties.Add(ResourceEntry.KeyIsNativeLib, true.ToString());
                                embedder.Add(emb);
                            }
                        }
                    }
                    // Runtime-specific managed assemblies (non-empty Runtime only):
                    // embed each existing file, keyed by its assembly full name.
                    foreach (var nm in dc.RuntimeLibraries.SelectMany(l => l.RuntimeAssemblyGroups).Where(m => !string.IsNullOrEmpty(m.Runtime)))
                    {
                        foreach (var p in nm.RuntimeFiles)
                        {
                            var fp = Path.Combine(bd, p.Path);
                            if (File.Exists(fp))
                            {
                                // using var m=ModuleDefMD.Load(fp);
                                var n = AssemblyName.GetAssemblyName(fp);

                                var emb = new Embedding(c, n.FullName, new ReadableFile(fp));
                                emb.Properties.Add(ResourceEntry.KeyPath, p.Path);
                                emb.Properties.Add(ResourceEntry.KeyRid, nm.Runtime);
                                emb.Properties.Add(ResourceEntry.KeyIsAssembly, true.ToString());
                                embedder.Add(emb);
                            }
                        }
                    }
                }
            });
            return(ctx);
        }
        /// <summary>
        /// Predicting with the public general-embedding model should succeed and
        /// return an embedding with a non-null vector.
        /// </summary>
        public async Task PredictOnEmbeddingModelShouldBeSuccessful()
        {
            string modelID = Client.PublicModels.GeneralEmbeddingModel.ModelID;

            // Run a single prediction against the known test image URL.
            ClarifaiResponse <ClarifaiOutput <Embedding> > response =
                await Client.Predict <Embedding>(
                    modelID,
                    new ClarifaiURLImage(CELEB1))
                .ExecuteAsync();

            AssertResponseSuccess(response);

            // The first output must carry an embedding vector.
            Embedding embedding = response.Get().Data[0];
            Assert.NotNull(embedding.Vector);
        }
Пример #15
0
        /// <summary>
        /// Implementation of custom NN model for future-sales prediction:
        /// embedding layers over the categorical inputs, an LSTM recurrence,
        /// a dense layer with dropout, and a linear output layer.
        /// </summary>
        /// <param name="variables">
        /// Input variables in fixed order: year, month, shop, item,
        /// previous-sales count, and the label.
        /// </param>
        /// <param name="device">CPU or GPU device to create the model on.</param>
        /// <returns>The output layer function of the assembled network.</returns>
        private static Function PredictFutureSalesModel(List <Variable> variables, DeviceDescriptor device)
        {
            //define features and label vars (order is fixed by the caller)
            Variable yearVar = variables[0];
            Variable montVar = variables[1];
            Variable shopVar = variables[2];
            Variable itemVar = variables[3];
            Variable cnt3Var = variables[4];
            Variable label   = variables[5];

            //feed-forward network helper used for the dense layers below
            var ffNet = new FeedForwaredNN(device);

            //predefined parameters
            var H_DIMS    = 11;
            var CELL_DIMS = 3;
            var DROPRATRE = 0.2f;
            var outDim    = label.Shape.Dimensions.Last();

            //embedding layer and dimensionality reduction for each categorical input
            var yearEmb    = Embedding.Create(yearVar, yearVar.Shape.Dimensions[0] - 1, DataType.Float, device, 1, yearVar.Name + "_emb");
            var monthEmb   = Embedding.Create(montVar, montVar.Shape.Dimensions[0] / 2, DataType.Float, device, 1, montVar.Name + "_emb");
            var varshopEmb = Embedding.Create(shopVar, shopVar.Shape.Dimensions[0] / 2, DataType.Float, device, 1, shopVar.Name + "_emb");

            //items get a second, stacked embedding for a further 4x reduction
            var itemEmb  = Embedding.Create(itemVar, itemVar.Shape.Dimensions[0] / 2, DataType.Float, device, 1, itemVar.Name + "_emb");
            var itemEmb2 = Embedding.Create(itemEmb, itemEmb.Output.Shape.Dimensions[0] / 4, DataType.Float, device, 1, itemEmb.Name + "_emb");

            //join all embedding layers with input variable of previous product sales
            var emb = CNTKLib.Splice(new VariableVector()
            {
                yearEmb, monthEmb, varshopEmb, itemEmb2, cnt3Var
            }, new Axis(0));

            //create recurrence for time series on top of joined layer
            var lstmLayer = RNN.RecurrenceLSTM(emb, H_DIMS, CELL_DIMS, DataType.Float, device, false, Activation.TanH, true, true);

            //create dense on top of LSTM recurrence layers
            var denseLayer = ffNet.Dense(lstmLayer, 33, Activation.TanH);

            //create dropout layer on top of dense layer
            var dropoutLay = CNTKLib.Dropout(denseLayer, DROPRATRE);

            //create dense layer without activation function
            var outLayer = ffNet.Dense(dropoutLay, outDim, Activation.None, label.Name);

            //
            return(outLayer);
        }
Пример #16
0
            /// <summary>
            /// Encoder: an embedding layer followed by an LSTM; both layers'
            /// weights are flattened into a single weight vector, embedding
            /// weights first, LSTM weights after.
            /// </summary>
            /// <param name="sequenceLength">Number of time steps per input sequence.</param>
            /// <param name="vocabularySize">Size of the input vocabulary.</param>
            /// <param name="wordVectorSize">Dimensionality of each word vector.</param>
            /// <param name="hiddenSize">Hidden state size of the LSTM.</param>
            public Encoder(int sequenceLength, int vocabularySize, int wordVectorSize, int hiddenSize) : base(sequenceLength, hiddenSize)
            {
                this.embedding = new Embedding(sequenceLength, vocabularySize, wordVectorSize, (fanIn, fanOut) => 0.01 * Initializers.LeCunNormal(fanIn));
                this.recurrent = new LSTM(wordVectorSize, hiddenSize, sequenceLength, false, false, (fanIn, fanOut) => Initializers.LeCunNormal(fanIn));
                this.weights   = new double[this.embedding.Weights.Length + this.recurrent.Weights.Length];

                // Copy the embedding weights into the front of the flat vector.
                int offset = this.embedding.Weights.Length;
                for (int k = 0; k < offset; k++)
                {
                    this.weights[k] = this.embedding.Weights[k];
                }

                // Append the LSTM weights right after the embedding weights.
                for (int k = 0; k < this.recurrent.Weights.Length; k++)
                {
                    this.weights[offset + k] = this.recurrent.Weights[k];
                }
            }
Пример #17
0
        /// <summary>
        /// Encoder part of a sequence model: a token embedding (with optional
        /// dropout) feeding a multi-layer LSTM RNN.
        /// </summary>
        /// <param name="ctx">Compute context. NOTE(review): not referenced in the visible body — confirm it is needed.</param>
        /// <param name="numInputSteps">Number of encoder input time steps.</param>
        /// <param name="cfg">Model configuration (batch/vocab/hidden sizes, dropout, layer count).</param>
        /// <param name="isTraining">True when building the training graph; enables dropout.</param>
        public Model(Context ctx, int numInputSteps, Config cfg, bool isTraining = true)
        {
            // Dropout only applies during training and only with a positive configured rate.
            var addDropout = isTraining && cfg.DropoutProbability > 0.0;

            EncoderInputs = Library.Variable <int>(PartialShape.Create(numInputSteps, cfg.BatchSize));
            Embedding     = new Embedding <float>(EncoderInputs, cfg.VocabularySize, cfg.HiddenSize, initScale: cfg.InitScale);

            // Optionally apply dropout between the embedding and the RNN.
            EmbeddingOutput = addDropout ? new Dropout <float>(Embedding.Output, cfg.DropoutProbability).Output : Embedding.Output;

            var rnnType = new LstmRnnType();

            EncoderRnn       = new Rnn <float>(rnnType, EmbeddingOutput, cfg.NumLayers, cfg.HiddenSize, isTraining: isTraining, dropout: addDropout ? cfg.DropoutProbability : 0.0);
            EncoderRnnOutput = addDropout ? new Dropout <float>(EncoderRnn.Y, cfg.DropoutProbability).Output : EncoderRnn.Y;

            // attention model
        }
Пример #18
0
        /// <summary>
        /// Creates a person with one image and one embedding (one EmbeddingValue
        /// per element of <paramref name="embedding"/>), persists everything and
        /// returns the new person.
        /// </summary>
        /// <param name="name">Person's name.</param>
        /// <param name="link">URL of the person's image.</param>
        /// <param name="ucn">Unique civil number of the person.</param>
        /// <param name="city">Person's city.</param>
        /// <param name="embedding">Embedding values; each list index becomes the value's Index.</param>
        /// <param name="quarantine">Quarantine end date.</param>
        /// <returns>The persisted person entity.</returns>
        public async Task <Person> AddAsync(string name, string link, string ucn, string city, List <double> embedding, DateTime quarantine)
        {
            var person = new Person
            {
                Name = name,
                City = city,
                UCN  = ucn,
                QuarantineEndDate = quarantine,
            };

            var image = new Image
            {
                Link = link
            };

            // Link the image to the person through the join entity.
            person.Images.Add(new PersonImage
            {
                Image  = image,
                Person = person
            });

            var embed = new Embedding
            {
                Image  = image,
                Person = person
            };

            image.Embeddings.Add(embed);

            // One EmbeddingValue per scalar, keyed by its position in the list.
            embed.Values = embedding.Select((e, index) => new EmbeddingValue
            {
                Embedding = embed,
                Value     = e,
                Index     = index
            }).ToList();

            // NOTE(review): the person/image/embedding entities reference each
            // other, so the three AddAsync calls likely register an overlapping
            // object graph — confirm the change tracker behaves as intended.
            await this.context.Embeddings.AddAsync(embed);

            await this.context.Images.AddAsync(image);

            await this.context.Persons.AddAsync(person);

            await this.context.SaveChangesAsync();

            return(person);
        }
Пример #19
0
        /// <summary>
        /// Handles the "fill container" button: embeds the message text into the
        /// container image using the selected method/channel and shows the result.
        /// </summary>
        /// <param name="sender">Event source (unused).</param>
        /// <param name="e">Event arguments (unused).</param>
        private void FillContainer_button_Click(object sender, EventArgs e)
        {
            string message   = Message_textBox.Text;
            Image  container = Container_pictureBox.Image;
            Image  filledContainer;
            Method method = GetCurrentMethod();

            // Channel and the two numeric parameters used by the skipping/block methods.
            Channel channel      = Red_radioButton.Checked ? Channel.R : Green_radioButton.Checked ? Channel.G : Channel.B;
            int     firstNumber  = Convert.ToInt32(BeginNumber_nud.Value);
            int     secondNumber = Convert.ToInt32(EndNumber_nud.Value);

            switch (method)
            {
            case Method.Simple:
                filledContainer = Embedding.Simple(message, container, channel);
                break;

            case Method.BitsSkipping:
                // NOTE(review): this calls Embedding.Simple (with a skip count)
                // rather than a dedicated BitsSkipping method — confirm this
                // overload is the intended bit-skipping implementation.
                filledContainer = Embedding.Simple(message, container, channel, firstNumber);
                break;

            case Method.RandBitsSkipping:
                filledContainer = Embedding.RandBitsSkipping(message, container, firstNumber, secondNumber);
                break;

            case Method.BlockOneChannel:
                filledContainer = Embedding.BlockOneChannel(message, container, channel, firstNumber, secondNumber);
                break;

            case Method.BlockThreeChannel:
                filledContainer = Embedding.BlockThreeChannel(message, container, firstNumber, secondNumber);
                break;

            default:
                // A specific exception type with the offending value is more
                // diagnosable than throwing bare Exception("Error.").
                throw new NotSupportedException("Unsupported embedding method: " + method);
            }

            FilledContainer_pictureBox.Image = filledContainer;
            // Enable saving only once a filled container is actually displayed.
            if (FilledContainer_pictureBox.Image != null)
            {
                SaveFillContainer_button.Enabled = true;
            }
        }
Пример #20
0
        /// <summary>
        ///     Build a one direction recurrent neural network (RNN) with long-short-term-memory (LSTM) cells.
        ///     http://colah.github.io/posts/2015-08-Understanding-LSTMs/
        /// </summary>
        /// <param name="input">the input variable</param>
        /// <param name="numOutputClasses">number of output classes</param>
        /// <param name="embeddingDim">dimension of the embedding layer</param>
        /// <param name="LSTMDim">LSTM output dimension</param>
        /// <param name="cellDim">cell dimension</param>
        /// <returns>the RNN model</returns>
        private static Function LSTMSequenceClassifierNet(
            Variable input,
            int numOutputClasses,
            int embeddingDim,
            int LSTMDim,
            int cellDim)
        {
            // Embed the input, run it through a self-stabilized LSTM, take the
            // last step as the sequence representation, then classify densely.
            var embeddingFunction = new Embedding(embeddingDim).ToFunction(input);
            Func <Variable, Function> pastValueRecurrenceHook = x => CNTKLib.PastValue(x);
            var LSTMFunction = LSTMPComponentWithSelfStabilization <float>(
                embeddingFunction,
                new[] { LSTMDim },
                new[] { cellDim },
                pastValueRecurrenceHook,
                pastValueRecurrenceHook).Item1;
            var thoughtVectorFunction = CNTKLib.SequenceLast(LSTMFunction);

            return(new Dense(numOutputClasses).ToFunction((Data.Function)thoughtVectorFunction));
        }
Пример #21
0
        /// <summary>
        /// Loads word vectors from op.wordVectors into this.wordVectors and maps
        /// the configured unknown word onto the canonical UnknownWord key.
        /// </summary>
        internal virtual void ReadWordVectors()
        {
            Embedding embedding = new Embedding(op.wordVectors, op.numHid);

            this.wordVectors = Generics.NewTreeMap();
            //    Map<String, SimpleMatrix> rawWordVectors = NeuralUtils.readRawWordVectors(op.wordVectors, op.numHid);
            //    for (String word : rawWordVectors.keySet()) {
            foreach (string word in embedding.KeySet())
            {
                // TODO: factor out unknown word vector code from DVParser
                wordVectors[word] = embedding.Get(word);
            }
            // Alias the configured unknown word's vector under the canonical key.
            string       unkWord           = op.unkWord;
            SimpleMatrix unknownWordVector = wordVectors[unkWord];

            wordVectors[UnknownWord] = unknownWordVector;
            // NOTE(review): the null check runs after the assignment above, so a
            // missing unknown-word vector is still stored before the throw —
            // confirm whether checking before the assignment was intended.
            if (unknownWordVector == null)
            {
                throw new Exception("Unknown word vector not specified in the word vector file");
            }
        }
Пример #22
0
        /// <summary>
        /// Parser for Embedding
        /// </summary>
        /// <returns>Parsed Embedding</returns>
        /// <exception cref="UnexpectedToken">Thrown when the next token is not an embedding token.</exception>
        public Embedding ParseEmbedding()
        {
            //Get iterator for specific embedding
            if (TokenStream.Peek(1).GetType() == TokenType.EMBEDDING)
            {   //Store embedding in internal tokeniterator to parse internally
                CurrentToken         = TokenStream.NextToken();
                EmbeddingTokenStream = ((EmbeddingToken)CurrentToken).GetTokenIterator();
            }
            else
            {   //Raise exception
                // NOTE(review): CurrentToken has not been advanced in this branch,
                // so the message reports the previously consumed token rather than
                // the offending one — confirm whether that is intended.
                throw new UnexpectedToken("Embedding expected, but found:", CurrentToken.GetValue().ToString(), CurrentToken.GetLine());
            }

            //Let's parse embedding: pre-text, then embed, then text tail.
            Embedding embedding = new Embedding();

            embedding.SetPreText(ParsePreText());
            embedding.SetEmbed(ParseEmbed());
            embedding.SetTextTail(ParseTextTail());

            return(embedding);
        }
Пример #23
0
        /// <summary>
        /// Runs the embedding calculation over the forest and converts each
        /// detected root into an embedding prototype plus the concrete
        /// expressions that instantiate it.
        /// </summary>
        /// <param name="forest">Root nodes to analyze.</param>
        /// <returns>Result holding one embedding per detected root.</returns>
        public Result Collect(Node[] forest)
        {
            Calculate(forest);

            // d_nodes is a list of all root nodes constituting the embeddings
            // d_reverseMapping is a map from the roots to all the original nodes
            // which are contained in that root
            Result result = new Result();

            foreach (Node root in d_nodes)
            {
                List <Node> mapping = d_reverseMapping[root];

                // Replace the subexpression that was mapped on this root with an
                // embedding
                Node            prototype = (Node)root.Clone();
                List <NodePath> arguments = new List <NodePath>();

                // Calculate the placeholder nodes; their paths become the
                // embedding's argument slots
                foreach (Node node in prototype.Descendants)
                {
                    if (Node.IsPlaceholder(node.Instruction))
                    {
                        arguments.Add(node.Path);
                    }
                }

                Embedding proto = result.Prototype(prototype, arguments);

                // Now we generate all the full expressions for this embedding
                foreach (Node inst in mapping)
                {
                    // Replace inst in top hiearchy with embedding node
                    proto.Embed(((Node)inst.Top.Clone()).FromPath(inst.Path));
                }
            }

            return(result);
        }
Пример #24
0
        /// <summary>
        /// Creates an embedding prototype from the first node in the list and
        /// embeds every node of the list into it.
        /// </summary>
        /// <param name="ret">Result to register the new embedding with.</param>
        /// <param name="lst">Nodes sharing the same structure; the first is cloned as the prototype.</param>
        private void AddResult(Result ret, List <Node> lst)
        {
            Node prototypeNode = (Node)lst[0].Clone();

            // Descendants carrying the placeholder label become the
            // embedding's argument slots.
            List <NodePath> placeholderPaths = new List <NodePath>();
            foreach (Node descendant in prototypeNode.Descendants)
            {
                if (descendant.Label[0] == Node.PlaceholderCode)
                {
                    placeholderPaths.Add(descendant.Path);
                }
            }

            // Register the prototype, then embed each concrete node into it.
            Embedding embedding = ret.Prototype(prototypeNode, placeholderPaths);
            foreach (Node node in lst)
            {
                embedding.Embed(node);
            }
        }
Пример #25
0
        /// <summary>
        /// Discrete-architecture transformer encoder: token, positional and
        /// segment embeddings feed a stack of discrete transformer cells that is
        /// organized into distillation blocks connected by hidden-size transfer
        /// layers. Selected parameter groups can be frozen.
        /// </summary>
        /// <param name="paddingIdx">Index of the padding token in the vocabulary.</param>
        /// <param name="vocabSize">Vocabulary size for the token embedding.</param>
        /// <param name="dropout">General dropout probability.</param>
        /// <param name="attentionDropout">Dropout probability inside attention.</param>
        /// <param name="activationDropout">Dropout probability after activations.</param>
        /// <param name="activationFn">Name of the activation function (e.g. "relu").</param>
        /// <param name="dynamicDropout">Whether cells use dynamic dropout.</param>
        /// <param name="addBiasKv">Whether attention adds a bias to key/value.</param>
        /// <param name="addZeroAttention">Whether attention appends a zero-attention slot.</param>
        /// <param name="maxSeqLen">Maximum sequence length for positional embeddings.</param>
        /// <param name="learnedPositionEmbedding">Use learned (vs. fixed) positional embeddings.</param>
        /// <param name="embedSize">Embedding dimensionality.</param>
        /// <param name="embedScale">Optional scale applied to embeddings.</param>
        /// <param name="arches">Per-layer discrete architecture choices; must be non-null and non-empty.</param>
        /// <param name="usePositionEmbedding">Whether to create a positional embedding at all.</param>
        /// <param name="offsetPositionsByPadding">NOTE(review): not referenced in this constructor — confirm it is consumed elsewhere.</param>
        /// <param name="numSegments">Number of segment types; values of 0 or less disable segment embedding.</param>
        /// <param name="encoderNormalizeBefore">Apply layer-norm to embeddings before the encoder stack.</param>
        /// <param name="numEncoderLayers">Total number of encoder layers.</param>
        /// <param name="applyBertInit">NOTE(review): not referenced in this constructor — confirm it is consumed elsewhere.</param>
        /// <param name="freezeEmbeddings">Freeze all embedding-related parameters.</param>
        /// <param name="freezeLayers">Freeze encoder layers and hidden transfer layers.</param>
        /// <param name="freezeTransfer">Freeze hidden transfer layers only.</param>
        /// <param name="nTransLayersToFreeze">Number of leading encoder layers to freeze.</param>
        public TransformerEncoder(
            int paddingIdx,
            int vocabSize,
            double dropout                = 0.1f,
            double attentionDropout       = 0.1f,
            double activationDropout      = 0.1f,
            string activationFn           = "relu",
            bool dynamicDropout           = false,
            bool addBiasKv                = false,
            bool addZeroAttention         = false,
            int maxSeqLen                 = 256,
            bool learnedPositionEmbedding = true,
            int embedSize                 = -1,
            int?embedScale                = null,
            IList <int> arches            = null,
            bool usePositionEmbedding     = true,
            bool offsetPositionsByPadding = true,
            int numSegments               = 2,
            bool encoderNormalizeBefore   = false,
            int numEncoderLayers          = 6,
            bool applyBertInit            = false,
            bool freezeEmbeddings         = false,
            bool freezeLayers             = false,
            bool freezeTransfer           = false,
            int nTransLayersToFreeze      = 0)
            : base(nameof(TransformerEncoder))
        {
            Contracts.AssertValue(arches);
            Contracts.AssertNonEmpty(arches);

            PaddingIdx     = paddingIdx;
            DiscreteArches = arches.ToList();
            DistillBlocks  = 4;

            // Embedding modules
            EmbedScale          = embedScale;
            TokenEmbedding      = torch.nn.Embedding(vocabSize, embedSize, paddingIdx);
            PositionalEmbedding = usePositionEmbedding
                ? PositionalEmbedding.GetPositionalEmbedding(maxSeqLen, embedSize,
                                                             paddingIdx, learnedPositionEmbedding)
                : null;
            SegmentEmbedding = numSegments > 0
                ? torch.nn.Embedding(numSegments, embedSize)
                : null;
            EmbeddingLayerNorm = encoderNormalizeBefore
                ? torch.nn.LayerNorm(new long[] { embedSize })
                : null;
            DropoutLayer = torch.nn.Dropout(dropout);

            // BERT-style init: N(0, 0.02) with a zeroed padding row.
            ModelUtils.InitNormal(TokenEmbedding.weight, mean: 0.0, std: 0.02);
            ModelUtils.InitZeros(TokenEmbedding.weight[paddingIdx]);
            if (SegmentEmbedding != null)
            {
                ModelUtils.InitNormal(SegmentEmbedding.weight, mean: 0.0, std: 0.02);
            }

            // Encoder layers: one discrete transformer cell per architecture entry.
            var layers = Enumerable.Range(0, numEncoderLayers)
                         .Select(i => new TransformerCellDiscrete(
                                     arches[i],
                                     dropout,
                                     attentionDropout,
                                     activationDropout,
                                     activationFn,
                                     addBiasKv,
                                     addZeroAttention,
                                     dynamicDropout) as torch.nn.Module)
                         .ToArray();

            Layers = new ModuleList(layers);

            // Split the layer stack evenly into DistillBlocks distillation blocks.
            var blockPerLayer = numEncoderLayers / DistillBlocks;

            HiddenSizePerBlock = CheckBlockHiddenSize(blockPerLayer);

            // Transfer layers adapt hidden sizes between consecutive blocks
            // (the last block's size is repeated so every block gets a transfer).
            EmbedTransfer = new EmbedTransferDiscrete(embedSize, HiddenSizePerBlock[0]);
            var hiddenSizePerBlockExtend = HiddenSizePerBlock.Append(HiddenSizePerBlock[HiddenSizePerBlock.Count - 1]).ToList();
            var hiddenTransferList       = Enumerable.Range(0, HiddenSizePerBlock.Count)
                                           .Select(i => new HiddenTransferDiscrete(hiddenSizePerBlockExtend[i],
                                                                                   hiddenSizePerBlockExtend[i + 1]) as torch.nn.Module)
                                           .ToArray();

            HiddenTransferList = new ModuleList(hiddenTransferList);

            // Optional freezing of parameter groups.
            if (freezeEmbeddings)
            {
                ModelUtils.FreezeModuleParams(TokenEmbedding);
                ModelUtils.FreezeModuleParams(PositionalEmbedding);
                ModelUtils.FreezeModuleParams(SegmentEmbedding);
                ModelUtils.FreezeModuleParams(EmbeddingLayerNorm);
            }

            if (freezeLayers)
            {
                ModelUtils.FreezeModuleParams(Layers);
                ModelUtils.FreezeModuleParams(HiddenTransferList);
            }

            if (freezeTransfer)
            {
                ModelUtils.FreezeModuleParams(HiddenTransferList);
            }

            for (var i = 0; i < nTransLayersToFreeze; ++i)
            {
                ModelUtils.FreezeModuleParams(Layers[i]);
            }

            // Register submodules/parameters with the module system.
            RegisterComponents();
        }
Пример #26
0
 /// <summary>
 /// Turns the cosine similarity of two embedding vectors into a distance score.
 /// Assuming <c>GetCosineSimilarity</c> returns a value in [-1, 1], the result
 /// lies in [0, 1]: 0 for identical direction, 1 for opposite direction.
 /// </summary>
 /// <param name="v1">First embedding.</param>
 /// <param name="v2">Second embedding.</param>
 /// <returns>Normalized cosine distance, (1 - similarity) / 2.</returns>
 public double Measure(Embedding v1, Embedding v2)
 {
     var similarity = GetCosineSimilarity(v1.Values, v2.Values);
     return (1f - similarity) * 0.5f;
 }
Пример #27
0
        /// <summary>
        /// Create cntk model function by providing parameters. The method is able to create:
        ///     - feedforward network with one hidden layer and any number of neurons
        ///     - deep neural network with any number of hidden layers and any number of neurons.
        ///       Each hidden layer has the same number of neurons.
        ///     - LSTM NN with any number of hidden LSTM layers and any number of LSTM cells in
        ///       each layer. Also at the top of the network you can define one dense layer and
        ///       one dropout layer.
        /// </summary>
        /// <param name="layers">Ordered layer definitions; the last one is renamed to match the output variable.</param>
        /// <param name="inputVars">Input variables; multiple inputs are spliced into a single input layer.</param>
        /// <param name="outpuVar">Output (label) variable; its last dimension sizes the final layer.</param>
        /// <param name="device">Device on which the network parameters are created.</param>
        /// <returns>The composed network as a CNTK <see cref="Function"/>.</returns>
        public static Function CreateNetwrok(List <NNLayer> layers, List <Variable> inputVars, Variable outpuVar, DeviceDescriptor device)
        {
            DataType type       = DataType.Float;
            Variable inputLayer = null;

            if (inputVars.Count > 1)
            {
                var vv = new VariableVector();
                foreach (var v in inputVars)
                {
                    // A sparse variable gets its own embedding layer before the splice,
                    // since mixing sparse and dense data is not supported.
                    if (v.IsSparse)
                    {
                        var v1 = Embedding.Create(v, v.Shape.Dimensions.Last(), type, device, 1, v.Name + "_sp_emb");
                        vv.Add(v1);
                    }
                    else
                    {
                        vv.Add(v);
                    }
                }

                // Merge all inputs into a single input layer along the first axis.
                inputLayer = (Variable)CNTKLib.Splice(vv, new Axis(0));
            }
            else // single input: use it directly as the input layer
            {
                inputLayer = inputVars.First();
            }

            // Create network by stacking the defined layers on top of the input.
            var net = inputLayer;
            var ff  = new FeedForwaredNN(device, type);

            // The last layer must carry the label variable's name.
            layers.Last().Name = outpuVar.Name;

            // The final LSTM layer must not return a sequence: only its last state
            // feeds the layers above it.
            var lastLSTM = layers.Where(x => x.Type == LayerType.LSTM).LastOrDefault();

            foreach (var layer in layers)
            {
                if (layer.Type == LayerType.Dense)
                {
                    net = ff.Dense(net, layer.HDimension, layer.Activation, layer.Name);
                }
                else if (layer.Type == LayerType.Drop)
                {
                    // layer.Value appears to hold the drop rate in percent — hence / 100.
                    net = CNTKLib.Dropout(net, layer.Value / 100.0f);
                }
                else if (layer.Type == LayerType.Embedding)
                {
                    net = Embedding.Create(net, layer.HDimension, type, device, 1, layer.Name);
                }
                else if (layer.Type == LayerType.LSTM)
                {
                    var returnSequence = layers.IndexOf(lastLSTM) != layers.IndexOf(layer);
                    net = RNN.RecurrenceLSTM(net, layer.HDimension, layer.CDimension, type, device, returnSequence, layer.Activation,
                                             layer.Peephole, layer.SelfStabilization, 1);
                }
            }

            // If the last layer is not compatible with the output variable, append a
            // linear output layer sized to it.
            if (net.Shape.Dimensions.Last() != outpuVar.Shape.Dimensions.Last())
            {
                // BUG FIX: the created output layer was previously discarded, so the
                // returned network never matched the output variable. Keep it as the
                // new network head (CreateOutputLayer is expected to return the
                // extended network — confirm against FeedForwaredNN).
                net = ff.CreateOutputLayer(net, outpuVar, Activation.None);
            }

            return net;
        }
 /// <summary>
 /// Stores the CoNLL flag and the two word-embedding tables on this instance.
 /// </summary>
 /// <param name="conll">Flag stored as-is; presumably indicates CoNLL-formatted input.</param>
 /// <param name="staticWordEmbeddings">Embedding table kept in <c>staticWordEmbeddings</c>.</param>
 /// <param name="tunedWordEmbeddings">Embedding table kept in <c>tunedWordEmbeddings</c>.</param>
 public EmbeddingExtractor(bool conll, Embedding staticWordEmbeddings, Embedding tunedWordEmbeddings)
 {
     this.tunedWordEmbeddings  = tunedWordEmbeddings;
     this.staticWordEmbeddings = staticWordEmbeddings;
     this.conll = conll;
 }
        /// <summary>
        /// Reads raw word vectors from <c>op.lexOptions.wordVectorFile</c> into
        /// <c>wordVectors</c>, applying <c>op.wordFunction</c> to each key when set, and
        /// builds averaged "unknown" vectors for numbers, capitalized words, and Chinese
        /// years/numbers/percents when the corresponding train options are enabled.
        /// Categories with no matches fall back to a copy of the unknown-word vector.
        /// </summary>
        /// <exception cref="Exception">Thrown when the unknown word vector is missing from the file.</exception>
        public virtual void ReadWordVectors()
        {
            SimpleMatrix unknownNumberVector         = null;
            SimpleMatrix unknownCapsVector           = null;
            SimpleMatrix unknownChineseYearVector    = null;
            SimpleMatrix unknownChineseNumberVector  = null;
            SimpleMatrix unknownChinesePercentVector = null;

            wordVectors = Generics.NewTreeMap();
            int numberCount         = 0;
            int capsCount           = 0;
            int chineseYearCount    = 0;
            int chineseNumberCount  = 0;
            int chinesePercentCount = 0;
            Embedding rawWordVectors = new Embedding(op.lexOptions.wordVectorFile, op.lexOptions.numHid);

            foreach (string rawWord in rawWordVectors.KeySet())
            {
                // Look up by the raw key first; the (possibly transformed) key is used
                // for storage and pattern matching below.
                SimpleMatrix vector = rawWordVectors.Get(rawWord);
                // BUG FIX: the original assigned to the foreach iteration variable,
                // which is read-only in C# (CS1656); use a separate local instead.
                string word = op.wordFunction != null ? op.wordFunction.Apply(rawWord) : rawWord;
                wordVectors[word] = vector;
                if (op.lexOptions.numHid <= 0)
                {
                    // Infer the hidden size from the first vector when not configured.
                    op.lexOptions.numHid = vector.GetNumElements();
                }
                // The identical accumulate blobs are factored into AccumulateUnknown
                // (resolves the long-standing TODO in this method).
                if (op.trainOptions.unknownNumberVector && (NumberPattern.Matcher(word).Matches() || DgPattern.Matcher(word).Matches()))
                {
                    ++numberCount;
                    unknownNumberVector = AccumulateUnknown(unknownNumberVector, vector);
                }
                if (op.trainOptions.unknownCapsVector && CapsPattern.Matcher(word).Matches())
                {
                    ++capsCount;
                    unknownCapsVector = AccumulateUnknown(unknownCapsVector, vector);
                }
                if (op.trainOptions.unknownChineseYearVector && ChineseYearPattern.Matcher(word).Matches())
                {
                    ++chineseYearCount;
                    unknownChineseYearVector = AccumulateUnknown(unknownChineseYearVector, vector);
                }
                if (op.trainOptions.unknownChineseNumberVector && (ChineseNumberPattern.Matcher(word).Matches() || DgPattern.Matcher(word).Matches()))
                {
                    ++chineseNumberCount;
                    unknownChineseNumberVector = AccumulateUnknown(unknownChineseNumberVector, vector);
                }
                if (op.trainOptions.unknownChinesePercentVector && ChinesePercentPattern.Matcher(word).Matches())
                {
                    ++chinesePercentCount;
                    unknownChinesePercentVector = AccumulateUnknown(unknownChinesePercentVector, vector);
                }
            }
            string unkWord = op.trainOptions.unkWord;

            if (op.wordFunction != null)
            {
                unkWord = op.wordFunction.Apply(unkWord);
            }
            SimpleMatrix unknownWordVector = wordVectors[unkWord];

            wordVectors[UnknownWord] = unknownWordVector;
            if (unknownWordVector == null)
            {
                throw new Exception("Unknown word vector not specified in the word vector file");
            }
            if (op.trainOptions.unknownNumberVector)
            {
                wordVectors[UnknownNumber] = AverageUnknown(unknownNumberVector, numberCount, unknownWordVector);
            }
            if (op.trainOptions.unknownCapsVector)
            {
                wordVectors[UnknownCaps] = AverageUnknown(unknownCapsVector, capsCount, unknownWordVector);
            }
            if (op.trainOptions.unknownChineseYearVector)
            {
                log.Info("Matched " + chineseYearCount + " chinese year vectors");
                wordVectors[UnknownChineseYear] = AverageUnknown(unknownChineseYearVector, chineseYearCount, unknownWordVector);
            }
            if (op.trainOptions.unknownChineseNumberVector)
            {
                log.Info("Matched " + chineseNumberCount + " chinese number vectors");
                wordVectors[UnknownChineseNumber] = AverageUnknown(unknownChineseNumberVector, chineseNumberCount, unknownWordVector);
            }
            if (op.trainOptions.unknownChinesePercentVector)
            {
                log.Info("Matched " + chinesePercentCount + " chinese percent vectors");
                wordVectors[UnknownChinesePercent] = AverageUnknown(unknownChinesePercentVector, chinesePercentCount, unknownWordVector);
            }
            if (op.trainOptions.useContextWords)
            {
                // Random start/end sentinels for context-word features.
                SimpleMatrix start = SimpleMatrix.Random(op.lexOptions.numHid, 1, -0.5, 0.5, rand);
                SimpleMatrix end   = SimpleMatrix.Random(op.lexOptions.numHid, 1, -0.5, 0.5, rand);
                wordVectors[StartWord] = start;
                wordVectors[EndWord]   = end;
            }
        }

        // Folds a vector into a running sum, starting with a fresh copy on first use.
        private static SimpleMatrix AccumulateUnknown(SimpleMatrix sum, SimpleMatrix vector)
        {
            return sum == null ? new SimpleMatrix(vector) : sum.Plus(vector);
        }

        // Averages the accumulated sum over count; falls back to a copy of the
        // unknown-word vector when no word matched the category.
        private static SimpleMatrix AverageUnknown(SimpleMatrix sum, int count, SimpleMatrix fallback)
        {
            return count > 0 ? sum.Divide(count) : new SimpleMatrix(fallback);
        }
Пример #30
0
 /// <summary>
 /// Assigns the given embedding to <c>EchoEmbedding</c>; the reference is stored
 /// without copying.
 /// </summary>
 /// <param name="embedding">Embedding to use from now on.</param>
 public void SetEmbedding(Embedding.Embedding embedding) => EchoEmbedding = embedding;
Пример #31
0
        public static void Main(string[] args)
        {
            string lexiconFile = null;

            string trainFile = null;

            string developmentFile = null;

            string modelFile = null;

            List <Dictionary> posDictionaries = new List <Dictionary>();

            List <Embedding> posEmbeddings = new List <Embedding>();

            List <Dictionary> neDictionaries = new List <Dictionary>();

            List <Embedding> neEmbeddings = new List <Embedding>();

            int posBeamSize = 8;

            int neBeamSize = 4;

            string language = null;

            bool preserve = false;

            bool plainOutput = false;

            string fold = null;

            int maximumPosIterations = 16;

            int maximumNeIterations = 16;

            bool extendLexicon = true;

            bool hasNe = true;

            for (int i = 0; i < args.Length; i++)
            {
                if (args[i].Equals("-lexicon"))
                {
                    lexiconFile = args[++i];
                }
                else if (args[i].Equals("-dict"))
                {
                    string destination = args[++i];

                    Dictionary dictionary = new Dictionary();

                    try
                    {
                        dictionary.FromFile(args[++i]);
                    }
                    catch (IOException e)
                    {
                        Console.WriteLine("Can not load dictionary file.");

                        Console.WriteLine(e.StackTrace);

                        Environment.Exit(1);
                    }

                    if (destination.Equals("pos"))
                    {
                        posDictionaries.Add(dictionary);
                    }
                    else if (destination.Equals("ne"))
                    {
                        neDictionaries.Add(dictionary);
                    }
                    else if (destination.Equals("all"))
                    {
                        posDictionaries.Add(dictionary);

                        neDictionaries.Add(dictionary);
                    }
                    else
                    {
                        Console.WriteLine("Expected pos/ne/all.");

                        Environment.Exit(1);
                    }
                }
                else if (args[i].Equals("-lang"))
                {
                    language = args[++i];
                }
                else if (args[i].Equals("-extendlexicon"))
                {
                    extendLexicon = true;
                }
                else if (args[i].Equals("-noextendlexicon"))
                {
                    extendLexicon = false;
                }
                else if (args[i].Equals("-noner"))
                {
                    hasNe = false;
                }
                else if (args[i].Equals("-positers"))
                {
                    maximumPosIterations = int.Parse(args[++i]);
                }
                else if (args[i].Equals("-neiters"))
                {
                    maximumNeIterations = int.Parse(args[++i]);
                }
                else if (args[i].Equals("-posbeamsize"))
                {
                    posBeamSize = int.Parse(args[++i]);
                }
                else if (args[i].Equals("-nebeamsize"))
                {
                    neBeamSize = int.Parse(args[++i]);
                }
                else if (args[i].Equals("-preserve"))
                {
                    preserve = true;
                }
                else if (args[i].Equals("-plain"))
                {
                    plainOutput = true;
                }
                else if (args[i].Equals("-fold"))
                {
                    fold = args[++i];
                }
                else if (args[i].Equals("-embed"))
                {
                    string destination = args[++i];

                    Embedding embedding = new Embedding();

                    try
                    {
                        embedding.FromFile(args[++i]);
                    }
                    catch (IOException e)
                    {
                        Console.WriteLine("Can not load embedding file.");

                        Console.WriteLine(e.StackTrace);

                        Environment.Exit(1);
                    }

                    if (destination.Equals("pos"))
                    {
                        posEmbeddings.Add(embedding);
                    }
                    else if (destination.Equals("ne"))
                    {
                        neEmbeddings.Add(embedding);
                    }
                    else if (destination.Equals("all"))
                    {
                        posEmbeddings.Add(embedding);

                        neEmbeddings.Add(embedding);
                    }
                    else
                    {
                        Console.WriteLine("Expected pos/ne/all.");

                        Environment.Exit(1);
                    }
                }
                else if (args[i].Equals("-trainfile"))
                {
                    trainFile = args[++i];
                }
                else if (args[i].Equals("-devfile"))
                {
                    developmentFile = args[++i];
                }
                else if (args[i].Equals("-modelfile"))
                {
                    modelFile = args[++i];
                }
                else if (args[i].Equals("-train"))
                {
                    TaggedToken[][] developmentSentences = null;

                    if (trainFile == null || modelFile == null || language == null)
                    {
                        Console.WriteLine("Insufficient data.");

                        Environment.Exit(1);
                    }

                    TaggedData taggedData = new TaggedData(language);

                    TaggedToken[][] trainSentences = taggedData.ReadConll(trainFile, null, true, !trainFile.EndsWith(".conll"));

                    if (developmentFile != null)
                    {
                        developmentSentences = taggedData.ReadConll(developmentFile, null, true, !developmentFile.EndsWith(".conll"));
                    }

                    Console.WriteLine($"Read {trainSentences.Length} training sentences and {developmentSentences?.Length ?? 0} development sentences.");

                    Tagger tagger = GetTagger(language, taggedData, posBeamSize, neBeamSize);

                    tagger.BuildLexicons(trainSentences);

                    Lexicon lexicon = tagger.PosLexicon;

                    Console.WriteLine($"POS lexicon size (corpus) {lexicon.Size}.");

                    if (lexiconFile != null)
                    {
                        Console.WriteLine(extendLexicon ? $"Reading lexicon '{lexiconFile}'." : $"Reading lexicon (not extending profiles) '{lexiconFile}'.");

                        lexicon.FromFile(lexiconFile, taggedData.PosTagSet, extendLexicon);

                        Console.WriteLine($"POS lexicon size (external) {lexicon.Size}.");
                    }

                    tagger.PosDictionaries = posDictionaries;

                    tagger.PosEmbeddings = posEmbeddings;

                    tagger.NeDictionaries = neDictionaries;

                    tagger.NeEmbeddings = neEmbeddings;

                    tagger.MaximumPosIterations = maximumPosIterations;

                    tagger.MaximumNeIterations = maximumNeIterations;

                    tagger.Train(trainSentences, developmentSentences);

                    BinaryFormatter formatter = new BinaryFormatter();

                    formatter.Serialize(new FileStream(modelFile, FileMode.Create), tagger);
                }
                else if (args[i].Equals("-cross"))
                {
                    TaggedData taggedData = new TaggedData(language);

                    TaggedToken[][] allSentences = taggedData.ReadConll(trainFile, null, true, !trainFile.EndsWith(".conll"));

                    Tagger tagger = GetTagger(language, taggedData, posBeamSize, neBeamSize);

                    tagger.PosDictionaries = posDictionaries;

                    tagger.PosEmbeddings = posEmbeddings;

                    tagger.NeDictionaries = neDictionaries;

                    tagger.NeEmbeddings = neEmbeddings;

                    const int foldsCount = 10;

                    Evaluation evaluation = new Evaluation();

                    for (int j = 0; j < foldsCount; j++)
                    {
                        Evaluation localEvaluation = new Evaluation();

                        TaggedToken[][][] parts = GetSUCFold(allSentences, j);

                        Console.WriteLine($"Fold {j}, train ({parts[0].Length}), development ({parts[1].Length}), test ({parts[2].Length})");

                        Lexicon lexicon = tagger.PosLexicon;

                        lexicon.Clear();

                        tagger.BuildLexicons(parts[0]);

                        if (lexiconFile != null)
                        {
                            lexicon.FromFile(lexiconFile, taggedData.PosTagSet, extendLexicon);
                        }

                        tagger.Train(parts[0], parts[1]);

                        foreach (TaggedToken[] sentence in parts[2])
                        {
                            TaggedToken[] taggedSentence = tagger.TagSentence(sentence, true, false);

                            evaluation.Evaluate(taggedSentence, sentence);

                            localEvaluation.Evaluate(taggedSentence, sentence);

                            tagger.TaggedData.WriteConllGold(new StreamWriter(Console.OpenStandardOutput()), taggedSentence, sentence, plainOutput);
                        }

                        Console.WriteLine($"Local POS accuracy: {localEvaluation.GetPosAccuracy()} ({localEvaluation.PosCorrect} / {localEvaluation.PosTotal})");
                    }

                    Console.WriteLine($"POS accuracy: {evaluation.GetPosAccuracy()} ({evaluation.PosCorrect} / {evaluation.PosTotal})");

                    Console.WriteLine($"NE precision: {evaluation.GetNePrecision()}");

                    Console.WriteLine($"NE recall:    {evaluation.GetNeRecall()}");

                    Console.WriteLine($"NE F-score:   {evaluation.GetNeFScore()}");

                    Console.WriteLine($"NE total:     {evaluation.NeTotal}");

                    Console.WriteLine($"NE correct:   {evaluation.NeCorrect}");

                    Console.WriteLine($"NE found:     {evaluation.NeFound}");
                }
                else if (args[i].Equals("-server"))
                {
                    if (modelFile == null || i >= args.Length - 1)
                    {
                        Console.WriteLine("Insufficient data.");

                        Environment.Exit(1);
                    }

                    IPAddress serverIp = Dns.GetHostAddresses(args[++i]).FirstOrDefault();

                    int serverPort = int.Parse(args[++i]);

                    BinaryFormatter formatter = new BinaryFormatter();

                    Console.WriteLine("Loading Stagger model ...");

                    Tagger tagger = (Tagger)formatter.Deserialize(new FileStream(modelFile, FileMode.Open));

                    language = tagger.TaggedData.Language;

                    TcpListener tcpListener = new TcpListener(serverIp, serverPort);

                    tcpListener.Start(4);

                    while (true)
                    {
                        Socket sock = null;

                        try
                        {
                            sock = tcpListener.AcceptSocket();

                            Console.WriteLine($"Connected to {sock.RemoteEndPoint}");

                            NetworkStream networkStream = new NetworkStream(sock);

                            byte[] lengthBuffer = new byte[4];

                            if (networkStream.Read(lengthBuffer) != 4)
                            {
                                throw new IOException("Can not read length.");
                            }

                            int length = BitConverter.ToInt32(lengthBuffer);

                            if (length < 1 || length > 100000)
                            {
                                throw new IOException($"Invalid data size {length}.");
                            }

                            byte[] dataBuf = new byte[length];
                            if (networkStream.Read(dataBuf) != length)
                            {
                                throw new IOException("Can not read data.");
                            }

                            StringReader reader = new StringReader(Encoding.UTF8.GetString(dataBuf));

                            StreamWriter writer = new StreamWriter(networkStream, Encoding.UTF8);

                            Tokenizer tokenizer = GetTokenizer(reader, language);

                            List <Token> sentence;

                            int sentenceIndex = 0;

                            string fileId = "net";

                            while ((sentence = tokenizer.ReadSentence()) != null)
                            {
                                TaggedToken[] taggedSentence = new TaggedToken[sentence.Count];

                                if (tokenizer.SentenceId != null)
                                {
                                    if (!fileId.Equals(tokenizer.SentenceId))
                                    {
                                        fileId = tokenizer.SentenceId;

                                        sentenceIndex = 0;
                                    }
                                }

                                for (int j = 0; j < sentence.Count; j++)
                                {
                                    Token token = sentence[j];

                                    var id = $"{fileId}:{sentenceIndex}:{token.Offset}";

                                    taggedSentence[j] = new TaggedToken(token, id);
                                }

                                TaggedToken[] taggedSent = tagger.TagSentence(taggedSentence, true, false);

                                tagger.TaggedData.WriteConllSentence(writer ?? new StreamWriter(Console.OpenStandardOutput()), taggedSent, plainOutput);

                                sentenceIndex++;
                            }

                            tokenizer.Close();

                            if (sock.Connected)
                            {
                                Console.WriteLine($"Closing connection to {sock.RemoteEndPoint}.");

                                writer.Close();
                            }
                        }
                        catch (IOException e)
                        {
                            Console.WriteLine(e.StackTrace);

                            if (sock != null)
                            {
                                Console.WriteLine($"Connection failed to {sock.RemoteEndPoint}.");

                                if (sock.Connected)
                                {
                                    sock.Close();
                                }
                            }
                        }
                    }
                }
                else if (args[i].Equals("-tag"))
                {
                    if (modelFile == null || i >= args.Length - 1)
                    {
                        Console.WriteLine("Insufficient data.");

                        Environment.Exit(1);
                    }

                    List <string> inputFiles = new List <string>();

                    for (i++; i < args.Length && !args[i].StartsWith("-"); i++)
                    {
                        inputFiles.Add(args[i]);
                    }

                    if (inputFiles.Count < 1)
                    {
                        Console.WriteLine("No files to tag.");

                        Environment.Exit(1);
                    }

                    BinaryFormatter formatter = new BinaryFormatter();

                    Console.WriteLine("Loading Stagger model ...");

                    Tagger tagger = (Tagger)formatter.Deserialize(new FileStream(modelFile, FileMode.Open));

                    language = tagger.TaggedData.Language;

                    tagger.ExtendLexicon = extendLexicon;

                    if (!hasNe)
                    {
                        tagger.HasNe = false;
                    }

                    foreach (string inputFile in inputFiles)
                    {
                        if (!(inputFile.EndsWith(".txt") || inputFile.EndsWith(".txt.gz")))
                        {
                            TaggedToken[][] inputSentence = tagger.TaggedData.ReadConll(inputFile, null, true, !inputFile.EndsWith(".conll"));

                            Evaluation evaluation = new Evaluation();

                            int count = 0;

                            StreamWriter writer = new StreamWriter(Console.OpenStandardOutput(), Encoding.UTF8);

                            foreach (TaggedToken[] sentence in inputSentence)
                            {
                                if (count % 100 == 0)
                                {
                                    Console.WriteLine($"Tagging sentence number {count}.\r");
                                }

                                count++;

                                TaggedToken[] taggedSentence = tagger.TagSentence(sentence, true, preserve);

                                evaluation.Evaluate(taggedSentence, sentence);

                                tagger.TaggedData.WriteConllGold(writer, taggedSentence, sentence, plainOutput);
                            }

                            writer.Close();

                            Console.WriteLine($"Tagging sentence number {count}.");

                            Console.WriteLine($"POS accuracy: {evaluation.GetPosAccuracy()} ({evaluation.PosCorrect} / {evaluation.PosTotal}).");

                            Console.WriteLine($"NE precision: {evaluation.GetNePrecision()}.");

                            Console.WriteLine($"NE recall:    {evaluation.GetNeRecall()}.");

                            Console.WriteLine($"NE F-score:   {evaluation.GetNeFScore()}.");
                        }
                        else
                        {
                            string fileId = Path.GetFileNameWithoutExtension(inputFile);

                            TextReader reader = OpenUtf8File(inputFile);

                            StreamWriter writer;

                            if (inputFiles.Count > 1)
                            {
                                string outputFile = $"{inputFile}{(plainOutput ? ".plain" : ".conll")}";

                                writer = new StreamWriter(new FileStream(outputFile, FileMode.Create), Encoding.UTF8);
                            }
                            else
                            {
                                writer = new StreamWriter(Console.OpenStandardOutput(), Encoding.UTF8);
                            }

                            Tokenizer tokenizer = GetTokenizer(reader, language);

                            List <Token> sentence;

                            int sentenceIndex = 0;

                            while ((sentence = tokenizer.ReadSentence()) != null)
                            {
                                TaggedToken[] sent = new TaggedToken[sentence.Count];

                                if (tokenizer.SentenceId != null)
                                {
                                    if (!fileId.Equals(tokenizer.SentenceId))
                                    {
                                        fileId = tokenizer.SentenceId;

                                        sentenceIndex = 0;
                                    }
                                }

                                for (int j = 0; j < sentence.Count; j++)
                                {
                                    Token tok = sentence[j];

                                    var id = $"{fileId}:{sentenceIndex}:{tok.Offset}";

                                    sent[j] = new TaggedToken(tok, id);
                                }

                                TaggedToken[] taggedSent = tagger.TagSentence(sent, true, false);

                                tagger.TaggedData.WriteConllSentence(writer ?? new StreamWriter(Console.OpenStandardOutput()), taggedSent, plainOutput);

                                sentenceIndex++;
                            }

                            tokenizer.Close();

                            writer?.Close();
                        }
                    }
                }
                else if (args[i].Equals("-tokenize"))
                {
                    string inputFile = args[++i];

                    TextReader reader = OpenUtf8File(inputFile);

                    Tokenizer tokenizer = GetTokenizer(reader, language);

                    List <Token> sentence;

                    while ((sentence = tokenizer.ReadSentence()) != null)
                    {
                        if (sentence.Count == 0)
                        {
                            continue;
                        }

                        if (!plainOutput)
                        {
                            Console.Write(sentence[0].Value.Replace(' ', '_'));

                            for (int j = 1; j < sentence.Count; j++)
                            {
                                Console.Write($" {sentence[j].Value.Replace(' ', '_')}");
                            }

                            Console.WriteLine("");
                        }
                        else
                        {
                            foreach (Token token in sentence)
                            {
                                Console.WriteLine(token.Value);
                            }

                            Console.WriteLine();
                        }
                    }

                    tokenizer.Close();
                }
            }
        }
        /// <summary>
        /// Deserializes the <c>data</c> payload of a Clarifai API response into a list of
        /// concrete <see cref="IPrediction"/> instances.
        /// </summary>
        /// <param name="modelType">
        /// Model type whose <c>Prediction.Name</c> selects the concrete prediction class
        /// ("Color", "Concept", "Demographics", ...) used for deserialization.
        /// </param>
        /// <param name="jsonObject">
        /// Dynamic JSON response object; its <c>data</c> property carries the per-type
        /// prediction collections (e.g. <c>colors</c>, <c>concepts</c>, <c>regions</c>,
        /// <c>embeddings</c>, <c>frames</c>).
        /// </param>
        /// <returns>
        /// The deserialized predictions; an empty list when the response's <c>data</c>
        /// object has no properties.
        /// </returns>
        /// <exception cref="ClarifaiException">
        /// Thrown when <c>modelType.Prediction.Name</c> is not a recognized type name.
        /// </exception>
        protected static List <IPrediction> DeserializePredictions(ModelType modelType,
                                                                   dynamic jsonObject)
        {
            var data = new List <IPrediction>();

            // Guard clause: an empty `data` object means the response carries no
            // predictions at all, so there is nothing to deserialize.
            var propertyValues = (JObject)jsonObject.data;
            if (propertyValues.Count == 0)
            {
                return data;
            }

            // Dispatch on the prediction type's simple name; each case reads the JSON
            // collection that Clarifai uses for that type and deserializes every item.
            string typeName = modelType.Prediction.Name;
            switch (typeName)
            {
            case "Color":
            {
                foreach (dynamic color in jsonObject.data.colors)
                {
                    data.Add(Color.Deserialize(color));
                }
                break;
            }

            case "Concept":
            {
                foreach (dynamic concept in jsonObject.data.concepts)
                {
                    data.Add(Concept.Deserialize(concept));
                }
                break;
            }

            case "Demographics":
            {
                foreach (dynamic demographics in jsonObject.data.regions)
                {
                    data.Add(Demographics.Deserialize(demographics));
                }
                break;
            }

            case "Embedding":
            {
                foreach (dynamic embedding in jsonObject.data.embeddings)
                {
                    data.Add(Embedding.Deserialize(embedding));
                }
                break;
            }

            case "FaceConcepts":
            {
                foreach (dynamic faceConcepts in jsonObject.data.regions)
                {
                    data.Add(FaceConcepts.Deserialize(faceConcepts));
                }
                break;
            }

            case "FaceDetection":
            {
                foreach (dynamic faceDetection in jsonObject.data.regions)
                {
                    data.Add(FaceDetection.Deserialize(faceDetection));
                }
                break;
            }

            case "FaceEmbedding":
            {
                foreach (dynamic faceEmbedding in jsonObject.data.regions)
                {
                    data.Add(FaceEmbedding.Deserialize(faceEmbedding));
                }
                break;
            }

            case "Focus":
            {
                // NOTE(review): `jsonObject.data.focus.value` is loop-invariant, but
                // hoisting it would evaluate `focus` even when `regions` is empty,
                // which could change behavior for responses lacking a `focus` node —
                // left inside the loop on purpose.
                foreach (dynamic focus in jsonObject.data.regions)
                {
                    data.Add(Focus.Deserialize(focus,
                                               (decimal)jsonObject.data.focus.value));
                }
                break;
            }

            case "Frame":
            {
                foreach (dynamic frame in jsonObject.data.frames)
                {
                    data.Add(Frame.Deserialize(frame));
                }
                break;
            }

            case "Logo":
            {
                foreach (dynamic logo in jsonObject.data.regions)
                {
                    data.Add(Logo.Deserialize(logo));
                }
                break;
            }

            default:
            {
                // Interpolated string yields the identical message the old
                // string.Format call produced.
                throw new ClarifaiException($"Unknown output type `{typeName}`");
            }
            }
            return data;
        }