예제 #1
0
        /// <summary>
        /// Builds a string-to-string snapshot of the current settings.
        /// </summary>
        /// <param name="suppressWarnings">Not read by this method.</param>
        /// <returns>
        /// A new dictionary holding the encoding, the input and output locations, the vocabulary
        /// size, vector size and cosine cutoff, followed by the word list length and one
        /// "WordList" + index entry per word.
        /// </returns>
        public Dictionary <string, string> ExportSettings(bool suppressWarnings)
        {
            Dictionary <string, string> exported = new Dictionary <string, string>
            {
                { "SelectedEncoding", SelectedEncoding },
                { "IncomingTextLocation", IncomingTextLocation },
                { "OutputLocation", OutputLocation },
                { "VocabSize", VocabSize.ToString() },
                { "VectorSize", VectorSize.ToString() },
                { "CosineCutoff", CosineCutoff.ToString() }
            };

            // A missing word list is exported as a length of zero with no word entries.
            int wordCount = (WordList != null) ? WordList.Length : 0;
            exported.Add("WordListLength", wordCount.ToString());

            for (int index = 0; index < wordCount; index++)
            {
                exported.Add("WordList" + index.ToString(), WordList[index]);
            }

            return exported;
        }
예제 #2
0
 /// <summary>
 /// Completes the initialization of the current vector instance.
 /// </summary>
 /// <remarks>
 /// Assigns the array-view, size, start-address, clear and release members, each built
 /// from the matching VectorOfInt routine.
 /// NOTE(review): VectorArray, VectorSize, VectorStartAddress, VectorClear and VectorRelease
 /// look like delegate types bound to native functions; confirm against their declarations.
 /// </remarks>
 protected override void FinalizeCreation()
 {
     // Views of this vector as an input, input/output and output array.
     GetAsInputArray       = new VectorArray(cvInputArrayFromVectorOfInt);
     GetAsInputOutputArray = new VectorArray(cvInputOutputArrayFromVectorOfInt);
     GetAsOutputArray      = new VectorArray(cvOutputArrayFromVectorOfInt);
     // Size query, start address, clearing and release of the vector.
     GetSize         = new VectorSize(VectorOfIntGetSize);
     GetStartAddress = new VectorStartAddress(VectorOfIntGetStartAddress);
     ClearData       = new VectorClear(VectorOfIntClear);
     Release         = new VectorRelease(VectorOfIntRelease);
 }
예제 #3
0
        /// <summary>
        /// Builds a string-to-string snapshot of the current settings.
        /// </summary>
        /// <param name="suppressWarnings">Not read by this method.</param>
        /// <returns>
        /// A new dictionary with the keys "InputModelFilename", "SelectedEncoding",
        /// "VocabSize" and "VectorSize".
        /// </returns>
        public Dictionary <string, string> ExportSettings(bool suppressWarnings)
        {
            return new Dictionary <string, string>
            {
                { "InputModelFilename", InputModelFilename },
                { "SelectedEncoding", SelectedEncoding },
                { "VocabSize", VocabSize.ToString() },
                { "VectorSize", VectorSize.ToString() }
            };
        }
예제 #4
0
        /// <summary>
        /// Vector sizes that can be queried for hardware acceleration.
        /// </summary>
        /// <remarks>
        /// Do not reorder, renumber or extend these members: IsHwAccelerated casts the value to
        /// <see cref="int"/> and rejects anything outside 0..2.
        /// </remarks>
        public enum VectorSize
        {
            /// <summary>Reported as not accelerated for <see cref="float"/> by IsHwAccelerated.</summary>
            V64 = 0,

            /// <summary>Checked against Sse.IsSupported for <see cref="float"/> by IsHwAccelerated.</summary>
            V128 = 1,

            /// <summary>Checked against Avx.IsSupported for <see cref="float"/> by IsHwAccelerated.</summary>
            V256 = 2
        }

        /// <summary>
        /// Indicates whether hardware acceleration is available for a certain type and vectorSize size
        /// </summary>
        /// <typeparam name="T">The type to check hardware support for</typeparam>
        /// <param name="vectorSize">The size of the vector to check hardware support for</param>
        /// <returns><see langword="true"/> if the type is a valid vectorSize element, and is hardware accelerated for the size <paramref name="vectorSize"/>, else <see langword="false"/></returns>
        public static bool IsHwAccelerated <T>(VectorSize vectorSize)
        {
            if ((int)vectorSize > 2 || vectorSize < 0)
            {
                throw new ArgumentException("Invalid vectorSize size");
            }

            if (!IsSupportedVectorType <T>())
            {
                return(false);
            }

            if (typeof(T) == typeof(float))
            {
#pragma warning disable CS8509 // The switch expression does not handle all possible inputs (it is not exhaustive).
                return(vectorSize switch
                {
                    VectorSize.V64 => false,
                    VectorSize.V128 => Sse.IsSupported,
                    VectorSize.V256 => Avx.IsSupported
                });
예제 #5
0
        /// <summary>
        /// Lets the user choose a word embedding model file, then reads its first line to
        /// determine the vocabulary size and vector size.
        /// </summary>
        /// <remarks>
        /// A first line with exactly two tokens is treated as a "vocab-size vector-size" header.
        /// Any other non-empty first line is treated as a data row (a word followed by its vector),
        /// in which case the vocabulary size is recorded as -1 (unknown).
        /// The fields and text boxes are only updated once the first line has been read and
        /// validated, so a failed read leaves the previous selection untouched.
        /// </remarks>
        /// <param name="sender">The control that raised the event.</param>
        /// <param name="e">Event data; not read by this handler.</param>
        private void SetFolderButton_Click(object sender, System.EventArgs e)
        {
            using (var dialog = new OpenFileDialog())
            {
                dialog.Multiselect     = false;
                dialog.CheckFileExists = true;
                dialog.CheckPathExists = true;
                dialog.ValidateNames   = true;
                dialog.Title           = "Please choose the model file that you would like to read";
                dialog.FileName        = "Model.txt";
                dialog.Filter          = "Word Embedding Model (.txt,.vec)|*.txt;*.vec";

                if (dialog.ShowDialog() != DialogResult.OK)
                {
                    return;
                }

                try
                {
                    // Everything is gathered into locals first so that a failure part-way
                    // through cannot leave VocabSize and VectorSize out of step.
                    int    detectedVocabSize;
                    int    detectedVectorSize;
                    string details;

                    using (var stream = File.OpenRead(dialog.FileName))
                        using (var reader = new StreamReader(stream, encoding: Encoding.GetEncoding(EncodingDropdown.SelectedItem.ToString())))
                        {
                            // ReadLine returns null when the file is empty.
                            string headerLine = reader.ReadLine();
                            if (headerLine == null)
                            {
                                throw new FormatException("The model file is empty.");
                            }

                            string[] firstLine = headerLine.Split(new[] { ' ' }, StringSplitOptions.RemoveEmptyEntries);

                            // A blank first line would otherwise yield a vector size of -1.
                            if (firstLine.Length == 0)
                            {
                                throw new FormatException("The first line of the model file is blank.");
                            }

                            if (firstLine.Length == 2)
                            {
                                detectedVocabSize  = int.Parse(firstLine[0]);
                                detectedVectorSize = int.Parse(firstLine[1]);
                                details            = "Vocab size: " + firstLine[0] + "; Vector Size: " + firstLine[1];
                            }
                            else
                            {
                                // No header: the line is a word followed by its vector values.
                                detectedVectorSize = firstLine.Length - 1;
                                detectedVocabSize  = -1;
                                details            = "Vocab size: unknown; Vector Size: " + detectedVectorSize.ToString();
                            }
                        }

                    VocabSize  = detectedVocabSize;
                    VectorSize = detectedVectorSize;
                    ModelDetailsTextbox.Text = details;
                    SelectedFileTextbox.Text = dialog.FileName;
                }
                catch
                {
                    // Any failure (I/O, encoding lookup, malformed first line) is reported the same way.
                    MessageBox.Show("There was an error while trying to read your word embedding model. It is possible that your file is not correctly formatted, or that your model file is open in another program.", "Error reading model", MessageBoxButtons.OK, MessageBoxIcon.Error);
                }
            }
        }
예제 #6
0
 /// <summary>
 /// Completes the initialization of the current instance.
 /// </summary>
 /// <remarks>
 /// Assigns the size and release members, built from cveStringGetLength and cveStringRelease.
 /// NOTE(review): VectorSize and VectorRelease look like delegate types bound to native
 /// functions; confirm against their declarations.
 /// </remarks>
 protected override void FinalizeCreation()
 {
     // The size of this instance is obtained through cveStringGetLength.
     GetSize = new VectorSize(cveStringGetLength);
     Release = new VectorRelease(cveStringRelease);
 }
예제 #7
0
 /// <summary>
 /// Completes the initialization of the current vector instance.
 /// </summary>
 /// <remarks>
 /// Assigns the array-view, size, start-address, clear and release members, each built
 /// from the matching VectorOfPoint routine.
 /// NOTE(review): VectorArray, VectorSize, VectorStartAddress, VectorClear and VectorRelease
 /// look like delegate types bound to native functions; confirm against their declarations.
 /// </remarks>
 protected override void FinalizeCreation()
 {
     // Views of this vector as an input, input/output and output array.
     GetAsInputArray = new VectorArray(cvInputArrayFromVectorOfPoint);
     GetAsInputOutputArray = new VectorArray(cvInputOutputArrayFromVectorOfPoint);
     GetAsOutputArray = new VectorArray(cvOutputArrayFromVectorOfPoint);
     // Size query, start address, clearing and release of the vector.
     GetSize = new VectorSize(VectorOfPointGetSize);
     GetStartAddress = new VectorStartAddress(VectorOfPointGetStartAddress);
     ClearData = new VectorClear(VectorOfPointClear);
     Release = new VectorRelease(VectorOfPointRelease);
 }
예제 #8
0
 /// <summary>
 /// Completes the initialization of the current instance.
 /// </summary>
 /// <remarks>
 /// Assigns the size and release members, built from cveStringGetLength and cveStringRelease.
 /// NOTE(review): VectorSize and VectorRelease look like delegate types bound to native
 /// functions; confirm against their declarations.
 /// </remarks>
 protected override void FinalizeCreation()
 {
     // The size of this instance is obtained through cveStringGetLength.
     GetSize = new VectorSize(cveStringGetLength);
     Release = new VectorRelease(cveStringRelease);
 }
예제 #9
0
        /// <summary>
        /// Loads the word embedding model in <c>InputModelFilename</c> into memory and creates the tokenizer.
        /// </summary>
        /// <remarks>
        /// When <c>VocabSize</c> is not -1 the first line of the file is skipped as a header and
        /// the model is filled in a single pass. When <c>VocabSize</c> is -1 the file is read twice:
        /// once to collect the words and count the rows, and once more to fill the model, so that
        /// the model never has to be held in memory twice.
        /// Blank lines are ignored. Vector values are parsed with the invariant culture, because
        /// model files are machine-written with '.' as the decimal separator regardless of the
        /// user's regional settings.
        /// An <see cref="OutOfMemoryException"/> is reported in a message box and not rethrown;
        /// the model may then be only partly loaded.
        /// </remarks>
        public void Initialize()
        {
            TotalNumRows = 0;

            //we could use a List<double[]> to load in the word vectors, then
            //just .ToArray() it to make jagged arrays. However, I *really* want to avoid
            //having to hold the model in memory twice
            WordToArrayMap = new Dictionary <string, int>();
            bool vocabSizeKnown = (VocabSize != -1);
            if (vocabSizeKnown)
            {
                model = new double[VocabSize][];
            }

            try
            {
                #region capture dictionary words and initialize model, if vocabsize is known
                using (var stream = File.OpenRead(InputModelFilename))
                    using (var reader = new StreamReader(stream, encoding: Encoding.GetEncoding(SelectedEncoding)))
                    {
                        if (vocabSizeKnown)
                        {
                            //the first line is the size header rather than a word row, so skip it
                            reader.ReadLine();
                        }

                        string line;
                        while ((line = reader.ReadLine()) != null)
                        {
                            string[] splitLine = SplitModelRow(line);
                            if (splitLine.Length == 0)
                            {
                                //blank line (for example a trailing newline): not a row
                                continue;
                            }

                            string rowWord = splitLine[0].Trim();
                            if (!WordToArrayMap.ContainsKey(rowWord))
                            {
                                //store the vector before registering the word, so a failure here
                                //never leaves a word mapped to a row that was not filled in
                                if (vocabSizeKnown)
                                {
                                    model[TotalNumRows] = ParseRowVector(splitLine);
                                }

                                WordToArrayMap.Add(rowWord, TotalNumRows);
                            }

                            TotalNumRows++;
                        }
                    }
                #endregion

                //if we didn't know the vocab size initially, we know it now that we've walked the whole model
                #region if vocab size was unknown, now we load up the whole model into memory
                if (!vocabSizeKnown)
                {
                    model        = new double[TotalNumRows][];
                    TotalNumRows = 0;

                    using (var stream = File.OpenRead(InputModelFilename))
                        using (var reader = new StreamReader(stream, encoding: Encoding.GetEncoding(SelectedEncoding)))
                        {
                            string line;
                            while ((line = reader.ReadLine()) != null)
                            {
                                string[] splitLine = SplitModelRow(line);
                                if (splitLine.Length == 0)
                                {
                                    //must skip exactly the lines the first pass skipped so row numbers agree
                                    continue;
                                }

                                string rowWord = splitLine[0].Trim();
                                if (WordToArrayMap.ContainsKey(rowWord))
                                {
                                    model[TotalNumRows] = ParseRowVector(splitLine);
                                }

                                TotalNumRows++;
                            }
                        }
                }
                #endregion
            }
            catch (OutOfMemoryException)
            {
                MessageBox.Show("Plugin Error: Latent Semantic Similarity. This plugin encountered an \"Out of Memory\" error while trying to load your pre-trained model. More than likely, you do not have enough RAM in your computer to hold this model in memory. Consider using a model with a smaller vocabulary or fewer dimensions.", "Plugin Error (Out of Memory)", MessageBoxButtons.OK, MessageBoxIcon.Error);
            }

            tokenizer = new TwitterAwareTokenizer();
        }

        /// <summary>
        /// Splits one line of the model file into its space-separated tokens, dropping empty entries.
        /// </summary>
        /// <param name="line">A line read from the model file.</param>
        /// <returns>The tokens of the line; an empty array for a blank line.</returns>
        private static string[] SplitModelRow(string line)
        {
            return line.TrimEnd().Split(new[] { ' ' }, StringSplitOptions.RemoveEmptyEntries);
        }

        /// <summary>
        /// Parses the <c>VectorSize</c> values that follow the word in a split model row.
        /// </summary>
        /// <param name="splitLine">The tokens of a row: the word followed by its vector values.</param>
        /// <returns>A new array of <c>VectorSize</c> values.</returns>
        private double[] ParseRowVector(string[] splitLine)
        {
            double[] rowVector = new double[VectorSize];
            for (int i = 0; i < VectorSize; i++)
            {
                //invariant culture: "0.123" must not be read as 123 on comma-decimal locales
                rowVector[i] = Double.Parse(splitLine[i + 1], System.Globalization.CultureInfo.InvariantCulture);
            }

            return rowVector;
        }