/// <summary>
/// Checks that <c>ToLines</c> on a default-constructed
/// <c>IrbisStopWords</c> returns an array with no elements.
/// </summary>
public void IrbisStopWords_ToLines_1()
        {
            var emptyList = new IrbisStopWords();
            string[] produced = emptyList.ToLines();

            Assert.AreEqual(0, produced.Length);
        }
        /// <summary>
        /// Checks that <c>ToText</c> on a default-constructed
        /// <c>IrbisStopWords</c> returns a string of length zero.
        /// </summary>
        public void IrbisStopWords_ToText_1()
        {
            var emptyList = new IrbisStopWords();

            string rendered = emptyList.ToText();

            Assert.AreEqual(0, rendered.Length);
        }
        /// <summary>
        /// Parses the stop-word file whose path is built by combining
        /// <c>TestDataPath</c> with <c>ibis</c>, then checks that "О"
        /// is reported as a stop word by the parsed list.
        /// </summary>
        public void IrbisStopWords_ParseFile_1()
        {
            string path = Path.Combine(TestDataPath, ibis);
            IrbisStopWords parsed = IrbisStopWords.ParseFile(path);

            bool recognized = parsed.IsStopWord("О");

            Assert.IsTrue(recognized);
        }
Example #4
0
        /// <summary>
        /// Console entry point. Requires exactly one argument, which is handed
        /// to <c>DirectAccess64</c> as the input file name.
        /// </summary>
        /// <remarks>
        /// Steps performed inside the try block, in order:
        /// 1. load stop words from "IBIS.STW" and compile the program text of "words.pft";
        /// 2. with "words.bin" open for writing, call <c>ReadRecord</c> for every MFN
        ///    from 1 up to, but not including, <c>maxMfn</c>;
        /// 3. write the dictionary, sorted by key, to "words.dic";
        /// 4. re-read "words.bin" and count how often each value in <c>BookData.Words</c> occurs;
        /// 5. re-read "words.bin" once more and write one row per entry to "words.csv",
        ///    padding each row with zeros up to <c>longest</c> columns.
        /// Any exception raised by these steps is printed to the console. Once the
        /// argument check has passed, the elapsed time is printed at the end.
        /// </remarks>
        /// <param name="args">Command-line arguments; exactly one is expected.</param>
        static void Main(string[] args)
        {
            if (args.Length != 1)
            {
                Console.WriteLine("Need 1 argument");
                return;
            }

            Stopwatch timer = Stopwatch.StartNew();
            string masterFile = args[0];

            try
            {
                stopwords = IrbisStopWords.ParseFile("IBIS.STW");

                // The script text is read before the formatter is constructed.
                string script = File.ReadAllText("words.pft");
                formatter = new PftFormatter
                {
                    Program = PftUtility.CompileProgram(script)
                };

                // NOTE: a parallel variant (an ActionBlock<MarcRecord> around
                // ProcessRecord with MaxDegreeOfParallelism = 4) used to be
                // sketched here in commented-out form; records are processed
                // sequentially instead.

                using (FileStream binOutput = File.Create("words.bin"))
                using (writer = new BinaryWriter(binOutput))
                using (accessor = new DirectAccess64(masterFile))
                {
                    // A fixed limit is used; querying accessor.GetMaxMfn()
                    // is intentionally not done here.
                    maxMfn = 150000;
                    Console.WriteLine("Max MFN={0}", maxMfn);

                    // First, read all the records. The bound is exclusive,
                    // so the record numbered maxMfn itself is not read.
                    int mfn = 1;
                    while (mfn < maxMfn)
                    {
                        ReadRecord(mfn);
                        mfn++;
                    }
                }

                using (StreamWriter dicWriter = File.CreateText("words.dic"))
                {
                    string[] sortedKeys = dictionary.Keys.ToArray();
                    Array.Sort(sortedKeys);
                    for (int index = 0; index < sortedKeys.Length; index++)
                    {
                        string entry = sortedKeys[index];
                        dicWriter.WriteLine("{0}\t{1}", entry, dictionary[entry]);
                    }
                }

                // Nothing to wait for here: the asynchronous pipeline is disabled.

                Console.WriteLine
                (
                    "Good records={0}, dictionary size={1}, longest array={2}",
                    goodRecords,
                    dictionary.Count,
                    longest
                );

                // Pass 1 over words.bin: tally every value found in Words.
                var tally = new DictionaryCounterInt32<int>();
                using (FileStream binInput = File.OpenRead("words.bin"))
                using (BinaryReader binReader = new BinaryReader(binInput))
                {
                    while (binInput.Position < binInput.Length)
                    {
                        BookData book = new BookData();
                        book.RestoreFromStream(binReader);
                        foreach (int wordValue in book.Words)
                        {
                            tally.Increment(wordValue);
                        }
                    }
                }

                int maxCount  = tally.Values.Max();
                int threshold = maxCount / 5 + 1;
                Console.WriteLine
                (
                    "Max count={0}, threshold={1}",
                    maxCount,
                    threshold
                );

                // Pass 2 over words.bin: emit fixed-width CSV rows.
                using (FileStream binInput = File.OpenRead("words.bin"))
                using (BinaryReader binReader = new BinaryReader(binInput))
                using (StreamWriter csvWriter = File.CreateText("words.csv"))
                {
                    while (binInput.Position < binInput.Length)
                    {
                        BookData book = new BookData();
                        book.RestoreFromStream(binReader);

                        // Real values first...
                        int column = 0;
                        while (column < book.Words.Length)
                        {
                            csvWriter.Write("{0},", book.Words[column]);
                            column++;
                        }

                        // ...then zero padding up to the longest row...
                        while (column < longest)
                        {
                            csvWriter.Write("0,");
                            column++;
                        }

                        // ...and finally the Count value closes the row.
                        csvWriter.WriteLine("{0}", book.Count);
                    }
                }

                Console.WriteLine("Complete");
            }
            catch (Exception error)
            {
                Console.WriteLine(error);
            }

            timer.Stop();

            Console.WriteLine("Elapsed: {0}", timer.Elapsed.ToAutoString());
        }
        /// <summary>
        /// Checks that a default-constructed <c>IrbisStopWords</c> reports
        /// a single-space string as a stop word.
        /// </summary>
        public void IrbisStopWords_IsStopWord_2()
        {
            var emptyList = new IrbisStopWords();

            bool verdict = emptyList.IsStopWord(" ");

            Assert.IsTrue(verdict);
        }
        /// <summary>
        /// Checks that a default-constructed <c>IrbisStopWords</c> reports
        /// the empty string as a stop word.
        /// </summary>
        public void IrbisStopWords_IsStopWord_1()
        {
            var emptyList = new IrbisStopWords();

            bool verdict = emptyList.IsStopWord(string.Empty);

            Assert.IsTrue(verdict);
        }
        /// <summary>
        /// Checks that the value passed to the one-argument constructor
        /// (here <c>ibis</c>) is exposed unchanged through <c>FileName</c>.
        /// </summary>
        public void IrbisStopWords_Construction_2()
        {
            var named = new IrbisStopWords(ibis);

            var reportedName = named.FileName;

            Assert.AreEqual(ibis, reportedName);
        }
        /// <summary>
        /// Checks that <c>FileName</c> is null on an instance created
        /// with the parameterless constructor.
        /// </summary>
        public void IrbisStopWords_Construction_1()
        {
            var unnamed = new IrbisStopWords();

            var reportedName = unnamed.FileName;

            Assert.IsNull(reportedName);
        }