/// <summary>
/// A default-constructed <c>IrbisStopWords</c> must produce
/// an empty array from <c>ToLines()</c>.
/// </summary>
public void IrbisStopWords_ToLines_1()
{
    var stopWords = new IrbisStopWords();

    int lineCount = stopWords.ToLines().Length;

    Assert.AreEqual(0, lineCount);
}
/// <summary>
/// A default-constructed <c>IrbisStopWords</c> must produce
/// a zero-length string from <c>ToText()</c>.
/// </summary>
public void IrbisStopWords_ToText_1()
{
    var stopWords = new IrbisStopWords();

    int textLength = stopWords.ToText().Length;

    Assert.AreEqual(0, textLength);
}
/// <summary>
/// Parses the stop-word file located under <c>TestDataPath</c>
/// and checks that the word "О" is reported as a stop word.
/// </summary>
public void IrbisStopWords_ParseFile_1()
{
    string stopWordsPath = Path.Combine(TestDataPath, ibis);

    IrbisStopWords parsed = IrbisStopWords.ParseFile(stopWordsPath);
    var isStopWord = parsed.IsStopWord("О");

    Assert.IsTrue(isStopWord);
}
/// <summary>
/// Console entry point. Reads records through <c>DirectAccess64</c>,
/// then produces three output files in the current directory:
/// <c>words.bin</c> (created here; filled while records are read),
/// <c>words.dic</c> (sorted dump of the dictionary, tab-separated) and
/// <c>words.csv</c> (one line per <c>BookData</c> entry of <c>words.bin</c>,
/// zero-padded to <c>longest</c> columns, followed by the entry's count).
/// </summary>
/// <param name="args">
/// Must contain exactly one element: the file name handed to
/// <c>DirectAccess64</c>. Otherwise a usage message is printed
/// and the method returns.
/// </param>
/// <remarks>
/// Any exception raised during processing is printed to the console;
/// the elapsed time is printed in every case.
/// </remarks>
static void Main(string[] args)
{
    if (args.Length != 1)
    {
        Console.WriteLine("Need 1 argument");
        return;
    }

    string inputFileName = args[0];

    Stopwatch stopwatch = Stopwatch.StartNew();

    try
    {
        stopwords = IrbisStopWords.ParseFile("IBIS.STW");

        string source = File.ReadAllText("words.pft");
        formatter = new PftFormatter()
        {
            Program = PftUtility.CompileProgram(source)
        };

        using (FileStream stream = File.Create("words.bin"))
        using (writer = new BinaryWriter(stream))
        using (accessor = new DirectAccess64(inputFileName))
        {
            // NOTE(review): the upper bound is hard-coded instead of being
            // queried from the accessor, and the loop below treats it as
            // exclusive -- confirm both are intended.
            maxMfn = 150000;
            Console.WriteLine("Max MFN={0}", maxMfn);

            // First, read all the records.
            // ReadRecord is defined elsewhere; presumably it uses the
            // writer/accessor/formatter set up above and fills the
            // dictionary, goodRecords and longest -- verify.
            for (int mfn = 1; mfn < maxMfn; mfn++)
            {
                ReadRecord(mfn);
            }
        }

        // Dump the dictionary sorted by key.
        using (StreamWriter textWriter = File.CreateText("words.dic"))
        {
            string[] keys = dictionary.Keys.ToArray();
            Array.Sort(keys);
            foreach (string key in keys)
            {
                textWriter.WriteLine("{0}\t{1}", key, dictionary[key]);
            }
        }

        Console.WriteLine
            (
                "Good records={0}, dictionary size={1}, longest array={2}",
                goodRecords,
                dictionary.Count,
                longest
            );

        // Count how many times each word code occurs across all entries.
        DictionaryCounterInt32<int> counter
            = new DictionaryCounterInt32<int>();
        using (FileStream stream = File.OpenRead("words.bin"))
        using (BinaryReader reader = new BinaryReader(stream))
        {
            while (stream.Position < stream.Length)
            {
                BookData data = new BookData();
                data.RestoreFromStream(reader);
                foreach (int word in data.Words)
                {
                    counter.Increment(word);
                }
            }
        }

        // DefaultIfEmpty keeps Max() from throwing when nothing was
        // counted (e.g. words.bin is empty); maxCount is then 0.
        int maxCount = counter.Values.DefaultIfEmpty().Max();
        int threshold = maxCount / 5 + 1;
        Console.WriteLine
            (
                "Max count={0}, threshold={1}",
                maxCount,
                threshold
            );

        // Second pass over words.bin: emit fixed-width CSV rows.
        using (FileStream stream = File.OpenRead("words.bin"))
        using (BinaryReader reader = new BinaryReader(stream))
        using (StreamWriter textWriter = File.CreateText("words.csv"))
        {
            while (stream.Position < stream.Length)
            {
                BookData data = new BookData();
                data.RestoreFromStream(reader);

                int i;
                for (i = 0; i < data.Words.Length; i++)
                {
                    textWriter.Write("{0},", data.Words[i]);
                }

                // Pad short rows with zeros so every row has
                // `longest` word columns.
                for (; i < longest; i++)
                {
                    textWriter.Write("0,");
                }

                textWriter.WriteLine("{0}", data.Count);
            }
        }

        Console.WriteLine("Complete");
    }
    catch (Exception exception)
    {
        Console.WriteLine(exception);
    }

    stopwatch.Stop();
    TimeSpan elapsed = stopwatch.Elapsed;
    Console.WriteLine("Elapsed: {0}", elapsed.ToAutoString());
}
/// <summary>
/// A single space must be reported as a stop word
/// by a default-constructed <c>IrbisStopWords</c>.
/// </summary>
public void IrbisStopWords_IsStopWord_2()
{
    var stopWords = new IrbisStopWords();

    var isStopWord = stopWords.IsStopWord(" ");

    Assert.IsTrue(isStopWord);
}
/// <summary>
/// The empty string must be reported as a stop word
/// by a default-constructed <c>IrbisStopWords</c>.
/// </summary>
public void IrbisStopWords_IsStopWord_1()
{
    var stopWords = new IrbisStopWords();

    var isStopWord = stopWords.IsStopWord(string.Empty);

    Assert.IsTrue(isStopWord);
}
/// <summary>
/// The value passed to the one-argument constructor must be
/// exposed unchanged through <c>FileName</c>.
/// </summary>
public void IrbisStopWords_Construction_2()
{
    var stopWords = new IrbisStopWords(ibis);

    var actualName = stopWords.FileName;

    Assert.AreEqual(ibis, actualName);
}
/// <summary>
/// The parameterless constructor must leave <c>FileName</c> null.
/// </summary>
public void IrbisStopWords_Construction_1()
{
    var stopWords = new IrbisStopWords();

    var actualName = stopWords.FileName;

    Assert.IsNull(actualName);
}