private void DownloadCSV()
        {
            IOUtils file = new IOUtils();
            ISdmxObjects sdmxObjects = null;
            DataTable dt = new DataTable();

            sdmxObjects = GetSdmxObjects();

            if (sdmxObjects == null)
                return;

            AddExportColumns(dt);
            PopolateDataTable(dt, sdmxObjects);

            string mySeparator = txtSeparator.Text.Trim().Equals( string.Empty ) ? ";" : txtSeparator.Text.Trim();

            file.SaveCSVFile(dt, GetFilename(), mySeparator, txtDelimiter.Text);
        }
 public void Dispose()
 {
     IOUtils.Dispose(reader);
 }
Exemplo n.º 3
0
 private static void FullPepXMLAndProteinProphetSetup()
 {
     if (GlobalVar.IsSimulation)
     {
         if (!GlobalVar.usePepXMLComputedFile)
         {
             //comet
             log.Info("Performing Comet search on full ms2 data");
             String fullCometFile = PostProcessingScripts.CometStandardSearch(InputFileOrganizer.MS2SimulationTestFile, InputFileOrganizer.preExperimentFilesFolder, true);
             InputFileOrganizer.OriginalCometOutput = fullCometFile;
         }
         //protein prophet
         log.Info("Perform a protein prophet search on full pepxml");
         String fullProteinProphetFile = PostProcessingScripts.ProteinProphetSearch(InputFileOrganizer.OriginalCometOutput, InputFileOrganizer.OutputFolderOfTheRun, true);
         ExecuteShellCommand.MoveFile(fullProteinProphetFile, InputFileOrganizer.preExperimentFilesFolder);
         InputFileOrganizer.OriginalProtXMLFile = Path.Combine(InputFileOrganizer.preExperimentFilesFolder, IOUtils.getBaseName(IOUtils.getBaseName(fullProteinProphetFile)) + ".prot.xml");
     }
 }
        public FixedGapTermsIndexReader(Directory dir, FieldInfos fieldInfos, string segment, int indexDivisor,
                                        IComparer <BytesRef> termComp, string segmentSuffix, IOContext context)
        {
            this.termComp = termComp;

            if (Debugging.AssertsEnabled)
            {
                Debugging.Assert(indexDivisor == -1 || indexDivisor > 0);
            }

            input = dir.OpenInput(IndexFileNames.SegmentFileName(segment, segmentSuffix, FixedGapTermsIndexWriter.TERMS_INDEX_EXTENSION), context);

            bool success = false;

            try
            {
                version = ReadHeader(input);

                if (version >= FixedGapTermsIndexWriter.VERSION_CHECKSUM)
                {
                    CodecUtil.ChecksumEntireFile(input);
                }

                indexInterval = input.ReadInt32();
                if (indexInterval < 1)
                {
                    throw new CorruptIndexException("invalid indexInterval: " + indexInterval + " (resource=" + input + ")");
                }
                this.indexDivisor = indexDivisor;

                if (indexDivisor < 0)
                {
                    totalIndexInterval = indexInterval;
                }
                else
                {
                    // In case terms index gets loaded, later, on demand
                    totalIndexInterval = indexInterval * indexDivisor;
                }
                if (Debugging.AssertsEnabled)
                {
                    Debugging.Assert(totalIndexInterval > 0);
                }

                SeekDir(input, dirOffset);

                // Read directory
                int numFields = input.ReadVInt32();
                if (numFields < 0)
                {
                    throw new CorruptIndexException("invalid numFields: " + numFields + " (resource=" + input + ")");
                }
                //System.out.println("FGR: init seg=" + segment + " div=" + indexDivisor + " nF=" + numFields);
                for (int i = 0; i < numFields; i++)
                {
                    int field         = input.ReadVInt32();
                    int numIndexTerms = input.ReadVInt32();
                    if (numIndexTerms < 0)
                    {
                        throw new CorruptIndexException("invalid numIndexTerms: " + numIndexTerms + " (resource=" + input + ")");
                    }
                    long termsStart         = input.ReadVInt64();
                    long indexStart         = input.ReadVInt64();
                    long packedIndexStart   = input.ReadVInt64();
                    long packedOffsetsStart = input.ReadVInt64();
                    if (packedIndexStart < indexStart)
                    {
                        throw new CorruptIndexException("invalid packedIndexStart: " + packedIndexStart + " indexStart: " + indexStart + "numIndexTerms: " + numIndexTerms + " (resource=" + input + ")");
                    }
                    FieldInfo      fieldInfo = fieldInfos.FieldInfo(field);
                    FieldIndexData previous  = fields.Put(fieldInfo, new FieldIndexData(this, fieldInfo, numIndexTerms, indexStart, termsStart, packedIndexStart, packedOffsetsStart));
                    if (previous != null)
                    {
                        throw new CorruptIndexException("duplicate field: " + fieldInfo.Name + " (resource=" + input + ")");
                    }
                }
                success = true;
            }
            finally
            {
                if (!success)
                {
                    IOUtils.DisposeWhileHandlingException(input);
                }
                if (indexDivisor > 0)
                {
                    input.Dispose();
                    input = null;
                    if (success)
                    {
                        indexLoaded = true;
                    }
                    termBytesReader = termBytes.Freeze(true);
                }
            }
        }
Exemplo n.º 5
0
 static List<TemplateItem> GetTemplateData()
 {
     var t = IOUtils.LoadTemplateItemsData();
     //ExampleJsonOutput();
     return t;
 }
Exemplo n.º 6
0
        /// <summary>
        ///     Loads a Game freak texture.
        /// </summary>
        /// <param name="data">The texture data</param>
        /// <returns>The image as a texture</returns>
        public static RenderBase.OTexture load(Stream data, bool keepOpen = false)
        {
            BinaryReader input       = new BinaryReader(data);
            long         descAddress = data.Position;

            data.Seek(8, SeekOrigin.Current);
            if (IOUtils.readStringWithLength(input, 7) != "texture")
            {
                return(null);
            }

            data.Seek(descAddress + 0x18, SeekOrigin.Begin);
            int texLength = input.ReadInt32();

            data.Seek(descAddress + 0x28, SeekOrigin.Begin);
            string texName = IOUtils.readStringWithLength(input, 0x40);

            data.Seek(descAddress + 0x68, SeekOrigin.Begin);
            ushort width      = input.ReadUInt16();
            ushort height     = input.ReadUInt16();
            ushort texFormat  = input.ReadUInt16();
            ushort texMipMaps = input.ReadUInt16();

            data.Seek(0x10, SeekOrigin.Current);
            byte[] texBuffer = input.ReadBytes(texLength);

            RenderBase.OTextureFormat fmt = RenderBase.OTextureFormat.dontCare;

            switch (texFormat)
            {
            case 0x2: fmt = RenderBase.OTextureFormat.rgb565; break;

            case 0x3: fmt = RenderBase.OTextureFormat.rgb8; break;

            case 0x4: fmt = RenderBase.OTextureFormat.rgba8; break;

            case 0x16: fmt = RenderBase.OTextureFormat.rgba4; break;

            case 0x17: fmt = RenderBase.OTextureFormat.rgba5551; break;

            case 0x23: fmt = RenderBase.OTextureFormat.la8; break;

            case 0x24: fmt = RenderBase.OTextureFormat.hilo8; break;

            case 0x25: fmt = RenderBase.OTextureFormat.l8; break;

            case 0x26: fmt = RenderBase.OTextureFormat.a8; break;

            case 0x27: fmt = RenderBase.OTextureFormat.la4; break;

            case 0x28: fmt = RenderBase.OTextureFormat.l4; break;

            case 0x29: fmt = RenderBase.OTextureFormat.a4; break;

            case 0x2a: fmt = RenderBase.OTextureFormat.etc1; break;

            case 0x2b: fmt = RenderBase.OTextureFormat.etc1a4; break;

            default: Debug.WriteLine("Unk tex fmt " + texFormat.ToString("X4") + " @ " + texName); break;
            }

            Bitmap tex = TextureCodec.decode(texBuffer, width, height, fmt);

            if (!keepOpen)
            {
                data.Close();
            }

            return(new RenderBase.OTexture(tex, texName));
        }
Exemplo n.º 7
0
        /// <summary>
        /// Make sure isCompiling() == true as setCompilingTrue() was called
        /// Make sure all parameters are not null
        /// Make sure this method is thread-safety: it cannot be called at the same time
        /// </summary>
        /// <param name="cmd"></param>
        /// <param name="workingDir"></param>
        /// <returns></returns>
        public TCompileResult compile(string cmd, string workingDir)
        {
            int oldTick = SystemUtils.getCurrentTimeMs();

            ICmdParser parser = CmdParserFactory.createCmdParser(cmd);

            List <string> inputFilePaths = null;
            string        outputFilePath = null;

            TCompileResult result = null;

            if (!isAllowCompiling())
            {
                result = new TCompileResult(false, 1, "error: Compiling is stopped!", 0, null); goto my_end;
            }

            try
            {
                parser.getInOutFilePath(workingDir, out inputFilePaths, out outputFilePath);
            }
            catch (Exception ex)
            {
                // Error
                result = new TCompileResult(false, 1, "error: " + ex.Message, 0, null);
                goto my_end;
            }

            if (inputFilePaths == null || inputFilePaths.Count == 0 || outputFilePath == null)
            {
                // Error
                result = new TCompileResult(false, 1, "error: Invalid command, input file is mandatory!", 0, null);
                goto my_end;
            }

            outputFilePath = PathUtils.combine(workingDir, outputFilePath);

            if (!isConnected())
            {
                // Local
                goto my_end;
            }

            #region Distribute

            // Send the input files to server
            foreach (string inputFilePath in inputFilePaths)
            {
                string f       = PathUtils.combine(workingDir, inputFilePath);
                string fNormal = PathUtils.normalizePath(f);
                if (!m_sentFiles.Contains(fNormal))
                {
                    if (sendFile(f))
                    {
                        m_sentFiles.Add(fNormal);
                    }
                    else
                    {
                        // Local
                        goto my_end;
                    }
                }
            }

            // Send compile request to server
            string  cmdToSend = parser.makeNetworkCmd();
            Message msg       = MessageCompileRequest.createMessage(cmdToSend);
            if (!m_messageStream.writeMessage(msg))
            {
                goto my_end;
            }

            if (!isAllowCompiling())
            {
                result = new TCompileResult(false, 1, "error: Compiling is stopped!", 0, null); goto my_end;
            }

            // Receive the compile result
            msg = m_messageStream.readMessage();
            if (msg == null)
            {
                goto my_end;
            }

            if (!isAllowCompiling())
            {
                result = new TCompileResult(false, 1, "error: Compiling is stopped!", 0, null); goto my_end;
            }

            // Parse and check the compile result
            MessageCompileResponse msgCompileRes = new MessageCompileResponse(msg);

            if (msgCompileRes.getWasExec() && msgCompileRes.getExitCode() != 0)
            {
                // Compile error
                // Nghia: TODO rem this for the hot fix
                //result = new TCompileResult(true, msgCompileRes.getExitCode(), msgCompileRes.getOutputText(), 0, m_host);
                goto my_end;
            }
            if (!msgCompileRes.getWasExec())
            {
                goto my_end;
            }

            // Compile OK by server
            // Save the file to disk
            if (!IOUtils.writeFile_Bytes(outputFilePath, msgCompileRes.getOFileData(),
                                         msgCompileRes.getOFileOffset(), msgCompileRes.getOFileSize()))
            {
                // No need to compile local as this is a serious error
                result = new TCompileResult(true, 1, "error: Could not save the output file to disk", 0, m_host);
                goto my_end;
            }

            // Everything is OK
            result = new TCompileResult(true, 0,
                                        "Remotely compiled from " + m_host + "\n" + msgCompileRes.getOutputText(), 0, m_host);

            #endregion

my_end:

            if (result == null)
            {
                // Local
                string specOutFilePath = parser.getLocalSpecificOutputFilePath();
                string specCmd         = parser.getLocalSpecificCommand();
                string specWorkingDir  = parser.getLocalSpecificWorkingDir();

                if (specOutFilePath == null)
                {
                    specOutFilePath = outputFilePath;
                }
                if (specCmd == null)
                {
                    specCmd = cmd;
                }
                if (specWorkingDir == null)
                {
                    specWorkingDir = workingDir;
                }

                specOutFilePath = PathUtils.combine(specWorkingDir, specOutFilePath);

                TProcessResult res = null;

                if (parser.needToLockLocal())
                {
                    lock (s_lock)
                    {
                        doLocal(specCmd, specWorkingDir, specOutFilePath, ref oldTick, ref res);
                    }
                }
                else
                {
                    doLocal(specCmd, specWorkingDir, specOutFilePath, ref oldTick, ref res);
                }

                result = new TCompileResult(res.wasExec, res.exitCode, "Locally compiled\n" + res.outputText, 0, null);

                if (result.wasExec && result.exitCode == 0 && specOutFilePath != outputFilePath)
                {
                    File.Move(specOutFilePath, outputFilePath);
                }
            }

            result.spentTimeMs = SystemUtils.getCurrentTimeMs() - oldTick;

            setCompiling(false);

            return(result);
        }
Exemplo n.º 8
0
            public override void Run()
            {
                using Stream stream = new NetworkStream(cs);
                BinaryReader intReader = new BinaryReader(stream);
                BinaryWriter intWriter = new BinaryWriter(stream);
                try
                {
                    int id = intReader.ReadInt32();
                    if (id < 0)
                    {
                        throw new IOException("Client closed connection before communication started.");
                    }

                    startingGun.Wait();
                    intWriter.Write(43);
                    stream.Flush();

                    while (true)
                    {
                        int command = stream.ReadByte();
                        if (command < 0)
                        {
                            return; // closed
                        }

                        UninterruptableMonitor.Enter(localLock);
                        try
                        {
                            int currentLock = lockedID[0];
                            if (currentLock == -2)
                            {
                                return; // another thread got error, so we exit, too!
                            }
                            switch (command)
                            {
                                case 1:
                                    // Locked
                                    if (currentLock != -1)
                                    {
                                        lockedID[0] = -2;
                                        throw IllegalStateException.Create("id " + id + " got lock, but " + currentLock + " already holds the lock");
                                    }
                                    lockedID[0] = id;
                                    break;

                                case 0:
                                    // Unlocked
                                    if (currentLock != id)
                                    {
                                        lockedID[0] = -2;
                                        throw IllegalStateException.Create("id " + id + " released the lock, but " + currentLock + " is the one holding the lock");
                                    }
                                    lockedID[0] = -1;
                                    break;

                                default:
                                    throw RuntimeException.Create("Unrecognized command: " + command);
                            }
                            intWriter.Write((byte)command);
                            stream.Flush();
                        }
                        finally
                        {
                            UninterruptableMonitor.Exit(localLock);
                        }
                    }
                }
                catch (Exception e) when (e.IsRuntimeException() || e.IsError())
                {
                    throw; // LUCENENET: CA2200: Rethrow to preserve stack details (https://docs.microsoft.com/en-us/visualstudio/code-quality/ca2200-rethrow-to-preserve-stack-details)
                }
                catch (Exception ioe) when (ioe.IsException())
                {
                    throw RuntimeException.Create(ioe);
                }
                finally
                {
                    IOUtils.DisposeWhileHandlingException(cs);
                }
            }
Exemplo n.º 9
0
        /// <summary>
        /// An example of a command line is
        /// <br />
        /// java -mx1g edu.stanford.nlp.parser.dvparser.CacheParseHypotheses -model /scr/horatio/dvparser/wsjPCFG.nocompact.simple.ser.gz -output cached9.simple.ser.gz  -treebank /afs/ir/data/linguistic-data/Treebank/3/parsed/mrg/wsj 200-202
        /// <br />
        /// java -mx4g edu.stanford.nlp.parser.dvparser.CacheParseHypotheses -model ~/scr/dvparser/wsjPCFG.nocompact.simple.ser.gz -output cached.train.simple.ser.gz -treebank /afs/ir/data/linguistic-data/Treebank/3/parsed/mrg/wsj 200-2199 -numThreads 6
        /// <br />
        /// java -mx4g edu.stanford.nlp.parser.dvparser.CacheParseHypotheses -model ~/scr/dvparser/chinese/xinhuaPCFG.ser.gz -output cached.xinhua.train.ser.gz -treebank /afs/ir/data/linguistic-data/Chinese-Treebank/6/data/utf8/bracketed  026-270,301-499,600-999
        /// </summary>
        /// <exception cref="System.IO.IOException"/>
        public static void Main(string[] args)
        {
            string parserModel = null;
            string output      = null;
            IList <Pair <string, IFileFilter> > treebanks = Generics.NewArrayList();
            int dvKBest    = 200;
            int numThreads = 1;

            for (int argIndex = 0; argIndex < args.Length;)
            {
                if (Sharpen.Runtime.EqualsIgnoreCase(args[argIndex], "-dvKBest"))
                {
                    dvKBest   = System.Convert.ToInt32(args[argIndex + 1]);
                    argIndex += 2;
                    continue;
                }
                if (Sharpen.Runtime.EqualsIgnoreCase(args[argIndex], "-parser") || args[argIndex].Equals("-model"))
                {
                    parserModel = args[argIndex + 1];
                    argIndex   += 2;
                    continue;
                }
                if (Sharpen.Runtime.EqualsIgnoreCase(args[argIndex], "-output"))
                {
                    output    = args[argIndex + 1];
                    argIndex += 2;
                    continue;
                }
                if (Sharpen.Runtime.EqualsIgnoreCase(args[argIndex], "-treebank"))
                {
                    Pair <string, IFileFilter> treebankDescription = ArgUtils.GetTreebankDescription(args, argIndex, "-treebank");
                    argIndex = argIndex + ArgUtils.NumSubArgs(args, argIndex) + 1;
                    treebanks.Add(treebankDescription);
                    continue;
                }
                if (Sharpen.Runtime.EqualsIgnoreCase(args[argIndex], "-numThreads"))
                {
                    numThreads = System.Convert.ToInt32(args[argIndex + 1]);
                    argIndex  += 2;
                    continue;
                }
                throw new ArgumentException("Unknown argument " + args[argIndex]);
            }
            if (parserModel == null)
            {
                throw new ArgumentException("Need to supply a parser model with -model");
            }
            if (output == null)
            {
                throw new ArgumentException("Need to supply an output filename with -output");
            }
            if (treebanks.IsEmpty())
            {
                throw new ArgumentException("Need to supply a treebank with -treebank");
            }
            log.Info("Writing output to " + output);
            log.Info("Loading parser model " + parserModel);
            log.Info("Writing " + dvKBest + " hypothesis trees for each tree");
            LexicalizedParser    parser      = ((LexicalizedParser)LexicalizedParser.LoadModel(parserModel, "-dvKBest", int.ToString(dvKBest)));
            CacheParseHypotheses cacher      = new CacheParseHypotheses(parser);
            ITreeTransformer     transformer = DVParser.BuildTrainTransformer(parser.GetOp());
            IList <Tree>         sentences   = new List <Tree>();

            foreach (Pair <string, IFileFilter> description in treebanks)
            {
                log.Info("Reading trees from " + description.first);
                Treebank treebank = parser.GetOp().tlpParams.MemoryTreebank();
                treebank.LoadPath(description.first, description.second);
                treebank = treebank.Transform(transformer);
                Sharpen.Collections.AddAll(sentences, treebank);
            }
            log.Info("Processing " + sentences.Count + " trees");
            IList <Pair <Tree, byte[]> > cache = Generics.NewArrayList();

            transformer = new SynchronizedTreeTransformer(transformer);
            MulticoreWrapper <Tree, Pair <Tree, byte[]> > wrapper = new MulticoreWrapper <Tree, Pair <Tree, byte[]> >(numThreads, new CacheParseHypotheses.CacheProcessor(cacher, parser, dvKBest, transformer));

            foreach (Tree tree in sentences)
            {
                wrapper.Put(tree);
                while (wrapper.Peek())
                {
                    cache.Add(wrapper.Poll());
                    if (cache.Count % 10 == 0)
                    {
                        System.Console.Out.WriteLine("Processed " + cache.Count + " trees");
                    }
                }
            }
            wrapper.Join();
            while (wrapper.Peek())
            {
                cache.Add(wrapper.Poll());
                if (cache.Count % 10 == 0)
                {
                    System.Console.Out.WriteLine("Processed " + cache.Count + " trees");
                }
            }
            System.Console.Out.WriteLine("Finished processing " + cache.Count + " trees");
            IOUtils.WriteObjectToFile(cache, output);
        }
Exemplo n.º 10
0
        public virtual void TestNrt()
        {
            Store.Directory   dir     = NewDirectory();
            Store.Directory   taxoDir = NewDirectory();
            IndexWriterConfig iwc     = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));

            // Don't allow tiny maxBufferedDocs; it can make this
            // test too slow:
            iwc.SetMaxBufferedDocs(Math.Max(500, iwc.MaxBufferedDocs));

            // MockRandom/AlcololicMergePolicy are too slow:
            TieredMergePolicy tmp = new TieredMergePolicy();

            tmp.FloorSegmentMB = .001;
            iwc.SetMergePolicy(tmp);
            IndexWriter  w      = new IndexWriter(dir, iwc);
            var          tw     = new DirectoryTaxonomyWriter(taxoDir);
            FacetsConfig config = new FacetsConfig();

            config.SetMultiValued("field", true);
            AtomicBoolean stop = new AtomicBoolean();

            // How many unique facets to index before stopping:
            int ordLimit = TEST_NIGHTLY ? 100000 : 6000;

            var indexer = new IndexerThread(w, config, tw, null, ordLimit, stop);

            var mgr = new SearcherTaxonomyManager(w, true, null, tw);

            var reopener = new ThreadAnonymousInnerClassHelper(this, stop, mgr);

            reopener.Name = "reopener";
            reopener.Start();

            indexer.Name = "indexer";
            indexer.Start();

            try
            {
                while (!stop.Get())
                {
                    SearcherAndTaxonomy pair = mgr.Acquire();
                    try
                    {
                        //System.out.println("search maxOrd=" + pair.taxonomyReader.getSize());
                        FacetsCollector sfc = new FacetsCollector();
                        pair.Searcher.Search(new MatchAllDocsQuery(), sfc);
                        Facets      facets = GetTaxonomyFacetCounts(pair.TaxonomyReader, config, sfc);
                        FacetResult result = facets.GetTopChildren(10, "field");
                        if (pair.Searcher.IndexReader.NumDocs > 0)
                        {
                            //System.out.println(pair.taxonomyReader.getSize());
                            Assert.True(result.ChildCount > 0);
                            Assert.True(result.LabelValues.Length > 0);
                        }

                        //if (VERBOSE) {
                        //System.out.println("TEST: facets=" + FacetTestUtils.toString(results.get(0)));
                        //}
                    }
                    finally
                    {
                        mgr.Release(pair);
                    }
                }
            }
            finally
            {
                indexer.Join();
                reopener.Join();
            }

            if (VERBOSE)
            {
                Console.WriteLine("TEST: now stop");
            }

            IOUtils.Dispose(mgr, tw, w, taxoDir, dir);
        }
Exemplo n.º 11
0
        public virtual void TestDirectory()
        {
            Store.Directory indexDir = NewDirectory();
            Store.Directory taxoDir  = NewDirectory();
            IndexWriter     w        = new IndexWriter(indexDir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())));
            var             tw       = new DirectoryTaxonomyWriter(taxoDir);

            // first empty commit
            w.Commit();
            tw.Commit();
            var          mgr    = new SearcherTaxonomyManager(indexDir, taxoDir, null);
            FacetsConfig config = new FacetsConfig();

            config.SetMultiValued("field", true);
            AtomicBoolean stop = new AtomicBoolean();

            // How many unique facets to index before stopping:
            int ordLimit = TEST_NIGHTLY ? 100000 : 6000;

            var indexer = new IndexerThread(w, config, tw, mgr, ordLimit, stop);

            indexer.Start();

            try
            {
                while (!stop.Get())
                {
                    SearcherAndTaxonomy pair = mgr.Acquire();
                    try
                    {
                        //System.out.println("search maxOrd=" + pair.taxonomyReader.getSize());
                        FacetsCollector sfc = new FacetsCollector();
                        pair.Searcher.Search(new MatchAllDocsQuery(), sfc);
                        Facets      facets = GetTaxonomyFacetCounts(pair.TaxonomyReader, config, sfc);
                        FacetResult result = facets.GetTopChildren(10, "field");
                        if (pair.Searcher.IndexReader.NumDocs > 0)
                        {
                            //System.out.println(pair.taxonomyReader.getSize());
                            Assert.True(result.ChildCount > 0);
                            Assert.True(result.LabelValues.Length > 0);
                        }

                        //if (VERBOSE) {
                        //System.out.println("TEST: facets=" + FacetTestUtils.toString(results.get(0)));
                        //}
                    }
                    finally
                    {
                        mgr.Release(pair);
                    }
                }
            }
            finally
            {
                indexer.Join();
            }

            if (VERBOSE)
            {
                Console.WriteLine("TEST: now stop");
            }

            IOUtils.Dispose(mgr, tw, w, taxoDir, indexDir);
        }
Exemplo n.º 12
0
		public override void WriteAdditionalClipboardData( ref string nodeInfo )
		{
			base.WriteAdditionalClipboardData( ref nodeInfo ):
			IOUtils.AddFieldValueToString( ref nodeInfo, IOUtils.Matrix4x4ToString( m_materialValue ) ):
		}
Exemplo n.º 13
0
		public override void ReadAdditionalClipboardData( ref string[] nodeParams )
		{
			base.ReadAdditionalClipboardData( ref nodeParams ):
			m_materialValue = IOUtils.StringToMatrix4x4( GetCurrentParam( ref nodeParams ) ):
		}
Exemplo n.º 14
0
		public override void WriteToString( ref string nodeInfo, ref string connectionsInfo )
		{
			base.WriteToString( ref nodeInfo, ref connectionsInfo ):
			IOUtils.AddFieldValueToString( ref nodeInfo, IOUtils.Matrix4x4ToString( m_defaultValue ) ):
		}
        private void DownloadDotStatDSD()
        {
            IOUtils file = new IOUtils();
            ISdmxObjects sdmxObjects = null;

            sdmxObjects = GetSdmxObjectsWithRef();

            if (sdmxObjects == null)
                return;

            file.SaveDotSTATFile(sdmxObjects, GetDotSTATExportType());
        }
        /// <exception cref="System.IO.IOException"/>
        public virtual void Annotate(ICoreMap document)
        {
            // write input file in GUTime format
            IElement inputXML  = ToInputXML(document);
            File     inputFile = File.CreateTempFile("gutime", ".input");
            //Document doc = new Document(inputXML);
            PrintWriter inputWriter = new PrintWriter(inputFile);

            inputWriter.Println(XMLUtils.NodeToString(inputXML, false));
            // new XMLOutputter().output(inputXML, inputWriter);
            inputWriter.Close();
            bool          useFirstDate = (!document.ContainsKey(typeof(CoreAnnotations.CalendarAnnotation)) && !document.ContainsKey(typeof(CoreAnnotations.DocDateAnnotation)));
            List <string> args         = new List <string>();

            args.Add("perl");
            args.Add("-I" + this.gutimePath.GetPath());
            args.Add(new File(this.gutimePath, "TimeTag.pl").GetPath());
            if (useFirstDate)
            {
                args.Add("-FDNW");
            }
            args.Add(inputFile.GetPath());
            // run GUTime on the input file
            ProcessBuilder process      = new ProcessBuilder(args);
            StringWriter   outputWriter = new StringWriter();

            SystemUtils.Run(process, outputWriter, null);
            string  output   = outputWriter.GetBuffer().ToString();
            Pattern docClose = Pattern.Compile("</DOC>.*", Pattern.Dotall);

            output = docClose.Matcher(output).ReplaceAll("</DOC>");
            //The TimeTag.pl result file contains next tags which must be removed
            output = output.ReplaceAll("<lex.*?>", string.Empty);
            output = output.Replace("</lex>", string.Empty);
            output = output.Replace("<NG>", string.Empty);
            output = output.Replace("</NG>", string.Empty);
            output = output.Replace("<VG>", string.Empty);
            output = output.Replace("</VG>", string.Empty);
            output = output.Replace("<s>", string.Empty);
            output = output.Replace("</s>", string.Empty);
            // parse the GUTime output
            IElement outputXML;

            try
            {
                outputXML = XMLUtils.ParseElement(output);
            }
            catch (Exception ex)
            {
                throw new Exception(string.Format("error:\n%s\ninput:\n%s\noutput:\n%s", ex, IOUtils.SlurpFile(inputFile), output), ex);
            }

            /*
             * try {
             * outputXML = new SAXBuilder().build(new StringReader(output)).getRootElement();
             * } catch (JDOMException e) {
             * throw new RuntimeException(String.format("error:\n%s\ninput:\n%s\noutput:\n%s",
             * e, IOUtils.slurpFile(inputFile), output));
             * } */
            inputFile.Delete();
            // get Timex annotations
            IList <ICoreMap> timexAnns = ToTimexCoreMaps(outputXML, document);

            document.Set(typeof(TimeAnnotations.TimexAnnotations), timexAnns);
            if (outputResults)
            {
                System.Console.Out.WriteLine(timexAnns);
            }
            // align Timex annotations to sentences
            int timexIndex = 0;

            foreach (ICoreMap sentence in document.Get(typeof(CoreAnnotations.SentencesAnnotation)))
            {
                int sentBegin = BeginOffset(sentence);
                int sentEnd   = EndOffset(sentence);
                // skip times before the sentence
                while (timexIndex < timexAnns.Count && BeginOffset(timexAnns[timexIndex]) < sentBegin)
                {
                    ++timexIndex;
                }
                // determine times within the sentence
                int sublistBegin = timexIndex;
                int sublistEnd   = timexIndex;
                while (timexIndex < timexAnns.Count && sentBegin <= BeginOffset(timexAnns[timexIndex]) && EndOffset(timexAnns[timexIndex]) <= sentEnd)
                {
                    ++sublistEnd;
                    ++timexIndex;
                }
                // set the sentence timexes
                sentence.Set(typeof(TimeAnnotations.TimexAnnotations), timexAnns.SubList(sublistBegin, sublistEnd));
            }
        }
Exemplo n.º 17
0
		public override void ReadFromString( ref string[] nodeParams )
		{
			base.ReadFromString( ref nodeParams ):
			m_defaultValue = IOUtils.StringToMatrix4x4( GetCurrentParam( ref nodeParams ) ):
		}
Exemplo n.º 18
0
 public void Dispose()
 {
     IOUtils.Dispose(origEnum, threadResources);
 }
Exemplo n.º 19
0
		public void WriteToString( ref string nodeInfo )
		{
			IOUtils.AddFieldValueToString( ref nodeInfo, m_value ):
			IOUtils.AddFieldValueToString( ref nodeInfo, m_active ):
			IOUtils.AddFieldValueToString( ref nodeInfo, m_nodeId ):
		}
Exemplo n.º 20
0
        public virtual void TestSparseFacets()
        {
            Store.Directory dir     = NewDirectory();
            Store.Directory taxoDir = NewDirectory();

            // Writes facet ords to a separate directory from the
            // main index:
            var taxoWriter = new DirectoryTaxonomyWriter(taxoDir, OpenMode.CREATE);

            RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, Similarity, TimeZone);
            FacetsConfig      config = new FacetsConfig();

            Document doc = new Document();

            doc.Add(new Int32Field("num", 10, Field.Store.NO));
            doc.Add(new FacetField("a", "foo1"));
            writer.AddDocument(config.Build(taxoWriter, doc));

            if (Random().NextBoolean())
            {
                writer.Commit();
            }

            doc = new Document();
            doc.Add(new Int32Field("num", 20, Field.Store.NO));
            doc.Add(new FacetField("a", "foo2"));
            doc.Add(new FacetField("b", "bar1"));
            writer.AddDocument(config.Build(taxoWriter, doc));

            if (Random().NextBoolean())
            {
                writer.Commit();
            }

            doc = new Document();
            doc.Add(new Int32Field("num", 30, Field.Store.NO));
            doc.Add(new FacetField("a", "foo3"));
            doc.Add(new FacetField("b", "bar2"));
            doc.Add(new FacetField("c", "baz1"));
            writer.AddDocument(config.Build(taxoWriter, doc));

            // NRT open
            IndexSearcher searcher = NewSearcher(writer.Reader);

            writer.Dispose();

            // NRT open
            var taxoReader = new DirectoryTaxonomyReader(taxoWriter);

            taxoWriter.Dispose();

            FacetsCollector c = new FacetsCollector();

            searcher.Search(new MatchAllDocsQuery(), c);

            TaxonomyFacetSumValueSource facets = new TaxonomyFacetSumValueSource(taxoReader, new FacetsConfig(), c, new Int32FieldSource("num"));

            // Ask for top 10 labels for any dims that have counts:
            IList <FacetResult> results = facets.GetAllDims(10);

            Assert.AreEqual(3, results.Count);
            Assert.AreEqual("dim=a path=[] value=60.0 childCount=3\n  foo3 (30.0)\n  foo2 (20.0)\n  foo1 (10.0)\n", results[0].ToString());
            Assert.AreEqual("dim=b path=[] value=50.0 childCount=2\n  bar2 (30.0)\n  bar1 (20.0)\n", results[1].ToString());
            Assert.AreEqual("dim=c path=[] value=30.0 childCount=1\n  baz1 (30.0)\n", results[2].ToString());

            IOUtils.Close(searcher.IndexReader, taxoReader, dir, taxoDir);
        }
Exemplo n.º 21
0
        /// <summary>
        /// Sole constructor. </summary>
        public CompressingStoredFieldsReader(Directory d, SegmentInfo si, string segmentSuffix, FieldInfos fn, IOContext context, string formatName, CompressionMode compressionMode)
        {
            this.compressionMode = compressionMode;
            string segment = si.Name;
            bool   success = false;

            fieldInfos = fn;
            numDocs    = si.DocCount;
            ChecksumIndexInput indexStream = null;

            try
            {
                string indexStreamFN  = IndexFileNames.SegmentFileName(segment, segmentSuffix, Lucene40StoredFieldsWriter.FIELDS_INDEX_EXTENSION);
                string fieldsStreamFN = IndexFileNames.SegmentFileName(segment, segmentSuffix, Lucene40StoredFieldsWriter.FIELDS_EXTENSION);
                // Load the index into memory
                indexStream = d.OpenChecksumInput(indexStreamFN, context);
                string codecNameIdx = formatName + CompressingStoredFieldsWriter.CODEC_SFX_IDX;
                version = CodecUtil.CheckHeader(indexStream, codecNameIdx, CompressingStoredFieldsWriter.VERSION_START, CompressingStoredFieldsWriter.VERSION_CURRENT);
                Debug.Assert(CodecUtil.HeaderLength(codecNameIdx) == indexStream.GetFilePointer());
                indexReader = new CompressingStoredFieldsIndexReader(indexStream, si);

                long maxPointer = -1;

                if (version >= CompressingStoredFieldsWriter.VERSION_CHECKSUM)
                {
                    maxPointer = indexStream.ReadVInt64();
                    CodecUtil.CheckFooter(indexStream);
                }
                else
                {
#pragma warning disable 612, 618
                    CodecUtil.CheckEOF(indexStream);
#pragma warning restore 612, 618
                }
                indexStream.Dispose();
                indexStream = null;

                // Open the data file and read metadata
                fieldsStream = d.OpenInput(fieldsStreamFN, context);
                if (version >= CompressingStoredFieldsWriter.VERSION_CHECKSUM)
                {
                    if (maxPointer + CodecUtil.FooterLength() != fieldsStream.Length)
                    {
                        throw new CorruptIndexException("Invalid fieldsStream maxPointer (file truncated?): maxPointer=" + maxPointer + ", length=" + fieldsStream.Length);
                    }
                }
                else
                {
                    maxPointer = fieldsStream.Length;
                }
                this.maxPointer = maxPointer;
                string codecNameDat  = formatName + CompressingStoredFieldsWriter.CODEC_SFX_DAT;
                int    fieldsVersion = CodecUtil.CheckHeader(fieldsStream, codecNameDat, CompressingStoredFieldsWriter.VERSION_START, CompressingStoredFieldsWriter.VERSION_CURRENT);
                if (version != fieldsVersion)
                {
                    throw new CorruptIndexException("Version mismatch between stored fields index and data: " + version + " != " + fieldsVersion);
                }
                Debug.Assert(CodecUtil.HeaderLength(codecNameDat) == fieldsStream.GetFilePointer());

                if (version >= CompressingStoredFieldsWriter.VERSION_BIG_CHUNKS)
                {
                    chunkSize = fieldsStream.ReadVInt32();
                }
                else
                {
                    chunkSize = -1;
                }
                packedIntsVersion = fieldsStream.ReadVInt32();
                decompressor      = compressionMode.NewDecompressor();
                this.bytes        = new BytesRef();

                success = true;
            }
            finally
            {
                if (!success)
                {
                    IOUtils.DisposeWhileHandlingException(this, indexStream);
                }
            }
        }
Exemplo n.º 22
0
        public virtual void TestWrongIndexFieldName()
        {
            Store.Directory dir     = NewDirectory();
            Store.Directory taxoDir = NewDirectory();

            // Writes facet ords to a separate directory from the
            // main index:
            var taxoWriter = new DirectoryTaxonomyWriter(taxoDir, OpenMode.CREATE);

            FacetsConfig config = new FacetsConfig();

            config.SetIndexFieldName("a", "$facets2");

            RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, Similarity, TimeZone);

            Document doc = new Document();

            doc.Add(new Int32Field("num", 10, Field.Store.NO));
            doc.Add(new FacetField("a", "foo1"));
            writer.AddDocument(config.Build(taxoWriter, doc));

            // NRT open
            IndexSearcher searcher = NewSearcher(writer.Reader);

            writer.Dispose();

            // NRT open
            var taxoReader = new DirectoryTaxonomyReader(taxoWriter);

            taxoWriter.Dispose();

            FacetsCollector c = new FacetsCollector();

            searcher.Search(new MatchAllDocsQuery(), c);

            TaxonomyFacetSumValueSource facets = new TaxonomyFacetSumValueSource(taxoReader, config, c, new Int32FieldSource("num"));

            // Ask for top 10 labels for any dims that have counts:
            IList <FacetResult> results = facets.GetAllDims(10);

            Assert.True(results.Count == 0);

            try
            {
                facets.GetSpecificValue("a");
                Fail("should have hit exc");
            }
            catch (System.ArgumentException)
            {
                // expected
            }

            try
            {
                facets.GetTopChildren(10, "a");
                Fail("should have hit exc");
            }
            catch (System.ArgumentException)
            {
                // expected
            }

            IOUtils.Close(searcher.IndexReader, taxoReader, dir, taxoDir);
        }
Exemplo n.º 23
0
 public override string /*!*/ ReadLine()
 {
     return(IOUtils.ReadTo(this, '\n'));
 }
Exemplo n.º 24
0
        public virtual void TestRandom()
        {
            string[]        tokens   = GetRandomTokens(10);
            Store.Directory indexDir = NewDirectory();
            Store.Directory taxoDir  = NewDirectory();

            RandomIndexWriter w      = new RandomIndexWriter(Random(), indexDir, Similarity, TimeZone);
            var             tw       = new DirectoryTaxonomyWriter(taxoDir);
            FacetsConfig    config   = new FacetsConfig();
            int             numDocs  = AtLeast(1000);
            int             numDims  = TestUtil.NextInt(Random(), 1, 7);
            IList <TestDoc> testDocs = GetRandomDocs(tokens, numDocs, numDims);

            foreach (TestDoc testDoc in testDocs)
            {
                Document doc = new Document();
                doc.Add(NewStringField("content", testDoc.content, Field.Store.NO));
                testDoc.value = Random().NextFloat();
                doc.Add(new SingleDocValuesField("value", testDoc.value));
                for (int j = 0; j < numDims; j++)
                {
                    if (testDoc.dims[j] != null)
                    {
                        doc.Add(new FacetField("dim" + j, testDoc.dims[j]));
                    }
                }
                w.AddDocument(config.Build(tw, doc));
            }

            // NRT open
            IndexSearcher searcher = NewSearcher(w.Reader);

            // NRT open
            var tr = new DirectoryTaxonomyReader(tw);

            ValueSource values = new SingleFieldSource("value");

            int iters = AtLeast(100);

            for (int iter = 0; iter < iters; iter++)
            {
                string searchToken = tokens[Random().Next(tokens.Length)];
                if (VERBOSE)
                {
                    Console.WriteLine("\nTEST: iter content=" + searchToken);
                }
                FacetsCollector fc = new FacetsCollector();
                FacetsCollector.Search(searcher, new TermQuery(new Term("content", searchToken)), 10, fc);
                Facets facets = new TaxonomyFacetSumValueSource(tr, config, fc, values);

                // Slow, yet hopefully bug-free, faceting:
                var expectedValues = new List <Dictionary <string, float?> >(numDims);
                for (int i = 0; i < numDims; i++)
                {
                    expectedValues.Add(new Dictionary <string, float?>());
                }

                foreach (TestDoc doc in testDocs)
                {
                    if (doc.content.Equals(searchToken))
                    {
                        for (int j = 0; j < numDims; j++)
                        {
                            if (doc.dims[j] != null)
                            {
                                float?v = expectedValues[j].ContainsKey(doc.dims[j]) ? expectedValues[j][doc.dims[j]] : null;
                                if (v == null)
                                {
                                    expectedValues[j][doc.dims[j]] = doc.value;
                                }
                                else
                                {
                                    expectedValues[j][doc.dims[j]] = (float)v + doc.value;
                                }
                            }
                        }
                    }
                }

                List <FacetResult> expected = new List <FacetResult>();
                for (int i = 0; i < numDims; i++)
                {
                    List <LabelAndValue> labelValues = new List <LabelAndValue>();
                    float totValue = 0;
                    foreach (KeyValuePair <string, float?> ent in expectedValues[i])
                    {
                        labelValues.Add(new LabelAndValue(ent.Key, ent.Value.Value));
                        totValue += ent.Value.Value;
                    }
                    SortLabelValues(labelValues);
                    if (totValue > 0)
                    {
                        expected.Add(new FacetResult("dim" + i, new string[0], totValue, labelValues.ToArray(), labelValues.Count));
                    }
                }

                // Sort by highest value, tie break by value:
                SortFacetResults(expected);

                IList <FacetResult> actual = facets.GetAllDims(10);

                // Messy: fixup ties
                SortTies(actual);

                if (VERBOSE)
                {
                    Console.WriteLine("expected=\n" + expected.ToString());
                    Console.WriteLine("actual=\n" + actual.ToString());
                }

                AssertFloatValuesEquals(expected, actual);
            }

            IOUtils.Close(w, tw, searcher.IndexReader, tr, indexDir, taxoDir);
        }
 public override void TearDown()
 {
     IOUtils.Dispose(client, callback, publishWriter, replicator, publishDir, handlerDir);
     base.TearDown();
 }
Exemplo n.º 26
0
 public override void TearDown()
 {
     IOUtils.Close(indexWriter, indexReader, directory);
     base.TearDown();
 }
Exemplo n.º 27
0
        /// <exception cref="System.IO.IOException"/>
        public static void Main(string[] args)
        {
            string basePath         = "/user/socherr/scr/projects/semComp/RNTN/src/params/";
            int    numSlices        = 25;
            bool   useEscapedParens = false;

            for (int argIndex = 0; argIndex < args.Length;)
            {
                if (Sharpen.Runtime.EqualsIgnoreCase(args[argIndex], "-slices"))
                {
                    numSlices = System.Convert.ToInt32(args[argIndex + 1]);
                    argIndex += 2;
                }
                else
                {
                    if (Sharpen.Runtime.EqualsIgnoreCase(args[argIndex], "-path"))
                    {
                        basePath  = args[argIndex + 1];
                        argIndex += 2;
                    }
                    else
                    {
                        if (Sharpen.Runtime.EqualsIgnoreCase(args[argIndex], "-useEscapedParens"))
                        {
                            useEscapedParens = true;
                            argIndex        += 1;
                        }
                        else
                        {
                            log.Info("Unknown argument " + args[argIndex]);
                            System.Environment.Exit(2);
                        }
                    }
                }
            }
            SimpleMatrix[] slices = new SimpleMatrix[numSlices];
            for (int i = 0; i < numSlices; ++i)
            {
                slices[i] = LoadMatrix(basePath + "bin/Wt_" + (i + 1) + ".bin", basePath + "Wt_" + (i + 1) + ".txt");
            }
            SimpleTensor tensor = new SimpleTensor(slices);

            log.Info("W tensor size: " + tensor.NumRows() + "x" + tensor.NumCols() + "x" + tensor.NumSlices());
            SimpleMatrix W = LoadMatrix(basePath + "bin/W.bin", basePath + "W.txt");

            log.Info("W matrix size: " + W.NumRows() + "x" + W.NumCols());
            SimpleMatrix Wcat = LoadMatrix(basePath + "bin/Wcat.bin", basePath + "Wcat.txt");

            log.Info("W cat size: " + Wcat.NumRows() + "x" + Wcat.NumCols());
            SimpleMatrix combinedWV = LoadMatrix(basePath + "bin/Wv.bin", basePath + "Wv.txt");

            log.Info("Word matrix size: " + combinedWV.NumRows() + "x" + combinedWV.NumCols());
            File vocabFile = new File(basePath + "vocab_1.txt");

            if (!vocabFile.Exists())
            {
                vocabFile = new File(basePath + "words.txt");
            }
            IList <string> lines = Generics.NewArrayList();

            foreach (string line in IOUtils.ReadLines(vocabFile))
            {
                lines.Add(line.Trim());
            }
            log.Info("Lines in vocab file: " + lines.Count);
            IDictionary <string, SimpleMatrix> wordVectors = Generics.NewTreeMap();

            for (int i_1 = 0; i_1 < lines.Count && i_1 < combinedWV.NumCols(); ++i_1)
            {
                string[] pieces = lines[i_1].Split(" +");
                if (pieces.Length == 0 || pieces.Length > 1)
                {
                    continue;
                }
                wordVectors[pieces[0]] = combinedWV.ExtractMatrix(0, numSlices, i_1, i_1 + 1);
                if (pieces[0].Equals("UNK"))
                {
                    wordVectors[SentimentModel.UnknownWord] = wordVectors["UNK"];
                }
            }
            // If there is no ",", we first try to look for an HTML escaping,
            // then fall back to "." as better than just a random word vector.
            // Same for "``" and ";"
            CopyWordVector(wordVectors, "&#44", ",");
            CopyWordVector(wordVectors, ".", ",");
            CopyWordVector(wordVectors, "&#59", ";");
            CopyWordVector(wordVectors, ".", ";");
            CopyWordVector(wordVectors, "&#96&#96", "``");
            CopyWordVector(wordVectors, "''", "``");
            if (useEscapedParens)
            {
                ReplaceWordVector(wordVectors, "(", "-LRB-");
                ReplaceWordVector(wordVectors, ")", "-RRB-");
            }
            RNNOptions op = new RNNOptions();

            op.numHid = numSlices;
            op.lowercaseWordVectors = false;
            if (Wcat.NumRows() == 2)
            {
                op.classNames         = new string[] { "Negative", "Positive" };
                op.equivalenceClasses = new int[][] { new int[] { 0 }, new int[] { 1 } };
                // TODO: set to null once old models are updated
                op.numClasses = 2;
            }
            if (!wordVectors.Contains(SentimentModel.UnknownWord))
            {
                wordVectors[SentimentModel.UnknownWord] = SimpleMatrix.Random(numSlices, 1, -0.00001, 0.00001, new Random());
            }
            SentimentModel model = SentimentModel.ModelFromMatrices(W, Wcat, tensor, wordVectors, op);

            model.SaveSerialized("matlab.ser.gz");
        }
Exemplo n.º 28
0
        internal Lucene42DocValuesProducer(SegmentReadState state, string dataCodec, string dataExtension, string metaCodec, string metaExtension)
        {
            maxDoc = state.SegmentInfo.DocCount;
            string metaName = IndexFileNames.SegmentFileName(state.SegmentInfo.Name, state.SegmentSuffix, metaExtension);
            // read in the entries from the metadata file.
            ChecksumIndexInput @in = state.Directory.OpenChecksumInput(metaName, state.Context);
            bool success           = false;

            ramBytesUsed = new AtomicInt64(RamUsageEstimator.ShallowSizeOfInstance(this.GetType()));
            try
            {
                version  = CodecUtil.CheckHeader(@in, metaCodec, VERSION_START, VERSION_CURRENT);
                numerics = new Dictionary <int, NumericEntry>();
                binaries = new Dictionary <int, BinaryEntry>();
                fsts     = new Dictionary <int, FSTEntry>();
                ReadFields(@in /*, state.FieldInfos // LUCENENET: Never read */);

                if (version >= VERSION_CHECKSUM)
                {
                    CodecUtil.CheckFooter(@in);
                }
                else
                {
#pragma warning disable 612, 618
                    CodecUtil.CheckEOF(@in);
#pragma warning restore 612, 618
                }

                success = true;
            }
            finally
            {
                if (success)
                {
                    IOUtils.Dispose(@in);
                }
                else
                {
                    IOUtils.DisposeWhileHandlingException(@in);
                }
            }

            success = false;
            try
            {
                string dataName = IndexFileNames.SegmentFileName(state.SegmentInfo.Name, state.SegmentSuffix, dataExtension);
                data = state.Directory.OpenInput(dataName, state.Context);
                int version2 = CodecUtil.CheckHeader(data, dataCodec, VERSION_START, VERSION_CURRENT);
                if (version != version2)
                {
                    throw new CorruptIndexException("Format versions mismatch");
                }

                success = true;
            }
            finally
            {
                if (!success)
                {
                    IOUtils.DisposeWhileHandlingException(this.data);
                }
            }
        }
Exemplo n.º 29
0
        public override void Write(Directory directory, string segmentName, string segmentSuffix, FieldInfos infos, IOContext context)
        {
            string      fileName = IndexFileNames.SegmentFileName(segmentName, "", FIELD_INFOS_EXTENSION);
            IndexOutput output   = directory.CreateOutput(fileName, context);
            bool        success  = false;

            try
            {
                output.WriteVInt32(FORMAT_PREFLEX_RW);
                output.WriteVInt32(infos.Count);
                foreach (FieldInfo fi in infos)
                {
                    sbyte bits = 0x0;
                    if (fi.HasVectors)
                    {
                        bits |= STORE_TERMVECTOR;
                    }
                    if (fi.OmitsNorms)
                    {
                        bits |= OMIT_NORMS;
                    }
                    if (fi.HasPayloads)
                    {
                        bits |= STORE_PAYLOADS;
                    }
                    if (fi.IsIndexed)
                    {
                        bits |= IS_INDEXED;
                        if (Debugging.AssertsEnabled)
                        {
                            Debugging.Assert(fi.IndexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS || !fi.HasPayloads);
                        }
                        if (fi.IndexOptions == IndexOptions.DOCS_ONLY)
                        {
                            bits |= OMIT_TERM_FREQ_AND_POSITIONS;
                        }
                        else if (fi.IndexOptions == IndexOptions.DOCS_AND_FREQS)
                        {
                            bits |= OMIT_POSITIONS;
                        }
                    }
                    output.WriteString(fi.Name);

                    /*
                     * we need to write the field number since IW tries
                     * to stabelize the field numbers across segments so the
                     * FI ordinal is not necessarily equivalent to the field number
                     */
                    output.WriteInt32(fi.Number);
                    output.WriteByte((byte)bits);
                    if (fi.IsIndexed && !fi.OmitsNorms)
                    {
                        // to allow null norm types we need to indicate if norms are written
                        // only in RW case
                        output.WriteByte((byte)(fi.NormType == Index.DocValuesType.NONE ? 0 : 1));
                    }
                    if (Debugging.AssertsEnabled)
                    {
                        Debugging.Assert(fi.Attributes == null);                           // not used or supported
                    }
                }
                success = true;
            }
            finally
            {
                if (success)
                {
                    output.Dispose();
                }
                else
                {
                    IOUtils.DisposeWhileHandlingException(output);
                }
            }
        }
Exemplo n.º 30
0
        public override void Build(InputIterator iterator)
        {
            if (iterator.HasContexts)
            {
                throw new System.ArgumentException("this suggester doesn't support contexts");
            }
            string prefix     = this.GetType().Name;
            var    directory  = OfflineSorter.DefaultTempDir();
            var    tempInput  = File.CreateTempFile(prefix, ".input", directory);
            var    tempSorted = File.CreateTempFile(prefix, ".sorted", directory);

            hasPayloads = iterator.HasPayloads;

            var writer = new OfflineSorter.ByteSequencesWriter(tempInput);

            OfflineSorter.ByteSequencesReader reader = null;
            var scratch = new BytesRef();

            TokenStreamToAutomaton ts2a = TokenStreamToAutomaton;

            bool success = false;

            count = 0;
            sbyte[] buffer = new sbyte[8];
            try
            {
                var      output = new ByteArrayDataOutput(buffer);
                BytesRef surfaceForm;

                while ((surfaceForm = iterator.Next()) != null)
                {
                    HashSet <IntsRef> paths = ToFiniteStrings(surfaceForm, ts2a);

                    maxAnalyzedPathsForOneInput = Math.Max(maxAnalyzedPathsForOneInput, paths.Count);

                    foreach (IntsRef path in paths)
                    {
                        Util.Fst.Util.ToBytesRef(path, scratch);

                        // length of the analyzed text (FST input)
                        if (scratch.Length > short.MaxValue - 2)
                        {
                            throw new System.ArgumentException("cannot handle analyzed forms > " + (short.MaxValue - 2) +
                                                               " in length (got " + scratch.Length + ")");
                        }
                        short analyzedLength = (short)scratch.Length;

                        // compute the required length:
                        // analyzed sequence + weight (4) + surface + analyzedLength (short)
                        int requiredLength = analyzedLength + 4 + surfaceForm.Length + 2;

                        BytesRef payload;

                        if (hasPayloads)
                        {
                            if (surfaceForm.Length > (short.MaxValue - 2))
                            {
                                throw new ArgumentException("cannot handle surface form > " + (short.MaxValue - 2) +
                                                            " in length (got " + surfaceForm.Length + ")");
                            }
                            payload = iterator.Payload;
                            // payload + surfaceLength (short)
                            requiredLength += payload.Length + 2;
                        }
                        else
                        {
                            payload = null;
                        }

                        buffer = ArrayUtil.Grow(buffer, requiredLength);

                        output.Reset(buffer);

                        output.WriteShort(analyzedLength);

                        output.WriteBytes(scratch.Bytes, scratch.Offset, scratch.Length);

                        output.WriteInt(EncodeWeight(iterator.Weight));

                        if (hasPayloads)
                        {
                            for (int i = 0; i < surfaceForm.Length; i++)
                            {
                                if (surfaceForm.Bytes[i] == PAYLOAD_SEP)
                                {
                                    throw new ArgumentException(
                                              "surface form cannot contain unit separator character U+001F; this character is reserved");
                                }
                            }
                            output.WriteShort((short)surfaceForm.Length);
                            output.WriteBytes(surfaceForm.Bytes, surfaceForm.Offset, surfaceForm.Length);
                            output.WriteBytes(payload.Bytes, payload.Offset, payload.Length);
                        }
                        else
                        {
                            output.WriteBytes(surfaceForm.Bytes, surfaceForm.Offset, surfaceForm.Length);
                        }

                        Debug.Assert(output.Position == requiredLength, output.Position + " vs " + requiredLength);

                        writer.Write(buffer, 0, output.Position);
                    }
                    count++;
                }
                writer.Dispose();

                // Sort all input/output pairs (required by FST.Builder):
                (new OfflineSorter(new AnalyzingComparator(hasPayloads))).Sort(tempInput, tempSorted);

                // Free disk space:
                tempInput.Delete();

                reader = new OfflineSorter.ByteSequencesReader(tempSorted);

                var outputs = new PairOutputs <long?, BytesRef>(PositiveIntOutputs.Singleton,
                                                                ByteSequenceOutputs.Singleton);
                var builder = new Builder <PairOutputs <long?, BytesRef> .Pair>(FST.INPUT_TYPE.BYTE1, outputs);

                // Build FST:
                BytesRef previousAnalyzed = null;
                BytesRef analyzed         = new BytesRef();
                BytesRef surface          = new BytesRef();
                IntsRef  scratchInts      = new IntsRef();
                var      input            = new ByteArrayDataInput();

                // Used to remove duplicate surface forms (but we
                // still index the hightest-weight one).  We clear
                // this when we see a new analyzed form, so it cannot
                // grow unbounded (at most 256 entries):
                var seenSurfaceForms = new HashSet <BytesRef>();

                var dedup = 0;
                while (reader.Read(scratch))
                {
                    input.Reset(scratch.Bytes, scratch.Offset, scratch.Length);
                    short analyzedLength = input.ReadShort();
                    analyzed.Grow(analyzedLength + 2);
                    input.ReadBytes(analyzed.Bytes, 0, analyzedLength);
                    analyzed.Length = analyzedLength;

                    long cost = input.ReadInt();

                    surface.Bytes = scratch.Bytes;
                    if (hasPayloads)
                    {
                        surface.Length = input.ReadShort();
                        surface.Offset = input.Position;
                    }
                    else
                    {
                        surface.Offset = input.Position;
                        surface.Length = scratch.Length - surface.Offset;
                    }

                    if (previousAnalyzed == null)
                    {
                        previousAnalyzed = new BytesRef();
                        previousAnalyzed.CopyBytes(analyzed);
                        seenSurfaceForms.Add(BytesRef.DeepCopyOf(surface));
                    }
                    else if (analyzed.Equals(previousAnalyzed))
                    {
                        dedup++;
                        if (dedup >= maxSurfaceFormsPerAnalyzedForm)
                        {
                            // More than maxSurfaceFormsPerAnalyzedForm
                            // dups: skip the rest:
                            continue;
                        }
                        if (seenSurfaceForms.Contains(surface))
                        {
                            continue;
                        }
                        seenSurfaceForms.Add(BytesRef.DeepCopyOf(surface));
                    }
                    else
                    {
                        dedup = 0;
                        previousAnalyzed.CopyBytes(analyzed);
                        seenSurfaceForms.Clear();
                        seenSurfaceForms.Add(BytesRef.DeepCopyOf(surface));
                    }

                    // TODO: I think we can avoid the extra 2 bytes when
                    // there is no dup (dedup==0), but we'd have to fix
                    // the exactFirst logic ... which would be sort of
                    // hairy because we'd need to special case the two
                    // (dup/not dup)...

                    // NOTE: must be byte 0 so we sort before whatever
                    // is next
                    analyzed.Bytes[analyzed.Offset + analyzed.Length]     = 0;
                    analyzed.Bytes[analyzed.Offset + analyzed.Length + 1] = (sbyte)dedup;
                    analyzed.Length += 2;

                    Util.Fst.Util.ToIntsRef(analyzed, scratchInts);
                    //System.out.println("ADD: " + scratchInts + " -> " + cost + ": " + surface.utf8ToString());
                    if (!hasPayloads)
                    {
                        builder.Add(scratchInts, outputs.NewPair(cost, BytesRef.DeepCopyOf(surface)));
                    }
                    else
                    {
                        int      payloadOffset = input.Position + surface.Length;
                        int      payloadLength = scratch.Length - payloadOffset;
                        BytesRef br            = new BytesRef(surface.Length + 1 + payloadLength);
                        Array.Copy(surface.Bytes, surface.Offset, br.Bytes, 0, surface.Length);
                        br.Bytes[surface.Length] = PAYLOAD_SEP;
                        Array.Copy(scratch.Bytes, payloadOffset, br.Bytes, surface.Length + 1, payloadLength);
                        br.Length = br.Bytes.Length;
                        builder.Add(scratchInts, outputs.NewPair(cost, br));
                    }
                }
                fst = builder.Finish();

                //Util.dotToFile(fst, "/tmp/suggest.dot");

                success = true;
            }
            finally
            {
                if (success)
                {
                    IOUtils.Close(reader, writer);
                }
                else
                {
                    IOUtils.CloseWhileHandlingException(reader, writer);
                }

                tempInput.Delete();
                tempSorted.Delete();
            }
        }
        private void DownloadDotStatCodelist()
        {
            IOUtils file = new IOUtils();
            ISdmxObjects sdmxObjects = null;

            sdmxObjects = GetSdmxObjects();

            if (sdmxObjects == null)
                return;

            file.SaveDotSTATCodelistFile(sdmxObjects.Codelists.First());
        }
Exemplo n.º 32
0
        public override long Decompress(Stream instream, long inLength, Stream outstream)
        {
            #region Format definition in NDSTEK style
            /*  Data header (32bit)
                  Bit 0-3   Reserved
                  Bit 4-7   Compressed type (must be 1 for LZ77)
                  Bit 8-31  Size of decompressed data. if 0, the next 4 bytes are decompressed length
                Repeat below. Each Flag Byte followed by eight Blocks.
                Flag data (8bit)
                  Bit 0-7   Type Flags for next 8 Blocks, MSB first
                Block Type 0 - Uncompressed - Copy 1 Byte from Source to Dest
                  Bit 0-7   One data byte to be copied to dest
                Block Type 1 - Compressed - Copy LEN Bytes from Dest-Disp-1 to Dest
                    If Reserved is 0: - Default
                      Bit 0-3   Disp MSBs
                      Bit 4-7   LEN - 3
                      Bit 8-15  Disp LSBs
                    If Reserved is 1: - Higher compression rates for files with (lots of) long repetitions
                      Bit 4-7   Indicator
                        If Indicator > 1:
                            Bit 0-3    Disp MSBs
                            Bit 4-7    LEN - 1 (same bits as Indicator)
                            Bit 8-15   Disp LSBs
                        If Indicator is 1: A(B CD E)(F GH)
                            Bit 0-3     (LEN - 0x111) MSBs
                            Bit 4-7     Indicator; unused
                            Bit 8-15    (LEN- 0x111) 'middle'-SBs
                            Bit 16-19   Disp MSBs
                            Bit 20-23   (LEN - 0x111) LSBs
                            Bit 24-31   Disp LSBs
                        If Indicator is 0:
                            Bit 0-3     (LEN - 0x11) MSBs
                            Bit 4-7     Indicator; unused
                            Bit 8-11    Disp MSBs
                            Bit 12-15   (LEN - 0x11) LSBs
                            Bit 16-23   Disp LSBs
             */
            #endregion

            long readBytes = 0;

            byte type = (byte)instream.ReadByte();
            if (type != base.magicByte)
                throw new InvalidDataException(String.Format(Main.Get_Traduction("S03"), type.ToString("X")));
            byte[] sizeBytes = new byte[3];
            instream.Read(sizeBytes, 0, 3);
            int decompressedSize = IOUtils.ToNDSu24(sizeBytes, 0);
            readBytes += 4;
            if (decompressedSize == 0)
            {
                sizeBytes = new byte[4];
                instream.Read(sizeBytes, 0, 4);
                decompressedSize = IOUtils.ToNDSs32(sizeBytes, 0);
                readBytes += 4;
            }

            // the maximum 'DISP-1' is still 0xFFF.
            int bufferLength = 0x1000;
            byte[] buffer = new byte[bufferLength];
            int bufferOffset = 0;
            
            int currentOutSize = 0;
            int flags = 0, mask = 1;
            while (currentOutSize < decompressedSize)
            {
                // (throws when requested new flags byte is not available)
                #region Update the mask. If all flag bits have been read, get a new set.
                // the current mask is the mask used in the previous run. So if it masks the
                // last flag bit, get a new flags byte.
                if (mask == 1)
                {
                    if (readBytes >= inLength)
                        throw new NotEnoughDataException(currentOutSize, decompressedSize);
                    flags = instream.ReadByte(); readBytes++;
                    if (flags < 0)
                        throw new StreamTooShortException();
                    mask = 0x80;
                }
                else
                {
                    mask >>= 1;
                }
                #endregion

                // bit = 1 <=> compressed.
                if ((flags & mask) > 0)
                {
                    // (throws when not enough bytes are available)
                    #region Get length and displacement('disp') values from next 2, 3 or 4 bytes

                    // read the first byte first, which also signals the size of the compressed block
                    if (readBytes >= inLength)
                        throw new NotEnoughDataException(currentOutSize, decompressedSize);
                    int byte1 = instream.ReadByte(); readBytes++;
                    if (byte1 < 0)
                        throw new StreamTooShortException();

                    int length = byte1 >> 4;
                    int disp = -1;
                    if (length == 0)
                    {
                        #region case 0; 0(B C)(D EF) + (0x11)(0x1) = (LEN)(DISP)

                        // case 0:
                        // data = AB CD EF (with A=0)
                        // LEN = ABC + 0x11 == BC + 0x11
                        // DISP = DEF + 1

                        // we need two more bytes available
                        if (readBytes + 1 >= inLength)
                            throw new NotEnoughDataException(currentOutSize, decompressedSize);
                        int byte2 = instream.ReadByte(); readBytes++;
                        int byte3 = instream.ReadByte(); readBytes++;
                        if (byte3 < 0)
                            throw new StreamTooShortException();

                        length = (((byte1 & 0x0F) << 4) | (byte2 >> 4)) + 0x11;
                        disp = (((byte2 & 0x0F) << 8) | byte3) + 0x1;

                        #endregion
                    }
                    else if (length == 1)
                    {
                        #region case 1: 1(B CD E)(F GH) + (0x111)(0x1) = (LEN)(DISP)

                        // case 1:
                        // data = AB CD EF GH (with A=1)
                        // LEN = BCDE + 0x111
                        // DISP = FGH + 1

                        // we need three more bytes available
                        if (readBytes + 2 >= inLength)
                            throw new NotEnoughDataException(currentOutSize, decompressedSize);
                        int byte2 = instream.ReadByte(); readBytes++;
                        int byte3 = instream.ReadByte(); readBytes++;
                        int byte4 = instream.ReadByte(); readBytes++;
                        if (byte4 < 0)
                            throw new StreamTooShortException();

                        length = (((byte1 & 0x0F) << 12) | (byte2 << 4) | (byte3 >> 4)) + 0x111;
                        disp = (((byte3 & 0x0F) << 8) | byte4) + 0x1;

                        #endregion
                    }
                    else
                    {
                        #region case > 1: (A)(B CD) + (0x1)(0x1) = (LEN)(DISP)

                        // case other:
                        // data = AB CD
                        // LEN = A + 1
                        // DISP = BCD + 1

                        // we need only one more byte available
                        if (readBytes >= inLength)
                            throw new NotEnoughDataException(currentOutSize, decompressedSize);
                        int byte2 = instream.ReadByte(); readBytes++;
                        if (byte2 < 0)
                            throw new StreamTooShortException();

                        length = ((byte1 & 0xF0) >> 4) + 0x1;
                        disp = (((byte1 & 0x0F) << 8) | byte2) + 0x1;

                        #endregion
                    }

                    if (disp > currentOutSize)
                        throw new InvalidDataException(String.Format(Main.Get_Traduction("S04"), disp, currentOutSize.ToString("X"), 
                            instream.Position.ToString("X"), (byte1 >> 4).ToString("X")));
                    #endregion

                    int bufIdx = bufferOffset + bufferLength - disp;
                    for (int i = 0; i < length; i++)
                    {
                        byte next = buffer[bufIdx % bufferLength];
                        bufIdx++;
                        outstream.WriteByte(next);
                        buffer[bufferOffset] = next;
                        bufferOffset = (bufferOffset + 1) % bufferLength;
                    }
                    currentOutSize += length;
                }
                else
                {
                    if (readBytes >= inLength)
                        throw new NotEnoughDataException(currentOutSize, decompressedSize);
                    int next = instream.ReadByte(); readBytes++;
                    if (next < 0)
                        throw new StreamTooShortException();

                    outstream.WriteByte((byte)next); currentOutSize++;
                    buffer[bufferOffset] = (byte)next;
                    bufferOffset = (bufferOffset + 1) % bufferLength;
                }
            }

            if (readBytes < inLength)
            {
                // the input may be 4-byte aligned.
                if ((readBytes ^ (readBytes & 3)) + 4 < inLength)
                    throw new TooMuchInputException(readBytes, inLength);
            }

            return decompressedSize;
        }
        private void DownloadSDMX()
        {
            IOUtils file = new IOUtils();
            ISdmxObjects sdmxObjects = null;

            sdmxObjects = GetSdmxObjects();

            if (sdmxObjects != null)
                file.SaveSDMXFile(sdmxObjects, sdmxVersion, GetFilename());
        }
Exemplo n.º 34
0
        public override FieldInfos Read(Directory directory, string segmentName, string segmentSuffix, IOContext iocontext)
        {
            string     fileName = IndexFileNames.SegmentFileName(segmentName, "", Lucene40FieldInfosFormat.FIELD_INFOS_EXTENSION);
            IndexInput input    = directory.OpenInput(fileName, iocontext);

            bool success = false;

            try
            {
                CodecUtil.CheckHeader(input, Lucene40FieldInfosFormat.CODEC_NAME, Lucene40FieldInfosFormat.FORMAT_START, Lucene40FieldInfosFormat.FORMAT_CURRENT);

                int         size  = input.ReadVInt32(); //read in the size
                FieldInfo[] infos = new FieldInfo[size];

                for (int i = 0; i < size; i++)
                {
                    string       name            = input.ReadString();
                    int          fieldNumber     = input.ReadVInt32();
                    byte         bits            = input.ReadByte();
                    bool         isIndexed       = (bits & Lucene40FieldInfosFormat.IS_INDEXED) != 0;
                    bool         storeTermVector = (bits & Lucene40FieldInfosFormat.STORE_TERMVECTOR) != 0;
                    bool         omitNorms       = (bits & Lucene40FieldInfosFormat.OMIT_NORMS) != 0;
                    bool         storePayloads   = (bits & Lucene40FieldInfosFormat.STORE_PAYLOADS) != 0;
                    IndexOptions indexOptions;
                    if (!isIndexed)
                    {
                        indexOptions = IndexOptions.NONE;
                    }
                    else if ((bits & Lucene40FieldInfosFormat.OMIT_TERM_FREQ_AND_POSITIONS) != 0)
                    {
                        indexOptions = IndexOptions.DOCS_ONLY;
                    }
                    else if ((bits & Lucene40FieldInfosFormat.OMIT_POSITIONS) != 0)
                    {
                        indexOptions = IndexOptions.DOCS_AND_FREQS;
                    }
                    else if ((bits & Lucene40FieldInfosFormat.STORE_OFFSETS_IN_POSTINGS) != 0)
                    {
                        indexOptions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS;
                    }
                    else
                    {
                        indexOptions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS;
                    }

                    // LUCENE-3027: past indices were able to write
                    // storePayloads=true when omitTFAP is also true,
                    // which is invalid.  We correct that, here:
                    // LUCENENET specific - to avoid boxing, changed from CompareTo() to IndexOptionsComparer.Compare()
                    if (isIndexed && IndexOptionsComparer.Default.Compare(indexOptions, IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) < 0)
                    {
                        storePayloads = false;
                    }
                    // DV Types are packed in one byte
                    byte val = input.ReadByte();
                    LegacyDocValuesType          oldValuesType = GetDocValuesType((sbyte)(val & 0x0F));
                    LegacyDocValuesType          oldNormsType  = GetDocValuesType((sbyte)(((int)((uint)val >> 4)) & 0x0F));
                    IDictionary <string, string> attributes    = input.ReadStringStringMap();
                    if (oldValuesType.GetMapping() != DocValuesType.NONE)
                    {
                        attributes[LEGACY_DV_TYPE_KEY] = oldValuesType.ToString();
                    }
                    if (oldNormsType.GetMapping() != DocValuesType.NONE)
                    {
                        if (oldNormsType.GetMapping() != DocValuesType.NUMERIC)
                        {
                            throw new CorruptIndexException("invalid norm type: " + oldNormsType + " (resource=" + input + ")");
                        }
                        attributes[LEGACY_NORM_TYPE_KEY] = oldNormsType.ToString();
                    }
                    infos[i] = new FieldInfo(name, isIndexed, fieldNumber, storeTermVector, omitNorms, storePayloads, indexOptions, oldValuesType.GetMapping(), oldNormsType.GetMapping(), attributes);
                }
                CodecUtil.CheckEOF(input);
                FieldInfos fieldInfos = new FieldInfos(infos);
                success = true;
                return(fieldInfos);
            }
            finally
            {
                if (success)
                {
                    input.Dispose();
                }
                else
                {
                    IOUtils.DisposeWhileHandlingException(input);
                }
            }
        }