private void DownloadCSV() { IOUtils file = new IOUtils(); ISdmxObjects sdmxObjects = null; DataTable dt = new DataTable(); sdmxObjects = GetSdmxObjects(); if (sdmxObjects == null) return; AddExportColumns(dt); PopolateDataTable(dt, sdmxObjects); string mySeparator = txtSeparator.Text.Trim().Equals( string.Empty ) ? ";" : txtSeparator.Text.Trim(); file.SaveCSVFile(dt, GetFilename(), mySeparator, txtDelimiter.Text); }
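// Editor's note (hedged sketch): the separator fallback above, isolated for clarity.
// "userInput" stands in for txtSeparator.Text; the TextBox fields and the SaveCSVFile
// signature are assumptions read off the call site, not confirmed API.
private static string ResolveSeparator(string userInput)
{
    string trimmed = (userInput ?? string.Empty).Trim();
    return trimmed.Length == 0 ? ";" : trimmed; // blank input falls back to ";"
}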
public void Dispose() { IOUtils.Dispose(reader); }
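// Editor's note (hedged sketch): IOUtils.Dispose and IOUtils.DisposeWhileHandlingException
// recur throughout these snippets. This is a minimal sketch of the dispose-many pattern they
// implement, not the actual Lucene.NET implementation (which aggregates exceptions more
// carefully and preserves stack traces).
public static void DisposeAll(params IDisposable[] resources)
{
    Exception first = null;
    foreach (IDisposable r in resources)
    {
        try { r?.Dispose(); }
        catch (Exception e) { if (first == null) first = e; } // later failures are suppressed
    }
    if (first != null) throw first; // surface the first failure after all disposals
}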
private static void FullPepXMLAndProteinProphetSetup() { if (GlobalVar.IsSimulation) { if (!GlobalVar.usePepXMLComputedFile) { // Comet log.Info("Performing Comet search on full ms2 data"); String fullCometFile = PostProcessingScripts.CometStandardSearch(InputFileOrganizer.MS2SimulationTestFile, InputFileOrganizer.preExperimentFilesFolder, true); InputFileOrganizer.OriginalCometOutput = fullCometFile; } // ProteinProphet log.Info("Performing ProteinProphet search on full pepXML"); String fullProteinProphetFile = PostProcessingScripts.ProteinProphetSearch(InputFileOrganizer.OriginalCometOutput, InputFileOrganizer.OutputFolderOfTheRun, true); ExecuteShellCommand.MoveFile(fullProteinProphetFile, InputFileOrganizer.preExperimentFilesFolder); InputFileOrganizer.OriginalProtXMLFile = Path.Combine(InputFileOrganizer.preExperimentFilesFolder, IOUtils.getBaseName(IOUtils.getBaseName(fullProteinProphetFile)) + ".prot.xml"); } }
public FixedGapTermsIndexReader(Directory dir, FieldInfos fieldInfos, string segment, int indexDivisor, IComparer <BytesRef> termComp, string segmentSuffix, IOContext context) { this.termComp = termComp; if (Debugging.AssertsEnabled) { Debugging.Assert(indexDivisor == -1 || indexDivisor > 0); } input = dir.OpenInput(IndexFileNames.SegmentFileName(segment, segmentSuffix, FixedGapTermsIndexWriter.TERMS_INDEX_EXTENSION), context); bool success = false; try { version = ReadHeader(input); if (version >= FixedGapTermsIndexWriter.VERSION_CHECKSUM) { CodecUtil.ChecksumEntireFile(input); } indexInterval = input.ReadInt32(); if (indexInterval < 1) { throw new CorruptIndexException("invalid indexInterval: " + indexInterval + " (resource=" + input + ")"); } this.indexDivisor = indexDivisor; if (indexDivisor < 0) { totalIndexInterval = indexInterval; } else { // In case terms index gets loaded, later, on demand totalIndexInterval = indexInterval * indexDivisor; } if (Debugging.AssertsEnabled) { Debugging.Assert(totalIndexInterval > 0); } SeekDir(input, dirOffset); // Read directory int numFields = input.ReadVInt32(); if (numFields < 0) { throw new CorruptIndexException("invalid numFields: " + numFields + " (resource=" + input + ")"); } //System.out.println("FGR: init seg=" + segment + " div=" + indexDivisor + " nF=" + numFields); for (int i = 0; i < numFields; i++) { int field = input.ReadVInt32(); int numIndexTerms = input.ReadVInt32(); if (numIndexTerms < 0) { throw new CorruptIndexException("invalid numIndexTerms: " + numIndexTerms + " (resource=" + input + ")"); } long termsStart = input.ReadVInt64(); long indexStart = input.ReadVInt64(); long packedIndexStart = input.ReadVInt64(); long packedOffsetsStart = input.ReadVInt64(); if (packedIndexStart < indexStart) { throw new CorruptIndexException("invalid packedIndexStart: " + packedIndexStart + " indexStart: " + indexStart + "numIndexTerms: " + numIndexTerms + " (resource=" + input + ")"); } FieldInfo fieldInfo = fieldInfos.FieldInfo(field); FieldIndexData previous = fields.Put(fieldInfo, new FieldIndexData(this, fieldInfo, numIndexTerms, indexStart, termsStart, packedIndexStart, packedOffsetsStart)); if (previous != null) { throw new CorruptIndexException("duplicate field: " + fieldInfo.Name + " (resource=" + input + ")"); } } success = true; } finally { if (!success) { IOUtils.DisposeWhileHandlingException(input); } if (indexDivisor > 0) { input.Dispose(); input = null; if (success) { indexLoaded = true; } termBytesReader = termBytes.Freeze(true); } } }
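// Worked example of the interval math in the constructor above: with indexInterval = 128
// written at index time and indexDivisor = 2 passed in, totalIndexInterval = 128 * 2 = 256,
// i.e. only every second index term is kept when the terms index is loaded;
// indexDivisor == -1 means the index is not loaded here and totalIndexInterval stays
// equal to indexInterval.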
static List<TemplateItem> GetTemplateData() { var t = IOUtils.LoadTemplateItemsData(); //ExampleJsonOutput(); return t; }
/// <summary> /// Loads a Game Freak texture. /// </summary> /// <param name="data">The texture data</param> /// <returns>The image as a texture</returns> public static RenderBase.OTexture load(Stream data, bool keepOpen = false) { BinaryReader input = new BinaryReader(data); long descAddress = data.Position; data.Seek(8, SeekOrigin.Current); if (IOUtils.readStringWithLength(input, 7) != "texture") { return(null); } data.Seek(descAddress + 0x18, SeekOrigin.Begin); int texLength = input.ReadInt32(); data.Seek(descAddress + 0x28, SeekOrigin.Begin); string texName = IOUtils.readStringWithLength(input, 0x40); data.Seek(descAddress + 0x68, SeekOrigin.Begin); ushort width = input.ReadUInt16(); ushort height = input.ReadUInt16(); ushort texFormat = input.ReadUInt16(); ushort texMipMaps = input.ReadUInt16(); data.Seek(0x10, SeekOrigin.Current); byte[] texBuffer = input.ReadBytes(texLength); RenderBase.OTextureFormat fmt = RenderBase.OTextureFormat.dontCare; switch (texFormat) { case 0x2: fmt = RenderBase.OTextureFormat.rgb565; break; case 0x3: fmt = RenderBase.OTextureFormat.rgb8; break; case 0x4: fmt = RenderBase.OTextureFormat.rgba8; break; case 0x16: fmt = RenderBase.OTextureFormat.rgba4; break; case 0x17: fmt = RenderBase.OTextureFormat.rgba5551; break; case 0x23: fmt = RenderBase.OTextureFormat.la8; break; case 0x24: fmt = RenderBase.OTextureFormat.hilo8; break; case 0x25: fmt = RenderBase.OTextureFormat.l8; break; case 0x26: fmt = RenderBase.OTextureFormat.a8; break; case 0x27: fmt = RenderBase.OTextureFormat.la4; break; case 0x28: fmt = RenderBase.OTextureFormat.l4; break; case 0x29: fmt = RenderBase.OTextureFormat.a4; break; case 0x2a: fmt = RenderBase.OTextureFormat.etc1; break; case 0x2b: fmt = RenderBase.OTextureFormat.etc1a4; break; default: Debug.WriteLine("Unknown texture format " + texFormat.ToString("X4") + " @ " + texName); break; } Bitmap tex = TextureCodec.decode(texBuffer, width, height, fmt); if (!keepOpen) { data.Close(); } return(new RenderBase.OTexture(tex, texName)); }
/// <summary> /// Requires isCompiling() == true (setCompilingTrue() must have been called) /// Requires all parameters to be non-null /// Not thread-safe: callers must not invoke this method concurrently /// </summary> /// <param name="cmd"></param> /// <param name="workingDir"></param> /// <returns></returns> public TCompileResult compile(string cmd, string workingDir) { int oldTick = SystemUtils.getCurrentTimeMs(); ICmdParser parser = CmdParserFactory.createCmdParser(cmd); List <string> inputFilePaths = null; string outputFilePath = null; TCompileResult result = null; if (!isAllowCompiling()) { result = new TCompileResult(false, 1, "error: Compiling is stopped!", 0, null); goto my_end; } try { parser.getInOutFilePath(workingDir, out inputFilePaths, out outputFilePath); } catch (Exception ex) { // Error result = new TCompileResult(false, 1, "error: " + ex.Message, 0, null); goto my_end; } if (inputFilePaths == null || inputFilePaths.Count == 0 || outputFilePath == null) { // Error result = new TCompileResult(false, 1, "error: Invalid command, input file is mandatory!", 0, null); goto my_end; } outputFilePath = PathUtils.combine(workingDir, outputFilePath); if (!isConnected()) { // Local goto my_end; } #region Distribute // Send the input files to server foreach (string inputFilePath in inputFilePaths) { string f = PathUtils.combine(workingDir, inputFilePath); string fNormal = PathUtils.normalizePath(f); if (!m_sentFiles.Contains(fNormal)) { if (sendFile(f)) { m_sentFiles.Add(fNormal); } else { // Local goto my_end; } } } // Send compile request to server string cmdToSend = parser.makeNetworkCmd(); Message msg = MessageCompileRequest.createMessage(cmdToSend); if (!m_messageStream.writeMessage(msg)) { goto my_end; } if (!isAllowCompiling()) { result = new TCompileResult(false, 1, "error: Compiling is stopped!", 0, null); goto my_end; } // Receive the compile result msg = m_messageStream.readMessage(); if (msg == null) { goto my_end; } if (!isAllowCompiling()) { result = new TCompileResult(false, 1, "error: Compiling is stopped!", 0, null); goto my_end; } // Parse and check the compile result MessageCompileResponse msgCompileRes = new MessageCompileResponse(msg); if (msgCompileRes.getWasExec() && msgCompileRes.getExitCode() != 0) { // Compile error // Nghia: TODO rem this for the hot fix //result = new TCompileResult(true, msgCompileRes.getExitCode(), msgCompileRes.getOutputText(), 0, m_host); goto my_end; } if (!msgCompileRes.getWasExec()) { goto my_end; } // Compile OK by server // Save the file to disk if (!IOUtils.writeFile_Bytes(outputFilePath, msgCompileRes.getOFileData(), msgCompileRes.getOFileOffset(), msgCompileRes.getOFileSize())) { // No need to compile local as this is a serious error result = new TCompileResult(true, 1, "error: Could not save the output file to disk", 0, m_host); goto my_end; } // Everything is OK result = new TCompileResult(true, 0, "Remotely compiled from " + m_host + "\n" + msgCompileRes.getOutputText(), 0, m_host); #endregion my_end: if (result == null) { // Local string specOutFilePath = parser.getLocalSpecificOutputFilePath(); string specCmd = parser.getLocalSpecificCommand(); string specWorkingDir = parser.getLocalSpecificWorkingDir(); if (specOutFilePath == null) { specOutFilePath = outputFilePath; } if (specCmd == null) { specCmd = cmd; } if (specWorkingDir == null) { specWorkingDir = workingDir; } specOutFilePath = PathUtils.combine(specWorkingDir, specOutFilePath); TProcessResult res = null; if (parser.needToLockLocal()) { lock (s_lock) {
doLocal(specCmd, specWorkingDir, specOutFilePath, ref oldTick, ref res); } } else { doLocal(specCmd, specWorkingDir, specOutFilePath, ref oldTick, ref res); } result = new TCompileResult(res.wasExec, res.exitCode, "Locally compiled\n" + res.outputText, 0, null); if (result.wasExec && result.exitCode == 0 && specOutFilePath != outputFilePath) { File.Move(specOutFilePath, outputFilePath); } } result.spentTimeMs = SystemUtils.getCurrentTimeMs() - oldTick; setCompiling(false); return(result); }
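// Editor's note (hedged outline): stripped of the goto-based error handling, compile()
// above is a remote-first strategy with a local fallback. TryRemoteCompile and
// CompileLocally below are hypothetical stand-ins for the message-stream path and the
// doLocal path, not the project's actual API.
public TCompileResult Compile(string cmd, string workingDir)
{
    // Any transport/protocol failure on the remote path yields null (the "goto my_end" cases).
    TCompileResult result = TryRemoteCompile(cmd, workingDir);
    if (result == null)
    {
        // Local fallback, optionally serialized behind s_lock when parser.needToLockLocal().
        result = CompileLocally(cmd, workingDir);
    }
    return result;
}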
public override void Run() { using Stream stream = new NetworkStream(cs); BinaryReader intReader = new BinaryReader(stream); BinaryWriter intWriter = new BinaryWriter(stream); try { int id = intReader.ReadInt32(); if (id < 0) { throw new IOException("Client closed connection before communication started."); } startingGun.Wait(); intWriter.Write(43); stream.Flush(); while (true) { int command = stream.ReadByte(); if (command < 0) { return; // closed } UninterruptableMonitor.Enter(localLock); try { int currentLock = lockedID[0]; if (currentLock == -2) { return; // another thread got error, so we exit, too! } switch (command) { case 1: // Locked if (currentLock != -1) { lockedID[0] = -2; throw IllegalStateException.Create("id " + id + " got lock, but " + currentLock + " already holds the lock"); } lockedID[0] = id; break; case 0: // Unlocked if (currentLock != id) { lockedID[0] = -2; throw IllegalStateException.Create("id " + id + " released the lock, but " + currentLock + " is the one holding the lock"); } lockedID[0] = -1; break; default: throw RuntimeException.Create("Unrecognized command: " + command); } intWriter.Write((byte)command); stream.Flush(); } finally { UninterruptableMonitor.Exit(localLock); } } } catch (Exception e) when (e.IsRuntimeException() || e.IsError()) { throw; // LUCENENET: CA2200: Rethrow to preserve stack details (https://docs.microsoft.com/en-us/visualstudio/code-quality/ca2200-rethrow-to-preserve-stack-details) } catch (Exception ioe) when (ioe.IsException()) { throw RuntimeException.Create(ioe); } finally { IOUtils.DisposeWhileHandlingException(cs); } }
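// Editor's note (hedged sketch): Run() above is the server half of a lock-correctness
// verifier. A matching client sends its int32 id, expects the 43 handshake, then reports
// lock transitions as single bytes (1 = acquired, 0 = released) and expects each byte
// echoed back. "socket" and "myId" below are hypothetical, not from the original.
using (var stream = new NetworkStream(socket))
using (var writer = new BinaryWriter(stream))
using (var reader = new BinaryReader(stream))
{
    writer.Write(myId);                                    // matched by intReader.ReadInt32(); must be >= 0
    if (reader.ReadInt32() != 43) throw new IOException("bad handshake");
    stream.WriteByte(1);                                   // "I acquired the lock"
    if (stream.ReadByte() != 1) throw new IOException("acquire not acknowledged");
    stream.WriteByte(0);                                   // "I released the lock"
    if (stream.ReadByte() != 0) throw new IOException("release not acknowledged");
}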
/// <summary> /// An example of a command line is /// <br /> /// java -mx1g edu.stanford.nlp.parser.dvparser.CacheParseHypotheses -model /scr/horatio/dvparser/wsjPCFG.nocompact.simple.ser.gz -output cached9.simple.ser.gz -treebank /afs/ir/data/linguistic-data/Treebank/3/parsed/mrg/wsj 200-202 /// <br /> /// java -mx4g edu.stanford.nlp.parser.dvparser.CacheParseHypotheses -model ~/scr/dvparser/wsjPCFG.nocompact.simple.ser.gz -output cached.train.simple.ser.gz -treebank /afs/ir/data/linguistic-data/Treebank/3/parsed/mrg/wsj 200-2199 -numThreads 6 /// <br /> /// java -mx4g edu.stanford.nlp.parser.dvparser.CacheParseHypotheses -model ~/scr/dvparser/chinese/xinhuaPCFG.ser.gz -output cached.xinhua.train.ser.gz -treebank /afs/ir/data/linguistic-data/Chinese-Treebank/6/data/utf8/bracketed 026-270,301-499,600-999 /// </summary> /// <exception cref="System.IO.IOException"/> public static void Main(string[] args) { string parserModel = null; string output = null; IList <Pair <string, IFileFilter> > treebanks = Generics.NewArrayList(); int dvKBest = 200; int numThreads = 1; for (int argIndex = 0; argIndex < args.Length;) { if (Sharpen.Runtime.EqualsIgnoreCase(args[argIndex], "-dvKBest")) { dvKBest = System.Convert.ToInt32(args[argIndex + 1]); argIndex += 2; continue; } if (Sharpen.Runtime.EqualsIgnoreCase(args[argIndex], "-parser") || args[argIndex].Equals("-model")) { parserModel = args[argIndex + 1]; argIndex += 2; continue; } if (Sharpen.Runtime.EqualsIgnoreCase(args[argIndex], "-output")) { output = args[argIndex + 1]; argIndex += 2; continue; } if (Sharpen.Runtime.EqualsIgnoreCase(args[argIndex], "-treebank")) { Pair <string, IFileFilter> treebankDescription = ArgUtils.GetTreebankDescription(args, argIndex, "-treebank"); argIndex = argIndex + ArgUtils.NumSubArgs(args, argIndex) + 1; treebanks.Add(treebankDescription); continue; } if (Sharpen.Runtime.EqualsIgnoreCase(args[argIndex], "-numThreads")) { numThreads = System.Convert.ToInt32(args[argIndex + 1]); argIndex += 2; continue; } throw new ArgumentException("Unknown argument " + args[argIndex]); } if (parserModel == null) { throw new ArgumentException("Need to supply a parser model with -model"); } if (output == null) { throw new ArgumentException("Need to supply an output filename with -output"); } if (treebanks.IsEmpty()) { throw new ArgumentException("Need to supply a treebank with -treebank"); } log.Info("Writing output to " + output); log.Info("Loading parser model " + parserModel); log.Info("Writing " + dvKBest + " hypothesis trees for each tree"); LexicalizedParser parser = ((LexicalizedParser)LexicalizedParser.LoadModel(parserModel, "-dvKBest", dvKBest.ToString())); CacheParseHypotheses cacher = new CacheParseHypotheses(parser); ITreeTransformer transformer = DVParser.BuildTrainTransformer(parser.GetOp()); IList <Tree> sentences = new List <Tree>(); foreach (Pair <string, IFileFilter> description in treebanks) { log.Info("Reading trees from " + description.first); Treebank treebank = parser.GetOp().tlpParams.MemoryTreebank(); treebank.LoadPath(description.first, description.second); treebank = treebank.Transform(transformer); Sharpen.Collections.AddAll(sentences, treebank); } log.Info("Processing " + sentences.Count + " trees"); IList <Pair <Tree, byte[]> > cache = Generics.NewArrayList(); transformer = new SynchronizedTreeTransformer(transformer); MulticoreWrapper <Tree, Pair <Tree, byte[]> > wrapper = new MulticoreWrapper <Tree, Pair <Tree, byte[]> >(numThreads, new CacheParseHypotheses.CacheProcessor(cacher, parser, dvKBest,
transformer)); foreach (Tree tree in sentences) { wrapper.Put(tree); while (wrapper.Peek()) { cache.Add(wrapper.Poll()); if (cache.Count % 10 == 0) { System.Console.Out.WriteLine("Processed " + cache.Count + " trees"); } } } wrapper.Join(); while (wrapper.Peek()) { cache.Add(wrapper.Poll()); if (cache.Count % 10 == 0) { System.Console.Out.WriteLine("Processed " + cache.Count + " trees"); } } System.Console.Out.WriteLine("Finished processing " + cache.Count + " trees"); IOUtils.WriteObjectToFile(cache, output); }
public virtual void TestNrt() { Store.Directory dir = NewDirectory(); Store.Directory taxoDir = NewDirectory(); IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())); // Don't allow tiny maxBufferedDocs; it can make this // test too slow: iwc.SetMaxBufferedDocs(Math.Max(500, iwc.MaxBufferedDocs)); // MockRandom/AlcoholicMergePolicy are too slow: TieredMergePolicy tmp = new TieredMergePolicy(); tmp.FloorSegmentMB = .001; iwc.SetMergePolicy(tmp); IndexWriter w = new IndexWriter(dir, iwc); var tw = new DirectoryTaxonomyWriter(taxoDir); FacetsConfig config = new FacetsConfig(); config.SetMultiValued("field", true); AtomicBoolean stop = new AtomicBoolean(); // How many unique facets to index before stopping: int ordLimit = TEST_NIGHTLY ? 100000 : 6000; var indexer = new IndexerThread(w, config, tw, null, ordLimit, stop); var mgr = new SearcherTaxonomyManager(w, true, null, tw); var reopener = new ThreadAnonymousInnerClassHelper(this, stop, mgr); reopener.Name = "reopener"; reopener.Start(); indexer.Name = "indexer"; indexer.Start(); try { while (!stop.Get()) { SearcherAndTaxonomy pair = mgr.Acquire(); try { //System.out.println("search maxOrd=" + pair.taxonomyReader.getSize()); FacetsCollector sfc = new FacetsCollector(); pair.Searcher.Search(new MatchAllDocsQuery(), sfc); Facets facets = GetTaxonomyFacetCounts(pair.TaxonomyReader, config, sfc); FacetResult result = facets.GetTopChildren(10, "field"); if (pair.Searcher.IndexReader.NumDocs > 0) { //System.out.println(pair.taxonomyReader.getSize()); Assert.True(result.ChildCount > 0); Assert.True(result.LabelValues.Length > 0); } //if (VERBOSE) { //System.out.println("TEST: facets=" + FacetTestUtils.toString(results.get(0))); //} } finally { mgr.Release(pair); } } } finally { indexer.Join(); reopener.Join(); } if (VERBOSE) { Console.WriteLine("TEST: now stop"); } IOUtils.Dispose(mgr, tw, w, taxoDir, dir); }
public virtual void TestDirectory() { Store.Directory indexDir = NewDirectory(); Store.Directory taxoDir = NewDirectory(); IndexWriter w = new IndexWriter(indexDir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()))); var tw = new DirectoryTaxonomyWriter(taxoDir); // first empty commit w.Commit(); tw.Commit(); var mgr = new SearcherTaxonomyManager(indexDir, taxoDir, null); FacetsConfig config = new FacetsConfig(); config.SetMultiValued("field", true); AtomicBoolean stop = new AtomicBoolean(); // How many unique facets to index before stopping: int ordLimit = TEST_NIGHTLY ? 100000 : 6000; var indexer = new IndexerThread(w, config, tw, mgr, ordLimit, stop); indexer.Start(); try { while (!stop.Get()) { SearcherAndTaxonomy pair = mgr.Acquire(); try { //System.out.println("search maxOrd=" + pair.taxonomyReader.getSize()); FacetsCollector sfc = new FacetsCollector(); pair.Searcher.Search(new MatchAllDocsQuery(), sfc); Facets facets = GetTaxonomyFacetCounts(pair.TaxonomyReader, config, sfc); FacetResult result = facets.GetTopChildren(10, "field"); if (pair.Searcher.IndexReader.NumDocs > 0) { //System.out.println(pair.taxonomyReader.getSize()); Assert.True(result.ChildCount > 0); Assert.True(result.LabelValues.Length > 0); } //if (VERBOSE) { //System.out.println("TEST: facets=" + FacetTestUtils.toString(results.get(0))); //} } finally { mgr.Release(pair); } } } finally { indexer.Join(); } if (VERBOSE) { Console.WriteLine("TEST: now stop"); } IOUtils.Dispose(mgr, tw, w, taxoDir, indexDir); }
public override void WriteAdditionalClipboardData( ref string nodeInfo ) { base.WriteAdditionalClipboardData( ref nodeInfo ); IOUtils.AddFieldValueToString( ref nodeInfo, IOUtils.Matrix4x4ToString( m_materialValue ) ); }
public override void ReadAdditionalClipboardData( ref string[] nodeParams ) { base.ReadAdditionalClipboardData( ref nodeParams ); m_materialValue = IOUtils.StringToMatrix4x4( GetCurrentParam( ref nodeParams ) ); }
public override void WriteToString( ref string nodeInfo, ref string connectionsInfo ) { base.WriteToString( ref nodeInfo, ref connectionsInfo ); IOUtils.AddFieldValueToString( ref nodeInfo, IOUtils.Matrix4x4ToString( m_defaultValue ) ); }
private void DownloadDotStatDSD() { IOUtils file = new IOUtils(); ISdmxObjects sdmxObjects = null; sdmxObjects = GetSdmxObjectsWithRef(); if (sdmxObjects == null) return; file.SaveDotSTATFile(sdmxObjects, GetDotSTATExportType()); }
/// <exception cref="System.IO.IOException"/> public virtual void Annotate(ICoreMap document) { // write input file in GUTime format IElement inputXML = ToInputXML(document); File inputFile = File.CreateTempFile("gutime", ".input"); //Document doc = new Document(inputXML); PrintWriter inputWriter = new PrintWriter(inputFile); inputWriter.Println(XMLUtils.NodeToString(inputXML, false)); // new XMLOutputter().output(inputXML, inputWriter); inputWriter.Close(); bool useFirstDate = (!document.ContainsKey(typeof(CoreAnnotations.CalendarAnnotation)) && !document.ContainsKey(typeof(CoreAnnotations.DocDateAnnotation))); List <string> args = new List <string>(); args.Add("perl"); args.Add("-I" + this.gutimePath.GetPath()); args.Add(new File(this.gutimePath, "TimeTag.pl").GetPath()); if (useFirstDate) { args.Add("-FDNW"); } args.Add(inputFile.GetPath()); // run GUTime on the input file ProcessBuilder process = new ProcessBuilder(args); StringWriter outputWriter = new StringWriter(); SystemUtils.Run(process, outputWriter, null); string output = outputWriter.GetBuffer().ToString(); Pattern docClose = Pattern.Compile("</DOC>.*", Pattern.Dotall); output = docClose.Matcher(output).ReplaceAll("</DOC>"); //The TimeTag.pl result file contains next tags which must be removed output = output.ReplaceAll("<lex.*?>", string.Empty); output = output.Replace("</lex>", string.Empty); output = output.Replace("<NG>", string.Empty); output = output.Replace("</NG>", string.Empty); output = output.Replace("<VG>", string.Empty); output = output.Replace("</VG>", string.Empty); output = output.Replace("<s>", string.Empty); output = output.Replace("</s>", string.Empty); // parse the GUTime output IElement outputXML; try { outputXML = XMLUtils.ParseElement(output); } catch (Exception ex) { throw new Exception(string.Format("error:\n%s\ninput:\n%s\noutput:\n%s", ex, IOUtils.SlurpFile(inputFile), output), ex); } /* * try { * outputXML = new SAXBuilder().build(new StringReader(output)).getRootElement(); * } catch (JDOMException e) { * throw new RuntimeException(String.format("error:\n%s\ninput:\n%s\noutput:\n%s", * e, IOUtils.slurpFile(inputFile), output)); * } */ inputFile.Delete(); // get Timex annotations IList <ICoreMap> timexAnns = ToTimexCoreMaps(outputXML, document); document.Set(typeof(TimeAnnotations.TimexAnnotations), timexAnns); if (outputResults) { System.Console.Out.WriteLine(timexAnns); } // align Timex annotations to sentences int timexIndex = 0; foreach (ICoreMap sentence in document.Get(typeof(CoreAnnotations.SentencesAnnotation))) { int sentBegin = BeginOffset(sentence); int sentEnd = EndOffset(sentence); // skip times before the sentence while (timexIndex < timexAnns.Count && BeginOffset(timexAnns[timexIndex]) < sentBegin) { ++timexIndex; } // determine times within the sentence int sublistBegin = timexIndex; int sublistEnd = timexIndex; while (timexIndex < timexAnns.Count && sentBegin <= BeginOffset(timexAnns[timexIndex]) && EndOffset(timexAnns[timexIndex]) <= sentEnd) { ++sublistEnd; ++timexIndex; } // set the sentence timexes sentence.Set(typeof(TimeAnnotations.TimexAnnotations), timexAnns.SubList(sublistBegin, sublistEnd)); } }
public override void ReadFromString( ref string[] nodeParams ) { base.ReadFromString( ref nodeParams ); m_defaultValue = IOUtils.StringToMatrix4x4( GetCurrentParam( ref nodeParams ) ); }
public void Dispose() { IOUtils.Dispose(origEnum, threadResources); }
public void WriteToString( ref string nodeInfo ) { IOUtils.AddFieldValueToString( ref nodeInfo, m_value ); IOUtils.AddFieldValueToString( ref nodeInfo, m_active ); IOUtils.AddFieldValueToString( ref nodeInfo, m_nodeId ); }
public virtual void TestSparseFacets() { Store.Directory dir = NewDirectory(); Store.Directory taxoDir = NewDirectory(); // Writes facet ords to a separate directory from the // main index: var taxoWriter = new DirectoryTaxonomyWriter(taxoDir, OpenMode.CREATE); RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, Similarity, TimeZone); FacetsConfig config = new FacetsConfig(); Document doc = new Document(); doc.Add(new Int32Field("num", 10, Field.Store.NO)); doc.Add(new FacetField("a", "foo1")); writer.AddDocument(config.Build(taxoWriter, doc)); if (Random().NextBoolean()) { writer.Commit(); } doc = new Document(); doc.Add(new Int32Field("num", 20, Field.Store.NO)); doc.Add(new FacetField("a", "foo2")); doc.Add(new FacetField("b", "bar1")); writer.AddDocument(config.Build(taxoWriter, doc)); if (Random().NextBoolean()) { writer.Commit(); } doc = new Document(); doc.Add(new Int32Field("num", 30, Field.Store.NO)); doc.Add(new FacetField("a", "foo3")); doc.Add(new FacetField("b", "bar2")); doc.Add(new FacetField("c", "baz1")); writer.AddDocument(config.Build(taxoWriter, doc)); // NRT open IndexSearcher searcher = NewSearcher(writer.Reader); writer.Dispose(); // NRT open var taxoReader = new DirectoryTaxonomyReader(taxoWriter); taxoWriter.Dispose(); FacetsCollector c = new FacetsCollector(); searcher.Search(new MatchAllDocsQuery(), c); TaxonomyFacetSumValueSource facets = new TaxonomyFacetSumValueSource(taxoReader, new FacetsConfig(), c, new Int32FieldSource("num")); // Ask for top 10 labels for any dims that have counts: IList <FacetResult> results = facets.GetAllDims(10); Assert.AreEqual(3, results.Count); Assert.AreEqual("dim=a path=[] value=60.0 childCount=3\n foo3 (30.0)\n foo2 (20.0)\n foo1 (10.0)\n", results[0].ToString()); Assert.AreEqual("dim=b path=[] value=50.0 childCount=2\n bar2 (30.0)\n bar1 (20.0)\n", results[1].ToString()); Assert.AreEqual("dim=c path=[] value=30.0 childCount=1\n baz1 (30.0)\n", results[2].ToString()); IOUtils.Close(searcher.IndexReader, taxoReader, dir, taxoDir); }
/// <summary> /// Sole constructor. </summary> public CompressingStoredFieldsReader(Directory d, SegmentInfo si, string segmentSuffix, FieldInfos fn, IOContext context, string formatName, CompressionMode compressionMode) { this.compressionMode = compressionMode; string segment = si.Name; bool success = false; fieldInfos = fn; numDocs = si.DocCount; ChecksumIndexInput indexStream = null; try { string indexStreamFN = IndexFileNames.SegmentFileName(segment, segmentSuffix, Lucene40StoredFieldsWriter.FIELDS_INDEX_EXTENSION); string fieldsStreamFN = IndexFileNames.SegmentFileName(segment, segmentSuffix, Lucene40StoredFieldsWriter.FIELDS_EXTENSION); // Load the index into memory indexStream = d.OpenChecksumInput(indexStreamFN, context); string codecNameIdx = formatName + CompressingStoredFieldsWriter.CODEC_SFX_IDX; version = CodecUtil.CheckHeader(indexStream, codecNameIdx, CompressingStoredFieldsWriter.VERSION_START, CompressingStoredFieldsWriter.VERSION_CURRENT); Debug.Assert(CodecUtil.HeaderLength(codecNameIdx) == indexStream.GetFilePointer()); indexReader = new CompressingStoredFieldsIndexReader(indexStream, si); long maxPointer = -1; if (version >= CompressingStoredFieldsWriter.VERSION_CHECKSUM) { maxPointer = indexStream.ReadVInt64(); CodecUtil.CheckFooter(indexStream); } else { #pragma warning disable 612, 618 CodecUtil.CheckEOF(indexStream); #pragma warning restore 612, 618 } indexStream.Dispose(); indexStream = null; // Open the data file and read metadata fieldsStream = d.OpenInput(fieldsStreamFN, context); if (version >= CompressingStoredFieldsWriter.VERSION_CHECKSUM) { if (maxPointer + CodecUtil.FooterLength() != fieldsStream.Length) { throw new CorruptIndexException("Invalid fieldsStream maxPointer (file truncated?): maxPointer=" + maxPointer + ", length=" + fieldsStream.Length); } } else { maxPointer = fieldsStream.Length; } this.maxPointer = maxPointer; string codecNameDat = formatName + CompressingStoredFieldsWriter.CODEC_SFX_DAT; int fieldsVersion = CodecUtil.CheckHeader(fieldsStream, codecNameDat, CompressingStoredFieldsWriter.VERSION_START, CompressingStoredFieldsWriter.VERSION_CURRENT); if (version != fieldsVersion) { throw new CorruptIndexException("Version mismatch between stored fields index and data: " + version + " != " + fieldsVersion); } Debug.Assert(CodecUtil.HeaderLength(codecNameDat) == fieldsStream.GetFilePointer()); if (version >= CompressingStoredFieldsWriter.VERSION_BIG_CHUNKS) { chunkSize = fieldsStream.ReadVInt32(); } else { chunkSize = -1; } packedIntsVersion = fieldsStream.ReadVInt32(); decompressor = compressionMode.NewDecompressor(); this.bytes = new BytesRef(); success = true; } finally { if (!success) { IOUtils.DisposeWhileHandlingException(this, indexStream); } } }
public virtual void TestWrongIndexFieldName() { Store.Directory dir = NewDirectory(); Store.Directory taxoDir = NewDirectory(); // Writes facet ords to a separate directory from the // main index: var taxoWriter = new DirectoryTaxonomyWriter(taxoDir, OpenMode.CREATE); FacetsConfig config = new FacetsConfig(); config.SetIndexFieldName("a", "$facets2"); RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, Similarity, TimeZone); Document doc = new Document(); doc.Add(new Int32Field("num", 10, Field.Store.NO)); doc.Add(new FacetField("a", "foo1")); writer.AddDocument(config.Build(taxoWriter, doc)); // NRT open IndexSearcher searcher = NewSearcher(writer.Reader); writer.Dispose(); // NRT open var taxoReader = new DirectoryTaxonomyReader(taxoWriter); taxoWriter.Dispose(); FacetsCollector c = new FacetsCollector(); searcher.Search(new MatchAllDocsQuery(), c); TaxonomyFacetSumValueSource facets = new TaxonomyFacetSumValueSource(taxoReader, config, c, new Int32FieldSource("num")); // Ask for top 10 labels for any dims that have counts: IList <FacetResult> results = facets.GetAllDims(10); Assert.True(results.Count == 0); try { facets.GetSpecificValue("a"); Fail("should have hit exc"); } catch (System.ArgumentException) { // expected } try { facets.GetTopChildren(10, "a"); Fail("should have hit exc"); } catch (System.ArgumentException) { // expected } IOUtils.Close(searcher.IndexReader, taxoReader, dir, taxoDir); }
public override string /*!*/ ReadLine() { return(IOUtils.ReadTo(this, '\n')); }
public virtual void TestRandom() { string[] tokens = GetRandomTokens(10); Store.Directory indexDir = NewDirectory(); Store.Directory taxoDir = NewDirectory(); RandomIndexWriter w = new RandomIndexWriter(Random(), indexDir, Similarity, TimeZone); var tw = new DirectoryTaxonomyWriter(taxoDir); FacetsConfig config = new FacetsConfig(); int numDocs = AtLeast(1000); int numDims = TestUtil.NextInt(Random(), 1, 7); IList <TestDoc> testDocs = GetRandomDocs(tokens, numDocs, numDims); foreach (TestDoc testDoc in testDocs) { Document doc = new Document(); doc.Add(NewStringField("content", testDoc.content, Field.Store.NO)); testDoc.value = Random().NextFloat(); doc.Add(new SingleDocValuesField("value", testDoc.value)); for (int j = 0; j < numDims; j++) { if (testDoc.dims[j] != null) { doc.Add(new FacetField("dim" + j, testDoc.dims[j])); } } w.AddDocument(config.Build(tw, doc)); } // NRT open IndexSearcher searcher = NewSearcher(w.Reader); // NRT open var tr = new DirectoryTaxonomyReader(tw); ValueSource values = new SingleFieldSource("value"); int iters = AtLeast(100); for (int iter = 0; iter < iters; iter++) { string searchToken = tokens[Random().Next(tokens.Length)]; if (VERBOSE) { Console.WriteLine("\nTEST: iter content=" + searchToken); } FacetsCollector fc = new FacetsCollector(); FacetsCollector.Search(searcher, new TermQuery(new Term("content", searchToken)), 10, fc); Facets facets = new TaxonomyFacetSumValueSource(tr, config, fc, values); // Slow, yet hopefully bug-free, faceting: var expectedValues = new List <Dictionary <string, float?> >(numDims); for (int i = 0; i < numDims; i++) { expectedValues.Add(new Dictionary <string, float?>()); } foreach (TestDoc doc in testDocs) { if (doc.content.Equals(searchToken)) { for (int j = 0; j < numDims; j++) { if (doc.dims[j] != null) { float?v = expectedValues[j].ContainsKey(doc.dims[j]) ? expectedValues[j][doc.dims[j]] : null; if (v == null) { expectedValues[j][doc.dims[j]] = doc.value; } else { expectedValues[j][doc.dims[j]] = (float)v + doc.value; } } } } } List <FacetResult> expected = new List <FacetResult>(); for (int i = 0; i < numDims; i++) { List <LabelAndValue> labelValues = new List <LabelAndValue>(); float totValue = 0; foreach (KeyValuePair <string, float?> ent in expectedValues[i]) { labelValues.Add(new LabelAndValue(ent.Key, ent.Value.Value)); totValue += ent.Value.Value; } SortLabelValues(labelValues); if (totValue > 0) { expected.Add(new FacetResult("dim" + i, new string[0], totValue, labelValues.ToArray(), labelValues.Count)); } } // Sort by highest value, tie break by value: SortFacetResults(expected); IList <FacetResult> actual = facets.GetAllDims(10); // Messy: fixup ties SortTies(actual); if (VERBOSE) { Console.WriteLine("expected=\n" + expected.ToString()); Console.WriteLine("actual=\n" + actual.ToString()); } AssertFloatValuesEquals(expected, actual); } IOUtils.Close(w, tw, searcher.IndexReader, tr, indexDir, taxoDir); }
public override void TearDown() { IOUtils.Dispose(client, callback, publishWriter, replicator, publishDir, handlerDir); base.TearDown(); }
public override void TearDown() { IOUtils.Close(indexWriter, indexReader, directory); base.TearDown(); }
/// <exception cref="System.IO.IOException"/> public static void Main(string[] args) { string basePath = "/user/socherr/scr/projects/semComp/RNTN/src/params/"; int numSlices = 25; bool useEscapedParens = false; for (int argIndex = 0; argIndex < args.Length;) { if (Sharpen.Runtime.EqualsIgnoreCase(args[argIndex], "-slices")) { numSlices = System.Convert.ToInt32(args[argIndex + 1]); argIndex += 2; } else { if (Sharpen.Runtime.EqualsIgnoreCase(args[argIndex], "-path")) { basePath = args[argIndex + 1]; argIndex += 2; } else { if (Sharpen.Runtime.EqualsIgnoreCase(args[argIndex], "-useEscapedParens")) { useEscapedParens = true; argIndex += 1; } else { log.Info("Unknown argument " + args[argIndex]); System.Environment.Exit(2); } } } } SimpleMatrix[] slices = new SimpleMatrix[numSlices]; for (int i = 0; i < numSlices; ++i) { slices[i] = LoadMatrix(basePath + "bin/Wt_" + (i + 1) + ".bin", basePath + "Wt_" + (i + 1) + ".txt"); } SimpleTensor tensor = new SimpleTensor(slices); log.Info("W tensor size: " + tensor.NumRows() + "x" + tensor.NumCols() + "x" + tensor.NumSlices()); SimpleMatrix W = LoadMatrix(basePath + "bin/W.bin", basePath + "W.txt"); log.Info("W matrix size: " + W.NumRows() + "x" + W.NumCols()); SimpleMatrix Wcat = LoadMatrix(basePath + "bin/Wcat.bin", basePath + "Wcat.txt"); log.Info("W cat size: " + Wcat.NumRows() + "x" + Wcat.NumCols()); SimpleMatrix combinedWV = LoadMatrix(basePath + "bin/Wv.bin", basePath + "Wv.txt"); log.Info("Word matrix size: " + combinedWV.NumRows() + "x" + combinedWV.NumCols()); File vocabFile = new File(basePath + "vocab_1.txt"); if (!vocabFile.Exists()) { vocabFile = new File(basePath + "words.txt"); } IList <string> lines = Generics.NewArrayList(); foreach (string line in IOUtils.ReadLines(vocabFile)) { lines.Add(line.Trim()); } log.Info("Lines in vocab file: " + lines.Count); IDictionary <string, SimpleMatrix> wordVectors = Generics.NewTreeMap(); for (int i_1 = 0; i_1 < lines.Count && i_1 < combinedWV.NumCols(); ++i_1) { string[] pieces = lines[i_1].Split(" +"); if (pieces.Length == 0 || pieces.Length > 1) { continue; } wordVectors[pieces[0]] = combinedWV.ExtractMatrix(0, numSlices, i_1, i_1 + 1); if (pieces[0].Equals("UNK")) { wordVectors[SentimentModel.UnknownWord] = wordVectors["UNK"]; } } // If there is no ",", we first try to look for an HTML escaping, // then fall back to "." as better than just a random word vector. // Same for "``" and ";" CopyWordVector(wordVectors, ",", ","); CopyWordVector(wordVectors, ".", ","); CopyWordVector(wordVectors, ";", ";"); CopyWordVector(wordVectors, ".", ";"); CopyWordVector(wordVectors, "``", "``"); CopyWordVector(wordVectors, "''", "``"); if (useEscapedParens) { ReplaceWordVector(wordVectors, "(", "-LRB-"); ReplaceWordVector(wordVectors, ")", "-RRB-"); } RNNOptions op = new RNNOptions(); op.numHid = numSlices; op.lowercaseWordVectors = false; if (Wcat.NumRows() == 2) { op.classNames = new string[] { "Negative", "Positive" }; op.equivalenceClasses = new int[][] { new int[] { 0 }, new int[] { 1 } }; // TODO: set to null once old models are updated op.numClasses = 2; } if (!wordVectors.Contains(SentimentModel.UnknownWord)) { wordVectors[SentimentModel.UnknownWord] = SimpleMatrix.Random(numSlices, 1, -0.00001, 0.00001, new Random()); } SentimentModel model = SentimentModel.ModelFromMatrices(W, Wcat, tensor, wordVectors, op); model.SaveSerialized("matlab.ser.gz"); }
internal Lucene42DocValuesProducer(SegmentReadState state, string dataCodec, string dataExtension, string metaCodec, string metaExtension) { maxDoc = state.SegmentInfo.DocCount; string metaName = IndexFileNames.SegmentFileName(state.SegmentInfo.Name, state.SegmentSuffix, metaExtension); // read in the entries from the metadata file. ChecksumIndexInput @in = state.Directory.OpenChecksumInput(metaName, state.Context); bool success = false; ramBytesUsed = new AtomicInt64(RamUsageEstimator.ShallowSizeOfInstance(this.GetType())); try { version = CodecUtil.CheckHeader(@in, metaCodec, VERSION_START, VERSION_CURRENT); numerics = new Dictionary <int, NumericEntry>(); binaries = new Dictionary <int, BinaryEntry>(); fsts = new Dictionary <int, FSTEntry>(); ReadFields(@in /*, state.FieldInfos // LUCENENET: Never read */); if (version >= VERSION_CHECKSUM) { CodecUtil.CheckFooter(@in); } else { #pragma warning disable 612, 618 CodecUtil.CheckEOF(@in); #pragma warning restore 612, 618 } success = true; } finally { if (success) { IOUtils.Dispose(@in); } else { IOUtils.DisposeWhileHandlingException(@in); } } success = false; try { string dataName = IndexFileNames.SegmentFileName(state.SegmentInfo.Name, state.SegmentSuffix, dataExtension); data = state.Directory.OpenInput(dataName, state.Context); int version2 = CodecUtil.CheckHeader(data, dataCodec, VERSION_START, VERSION_CURRENT); if (version != version2) { throw new CorruptIndexException("Format versions mismatch"); } success = true; } finally { if (!success) { IOUtils.DisposeWhileHandlingException(this.data); } } }
public override void Write(Directory directory, string segmentName, string segmentSuffix, FieldInfos infos, IOContext context) { string fileName = IndexFileNames.SegmentFileName(segmentName, "", FIELD_INFOS_EXTENSION); IndexOutput output = directory.CreateOutput(fileName, context); bool success = false; try { output.WriteVInt32(FORMAT_PREFLEX_RW); output.WriteVInt32(infos.Count); foreach (FieldInfo fi in infos) { sbyte bits = 0x0; if (fi.HasVectors) { bits |= STORE_TERMVECTOR; } if (fi.OmitsNorms) { bits |= OMIT_NORMS; } if (fi.HasPayloads) { bits |= STORE_PAYLOADS; } if (fi.IsIndexed) { bits |= IS_INDEXED; if (Debugging.AssertsEnabled) { Debugging.Assert(fi.IndexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS || !fi.HasPayloads); } if (fi.IndexOptions == IndexOptions.DOCS_ONLY) { bits |= OMIT_TERM_FREQ_AND_POSITIONS; } else if (fi.IndexOptions == IndexOptions.DOCS_AND_FREQS) { bits |= OMIT_POSITIONS; } } output.WriteString(fi.Name); /* * we need to write the field number since IW tries * to stabilize the field numbers across segments so the * FI ordinal is not necessarily equivalent to the field number */ output.WriteInt32(fi.Number); output.WriteByte((byte)bits); if (fi.IsIndexed && !fi.OmitsNorms) { // to allow null norm types we need to indicate if norms are written // only in RW case output.WriteByte((byte)(fi.NormType == Index.DocValuesType.NONE ? 0 : 1)); } if (Debugging.AssertsEnabled) { Debugging.Assert(fi.Attributes == null); // not used or supported } } success = true; } finally { if (success) { output.Dispose(); } else { IOUtils.DisposeWhileHandlingException(output); } } }
public override void Build(InputIterator iterator) { if (iterator.HasContexts) { throw new System.ArgumentException("this suggester doesn't support contexts"); } string prefix = this.GetType().Name; var directory = OfflineSorter.DefaultTempDir(); var tempInput = File.CreateTempFile(prefix, ".input", directory); var tempSorted = File.CreateTempFile(prefix, ".sorted", directory); hasPayloads = iterator.HasPayloads; var writer = new OfflineSorter.ByteSequencesWriter(tempInput); OfflineSorter.ByteSequencesReader reader = null; var scratch = new BytesRef(); TokenStreamToAutomaton ts2a = TokenStreamToAutomaton; bool success = false; count = 0; sbyte[] buffer = new sbyte[8]; try { var output = new ByteArrayDataOutput(buffer); BytesRef surfaceForm; while ((surfaceForm = iterator.Next()) != null) { HashSet <IntsRef> paths = ToFiniteStrings(surfaceForm, ts2a); maxAnalyzedPathsForOneInput = Math.Max(maxAnalyzedPathsForOneInput, paths.Count); foreach (IntsRef path in paths) { Util.Fst.Util.ToBytesRef(path, scratch); // length of the analyzed text (FST input) if (scratch.Length > short.MaxValue - 2) { throw new System.ArgumentException("cannot handle analyzed forms > " + (short.MaxValue - 2) + " in length (got " + scratch.Length + ")"); } short analyzedLength = (short)scratch.Length; // compute the required length: // analyzed sequence + weight (4) + surface + analyzedLength (short) int requiredLength = analyzedLength + 4 + surfaceForm.Length + 2; BytesRef payload; if (hasPayloads) { if (surfaceForm.Length > (short.MaxValue - 2)) { throw new ArgumentException("cannot handle surface form > " + (short.MaxValue - 2) + " in length (got " + surfaceForm.Length + ")"); } payload = iterator.Payload; // payload + surfaceLength (short) requiredLength += payload.Length + 2; } else { payload = null; } buffer = ArrayUtil.Grow(buffer, requiredLength); output.Reset(buffer); output.WriteShort(analyzedLength); output.WriteBytes(scratch.Bytes, scratch.Offset, scratch.Length); output.WriteInt(EncodeWeight(iterator.Weight)); if (hasPayloads) { for (int i = 0; i < surfaceForm.Length; i++) { if (surfaceForm.Bytes[i] == PAYLOAD_SEP) { throw new ArgumentException( "surface form cannot contain unit separator character U+001F; this character is reserved"); } } output.WriteShort((short)surfaceForm.Length); output.WriteBytes(surfaceForm.Bytes, surfaceForm.Offset, surfaceForm.Length); output.WriteBytes(payload.Bytes, payload.Offset, payload.Length); } else { output.WriteBytes(surfaceForm.Bytes, surfaceForm.Offset, surfaceForm.Length); } Debug.Assert(output.Position == requiredLength, output.Position + " vs " + requiredLength); writer.Write(buffer, 0, output.Position); } count++; } writer.Dispose(); // Sort all input/output pairs (required by FST.Builder): (new OfflineSorter(new AnalyzingComparator(hasPayloads))).Sort(tempInput, tempSorted); // Free disk space: tempInput.Delete(); reader = new OfflineSorter.ByteSequencesReader(tempSorted); var outputs = new PairOutputs <long?, BytesRef>(PositiveIntOutputs.Singleton, ByteSequenceOutputs.Singleton); var builder = new Builder <PairOutputs <long?, BytesRef> .Pair>(FST.INPUT_TYPE.BYTE1, outputs); // Build FST: BytesRef previousAnalyzed = null; BytesRef analyzed = new BytesRef(); BytesRef surface = new BytesRef(); IntsRef scratchInts = new IntsRef(); var input = new ByteArrayDataInput(); // Used to remove duplicate surface forms (but we // still index the highest-weight one).
We clear // this when we see a new analyzed form, so it cannot // grow unbounded (at most 256 entries): var seenSurfaceForms = new HashSet <BytesRef>(); var dedup = 0; while (reader.Read(scratch)) { input.Reset(scratch.Bytes, scratch.Offset, scratch.Length); short analyzedLength = input.ReadShort(); analyzed.Grow(analyzedLength + 2); input.ReadBytes(analyzed.Bytes, 0, analyzedLength); analyzed.Length = analyzedLength; long cost = input.ReadInt(); surface.Bytes = scratch.Bytes; if (hasPayloads) { surface.Length = input.ReadShort(); surface.Offset = input.Position; } else { surface.Offset = input.Position; surface.Length = scratch.Length - surface.Offset; } if (previousAnalyzed == null) { previousAnalyzed = new BytesRef(); previousAnalyzed.CopyBytes(analyzed); seenSurfaceForms.Add(BytesRef.DeepCopyOf(surface)); } else if (analyzed.Equals(previousAnalyzed)) { dedup++; if (dedup >= maxSurfaceFormsPerAnalyzedForm) { // More than maxSurfaceFormsPerAnalyzedForm // dups: skip the rest: continue; } if (seenSurfaceForms.Contains(surface)) { continue; } seenSurfaceForms.Add(BytesRef.DeepCopyOf(surface)); } else { dedup = 0; previousAnalyzed.CopyBytes(analyzed); seenSurfaceForms.Clear(); seenSurfaceForms.Add(BytesRef.DeepCopyOf(surface)); } // TODO: I think we can avoid the extra 2 bytes when // there is no dup (dedup==0), but we'd have to fix // the exactFirst logic ... which would be sort of // hairy because we'd need to special case the two // (dup/not dup)... // NOTE: must be byte 0 so we sort before whatever // is next analyzed.Bytes[analyzed.Offset + analyzed.Length] = 0; analyzed.Bytes[analyzed.Offset + analyzed.Length + 1] = (sbyte)dedup; analyzed.Length += 2; Util.Fst.Util.ToIntsRef(analyzed, scratchInts); //System.out.println("ADD: " + scratchInts + " -> " + cost + ": " + surface.utf8ToString()); if (!hasPayloads) { builder.Add(scratchInts, outputs.NewPair(cost, BytesRef.DeepCopyOf(surface))); } else { int payloadOffset = input.Position + surface.Length; int payloadLength = scratch.Length - payloadOffset; BytesRef br = new BytesRef(surface.Length + 1 + payloadLength); Array.Copy(surface.Bytes, surface.Offset, br.Bytes, 0, surface.Length); br.Bytes[surface.Length] = PAYLOAD_SEP; Array.Copy(scratch.Bytes, payloadOffset, br.Bytes, surface.Length + 1, payloadLength); br.Length = br.Bytes.Length; builder.Add(scratchInts, outputs.NewPair(cost, br)); } } fst = builder.Finish(); //Util.dotToFile(fst, "/tmp/suggest.dot"); success = true; } finally { if (success) { IOUtils.Close(reader, writer); } else { IOUtils.CloseWhileHandlingException(reader, writer); } tempInput.Delete(); tempSorted.Delete(); } }
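// Editor's note: the temp-file record that Build() writes and re-parses above has this
// layout, reconstructed from the reads/writes in the method (not an official format spec):
//   short  analyzedLength                   // WriteShort / ReadShort
//   byte[] analyzed (analyzedLength bytes)  // the FST input from ToFiniteStrings
//   int    encoded weight                   // EncodeWeight(iterator.Weight)
//   if hasPayloads:
//     short  surfaceLength
//     byte[] surface, then byte[] payload   // surface may not contain 0x1F (PAYLOAD_SEP)
//   else:
//     byte[] surface                        // runs to the end of the record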
private void DownloadDotStatCodelist() { IOUtils file = new IOUtils(); ISdmxObjects sdmxObjects = null; sdmxObjects = GetSdmxObjects(); if (sdmxObjects == null) return; file.SaveDotSTATCodelistFile(sdmxObjects.Codelists.First()); }
public override long Decompress(Stream instream, long inLength, Stream outstream) { #region Format definition in NDSTEK style /* Data header (32bit) Bit 0-3 Reserved Bit 4-7 Compressed type (must be 1 for LZ77) Bit 8-31 Size of decompressed data. if 0, the next 4 bytes are decompressed length Repeat below. Each Flag Byte followed by eight Blocks. Flag data (8bit) Bit 0-7 Type Flags for next 8 Blocks, MSB first Block Type 0 - Uncompressed - Copy 1 Byte from Source to Dest Bit 0-7 One data byte to be copied to dest Block Type 1 - Compressed - Copy LEN Bytes from Dest-Disp-1 to Dest If Reserved is 0: - Default Bit 0-3 Disp MSBs Bit 4-7 LEN - 3 Bit 8-15 Disp LSBs If Reserved is 1: - Higher compression rates for files with (lots of) long repetitions Bit 4-7 Indicator If Indicator > 1: Bit 0-3 Disp MSBs Bit 4-7 LEN - 1 (same bits as Indicator) Bit 8-15 Disp LSBs If Indicator is 1: A(B CD E)(F GH) Bit 0-3 (LEN - 0x111) MSBs Bit 4-7 Indicator; unused Bit 8-15 (LEN- 0x111) 'middle'-SBs Bit 16-19 Disp MSBs Bit 20-23 (LEN - 0x111) LSBs Bit 24-31 Disp LSBs If Indicator is 0: Bit 0-3 (LEN - 0x11) MSBs Bit 4-7 Indicator; unused Bit 8-11 Disp MSBs Bit 12-15 (LEN - 0x11) LSBs Bit 16-23 Disp LSBs */ #endregion long readBytes = 0; byte type = (byte)instream.ReadByte(); if (type != base.magicByte) throw new InvalidDataException(String.Format(Main.Get_Traduction("S03"), type.ToString("X"))); byte[] sizeBytes = new byte[3]; instream.Read(sizeBytes, 0, 3); int decompressedSize = IOUtils.ToNDSu24(sizeBytes, 0); readBytes += 4; if (decompressedSize == 0) { sizeBytes = new byte[4]; instream.Read(sizeBytes, 0, 4); decompressedSize = IOUtils.ToNDSs32(sizeBytes, 0); readBytes += 4; } // the maximum 'DISP-1' is still 0xFFF. int bufferLength = 0x1000; byte[] buffer = new byte[bufferLength]; int bufferOffset = 0; int currentOutSize = 0; int flags = 0, mask = 1; while (currentOutSize < decompressedSize) { // (throws when requested new flags byte is not available) #region Update the mask. If all flag bits have been read, get a new set. // the current mask is the mask used in the previous run. So if it masks the // last flag bit, get a new flags byte. if (mask == 1) { if (readBytes >= inLength) throw new NotEnoughDataException(currentOutSize, decompressedSize); flags = instream.ReadByte(); readBytes++; if (flags < 0) throw new StreamTooShortException(); mask = 0x80; } else { mask >>= 1; } #endregion // bit = 1 <=> compressed. 
if ((flags & mask) > 0) { // (throws when not enough bytes are available) #region Get length and displacement('disp') values from next 2, 3 or 4 bytes // read the first byte first, which also signals the size of the compressed block if (readBytes >= inLength) throw new NotEnoughDataException(currentOutSize, decompressedSize); int byte1 = instream.ReadByte(); readBytes++; if (byte1 < 0) throw new StreamTooShortException(); int length = byte1 >> 4; int disp = -1; if (length == 0) { #region case 0; 0(B C)(D EF) + (0x11)(0x1) = (LEN)(DISP) // case 0: // data = AB CD EF (with A=0) // LEN = ABC + 0x11 == BC + 0x11 // DISP = DEF + 1 // we need two more bytes available if (readBytes + 1 >= inLength) throw new NotEnoughDataException(currentOutSize, decompressedSize); int byte2 = instream.ReadByte(); readBytes++; int byte3 = instream.ReadByte(); readBytes++; if (byte3 < 0) throw new StreamTooShortException(); length = (((byte1 & 0x0F) << 4) | (byte2 >> 4)) + 0x11; disp = (((byte2 & 0x0F) << 8) | byte3) + 0x1; #endregion } else if (length == 1) { #region case 1: 1(B CD E)(F GH) + (0x111)(0x1) = (LEN)(DISP) // case 1: // data = AB CD EF GH (with A=1) // LEN = BCDE + 0x111 // DISP = FGH + 1 // we need three more bytes available if (readBytes + 2 >= inLength) throw new NotEnoughDataException(currentOutSize, decompressedSize); int byte2 = instream.ReadByte(); readBytes++; int byte3 = instream.ReadByte(); readBytes++; int byte4 = instream.ReadByte(); readBytes++; if (byte4 < 0) throw new StreamTooShortException(); length = (((byte1 & 0x0F) << 12) | (byte2 << 4) | (byte3 >> 4)) + 0x111; disp = (((byte3 & 0x0F) << 8) | byte4) + 0x1; #endregion } else { #region case > 1: (A)(B CD) + (0x1)(0x1) = (LEN)(DISP) // case other: // data = AB CD // LEN = A + 1 // DISP = BCD + 1 // we need only one more byte available if (readBytes >= inLength) throw new NotEnoughDataException(currentOutSize, decompressedSize); int byte2 = instream.ReadByte(); readBytes++; if (byte2 < 0) throw new StreamTooShortException(); length = ((byte1 & 0xF0) >> 4) + 0x1; disp = (((byte1 & 0x0F) << 8) | byte2) + 0x1; #endregion } if (disp > currentOutSize) throw new InvalidDataException(String.Format(Main.Get_Traduction("S04"), disp, currentOutSize.ToString("X"), instream.Position.ToString("X"), (byte1 >> 4).ToString("X"))); #endregion int bufIdx = bufferOffset + bufferLength - disp; for (int i = 0; i < length; i++) { byte next = buffer[bufIdx % bufferLength]; bufIdx++; outstream.WriteByte(next); buffer[bufferOffset] = next; bufferOffset = (bufferOffset + 1) % bufferLength; } currentOutSize += length; } else { if (readBytes >= inLength) throw new NotEnoughDataException(currentOutSize, decompressedSize); int next = instream.ReadByte(); readBytes++; if (next < 0) throw new StreamTooShortException(); outstream.WriteByte((byte)next); currentOutSize++; buffer[bufferOffset] = (byte)next; bufferOffset = (bufferOffset + 1) % bufferLength; } } if (readBytes < inLength) { // the input may be 4-byte aligned. if ((readBytes ^ (readBytes & 3)) + 4 < inLength) throw new TooMuchInputException(readBytes, inLength); } return decompressedSize; }
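// Worked example for the two-byte ("Indicator > 1") token handled in the final
// else-branch above. Suppose the flag bit is 1 and the next two bytes are 0x2F 0x10:
//   byte1 = 0x2F -> length = ((0x2F & 0xF0) >> 4) + 0x1 = 0x2 + 1 = 3
//   disp          = (((0x2F & 0x0F) << 8) | 0x10) + 0x1 = 0xF10 + 1 = 0xF11
// so the decoder copies 3 bytes starting 0xF11 bytes behind the current end of the
// output, read back through the 0x1000-byte ring buffer.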
private void DownloadSDMX() { IOUtils file = new IOUtils(); ISdmxObjects sdmxObjects = null; sdmxObjects = GetSdmxObjects(); if (sdmxObjects != null) file.SaveSDMXFile(sdmxObjects, sdmxVersion, GetFilename()); }
public override FieldInfos Read(Directory directory, string segmentName, string segmentSuffix, IOContext iocontext) { string fileName = IndexFileNames.SegmentFileName(segmentName, "", Lucene40FieldInfosFormat.FIELD_INFOS_EXTENSION); IndexInput input = directory.OpenInput(fileName, iocontext); bool success = false; try { CodecUtil.CheckHeader(input, Lucene40FieldInfosFormat.CODEC_NAME, Lucene40FieldInfosFormat.FORMAT_START, Lucene40FieldInfosFormat.FORMAT_CURRENT); int size = input.ReadVInt32(); //read in the size FieldInfo[] infos = new FieldInfo[size]; for (int i = 0; i < size; i++) { string name = input.ReadString(); int fieldNumber = input.ReadVInt32(); byte bits = input.ReadByte(); bool isIndexed = (bits & Lucene40FieldInfosFormat.IS_INDEXED) != 0; bool storeTermVector = (bits & Lucene40FieldInfosFormat.STORE_TERMVECTOR) != 0; bool omitNorms = (bits & Lucene40FieldInfosFormat.OMIT_NORMS) != 0; bool storePayloads = (bits & Lucene40FieldInfosFormat.STORE_PAYLOADS) != 0; IndexOptions indexOptions; if (!isIndexed) { indexOptions = IndexOptions.NONE; } else if ((bits & Lucene40FieldInfosFormat.OMIT_TERM_FREQ_AND_POSITIONS) != 0) { indexOptions = IndexOptions.DOCS_ONLY; } else if ((bits & Lucene40FieldInfosFormat.OMIT_POSITIONS) != 0) { indexOptions = IndexOptions.DOCS_AND_FREQS; } else if ((bits & Lucene40FieldInfosFormat.STORE_OFFSETS_IN_POSTINGS) != 0) { indexOptions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS; } else { indexOptions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS; } // LUCENE-3027: past indices were able to write // storePayloads=true when omitTFAP is also true, // which is invalid. We correct that, here: // LUCENENET specific - to avoid boxing, changed from CompareTo() to IndexOptionsComparer.Compare() if (isIndexed && IndexOptionsComparer.Default.Compare(indexOptions, IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) < 0) { storePayloads = false; } // DV Types are packed in one byte byte val = input.ReadByte(); LegacyDocValuesType oldValuesType = GetDocValuesType((sbyte)(val & 0x0F)); LegacyDocValuesType oldNormsType = GetDocValuesType((sbyte)(((int)((uint)val >> 4)) & 0x0F)); IDictionary <string, string> attributes = input.ReadStringStringMap(); if (oldValuesType.GetMapping() != DocValuesType.NONE) { attributes[LEGACY_DV_TYPE_KEY] = oldValuesType.ToString(); } if (oldNormsType.GetMapping() != DocValuesType.NONE) { if (oldNormsType.GetMapping() != DocValuesType.NUMERIC) { throw new CorruptIndexException("invalid norm type: " + oldNormsType + " (resource=" + input + ")"); } attributes[LEGACY_NORM_TYPE_KEY] = oldNormsType.ToString(); } infos[i] = new FieldInfo(name, isIndexed, fieldNumber, storeTermVector, omitNorms, storePayloads, indexOptions, oldValuesType.GetMapping(), oldNormsType.GetMapping(), attributes); } CodecUtil.CheckEOF(input); FieldInfos fieldInfos = new FieldInfos(infos); success = true; return(fieldInfos); } finally { if (success) { input.Dispose(); } else { IOUtils.DisposeWhileHandlingException(input); } } }
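// Worked example for the flags byte decoded in Read() above, using only the constants
// named in the code. Suppose bits = IS_INDEXED | OMIT_NORMS | STORE_PAYLOADS:
//   isIndexed = true, omitNorms = true, storePayloads = true, storeTermVector = false;
//   none of OMIT_TERM_FREQ_AND_POSITIONS / OMIT_POSITIONS / STORE_OFFSETS_IN_POSTINGS is
//   set, so indexOptions = DOCS_AND_FREQS_AND_POSITIONS, and the LUCENE-3027 correction
//   (which forces storePayloads false below DOCS_AND_FREQS_AND_POSITIONS) leaves it true.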