/// <summary>
/// Reads a triple file line by line and saves one node per subject through <c>SaveTriple</c>.
/// </summary>
/// <remarks>
/// Consecutive lines that share the same subject are treated as one group. Of each group only
/// the triple whose object has the highest <c>thisOntology.GetOClassLevel</c> value is saved
/// (the first one wins on a tie), so that e.g. of Thing / Species / Animal / Mammal only the
/// most specific class is inserted. A group is saved when the first line of the next subject
/// is read; the last group is saved after the end of the file.
/// Processing stops early when <c>SaveTriple</c> returns false or when the number of saved
/// instances exceeds <c>Properties.Settings.Default.InsertLimit</c>.
/// Exceptions are not propagated: they are reported through <paramref name="LogError"/>.
/// </remarks>
/// <param name="filename">Path of the triple file to read.</param>
/// <param name="thisOntology">Ontology used to look up the class level of each triple object.</param>
/// <param name="dictExistingNodes">Passed through unchanged to <c>SaveTriple</c>.</param>
/// <param name="dictDirectoryEntries">Passed through unchanged to <c>SaveTriple</c>.</param>
/// <param name="dictVertexIDs">Passed through unchanged to <c>SaveTriple</c>.</param>
/// <param name="LogMessage">Callback receiving progress messages.</param>
/// <param name="LogError">Callback receiving error messages.</param>
private static void CreateNodes(String filename, Ontology thisOntology, Dictionary<string, List<string>> dictExistingNodes, Dictionary<string, uint> dictDirectoryEntries, Dictionary<string, long> dictVertexIDs, Action<string> LogMessage, Action<string> LogError)
{
    LogMessage("Begin CreateNodes(" + filename + ")");

    try
    {
        // The StreamReader constructor throws when the file cannot be opened (handled by the
        // catch below); it never yields null, so no null check is needed here.
        using (StreamReader srNodes = new StreamReader(filename))
        {
            int iCurrentLevel = -1;          // class level of the currently selected triple
            int iCurrentTripleLevel = 0;     // class level of the triple read last
            String strCurrentTriple;
            Triple currentTriple = null;
            Triple selectedTriple = null;    // best candidate of the current subject group, not saved yet
            uint lineCount = 0;
            uint instanceCount = 0;
            bool saveFailed = false;         // set when SaveTriple reported a failure and the loop was aborted

            while ((strCurrentTriple = srNodes.ReadLine()) != null)
            {
                // Progress output.
                if (lineCount % 100 == 0)
                {
                    Console.Write(".");
                }

                if (lineCount % 10000 == 0)
                {
                    LogMessage("CreateNodes: lineCount=" + lineCount + " instanceCount=" + instanceCount);

                    // NOTE(review): forced collections are normally discouraged; kept because the
                    // original import relied on them - confirm whether they are still needed.
                    GC.Collect();
                    GC.Collect();
                }

                // NOTE(review): the pending triple is still saved after this break, so slightly
                // more than InsertLimit instances can be written - confirm this is intended.
                if (instanceCount > Properties.Settings.Default.InsertLimit)
                {
                    LogMessage("Quit execution due to InsertLimit setting");
                    break;
                }

                lineCount++;

                currentTriple = NTripleParser.Split(strCurrentTriple, LogError);

                /* Sample input for orientation (Subject, Predicate, TripleObject):
                 *
                 * <http://dbpedia.org/resource/Autism> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://www.w3.org/2002/07/owl#Thing> .
                 * <http://dbpedia.org/resource/Autism> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://dbpedia.org/ontology/Disease> .
                 * <http://dbpedia.org/resource/Alabama> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://www.w3.org/2002/07/owl#Thing> .
                 * <http://dbpedia.org/resource/Alabama> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://dbpedia.org/ontology/Place> .
                 * <http://dbpedia.org/resource/Alabama> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://dbpedia.org/ontology/PopulatedPlace> .
                 * <http://dbpedia.org/resource/Alabama> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://dbpedia.org/ontology/AdministrativeRegion> .
                 */

                if (currentTriple == null)
                {
                    // NOTE(review): assumes NTripleParser.Split returns null for a line it cannot
                    // parse (it is handed LogError) - confirm. Skipping the line keeps a single
                    // bad line from aborting the whole import with a NullReferenceException.
                    continue;
                }

                // Very first usable line: it opens the first subject group.
                if (selectedTriple == null)
                {
                    selectedTriple = currentTriple;
                }

                iCurrentTripleLevel = thisOntology.GetOClassLevel(currentTriple.TripleObject);

                if (selectedTriple.Subject.Equals(currentTriple.Subject))
                {
                    // Same subject: keep the triple with the strictly higher class level.
                    if (iCurrentLevel < iCurrentTripleLevel)
                    {
                        selectedTriple = currentTriple;
                        iCurrentLevel = iCurrentTripleLevel;
                    }
                }
                else
                {
                    // A new subject starts: persist the winner of the previous group.
                    if (!SaveTriple(selectedTriple, dictExistingNodes, dictDirectoryEntries, dictVertexIDs, LogError))
                    {
                        LogError("CreateNodes aborted: saving a node failed (input line " + lineCount + ")");
                        saveFailed = true;
                        break;
                    }

                    instanceCount++;

                    // The current line becomes the first candidate of the new group.
                    selectedTriple = currentTriple;
                    iCurrentLevel = iCurrentTripleLevel;
                }
            }

            // Flush the last pending group. Skipped after a failed save: in that case
            // selectedTriple is still the triple that just failed and must not be saved again.
            if (selectedTriple != null && !saveFailed)
            {
                SaveTriple(selectedTriple, dictExistingNodes, dictDirectoryEntries, dictVertexIDs, LogError);
            }
        }
    }
    catch (Exception e)
    {
        LogError("Error creating instance file");
        LogError(e.Message);
        LogError(e.StackTrace);
    }

    LogMessage("End CreateNodes(" + filename + ")");
}