/// <summary>
/// Returns the value of the literal stored in slot <paramref name="ind"/> of <c>row</c> as a double.
/// </summary>
/// <param name="ind">Index of the slot in <c>row</c>; the slot is cast to <c>Literal</c>.</param>
/// <returns>The literal's <c>Value</c> converted with a direct <c>(double)</c> cast.</returns>
public double Vai(short ind)
{
    // The literal's vid is intentionally not validated here; a value that cannot be
    // cast to double surfaces as a cast failure on the return statement.
    var literal = (Literal)row[ind];
    return (double)literal.Value;
}
/// <summary>
/// Resets slot <paramref name="si"/> of <c>row</c> to a default value matching what it currently holds.
/// </summary>
/// <remarks>
/// A null slot is left untouched. An <c>int</c> slot becomes <c>int.MinValue</c>. A <c>Literal</c> slot is
/// replaced by a new <c>Literal</c> with the same vid whose <c>Value</c> is the per-vid default assigned below.
/// </remarks>
/// <param name="si">Index of the slot in <c>row</c> to reset.</param>
/// <exception cref="InvalidOperationException">
/// The slot holds something that is neither null, an <c>int</c>, nor a <c>Literal</c>.
/// </exception>
public void Reset(short si)
{
    object current = row[si];
    if (current == null)
    {
        return;
    }

    if (current is int)
    {
        row[si] = int.MinValue;
        return;
    }

    var oldLiteral = current as Literal;
    if (oldLiteral == null)
    {
        // Previously a bare Exception without a message; say what was found so the failure can be diagnosed.
        throw new InvalidOperationException(string.Format(
            "Reset: slot {0} holds a value of unsupported type {1}; expected int or Literal.",
            si, current.GetType()));
    }

    var newLiteral = new Literal(oldLiteral.vid);
    row[si] = newLiteral;
    switch (oldLiteral.vid)
    {
        case LiteralVidEnumeration.integer:
            // Stored as a double (0.0), not as an int.
            newLiteral.Value = 0.0;
            break;
        case LiteralVidEnumeration.typedObject:
            newLiteral.Value = new TypedObject();
            break;
        case LiteralVidEnumeration.text:
            newLiteral.Value = new Text { };
            break;
        case LiteralVidEnumeration.date:
            // Dates are kept in their binary (long) form.
            newLiteral.Value = DateTime.MinValue.ToBinary();
            break;
        case LiteralVidEnumeration.boolean:
            newLiteral.Value = false;
            break;
        case LiteralVidEnumeration.nil:
            // Nothing to assign: Value stays as the Literal constructor left it.
            break;
    }
}
/// <summary>
/// Lazily reads a Turtle-like text file and yields one <see cref="TripletGraph"/> per subject block.
/// </summary>
/// <remarks>
/// Each non-empty line is classified by its first character:
/// '@' is a "@prefix name &lt;iri&gt; ." declaration, any other non-space character starts a new subject,
/// and a leading space marks a "predicate object" line belonging to the current subject.
/// Graphs are buffered; when a new subject starts and at least <c>BufferMax</c> triples are buffered,
/// the collected entity strings are passed to <c>EntityCoding.InsertPortion</c> and the buffered graphs are
/// yielded. The remainder is flushed after the last line, then <c>MakeIndexed</c> is called.
/// Malformed lines are reported on the console and skipped.
/// </remarks>
/// <param name="datafile">Path of the text file to read.</param>
/// <param name="rdfIntStore">Store whose namespace, entity, predicate and literal tables are filled while reading.</param>
/// <returns>A deferred sequence of subject graphs; the file is only read while the sequence is enumerated.</returns>
private static IEnumerable<TripletGraph> LoadGraphs(string datafile, RDFIntStoreAbstract rdfIntStore)
{
    int ntriples = 0;
    int nTripletsInBuffer = 0;
    List<TripletGraph> bufferedGraphs = new List<TripletGraph>(BufferMax);
    TripletGraph currentTripletGraph = null;
    HashSet<string> entitiesStrings = new HashSet<string>();

    using (var sr = new StreamReader(datafile))
    {
        while (!sr.EndOfStream)
        {
            string line = sr.ReadLine();
            if (string.IsNullOrEmpty(line))
            {
                continue;
            }

            if (line[0] == '@')
            {
                // Namespace declaration: "@prefix <prefix> <iri> ."
                string[] parts = line.Split(' ');
                if (parts.Length != 4 || parts[0] != "@prefix" || parts[3] != ".")
                {
                    Console.WriteLine("Err: strange line: " + line);
                    continue;
                }
                string pref = parts[1];
                string nsname = parts[2];
                if (nsname.Length < 3 || nsname[0] != '<' || nsname[nsname.Length - 1] != '>')
                {
                    Console.WriteLine("Err: strange nsname: " + nsname);
                    continue;
                }
                // Strip the surrounding angle brackets.
                nsname = nsname.Substring(1, nsname.Length - 2);
                rdfIntStore.NameSpaceStore.AddPrefix(pref, nsname);
            }
            else if (line[0] != ' ')
            {
                // A new subject starts: flush first if enough triples are buffered. The threshold counts
                // triples, not graphs, and is only checked here so a subject's graph is never split.
                if (nTripletsInBuffer >= BufferMax)
                {
                    rdfIntStore.EntityCoding.InsertPortion(entitiesStrings);
                    foreach (var tripletGraph in bufferedGraphs)
                    {
                        yield return tripletGraph;
                    }
                    bufferedGraphs.Clear();
                    entitiesStrings.Clear();
                    GC.Collect();
                    nTripletsInBuffer = 0;
                }

                // Subject
                string subject = rdfIntStore.NameSpaceStore.GetShortFromFullOrPrefixed(line.Trim());
                entitiesStrings.Add(subject);
                currentTripletGraph = new TripletGraph() { subject = subject };
                bufferedGraphs.Add(currentTripletGraph);
            }
            else
            {
                // Predicate and object
                if (currentTripletGraph == null)
                {
                    // A predicate line before any subject used to fail with a NullReferenceException.
                    Console.WriteLine("Err: no subject for line: " + line);
                    continue;
                }

                string line1 = line.Trim();
                int first_blank = line1.IndexOf(' ');
                if (first_blank == -1)
                {
                    Console.WriteLine("Err in line: " + line);
                    continue;
                }

                string rest_line = line1.Substring(first_blank + 1).Trim();
                // Remove the last character (the statement terminator).
                rest_line = rest_line.Substring(0, rest_line.Length - 1).Trim();
                if (rest_line.Length == 0)
                {
                    // Nothing left for the object, e.g. "pred ."; indexing rest_line[0] would throw.
                    Console.WriteLine("Err in line: " + line);
                    continue;
                }

                bool isDatatype = rest_line[0] == '\"';
                string pred_line = line1.Substring(0, first_blank);
                string predicateString = rdfIntStore.NameSpaceStore.GetShortFromFullOrPrefixed(pred_line);

                // The object is either an entity or a data value; a data value may carry
                // a language specifier or a datatype.
                if (isDatatype)
                {
                    // Last double quote closes the data value.
                    int lastqu = rest_line.LastIndexOf('\"');
                    if (lastqu == 0)
                    {
                        // Only the opening quote is present; Substring(1, -1) would throw.
                        Console.WriteLine("Err in line: " + line);
                        continue;
                    }

                    // Data value
                    string sdata = rest_line.Substring(1, lastqu - 1);

                    // Language specifier: '@' directly after the closing quote.
                    string lang = null;
                    int dog = rest_line.LastIndexOf('@');
                    if (dog == lastqu + 1)
                    {
                        lang = rest_line.Substring(dog + 1);
                    }

                    // Datatype: "^^" directly after the closing quote. Checked by position rather than
                    // by searching for the first "^^", which could match inside the quoted text.
                    string datatype = null;
                    int typeMarker = lastqu + 1;
                    if (rest_line.Length > typeMarker + 1
                        && rest_line[typeMarker] == '^'
                        && rest_line[typeMarker + 1] == '^')
                    {
                        // The datatype may be given in prefixed or in full form.
                        string qname = rest_line.Substring(typeMarker + 2);
                        datatype = rdfIntStore.NameSpaceStore.GetShortFromFullOrPrefixed(qname);
                    }

                    Literal literal = rdfIntStore.LiteralStore.Create(datatype, sdata, lang);
                    rdfIntStore.PredicatesCoding.Insert(predicateString, literal.vid);
                    currentTripletGraph.PredicateDataValuePairs.Add(
                        new KeyValuePair<int, Literal>(
                            rdfIntStore.PredicatesCoding[predicateString],
                            rdfIntStore.LiteralStore.Write(literal)));
                }
                else
                {
                    rdfIntStore.PredicatesCoding.Insert(predicateString, null);
                    string obj = rdfIntStore.NameSpaceStore.GetShortFromFullOrPrefixed(rest_line);
                    entitiesStrings.Add(obj);
                    currentTripletGraph.PredicateObjValuePairs.Add(
                        new KeyValuePair<int, string>(rdfIntStore.PredicatesCoding[predicateString], obj));
                }

                ntriples++;
                nTripletsInBuffer++;
                if (ntriples % 100000 == 0)
                {
                    Console.Write("r{0} ", ntriples / 100000);
                }
            }
        }
    }

    // Final flush of whatever is still buffered.
    rdfIntStore.EntityCoding.InsertPortion(entitiesStrings);
    foreach (var tripletGraph in bufferedGraphs)
    {
        yield return tripletGraph;
    }
    bufferedGraphs.Clear();
    rdfIntStore.MakeIndexed();
    entitiesStrings.Clear();
    GC.Collect();
    Console.WriteLine("ntriples={0}", ntriples);
}
/// <summary>
/// Lazily reads a Turtle-like text file and yields one coded triple per "predicate object" line.
/// </summary>
/// <remarks>
/// Each non-empty line is classified by its first character:
/// '@' is a "@prefix name &lt;iri&gt; ." declaration, any other non-space character sets the current subject,
/// and a leading space marks a "predicate object" line for that subject.
/// Quoted objects are yielded as <c>DTripleInt</c> (literal), all others as <c>OTripleInt</c> (entity).
/// After the last line <c>MakeIndexed</c> is called on the store.
/// Malformed lines are reported on the console and skipped.
/// </remarks>
/// <param name="datafile">Path of the text file to read.</param>
/// <param name="rdf">Store whose namespace, entity, predicate and literal tables are filled while reading.</param>
/// <returns>A deferred sequence of triples; the file is only read while the sequence is enumerated.</returns>
private static IEnumerable<TripleInt> LoadTriplets(string datafile, RDFIntStoreAbstract rdf)
{
    int ntriples = 0;
    bool hasSubject = false;
    int subjectCode = 0;

    using (var sr = new StreamReader(datafile))
    {
        while (!sr.EndOfStream)
        {
            string line = sr.ReadLine();
            if (string.IsNullOrEmpty(line))
            {
                continue;
            }

            if (line[0] == '@')
            {
                // Namespace declaration: "@prefix <prefix> <iri> ."
                string[] parts = line.Split(' ');
                if (parts.Length != 4 || parts[0] != "@prefix" || parts[3] != ".")
                {
                    Console.WriteLine("Err: strange line: " + line);
                    continue;
                }
                string pref = parts[1];
                string nsname = parts[2];
                if (nsname.Length < 3 || nsname[0] != '<' || nsname[nsname.Length - 1] != '>')
                {
                    Console.WriteLine("Err: strange nsname: " + nsname);
                    continue;
                }
                // Strip the surrounding angle brackets.
                nsname = nsname.Substring(1, nsname.Length - 2);
                // NOTE(review): LoadGraphs registers prefixes through NameSpaceStore.AddPrefix instead of
                // this inline code - confirm whether the two are equivalent and can be unified.
                rdf.NameSpaceStore.namespacesByPrefix.Add(pref, nsname);

                // The namespace is coded without its trailing '/', '\' or '#'.
                string ns = nsname;
                char tail = ns[ns.Length - 1];
                if (tail == '/' || tail == '\\' || tail == '#')
                {
                    ns = ns.Substring(0, ns.Length - 1);
                }

                int code;
                if (!rdf.NameSpaceStore.Codes.TryGetValue(ns, out code))
                {
                    // A new namespace gets the next free code, i.e. the current number of entries.
                    rdf.NameSpaceStore.Codes.Add(ns, rdf.NameSpaceStore.Codes.Count);
                    rdf.NameSpaceStore.NameSpaceStrings.Add(ns);
                }
            }
            else if (line[0] != ' ')
            {
                // Subject
                string subject = rdf.NameSpaceStore.GetShortFromFullOrPrefixed(line.Trim());
                subjectCode = rdf.EntityCoding.InsertOne(subject);
                hasSubject = true;
            }
            else
            {
                // Predicate and object
                if (!hasSubject)
                {
                    // Without this check the triple would silently be attributed to entity code 0.
                    Console.WriteLine("Err: no subject for line: " + line);
                    continue;
                }

                string line1 = line.Trim();
                int first_blank = line1.IndexOf(' ');
                if (first_blank == -1)
                {
                    Console.WriteLine("Err in line: " + line);
                    continue;
                }

                string rest_line = line1.Substring(first_blank + 1).Trim();
                // Remove the last character (the statement terminator).
                rest_line = rest_line.Substring(0, rest_line.Length - 1).Trim();
                if (rest_line.Length == 0)
                {
                    // Nothing left for the object, e.g. "pred ."; indexing rest_line[0] would throw.
                    Console.WriteLine("Err in line: " + line);
                    continue;
                }

                bool isDatatype = rest_line[0] == '\"';
                string pred_line = line1.Substring(0, first_blank);
                string predicateString = rdf.NameSpaceStore.GetShortFromFullOrPrefixed(pred_line);

                // The object is either an entity or a data value; a data value may carry
                // a language specifier or a datatype.
                if (isDatatype)
                {
                    // Last double quote closes the data value.
                    int lastqu = rest_line.LastIndexOf('\"');
                    if (lastqu == 0)
                    {
                        // Only the opening quote is present; Substring(1, -1) would throw.
                        Console.WriteLine("Err in line: " + line);
                        continue;
                    }

                    // Data value
                    string sdata = rest_line.Substring(1, lastqu - 1);

                    // Language specifier: '@' directly after the closing quote.
                    string lang = null;
                    int dog = rest_line.LastIndexOf('@');
                    if (dog == lastqu + 1)
                    {
                        lang = rest_line.Substring(dog + 1);
                    }

                    // Datatype: "^^" directly after the closing quote. Checked by position rather than
                    // by searching for the first "^^", which could match inside the quoted text.
                    string datatype = null;
                    int typeMarker = lastqu + 1;
                    if (rest_line.Length > typeMarker + 1
                        && rest_line[typeMarker] == '^'
                        && rest_line[typeMarker + 1] == '^')
                    {
                        // The datatype may be given in prefixed or in full form.
                        string qname = rest_line.Substring(typeMarker + 2);
                        datatype = rdf.NameSpaceStore.GetShortFromFullOrPrefixed(qname);
                    }

                    Literal literal = rdf.LiteralStore.Create(datatype, sdata, lang);
                    rdf.PredicatesCoding.Insert(predicateString, literal.vid);
                    yield return new DTripleInt(
                        subjectCode,
                        rdf.PredicatesCoding[predicateString],
                        rdf.LiteralStore.Write(literal));
                }
                else
                {
                    rdf.PredicatesCoding.Insert(predicateString, null);
                    string obj = rdf.NameSpaceStore.GetShortFromFullOrPrefixed(rest_line);
                    yield return new OTripleInt(
                        subjectCode,
                        rdf.PredicatesCoding[predicateString],
                        rdf.EntityCoding.InsertOne(obj));
                }

                ntriples++;
                if (ntriples % 100000 == 0)
                {
                    Console.Write("r{0} ", ntriples / 100000);
                }
            }
        }
    }

    rdf.MakeIndexed();
    GC.Collect();
    Console.WriteLine("ntriples={0}", ntriples);
}