/// <summary>
/// Returns the title of a publication by joining all values of its title field with single spaces.
/// </summary>
/// <param name="publication">Publication whose entries are searched for the title field.</param>
/// <returns>The joined title text, or <c>null</c> when the publication has no title field.</returns>
/// <exception cref="Exception">The current file type is neither WOS nor SCOPUS.</exception>
public string GetTitle(Publication publication)
{
    // The title is stored under a different field name for each file type.
    string titleField;
    switch (fileType)
    {
        case FileType.WOS:
            titleField = "TI";
            break;

        case FileType.SCOPUS:
            titleField = "Title";
            break;

        default:
            throw new Exception("Unhandled file type: " + fileType);
    }

    if (!publication.entries.ContainsKey(titleField))
    {
        return null;
    }

    PublicationEntry titleEntry = publication.entries[titleField];
    return string.Join(" ", titleEntry.values);
}
/// <summary>
/// Reads a CSV file and converts every data row into a <see cref="Publication"/>;
/// the resulting file is tagged as <see cref="FileType.SCOPUS"/>.
/// </summary>
/// <remarks>
/// The first row is used as the header: each cell of a data row becomes an entry whose field
/// name is the header cell at the same index. Cell text is split on ';' and every part is trimmed.
/// Rows with more cells than the header are logged and skipped; shorter rows are accepted and
/// simply produce fewer entries.
/// </remarks>
/// <param name="filePath">Path of the CSV file, read as UTF-8.</param>
/// <returns>
/// The parsed publications. When the file contains no header row at all the result is empty
/// (a warning is logged) instead of failing with a null dereference.
/// </returns>
public static PublicationsFile Parse(string filePath)
{
    PublicationsFile parsed = new PublicationsFile();
    parsed.Type = FileType.SCOPUS;

    using (TextReader textReader = new StreamReader(filePath, Encoding.UTF8))
    {
        var csv = new CsvParser(textReader);
        var titleRow = csv.Read();

        if (titleRow == null)
        {
            // Read() reports "no more rows" with null (the loop below relies on the same signal),
            // so a null header means the file is empty and there is nothing to map columns to.
            logger.Warn("CSV file has no header row, no publications loaded: {0}", filePath);
        }
        else
        {
            int rowIndex = 0;
            while (true)
            {
                ++rowIndex;
                var row = csv.Read();
                if (row == null)
                {
                    break;
                }

                if (row.Length > titleRow.Length)
                {
                    // A row longer than the header always has at least one cell, so row[0] is safe here.
                    // rowIndex counts data rows; +1 accounts for the header row in the reported number.
                    logger.Warn("Invalid CSV row ({0}), ignoring: {1}", rowIndex + 1, row[0]);
                    continue;
                }

                Publication publication = new Publication();
                for (int i = 0; i < row.Length; ++i)
                {
                    PublicationEntry entry = new PublicationEntry();
                    entry.field = titleRow[i];
                    entry.values = new List<string>();

                    // A single cell may hold several ';'-separated values.
                    foreach (string value in row[i].Split(';'))
                    {
                        entry.values.Add(value.Trim());
                    }

                    publication.entries.Add(entry.field, entry);
                }

                parsed.publications.Add(publication);
            }
        }
    }

    logger.Info("Publications loaded from scopus: {0}", parsed.publications.Count);
    return parsed;
}
/// <summary>
/// Extracts the journal name of a publication by joining all values of its journal field
/// with single spaces, logging the outcome.
/// </summary>
/// <param name="publication">Publication whose entries are searched for the journal field.</param>
/// <returns>
/// The joined journal text, or <c>null</c> when the field is missing, has no values,
/// or joins to an empty string.
/// </returns>
/// <exception cref="Exception">The current file type is neither WOS nor SCOPUS.</exception>
public string GetJournal(Publication publication)
{
    // The journal is stored under a different field name for each file type.
    string journalField;
    switch (fileType)
    {
        case FileType.WOS:
            journalField = "SO";
            break;

        case FileType.SCOPUS:
            journalField = "Source title";
            break;

        default:
            throw new Exception("Unhandled file type: " + fileType);
    }

    string extracted = null;
    if (!publication.entries.ContainsKey(journalField))
    {
        logger.Warn("No field for journal extraction");
    }
    else
    {
        PublicationEntry journalEntry = publication.entries[journalField];
        if (journalEntry.values.Count == 0)
        {
            logger.Warn("No values for journal extraction");
        }
        else
        {
            string joined = string.Join(" ", journalEntry.values);

            // An empty join result counts as "nothing extracted".
            extracted = string.IsNullOrEmpty(joined) ? null : joined;
        }
    }

    if (extracted == null)
    {
        logger.Warn("No journal extracted from publication: \"{0}\"", GetTitle(publication));
    }
    else
    {
        logger.Info("Extracted journal: \"{0}\" (publication: \"{1}\")", extracted, GetTitle(publication));
    }

    return extracted;
}
/// <summary>
/// Extracts a country for a publication by passing the last value of the relevant field
/// ("PA" for WOS, "Publisher" for SCOPUS) to <c>ParseCountry</c>, logging the outcome.
/// </summary>
/// <param name="publication">Publication whose entries are searched.</param>
/// <returns>
/// The parsed country, or <c>null</c> when the field is missing, has no values,
/// or <c>ParseCountry</c> yields null or an empty string.
/// </returns>
/// <exception cref="Exception">The current file type is neither WOS nor SCOPUS.</exception>
public string GetCountry(Publication publication)
{
    // The source field differs per file type.
    string countryField;
    switch (fileType)
    {
        case FileType.WOS:
            countryField = "PA";
            break;

        case FileType.SCOPUS:
            countryField = "Publisher";
            break;

        default:
            throw new Exception("Unhandled file type: " + fileType);
    }

    string country = null;
    if (!publication.entries.ContainsKey(countryField))
    {
        logger.Debug("No field for country extraction");
    }
    else
    {
        PublicationEntry countryEntry = publication.entries[countryField];
        if (countryEntry.values.Count == 0)
        {
            logger.Debug("No values for country extraction");
        }
        else
        {
            // Only the last value of the field is inspected.
            int lastIndex = countryEntry.values.Count - 1;
            string candidate = ParseCountry(countryEntry.values[lastIndex]);
            country = string.IsNullOrEmpty(candidate) ? null : candidate;
        }
    }

    if (country == null)
    {
        logger.Warn("No country extracted from publication: \"{0}\"", GetTitle(publication));
    }
    else
    {
        logger.Info("Extracted country: \"{0}\" (publication: \"{1}\")", country, GetTitle(publication));
    }

    return country;
}
/// <summary>
/// Reads a tagged text file line by line and groups its fields into publications;
/// the resulting file is tagged as <see cref="FileType.WOS"/>.
/// </summary>
/// <remarks>
/// Line handling:
/// <list type="bullet">
/// <item>Empty lines are skipped, including any that follow the "EF" marker.</item>
/// <item>"EF" marks the end of the file; any non-empty line after it is an error.</item>
/// <item>"ER" closes the current publication and starts a new one.</item>
/// <item>Lines tagged "FN" or "VR" are ignored.</item>
/// <item>Otherwise the first three characters (trailing spaces removed) are the field tag and
/// the remainder is the value. A line shorter than three characters is treated as a tag with
/// an empty value instead of causing an out-of-range failure.</item>
/// <item>A line with an empty tag continues the most recent field of the current publication.
/// If there is no such field (start of file, or right after "ER") the line is logged and skipped
/// rather than dereferencing null or altering a publication that was already closed.</item>
/// </list>
/// A publication that is not terminated by "ER" is not added to the result.
/// </remarks>
/// <param name="filePath">Path of the file, read as UTF-8.</param>
/// <returns>The parsed publications.</returns>
/// <exception cref="Exception">Non-empty data was found after the "EF" marker.</exception>
public static PublicationsFile Parse(string filePath)
{
    string[] lines = File.ReadAllLines(filePath, Encoding.UTF8);

    PublicationsFile parsed = new PublicationsFile();
    parsed.Type = FileType.WOS;

    Publication currentPublication = new Publication();
    PublicationEntry currentEntry = null;
    bool wasEndOfFile = false;

    foreach (string line in lines)
    {
        // Blank lines carry no data, so they are ignored everywhere, even after "EF".
        if (line == "")
        {
            continue;
        }

        if (wasEndOfFile)
        {
            throw new Exception("Data after end of file");
        }

        if (line == "EF")
        {
            wasEndOfFile = true;
            continue;
        }

        if (line == "ER")
        {
            parsed.publications.Add(currentPublication);
            currentPublication = new Publication();

            // Forget the last field so a stray continuation line cannot modify the stored record.
            currentEntry = null;
            continue;
        }

        // Guard the fixed-width split: Substring(0, 3) would throw on lines shorter than 3 characters.
        string tagPart = (line.Length > 3 ? line.Substring(0, 3) : line).TrimEnd(' ');
        if (tagPart == "FN" || tagPart == "VR")
        {
            continue;
        }

        string valuePart = line.Length > 3 ? line.Substring(3) : "";

        if (tagPart != "")
        {
            currentEntry = new PublicationEntry();
            currentEntry.field = tagPart;
            currentEntry.values.Add(valuePart);
            currentPublication.entries.Add(tagPart, currentEntry);
        }
        else if (currentEntry != null)
        {
            // Empty tag: this line continues the previous field.
            currentEntry.values.Add(valuePart);
        }
        else
        {
            logger.Warn("Continuation line without a preceding field, ignoring: \"{0}\"", line);
        }
    }

    logger.Info("Publications loaded from WOS: {0}", parsed.publications.Count);
    return parsed;
}
/// <summary>
/// Builds a list of locations from the address field of a publication
/// ("C1" for WOS, "Affiliations" for SCOPUS), logging the outcome.
/// </summary>
/// <remarks>
/// Both parser functions are invoked for every address value. An address contributes a location
/// only when both return non-null; each null result is logged as a warning. A non-empty
/// additional name prefix is upper-cased (invariant culture) and suffixed with ", ".
/// </remarks>
/// <param name="publication">Publication whose entries are searched for the address field.</param>
/// <param name="parseNameForGeocodingFunc">Maps an address value to the name used for geocoding, or null.</param>
/// <param name="parseAdditionalNamePrefixFunc">Maps an address value to an additional name prefix, or null.</param>
/// <param name="nameForLog">Label inserted into the log messages.</param>
/// <returns>The extracted locations; empty when nothing could be extracted.</returns>
/// <exception cref="Exception">The current file type is neither WOS nor SCOPUS.</exception>
private List<ParsedLocation> GetLocationsFromAddresses(Publication publication, Func<string, string> parseNameForGeocodingFunc, Func<string, string> parseAdditionalNamePrefixFunc, string nameForLog)
{
    // The address field differs per file type.
    string addressField;
    switch (fileType)
    {
        case FileType.WOS:
            addressField = "C1";
            break;

        case FileType.SCOPUS:
            addressField = "Affiliations";
            break;

        default:
            throw new Exception("Unhandled file type: " + fileType);
    }

    List<ParsedLocation> locations = new List<ParsedLocation>();

    if (!publication.entries.ContainsKey(addressField))
    {
        logger.Debug("No field for {0} extraction", nameForLog);
    }
    else
    {
        PublicationEntry addressEntry = publication.entries[addressField];
        if (addressEntry.values.Count == 0)
        {
            logger.Debug("No values for {0} extraction", nameForLog);
        }

        foreach (string address in addressEntry.values)
        {
            // Both parsers always run (and warn independently) before deciding whether to keep the address.
            string geocodingName = parseNameForGeocodingFunc(address);
            if (geocodingName == null)
            {
                logger.Warn("No {0} extracted from address: \"{1}\" (in publication \"{2}\")", nameForLog, address, GetTitle(publication));
            }

            string namePrefix = parseAdditionalNamePrefixFunc(address);
            if (namePrefix == null)
            {
                logger.Warn("No {0} (additional name prefix) extracted from address: \"{1}\" (in publication \"{2}\")", nameForLog, address, GetTitle(publication));
            }

            if (geocodingName == null || namePrefix == null)
            {
                continue;
            }

            if (namePrefix != "")
            {
                namePrefix = namePrefix.ToUpperInvariant() + ", ";
            }

            ParsedLocation location = new ParsedLocation();
            location.NameForGeocoding = geocodingName;
            location.AdditionalNamePrefix = namePrefix;
            locations.Add(location);
        }
    }

    if (locations.Count == 0)
    {
        logger.Warn("No {0} extracted from publication: \"{1}\"", nameForLog, GetTitle(publication));
    }
    else
    {
        logger.Info("Extracted {0}: \"{1}\" (publication: \"{2}\")", nameForLog, string.Join(", ", locations), GetTitle(publication));
    }

    return locations;
}