Пример #1
0
        /// <summary>
        /// Returns the title of a publication as a single string, with the stored
        /// title values joined by single spaces.
        /// </summary>
        /// <param name="publication">Publication whose entries are searched for the title field.</param>
        /// <returns>The joined title values, or <c>null</c> when the publication has no title field.</returns>
        /// <exception cref="Exception">The current file type is neither WOS nor SCOPUS.</exception>
        public string GetTitle(Publication publication)
        {
            string titleKey;

            // The title lives under a different field name in each supported format.
            switch (fileType)
            {
                case FileType.WOS:
                    titleKey = "TI";
                    break;

                case FileType.SCOPUS:
                    titleKey = "Title";
                    break;

                default:
                    throw new Exception("Unhandled file type: " + fileType);
            }

            if (!publication.entries.ContainsKey(titleKey))
            {
                return null;
            }

            return string.Join(" ", publication.entries[titleKey].values);
        }
Пример #2
0
        /// <summary>
        /// Reads a Scopus CSV export (as UTF-8) and converts every data row into a
        /// <see cref="Publication"/>. The first record is used as the header: each cell
        /// of a data row becomes an entry keyed by the header cell in the same column,
        /// with the cell text split on ';' and every part trimmed.
        /// </summary>
        /// <param name="filePath">Path of the CSV file to read.</param>
        /// <returns>
        /// A file object of type SCOPUS holding the parsed publications. It contains no
        /// publications when the file has no header row.
        /// </returns>
        public static PublicationsFile Parse(string filePath)
        {
            PublicationsFile parsed = new PublicationsFile();

            parsed.Type = FileType.SCOPUS;

            using (TextReader textReader = new StreamReader(filePath, Encoding.UTF8))
            {
                var csv = new CsvParser(textReader);

                // 1-based record number used in log messages; the header is record 1.
                int rowNumber = 1;
                var titleRow = csv.Read();

                if (titleRow == null)
                {
                    // An empty file has no header; without this check the column-count
                    // comparison below would dereference null.
                    logger.Warn("No header row in CSV file: {0}", filePath);
                }

                while (titleRow != null)
                {
                    var row = csv.Read();
                    if (row == null)
                    {
                        break;
                    }

                    ++rowNumber;

                    // A row with more cells than the header cannot be mapped to field names.
                    // Such a row always has at least one cell, so row[0] is safe to log.
                    if (row.Length > titleRow.Length)
                    {
                        logger.Warn("Invalid CSV row ({0}), ignoring: {1}", rowNumber, row[0]);
                        continue;
                    }

                    // Rows shorter than the header are accepted; the missing trailing
                    // columns simply produce no entries.
                    Publication publication = new Publication();
                    for (int i = 0; i < row.Length; ++i)
                    {
                        PublicationEntry entry = new PublicationEntry();
                        entry.field  = titleRow[i];
                        entry.values = new List <string>();

                        // Multi-valued cells are ';'-separated.
                        foreach (string value in row[i].Split(';'))
                        {
                            entry.values.Add(value.Trim());
                        }

                        publication.entries.Add(entry.field, entry);
                    }

                    parsed.publications.Add(publication);
                }
            }

            logger.Info("Publications loaded from scopus: {0}", parsed.publications.Count);

            return parsed;
        }
Пример #3
0
        /// <summary>
        /// Extracts the journal (source title) of a publication by joining the values
        /// of the journal field with single spaces, and logs the outcome.
        /// </summary>
        /// <param name="publication">Publication whose entries are searched for the journal field.</param>
        /// <returns>
        /// The joined journal name, or <c>null</c> when the field is missing, has no
        /// values, or joins to an empty string.
        /// </returns>
        /// <exception cref="Exception">The current file type is neither WOS nor SCOPUS.</exception>
        public string GetJournal(Publication publication)
        {
            string journalKey;

            // The journal name lives under a different field name in each supported format.
            switch (fileType)
            {
                case FileType.WOS:
                    journalKey = "SO";
                    break;

                case FileType.SCOPUS:
                    journalKey = "Source title";
                    break;

                default:
                    throw new Exception("Unhandled file type: " + fileType);
            }

            string journal = null;

            if (!publication.entries.ContainsKey(journalKey))
            {
                logger.Warn("No field for journal extraction");
            }
            else
            {
                PublicationEntry journalEntry = publication.entries[journalKey];

                if (journalEntry.values.Count == 0)
                {
                    logger.Warn("No values for journal extraction");
                }
                else
                {
                    // An empty joined string counts as "nothing extracted".
                    string joined = string.Join(" ", journalEntry.values);
                    journal = string.IsNullOrEmpty(joined) ? null : joined;
                }
            }

            // Report the final outcome together with the publication title.
            if (journal == null)
            {
                logger.Warn("No journal extracted from publication: \"{0}\"", GetTitle(publication));
            }
            else
            {
                logger.Info("Extracted journal: \"{0}\" (publication: \"{1}\")", journal, GetTitle(publication));
            }

            return journal;
        }
Пример #4
0
        /// <summary>
        /// Extracts the country of a publication by passing the last value of the
        /// publisher field to <c>ParseCountry</c>, and logs the outcome.
        /// </summary>
        /// <param name="publication">Publication whose entries are searched for the publisher field.</param>
        /// <returns>
        /// The parsed country, or <c>null</c> when the field is missing, has no values,
        /// or <c>ParseCountry</c> yields null or an empty string.
        /// </returns>
        /// <exception cref="Exception">The current file type is neither WOS nor SCOPUS.</exception>
        public string GetCountry(Publication publication)
        {
            string publisherKey;

            // The publisher data lives under a different field name in each supported format.
            switch (fileType)
            {
                case FileType.WOS:
                    publisherKey = "PA";
                    break;

                case FileType.SCOPUS:
                    publisherKey = "Publisher";
                    break;

                default:
                    throw new Exception("Unhandled file type: " + fileType);
            }

            string country = null;

            if (!publication.entries.ContainsKey(publisherKey))
            {
                logger.Debug("No field for country extraction");
            }
            else
            {
                PublicationEntry publisherEntry = publication.entries[publisherKey];
                int valueCount = publisherEntry.values.Count;

                if (valueCount == 0)
                {
                    logger.Debug("No values for country extraction");
                }
                else
                {
                    // Only the last stored value is handed to the country parser.
                    string parsed = ParseCountry(publisherEntry.values[valueCount - 1]);
                    country = string.IsNullOrEmpty(parsed) ? null : parsed;
                }
            }

            // Report the final outcome together with the publication title.
            if (country == null)
            {
                logger.Warn("No country extracted from publication: \"{0}\"", GetTitle(publication));
            }
            else
            {
                logger.Info("Extracted country: \"{0}\" (publication: \"{1}\")", country, GetTitle(publication));
            }

            return country;
        }
Пример #5
0
        /// <summary>
        /// Reads a WOS tagged text export (as UTF-8) and converts it into publications.
        /// Each non-empty line starts with a three-character column that is either a
        /// field tag or blank; a blank column marks a continuation of the preceding
        /// field. "ER" closes the current publication and "EF" marks the end of the
        /// file. "FN" and "VR" lines are skipped.
        /// </summary>
        /// <param name="filePath">Path of the text file to read.</param>
        /// <returns>A file object of type WOS holding every publication that was closed by "ER".</returns>
        /// <exception cref="Exception">Any line, including an empty one, follows the "EF" marker.</exception>
        public static PublicationsFile Parse(string filePath)
        {
            string[] lines = File.ReadAllLines(filePath, Encoding.UTF8);

            PublicationsFile parsed = new PublicationsFile();

            parsed.Type = FileType.WOS;

            Publication currentPublication = new Publication();

            // Entry that receives continuation lines; null until a tagged line is seen
            // in the current publication.
            PublicationEntry currentEntry = null;

            bool wasEndOfFile = false;

            foreach (string line in lines)
            {
                if (wasEndOfFile)
                {
                    throw new Exception("Data after end of file");
                }

                if (line == "")
                {
                    continue;
                }

                if (line == "EF")
                {
                    wasEndOfFile = true;
                    continue;
                }

                if (line == "ER")
                {
                    parsed.publications.Add(currentPublication);
                    currentPublication = new Publication();

                    // A continuation line after the record end must not be appended to
                    // the entry of the publication that was just closed.
                    currentEntry = null;
                    continue;
                }

                // Lines shorter than the three-character tag column (for example a bare
                // two-letter tag with no value) are treated as a tag with an empty value
                // instead of letting Substring throw.
                bool hasValueColumn = line.Length > 3;
                string tagPart      = (hasValueColumn ? line.Substring(0, 3) : line).TrimEnd(' ');

                if (tagPart == "FN" || tagPart == "VR")
                {
                    continue;
                }

                string valuePart = hasValueColumn ? line.Substring(3) : "";

                if (tagPart != "")
                {
                    currentEntry       = new PublicationEntry();
                    currentEntry.field = tagPart;
                    currentEntry.values.Add(valuePart);
                    currentPublication.entries.Add(tagPart, currentEntry);
                }
                else if (currentEntry != null)
                {
                    currentEntry.values.Add(valuePart);
                }
                else
                {
                    // Continuation line with no field to attach to: skip it rather than
                    // dereferencing a null entry.
                    logger.Warn("Continuation line without a preceding field, ignoring: \"{0}\"", line);
                }
            }

            logger.Info("Publications loaded from WOS: {0}", parsed.publications.Count);

            return parsed;
        }
Пример #6
0
        /// <summary>
        /// Builds a list of locations from the address values of a publication. Every
        /// address is passed to both parser callbacks; a location is produced only when
        /// both return a non-null value. A non-empty additional prefix is upper-cased
        /// and suffixed with ", ".
        /// </summary>
        /// <param name="publication">Publication whose address field is read.</param>
        /// <param name="parseNameForGeocodingFunc">Callback that derives the geocoding name from an address; null means nothing was extracted.</param>
        /// <param name="parseAdditionalNamePrefixFunc">Callback that derives the additional name prefix from an address; null means nothing was extracted.</param>
        /// <param name="nameForLog">Human-readable name of what is being extracted, used in log messages only.</param>
        /// <returns>The extracted locations; empty when the field is missing or nothing could be extracted.</returns>
        /// <exception cref="Exception">The current file type is neither WOS nor SCOPUS.</exception>
        private List <ParsedLocation> GetLocationsFromAddresses(Publication publication, Func <string, string> parseNameForGeocodingFunc, Func <string, string> parseAdditionalNamePrefixFunc, string nameForLog)
        {
            string addressKey;

            // The addresses live under a different field name in each supported format.
            switch (fileType)
            {
                case FileType.WOS:
                    addressKey = "C1";
                    break;

                case FileType.SCOPUS:
                    addressKey = "Affiliations";
                    break;

                default:
                    throw new Exception("Unhandled file type: " + fileType);
            }

            List <ParsedLocation> locations = new List <ParsedLocation>();

            if (!publication.entries.ContainsKey(addressKey))
            {
                logger.Debug("No field for {0} extraction", nameForLog);
            }
            else
            {
                PublicationEntry addressEntry = publication.entries[addressKey];
                if (addressEntry.values.Count == 0)
                {
                    logger.Debug("No values for {0} extraction", nameForLog);
                }

                foreach (string address in addressEntry.values)
                {
                    // Both callbacks always run, in this order, so that each failure
                    // gets its own warning.
                    string geocodingName = parseNameForGeocodingFunc(address);
                    if (geocodingName == null)
                    {
                        logger.Warn("No {0} extracted from address: \"{1}\" (in publication \"{2}\")", nameForLog, address, GetTitle(publication));
                    }

                    string namePrefix = parseAdditionalNamePrefixFunc(address);
                    if (namePrefix == null)
                    {
                        logger.Warn("No {0} (additional name prefix) extracted from address: \"{1}\" (in publication \"{2}\")", nameForLog, address, GetTitle(publication));
                    }

                    if (geocodingName == null || namePrefix == null)
                    {
                        continue;
                    }

                    // An empty prefix is kept as is; anything else is normalised.
                    if (namePrefix.Length > 0)
                    {
                        namePrefix = namePrefix.ToUpperInvariant() + ", ";
                    }

                    locations.Add(new ParsedLocation()
                    {
                        NameForGeocoding     = geocodingName,
                        AdditionalNamePrefix = namePrefix
                    });
                }
            }

            // Report the final outcome together with the publication title.
            if (locations.Count == 0)
            {
                logger.Warn("No {0} extracted from publication: \"{1}\"", nameForLog, GetTitle(publication));
            }
            else
            {
                logger.Info("Extracted {0}: \"{1}\" (publication: \"{2}\")", nameForLog, string.Join(", ", locations), GetTitle(publication));
            }

            return locations;
        }