# Start from an empty workspace: remove every (non-hidden) object that ls()
# reports for the current environment.
# NOTE(review): rm() only removes objects; attached packages and options are
# left as they are, so restarting the R session is the more reliable reset.
rm(list = ls())
# Custom functions ----
#' Attach packages, installing any that are missing
#'
#' For each name in `packages`, tries to attach the package with `require()`.
#' When that fails, the package is installed (with its dependencies) and then
#' attached with `library()`, which raises an error if it still cannot be
#' loaded.
#'
#' @param packages Character vector of package names.
#' @return Invisibly, a list with one element per package: `NULL` when the
#'   package could be attached straight away, otherwise the value returned by
#'   `library()`. The list is returned invisibly so that a top-level call does
#'   not print a list of `NULL`s.
fpackage.check <- function(packages) {
  results <- lapply(packages, FUN = function(x) {
    # require() is used on purpose here: it returns FALSE instead of raising
    # an error, which is the signal that the package still has to be installed.
    if (!require(x, character.only = TRUE)) {
      install.packages(x, dependencies = TRUE)
      library(x, character.only = TRUE)
    }
  })
  invisible(results)
}
#' Save an object to a date-stamped .rda file
#'
#' The file name is built as `<location><yyyymmdd><file>.rda`. Inside the file
#' the object is always stored under the name `x`, whatever it was called by
#' the caller.
#'
#' @param x Object to save.
#' @param file Base name of the file (without date prefix or extension).
#'   Defaults to the expression that was passed as `x`.
#' @param location Prefix pasted in front of the file name. To save into a
#'   directory it has to end with a path separator. The directory part of the
#'   resulting path is created when it does not exist yet.
#' @return The value of `save()` (invisible `NULL`); called for its side
#'   effect of writing the file.
fsave <- function(x, file = NULL, location = "./data/processed/") {
  if (is.null(file)) {
    # deparse() may split a long expression over several strings; collapse
    # them so that exactly one file name is produced.
    file <- paste(deparse(substitute(x)), collapse = "")
  }
  datename <- format(Sys.time(), "%Y%m%d")
  totalname <- paste0(location, datename, file, ".rda")

  # Create the directory that will actually receive the file, so a custom
  # `location` works and no unrelated "data/processed" folder is created.
  target_dir <- dirname(totalname)
  if (!dir.exists(target_dir)) {
    dir.create(target_dir, recursive = TRUE)
  }

  # The object is saved under the name `x`; loading the file therefore
  # restores it as `x`, not under its original name.
  save(x, file = totalname)
}
#' Load the single object stored in an .rda file and return it
#'
#' Unlike `load()`, the object is returned as a value, so the caller chooses
#' the name it is assigned to.
#'
#' @param filename Path to a file written by `save()` that holds exactly one
#'   object.
#' @return The stored object.
fload <- function(filename) {
  # Load into a private environment: the stored names then cannot overwrite
  # this function's own variables (e.g. a saved object called "filename").
  loaded_env <- new.env(parent = emptyenv())
  loaded_names <- load(filename, envir = loaded_env)
  if (length(loaded_names) != 1L) {
    stop(
      "Expected exactly one object in '", filename, "', found ",
      length(loaded_names), ".",
      call. = FALSE
    )
  }
  get(loaded_names, envir = loaded_env, inherits = FALSE)
}
#' Render a data frame as a scrollable, styled HTML table
#'
#' @param x Table-like object handed to `knitr::kable()`.
#' @param ... Further arguments forwarded to `knitr::kable()`.
#' @return The HTML table produced by `kableExtra::scroll_box()`.
fshowdf <- function(x, ...) {
  # Step 1: plain HTML table, numbers rounded to two digits.
  html_table <- knitr::kable(x, format = "html", digits = 2, ...)
  # Step 2: bootstrap styling (striped rows, hover highlight).
  styled_table <- kableExtra::kable_styling(
    html_table,
    bootstrap_options = c("striped", "hover")
  )
  # Step 3: wrap the table in a fixed-height box that scrolls.
  kableExtra::scroll_box(styled_table, width = "100%", height = "300px")
}
# Packages used by this script. fpackage.check() (defined above) attaches
# each one and installs it first when it cannot be attached.
packages <- c("tidyverse", "scholar", "openalexR", "rvest", "jsonlite")
fpackage.check(packages)
## Loading required package: scholar
## Loading required package: rvest
##
## Attaching package: 'rvest'
## The following object is masked from 'package:readr':
##
## guess_encoding
## Loading required package: jsonlite
##
## Attaching package: 'jsonlite'
## The following object is masked from 'package:purrr':
##
## flatten
## [[1]]
## NULL
##
## [[2]]
## NULL
##
## [[3]]
## NULL
##
## [[4]]
## NULL
##
## [[5]]
## NULL
# Query the OpenAlex authors endpoint for one scholar and inspect the raw
# JSON response.
# The request URL is defined once, with the space in the search term encoded
# as "+", and that same value is what gets requested.
url <- "https://api.openalex.org/authors?search=Jochem+Tolsma"
# simplifyVector = FALSE keeps the response as nested lists instead of
# letting jsonlite simplify it into vectors / data frames.
jt_json <- fromJSON(url, simplifyVector = FALSE)
# Top-level structure of the response only.
glimpse(jt_json, max.level = 1)
## List of 3
## $ meta :List of 5
## $ results :List of 1
## $ group_by: list()
# Show the structure of the search results, two levels deep.
str(jt_json$results,2)
## List of 1
## $ :List of 18
## ..$ id : chr "https://openalex.org/A5087380803"
## ..$ orcid : chr "https://orcid.org/0000-0002-4411-6932"
## ..$ display_name : chr "Jochem Tolsma"
## ..$ display_name_alternatives:List of 3
## ..$ relevance_score : num 9037
## ..$ works_count : int 88
## ..$ cited_by_count : int 2123
## ..$ summary_stats :List of 3
## ..$ ids :List of 2
## ..$ affiliations :List of 3
## ..$ last_known_institutions :List of 2
## ..$ topics :List of 25
## ..$ topic_share :List of 25
## ..$ x_concepts :List of 19
## ..$ counts_by_year :List of 14
## ..$ works_api_url : chr "https://api.openalex.org/works?filter=author.id:A5087380803"
## ..$ updated_date : chr "2025-10-22T14:48:21.250339"
## ..$ created_date : chr "2023-07-21"
# Institution name of the first affiliation listed for the first search result.
jt_json[["results"]][[1]]$affiliations[[1]]$institution$display_name
## [1] "Radboud University Nijmegen"
# Citations per year for every scholar in the spreadsheet ----
# The workspace is deliberately not cleared at this point: rm(list = ls())
# would also delete the helper functions defined at the top of this file.
library(tidyverse)
#install.packages("openalexR")
library(openalexR)

# NOTE(review): absolute, machine-specific path; a path relative to the
# project root would make the script runnable on other machines.
scholars_path <- "C:/Users/kalle/OneDrive/Documenten/REMA/Jaar 2/Social Networks/KS_labjournal/data/20240419Scholarid_soc_pol.xlsx"
df <- readxl::read_excel(scholars_path)

# E-mail address sent along with every OpenAlex request (`mailto`).
mail <- "kalle.stoffers@ru.nl"

# One single-row tibble per scholar, keyed by the name that was searched for.
citations_all <- list()

# Names that still have to be fetched. This is derived from the names already
# stored rather than from length(citations_all): skipped scholars are never
# stored, so a length-based offset drifts, and (n + 1):n counts backwards
# once everything has been processed.
remaining_authors <- setdiff(df$Naam[!is.na(df$Naam)], names(citations_all))

# A plain loop (instead of lapply) keeps the results gathered so far in
# `citations_all` when the run is interrupted.
for (author_name in remaining_authors) {
  # Fetch the OpenAlex author records matching this name. A failed request is
  # reported, including the reason, and the scholar is skipped.
  search <- tryCatch(
    oa_fetch(
      entity = "authors",
      search = author_name,
      mailto = mail
    ),
    error = function(e) {
      message(
        "Error with author: ", author_name, " (", conditionMessage(e), ")"
      )
      NULL
    }
  )
  if (is.null(search) || nrow(search) == 0) {
    next
  }

  # First result only.
  # NOTE(review): the first hit is not guaranteed to be the intended person;
  # consider checking the affiliation before trusting the match.
  author_data <- search[1, ]

  # counts_by_year is expected to be a list column holding one data frame
  # with the columns `year` and `cited_by_count`; skip anything else.
  counts <- author_data$counts_by_year
  if (!is.list(counts) || length(counts) == 0 ||
      !is.data.frame(counts[[1]]) ||
      !all(c("year", "cited_by_count") %in% names(counts[[1]]))) {
    message(
      "Skipping author with invalid counts_by_year: ",
      author_data$display_name
    )
    next
  }

  # One row per scholar, one column per year. The key is the name from the
  # spreadsheet (not OpenAlex's display_name), because that is the value the
  # join below has to match on.
  df_citations <- counts[[1]] |>
    select(year, cited_by_count) |>
    mutate(Naam = author_name) |>
    relocate(Naam) |>
    pivot_wider(names_from = year, values_from = cited_by_count)

  citations_all[[author_name]] <- df_citations
}

# Put all rows together.
df_citations_total <- bind_rows(citations_all)
str(df_citations_total)

# Merge with the original dataset. When nothing could be fetched there is no
# `Naam` column to join on, so `df` is left as it is.
if ("Naam" %in% names(df_citations_total)) {
  df <- left_join(df, df_citations_total, by = "Naam")
} else {
  message("No citation data retrieved; `df` is left unchanged.")
}
str(df)
## tibble [562 × 19] (S3: tbl_df/tbl/data.frame)
## $ Naam : chr [1:562] "Agnieszka Kanas" "Alissa van Zijl" "Ana Maria Torres Chedraui" "Anne Slootweg" ...
## $ Universiteit : chr [1:562] "EUR" "EUR" "EUR" "EUR" ...
## $ Email-adres : chr [1:562] "a.m.kanas@uva.nl" "vanzijl@essb.eur.nl" "torreschedraui@essb.eur.nl" "slootweg@essb.eur.nl" ...
## $ Functie : chr [1:562] "Universitair hoofddocent" "Universitair docent" "Onderzoeker" "External PhD" ...
## $ Google Scholar id: chr [1:562] "wTAuHRkAAAAJ" "ovqQeCsAAAAJ" "F8NzQ2IAAAAJ" NA ...
## $ 2025 : int [1:562] 54 NA 0 1 144 NA NA NA 6 80 ...
## $ 2024 : int [1:562] 97 NA NA 4 150 NA NA NA 7 139 ...
## $ 2023 : int [1:562] 114 NA 0 1 193 NA NA NA 7 152 ...
## $ 2022 : int [1:562] 80 NA NA 3 161 NA NA NA 5 153 ...
## $ 2021 : int [1:562] 91 NA NA 2 144 NA NA NA NA 158 ...
## $ 2020 : int [1:562] 43 NA NA 2 173 NA NA NA NA 142 ...
## $ 2019 : int [1:562] 64 NA NA 1 115 NA NA NA NA 137 ...
## $ 2018 : int [1:562] 51 NA NA NA 126 NA NA NA NA 137 ...
## $ 2017 : int [1:562] 35 NA NA NA 102 NA NA NA NA 122 ...
## $ 2016 : int [1:562] 52 NA NA NA 119 NA NA NA NA 114 ...
## $ 2015 : int [1:562] 35 NA NA NA 99 NA NA NA NA 121 ...
## $ 2014 : int [1:562] 38 NA NA NA 118 NA NA NA NA 109 ...
## $ 2013 : int [1:562] 44 NA NA NA 92 NA NA NA NA 110 ...
## $ 2012 : int [1:562] 30 NA NA NA 74 NA NA NA NA 84 ...
# Inspect the merged table.
# NOTE(review): lower-case view() is presumably the tibble version attached
# through tidyverse, not utils::View() - confirm.
view(df)
# Descriptives: per-column summary of the merged table.
summary(df)
## Naam Universiteit Email-adres Functie Google Scholar id
## Length:562 Length:562 Length:562 Length:562 Length:562
## Class :character Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character Mode :character
##
##
##
##
## 2025 2024 2023 2022 2021
## Min. : 0.0 Min. : 0.0 Min. : 0.0 Min. : 0.0 Min. : 0.0
## 1st Qu.: 8.0 1st Qu.: 13.0 1st Qu.: 13.0 1st Qu.: 9.0 1st Qu.: 9.0
## Median : 34.0 Median : 49.0 Median : 51.0 Median : 44.0 Median : 46.5
## Mean : 165.0 Mean : 267.8 Mean : 278.2 Mean : 251.1 Mean : 275.4
## 3rd Qu.: 112.5 3rd Qu.: 165.5 3rd Qu.: 188.8 3rd Qu.: 178.0 3rd Qu.: 196.8
## Max. :7583.0 Max. :13765.0 Max. :11654.0 Max. :10373.0 Max. :9555.0
## NA's :258 NA's :254 NA's :264 NA's :265 NA's :288
## 2020 2019 2018 2017 2016
## Min. : 0.0 Min. : 0.0 Min. : 0.00 Min. : 0.0 Min. : 0.0
## 1st Qu.: 5.0 1st Qu.: 4.0 1st Qu.: 4.75 1st Qu.: 5.0 1st Qu.: 4.0
## Median : 39.0 Median : 38.0 Median : 33.50 Median : 39.0 Median : 34.0
## Mean : 223.9 Mean : 194.7 Mean : 172.49 Mean : 155.2 Mean : 139.3
## 3rd Qu.: 171.0 3rd Qu.: 149.0 3rd Qu.: 137.00 3rd Qu.: 122.0 3rd Qu.: 125.0
## Max. :6993.0 Max. :5694.0 Max. :4238.00 Max. :2543.0 Max. :2448.0
## NA's :291 NA's :305 NA's :322 NA's :343 NA's :345
## 2015 2014 2013 2012
## Min. : 0.00 Min. : 0.0 Min. : 0.0 Min. : 0.0
## 1st Qu.: 3.25 1st Qu.: 6.0 1st Qu.: 5.0 1st Qu.: 5.0
## Median : 33.00 Median : 33.5 Median : 29.0 Median : 28.0
## Mean : 138.77 Mean : 139.2 Mean : 124.3 Mean : 118.4
## 3rd Qu.: 121.75 3rd Qu.: 118.5 3rd Qu.: 114.2 3rd Qu.: 105.0
## Max. :2217.00 Max. :2294.0 Max. :1512.0 Max. :1961.0
## NA's :360 NA's :374 NA's :382 NA's :399
(Descriptive) RQ1: To what degree is there position homophily in publication collaborations among social science researchers in the Netherlands?
(Explanatory) RQ2: How does the position composition of a researcher’s egonet (paper collaborations / department) influence that researcher’s citations?