1 In class

# Start this section with an empty workspace: remove every object that ls()
# lists in the current environment.
rm(list = ls())

#Custom functions
# Attach every package named in `packages`; a package that cannot be loaded is
# installed (with its dependencies) and then attached.
# Returns the list produced by lapply(), one element per package.
fpackage.check <- function(packages) {
    attach_or_install <- function(pkg) {
        is_available <- require(pkg, character.only = TRUE)
        if (!is_available) {
            install.packages(pkg, dependencies = TRUE)
            library(pkg, character.only = TRUE)
        }
    }
    lapply(packages, FUN = attach_or_install)
}

# Save an object to a date-stamped .rda file.
#
# x        : object to save; it is stored in the file under the name `x`.
# file     : base name of the file; defaults to the expression passed as `x`.
# location : prefix pasted in front of the file name, normally a directory
#            path ending in "/".
# Returns NULL invisibly (the value of save()).
fsave <- function(x, file = NULL, location = "./data/processed/") {
    if (is.null(file)) {
        # deparse() can split a long expression over several strings, so
        # collapse it to keep a single file name.
        file <- paste(deparse(substitute(x)), collapse = "")
    }
    datename <- format(Sys.time(), "%Y%m%d")
    totalname <- paste0(location, datename, file, ".rda")

    # Create the directory the file is actually written to, so a custom
    # `location` works as well as the default one.
    target_dir <- dirname(totalname)
    if (!dir.exists(target_dir)) {
        dir.create(target_dir, recursive = TRUE)
    }

    # The object is written under the name `x`, not under its original name;
    # load() restores it as `x`.
    save(x, file = totalname)
}

# Load an .rda file that holds exactly one object and return that object,
# whatever name it was saved under.
#
# filename : path to the .rda file.
# Stops with an error when the file does not contain exactly one object.
fload <- function(filename) {
    # Load into a private environment so the stored object can never overwrite
    # `filename` or any other local variable of this function.
    loaded_env <- new.env(parent = emptyenv())
    object_names <- load(filename, envir = loaded_env)
    if (length(object_names) != 1L) {
        stop("fload() expects a file with exactly one object, but '", filename,
            "' contains ", length(object_names), ".", call. = FALSE)
    }
    get(object_names, envir = loaded_env, inherits = FALSE)
}

# Render a data frame as a striped, hoverable HTML table inside a scroll box
# (full width, 300px high), with numbers rounded to two digits.
#
# x   : the table to display.
# ... : further arguments forwarded to knitr::kable().
fshowdf <- function(x, ...) {
    # The native pipe is used so the function does not depend on `%>%` being
    # attached; every other call here is already namespace-qualified.
    knitr::kable(x, format = "html", digits = 2, ...) |>
        kableExtra::kable_styling(bootstrap_options = c("striped", "hover")) |>
        kableExtra::scroll_box(width = "100%", height = "300px")
}

# Packages used in this section; fpackage.check() attaches each one and
# installs any that cannot be loaded.
packages <- c("tidyverse", "scholar", "openalexR", "rvest", "jsonlite")
fpackage.check(packages)
## Loading required package: scholar
## Loading required package: rvest
## 
## Attaching package: 'rvest'
## The following object is masked from 'package:readr':
## 
##     guess_encoding
## Loading required package: jsonlite
## 
## Attaching package: 'jsonlite'
## The following object is masked from 'package:purrr':
## 
##     flatten
## [[1]]
## NULL
## 
## [[2]]
## NULL
## 
## [[3]]
## NULL
## 
## [[4]]
## NULL
## 
## [[5]]
## NULL
# OpenAlex author search for "Jochem Tolsma". The space in the name is written
# as "+" so the query string is a valid URL, and the same URL is used for the
# request instead of a second hard-coded copy.
url <- "https://api.openalex.org/authors?search=Jochem+Tolsma"

# simplifyVector = FALSE keeps the response as nested lists.
jt_json <- fromJSON(url, simplifyVector = FALSE)
glimpse(jt_json, max.level = 1)
## List of 3
##  $ meta    :List of 5
##  $ results :List of 1
##  $ group_by: list()
# Show the structure of the search results, limited to two levels of nesting.
str(jt_json$results,2)
## List of 1
##  $ :List of 18
##   ..$ id                       : chr "https://openalex.org/A5087380803"
##   ..$ orcid                    : chr "https://orcid.org/0000-0002-4411-6932"
##   ..$ display_name             : chr "Jochem Tolsma"
##   ..$ display_name_alternatives:List of 3
##   ..$ relevance_score          : num 9037
##   ..$ works_count              : int 88
##   ..$ cited_by_count           : int 2123
##   ..$ summary_stats            :List of 3
##   ..$ ids                      :List of 2
##   ..$ affiliations             :List of 3
##   ..$ last_known_institutions  :List of 2
##   ..$ topics                   :List of 25
##   ..$ topic_share              :List of 25
##   ..$ x_concepts               :List of 19
##   ..$ counts_by_year           :List of 14
##   ..$ works_api_url            : chr "https://api.openalex.org/works?filter=author.id:A5087380803"
##   ..$ updated_date             : chr "2025-10-22T14:48:21.250339"
##   ..$ created_date             : chr "2023-07-21"
# Display name of the institution in the first affiliation of the first
# search result.
jt_json[["results"]][[1]]$affiliations[[1]]$institution$display_name
## [1] "Radboud University Nijmegen"
#df <- read_xlsx ("C:/Users/kalle/OneDrive/Documenten/REMA/Jaar 2/Social Networks/KS_labjournal/data/20240419Scholarid_soc_pol.xlsx")

2 Homework

# Remove every object that ls() lists in the current environment before the
# homework code runs.
rm(list = ls())

library(tidyverse)
#install.packages("openalexR")
library(openalexR)

# Read the scholar list through a project-relative path so the code is not
# tied to one user's machine.
# NOTE(review): assumes the working directory is the KS_labjournal project
# root (the folder that contains data/) - confirm before knitting.
df <- readxl::read_excel("./data/20240419Scholarid_soc_pol.xlsx")


# Contact address passed to oa_fetch() as `mailto` with every request.
mail <- "kalle.stoffers@ru.nl"


# Results collected so far, keyed by the name that was searched for.
citations_all <- list()

# Names still to look up: every non-missing name without an entry yet.
# Matching on names (instead of a positional offset) stays correct when some
# authors were skipped, and yields an empty vector when nothing is left.
remaining_authors <- setdiff(df$Naam[!is.na(df$Naam)], names(citations_all))

# Look up each remaining author in OpenAlex and store one wide row of yearly
# citation counts per author in `citations_all`.
for (author_name in remaining_authors) {

  # Fetch matching author records. A failed request is reported together with
  # its error message and skipped, so one bad lookup does not end the run.
  search <- tryCatch(
    oa_fetch(
      entity = "authors",
      search = author_name,
      mailto = mail
    ),
    error = function(e) {
      message("Error with author: ", author_name, " (", conditionMessage(e), ")")
      NULL
    }
  )

  # Skip when nothing usable came back (NULL, a non-table result, or no rows).
  if (!is.data.frame(search) || nrow(search) == 0) {
    next
  }

  # First result only.
  # NOTE(review): the first hit is not verified to be the same person as
  # `author_name`; compare display_name or affiliation if that matters.
  author_data <- search[1, ]

  # counts_by_year is expected to be a list column holding one data frame.
  counts <- author_data$counts_by_year

  if (!is.list(counts) || length(counts) == 0 || !is.data.frame(counts[[1]])) {
    message("Skipping author with invalid counts_by_year: ", author_data$display_name)
    next
  }

  # One row per author, one column per year. The row is keyed by the searched
  # name so it matches `Naam` in the original data exactly when joined back;
  # the OpenAlex display name can be spelled differently or be shared by
  # several searched names.
  df_citations <- counts[[1]] |>
    select(year, cited_by_count) |>
    mutate(Naam = author_name) |>
    relocate(Naam) |>
    pivot_wider(names_from = year, values_from = cited_by_count)

  # Add to the result list.
  citations_all[[author_name]] <- df_citations

}

# Stack the per-author one-row tables into a single data frame; year columns
# that an author lacks are filled with NA.
df_citations_total <- bind_rows(citations_all)
str(df_citations_total)

# Attach the yearly citation columns to the original scholar data by name;
# scholars without a matching row get NA in every year column.
df <- left_join(df, df_citations_total, by = "Naam")
str(df)
## tibble [562 × 19] (S3: tbl_df/tbl/data.frame)
##  $ Naam             : chr [1:562] "Agnieszka Kanas" "Alissa van Zijl" "Ana Maria Torres Chedraui" "Anne Slootweg" ...
##  $ Universiteit     : chr [1:562] "EUR" "EUR" "EUR" "EUR" ...
##  $ Email-adres      : chr [1:562] "a.m.kanas@uva.nl" "vanzijl@essb.eur.nl" "torreschedraui@essb.eur.nl" "slootweg@essb.eur.nl" ...
##  $ Functie          : chr [1:562] "Universitair hoofddocent" "Universitair docent" "Onderzoeker" "External PhD" ...
##  $ Google Scholar id: chr [1:562] "wTAuHRkAAAAJ" "ovqQeCsAAAAJ" "F8NzQ2IAAAAJ" NA ...
##  $ 2025             : int [1:562] 54 NA 0 1 144 NA NA NA 6 80 ...
##  $ 2024             : int [1:562] 97 NA NA 4 150 NA NA NA 7 139 ...
##  $ 2023             : int [1:562] 114 NA 0 1 193 NA NA NA 7 152 ...
##  $ 2022             : int [1:562] 80 NA NA 3 161 NA NA NA 5 153 ...
##  $ 2021             : int [1:562] 91 NA NA 2 144 NA NA NA NA 158 ...
##  $ 2020             : int [1:562] 43 NA NA 2 173 NA NA NA NA 142 ...
##  $ 2019             : int [1:562] 64 NA NA 1 115 NA NA NA NA 137 ...
##  $ 2018             : int [1:562] 51 NA NA NA 126 NA NA NA NA 137 ...
##  $ 2017             : int [1:562] 35 NA NA NA 102 NA NA NA NA 122 ...
##  $ 2016             : int [1:562] 52 NA NA NA 119 NA NA NA NA 114 ...
##  $ 2015             : int [1:562] 35 NA NA NA 99 NA NA NA NA 121 ...
##  $ 2014             : int [1:562] 38 NA NA NA 118 NA NA NA NA 109 ...
##  $ 2013             : int [1:562] 44 NA NA NA 92 NA NA NA NA 110 ...
##  $ 2012             : int [1:562] 30 NA NA NA 74 NA NA NA NA 84 ...
# Open the merged data in the data viewer.
view(df)

# Descriptives: per-column summary of the merged data.

summary(df)
##      Naam           Universiteit       Email-adres          Functie          Google Scholar id 
##  Length:562         Length:562         Length:562         Length:562         Length:562        
##  Class :character   Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                                                
##                                                                                                
##                                                                                                
##                                                                                                
##       2025             2024              2023              2022              2021       
##  Min.   :   0.0   Min.   :    0.0   Min.   :    0.0   Min.   :    0.0   Min.   :   0.0  
##  1st Qu.:   8.0   1st Qu.:   13.0   1st Qu.:   13.0   1st Qu.:    9.0   1st Qu.:   9.0  
##  Median :  34.0   Median :   49.0   Median :   51.0   Median :   44.0   Median :  46.5  
##  Mean   : 165.0   Mean   :  267.8   Mean   :  278.2   Mean   :  251.1   Mean   : 275.4  
##  3rd Qu.: 112.5   3rd Qu.:  165.5   3rd Qu.:  188.8   3rd Qu.:  178.0   3rd Qu.: 196.8  
##  Max.   :7583.0   Max.   :13765.0   Max.   :11654.0   Max.   :10373.0   Max.   :9555.0  
##  NA's   :258      NA's   :254       NA's   :264       NA's   :265       NA's   :288     
##       2020             2019             2018              2017             2016       
##  Min.   :   0.0   Min.   :   0.0   Min.   :   0.00   Min.   :   0.0   Min.   :   0.0  
##  1st Qu.:   5.0   1st Qu.:   4.0   1st Qu.:   4.75   1st Qu.:   5.0   1st Qu.:   4.0  
##  Median :  39.0   Median :  38.0   Median :  33.50   Median :  39.0   Median :  34.0  
##  Mean   : 223.9   Mean   : 194.7   Mean   : 172.49   Mean   : 155.2   Mean   : 139.3  
##  3rd Qu.: 171.0   3rd Qu.: 149.0   3rd Qu.: 137.00   3rd Qu.: 122.0   3rd Qu.: 125.0  
##  Max.   :6993.0   Max.   :5694.0   Max.   :4238.00   Max.   :2543.0   Max.   :2448.0  
##  NA's   :291      NA's   :305      NA's   :322       NA's   :343      NA's   :345     
##       2015              2014             2013             2012       
##  Min.   :   0.00   Min.   :   0.0   Min.   :   0.0   Min.   :   0.0  
##  1st Qu.:   3.25   1st Qu.:   6.0   1st Qu.:   5.0   1st Qu.:   5.0  
##  Median :  33.00   Median :  33.5   Median :  29.0   Median :  28.0  
##  Mean   : 138.77   Mean   : 139.2   Mean   : 124.3   Mean   : 118.4  
##  3rd Qu.: 121.75   3rd Qu.: 118.5   3rd Qu.: 114.2   3rd Qu.: 105.0  
##  Max.   :2217.00   Max.   :2294.0   Max.   :1512.0   Max.   :1961.0  
##  NA's   :360       NA's   :374      NA's   :382      NA's   :399

3 (Updated) RQs

(Descriptive) RQ1: To what degree is there position homophily in publication collaborations between social science researchers in the Netherlands?

(Explanatory) RQ2: How does the position composition of a researcher’s egonet (paper collaborations / department) influence a researcher’s citations?

LS0tDQp0aXRsZTogIldlZWsgMyINCmF1dGhvcjogIkthbGxlIFN0b2ZmZXJzIg0KZGF0ZTogIjIwMjUtMDktMTkiDQpvdXRwdXQ6IGh0bWxfZG9jdW1lbnQNCi0tLQ0KDQpgYGB7ciBzZXR1cCwgaW5jbHVkZT1GQUxTRX0NCmtuaXRyOjpvcHRzX2NodW5rJHNldChlY2hvID0gVFJVRSkNCmBgYA0KDQojIEluIGNsYXNzDQoNCmBgYHtyfQ0KDQoNCnJtKGxpc3QgPSBscygpKQ0KDQojQ3VzdG9tIGZ1bmN0aW9ucw0KZnBhY2thZ2UuY2hlY2sgPC0gZnVuY3Rpb24ocGFja2FnZXMpIHsNCiAgICBsYXBwbHkocGFja2FnZXMsIEZVTiA9IGZ1bmN0aW9uKHgpIHsNCiAgICAgICAgaWYgKCFyZXF1aXJlKHgsIGNoYXJhY3Rlci5vbmx5ID0gVFJVRSkpIHsNCiAgICAgICAgICAgIGluc3RhbGwucGFja2FnZXMoeCwgZGVwZW5kZW5jaWVzID0gVFJVRSkNCiAgICAgICAgICAgIGxpYnJhcnkoeCwgY2hhcmFjdGVyLm9ubHkgPSBUUlVFKQ0KICAgICAgICB9DQogICAgfSkNCn0NCg0KZnNhdmUgPC0gZnVuY3Rpb24oeCwgZmlsZSA9IE5VTEwsIGxvY2F0aW9uID0gIi4vZGF0YS9wcm9jZXNzZWQvIikgew0KICAgIGlmZWxzZSghZGlyLmV4aXN0cygiZGF0YSIpLCBkaXIuY3JlYXRlKCJkYXRhIiksIEZBTFNFKQ0KICAgIGlmZWxzZSghZGlyLmV4aXN0cygiZGF0YS9wcm9jZXNzZWQiKSwgZGlyLmNyZWF0ZSgiZGF0YS9wcm9jZXNzZWQiKSwgRkFMU0UpDQogICAgaWYgKGlzLm51bGwoZmlsZSkpDQogICAgICAgIGZpbGUgPSBkZXBhcnNlKHN1YnN0aXR1dGUoeCkpDQogICAgZGF0ZW5hbWUgPC0gc3Vic3RyKGdzdWIoIls6LV0iLCAiIiwgU3lzLnRpbWUoKSksIDEsIDgpDQogICAgdG90YWxuYW1lIDwtIHBhc3RlKGxvY2F0aW9uLCBkYXRlbmFtZSwgZmlsZSwgIi5yZGEiLCBzZXAgPSAiIikNCiAgICBzYXZlKHgsIGZpbGUgPSB0b3RhbG5hbWUpICAjbmVlZCB0byBmaXggaWYgZmlsZSBpcyByZWxvYWRlZCBhcyBpbnB1dCBuYW1lLCBub3QgYXMgeC4gDQp9DQoNCmZsb2FkIDwtIGZ1bmN0aW9uKGZpbGVuYW1lKSB7DQogICAgbG9hZChmaWxlbmFtZSkNCiAgICBnZXQobHMoKVtscygpICE9ICJmaWxlbmFtZSJdKQ0KfQ0KDQpmc2hvd2RmIDwtIGZ1bmN0aW9uKHgsIC4uLikgew0KICAgIGtuaXRyOjprYWJsZSh4LCBkaWdpdHMgPSAyLCAiaHRtbCIsIC4uLikgJT4lDQogICAgICAgIGthYmxlRXh0cmE6OmthYmxlX3N0eWxpbmcoYm9vdHN0cmFwX29wdGlvbnMgPSBjKCJzdHJpcGVkIiwgImhvdmVyIikpICU+JQ0KICAgICAgICBrYWJsZUV4dHJhOjpzY3JvbGxfYm94KHdpZHRoID0gIjEwMCUiLCBoZWlnaHQgPSAiMzAwcHgiKQ0KfQ0KDQpwYWNrYWdlcyA8LSBjKCJ0aWR5dmVyc2UiLCAic2Nob2xhciIsICJvcGVuYWxleFIiLCAicnZlc3QiLCAianNvbmxpdGUiKQ0KZnBhY2thZ2UuY2hlY2socGFja2FnZXMpDQoNCnVybCA8LSAiaHR0cHM6Ly9hcGkub3BlbmFsZXgub3JnL2F1dGhvcnM/c2VhcmNoPUpvY2hlbSBUb2xzbWEiDQoNCmp0X2pzb24gPC0gZnJvbUpTT04o
Imh0dHBzOi8vYXBpLm9wZW5hbGV4Lm9yZy9hdXRob3JzP3NlYXJjaD1Kb2NoZW0rVG9sc21hIiwgc2ltcGxpZnlWZWN0b3IgPSBGQUxTRSkNCmdsaW1wc2UoanRfanNvbiwgbWF4LmxldmVsID0gMSkNCg0Kc3RyKGp0X2pzb24kcmVzdWx0cywyKQ0KICANCiAgDQoNCmp0X2pzb25bWyJyZXN1bHRzIl1dW1sxXV0kYWZmaWxpYXRpb25zW1sxXV0kaW5zdGl0dXRpb24kZGlzcGxheV9uYW1lDQoNCiNkZiA8LSByZWFkX3hsc3ggKCJDOi9Vc2Vycy9rYWxsZS9PbmVEcml2ZS9Eb2N1bWVudGVuL1JFTUEvSmFhciAyL1NvY2lhbCBOZXR3b3Jrcy9LU19sYWJqb3VybmFsL2RhdGEvMjAyNDA0MTlTY2hvbGFyaWRfc29jX3BvbC54bHN4IikNCg0KDQpgYGANCg0KIyBIb21ld29yaw0KDQpgYGB7ciwgZXZhbD1GQUxTRSwgZWNobz1UUlVFfQ0Kcm0obGlzdCA9IGxzKCkpDQoNCmxpYnJhcnkodGlkeXZlcnNlKQ0KI2luc3RhbGwucGFja2FnZXMoIm9wZW5hbGV4UiIpDQpsaWJyYXJ5KG9wZW5hbGV4UikNCg0KZGYgPC0gcmVhZHhsOjpyZWFkX2V4Y2VsKCJDOi9Vc2Vycy9rYWxsZS9PbmVEcml2ZS9Eb2N1bWVudGVuL1JFTUEvSmFhciAyL1NvY2lhbCBOZXR3b3Jrcy9LU19sYWJqb3VybmFsL2RhdGEvMjAyNDA0MTlTY2hvbGFyaWRfc29jX3BvbC54bHN4IikNCg0KDQptYWlsIDwtICJrYWxsZS5zdG9mZmVyc0BydS5ubCINCg0KDQojZW1wdHkgbGlzdA0KY2l0YXRpb25zX2FsbCA8LSBsaXN0KCkNCg0KcmVtYWluaW5nX2F1dGhvcnMgPC0gZGYkTmFhbVsobGVuZ3RoKGNpdGF0aW9uc19hbGwpICsgMSk6bGVuZ3RoKGRmJE5hYW0pXQ0KDQpmb3IgKGF1dGhvcl9uYW1lIGluIHJlbWFpbmluZ19hdXRob3JzKSB7DQoNCiNmZXRjaCBpbmZvcm1hdGlvbg0Kc2VhcmNoIDwtIHRyeUNhdGNoKHsNCiAgb2FfZmV0Y2goDQogIGVudGl0eSA9ICJhdXRob3JzIiwNCiAgc2VhcmNoID0gYXV0aG9yX25hbWUsDQogIG1haWx0byA9IG1haWwpDQp9LCBlcnJvciA9IGZ1bmN0aW9uKGUpIHsNCiAgICBtZXNzYWdlKCJFcnJvciB3aXRoIGF1dGhvcjogIiwgYXV0aG9yX25hbWUpDQogICAgcmV0dXJuKE5VTEwpDQogIH0pDQoNCmlmIChpcy5udWxsKHNlYXJjaCkgfHwgbnJvdyhzZWFyY2gpID09IDApIHsNCiAgICBuZXh0DQogIH0NCg0KI2ZpcnN0IHJlc3VsdCBvbmx5DQphdXRob3JfZGF0YSA8LSBzZWFyY2hbMSwgXQ0KDQojaWYgdGhlcmUgaXMgbm8gZGF0YQ0KY291bnRzIDwtIGF1dGhvcl9kYXRhJGNvdW50c19ieV95ZWFyDQoNCmlmICghaXMubGlzdChjb3VudHMpIHx8IGxlbmd0aChjb3VudHMpID09IDAgfHwgIWlzLmRhdGEuZnJhbWUoY291bnRzW1sxXV0pKSB7DQogIG1lc3NhZ2UoIlNraXBwaW5nIGF1dGhvciB3aXRoIGludmFsaWQgY291bnRzX2J5X3llYXI6ICIsIGF1dGhvcl9kYXRhJGRpc3BsYXlfbmFtZSkNCiAgbmV4dA0KfQ0KDQojc2VsZWN0IHJlbGV2YW50IGluZm8NCmRmX2NpdGF0aW9ucyA8LSBhdXRob3JfZGF0YSRjb3VudHNfYnlfeWVhcltbMV1dIHw+DQog
IHNlbGVjdCh5ZWFyLCBjaXRlZF9ieV9jb3VudCkgfD4NCiAgbXV0YXRlKE5hYW0gPSBhdXRob3JfZGF0YSRkaXNwbGF5X25hbWUpIHw+DQogIHJlbG9jYXRlKE5hYW0pIHw+DQogIHBpdm90X3dpZGVyKG5hbWVzX2Zyb20gPSB5ZWFyLCB2YWx1ZXNfZnJvbSA9IGNpdGVkX2J5X2NvdW50KQ0KDQojYWRkIHRvIGxpc3QNCmNpdGF0aW9uc19hbGxbW2F1dGhvcl9uYW1lXV0gPC0gZGZfY2l0YXRpb25zDQoNCn0NCg0KI3B1dCBhbGwgcm93cyB0b2dldGhlcg0KZGZfY2l0YXRpb25zX3RvdGFsIDwtIGJpbmRfcm93cyhjaXRhdGlvbnNfYWxsKQ0Kc3RyKGRmX2NpdGF0aW9uc190b3RhbCkNCg0KI21lcmdlIHdpdGggT0cgZGF0YXNldCAgDQpkZiA8LSBsZWZ0X2pvaW4oZGYsIGRmX2NpdGF0aW9uc190b3RhbCwgYnkgPSAiTmFhbSIpDQoNCmBgYA0KDQpgYGB7ciwgZXZhbD1GQUxTRSwgZWNobz1GQUxTRX0NCmZzYXZlKGRmKQ0KDQpgYGANCg0KYGBge3IsIGV2YWw9VFJVRSwgZWNobz1GQUxTRX0NCmRmIDwtIGZsb2FkKCIuL2RhdGEvcHJvY2Vzc2VkLzIwMjUwOTI2ZGYucmRhIikNCmBgYA0KDQpgYGB7cn0NCnN0cihkZikNCnZpZXcoZGYpDQoNCiNEZXNjcmlwdGl2ZXM/DQoNCnN1bW1hcnkoZGYpDQoNCmBgYA0KDQojIChVcGRhdGVkKSBSUXMNCg0KKERlc2NyaXB0aXZlKSBSUTE6IFRvIHdoYXQgZGVncmVlIGlzIHRoZXJlIHBvc2l0aW9uIGhvbW9waGlseSBpbiBwdWJsaWNhdGlvbiBjb2xsYWJlcmF0aW9ucyBiZXR3ZWVuIHNvY2lhbCBzY2llbmNlIHJlc2VhcmNoZXJzIGluIHRoZSBOZXRoZXJsYW5kcz8NCg0KKEV4cGxhbmF0b3J5KSBSUTI6IEhvdyBkb2VzIHRoZSBwb3NpdGlvbiBjb21wb3NpdGlvbiBvZiBhIHJlc2VhcmNoZXIncyBlZ29uZXQgKHBhcGVyIGNvbGxhYmVyYXRpb25zIC8gZGVwYXJ0bWVudCkgaW5mbHVlbmNlIGEgcmVzZWFyY2hlcidzIGNpdGF0aW9ucz8NCg==