#start clean
rm(list=ls())
#Custom functions
fpackage.check <- function(packages) {
lapply(packages, FUN = function(x) {
if (!require(x, character.only = TRUE)) {
install.packages(x, dependencies = TRUE)
library(x, character.only = TRUE)
}
})
}
fsave <- function(x, file = NULL, location = "./data/processed/") {
ifelse(!dir.exists("data"), dir.create("data"), FALSE)
ifelse(!dir.exists("data/processed"), dir.create("data/processed"), FALSE)
if (is.null(file))
file = deparse(substitute(x))
datename <- substr(gsub("[:-]", "", Sys.time()), 1, 8)
totalname <- paste(location, datename, file, ".rda", sep = "")
save(x, file = totalname) #need to fix if file is reloaded as input name, not as x.
}
fload <- function(filename) {
load(filename)
get(ls()[ls() != "filename"])
}
fshowdf <- function(x, ...) {
knitr::kable(x, digits = 2, "html", ...) %>%
kableExtra::kable_styling(bootstrap_options = c("striped", "hover")) %>%
kableExtra::scroll_box(width = "100%", height = "300px")
}
colorize <- function(x, color) {
sprintf("<span style='color: %s;'>%s</span>", color, x)
}
#the big one
fcolnet <- function(data = scholars, university = "RU", discipline = "sociology", waves = list(c(2015,
2018), c(2019, 2023)), type = c("first")) {
# step 1
demographics <- do.call(rbind.data.frame, data$demographics)
demographics <- demographics %>%
mutate(Universiteit1.22 = replace(Universiteit1.22, is.na(Universiteit1.22), ""), Universiteit2.22 = replace(Universiteit2.22,
is.na(Universiteit2.22), ""), Universiteit1.24 = replace(Universiteit1.24, is.na(Universiteit1.24),
""), Universiteit2.24 = replace(Universiteit2.24, is.na(Universiteit2.24), ""), discipline.22 = replace(discipline.22,
is.na(discipline.22), ""), discipline.24 = replace(discipline.24, is.na(discipline.24), ""))
sample <- which((demographics$Universiteit1.22 %in% university | demographics$Universiteit2.22 %in%
university | demographics$Universiteit1.24 %in% university | demographics$Universiteit2.24 %in%
university) & (demographics$discipline.22 %in% discipline | demographics$discipline.24 %in% discipline))
demographics_soc <- demographics[sample, ]
scholars_sel <- lapply(scholars, "[", sample)
# step 2
ids <- demographics_soc$au_id
nwaves <- length(waves)
nets <- array(0, dim = c(nwaves, length(ids), length(ids)), dimnames = list(wave = 1:nwaves, ids,
ids))
dimnames(nets)
# step 3
df_works <- tibble(works_id = unlist(lapply(scholars_sel$work, function(l) l$id)), works_author = unlist(lapply(scholars_sel$work,
function(l) l$author), recursive = FALSE), works_year = unlist(lapply(scholars_sel$work, function(l) l$publication_year),
recursive = FALSE))
df_works <- df_works[!duplicated(df_works), ]
# step 4
if (type == "first") {
for (j in 1:nwaves) {
df_works_w <- df_works[df_works$works_year >= waves[[j]][1] & df_works$works_year <= waves[[j]][2],
]
for (i in 1:nrow(df_works_w)) {
ego <- df_works_w$works_author[i][[1]]$au_id[1]
alters <- df_works_w$works_author[i][[1]]$au_id[-1]
if (sum(ids %in% ego) > 0 & sum(ids %in% alters) > 0) {
nets[j, which(ids %in% ego), which(ids %in% alters)] <- 1
}
}
}
}
if (type == "last") {
for (j in 1:nwaves) {
df_works_w <- df_works[df_works$works_year >= waves[[j]][1] & df_works$works_year <= waves[[j]][2],
]
for (i in 1:nrow(df_works_w)) {
ego <- rev(df_works_w$works_author[i][[1]]$au_id)[1]
alters <- rev(df_works_w$works_author[i][[1]]$au_id)[-1]
if (sum(ids %in% ego) > 0 & sum(ids %in% alters) > 0) {
nets[j, which(ids %in% ego), which(ids %in% alters)] <- 1
}
}
}
}
if (type == "all") {
for (j in 1:nwaves) {
df_works_w <- df_works[df_works$works_year >= waves[[j]][1] & df_works$works_year <= waves[[j]][2],
]
for (i in 1:nrow(df_works_w)) {
egos <- df_works_w$works_author[i][[1]]$au_id
if (sum(ids %in% egos) > 0) {
nets[j, which(ids %in% egos), which(ids %in% egos)] <- 1
}
}
}
}
output <- list()
output$data <- scholars_sel
output$nets <- nets
return(output)
}
fpackage.check(c("tidyverse", "RSiena"))
## [[1]]
## NULL
##
## [[2]]
## NULL
#Add citations to the data
scholars <- fload("C:/Users/kalle/OneDrive/Documenten/REMA/Jaar 2/Social Networks/KS_labjournal/data/processed/scholars_20240924.rda")
scholars_net <- fcolnet(data = scholars,
university = c("RU", "UU", "RUG", "UvA", "VU", "EUR", "Leiden", "UvT"),
discipline = c("sociology", "political science"),
waves = list(c(2015, 2018), c(2019, 2023)),
type = c("all"))
df_ego <- do.call(rbind.data.frame, scholars_net$data$demographics)
df_citations <- fload("C:/Users/kalle/OneDrive/Documenten/REMA/Jaar 2/Social Networks/KS_labjournal/data/processed/df_20250929.rda")
#merging
df_ego <- df_ego |>
left_join(df_citations |> select(Naam, citations_w1, citations_w2),
by = "Naam")
#make ordinal variable function
table(df_ego$Functie.24, useNA = "always")
##
## ?
## 1
## Adjunct hoogleraar
## 1
## Associate researcher
## 1
## Bijzonder hoogleraar
## 14
## Bijzonder Hoogleraar
## 2
## Datamanager
## 1
## Directeur SoG
## 1
## Docent
## 24
## Docent (gepensioneerd)
## 1
## Docent, phd
## 1
## Docent/PhD student
## 1
## Docent?
## 1
## Doctoral Researcher
## 2
## Doctoral student
## 1
## Does project at UvA, has worked as professor, lecturer, and research fellow at different unis
## 1
## Emeritus hoogleraar
## 12
## Emeritus hoogleraar (gast?)
## 1
## External Phd
## 6
## External PhD
## 1
## Fellow
## 1
## Gast/ Onderzoeker
## 1
## Gast/Emeritus universitair hoofddocent
## 1
## Gastonderzoeker
## 1
## Gepensioneerd docent sociologie
## 1
## Gepensioneerd UHD
## 1
## Guest Researcher
## 2
## Honorary professor
## 2
## Hoofdonderzoeker
## 1
## Hoogleraar
## 83
## Hoogleraar (gast)
## 1
## Hoogleraar (visiting)
## 1
## Hoogleraar + Universitair hoofddocent
## 1
## Hoogleraar/Afdelingsvoorzitter
## 1
## Junior docent
## 1
## Junior onderzoeker
## 1
## Lecturer
## 9
## Lecturer and Researcher
## 1
## Lecturer/researcher
## 1
## Managing director/co-founder SocioQuest
## 1
## Medewerker
## 1
## Onderwijsontwikkelaar
## 1
## Onderzoeker
## 34
## Onderzoeker/Lecturer
## 1
## Onderzoeksfellow
## 1
## Onderzoeksmedewerker
## 1
## Other researcher
## 1
## part-time doctoral researcher
## 1
## Phd & postdoc
## 1
## Phd Student
## 1
## PhD student
## 42
## PhD Student
## 112
## PhD Student anthropologie
## 1
## PhD student, Junior Docent
## 2
## PhD student/Gast
## 2
## PhD studentt
## 1
## Postdoc
## 22
## Postdoctoral Research Fellow
## 1
## Promovendus
## 1
## Research fellow
## 1
## Scholarship student
## 1
## Senior lecturer
## 3
## Senior Onderzoeker
## 1
## Senior Research Fellow
## 1
## Senior researcher
## 1
## Senior Researcher
## 1
## Senior researcher part-time
## 1
## Universitair docent
## 154
## Universitair Docent
## 3
## Universitair hoofddocent
## 84
## Universitair hoofddocent/Onderwijsdirecteur
## 1
## Unversitair docent GPM
## 1
## Wetenschappelijk directeur
## 1
## <NA>
## 8
#categories:
#(bijzonder) hoogleraar, UHD, UD, postdoc/senior/docent/researcher, PhD (external).
df_ego <- df_ego |>
mutate(functie_level =
case_when(
grepl("hoogleraar|professor", tolower(Functie.24)) ~ 5,
grepl("universitair hoofddocent|uhd", tolower(Functie.24)) ~ 4,
grepl("universitair docent|unversitair docent", tolower(Functie.24)) ~ 3,
grepl("postdoc|senior|fellow", tolower(Functie.24)) ~ 2,
tolower(Functie.24) %in% c(
"docent", "lecturer", "onderzoeker", "onderzoeker/lecturer",
"lecturer and researcher", "lecturer/researcher",
"docent (gepensioneerd)", "docent?", "gast/ onderzoeker",
"gastonderzoeker", "gepensioneerd docent sociologie",
"guest researcher"
) ~ 2,
tolower(Functie.24) %in% c("doctoral researcher", "doctoral student", "part-time doctoral researcher") ~ 1,
grepl("phd|junior|promovendus", tolower(Functie.24)) ~ 1,
.default = NA
)
)
table(df_ego$functie_level, df_ego$Functie.24, useNA="always")
##
## ? Adjunct hoogleraar Associate researcher Bijzonder hoogleraar Bijzonder Hoogleraar
## 1 0 0 0 0 0
## 2 0 0 0 0 0
## 3 0 0 0 0 0
## 4 0 0 0 0 0
## 5 0 1 0 14 2
## <NA> 1 0 1 0 0
##
## Datamanager Directeur SoG Docent Docent (gepensioneerd) Docent, phd Docent/PhD student
## 1 0 0 0 0 1 1
## 2 0 0 24 1 0 0
## 3 0 0 0 0 0 0
## 4 0 0 0 0 0 0
## 5 0 0 0 0 0 0
## <NA> 1 1 0 0 0 0
##
## Docent? Doctoral Researcher Doctoral student
## 1 0 2 1
## 2 1 0 0
## 3 0 0 0
## 4 0 0 0
## 5 0 0 0
## <NA> 0 0 0
##
## Does project at UvA, has worked as professor, lecturer, and research fellow at different unis
## 1 0
## 2 0
## 3 0
## 4 0
## 5 1
## <NA> 0
##
## Emeritus hoogleraar Emeritus hoogleraar (gast?) External Phd External PhD Fellow
## 1 0 0 6 1 0
## 2 0 0 0 0 1
## 3 0 0 0 0 0
## 4 0 0 0 0 0
## 5 12 1 0 0 0
## <NA> 0 0 0 0 0
##
## Gast/ Onderzoeker Gast/Emeritus universitair hoofddocent Gastonderzoeker
## 1 0 0 0
## 2 1 0 1
## 3 0 0 0
## 4 0 1 0
## 5 0 0 0
## <NA> 0 0 0
##
## Gepensioneerd docent sociologie Gepensioneerd UHD Guest Researcher Honorary professor
## 1 0 0 0 0
## 2 1 0 2 0
## 3 0 0 0 0
## 4 0 1 0 0
## 5 0 0 0 2
## <NA> 0 0 0 0
##
## Hoofdonderzoeker Hoogleraar Hoogleraar (gast) Hoogleraar (visiting)
## 1 0 0 0 0
## 2 0 0 0 0
## 3 0 0 0 0
## 4 0 0 0 0
## 5 0 83 1 1
## <NA> 1 0 0 0
##
## Hoogleraar + Universitair hoofddocent Hoogleraar/Afdelingsvoorzitter Junior docent
## 1 0 0 1
## 2 0 0 0
## 3 0 0 0
## 4 0 0 0
## 5 1 1 0
## <NA> 0 0 0
##
## Junior onderzoeker Lecturer Lecturer and Researcher Lecturer/researcher
## 1 1 0 0 0
## 2 0 9 1 1
## 3 0 0 0 0
## 4 0 0 0 0
## 5 0 0 0 0
## <NA> 0 0 0 0
##
## Managing director/co-founder SocioQuest Medewerker Onderwijsontwikkelaar Onderzoeker
## 1 0 0 0 0
## 2 0 0 0 34
## 3 0 0 0 0
## 4 0 0 0 0
## 5 0 0 0 0
## <NA> 1 1 1 0
##
## Onderzoeker/Lecturer Onderzoeksfellow Onderzoeksmedewerker Other researcher
## 1 0 0 0 0
## 2 1 1 0 0
## 3 0 0 0 0
## 4 0 0 0 0
## 5 0 0 0 0
## <NA> 0 0 1 1
##
## part-time doctoral researcher Phd & postdoc Phd Student PhD student PhD Student
## 1 1 0 1 42 112
## 2 0 1 0 0 0
## 3 0 0 0 0 0
## 4 0 0 0 0 0
## 5 0 0 0 0 0
## <NA> 0 0 0 0 0
##
## PhD Student anthropologie PhD student, Junior Docent PhD student/Gast PhD studentt Postdoc
## 1 1 2 2 1 0
## 2 0 0 0 0 22
## 3 0 0 0 0 0
## 4 0 0 0 0 0
## 5 0 0 0 0 0
## <NA> 0 0 0 0 0
##
## Postdoctoral Research Fellow Promovendus Research fellow Scholarship student Senior lecturer
## 1 0 1 0 0 0
## 2 1 0 1 0 3
## 3 0 0 0 0 0
## 4 0 0 0 0 0
## 5 0 0 0 0 0
## <NA> 0 0 0 1 0
##
## Senior Onderzoeker Senior Research Fellow Senior researcher Senior Researcher
## 1 0 0 0 0
## 2 1 1 1 1
## 3 0 0 0 0
## 4 0 0 0 0
## 5 0 0 0 0
## <NA> 0 0 0 0
##
## Senior researcher part-time Universitair docent Universitair Docent Universitair hoofddocent
## 1 0 0 0 0
## 2 1 0 0 0
## 3 0 154 3 0
## 4 0 0 0 84
## 5 0 0 0 0
## <NA> 0 0 0 0
##
## Universitair hoofddocent/Onderwijsdirecteur Unversitair docent GPM
## 1 0 0
## 2 0 0
## 3 0 1
## 4 1 0
## 5 0 0
## <NA> 0 0
##
## Wetenschappelijk directeur <NA>
## 1 0 0
## 2 0 0
## 3 0 0
## 4 0 0
## 5 0 0
## <NA> 1 8
table(df_ego$functie_level, useNA = "always")
##
## 1 2 3 4 5 <NA>
## 177 112 158 87 120 20
fsave(df_ego)
#First analyses (undirected network)
#Step 1: define data
wave1 <- scholars_net$nets[1,,]
wave2 <- scholars_net$nets[2,,]
##some checks
dim(wave1)
## [1] 674 674
dim(wave2)
## [1] 674 674
#should be 0
sum(is.na(wave1))
## [1] 0
#set diagonal to 0
sum(diag(wave2)==0)
## [1] 26
diag(wave1) <- 0
diag(wave2) <- 0
#only 1s and 0s
sum(wave1>1)
## [1] 0
#at least some 1s
sum(wave1>0)
## [1] 940
#make array
nets <- array(data = c(wave1, wave2), dim = c(dim(wave1), 2))
# dependent
net <- sienaDependent(nets)
#independent
functie <- coCovar(df_ego$functie_level)
mydata <- sienaDataCreate(net, functie)
#Step 2: effects
myeff <- getEffects(mydata)
myeff
## effectName include fix test initialValue parm
## 1 basic rate parameter net TRUE FALSE FALSE 4.52955 0
## 2 degree (density) TRUE FALSE FALSE -1.66572 0
#Step 3: initial description
print01Report(mydata, modelname = "./results/secondtest_scholars")
#step 4: specify model
myeff <- getEffects(mydata)
myeff <- includeEffects(myeff, gwesp, outAct)
## effectNumber effectName shortName include fix test initialValue parm
## 1 21 GWESP (#) gwesp TRUE FALSE FALSE 0 69
## 2 39 degree of ego outAct TRUE FALSE FALSE 0 0
myeff <- includeEffects(myeff, simX, interaction1 = "functie")
## effectNumber effectName shortName include fix test initialValue parm
## 1 153 functie similarity simX TRUE FALSE FALSE 0 0
myeff
## effectName include fix test initialValue parm
## 1 basic rate parameter net TRUE FALSE FALSE 4.52955 0
## 2 degree (density) TRUE FALSE FALSE -1.66572 0
## 3 GWESP (#) TRUE FALSE FALSE 0.00000 69
## 4 degree of ego TRUE FALSE FALSE 0.00000 0
## 5 functie similarity TRUE FALSE FALSE 0.00000 0
#Estimate
myAlgorithm <- sienaAlgorithmCreate(projname = "scholars")
EstM1 <- siena07(myAlgorithm, data = mydata, effects = myeff, returnDeps = TRUE)
EstM1
#GOF
gof1 <- sienaGOF(EstM1, IndegreeDistribution, verbose = FALSE, join = T, varName ="net")
?sienaGOF
plot(gof1)
RI <- RSiena:::sienaRI(data=mydata, ans= EstM1)
class(RI)
RSiena:::plot.sienaRI(RI, addPieChart = T, legendColumns = 3)
Still to-do: