Open sylvaticus opened 3 years ago
✨ thanks @sylvaticus! / cc @diehlpk who has been looking at the breakdown of languages of papers we've reviewed too.
OK, it would be interesting to add these to the paper and compare them with the programming languages used in the reviewed repositories.
In case you are ever curious: here is the JOSS reviewer data, taken from the public list.
Generated with the code below (Julia):
```julia
# Summarise the languages and activity sectors declared by JOSS reviewers.
#
# Source: reviewer database of JOSS at https://docs.google.com/spreadsheets/d/1PAPRJ63yq9aPC1COLjaQp8mHmEq3rZUzwUYxTulyu78/edit#gid=856801822

using OdsIO

# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------

"""
    tokens(cell) -> Vector{String}

Split a comma-separated cell into whitespace-trimmed, non-empty entries.
An empty cell yields an empty vector.
"""
tokens(cell::AbstractString) =
    String[t for t in strip.(split(cell, ',')) if !isempty(t)]

"""
    cleancell(cell) -> String

Normalise a free-text cell into a lowercase, comma-separated list.

`/`, `(`, `)`, newlines and the standalone word "and" are all treated as
separators. The word-boundary regex keeps entries such as "pandas" or
"land use" intact (a plain substring replacement of "and" would split them).
Empty entries produced by doubled or trailing separators are dropped.
"""
function cleancell(cell::AbstractString)
    s = replace(cell, r"[/()\n]" => ",")
    s = replace(s, r"\band\b"i => ",")
    return join(tokens(lowercase(s)), ',')
end

"""
    topindices(counts, n) -> Vector{Int}

Indices of the (at most) `n` largest values of `counts`, largest first.
The slice is clamped so that asking for more entries than exist does not
throw a `BoundsError`.
"""
topindices(counts::AbstractVector, n::Integer) =
    reverse(sortperm(counts))[1:min(n, length(counts))]

# ---------------------------------------------------------------------------
# Loading data
# ---------------------------------------------------------------------------

dataFile = "joss_reviewers_20200724.ods"
# NOTE(review): the range is hard-coded for this snapshot of the spreadsheet;
# presumably ((firstRow, firstCol), (lastRow, lastCol)) - confirm against the
# OdsIO documentation and adjust the last row when using a newer export.
db = ods_read(dataFile, range=((4,2),(1340,9)))

# Removing email (drops columns 3 and 4 of the range that was read)
db = hcat(db[:,1:2], db[:,5:end])

# Replacing `nothing` (empty cells)...
for r in eachrow(db)
    # ...with an empty string in the first three (text) columns...
    for cidx in 1:3
        r[cidx] = isnothing(r[cidx]) ? "" : r[cidx]
    end
    # ...and with zero in the numeric columns 4:6 (numbers of reviews).
    for cidx in 4:6
        r[cidx] = isnothing(r[cidx]) ? 0 : r[cidx]
    end
end

# Narrowing the element type: text columns hold String, numeric columns Int64
db = convert(Array{Union{String,Int64},2}, db)

# Cleaning the three text columns
for r in eachrow(db)
    for cidx in 1:3
        r[cidx] = cleancell(r[cidx])
    end
end

# ---------------------------------------------------------------------------
# Establishing vocabularies
# ---------------------------------------------------------------------------

vocLangs      = Set{String}()
vocActivities = Set{String}()
for r in eachrow(db)
    # Columns 1 and 2 both list languages (preferred and other, see below)
    for cidx in 1:2
        union!(vocLangs, tokens(r[cidx]))
    end
    # Column 3 lists the activities / sectors
    union!(vocActivities, tokens(r[3]))
end
vocLangs      = collect(vocLangs)
vocActivities = collect(vocActivities)

# Position of each vocabulary entry, used to index the count arrays
langIdx = Dict{String,Int64}(l => id for (id, l) in enumerate(vocLangs))
actIdx  = Dict{String,Int64}(a => id for (id, a) in enumerate(vocActivities))

nLangs   = length(vocLangs)
nActs    = length(vocActivities)
nRecords = size(db, 1)

preferredLangCount = zeros(Int64, nLangs)
competentLangCount = zeros(Int64, nLangs)
actCountByLang     = zeros(Int64, nLangs, nActs)

# ---------------------------------------------------------------------------
# Let's count!
# ---------------------------------------------------------------------------

for r in eachrow(db)
    # Deduplicate so that each record contributes at most once per language
    # (and per language/activity pair); the shares below are per record.
    plangs = unique(tokens(r[1]))
    langs  = union(plangs, tokens(r[2]))   # preferred + other languages
    acts   = unique(tokens(r[3]))
    for l in plangs
        preferredLangCount[langIdx[l]] += 1
    end
    for l in langs
        competentLangCount[langIdx[l]] += 1
        for a in acts
            actCountByLang[langIdx[l], actIdx[a]] += 1
        end
    end
end

# ---------------------------------------------------------------------------
# Let's report
# ---------------------------------------------------------------------------

n = 20
println("*** The $n most \"best known\" languages...")
for i in topindices(preferredLangCount, n)
    println("- $(rpad(vocLangs[i],12))\t ( $(round(100*preferredLangCount[i]/nRecords,digits=2)) %)")
end

n = 20
println("*** The $n most \"known\" languages...")
for i in topindices(competentLangCount, n)
    println("- $(rpad(vocLangs[i],12))\t ( $(round(100*competentLangCount[i]/nRecords,digits=2)) %)")
end

n  = 10
n2 = 4
println("*** The $n2 most common sectors for the $n most \"known\" languages...")
for i in topindices(competentLangCount, n)
    lang = vocLangs[i]
    print("$(rpad(lang,12)): \t")
    for j in topindices(actCountByLang[i,:], n2)
        print("$(vocActivities[j]), ")
    end
    print("\n")
end
```