diff --git a/_quarto.yml b/_quarto.yml index 494dfc3..0e383db 100644 --- a/_quarto.yml +++ b/_quarto.yml @@ -51,10 +51,14 @@ website: - choose_license.qmd - make_readme.qmd - archive.qmd - - section: "πŸ’‘ In-depth material" + - section: "Excursus: In-depth material 💡" contents: - href: in_depth_material/introduction_copyright.qmd text: "Introduction to Copyright and Licensing" + - href: in_depth_material/data_dic_generation.qmd + text: "Automatic Generation of Data Dictionaries" + - href: in_depth_material/other_dependencies.qmd + text: "Identify additional system dependencies for the README" # End Website diff --git a/copyright.qmd b/copyright.qmd index b2c9761..4607ddd 100644 --- a/copyright.qmd +++ b/copyright.qmd @@ -518,7 +518,7 @@ Also, their commercial use may require the consent of the depicted person. [commons-photographs]: https://commons.wikimedia.org/wiki/Commons:Photographs_of_identifiable_people -## Practical Exercise: Adding an Image +## ✍️ Practical Exercise: Adding an Image Let's practice what you learned by adding an image to the manuscript. We'll use [this picture of a penguin from Flickr](https://flic.kr/p/2pEKnUr). 
diff --git a/data.csv b/data.csv new file mode 100644 index 0000000..03e2c58 --- /dev/null +++ b/data.csv @@ -0,0 +1,345 @@ +"species","island","bill_length_mm","bill_depth_mm","flipper_length_mm","body_mass_g","sex","year" +"Adelie","Torgersen",39.1,18.7,181,3750,"male",2007 +"Adelie","Torgersen",39.5,17.4,186,3800,"female",2007 +"Adelie","Torgersen",40.3,18,195,3250,"female",2007 +"Adelie","Torgersen",NA,NA,NA,NA,NA,2007 +"Adelie","Torgersen",36.7,19.3,193,3450,"female",2007 +"Adelie","Torgersen",39.3,20.6,190,3650,"male",2007 +"Adelie","Torgersen",38.9,17.8,181,3625,"female",2007 +"Adelie","Torgersen",39.2,19.6,195,4675,"male",2007 +"Adelie","Torgersen",34.1,18.1,193,3475,NA,2007 +"Adelie","Torgersen",42,20.2,190,4250,NA,2007 +"Adelie","Torgersen",37.8,17.1,186,3300,NA,2007 +"Adelie","Torgersen",37.8,17.3,180,3700,NA,2007 +"Adelie","Torgersen",41.1,17.6,182,3200,"female",2007 +"Adelie","Torgersen",38.6,21.2,191,3800,"male",2007 +"Adelie","Torgersen",34.6,21.1,198,4400,"male",2007 +"Adelie","Torgersen",36.6,17.8,185,3700,"female",2007 +"Adelie","Torgersen",38.7,19,195,3450,"female",2007 +"Adelie","Torgersen",42.5,20.7,197,4500,"male",2007 +"Adelie","Torgersen",34.4,18.4,184,3325,"female",2007 +"Adelie","Torgersen",46,21.5,194,4200,"male",2007 +"Adelie","Biscoe",37.8,18.3,174,3400,"female",2007 +"Adelie","Biscoe",37.7,18.7,180,3600,"male",2007 +"Adelie","Biscoe",35.9,19.2,189,3800,"female",2007 +"Adelie","Biscoe",38.2,18.1,185,3950,"male",2007 +"Adelie","Biscoe",38.8,17.2,180,3800,"male",2007 +"Adelie","Biscoe",35.3,18.9,187,3800,"female",2007 +"Adelie","Biscoe",40.6,18.6,183,3550,"male",2007 +"Adelie","Biscoe",40.5,17.9,187,3200,"female",2007 +"Adelie","Biscoe",37.9,18.6,172,3150,"female",2007 +"Adelie","Biscoe",40.5,18.9,180,3950,"male",2007 +"Adelie","Dream",39.5,16.7,178,3250,"female",2007 +"Adelie","Dream",37.2,18.1,178,3900,"male",2007 +"Adelie","Dream",39.5,17.8,188,3300,"female",2007 +"Adelie","Dream",40.9,18.9,184,3900,"male",2007 
+"Adelie","Dream",36.4,17,195,3325,"female",2007 +"Adelie","Dream",39.2,21.1,196,4150,"male",2007 +"Adelie","Dream",38.8,20,190,3950,"male",2007 +"Adelie","Dream",42.2,18.5,180,3550,"female",2007 +"Adelie","Dream",37.6,19.3,181,3300,"female",2007 +"Adelie","Dream",39.8,19.1,184,4650,"male",2007 +"Adelie","Dream",36.5,18,182,3150,"female",2007 +"Adelie","Dream",40.8,18.4,195,3900,"male",2007 +"Adelie","Dream",36,18.5,186,3100,"female",2007 +"Adelie","Dream",44.1,19.7,196,4400,"male",2007 +"Adelie","Dream",37,16.9,185,3000,"female",2007 +"Adelie","Dream",39.6,18.8,190,4600,"male",2007 +"Adelie","Dream",41.1,19,182,3425,"male",2007 +"Adelie","Dream",37.5,18.9,179,2975,NA,2007 +"Adelie","Dream",36,17.9,190,3450,"female",2007 +"Adelie","Dream",42.3,21.2,191,4150,"male",2007 +"Adelie","Biscoe",39.6,17.7,186,3500,"female",2008 +"Adelie","Biscoe",40.1,18.9,188,4300,"male",2008 +"Adelie","Biscoe",35,17.9,190,3450,"female",2008 +"Adelie","Biscoe",42,19.5,200,4050,"male",2008 +"Adelie","Biscoe",34.5,18.1,187,2900,"female",2008 +"Adelie","Biscoe",41.4,18.6,191,3700,"male",2008 +"Adelie","Biscoe",39,17.5,186,3550,"female",2008 +"Adelie","Biscoe",40.6,18.8,193,3800,"male",2008 +"Adelie","Biscoe",36.5,16.6,181,2850,"female",2008 +"Adelie","Biscoe",37.6,19.1,194,3750,"male",2008 +"Adelie","Biscoe",35.7,16.9,185,3150,"female",2008 +"Adelie","Biscoe",41.3,21.1,195,4400,"male",2008 +"Adelie","Biscoe",37.6,17,185,3600,"female",2008 +"Adelie","Biscoe",41.1,18.2,192,4050,"male",2008 +"Adelie","Biscoe",36.4,17.1,184,2850,"female",2008 +"Adelie","Biscoe",41.6,18,192,3950,"male",2008 +"Adelie","Biscoe",35.5,16.2,195,3350,"female",2008 +"Adelie","Biscoe",41.1,19.1,188,4100,"male",2008 +"Adelie","Torgersen",35.9,16.6,190,3050,"female",2008 +"Adelie","Torgersen",41.8,19.4,198,4450,"male",2008 +"Adelie","Torgersen",33.5,19,190,3600,"female",2008 +"Adelie","Torgersen",39.7,18.4,190,3900,"male",2008 +"Adelie","Torgersen",39.6,17.2,196,3550,"female",2008 
+"Adelie","Torgersen",45.8,18.9,197,4150,"male",2008 +"Adelie","Torgersen",35.5,17.5,190,3700,"female",2008 +"Adelie","Torgersen",42.8,18.5,195,4250,"male",2008 +"Adelie","Torgersen",40.9,16.8,191,3700,"female",2008 +"Adelie","Torgersen",37.2,19.4,184,3900,"male",2008 +"Adelie","Torgersen",36.2,16.1,187,3550,"female",2008 +"Adelie","Torgersen",42.1,19.1,195,4000,"male",2008 +"Adelie","Torgersen",34.6,17.2,189,3200,"female",2008 +"Adelie","Torgersen",42.9,17.6,196,4700,"male",2008 +"Adelie","Torgersen",36.7,18.8,187,3800,"female",2008 +"Adelie","Torgersen",35.1,19.4,193,4200,"male",2008 +"Adelie","Dream",37.3,17.8,191,3350,"female",2008 +"Adelie","Dream",41.3,20.3,194,3550,"male",2008 +"Adelie","Dream",36.3,19.5,190,3800,"male",2008 +"Adelie","Dream",36.9,18.6,189,3500,"female",2008 +"Adelie","Dream",38.3,19.2,189,3950,"male",2008 +"Adelie","Dream",38.9,18.8,190,3600,"female",2008 +"Adelie","Dream",35.7,18,202,3550,"female",2008 +"Adelie","Dream",41.1,18.1,205,4300,"male",2008 +"Adelie","Dream",34,17.1,185,3400,"female",2008 +"Adelie","Dream",39.6,18.1,186,4450,"male",2008 +"Adelie","Dream",36.2,17.3,187,3300,"female",2008 +"Adelie","Dream",40.8,18.9,208,4300,"male",2008 +"Adelie","Dream",38.1,18.6,190,3700,"female",2008 +"Adelie","Dream",40.3,18.5,196,4350,"male",2008 +"Adelie","Dream",33.1,16.1,178,2900,"female",2008 +"Adelie","Dream",43.2,18.5,192,4100,"male",2008 +"Adelie","Biscoe",35,17.9,192,3725,"female",2009 +"Adelie","Biscoe",41,20,203,4725,"male",2009 +"Adelie","Biscoe",37.7,16,183,3075,"female",2009 +"Adelie","Biscoe",37.8,20,190,4250,"male",2009 +"Adelie","Biscoe",37.9,18.6,193,2925,"female",2009 +"Adelie","Biscoe",39.7,18.9,184,3550,"male",2009 +"Adelie","Biscoe",38.6,17.2,199,3750,"female",2009 +"Adelie","Biscoe",38.2,20,190,3900,"male",2009 +"Adelie","Biscoe",38.1,17,181,3175,"female",2009 +"Adelie","Biscoe",43.2,19,197,4775,"male",2009 +"Adelie","Biscoe",38.1,16.5,198,3825,"female",2009 +"Adelie","Biscoe",45.6,20.3,191,4600,"male",2009 
+"Adelie","Biscoe",39.7,17.7,193,3200,"female",2009 +"Adelie","Biscoe",42.2,19.5,197,4275,"male",2009 +"Adelie","Biscoe",39.6,20.7,191,3900,"female",2009 +"Adelie","Biscoe",42.7,18.3,196,4075,"male",2009 +"Adelie","Torgersen",38.6,17,188,2900,"female",2009 +"Adelie","Torgersen",37.3,20.5,199,3775,"male",2009 +"Adelie","Torgersen",35.7,17,189,3350,"female",2009 +"Adelie","Torgersen",41.1,18.6,189,3325,"male",2009 +"Adelie","Torgersen",36.2,17.2,187,3150,"female",2009 +"Adelie","Torgersen",37.7,19.8,198,3500,"male",2009 +"Adelie","Torgersen",40.2,17,176,3450,"female",2009 +"Adelie","Torgersen",41.4,18.5,202,3875,"male",2009 +"Adelie","Torgersen",35.2,15.9,186,3050,"female",2009 +"Adelie","Torgersen",40.6,19,199,4000,"male",2009 +"Adelie","Torgersen",38.8,17.6,191,3275,"female",2009 +"Adelie","Torgersen",41.5,18.3,195,4300,"male",2009 +"Adelie","Torgersen",39,17.1,191,3050,"female",2009 +"Adelie","Torgersen",44.1,18,210,4000,"male",2009 +"Adelie","Torgersen",38.5,17.9,190,3325,"female",2009 +"Adelie","Torgersen",43.1,19.2,197,3500,"male",2009 +"Adelie","Dream",36.8,18.5,193,3500,"female",2009 +"Adelie","Dream",37.5,18.5,199,4475,"male",2009 +"Adelie","Dream",38.1,17.6,187,3425,"female",2009 +"Adelie","Dream",41.1,17.5,190,3900,"male",2009 +"Adelie","Dream",35.6,17.5,191,3175,"female",2009 +"Adelie","Dream",40.2,20.1,200,3975,"male",2009 +"Adelie","Dream",37,16.5,185,3400,"female",2009 +"Adelie","Dream",39.7,17.9,193,4250,"male",2009 +"Adelie","Dream",40.2,17.1,193,3400,"female",2009 +"Adelie","Dream",40.6,17.2,187,3475,"male",2009 +"Adelie","Dream",32.1,15.5,188,3050,"female",2009 +"Adelie","Dream",40.7,17,190,3725,"male",2009 +"Adelie","Dream",37.3,16.8,192,3000,"female",2009 +"Adelie","Dream",39,18.7,185,3650,"male",2009 +"Adelie","Dream",39.2,18.6,190,4250,"male",2009 +"Adelie","Dream",36.6,18.4,184,3475,"female",2009 +"Adelie","Dream",36,17.8,195,3450,"female",2009 +"Adelie","Dream",37.8,18.1,193,3750,"male",2009 +"Adelie","Dream",36,17.1,187,3700,"female",2009 
+"Adelie","Dream",41.5,18.5,201,4000,"male",2009 +"Gentoo","Biscoe",46.1,13.2,211,4500,"female",2007 +"Gentoo","Biscoe",50,16.3,230,5700,"male",2007 +"Gentoo","Biscoe",48.7,14.1,210,4450,"female",2007 +"Gentoo","Biscoe",50,15.2,218,5700,"male",2007 +"Gentoo","Biscoe",47.6,14.5,215,5400,"male",2007 +"Gentoo","Biscoe",46.5,13.5,210,4550,"female",2007 +"Gentoo","Biscoe",45.4,14.6,211,4800,"female",2007 +"Gentoo","Biscoe",46.7,15.3,219,5200,"male",2007 +"Gentoo","Biscoe",43.3,13.4,209,4400,"female",2007 +"Gentoo","Biscoe",46.8,15.4,215,5150,"male",2007 +"Gentoo","Biscoe",40.9,13.7,214,4650,"female",2007 +"Gentoo","Biscoe",49,16.1,216,5550,"male",2007 +"Gentoo","Biscoe",45.5,13.7,214,4650,"female",2007 +"Gentoo","Biscoe",48.4,14.6,213,5850,"male",2007 +"Gentoo","Biscoe",45.8,14.6,210,4200,"female",2007 +"Gentoo","Biscoe",49.3,15.7,217,5850,"male",2007 +"Gentoo","Biscoe",42,13.5,210,4150,"female",2007 +"Gentoo","Biscoe",49.2,15.2,221,6300,"male",2007 +"Gentoo","Biscoe",46.2,14.5,209,4800,"female",2007 +"Gentoo","Biscoe",48.7,15.1,222,5350,"male",2007 +"Gentoo","Biscoe",50.2,14.3,218,5700,"male",2007 +"Gentoo","Biscoe",45.1,14.5,215,5000,"female",2007 +"Gentoo","Biscoe",46.5,14.5,213,4400,"female",2007 +"Gentoo","Biscoe",46.3,15.8,215,5050,"male",2007 +"Gentoo","Biscoe",42.9,13.1,215,5000,"female",2007 +"Gentoo","Biscoe",46.1,15.1,215,5100,"male",2007 +"Gentoo","Biscoe",44.5,14.3,216,4100,NA,2007 +"Gentoo","Biscoe",47.8,15,215,5650,"male",2007 +"Gentoo","Biscoe",48.2,14.3,210,4600,"female",2007 +"Gentoo","Biscoe",50,15.3,220,5550,"male",2007 +"Gentoo","Biscoe",47.3,15.3,222,5250,"male",2007 +"Gentoo","Biscoe",42.8,14.2,209,4700,"female",2007 +"Gentoo","Biscoe",45.1,14.5,207,5050,"female",2007 +"Gentoo","Biscoe",59.6,17,230,6050,"male",2007 +"Gentoo","Biscoe",49.1,14.8,220,5150,"female",2008 +"Gentoo","Biscoe",48.4,16.3,220,5400,"male",2008 +"Gentoo","Biscoe",42.6,13.7,213,4950,"female",2008 +"Gentoo","Biscoe",44.4,17.3,219,5250,"male",2008 
+"Gentoo","Biscoe",44,13.6,208,4350,"female",2008 +"Gentoo","Biscoe",48.7,15.7,208,5350,"male",2008 +"Gentoo","Biscoe",42.7,13.7,208,3950,"female",2008 +"Gentoo","Biscoe",49.6,16,225,5700,"male",2008 +"Gentoo","Biscoe",45.3,13.7,210,4300,"female",2008 +"Gentoo","Biscoe",49.6,15,216,4750,"male",2008 +"Gentoo","Biscoe",50.5,15.9,222,5550,"male",2008 +"Gentoo","Biscoe",43.6,13.9,217,4900,"female",2008 +"Gentoo","Biscoe",45.5,13.9,210,4200,"female",2008 +"Gentoo","Biscoe",50.5,15.9,225,5400,"male",2008 +"Gentoo","Biscoe",44.9,13.3,213,5100,"female",2008 +"Gentoo","Biscoe",45.2,15.8,215,5300,"male",2008 +"Gentoo","Biscoe",46.6,14.2,210,4850,"female",2008 +"Gentoo","Biscoe",48.5,14.1,220,5300,"male",2008 +"Gentoo","Biscoe",45.1,14.4,210,4400,"female",2008 +"Gentoo","Biscoe",50.1,15,225,5000,"male",2008 +"Gentoo","Biscoe",46.5,14.4,217,4900,"female",2008 +"Gentoo","Biscoe",45,15.4,220,5050,"male",2008 +"Gentoo","Biscoe",43.8,13.9,208,4300,"female",2008 +"Gentoo","Biscoe",45.5,15,220,5000,"male",2008 +"Gentoo","Biscoe",43.2,14.5,208,4450,"female",2008 +"Gentoo","Biscoe",50.4,15.3,224,5550,"male",2008 +"Gentoo","Biscoe",45.3,13.8,208,4200,"female",2008 +"Gentoo","Biscoe",46.2,14.9,221,5300,"male",2008 +"Gentoo","Biscoe",45.7,13.9,214,4400,"female",2008 +"Gentoo","Biscoe",54.3,15.7,231,5650,"male",2008 +"Gentoo","Biscoe",45.8,14.2,219,4700,"female",2008 +"Gentoo","Biscoe",49.8,16.8,230,5700,"male",2008 +"Gentoo","Biscoe",46.2,14.4,214,4650,NA,2008 +"Gentoo","Biscoe",49.5,16.2,229,5800,"male",2008 +"Gentoo","Biscoe",43.5,14.2,220,4700,"female",2008 +"Gentoo","Biscoe",50.7,15,223,5550,"male",2008 +"Gentoo","Biscoe",47.7,15,216,4750,"female",2008 +"Gentoo","Biscoe",46.4,15.6,221,5000,"male",2008 +"Gentoo","Biscoe",48.2,15.6,221,5100,"male",2008 +"Gentoo","Biscoe",46.5,14.8,217,5200,"female",2008 +"Gentoo","Biscoe",46.4,15,216,4700,"female",2008 +"Gentoo","Biscoe",48.6,16,230,5800,"male",2008 +"Gentoo","Biscoe",47.5,14.2,209,4600,"female",2008 
+"Gentoo","Biscoe",51.1,16.3,220,6000,"male",2008 +"Gentoo","Biscoe",45.2,13.8,215,4750,"female",2008 +"Gentoo","Biscoe",45.2,16.4,223,5950,"male",2008 +"Gentoo","Biscoe",49.1,14.5,212,4625,"female",2009 +"Gentoo","Biscoe",52.5,15.6,221,5450,"male",2009 +"Gentoo","Biscoe",47.4,14.6,212,4725,"female",2009 +"Gentoo","Biscoe",50,15.9,224,5350,"male",2009 +"Gentoo","Biscoe",44.9,13.8,212,4750,"female",2009 +"Gentoo","Biscoe",50.8,17.3,228,5600,"male",2009 +"Gentoo","Biscoe",43.4,14.4,218,4600,"female",2009 +"Gentoo","Biscoe",51.3,14.2,218,5300,"male",2009 +"Gentoo","Biscoe",47.5,14,212,4875,"female",2009 +"Gentoo","Biscoe",52.1,17,230,5550,"male",2009 +"Gentoo","Biscoe",47.5,15,218,4950,"female",2009 +"Gentoo","Biscoe",52.2,17.1,228,5400,"male",2009 +"Gentoo","Biscoe",45.5,14.5,212,4750,"female",2009 +"Gentoo","Biscoe",49.5,16.1,224,5650,"male",2009 +"Gentoo","Biscoe",44.5,14.7,214,4850,"female",2009 +"Gentoo","Biscoe",50.8,15.7,226,5200,"male",2009 +"Gentoo","Biscoe",49.4,15.8,216,4925,"male",2009 +"Gentoo","Biscoe",46.9,14.6,222,4875,"female",2009 +"Gentoo","Biscoe",48.4,14.4,203,4625,"female",2009 +"Gentoo","Biscoe",51.1,16.5,225,5250,"male",2009 +"Gentoo","Biscoe",48.5,15,219,4850,"female",2009 +"Gentoo","Biscoe",55.9,17,228,5600,"male",2009 +"Gentoo","Biscoe",47.2,15.5,215,4975,"female",2009 +"Gentoo","Biscoe",49.1,15,228,5500,"male",2009 +"Gentoo","Biscoe",47.3,13.8,216,4725,NA,2009 +"Gentoo","Biscoe",46.8,16.1,215,5500,"male",2009 +"Gentoo","Biscoe",41.7,14.7,210,4700,"female",2009 +"Gentoo","Biscoe",53.4,15.8,219,5500,"male",2009 +"Gentoo","Biscoe",43.3,14,208,4575,"female",2009 +"Gentoo","Biscoe",48.1,15.1,209,5500,"male",2009 +"Gentoo","Biscoe",50.5,15.2,216,5000,"female",2009 +"Gentoo","Biscoe",49.8,15.9,229,5950,"male",2009 +"Gentoo","Biscoe",43.5,15.2,213,4650,"female",2009 +"Gentoo","Biscoe",51.5,16.3,230,5500,"male",2009 +"Gentoo","Biscoe",46.2,14.1,217,4375,"female",2009 +"Gentoo","Biscoe",55.1,16,230,5850,"male",2009 
+"Gentoo","Biscoe",44.5,15.7,217,4875,NA,2009 +"Gentoo","Biscoe",48.8,16.2,222,6000,"male",2009 +"Gentoo","Biscoe",47.2,13.7,214,4925,"female",2009 +"Gentoo","Biscoe",NA,NA,NA,NA,NA,2009 +"Gentoo","Biscoe",46.8,14.3,215,4850,"female",2009 +"Gentoo","Biscoe",50.4,15.7,222,5750,"male",2009 +"Gentoo","Biscoe",45.2,14.8,212,5200,"female",2009 +"Gentoo","Biscoe",49.9,16.1,213,5400,"male",2009 +"Chinstrap","Dream",46.5,17.9,192,3500,"female",2007 +"Chinstrap","Dream",50,19.5,196,3900,"male",2007 +"Chinstrap","Dream",51.3,19.2,193,3650,"male",2007 +"Chinstrap","Dream",45.4,18.7,188,3525,"female",2007 +"Chinstrap","Dream",52.7,19.8,197,3725,"male",2007 +"Chinstrap","Dream",45.2,17.8,198,3950,"female",2007 +"Chinstrap","Dream",46.1,18.2,178,3250,"female",2007 +"Chinstrap","Dream",51.3,18.2,197,3750,"male",2007 +"Chinstrap","Dream",46,18.9,195,4150,"female",2007 +"Chinstrap","Dream",51.3,19.9,198,3700,"male",2007 +"Chinstrap","Dream",46.6,17.8,193,3800,"female",2007 +"Chinstrap","Dream",51.7,20.3,194,3775,"male",2007 +"Chinstrap","Dream",47,17.3,185,3700,"female",2007 +"Chinstrap","Dream",52,18.1,201,4050,"male",2007 +"Chinstrap","Dream",45.9,17.1,190,3575,"female",2007 +"Chinstrap","Dream",50.5,19.6,201,4050,"male",2007 +"Chinstrap","Dream",50.3,20,197,3300,"male",2007 +"Chinstrap","Dream",58,17.8,181,3700,"female",2007 +"Chinstrap","Dream",46.4,18.6,190,3450,"female",2007 +"Chinstrap","Dream",49.2,18.2,195,4400,"male",2007 +"Chinstrap","Dream",42.4,17.3,181,3600,"female",2007 +"Chinstrap","Dream",48.5,17.5,191,3400,"male",2007 +"Chinstrap","Dream",43.2,16.6,187,2900,"female",2007 +"Chinstrap","Dream",50.6,19.4,193,3800,"male",2007 +"Chinstrap","Dream",46.7,17.9,195,3300,"female",2007 +"Chinstrap","Dream",52,19,197,4150,"male",2007 +"Chinstrap","Dream",50.5,18.4,200,3400,"female",2008 +"Chinstrap","Dream",49.5,19,200,3800,"male",2008 +"Chinstrap","Dream",46.4,17.8,191,3700,"female",2008 +"Chinstrap","Dream",52.8,20,205,4550,"male",2008 
+"Chinstrap","Dream",40.9,16.6,187,3200,"female",2008 +"Chinstrap","Dream",54.2,20.8,201,4300,"male",2008 +"Chinstrap","Dream",42.5,16.7,187,3350,"female",2008 +"Chinstrap","Dream",51,18.8,203,4100,"male",2008 +"Chinstrap","Dream",49.7,18.6,195,3600,"male",2008 +"Chinstrap","Dream",47.5,16.8,199,3900,"female",2008 +"Chinstrap","Dream",47.6,18.3,195,3850,"female",2008 +"Chinstrap","Dream",52,20.7,210,4800,"male",2008 +"Chinstrap","Dream",46.9,16.6,192,2700,"female",2008 +"Chinstrap","Dream",53.5,19.9,205,4500,"male",2008 +"Chinstrap","Dream",49,19.5,210,3950,"male",2008 +"Chinstrap","Dream",46.2,17.5,187,3650,"female",2008 +"Chinstrap","Dream",50.9,19.1,196,3550,"male",2008 +"Chinstrap","Dream",45.5,17,196,3500,"female",2008 +"Chinstrap","Dream",50.9,17.9,196,3675,"female",2009 +"Chinstrap","Dream",50.8,18.5,201,4450,"male",2009 +"Chinstrap","Dream",50.1,17.9,190,3400,"female",2009 +"Chinstrap","Dream",49,19.6,212,4300,"male",2009 +"Chinstrap","Dream",51.5,18.7,187,3250,"male",2009 +"Chinstrap","Dream",49.8,17.3,198,3675,"female",2009 +"Chinstrap","Dream",48.1,16.4,199,3325,"female",2009 +"Chinstrap","Dream",51.4,19,201,3950,"male",2009 +"Chinstrap","Dream",45.7,17.3,193,3600,"female",2009 +"Chinstrap","Dream",50.7,19.7,203,4050,"male",2009 +"Chinstrap","Dream",42.5,17.3,187,3350,"female",2009 +"Chinstrap","Dream",52.2,18.8,197,3450,"male",2009 +"Chinstrap","Dream",45.2,16.6,191,3250,"female",2009 +"Chinstrap","Dream",49.3,19.9,203,4050,"male",2009 +"Chinstrap","Dream",50.2,18.8,202,3800,"male",2009 +"Chinstrap","Dream",45.6,19.4,194,3525,"female",2009 +"Chinstrap","Dream",51.9,19.5,206,3950,"male",2009 +"Chinstrap","Dream",46.8,16.5,189,3650,"female",2009 +"Chinstrap","Dream",45.7,17,195,3650,"female",2009 +"Chinstrap","Dream",55.8,19.8,207,4000,"male",2009 +"Chinstrap","Dream",43.5,18.1,202,3400,"female",2009 +"Chinstrap","Dream",49.6,18.2,193,3775,"male",2009 +"Chinstrap","Dream",50.8,19,210,4100,"male",2009 +"Chinstrap","Dream",50.2,18.7,198,3775,"female",2009 
diff --git a/data.qmd b/data.qmd index 15f639e..84fbea5 100644 --- a/data.qmd +++ b/data.qmd @@ -1,5 +1,5 @@ --- -title: "Add Data" +title: "Add Data and Data Dictionary" engine: knitr --- @@ -10,13 +10,17 @@ engine: knitr invisible(loadNamespace("palmerpenguins")) # Tell renv that we need this package stopifnot(packageDescription("palmerpenguins", fields = "License") == "CC0") + +# write both in the top folder and in the in-depth folder +# (so that both scripts can reference the data without relative paths) write.csv(palmerpenguins::penguins, file = "data.csv", row.names = FALSE) +write.csv(palmerpenguins::penguins, file = "in_depth_material/data.csv", row.names = FALSE) ``` You can now download the data set we have prepared for you and put it into your project folder: -[data.csv](data.csv){.btn .btn-lg .btn-info download=""} +[data.csv](data.csv){.btn .btn-lg .btn-info download="data.csv"} ::: {.column-margin} ![palmerpenguins: Palmer Archipelago (Antarctica) Penguin Data](images/palmerpenguins.png){width=250px} @@ -57,389 +61,95 @@ Another resource to look at is the presentation [ARX](https://arx.deidentifier.org/), [sdcTools](https://github.com/sdcTools), [Synthpop](https://www.synthpop.org.uk/), or [OpenDP](https://opendp.org/). -## Add Data Dictionary +## Add a Data Dictionary Whether or not distributing the data set, it is important to document the meaning (e.g., units) and values of its variables. This is typically done with a _data dictionary_ (also called a _codebook_). -In the following, we will demonstrate how to create a simple data dictionary -using the R package [`datawizard`][datawizard]. -You can install them now using: - -[datawizard]: https://easystats.github.io/datawizard/ - -```{.r filename="Console"} -renv::install("datawizard") -``` - -You can put the code that follows in a separate document. -Create it by clicking on _File_ > _New File_ > _Quarto Document..._. 
-Choose a title such as `Data Dictionary`, -select _HTML_ as format, -uncheck the use of the visual markdown editor, and click on _Create_. -Remove everything except the YAML header (between the `---`). -To make the HTML file self-contained, -also set `embed-resources: true` such that the YAML header looks as follows: - -```{.yml filename="data_dictionary.qmd"} ---- -title: "Data Dictionary" -format: - html: - embed-resources: true ---- -``` - -Then, save it as `data_dictionary.qmd` by clicking on _File_ > _Save_. -To create the actual data dictionary, first write a description for all columns -so others can understand what the variable names mean. -Where necessary, also document their value --- this is especially important if their meaning is non-obvious. -In the following, we demonstrate this by storing the penguins' binomial name -along with the English name. +The recommendations for data dictionaries vary between fields - +both in terms of the recommended content (i.e., what exactly should be documented) +and the technical implementation (i.e., which file formats should be used). 
-``````{cat} -#| engine.opts: { file: "_data_dictionary.qmd" } -#| class.source: "md" -#| filename: "data_dictionary.qmd" - -```{r} -#| echo: false - -# Store the description of variables -vars <- c( - species = "a character string denoting penguin species", - island = "a character string denoting island in Palmer Archipelago, Antarctica", - bill_length_mm = "a number denoting bill length (millimeters)", - bill_depth_mm = "a number denoting bill depth (millimeters)", - flipper_length_mm = "an integer denoting flipper length (millimeters)", - body_mass_g = "an integer denoting body mass (grams)", - sex = "a character string denoting penguin sex", - year = "an integer denoting the study year" -) - -# Store the description of variable values -vals <- list( - species = c( - Adelie = "Pygoscelis adeliae", - Gentoo = "Pygoscelis papua", - Chinstrap = "Pygoscelis antarcticus" - ) -) -``` -`````` +*For the purpose of this exercise, we keep it easy and propose that you +manually create a file with the data dictionary +(e.g., as a table in .xlsx, .docx, .ods, or as a Markdown table), +documenting only the bare minimum.* -Then, load the data and use `datawizard` -to add the descriptions to the `data.frame`:[^not-permanent] + -[^not-permanent]: Note that the code provided does not alter the data file --- no description will be added to `data.csv`. -The descriptions are only added to a (temporary) copy of the data set within R -to create the data dictionary. +::: {#tip-in-depth .callout-tip collapse="false"} +### Manual is too much work? Automatic generation of (machine-readable) data dictionaries +Our in-depth supplementary material "[Automatic Generation of Data Dictionaries](in_depth_material/data_dic_generation.qmd)" explains how you can automatically create +a data dictionary with an R package. The package reads the data set and extracts relevant information from it. +This approach is particularly useful if you need to document data sets with many variables. 
-::: {.column-margin} -![datawizard: Easy Data Wrangling and Statistical Transformations](images/datawizard.png){width=250px} +This advanced chapter also contains a section on how to create machine-readable data dictionaries. ::: -``````{cat} -#| engine.opts: { file: "_data_dictionary.qmd", append: TRUE } -#| class.source: "md" -#| filename: "data_dictionary.qmd" - -```{r} -#| echo: false - -dat <- read.csv("data.csv") - -for (x in names(vars)) { - if (x %in% names(vals)) { - dat <- datawizard::assign_labels( - dat, - select = I(x), - variable = vars[[x]], - values = vals[[x]] - ) - } else { - dat <- datawizard::assign_labels( - dat, - select = I(x), - variable = vars[[x]] - ) - } -} -``` - -`````` - -Then, you can create the data dictionary containing the descriptions, -but also some other information about each variable -(e.g., the number of missing values) and print it. - -``````{cat} -#| engine.opts: { file: "_data_dictionary.qmd", append: TRUE } -#| class.source: "md" -#| filename: "data_dictionary.qmd" - -```{r} -#| echo: false -#| column: "body-outset" -#| classes: plain - -datawizard::data_codebook(dat) |> - datawizard::data_select(exclude = ID) |> - datawizard::data_filter(N != "") |> - datawizard::print_md() -``` - -`````` +### A bare minimum data dictionary -```{r} -#| child: "_data_dictionary.qmd" +Most standards for data dictionaries require at least +this information for each variable in your data set: -``` +- `name`: The (machine‑readable) name of the variable +- `label`: A short, human‑readable title or label for the variable +- `type`: The data type of the variable (e.g., `integer`, `float`, `string`, `date`) +- `description`: A brief description of what the variable measures or represents +- `values` (for categorical variables): A mapping of codes to their meanings (for example: 1 = Male, 2 = Female, 9 = Missing) +- `units` (for numeric measures): The units of measurement (e.g., `kg`, `USD`, `years`, or the scale of a survey response) +- 
`missing_codes`: Any special codes used to denote missing or non‑applicable values (e.g., `-99 = Not answered`) -Depending on the type of data, it may also be necessary -to describe sampling procedures (e.g., selection criteria), -measurement instruments (e.g., questionnaires), -appropriate weighting, -already applied preprocessing steps, or contact information. -In our case, as the data has already been published, -we only store a reference to its source. +Here's an example from a different data set, with variables in rows, and the dictionary in columns: -The data set is from the R package `palmerpenguins`. -If you had it installed -you could use the function `citation()` to create such a reference: +| name | label | type | description | values | units | missing_codes | +| ----------------- | ------------------------- | ------- | --------------------------------------- | --------------------------------------------------------------------------------------- | ----------- | ------------------ | +| gender | Gender | integer | self‑identified gender | 1 = Male; 2 = Female; 3 = Other; 9 = Missing | | 9 = Missing | +| age | Age | integer | age in years | | years | -99 = Not answered | +| blood_pressure | Blood Pressure (systolic) | integer | systolic blood pressure | | mmHg | -99 = Not measured | +| life_satisfaction | Life Satisfaction | integer | "How satisfied are you with your life?" 
| 1 = Very dissatisfied; 2 = Dissatisfied; 3 = Neutral; 4 = Satisfied; 5 = Very satisfied | scale (1–5) | -99 = Not answered | -```{r} -#| label: "data-citation" -#| eval: false -citation("palmerpenguins", auto = TRUE) |> - format(bibtex = FALSE, style = "text") -``` +### ✍️ Practical Exercise: Add your own data dictionary -Without the package `palmerpenguins` installed, -you can find a [suggested citation on its website][palmerpenguins-citation] -and add that to your data dictionary: +Now go ahead and create a data dictionary for the penguins data set, +using a program of your choice (e.g., a text editor or a spreadsheet application). -[palmerpenguins-citation]: https://allisonhorst.github.io/palmerpenguins/#citation +Save the data dictionary in the same folder as the actual data set file. -```{r} -#| ref.label = "data-citation", -#| render = function(x, options) gsub("\\n", " ", x = x), -#| echo = FALSE, -#| class.output = "md code-overflow-wrap", -#| attr.output = 'filename="data_dictionary.qmd"' - -# This chunk takes the output from the chunk "data-citation" -# and renders it with all newlines replaced by whitespaces. 
-``` +::: {#tip-data_dic .callout-tip collapse="true"} +#### Data dictionary for penguins data set (Solution) -Finally, you can render the data dictionary by running the following: +| name | label | type | description | values | units | missing_codes | +| ----------------- | -------------- | ------- | ------------------------------------ | ------------------------- | ----- | ------------- | +| species | Species | string | Penguin species | Adelie; Gentoo; Chinstrap | | NA = Missing | +| island | Island | string | Island where individual was observed | Torgersen; Biscoe; Dream | | NA = Missing | +| bill_length_mm | Bill length | float | Length of the bill (beak) | | mm | NA = Missing | +| bill_depth_mm | Bill depth | float | Depth of the bill (beak) | | mm | NA = Missing | +| flipper_length_mm | Flipper length | integer | Length of the flipper | | mm | NA = Missing | +| body_mass_g | Body mass | integer | Body mass | | g | NA = Missing | +| sex | Sex | string | Sex of the penguin | male; female | | NA = Missing | +| year | Year | integer | Year of data collection | 2007; 2008; 2009 | | | -```{.bash filename="Terminal"} -quarto render data_dictionary.qmd -``` - -This should create the file `data_dictionary.html` -which you open and view in your web browser. - -One could go even further by making the information machine-readable in a standardized way. -We provide an optional example of that in @nte-frictionless. -If you want to learn more about the sharing of research data, -have a look at the tutorial "[FAIR research data management][fair-tutorial]". - -[fair-tutorial]: https://lmu-osc.github.io/FAIR-Data-Management/ - -::: {#nte-frictionless .callout-note collapse="true"} -### Create Machine-Readable Variable Documentation - -This example demonstrates how the title and description of the data set, -the description of the variables and their valid values are stored in a machine-readable way. 
-We'll reuse the descriptions we already created[^value-labels] and add a few others. - -[^value-labels]: Unfortunately, the descriptions of values are not reused in this example, -as they are [not supported][enum-labels] by the specification we are using. - -[enum-labels]: https://specs.frictionlessdata.io/patterns/#table-schema-enum-labels-and-ordering - -First, store the title and description of the data set as a whole: - -```{.r filename="Console"} -table_info <- c( - title = "penguins data set", - description = "Size measurements for adult foraging penguins near Palmer Station, Antarctica" -) -``` - -As before, also provide a reference to the source. - -```{r} -#| echo: false -#| class-output: "r code-overflow-wrap" -#| attr-output: 'filename="Console"' - -# We have provided the data set as CSV file to the readers. -# Therefore, we cannot assume or require that readers have -# the R package palmerpenguins installed. Instead, we create -# the citation on our end and hide how we obtained it. - -citation("palmerpenguins", auto = TRUE)$url |> - paste0("dat_source <- \"", ... 
= _, "\"") |> - cat() -``` - -Next, create a list of the categorical variables' valid values: - -```{.r filename="Console"} -valid_vals <- list( - species = c("Adelie", "Gentoo", "Chinstrap"), - island = c("Torgersen", "Biscoe", "Dream"), - sex = c("male", "female"), - year = c(2007, 2008, 2009) -) -``` - -Finally, store the descriptions of the variables we already created earlier: - -```{.r filename="Console"} -# Store the description of variables -vars <- c( - species = "a character string denoting penguin species", - island = "a character string denoting island in Palmer Archipelago, Antarctica", - bill_length_mm = "a number denoting bill length (millimeters)", - bill_depth_mm = "a number denoting bill depth (millimeters)", - flipper_length_mm = "an integer denoting flipper length (millimeters)", - body_mass_g = "an integer denoting body mass (grams)", - sex = "a character string denoting penguin sex", - year = "an integer denoting the study year" -) -``` - -Generally, metadata are either stored embedded into the data or externally, -for example, in a separate file. -We will use the "[frictionless data](https://frictionlessdata.io/)" standard, -where metadata are stored separately. -Another alternative would be [RO-Crate](https://www.researchobject.org/ro-crate/). - -Specifically, one can use the R package [`frictionless`][frictionless] -to create a _schema_ which describes the structure of the data.[^frictionless-note] -For the purpose of the following code, -it is just a nested list that we edit to include our own information. -We also explicitly record in the schema -that missing values are stored in the data file as `NA` -and that the data are licensed under CC0\ 1.0. -Finally, the package is used to create a metadata file that contains the schema. - -[frictionless]: https://docs.ropensci.org/frictionless/ - -[^frictionless-note]: In June 2024, [version 2](https://datapackage.org/) -of the frictionless data standard has been released. 
-As of November 2024, the R package `frictionless` only supports the first version, -though support for v2 is [planned](https://github.com/frictionlessdata/frictionless-r/labels/datapackage%3Av2). - -```{.r filename="Console"} -# Install {frictionless} and the required dependency {stringi} -renv::install(c( - "frictionless", - "stringi" -)) - -# Read data and create schema -dat_filename <- "data.csv" -dat <- read.csv(dat_filename) -dat_schema <- frictionless::create_schema(dat) - -# Add descriptions to the fields -dat_schema$fields <- lapply(dat_schema$fields, \(x) { - c(x, description = vars[[x$name]]) -}) - -# Record valid values -dat_schema$fields <- lapply(dat_schema$fields, \(x) { - if (x[["name"]] %in% names(valid_vals)) { - modifyList(x, list(constraints = list(enum = valid_vals[[x$name]]))) - } else { - x - } -}) - -# Define missing values -dat_schema$missingValues <- c("", "NA") - -# Create package with license info and write it -dat_package <- frictionless::create_package() |> - frictionless::add_resource( - resource_name = "penguins", - data = dat_filename, - schema = dat_schema, - title = table_info[["title"]], - description = table_info[["description"]], - licenses = list(list( - name = "CC0-1.0", - path = "https://creativecommons.org/publicdomain/zero/1.0/", - title = "CC0 1.0 Universal" - )), - sources = list(list( - title = "CRAN", - path = dat_source - )) - ) -frictionless::write_package(dat_package, directory = ".") -``` - -This creates the metadata file `datapackage.json` in the current directory. -Make sure it is located in the same folder as `data.csv`, -as together they comprise a [data package](https://specs.frictionlessdata.io/data-package/). 
::: -Having added the data and its documentation, -one can view and record the utilized packages with `renv`, -thus bringing the project into a consistent state: - -```{.r filename="Console"} -renv::status() -renv::snapshot() -``` - -## Add Data Citation and Attribution +## ✍️ Practical Exercise: Add Data Citation and Attribution All data relied upon should be cited in the manuscript to allow for precise identification and access. -From the "eight core principles of data citation" by @Starr2015, -licensed under [CC0\ 1.0][cc0]: - -> **Principle 1 – Importance**: "Data should be considered legitimate, -> citable products of research. Data citations should be accorded the same -> importance in the scholarly record as citations of other research objects, -> such as publications." -> -> **Principle 3 – Evidence**: "In scholarly literature, whenever and wherever a -> claim relies upon data, the corresponding data should be cited." -> -> **Principle 5 – Access**: "Data citations should facilitate access to the data -> themselves and to such associated metadata, documentation, code, and other -> materials, as are necessary for both humans and machines to make informed use -> of the referenced data." -> -> **Principle 7 – Specificity and Verifiability**: "Data citations should -> facilitate identification of, access to, and verification of the specific data -> that support a claim. Citations or citation metadata should include -> information about provenance and fixity sufficient to facilitate verifying -> that the specific time slice, version and/or granular portion of data -> retrieved subsequently is the same as was originally cited." - Now, it's your turn to add an appropriate citation for the data set to the manuscript. -Does your citation adhere to the principles above?
+ +Hints: + +- You can find an appropriate BibTeX entry on [the package website](https://allisonhorst.github.io/palmerpenguins/#citation) + or with the function `citation()`^[Note that this function requires the respective package to be installed.]. +- Add the citation in the manuscript where it says "cite data here". ::: {#tip-cite-palmerpenguins .callout-tip collapse="true"} #### Citing the Data Set (Solution) -You can find an appropriate BibTeX entry on [the package website][palmerpenguins-citation] -or with the function `citation()`:^[Note that this function requires -to have the respective package installed.] +Show the correct reference of the data set: ```{r} #| class.output: "bib code-overflow-scroll" @@ -465,27 +175,12 @@ quarto render Manuscript.qmd ``` ::: -While citation happens in the manuscript for reasons of academic integrity and reproducibility, -to comply with any licenses you also may need to provide attribution within your project folder. -Even though the data file we use here does not require attribution, -we recommend adding a short paragraph to `LICENSE.txt`: -```{r} -#| echo: false -#| class-output: "txt code-overflow-wrap" -#| attr-output: 'filename="LICENSE.txt"' - -cat(paste0( - "The penguins data stored in \"data.csv\" by Allison Horst, Alison Hill, and Kristen Gorman available from <", - citation("palmerpenguins", auto = TRUE)$url, - "> are licensed under CC0 1.0: " -)) -``` +## Wrap up -As before, if the license required adding the full license text, -you would also need to copy it to the project folder (if not already in there). +Congrats! You documented your data set and cited it correctly.
-Finally, you can go through the commit routine: +To finalize this step, you can go through the commit routine: ```{.bash filename="Terminal"} git status diff --git a/in_depth_material/data.csv b/in_depth_material/data.csv new file mode 100644 index 0000000..03e2c58 --- /dev/null +++ b/in_depth_material/data.csv @@ -0,0 +1,345 @@ +"species","island","bill_length_mm","bill_depth_mm","flipper_length_mm","body_mass_g","sex","year" +"Adelie","Torgersen",39.1,18.7,181,3750,"male",2007 +"Adelie","Torgersen",39.5,17.4,186,3800,"female",2007 +"Adelie","Torgersen",40.3,18,195,3250,"female",2007 +"Adelie","Torgersen",NA,NA,NA,NA,NA,2007 +"Adelie","Torgersen",36.7,19.3,193,3450,"female",2007 +"Adelie","Torgersen",39.3,20.6,190,3650,"male",2007 +"Adelie","Torgersen",38.9,17.8,181,3625,"female",2007 +"Adelie","Torgersen",39.2,19.6,195,4675,"male",2007 +"Adelie","Torgersen",34.1,18.1,193,3475,NA,2007 +"Adelie","Torgersen",42,20.2,190,4250,NA,2007 +"Adelie","Torgersen",37.8,17.1,186,3300,NA,2007 +"Adelie","Torgersen",37.8,17.3,180,3700,NA,2007 +"Adelie","Torgersen",41.1,17.6,182,3200,"female",2007 +"Adelie","Torgersen",38.6,21.2,191,3800,"male",2007 +"Adelie","Torgersen",34.6,21.1,198,4400,"male",2007 +"Adelie","Torgersen",36.6,17.8,185,3700,"female",2007 +"Adelie","Torgersen",38.7,19,195,3450,"female",2007 +"Adelie","Torgersen",42.5,20.7,197,4500,"male",2007 +"Adelie","Torgersen",34.4,18.4,184,3325,"female",2007 +"Adelie","Torgersen",46,21.5,194,4200,"male",2007 +"Adelie","Biscoe",37.8,18.3,174,3400,"female",2007 +"Adelie","Biscoe",37.7,18.7,180,3600,"male",2007 +"Adelie","Biscoe",35.9,19.2,189,3800,"female",2007 +"Adelie","Biscoe",38.2,18.1,185,3950,"male",2007 +"Adelie","Biscoe",38.8,17.2,180,3800,"male",2007 +"Adelie","Biscoe",35.3,18.9,187,3800,"female",2007 +"Adelie","Biscoe",40.6,18.6,183,3550,"male",2007 +"Adelie","Biscoe",40.5,17.9,187,3200,"female",2007 +"Adelie","Biscoe",37.9,18.6,172,3150,"female",2007 +"Adelie","Biscoe",40.5,18.9,180,3950,"male",2007 
+"Adelie","Dream",39.5,16.7,178,3250,"female",2007 +"Adelie","Dream",37.2,18.1,178,3900,"male",2007 +"Adelie","Dream",39.5,17.8,188,3300,"female",2007 +"Adelie","Dream",40.9,18.9,184,3900,"male",2007 +"Adelie","Dream",36.4,17,195,3325,"female",2007 +"Adelie","Dream",39.2,21.1,196,4150,"male",2007 +"Adelie","Dream",38.8,20,190,3950,"male",2007 +"Adelie","Dream",42.2,18.5,180,3550,"female",2007 +"Adelie","Dream",37.6,19.3,181,3300,"female",2007 +"Adelie","Dream",39.8,19.1,184,4650,"male",2007 +"Adelie","Dream",36.5,18,182,3150,"female",2007 +"Adelie","Dream",40.8,18.4,195,3900,"male",2007 +"Adelie","Dream",36,18.5,186,3100,"female",2007 +"Adelie","Dream",44.1,19.7,196,4400,"male",2007 +"Adelie","Dream",37,16.9,185,3000,"female",2007 +"Adelie","Dream",39.6,18.8,190,4600,"male",2007 +"Adelie","Dream",41.1,19,182,3425,"male",2007 +"Adelie","Dream",37.5,18.9,179,2975,NA,2007 +"Adelie","Dream",36,17.9,190,3450,"female",2007 +"Adelie","Dream",42.3,21.2,191,4150,"male",2007 +"Adelie","Biscoe",39.6,17.7,186,3500,"female",2008 +"Adelie","Biscoe",40.1,18.9,188,4300,"male",2008 +"Adelie","Biscoe",35,17.9,190,3450,"female",2008 +"Adelie","Biscoe",42,19.5,200,4050,"male",2008 +"Adelie","Biscoe",34.5,18.1,187,2900,"female",2008 +"Adelie","Biscoe",41.4,18.6,191,3700,"male",2008 +"Adelie","Biscoe",39,17.5,186,3550,"female",2008 +"Adelie","Biscoe",40.6,18.8,193,3800,"male",2008 +"Adelie","Biscoe",36.5,16.6,181,2850,"female",2008 +"Adelie","Biscoe",37.6,19.1,194,3750,"male",2008 +"Adelie","Biscoe",35.7,16.9,185,3150,"female",2008 +"Adelie","Biscoe",41.3,21.1,195,4400,"male",2008 +"Adelie","Biscoe",37.6,17,185,3600,"female",2008 +"Adelie","Biscoe",41.1,18.2,192,4050,"male",2008 +"Adelie","Biscoe",36.4,17.1,184,2850,"female",2008 +"Adelie","Biscoe",41.6,18,192,3950,"male",2008 +"Adelie","Biscoe",35.5,16.2,195,3350,"female",2008 +"Adelie","Biscoe",41.1,19.1,188,4100,"male",2008 +"Adelie","Torgersen",35.9,16.6,190,3050,"female",2008 +"Adelie","Torgersen",41.8,19.4,198,4450,"male",2008 
+"Adelie","Torgersen",33.5,19,190,3600,"female",2008 +"Adelie","Torgersen",39.7,18.4,190,3900,"male",2008 +"Adelie","Torgersen",39.6,17.2,196,3550,"female",2008 +"Adelie","Torgersen",45.8,18.9,197,4150,"male",2008 +"Adelie","Torgersen",35.5,17.5,190,3700,"female",2008 +"Adelie","Torgersen",42.8,18.5,195,4250,"male",2008 +"Adelie","Torgersen",40.9,16.8,191,3700,"female",2008 +"Adelie","Torgersen",37.2,19.4,184,3900,"male",2008 +"Adelie","Torgersen",36.2,16.1,187,3550,"female",2008 +"Adelie","Torgersen",42.1,19.1,195,4000,"male",2008 +"Adelie","Torgersen",34.6,17.2,189,3200,"female",2008 +"Adelie","Torgersen",42.9,17.6,196,4700,"male",2008 +"Adelie","Torgersen",36.7,18.8,187,3800,"female",2008 +"Adelie","Torgersen",35.1,19.4,193,4200,"male",2008 +"Adelie","Dream",37.3,17.8,191,3350,"female",2008 +"Adelie","Dream",41.3,20.3,194,3550,"male",2008 +"Adelie","Dream",36.3,19.5,190,3800,"male",2008 +"Adelie","Dream",36.9,18.6,189,3500,"female",2008 +"Adelie","Dream",38.3,19.2,189,3950,"male",2008 +"Adelie","Dream",38.9,18.8,190,3600,"female",2008 +"Adelie","Dream",35.7,18,202,3550,"female",2008 +"Adelie","Dream",41.1,18.1,205,4300,"male",2008 +"Adelie","Dream",34,17.1,185,3400,"female",2008 +"Adelie","Dream",39.6,18.1,186,4450,"male",2008 +"Adelie","Dream",36.2,17.3,187,3300,"female",2008 +"Adelie","Dream",40.8,18.9,208,4300,"male",2008 +"Adelie","Dream",38.1,18.6,190,3700,"female",2008 +"Adelie","Dream",40.3,18.5,196,4350,"male",2008 +"Adelie","Dream",33.1,16.1,178,2900,"female",2008 +"Adelie","Dream",43.2,18.5,192,4100,"male",2008 +"Adelie","Biscoe",35,17.9,192,3725,"female",2009 +"Adelie","Biscoe",41,20,203,4725,"male",2009 +"Adelie","Biscoe",37.7,16,183,3075,"female",2009 +"Adelie","Biscoe",37.8,20,190,4250,"male",2009 +"Adelie","Biscoe",37.9,18.6,193,2925,"female",2009 +"Adelie","Biscoe",39.7,18.9,184,3550,"male",2009 +"Adelie","Biscoe",38.6,17.2,199,3750,"female",2009 +"Adelie","Biscoe",38.2,20,190,3900,"male",2009 +"Adelie","Biscoe",38.1,17,181,3175,"female",2009 
+"Adelie","Biscoe",43.2,19,197,4775,"male",2009 +"Adelie","Biscoe",38.1,16.5,198,3825,"female",2009 +"Adelie","Biscoe",45.6,20.3,191,4600,"male",2009 +"Adelie","Biscoe",39.7,17.7,193,3200,"female",2009 +"Adelie","Biscoe",42.2,19.5,197,4275,"male",2009 +"Adelie","Biscoe",39.6,20.7,191,3900,"female",2009 +"Adelie","Biscoe",42.7,18.3,196,4075,"male",2009 +"Adelie","Torgersen",38.6,17,188,2900,"female",2009 +"Adelie","Torgersen",37.3,20.5,199,3775,"male",2009 +"Adelie","Torgersen",35.7,17,189,3350,"female",2009 +"Adelie","Torgersen",41.1,18.6,189,3325,"male",2009 +"Adelie","Torgersen",36.2,17.2,187,3150,"female",2009 +"Adelie","Torgersen",37.7,19.8,198,3500,"male",2009 +"Adelie","Torgersen",40.2,17,176,3450,"female",2009 +"Adelie","Torgersen",41.4,18.5,202,3875,"male",2009 +"Adelie","Torgersen",35.2,15.9,186,3050,"female",2009 +"Adelie","Torgersen",40.6,19,199,4000,"male",2009 +"Adelie","Torgersen",38.8,17.6,191,3275,"female",2009 +"Adelie","Torgersen",41.5,18.3,195,4300,"male",2009 +"Adelie","Torgersen",39,17.1,191,3050,"female",2009 +"Adelie","Torgersen",44.1,18,210,4000,"male",2009 +"Adelie","Torgersen",38.5,17.9,190,3325,"female",2009 +"Adelie","Torgersen",43.1,19.2,197,3500,"male",2009 +"Adelie","Dream",36.8,18.5,193,3500,"female",2009 +"Adelie","Dream",37.5,18.5,199,4475,"male",2009 +"Adelie","Dream",38.1,17.6,187,3425,"female",2009 +"Adelie","Dream",41.1,17.5,190,3900,"male",2009 +"Adelie","Dream",35.6,17.5,191,3175,"female",2009 +"Adelie","Dream",40.2,20.1,200,3975,"male",2009 +"Adelie","Dream",37,16.5,185,3400,"female",2009 +"Adelie","Dream",39.7,17.9,193,4250,"male",2009 +"Adelie","Dream",40.2,17.1,193,3400,"female",2009 +"Adelie","Dream",40.6,17.2,187,3475,"male",2009 +"Adelie","Dream",32.1,15.5,188,3050,"female",2009 +"Adelie","Dream",40.7,17,190,3725,"male",2009 +"Adelie","Dream",37.3,16.8,192,3000,"female",2009 +"Adelie","Dream",39,18.7,185,3650,"male",2009 +"Adelie","Dream",39.2,18.6,190,4250,"male",2009 +"Adelie","Dream",36.6,18.4,184,3475,"female",2009 
+"Adelie","Dream",36,17.8,195,3450,"female",2009 +"Adelie","Dream",37.8,18.1,193,3750,"male",2009 +"Adelie","Dream",36,17.1,187,3700,"female",2009 +"Adelie","Dream",41.5,18.5,201,4000,"male",2009 +"Gentoo","Biscoe",46.1,13.2,211,4500,"female",2007 +"Gentoo","Biscoe",50,16.3,230,5700,"male",2007 +"Gentoo","Biscoe",48.7,14.1,210,4450,"female",2007 +"Gentoo","Biscoe",50,15.2,218,5700,"male",2007 +"Gentoo","Biscoe",47.6,14.5,215,5400,"male",2007 +"Gentoo","Biscoe",46.5,13.5,210,4550,"female",2007 +"Gentoo","Biscoe",45.4,14.6,211,4800,"female",2007 +"Gentoo","Biscoe",46.7,15.3,219,5200,"male",2007 +"Gentoo","Biscoe",43.3,13.4,209,4400,"female",2007 +"Gentoo","Biscoe",46.8,15.4,215,5150,"male",2007 +"Gentoo","Biscoe",40.9,13.7,214,4650,"female",2007 +"Gentoo","Biscoe",49,16.1,216,5550,"male",2007 +"Gentoo","Biscoe",45.5,13.7,214,4650,"female",2007 +"Gentoo","Biscoe",48.4,14.6,213,5850,"male",2007 +"Gentoo","Biscoe",45.8,14.6,210,4200,"female",2007 +"Gentoo","Biscoe",49.3,15.7,217,5850,"male",2007 +"Gentoo","Biscoe",42,13.5,210,4150,"female",2007 +"Gentoo","Biscoe",49.2,15.2,221,6300,"male",2007 +"Gentoo","Biscoe",46.2,14.5,209,4800,"female",2007 +"Gentoo","Biscoe",48.7,15.1,222,5350,"male",2007 +"Gentoo","Biscoe",50.2,14.3,218,5700,"male",2007 +"Gentoo","Biscoe",45.1,14.5,215,5000,"female",2007 +"Gentoo","Biscoe",46.5,14.5,213,4400,"female",2007 +"Gentoo","Biscoe",46.3,15.8,215,5050,"male",2007 +"Gentoo","Biscoe",42.9,13.1,215,5000,"female",2007 +"Gentoo","Biscoe",46.1,15.1,215,5100,"male",2007 +"Gentoo","Biscoe",44.5,14.3,216,4100,NA,2007 +"Gentoo","Biscoe",47.8,15,215,5650,"male",2007 +"Gentoo","Biscoe",48.2,14.3,210,4600,"female",2007 +"Gentoo","Biscoe",50,15.3,220,5550,"male",2007 +"Gentoo","Biscoe",47.3,15.3,222,5250,"male",2007 +"Gentoo","Biscoe",42.8,14.2,209,4700,"female",2007 +"Gentoo","Biscoe",45.1,14.5,207,5050,"female",2007 +"Gentoo","Biscoe",59.6,17,230,6050,"male",2007 +"Gentoo","Biscoe",49.1,14.8,220,5150,"female",2008 
+"Gentoo","Biscoe",48.4,16.3,220,5400,"male",2008 +"Gentoo","Biscoe",42.6,13.7,213,4950,"female",2008 +"Gentoo","Biscoe",44.4,17.3,219,5250,"male",2008 +"Gentoo","Biscoe",44,13.6,208,4350,"female",2008 +"Gentoo","Biscoe",48.7,15.7,208,5350,"male",2008 +"Gentoo","Biscoe",42.7,13.7,208,3950,"female",2008 +"Gentoo","Biscoe",49.6,16,225,5700,"male",2008 +"Gentoo","Biscoe",45.3,13.7,210,4300,"female",2008 +"Gentoo","Biscoe",49.6,15,216,4750,"male",2008 +"Gentoo","Biscoe",50.5,15.9,222,5550,"male",2008 +"Gentoo","Biscoe",43.6,13.9,217,4900,"female",2008 +"Gentoo","Biscoe",45.5,13.9,210,4200,"female",2008 +"Gentoo","Biscoe",50.5,15.9,225,5400,"male",2008 +"Gentoo","Biscoe",44.9,13.3,213,5100,"female",2008 +"Gentoo","Biscoe",45.2,15.8,215,5300,"male",2008 +"Gentoo","Biscoe",46.6,14.2,210,4850,"female",2008 +"Gentoo","Biscoe",48.5,14.1,220,5300,"male",2008 +"Gentoo","Biscoe",45.1,14.4,210,4400,"female",2008 +"Gentoo","Biscoe",50.1,15,225,5000,"male",2008 +"Gentoo","Biscoe",46.5,14.4,217,4900,"female",2008 +"Gentoo","Biscoe",45,15.4,220,5050,"male",2008 +"Gentoo","Biscoe",43.8,13.9,208,4300,"female",2008 +"Gentoo","Biscoe",45.5,15,220,5000,"male",2008 +"Gentoo","Biscoe",43.2,14.5,208,4450,"female",2008 +"Gentoo","Biscoe",50.4,15.3,224,5550,"male",2008 +"Gentoo","Biscoe",45.3,13.8,208,4200,"female",2008 +"Gentoo","Biscoe",46.2,14.9,221,5300,"male",2008 +"Gentoo","Biscoe",45.7,13.9,214,4400,"female",2008 +"Gentoo","Biscoe",54.3,15.7,231,5650,"male",2008 +"Gentoo","Biscoe",45.8,14.2,219,4700,"female",2008 +"Gentoo","Biscoe",49.8,16.8,230,5700,"male",2008 +"Gentoo","Biscoe",46.2,14.4,214,4650,NA,2008 +"Gentoo","Biscoe",49.5,16.2,229,5800,"male",2008 +"Gentoo","Biscoe",43.5,14.2,220,4700,"female",2008 +"Gentoo","Biscoe",50.7,15,223,5550,"male",2008 +"Gentoo","Biscoe",47.7,15,216,4750,"female",2008 +"Gentoo","Biscoe",46.4,15.6,221,5000,"male",2008 +"Gentoo","Biscoe",48.2,15.6,221,5100,"male",2008 +"Gentoo","Biscoe",46.5,14.8,217,5200,"female",2008 
+"Gentoo","Biscoe",46.4,15,216,4700,"female",2008 +"Gentoo","Biscoe",48.6,16,230,5800,"male",2008 +"Gentoo","Biscoe",47.5,14.2,209,4600,"female",2008 +"Gentoo","Biscoe",51.1,16.3,220,6000,"male",2008 +"Gentoo","Biscoe",45.2,13.8,215,4750,"female",2008 +"Gentoo","Biscoe",45.2,16.4,223,5950,"male",2008 +"Gentoo","Biscoe",49.1,14.5,212,4625,"female",2009 +"Gentoo","Biscoe",52.5,15.6,221,5450,"male",2009 +"Gentoo","Biscoe",47.4,14.6,212,4725,"female",2009 +"Gentoo","Biscoe",50,15.9,224,5350,"male",2009 +"Gentoo","Biscoe",44.9,13.8,212,4750,"female",2009 +"Gentoo","Biscoe",50.8,17.3,228,5600,"male",2009 +"Gentoo","Biscoe",43.4,14.4,218,4600,"female",2009 +"Gentoo","Biscoe",51.3,14.2,218,5300,"male",2009 +"Gentoo","Biscoe",47.5,14,212,4875,"female",2009 +"Gentoo","Biscoe",52.1,17,230,5550,"male",2009 +"Gentoo","Biscoe",47.5,15,218,4950,"female",2009 +"Gentoo","Biscoe",52.2,17.1,228,5400,"male",2009 +"Gentoo","Biscoe",45.5,14.5,212,4750,"female",2009 +"Gentoo","Biscoe",49.5,16.1,224,5650,"male",2009 +"Gentoo","Biscoe",44.5,14.7,214,4850,"female",2009 +"Gentoo","Biscoe",50.8,15.7,226,5200,"male",2009 +"Gentoo","Biscoe",49.4,15.8,216,4925,"male",2009 +"Gentoo","Biscoe",46.9,14.6,222,4875,"female",2009 +"Gentoo","Biscoe",48.4,14.4,203,4625,"female",2009 +"Gentoo","Biscoe",51.1,16.5,225,5250,"male",2009 +"Gentoo","Biscoe",48.5,15,219,4850,"female",2009 +"Gentoo","Biscoe",55.9,17,228,5600,"male",2009 +"Gentoo","Biscoe",47.2,15.5,215,4975,"female",2009 +"Gentoo","Biscoe",49.1,15,228,5500,"male",2009 +"Gentoo","Biscoe",47.3,13.8,216,4725,NA,2009 +"Gentoo","Biscoe",46.8,16.1,215,5500,"male",2009 +"Gentoo","Biscoe",41.7,14.7,210,4700,"female",2009 +"Gentoo","Biscoe",53.4,15.8,219,5500,"male",2009 +"Gentoo","Biscoe",43.3,14,208,4575,"female",2009 +"Gentoo","Biscoe",48.1,15.1,209,5500,"male",2009 +"Gentoo","Biscoe",50.5,15.2,216,5000,"female",2009 +"Gentoo","Biscoe",49.8,15.9,229,5950,"male",2009 +"Gentoo","Biscoe",43.5,15.2,213,4650,"female",2009 
+"Gentoo","Biscoe",51.5,16.3,230,5500,"male",2009 +"Gentoo","Biscoe",46.2,14.1,217,4375,"female",2009 +"Gentoo","Biscoe",55.1,16,230,5850,"male",2009 +"Gentoo","Biscoe",44.5,15.7,217,4875,NA,2009 +"Gentoo","Biscoe",48.8,16.2,222,6000,"male",2009 +"Gentoo","Biscoe",47.2,13.7,214,4925,"female",2009 +"Gentoo","Biscoe",NA,NA,NA,NA,NA,2009 +"Gentoo","Biscoe",46.8,14.3,215,4850,"female",2009 +"Gentoo","Biscoe",50.4,15.7,222,5750,"male",2009 +"Gentoo","Biscoe",45.2,14.8,212,5200,"female",2009 +"Gentoo","Biscoe",49.9,16.1,213,5400,"male",2009 +"Chinstrap","Dream",46.5,17.9,192,3500,"female",2007 +"Chinstrap","Dream",50,19.5,196,3900,"male",2007 +"Chinstrap","Dream",51.3,19.2,193,3650,"male",2007 +"Chinstrap","Dream",45.4,18.7,188,3525,"female",2007 +"Chinstrap","Dream",52.7,19.8,197,3725,"male",2007 +"Chinstrap","Dream",45.2,17.8,198,3950,"female",2007 +"Chinstrap","Dream",46.1,18.2,178,3250,"female",2007 +"Chinstrap","Dream",51.3,18.2,197,3750,"male",2007 +"Chinstrap","Dream",46,18.9,195,4150,"female",2007 +"Chinstrap","Dream",51.3,19.9,198,3700,"male",2007 +"Chinstrap","Dream",46.6,17.8,193,3800,"female",2007 +"Chinstrap","Dream",51.7,20.3,194,3775,"male",2007 +"Chinstrap","Dream",47,17.3,185,3700,"female",2007 +"Chinstrap","Dream",52,18.1,201,4050,"male",2007 +"Chinstrap","Dream",45.9,17.1,190,3575,"female",2007 +"Chinstrap","Dream",50.5,19.6,201,4050,"male",2007 +"Chinstrap","Dream",50.3,20,197,3300,"male",2007 +"Chinstrap","Dream",58,17.8,181,3700,"female",2007 +"Chinstrap","Dream",46.4,18.6,190,3450,"female",2007 +"Chinstrap","Dream",49.2,18.2,195,4400,"male",2007 +"Chinstrap","Dream",42.4,17.3,181,3600,"female",2007 +"Chinstrap","Dream",48.5,17.5,191,3400,"male",2007 +"Chinstrap","Dream",43.2,16.6,187,2900,"female",2007 +"Chinstrap","Dream",50.6,19.4,193,3800,"male",2007 +"Chinstrap","Dream",46.7,17.9,195,3300,"female",2007 +"Chinstrap","Dream",52,19,197,4150,"male",2007 +"Chinstrap","Dream",50.5,18.4,200,3400,"female",2008 
+"Chinstrap","Dream",49.5,19,200,3800,"male",2008 +"Chinstrap","Dream",46.4,17.8,191,3700,"female",2008 +"Chinstrap","Dream",52.8,20,205,4550,"male",2008 +"Chinstrap","Dream",40.9,16.6,187,3200,"female",2008 +"Chinstrap","Dream",54.2,20.8,201,4300,"male",2008 +"Chinstrap","Dream",42.5,16.7,187,3350,"female",2008 +"Chinstrap","Dream",51,18.8,203,4100,"male",2008 +"Chinstrap","Dream",49.7,18.6,195,3600,"male",2008 +"Chinstrap","Dream",47.5,16.8,199,3900,"female",2008 +"Chinstrap","Dream",47.6,18.3,195,3850,"female",2008 +"Chinstrap","Dream",52,20.7,210,4800,"male",2008 +"Chinstrap","Dream",46.9,16.6,192,2700,"female",2008 +"Chinstrap","Dream",53.5,19.9,205,4500,"male",2008 +"Chinstrap","Dream",49,19.5,210,3950,"male",2008 +"Chinstrap","Dream",46.2,17.5,187,3650,"female",2008 +"Chinstrap","Dream",50.9,19.1,196,3550,"male",2008 +"Chinstrap","Dream",45.5,17,196,3500,"female",2008 +"Chinstrap","Dream",50.9,17.9,196,3675,"female",2009 +"Chinstrap","Dream",50.8,18.5,201,4450,"male",2009 +"Chinstrap","Dream",50.1,17.9,190,3400,"female",2009 +"Chinstrap","Dream",49,19.6,212,4300,"male",2009 +"Chinstrap","Dream",51.5,18.7,187,3250,"male",2009 +"Chinstrap","Dream",49.8,17.3,198,3675,"female",2009 +"Chinstrap","Dream",48.1,16.4,199,3325,"female",2009 +"Chinstrap","Dream",51.4,19,201,3950,"male",2009 +"Chinstrap","Dream",45.7,17.3,193,3600,"female",2009 +"Chinstrap","Dream",50.7,19.7,203,4050,"male",2009 +"Chinstrap","Dream",42.5,17.3,187,3350,"female",2009 +"Chinstrap","Dream",52.2,18.8,197,3450,"male",2009 +"Chinstrap","Dream",45.2,16.6,191,3250,"female",2009 +"Chinstrap","Dream",49.3,19.9,203,4050,"male",2009 +"Chinstrap","Dream",50.2,18.8,202,3800,"male",2009 +"Chinstrap","Dream",45.6,19.4,194,3525,"female",2009 +"Chinstrap","Dream",51.9,19.5,206,3950,"male",2009 +"Chinstrap","Dream",46.8,16.5,189,3650,"female",2009 +"Chinstrap","Dream",45.7,17,195,3650,"female",2009 +"Chinstrap","Dream",55.8,19.8,207,4000,"male",2009 +"Chinstrap","Dream",43.5,18.1,202,3400,"female",2009 
+"Chinstrap","Dream",49.6,18.2,193,3775,"male",2009 +"Chinstrap","Dream",50.8,19,210,4100,"male",2009 +"Chinstrap","Dream",50.2,18.7,198,3775,"female",2009 diff --git a/in_depth_material/data_dic_generation.qmd b/in_depth_material/data_dic_generation.qmd new file mode 100644 index 0000000..83911e1 --- /dev/null +++ b/in_depth_material/data_dic_generation.qmd @@ -0,0 +1,342 @@ +--- +title: "Automatic Generation of Data Dictionaries" +engine: knitr +--- + +**Note: This is an add-on to the Chapter "[Add Data and Data Dictionary](/data.qmd)". It describes how you can (a) automatically generate data dictionaries with an R package, and (b) how to create a machine readable documentation of your data.** + +## Automatic Generation of Data Dictionaries + +First, we will demonstrate how to create a simple data dictionary +using the R package [`datawizard`][datawizard]. We will use the penguin data set which is introduced in the Chapter "[Add Data and Data Dictionary](/data.qmd)". +You can download it and put it into your project folder: + +[data.csv](../data.csv){.btn .btn-lg .btn-info download="data.csv"} + +You can install the `datawizard` package into our `renv` environment using: + +[datawizard]: https://easystats.github.io/datawizard/ + +```{.r filename="Console"} +renv::install("datawizard") +``` + +We create a separate Quarto file for the data dictionary. +Create it by clicking on _File_ > _New File_ > _Quarto Document..._. +Choose a title such as `Data Dictionary`, +select _HTML_ as format, +uncheck the use of the visual markdown editor, and click on _Create_. +Remove everything except the YAML header (between the `---`). +To make the HTML file self-contained, +also set `embed-resources: true` such that the YAML header looks as follows: + +```{.yml filename="data_dictionary.qmd"} +--- +title: "Data Dictionary" +format: + html: + embed-resources: true +--- +``` + +Then, save it as `data_dictionary.qmd` by clicking on _File_ > _Save_. 
+ +To create the actual data dictionary, first write a description for all columns +so others can understand what the variable names mean. +Where necessary, also document their value +-- this is especially important if their meaning is non-obvious. +In the following, we demonstrate this by storing the penguins' binomial name +along with the English name. + +``````{cat} +#| engine.opts: { file: "_data_dictionary.qmd" } +#| class.source: "md" +#| filename: "data_dictionary.qmd" + +```{r} +#| echo: false + +# Store the description of variables +vars <- c( + species = "a character string denoting penguin species", + island = "a character string denoting island in Palmer Archipelago, Antarctica", + bill_length_mm = "a number denoting bill length (millimeters)", + bill_depth_mm = "a number denoting bill depth (millimeters)", + flipper_length_mm = "an integer denoting flipper length (millimeters)", + body_mass_g = "an integer denoting body mass (grams)", + sex = "a character string denoting penguin sex", + year = "an integer denoting the study year" +) + +# Store the description of variable values +vals <- list( + species = c( + Adelie = "Pygoscelis adeliae", + Gentoo = "Pygoscelis papua", + Chinstrap = "Pygoscelis antarcticus" + ) +) +``` +`````` + +Then, load the data and use `datawizard` +to add the descriptions to the `data.frame`:[^not-permanent] + +[^not-permanent]: Note that the code provided does not alter the data file +-- no description will be added to `data.csv`. +The descriptions are only added to a (temporary) copy of the data set within R +to create the data dictionary. 
+ +::: {.column-margin} +![datawizard: Easy Data Wrangling and Statistical Transformations](../images/datawizard.png){width=250px} +::: + +``````{cat} +#| engine.opts: { file: "_data_dictionary.qmd", append: TRUE } +#| class.source: "md" +#| filename: "data_dictionary.qmd" + +```{r} +#| echo: false + +dat <- read.csv("data.csv") + +for (x in names(vars)) { + if (x %in% names(vals)) { + dat <- datawizard::assign_labels( + dat, + select = I(x), + variable = vars[[x]], + values = vals[[x]] + ) + } else { + dat <- datawizard::assign_labels( + dat, + select = I(x), + variable = vars[[x]] + ) + } +} +``` + +`````` + +Then, you can create the data dictionary containing the descriptions, +but also some other information about each variable +(e.g., the number of missing values) and print it. + +``````{cat} +#| engine.opts: { file: "_data_dictionary.qmd", append: TRUE } +#| class.source: "md" +#| filename: "data_dictionary.qmd" + +```{r} +#| echo: false +#| column: "body-outset" +#| classes: plain + +datawizard::data_codebook(dat) |> + datawizard::data_select(exclude = ID) |> + datawizard::data_filter(N != "") |> + datawizard::print_md() +``` + +`````` + +```{r} +#| child: "_data_dictionary.qmd" + +``` + +Depending on the type of data, it may also be necessary +to describe sampling procedures (e.g., selection criteria), +measurement instruments (e.g., questionnaires), +appropriate weighting, +already applied preprocessing steps, or contact information. +In our case, as the data has already been published, +we only store a reference to its source. + +The data set is from the R package `palmerpenguins`. 
+If you had it installed, +you could use the function `citation()` to create such a reference: + +```{r} +#| label: "data-citation" +#| eval: false + +citation("palmerpenguins", auto = TRUE) |> + format(bibtex = FALSE, style = "text") +``` + +Without the package `palmerpenguins` installed, +you can find a [suggested citation on its website][palmerpenguins-citation] +and add that to your data dictionary: + +[palmerpenguins-citation]: https://allisonhorst.github.io/palmerpenguins/#citation + +```{r} +#| ref.label = "data-citation", +#| render = function(x, options) gsub("\\n", " ", x = x), +#| echo = FALSE, +#| class.output = "md code-overflow-wrap", +#| attr.output = 'filename="data_dictionary.qmd"' + +# This chunk takes the output from the chunk "data-citation" +# and renders it with all newlines replaced by whitespaces. +``` + +Finally, you can render the data dictionary by running the following: + +```{.bash filename="Terminal"} +quarto render data_dictionary.qmd +``` + +This should create the file `data_dictionary.html` +which you can open and view in your web browser. + +If you want to learn more about the sharing of research data, +have a look at the tutorial "[FAIR research data management][fair-tutorial]". + +[fair-tutorial]: https://lmu-osc.github.io/FAIR-Data-Management/ + +## Create Machine-Readable Variable Documentation + +One could go even further by making the information machine-readable in a standardized way. + +This section demonstrates how the title and description of the data set, +the description of the variables and their valid values are stored in a machine-readable way. +We'll reuse the descriptions we already created[^value-labels] and add a few others. + +[^value-labels]: Unfortunately, the descriptions of values are not reused in this example, +as they are [not supported][enum-labels] by the specification we are using.
+ +[enum-labels]: https://specs.frictionlessdata.io/patterns/#table-schema-enum-labels-and-ordering + +First, store the title and description of the data set as a whole: + +```{.r filename="Console"} +table_info <- c( + title = "penguins data set", + description = "Size measurements for adult foraging penguins near Palmer Station, Antarctica" +) +``` + +As before, also provide a reference to the source. + +```{r} +#| echo: false +#| class-output: "r code-overflow-wrap" +#| attr-output: 'filename="Console"' + +# We have provided the data set as CSV file to the readers. +# Therefore, we cannot assume or require that readers have +# the R package palmerpenguins installed. Instead, we create +# the citation on our end and hide how we obtained it. + +citation("palmerpenguins", auto = TRUE)$url |> + paste0("dat_source <- \"", ... = _, "\"") |> + cat() +``` + +Next, create a list of the categorical variables' valid values: + +```{.r filename="Console"} +valid_vals <- list( + species = c("Adelie", "Gentoo", "Chinstrap"), + island = c("Torgersen", "Biscoe", "Dream"), + sex = c("male", "female"), + year = c(2007, 2008, 2009) +) +``` + +Finally, store the descriptions of the variables we already created earlier: + +```{.r filename="Console"} +# Store the description of variables +vars <- c( + species = "a character string denoting penguin species", + island = "a character string denoting island in Palmer Archipelago, Antarctica", + bill_length_mm = "a number denoting bill length (millimeters)", + bill_depth_mm = "a number denoting bill depth (millimeters)", + flipper_length_mm = "an integer denoting flipper length (millimeters)", + body_mass_g = "an integer denoting body mass (grams)", + sex = "a character string denoting penguin sex", + year = "an integer denoting the study year" +) +``` + +Generally, metadata are either stored embedded into the data or externally, +for example, in a separate file. 
+We will use the "[frictionless data](https://frictionlessdata.io/)" standard, +where metadata are stored separately. +An alternative would be [RO-Crate](https://www.researchobject.org/ro-crate/). + +Specifically, one can use the R package [`frictionless`][frictionless] +to create a _schema_ which describes the structure of the data.[^frictionless-note] +For the purpose of the following code, +it is just a nested list that we edit to include our own information. +We also explicitly record in the schema +that missing values are stored in the data file as `NA` +and that the data are licensed under CC0\ 1.0. +Finally, the package is used to create a metadata file that contains the schema. + +[frictionless]: https://docs.ropensci.org/frictionless/ + +[^frictionless-note]: In June 2024, [version 2](https://datapackage.org/) +of the frictionless data standard was released. +As of November 2024, the R package `frictionless` only supports the first version, +though support for v2 is [planned](https://github.com/frictionlessdata/frictionless-r/labels/datapackage%3Av2).
+ +```{.r filename="Console"} +# Install {frictionless} and the required dependency {stringi} +renv::install(c( + "frictionless", + "stringi" +)) + +# Read data and create schema +dat_filename <- "data.csv" +dat <- read.csv(dat_filename) +dat_schema <- frictionless::create_schema(dat) + +# Add descriptions to the fields +dat_schema$fields <- lapply(dat_schema$fields, \(x) { + c(x, description = vars[[x$name]]) +}) + +# Record valid values +dat_schema$fields <- lapply(dat_schema$fields, \(x) { + if (x[["name"]] %in% names(valid_vals)) { + modifyList(x, list(constraints = list(enum = valid_vals[[x$name]]))) + } else { + x + } +}) + +# Define missing values +dat_schema$missingValues <- c("", "NA") + +# Create package with license info and write it +dat_package <- frictionless::create_package() |> + frictionless::add_resource( + resource_name = "penguins", + data = dat_filename, + schema = dat_schema, + title = table_info[["title"]], + description = table_info[["description"]], + licenses = list(list( + name = "CC0-1.0", + path = "https://creativecommons.org/publicdomain/zero/1.0/", + title = "CC0 1.0 Universal" + )), + sources = list(list( + title = "CRAN", + path = dat_source + )) + ) +frictionless::write_package(dat_package, directory = ".") +``` + +This creates the metadata file `datapackage.json` in the current directory. +Make sure it is located in the same folder as `data.csv`, +as together they comprise a [data package](https://specs.frictionlessdata.io/data-package/). 
diff --git a/in_depth_material/introduction_copyright.qmd b/in_depth_material/introduction_copyright.qmd index b6a6596..c0f63c8 100644 --- a/in_depth_material/introduction_copyright.qmd +++ b/in_depth_material/introduction_copyright.qmd @@ -16,7 +16,7 @@ format: --- -**Note: This is an expanded version of the Chapter "[Sharing work of others: Copyright](/copyright.qmd)", where you find more details on the choice of licenses.** +**Note: This is an expanded version of the Chapter "[Sharing work of others: Copyright](/copyright.qmd)". Here you find more details on the choice of licenses.** By default, everything you put into the project folder will be shared publicly. In many instances, this will also include works by others than yourself or your co-authors, diff --git a/in_depth_material/other_dependencies.qmd b/in_depth_material/other_dependencies.qmd new file mode 100644 index 0000000..4e42d3b --- /dev/null +++ b/in_depth_material/other_dependencies.qmd @@ -0,0 +1,96 @@ +--- +title: "Identify additional system dependencies for the README" +engine: knitr +--- + +The specific versions of R and of the loaded packages are the most crucial information that should go into the README (or that is stored in the `renv` lockfile). Ideally, you also list other system dependencies, such as the version of Quarto[^renv-quarto] that you used to render your paper. + +[^renv-quarto]: As of August 2024, a proposal for `renv` to record the version of Quarto +has not been implemented, see [rstudio/renv#1143](https://github.com/rstudio/renv/issues/1143). + +An overview of the system dependencies of R packages can be created +using the function `pak::pkg_sysreqs()`. 
+In combination with `renv`, we can obtain the system dependencies +of all R packages the current project directly depends on: + +```{.r filename="Console"} +# Find all R package dependencies +deps <- renv::dependencies()$Package |> + unique() |> + pak::pkg_deps(dependencies = NA) |> + getElement("package") + +# Identify their system dependencies +pak::pkg_sysreqs(deps) +``` + +The output may look like the following: + +```txt +── Install scripts ────────────────── Fedora 40 ── +dnf install -y make pandoc git + +── Packages and their system dependencies ──────── +fs – make +knitr – pandoc +remotes – git +rmarkdown – pandoc +sass – make +``` + +We can see that the programs `make`, `pandoc`, +and `git` were identified as system dependencies. +Often, one can obtain their version by running them with the `--version` argument: + +```{.bash filename="Terminal"} +make --version +pandoc --version +git --version +``` + +However, this does not work for all system dependencies. +Specifically, it does not work for libraries -- software that is not supposed to be run on its own. +Identifying their version is beyond the scope of this tutorial. + +We also know that we need Quarto to create the PDF, +so let's find out its version as well: + +```{.bash filename="Terminal"} +quarto --version +``` + +If you installed `apaquarto` or any other Quarto extension, +you can query their versions as follows:[^already-included] + +[^already-included]: Luckily, the extensions are included in the project folder, +so technically their version is already recorded in the project's files. 
+ +```{.bash filename="Terminal"} +quarto list extensions +``` + +Finally, we know that we installed a $\TeX$ distribution to create the PDF, +so let's find out its version by running: + +```{.bash filename="Terminal"} +quarto check +``` + +The output is quite long and it might look slightly different for you, +but the relevant sections are the following: + +```txt +[✓] Checking tools....................OK + TinyTeX: v2024.09 + Chromium: (not installed) + +[✓] Checking LaTeX....................OK + Using: TinyTex + Path: /home/r155953/.TinyTeX/bin/x86_64-linux + Version: 2024 +``` + +Add these dependencies to the section **Computational Requirements / Dependencies** of your README file. + +Of course, all the system dependencies identified until now +may have dependencies of their own. Use your own judgement to decide when not to dig deeper. diff --git a/make_readme.qmd b/make_readme.qmd index 1aeecda..ea80bd4 100644 --- a/make_readme.qmd +++ b/make_readme.qmd @@ -8,167 +8,116 @@ engine: knitr Having settled on a license, it is time to add a final touch. Imagine returning to your project in five years, having forgotten most of the details of what you did exactly. -What would be useful to know in order to quickly understand -what is going on in the project? -This is what needs to be described in the README. -While you could just start writing along, -it is helpful to provide at least the following information in sections on their own. +What would your future self want to know in order to quickly understand +what is going on in the project? -### Name and Description +This is what needs to be described in the `README` file. +It should be the primary entry point into your project, +the place where new users go first to get an orientation. -How is the project called? -What is it about? -Which files does the project folder contain? -How are they organized? 
+There is no common standard for how to structure a README file, +but you should usually provide at least the following information, +structured by sections: -### Involved Data +**Name and Description** -Are any (empirical) data involved (e.g., being analyzed or used as input)? -From which sources can they be obtained? -Are they already included in the project folder? -Where is their data dictionary located? -Which terms, usage restrictions, or licenses apply? -If they are not publicly available, -is an alternative, synthesized version provided? +- [ ] What is the project called? +- [ ] What is it about? +- [ ] Which files does the project folder contain? +- [ ] How are they organized? -### Computational Requirements +**Data** -What software needs to be installed to run the analysis --- in other words, what are its dependencies? -This also includes software that you have used for any manual steps. -For every dependency, describe where it can be obtained from. -If the code has particular hardware requirements -(e.g., in terms of processor or memory), -these should be also noted. -Finally, for steps that take more than a couple of seconds, -the approximate runtime should be indicated. +- [ ] Are any (empirical) data involved (e.g., being analyzed or used as input)? +- [ ] Are they already included in the project folder? If not: From which sources can they be obtained? +- [ ] Where is their data dictionary located? +- [ ] Which terms, usage restrictions, or licenses apply? +- [ ] (optionally) If they are not publicly available, is an alternative, synthesized version provided? -@nte-dependencies provides more information -on determining the dependencies of an R project. +**Computational Requirements / Dependencies** -### Usage +- [ ] What software needs to be installed to run the analysis? -How can one run the project -- is there a master script -or a particular order in which any scripts need to be executed? -Provide detailed instructions for running the full project. 
+For every dependency (e.g., packages that you load in R), describe where it can be obtained from. +If the code has particular hardware requirements +(e.g., in terms of processor or memory), these should be also noted. +Finally, for steps that take more than a couple of seconds, +indicate the approximate runtime. -### List of Results +For an R project, the most important information for enabling reproducibility are the specific version of R and the loaded packages. Ideally, you also list other system dependencies. The in-depth supplementary material [*Identify additional system dependencies for the README*](/in_depth_material/other_dependencies.qmd) provides more information on determining the dependencies of an R project. +*For the purpose of this tutorial, however, you can skip this step.* -For every result (i.e., number, figure, or table) that is -computed in the project and displayed in the manuscript, -indicate where exactly it is computed. +**How to reproduce the results** -### Citation +- [ ] Provide step-by-step instructions that are necessary to reproduce all reported results. -Is there a recommended way to cite this project? -Is there a published article associated with it -that you would like to have cited? +Is there a master script or a particular order in which any scripts need to be executed? -### License +**Citation** -Under which licenses are the works in this project folder available? +- [ ] Is there a recommended way to cite this project? +- [ ] Is there a published article associated with it that you would like to have cited? -## Create It! +**License** -Create your README now as the file `README.md`. +- [ ] Under which licenses are the works in this project folder available? -::: {#nte-dependencies .callout-note} -### Identifying R Dependencies -R itself and the R packages are already documented as this project uses `renv`. 
-Therefore you can focus on all other dependencies, -such as the system dependencies of R packages -as well as the version of Quarto.[^renv-quarto] -[^renv-quarto]: As of August 2024, a proposal for `renv` to record the version of Quarto -has not been implemented, see [rstudio/renv#1143](https://github.com/rstudio/renv/issues/1143). +## Identifying R Dependencies -An overview over the system dependencies of R packages can be created -using the function `pak::pkg_sysreqs()`. -In combination with `renv`, we can obtain the system dependencies -of all R packages the current project directly depends on: +If you just write "We used R to analyze the data" in your manuscript or in the *Computational Requirements / Dependencies* section of your README file, this would be underspecified. To enable someone else to really reproduce your results, you need specific information on ... -```{.r filename="Console"} -# Find all R package dependencies -deps <- renv::dependencies()$Package |> - unique() |> - pak::pkg_deps(dependencies = NA) |> - getElement("package") +- The exact version of R +- All packages and their versions that you loaded in your project +- (optionally) Further system dependencies -# Identify their system dependencies -pak::pkg_sysreqs(deps) -``` +### R & Package Dependencies -The output may look like the following: +R itself and the R packages are already documented as our tutorial project uses `renv`: The information is stored in the `renv.lock` file at the root of the project, which tracks each package’s version and source. In this case, you can simply refer to that in the README file and all is done: -```txt -── Install scripts ────────────────── Fedora 40 ── -dnf install -y make pandoc git - -── Packages and their system dependencies ──────── -fs – make -knitr – pandoc -remotes – git -rmarkdown – pandoc -sass – make -``` +> The R and package versions used to compute the results are stored in the `renv` environment (see `renv.lock` file). 
-We can see that the programs `make`, `pandoc`, -and `git` were identified as system dependencies. -Often, one can obtain their version by running them with the `--version` argument: +If you do *not* use `renv`, the easiest solution to get a list of all needed packages is the `sessionInfo()` command. *Call it **after** you loaded all necessary packages!* -```{.bash filename="Terminal"} -make --version -pandoc --version -git --version -``` +It lists all packages including their versions, and you can copy & paste that information into your README file. Here's an example for a project: -However, this does not work for all system dependencies. -Specifically, it does not work for libraries -- software that is not supposed to be run on its own. -Identifying their version is beyond the scope of this tutorial. - -We also know that we need Quarto to create the PDF, -so let's find out its version as well: +```txt +> sessionInfo() +R version 4.4.1 (2024-06-14) +Platform: aarch64-apple-darwin20 +Running under: macOS 15.5 -```{.bash filename="Terminal"} -quarto --version -``` +Matrix products: default +BLAS: /System/Library/Frameworks/Accelerate.framework/Versions/A/Frameworks/vecLib.framework/Versions/A/libBLAS.dylib +LAPACK: /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/lib/libRlapack.dylib; LAPACK version 3.12.0 -If you installed `apaquarto` or any other Quarto extension, -one can query their versions as follows:[^already-included] +locale: +[1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8 -[^already-included]: Luckily, the extensions are included in the project folder, -so technically their version is already recorded in the project's files. 
+time zone: Europe/Berlin +tzcode source: internal -```{.bash filename="Terminal"} -quarto list extensions -``` +attached base packages: +[1] parallel stats graphics grDevices utils datasets methods base -Finally, we know that we installed a $\TeX$ distribution to create the PDF, -so let's find out its version by running: +other attached packages: + [1] rio_1.2.2 doRNG_1.8.6 rngtools_1.5.2 + [4] doParallel_1.0.17 iterators_1.0.14 foreach_1.5.2 + [7] fitPCurve_0.0.0.9000 testthat_3.2.3 prettycode_1.1.0 +[10] colorDF_0.1.7 -```{.bash filename="Terminal"} -quarto check +loaded via a namespace (and not attached): + [1] gtable_0.3.6 ggplot2_3.5.2 htmlwidgets_1.6.4 devtools_2.4.5 + [5] remotes_2.5.0 TruncExpFam_1.2.1 vctrs_0.6.5 tools_4.4.1 + [9] generics_0.1.4 tibble_3.2.1 pkgconfig_2.0.3 R.oo_1.26.0 +[13] data.table_1.17.4 RColorBrewer_1.1-3 desc_1.4.3 lifecycle_1.0.4 ``` -The output is quite long and it might look slightly different for you, -but the relevant sections are the following: -```txt -[βœ“] Checking tools....................OK - TinyTeX: v2024.09 - Chromium: (not installed) - -[βœ“] Checking LaTeX....................OK - Using: TinyTex - Path: /home/r155953/.TinyTeX/bin/x86_64-linux - Version: 2024 -``` +## ✍️ Practical Exercise: Create your README! -Of course, all the system dependencies identified until now -may have dependencies on their own. -Use your own judgement to decide when not to dig deeper. -::: +Create your README now as the file `README.md`, located at the top level folder. If you feel stuck, you can have a look at the following examples: @@ -187,15 +136,15 @@ The most important file in this project folder is `Manuscript.qmd` which contain - `data_dictionary.html`: a dictionary to the data file, created using `data_dictionary.qmd` -The folder `_extensions` contains the `apaquarto` extension which is used to typeset the PDF accoording to APA guidelines. 
+The folder `_extensions` contains the `apaquarto` extension which is used to typeset the PDF according to APA guidelines. ``` ::: ::: {#tip-data .callout-tip collapse="true"} -### Involved Data +### Data ```{.md .code-overflow-wrap filename="README.md"} -## Involved Data +## Data The manuscript analyzes the "palmerpenguins" data set available from . The data is stored as "data.csv" and documented in the file "data_dictionary.html". It is made available under CC0 1.0. ``` @@ -233,7 +182,7 @@ All R packages that this project requires are managed using [`renv`](https://cra install.packages("renv") ``` -Next, one can open a new R session in the root folder of this project and run the following, which should install all required R packages at their recorded versions: +Next, one can open a new R session in the root folder of this project and run the following command, which should install all required R packages at their recorded versions: ```r renv::restore() @@ -241,11 +190,11 @@ renv::restore() `````` ::: -::: {#tip-usage .callout-tip collapse="true"} -### Usage +::: {#tip-how-to-reproduce .callout-tip collapse="true"} +### How to reproduce the results `````{.md .code-overflow-wrap filename="README.md"} -## Usage +## How to reproduce the results The manuscript can be rendered to PDF using the following command: @@ -255,17 +204,7 @@ quarto render Manuscript.qmd `````` ::: -::: {#tip-list-of-results .callout-tip collapse="true"} -### List of Results - -```{.md .code-overflow-wrap filename="README.md"} -## List of Results -- In-text numbers in the section "results": Calculated in the chunk "t-test" within "Manuscript.qmd" -- Table 1: Calculated in the chunk "tbl-descriptive-statistics" within "Manuscript.qmd" -- Figure 2: Calculated in the chunk "fig-bill-length-comparison" within "Manuscript.qmd" -``` -::: ::: {#tip-citation .callout-tip collapse="true"} ### Citation @@ -289,6 +228,6 @@ Of course, you would use the same license for the manuscript that you chose in t 
```{.md .code-overflow-wrap filename="README.md"} ## License -The manuscript files `Manuscript.qmd`, `Manuscript.tex`, and `Manuscript.pdf` by Josephine Zerna, Christoph Scheffel, and are available under [CC\ BY-SA\ 4.0](https://creativecommons.org/licenses/by-sa/4.0/) or (at your option) under the [AGPLv3](https://www.gnu.org/licenses/agpl-3.0.html) (or later). For further copyright information, see `LICENSE.txt`. +The manuscript files `Manuscript.qmd`, `Manuscript.tex`, and `Manuscript.pdf` by Josephine Zerna, Christoph Scheffel, and are available under [CC\ BY-SA\ 4.0](https://creativecommons.org/licenses/by-sa/4.0/). For further copyright information, see `LICENSE.txt`. ``` ::: diff --git a/renv.lock b/renv.lock deleted file mode 100644 index 5de457a..0000000 --- a/renv.lock +++ /dev/null @@ -1,498 +0,0 @@ -{ - "R": { - "Version": "4.4.1", - "Repositories": [ - { - "Name": "CRAN", - "URL": "https://packagemanager.posit.co/cran/latest" - } - ] - }, - "Packages": { - "R6": { - "Package": "R6", - "Version": "2.6.1", - "Source": "Repository", - "Repository": "CRAN", - "Requirements": [ - "R" - ], - "Hash": "d4335fe7207f1c01ab8c41762f5840d4" - }, - "Rcpp": { - "Package": "Rcpp", - "Version": "1.0.13", - "Source": "Repository", - "Repository": "RSPM", - "Requirements": [ - "methods", - "utils" - ], - "Hash": "f27411eb6d9c3dada5edd444b8416675" - }, - "base64enc": { - "Package": "base64enc", - "Version": "0.1-3", - "Source": "Repository", - "Repository": "CRAN", - "Requirements": [ - "R" - ], - "Hash": "543776ae6848fde2f48ff3816d0628bc" - }, - "bslib": { - "Package": "bslib", - "Version": "0.8.0", - "Source": "Repository", - "Repository": "CRAN", - "Requirements": [ - "R", - "base64enc", - "cachem", - "fastmap", - "grDevices", - "htmltools", - "jquerylib", - "jsonlite", - "lifecycle", - "memoise", - "mime", - "rlang", - "sass" - ], - "Hash": "b299c6741ca9746fb227debcb0f9fb6c" - }, - "cachem": { - "Package": "cachem", - "Version": "1.1.0", - "Source": "Repository", - 
"Repository": "CRAN", - "Requirements": [ - "fastmap", - "rlang" - ], - "Hash": "cd9a672193789068eb5a2aad65a0dedf" - }, - "cli": { - "Package": "cli", - "Version": "3.6.5", - "Source": "Repository", - "Repository": "CRAN", - "Requirements": [ - "R", - "utils" - ], - "Hash": "16850760556401a2eeb27d39bd11c9cb" - }, - "datawizard": { - "Package": "datawizard", - "Version": "0.12.3", - "Source": "Repository", - "Repository": "CRAN", - "Requirements": [ - "R", - "insight", - "stats", - "utils" - ], - "Hash": "611537168bbb78b57720de109ec1ad19" - }, - "digest": { - "Package": "digest", - "Version": "0.6.37", - "Source": "Repository", - "Repository": "CRAN", - "Requirements": [ - "R", - "utils" - ], - "Hash": "33698c4b3127fc9f506654607fb73676" - }, - "evaluate": { - "Package": "evaluate", - "Version": "1.0.4", - "Source": "Repository", - "Repository": "CRAN", - "Requirements": [ - "R" - ], - "Hash": "7c29cedd515863338c7f5f77fe9ddf74" - }, - "fastmap": { - "Package": "fastmap", - "Version": "1.2.0", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "aa5e1cd11c2d15497494c5292d7ffcc8" - }, - "fontawesome": { - "Package": "fontawesome", - "Version": "0.5.2", - "Source": "Repository", - "Repository": "CRAN", - "Requirements": [ - "R", - "htmltools", - "rlang" - ], - "Hash": "c2efdd5f0bcd1ea861c2d4e2a883a67d" - }, - "fs": { - "Package": "fs", - "Version": "1.6.6", - "Source": "Repository", - "Repository": "CRAN", - "Requirements": [ - "R", - "methods" - ], - "Hash": "7eb1e342eee7e0a7449c49cdaa526d39" - }, - "glue": { - "Package": "glue", - "Version": "1.8.0", - "Source": "Repository", - "Repository": "CRAN", - "Requirements": [ - "R", - "methods" - ], - "Hash": "5899f1eaa825580172bb56c08266f37c" - }, - "grateful": { - "Package": "grateful", - "Version": "0.2.4", - "Source": "Repository", - "Repository": "CRAN", - "Requirements": [ - "R", - "knitr", - "remotes", - "renv", - "rmarkdown", - "rstudioapi", - "utils" - ], - "Hash": "f87d698662778b5bae252429742c23de" - }, - 
"highr": { - "Package": "highr", - "Version": "0.11", - "Source": "Repository", - "Repository": "CRAN", - "Requirements": [ - "R", - "xfun" - ], - "Hash": "d65ba49117ca223614f71b60d85b8ab7" - }, - "htmltools": { - "Package": "htmltools", - "Version": "0.5.8.1", - "Source": "Repository", - "Repository": "CRAN", - "Requirements": [ - "R", - "base64enc", - "digest", - "fastmap", - "grDevices", - "rlang", - "utils" - ], - "Hash": "81d371a9cc60640e74e4ab6ac46dcedc" - }, - "insight": { - "Package": "insight", - "Version": "0.20.3", - "Source": "Repository", - "Repository": "CRAN", - "Requirements": [ - "R", - "methods", - "stats", - "utils" - ], - "Hash": "19a5e2db13306af8a80d4e605d3cb06e" - }, - "jquerylib": { - "Package": "jquerylib", - "Version": "0.1.4", - "Source": "Repository", - "Repository": "CRAN", - "Requirements": [ - "htmltools" - ], - "Hash": "5aab57a3bd297eee1c1d862735972182" - }, - "jsonlite": { - "Package": "jsonlite", - "Version": "2.0.0", - "Source": "Repository", - "Repository": "CRAN", - "Requirements": [ - "methods" - ], - "Hash": "b0776f526d36d8bd4a3344a88fe165c4" - }, - "knitr": { - "Package": "knitr", - "Version": "1.50", - "Source": "Repository", - "Repository": "CRAN", - "Requirements": [ - "R", - "evaluate", - "highr", - "methods", - "tools", - "xfun", - "yaml" - ], - "Hash": "5a07d8ec459d7b80bd4acca5f4a6e062" - }, - "later": { - "Package": "later", - "Version": "1.3.2", - "Source": "Repository", - "Repository": "RSPM", - "Requirements": [ - "Rcpp", - "rlang" - ], - "Hash": "a3e051d405326b8b0012377434c62b37" - }, - "lifecycle": { - "Package": "lifecycle", - "Version": "1.0.4", - "Source": "Repository", - "Repository": "CRAN", - "Requirements": [ - "R", - "cli", - "glue", - "rlang" - ], - "Hash": "b8552d117e1b808b09a832f589b79035" - }, - "memoise": { - "Package": "memoise", - "Version": "2.0.1", - "Source": "Repository", - "Repository": "CRAN", - "Requirements": [ - "cachem", - "rlang" - ], - "Hash": "e2817ccf4a065c5d9d7f2cfbe7c1d78c" - }, - 
"mime": { - "Package": "mime", - "Version": "0.12", - "Source": "Repository", - "Repository": "CRAN", - "Requirements": [ - "tools" - ], - "Hash": "18e9c28c1d3ca1560ce30658b22ce104" - }, - "palmerpenguins": { - "Package": "palmerpenguins", - "Version": "0.1.1", - "Source": "Repository", - "Repository": "CRAN", - "Requirements": [ - "R" - ], - "Hash": "6c6861efbc13c1d543749e9c7be4a592" - }, - "processx": { - "Package": "processx", - "Version": "3.8.6", - "Source": "Repository", - "Repository": "CRAN", - "Requirements": [ - "R", - "R6", - "ps", - "utils" - ], - "Hash": "720161b280b0a35f4d1490ead2fe81d0" - }, - "ps": { - "Package": "ps", - "Version": "1.9.1", - "Source": "Repository", - "Repository": "CRAN", - "Requirements": [ - "R", - "utils" - ], - "Hash": "093688087b0bacce6ba2f661f36328e2" - }, - "quarto": { - "Package": "quarto", - "Version": "1.4.4", - "Source": "Repository", - "Repository": "RSPM", - "Requirements": [ - "R", - "cli", - "jsonlite", - "later", - "processx", - "rlang", - "rmarkdown", - "rstudioapi", - "tools", - "utils", - "yaml" - ], - "Hash": "af456d7a181750812bd8b2bfedb3ea4e" - }, - "rappdirs": { - "Package": "rappdirs", - "Version": "0.3.3", - "Source": "Repository", - "Repository": "CRAN", - "Requirements": [ - "R" - ], - "Hash": "5e3c5dc0b071b21fa128676560dbe94d" - }, - "remotes": { - "Package": "remotes", - "Version": "2.5.0", - "Source": "Repository", - "Repository": "CRAN", - "Requirements": [ - "R", - "methods", - "stats", - "tools", - "utils" - ], - "Hash": "3ee025083e66f18db6cf27b56e23e141" - }, - "renv": { - "Package": "renv", - "Version": "1.0.7", - "Source": "Repository", - "Repository": "CRAN", - "Requirements": [ - "utils" - ], - "Hash": "397b7b2a265bc5a7a06852524dabae20" - }, - "rlang": { - "Package": "rlang", - "Version": "1.1.6", - "Source": "Repository", - "Repository": "CRAN", - "Requirements": [ - "R", - "utils" - ], - "Hash": "892124978869b74935dc3934c42bfe5a" - }, - "rmarkdown": { - "Package": "rmarkdown", - "Version": 
"2.28", - "Source": "Repository", - "Repository": "CRAN", - "Requirements": [ - "R", - "bslib", - "evaluate", - "fontawesome", - "htmltools", - "jquerylib", - "jsonlite", - "knitr", - "methods", - "tinytex", - "tools", - "utils", - "xfun", - "yaml" - ], - "Hash": "062470668513dcda416927085ee9bdc7" - }, - "rstudioapi": { - "Package": "rstudioapi", - "Version": "0.16.0", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "96710351d642b70e8f02ddeb237c46a7" - }, - "sass": { - "Package": "sass", - "Version": "0.4.9", - "Source": "Repository", - "Repository": "CRAN", - "Requirements": [ - "R6", - "fs", - "htmltools", - "rappdirs", - "rlang" - ], - "Hash": "d53dbfddf695303ea4ad66f86e99b95d" - }, - "sessioninfo": { - "Package": "sessioninfo", - "Version": "1.2.2", - "Source": "Repository", - "Repository": "CRAN", - "Requirements": [ - "R", - "cli", - "tools", - "utils" - ], - "Hash": "3f9796a8d0a0e8c6eb49a4b029359d1f" - }, - "tinytex": { - "Package": "tinytex", - "Version": "0.52", - "Source": "Repository", - "Repository": "CRAN", - "Requirements": [ - "xfun" - ], - "Hash": "cfbad971a71f0e27cec22e544a08bc3b" - }, - "withr": { - "Package": "withr", - "Version": "3.0.1", - "Source": "Repository", - "Repository": "CRAN", - "Requirements": [ - "R", - "grDevices", - "graphics" - ], - "Hash": "07909200e8bbe90426fbfeb73e1e27aa" - }, - "xfun": { - "Package": "xfun", - "Version": "0.52", - "Source": "Repository", - "Repository": "CRAN", - "Requirements": [ - "R", - "grDevices", - "stats", - "tools" - ], - "Hash": "652ce36fe7d57688e6786819b09d9190" - }, - "yaml": { - "Package": "yaml", - "Version": "2.3.10", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "51dab85c6c98e50a18d7551e9d49f76c" - } - } -}