Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,10 @@ Suggests:
gridExtra,
vcd,
MASS,
forcats
forcats,
emojifont,
waffle,
PieGlyph
License: GPL
LazyLoad: yes
LazyData: yes
Expand Down
23 changes: 22 additions & 1 deletion man/Arbuthnot.Rd
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ Campbell, R. B., Arbuthnot and the Human Sex Ratio (2001).
\emph{Human Biology}, 73:4, 605-610.
%\url{http://www.math.uni.edu/~campbell/arbuth.html}

Creighton, C. (1965). A History of Epidemics in Britain, 2nd edition, vol. 1 and 2.
Creighton, C. (1965). A History of Epidemics in Britain, 2nd edition, vol. 1 and 2.
NY: Barnes and Noble.

S. Zabell (1976). Arbuthnot, Heberden, and the \emph{Bills of Mortality}.
Expand All @@ -78,5 +78,26 @@ lines(Arb.smooth$x, Arb.smooth$y, col="blue", lwd=2)

# plot the total christenings to observe the anomaly in 1704
with(Arbuthnot, plot(Year,Total, type='b', ylab="Total Christenings"))

###########################################################################
# Challenge 03: Dr. John Russell
# github.com/drjohnrussell/30DayChartChallenge/blob/main/2025/Challenge03.R
###########################################################################

# Pie-glyph time series: one small pie per year, split into male and female
# christenings, positioned at the combined count for that year.
# PieGlyph and ggplot2 are both optional packages, so the plot is only drawn
# when each of them can be loaded.
if (require("PieGlyph") && require("ggplot2")) {
  arb_totals <- transform(Arbuthnot, total = Males + Females)

  ggplot(arb_totals, aes(x = Year, y = total)) +
    geom_pie_glyph(slices = c("Males", "Females"),
                   color = "black", radius = 0.4) +
    scale_fill_manual(values = c("lightblue", "pink")) +
    theme_minimal() +
    theme(legend.position = "bottom") +
    labs(fill = "Gender",
         title = "John Arbuthnot's Data on Male and Female Baptisms in London",
         x = "Year",
         y = "Total Baptisms")
}
}
\keyword{datasets}
59 changes: 52 additions & 7 deletions man/Cholera.Rd
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ Farr's table.
}
\details{
The supply of \code{water} was classified as \dQuote{Thames, between Battersea and Waterloo Bridges}
(central London),
(central London),
\dQuote{New River, Rivers Lea and Ravensbourne}, and \dQuote{Thames, at Kew and Hammersmith}
(western London).
The factor levels use abbreviations for these.
Expand All @@ -49,7 +49,7 @@ The data frame is sorted by increasing elevation above the high water mark.
}
\source{
Bingham P., Verlander, N. Q., Cheal M. J. (2004).
John Snow, William Farr and the 1849 outbreak of cholera that affected London:
John Snow, William Farr and the 1849 outbreak of cholera that affected London:
a reworking of the data highlights the importance of the water supply.
\emph{Public Health}, 118(6), 387-394, Table 2.
(The data was kindly supplied by Neville Verlander, including additional variables
Expand All @@ -67,7 +67,7 @@ The relevant tables are at pages clii -- clvii.
data(Cholera)

# plot cholera deaths vs. elevation
plot(cholera_drate ~ elevation, data=Cholera,
plot(cholera_drate ~ elevation, data=Cholera,
pch=16, cex.lab=1.2, cex=1.2,
xlab="Elevation above high water mark (ft)",
ylab="Deaths from cholera in 1849 per 10,000")
Expand All @@ -81,15 +81,15 @@ lines(mort ~ elev, lwd=2, col="blue")

if(require("car", quietly=TRUE)) {
# show separate regression lines for each water supply
scatterplot(cholera_drate ~ elevation | water, data=Cholera,
scatterplot(cholera_drate ~ elevation | water, data=Cholera,
smooth=FALSE, pch=15:17,
id=list(n=2, labels=sub(",.*", "", Cholera$district)),
col=c("red", "darkgreen", "blue"),
legend=list(coords="topleft", title="Water supply"),
xlab="Elevation above high water mark (ft)",
ylab="Deaths from cholera in 1849 per 10,000")
scatterplot(cholera_drate ~ poor_rate | water, data=Cholera,

scatterplot(cholera_drate ~ poor_rate | water, data=Cholera,
smooth=FALSE, pch=15:17,
id=list(n=2, labels=sub(",.*", "", Cholera$district)),
col=c("red", "darkgreen", "blue"),
Expand All @@ -99,7 +99,7 @@ if(require("car", quietly=TRUE)) {
}

# fit a logistic regression model a la Bingham et al.
fit <- glm( cbind(cholera_deaths, popn) ~
fit <- glm( cbind(cholera_deaths, popn) ~
water + elevation + poor_rate + annual_deaths +
pop_dens + persons_house,
data=Cholera, family=binomial)
Expand All @@ -113,5 +113,50 @@ if (require(effects)) {
plot(eff)
}

###########################################################################
# Challenge 01: Dr. John Russell
# github.com/drjohnrussell/30DayChartChallenge/blob/main/2025/Challenge01.R
# Hint: It works better if the emojifont package is installed!
###########################################################################

# emojifont is optional: call its load.fontawesome() only when it attaches.
if (require("emojifont")) load.fontawesome()

# Scatterplot of the cholera death rate against elevation, both on log10
# axes, with points coloured by water supply.
# ggplot2 is an optional package, so the plot is only built when it loads.
if (require("ggplot2")) {
  plot1 <- ggplot(Cholera,
                  aes(x = elevation, y = cholera_drate, color = water)) +
    geom_point() +
    scale_x_log10() +
    scale_y_log10() +
    scale_color_brewer(palette = "Dark2") +
    labs(y = "Cholera Death Rate per 10000 citizens (log)",
         x = "Elevation (log)",
         color = "Water Source") +
    # the complete theme goes first so the legend tweak below is not reset
    theme_bw() +
    theme(legend.position = "bottom")

  plot1
}

# Waffle chart of cholera deaths, scaled to one square per 40 deaths and
# filled by water supply.
# waffle and ggplot2 are both optional, so run only when each of them loads.
if (require("waffle") && require("ggplot2")) {
  chol_waffle <- transform(Cholera, newcholeradeaths = cholera_deaths / 40)
  # Sort rows by water supply. order() is used instead of sort_by() because
  # sort_by() is only available from R 4.4.0 onwards.
  chol_waffle <- chol_waffle[order(chol_waffle$water), , drop = FALSE]

  day1 <- ggplot(chol_waffle, aes(fill = water, values = newcholeradeaths)) +
    geom_waffle(radius = grid::unit(0.4, "npc"),
                size = .25) +
    theme_void() +
    labs(title = "Deaths from Cholera in London in 1849",
         subtitle = "40 Deaths per Square",
         caption = paste0(
           "Data from William Farr's initial report, where he concluded\n",
           "elevation \nwas the factor in the spread of cholera"),
         fill = "Water Source") +
    scale_fill_brewer(palette = "Dark2") +
    theme(legend.position = "bottom",
          panel.background = element_rect(color = "white"))

  day1
}

}
\keyword{datasets}
36 changes: 28 additions & 8 deletions man/Galton.Rd
Original file line number Diff line number Diff line change
Expand Up @@ -35,8 +35,8 @@ Galton, F. (1886). Regression Towards Mediocrity in Hereditary Stature
\emph{Journal of the Anthropological Institute}, 15, 246-263
}
\references{
Friendly, M. & Denis, D. (2005). The early origins and development of the scatterplot.
\emph{Journal of the History of the Behavioral Sciences},
Friendly, M. & Denis, D. (2005). The early origins and development of the scatterplot.
\emph{Journal of the History of the Behavioral Sciences},
41, 103-130.

Galton, F. (1869). \emph{Hereditary Genius: An Inquiry into its Laws and Consequences}.
Expand All @@ -46,21 +46,21 @@ Hanley, J. A. (2004). "Transmuting" Women into Men: Galton's Family Data on Huma
\emph{The American Statistician}, 58, 237-243.
See: \url{http://www.medicine.mcgill.ca/epidemiology/hanley/galton/} for source materials.

Stigler, S. M. (1986).
Stigler, S. M. (1986).
\emph{The History of Statistics: The Measurement of Uncertainty before 1900}.
Cambridge, MA: Harvard University Press, Table 8.1

Wachsmuth, A. W., Wilkinson L., Dallal G. E. (2003).
Galton's bend: A previously undiscovered nonlinearity in Galton's family stature regression data.
\emph{The American Statistician}, 57, 190-192.
Wachsmuth, A. W., Wilkinson L., Dallal G. E. (2003).
Galton's bend: A previously undiscovered nonlinearity in Galton's family stature regression data.
\emph{The American Statistician}, 57, 190-192.
%\url{http://staff.ustc.edu.cn/~zwp/teach/Reg/galton.pdf}
\doi{10.1198/0003130031874}

}
\seealso{
\code{\link{GaltonFamilies}},
\code{\link{PearsonLee}},
\code{galton} in the \pkg{psych} % \code{\link[psych]{galton}}
\code{galton} in the \pkg{psych} % \code{\link[psych]{galton}}
}

\examples{
Expand All @@ -76,7 +76,7 @@ data(Galton)
# sunflower plot with regression line and data ellipses and lowess smooth
###########################################################################

with(Galton,
with(Galton,
{
sunflowerplot(parent,child, xlim=c(62,74), ylim=c(62,74))
reg <- lm(child ~ parent)
Expand All @@ -87,5 +87,25 @@ with(Galton,
}
})

###########################################################################
# Challenge 02: Dr. John Russell
# github.com/drjohnrussell/30DayChartChallenge/blob/main/2025/Challenge02.R
###########################################################################


# Jittered scatterplot of child against parent height, overlaid with filled
# 2D density contours, a loess smooth (red) and a linear regression line.
# ggplot2 is an optional package, so the plot is only drawn when it loads.
if (require("ggplot2")) {
  ggplot(Galton, aes(x = parent, y = child)) +
    geom_point(position = "jitter") +
    geom_density2d_filled(alpha = .6, show.legend = FALSE, bins = 9) +
    theme_minimal() +
    geom_smooth(method = "loess", se = FALSE, color = "red") +
    geom_smooth(method = "lm", se = FALSE) +
    labs(title = "Galton's Height Data",
         x = "Parent Height (inches)",
         y = "Child Height (inches)") +
    scale_fill_brewer(type = "seq")
}

}
\keyword{datasets}
28 changes: 22 additions & 6 deletions man/PearsonLee.Rd
Original file line number Diff line number Diff line change
Expand Up @@ -36,16 +36,16 @@ Pearson, K. and Lee, A. (1896). Mathematical contributions to the theory
of evolution. On telegony in man, etc. \emph{Proceedings of the Royal Society of
London}, 60 , 273-283.

Pearson, K. and Lee, A. (1903).
Pearson, K. and Lee, A. (1903).
On the laws of inheritance in man: I. Inheritance of physical characters. \emph{Biometrika}, 2(4), 357-462.
(Tables XXII, p. 415; XXV, p. 417; XXVIII, p. 419 and XXXI, p. 421.)


}
\references{
Wachsmuth, A.W., Wilkinson L., Dallal G.E. (2003).
Galton's bend: A previously undiscovered nonlinearity in Galton's family stature regression data.
\emph{The American Statistician}, 57, 190-192.
Wachsmuth, A.W., Wilkinson L., Dallal G.E. (2003).
Galton's bend: A previously undiscovered nonlinearity in Galton's family stature regression data.
\emph{The American Statistician}, 57, 190-192.
%\url{http://staff.ustc.edu.cn/~zwp/teach/Reg/galton.pdf}
\doi{10.1198/0003130031874}
}
Expand All @@ -58,13 +58,13 @@ Galton's bend: A previously undiscovered nonlinearity in Galton's family stature
data(PearsonLee)
str(PearsonLee)

with(PearsonLee,
with(PearsonLee,
{
lim <- c(55,80)
xv <- seq(55,80, .5)
sunflowerplot(parent,child, number=frequency, xlim=lim, ylim=lim, seg.col="gray", size=.1)
abline(lm(child ~ parent, weights=frequency), col="blue", lwd=2)
lines(xv, predict(loess(child ~ parent, weights=frequency), data.frame(parent=xv)),
lines(xv, predict(loess(child ~ parent, weights=frequency), data.frame(parent=xv)),
col="blue", lwd=2)
# NB: dataEllipse doesn't take frequency into account
if(require(car)) {
Expand Down Expand Up @@ -106,5 +106,21 @@ ggplot(PearsonLee, aes(x = child, y = parent, weight=frequency)) +
theme(legend.position = c(0.14, 0.885),
legend.background = element_rect(fill = 'white'))

###########################################################################
# Challenge 02: Dr. John Russell
# github.com/drjohnrussell/30DayChartChallenge/blob/main/2025/Challenge02.R
###########################################################################

# Scatterplot of child against parent height, faceted by par (rows) and
# chl (columns), with frequency-weighted linear fits in each panel.
# NOTE(review): both smooths use the lm method, so the default-coloured line
# is drawn on top of the thicker red one -- confirm whether the red line was
# meant to be a different smoother.
PearsonLee |>
  ggplot(aes(x = parent, y = child)) +
  geom_point() +
  theme_minimal() +
  # linewidth replaces size for line geoms since ggplot2 3.4.0
  geom_smooth(aes(weight = frequency), method = "lm", se = FALSE,
              color = "red", linewidth = 2) +
  geom_smooth(aes(weight = frequency), method = "lm", se = FALSE) +
  facet_grid(par ~ chl) +
  labs(title = "Pearson and Lee's Data on Parent and Child",
       x = "Parent Height (inches)",
       y = "Child Height (inches)")

}
\keyword{datasets}