I would like to propose modifying the boxplot function as follows:
Instead of only accepting an (mxn) matrix (x) and creating (n) boxplots
from the columns of (x), optionally in place of the (mxn) matrix accept
a list of numeric arrays which can be any length, and create boxplots
for each of the arrays in the list.
The obvious benefit in doing this is that one can easily create boxplots
to compare datasets with differing numbers of data-points.
For example:
from RandomArray import normal
x1 = normal(10,3,100)
x2 = normal(10,5,150)
x3 = normal(10,1,1000)boxplot([x1, x2, x3])
The code below implements the proposed change by modifying the boxplot
function axes.py. It works and is proof of concept if nothing else.
ยทยทยท
##--------------------------------------------------##
## modifications to the boxplot function in axes.py ##
##--------------------------------------------------##
def boxplot(self, x, notch=0, sym='b+', vert=1, whis=1.5,
positions=None, widths=None):
"""
boxplot(x, notch=0, sym='+', vert=1, whis=1.5,
positions=None, widths=None)
Make a box and whisker plot for each column of x.
Or
Make a box and whisker plot for each array in list x.
The box extends from the lower to upper quartile values
of the data, with a line at the median. The whiskers
extend from the box to show the range of the data. Flier
points are those past the end of the whiskers.
notch = 0 (default) produces a rectangular box plot.
notch = 1 will produce a notched box plot
sym (default 'b+') is the default symbol for flier points.
Enter an empty string ('') if you don't want to show fliers.
vert = 1 (default) makes the boxes vertical.
vert = 0 makes horizontal boxes. This seems goofy, but
that's how Matlab did it.
whis (default 1.5) defines the length of the whiskers as
a function of the inner quartile range. They extend to the
most extreme data point within ( whis*(75%-25%) ) data range.
positions (default 1,2,...,n) sets the horizontal positions of
the boxes. The ticks and limits are automatically set to match
the positions.
widths is either a scalar or a vector and sets the width of
each box. The default is 0.5, or 0.15*(distance between extreme
positions) if that is smaller.
x is either:
(1) a Numeric array
(2) a list of 1-dimension Numeric arrays of any length
Returns a list of the lines added
"""
if not self._hold: self.cla()
holdStatus = self._hold
whiskers, caps, boxes, medians, fliers = , , , ,
# CASE #1: x is a numeric array
if type(x) == type(array([0])):
x = asarray(x)
rank = len(x.shape)
if 1 == rank:
x.shape = -1, 1
row, col = x.shape
# CASE #2: x is a list of numeric arrays
if type(x) == type(list([0])):
col = len(x) # one column for each array
#reshape the vectors in list x if necessary
for ii in range(len(x)):
rank = len(x[ii].shape)
if 1 == rank:
x[ii].shape = -1, 1
# get some plot info
if positions is None:
positions = range(1, col + 1)
if widths is None:
distance = max(positions) - min(positions)
widths = min(0.15*max(distance,1.0), 0.5)
if isinstance(widths, float) or isinstance(widths, int):
widths = ones((col,), 'd') * widths
# loop through columns, adding each to plot
self.hold(True)
for i,pos in enumerate(positions):
# CASE #1: x is a numeric array
if type(x)==type(array([0])):
d = x[:,i]
# CASE #2: x is a list of numeric arrays
if type(x)==type(list([0])):
d = x[i][:,0]
row = len(d)
# get median and quartiles
q1, med, q3 = prctile(d,[25,50,75])
# get high extreme
iq = q3 - q1
hi_val = q3 + whis*iq
wisk_hi = compress( d <= hi_val , d )
if len(wisk_hi) == 0:
wisk_hi = q3
else:
wisk_hi = max(wisk_hi)
# get low extreme
lo_val = q1 - whis*iq
wisk_lo = compress( d >= lo_val, d )
if len(wisk_lo) == 0:
wisk_lo = q1
else:
wisk_lo = min(wisk_lo)
# get fliers - if we are showing them
flier_hi =
flier_lo =
flier_hi_x =
flier_lo_x =
if len(sym) != 0:
flier_hi = compress( d > wisk_hi, d )
flier_lo = compress( d < wisk_lo, d )
flier_hi_x = ones(flier_hi.shape[0]) * pos
flier_lo_x = ones(flier_lo.shape[0]) * pos
# get x locations for fliers, whisker, whisker cap and box
sides
box_x_min = pos - widths[i] * 0.5
box_x_max = pos + widths[i] * 0.5
wisk_x = ones(2) * pos
cap_x_min = pos - widths[i] * 0.25
cap_x_max = pos + widths[i] * 0.25
cap_x = [cap_x_min, cap_x_max]
# get y location for median
med_y = [med, med]
# calculate 'regular' plot
if notch == 0:
# make our box vectors
box_x = [box_x_min, box_x_max, box_x_max, box_x_min,
box_x_min ]
box_y = [q1, q1, q3, q3, q1 ]
# make our median line vectors
med_x = [box_x_min, box_x_max]
# calculate 'notch' plot
else:
notch_max = med + 1.57*iq/sqrt(row)
notch_min = med - 1.57*iq/sqrt(row)
if notch_max > q3:
notch_max = q3
if notch_min < q1:
notch_min = q1
# make our notched box vectors
box_x = [box_x_min, box_x_max, box_x_max, cap_x_max,
box_x_max, box_x_max, box_x_min, box_x_min, cap_x_min, box_x_min,
box_x_min ]
box_y = [q1, q1, notch_min, med, notch_max, q3, q3,
notch_max, med, notch_min, q1]
# make our median line vectors
med_x = [cap_x_min, cap_x_max]
med_y = [med, med]
# vertical or horizontal plot?
if vert:
def doplot(*args):
return self.plot(*args)
else:
def doplot(*args):
shuffled =
for i in range(0, len(args), 3):
shuffled.extend([args[i+1], args[i], args[i+2]])
return self.plot(*shuffled)
whiskers.extend(doplot(wisk_x, [q1, wisk_lo], 'b--',
wisk_x, [q3, wisk_hi], 'b--'))
caps.extend(doplot(cap_x, [wisk_hi, wisk_hi], 'k-',
cap_x, [wisk_lo, wisk_lo], 'k-'))
boxes.extend(doplot(box_x, box_y, 'b-'))
medians.extend(doplot(med_x, med_y, 'r-'))
fliers.extend(doplot(flier_hi_x, flier_hi, sym,
flier_lo_x, flier_lo, sym))
# fix our axes/ticks up a little
if 1 == vert:
setticks, setlim = self.set_xticks, self.set_xlim
else:
setticks, setlim = self.set_yticks, self.set_ylim
newlimits = min(positions)-0.5, max(positions)+0.5
setlim(newlimits)
setticks(positions)
# reset hold status
self.hold(holdStatus)
return dict(whiskers=whiskers, caps=caps, boxes=boxes,
medians=medians, fliers=fliers)