Hi,
Using matplotlib, I am trying to plot satellite observations, which consist of
roughly one million patches that are not on a regular grid.
I first collect the vertices (corner points of the observations) and
colors, and then use PolyCollection and ax.add_collection to add these
patches to the figure.
On my 64bit Linux machine:
# 518400 patches will use >2Gb of memory
# 1M patches will use > 4Gb of memory
On a 32bit machine the memory use is roughly half of that on a 64bit machine.
My question: how can I plot this more efficiently and use less memory?
An example script with random data is below.
System:
Debian Testing: kernel 2.6.32-5-amd64 x86_64 GNU/Linux
Python 2.6.6 (r266:84292, Dec 26 2010, 22:31:48)
matplotlib.__version__ = '1.0.0'
Best regards,
Onet.
#!/usr/bin/env python
···
#
# Purpose : Show large use of memory when plotting
# large numbers of patches
#
import random
import matplotlib
matplotlib.use('AGG') # produce AGG graphics (o.a. PNG) by default
import matplotlib.pyplot as plt
import matplotlib.colors as colors
from matplotlib.collections import PolyCollection
from mpl_toolkits.basemap import Basemap
def test_polycollection(NLats, NLons):
    """Plot a staggered grid of randomly coloured patches on a world map.

    For every latitude row, one four-cornered (diamond shaped) polygon is
    created for every second longitude index; odd rows are shifted by one
    longitude step so the rows interlock.  Each polygon gets a random colour
    taken from the ``jet`` colormap.  All polygons are added to the axes as
    a single ``PolyCollection``, coastlines are drawn on top, and the figure
    is written to ``polycol.png`` at 300 dpi.

    Args:
        NLats: number of latitude rows.
        NLons: number of longitude columns.  Only every second column gets
            a patch, so ``NLats * len(range(0, NLons, 2))`` polygons are
            created in total.

    Returns:
        None.  The result is the ``polycol.png`` file plus three progress
        messages printed to standard output.
    """
    fig = plt.figure()
    ax = fig.add_axes([0.1, 0.1, 0.8, 0.8])
    figmap = Basemap(llcrnrlon=-180., llcrnrlat=-90,
                     urcrnrlon=180., urcrnrlat=90.,
                     resolution='c', area_thresh=10000.,
                     projection='cyl')

    # Color map and min/max bounds of the random values
    cmap = plt.cm.jet
    vmin = 0
    vmax = 10

    # Grid divisors; kept as separate division operands (rather than folded
    # into a pre-multiplied scale factor) so the floating point results are
    # exactly those of the original per-vertex expressions.
    lon_div = NLons + 1
    lat_div = NLats + 1

    # Lists for the vertices and the colors, one entry per polygon.
    # NOTE(review): this keeps one Python list of tuples per patch; for very
    # large grids a single numeric array would presumably be more compact.
    poly_vertices = []
    poly_colors = []

    for lat_nr in range(0, NLats):
        # Everything that depends only on the row is computed once per row
        # instead of once per patch.
        # Shift lon 1 point on odd rows for the staggered grid.
        shift_lon = lat_nr % 2
        y_low = 179. * (lat_nr) / lat_div - 89.9
        y_mid = 179. * (lat_nr + 1) / lat_div - 89.9
        y_high = 179. * (lat_nr + 2) / lat_div - 89.9

        for lon_nr in range(0, NLons, 2):
            lon_base = shift_lon + lon_nr
            x_left = 359. * (lon_base) / lon_div - 179.9
            x_mid = 359. * (lon_base + 1) / lon_div - 179.9
            x_right = 359. * (lon_base + 2) / lon_div - 179.9

            # Get RGB color for a random value, cut off alpha.
            random_value = random.random() * vmax
            rgb = cmap((random_value - vmin) / (vmax - vmin))[:3]

            # Corner order: bottom, right, top, left.
            poly_vertices.append([(x_mid, y_low), (x_right, y_mid),
                                  (x_mid, y_high), (x_left, y_mid)])
            poly_colors.append(rgb)

    # Create PolyCollection and add it to the axes.
    # The messages are printed through single-argument print(...) calls so
    # the function parses on Python 3 as well; the double space reproduces
    # the separator the Python 2 ``print a, b`` statement inserted.
    print('PolyCollection: number of elements:  %d' % len(poly_colors))
    data_patch_collection = PolyCollection(poly_vertices,
                                           facecolor=poly_colors,
                                           edgecolor='black',
                                           linewidth=0)
    print('add_collection')
    ax.add_collection(data_patch_collection)
    print('add_collection done')

    # Finish the plot by drawing coastlines
    figmap.drawcoastlines()
    plt.title('PolyCollection on a map')
    fig.savefig('polycol.png', dpi=300)
    # Close this figure explicitly rather than "whatever is current".
    plt.close(fig)
#
# End test_polycollection
#
if __name__ == "__main__":
    # Test the memory size of matplotlib using poly collections.
    #
    # On a 64 bit Linux machine the memory use is enormous when plotting
    # large numbers of patches via matplotlib / PolyCollection.  On a 32 bit
    # Linux machine, the memory usage is roughly half.
    # Can this be done more efficiently?
    #
    # System the figures below were reported on:
    #   Debian Testing: Linux host 2.6.32-5-amd64 #1 SMP
    #   Wed Jan 12 03:40:32 UTC 2011 x86_64 GNU/Linux
    #   Python 2.6.6 (r266:84292, Dec 26 2010, 22:31:48)
    #   matplotlib.__version__ = '1.0.0'
    #
    # Grid sizes and reported memory use (on 64bit Linux):
    #   NLats  NLons  patches  memory
    #     360    720   129600  ~630Mb
    #     360   1440   259200  ~1Gb
    #     720   1440   518400  ~2Gb    <- selected below
    NLats = 720
    NLons = 1440

    # Run the memory test with the selected grid size.
    test_polycollection(NLats, NLons)