Hi,

I have implemented PNG stream prediction for the pdf backend. It was

marked as a TODO.

Cheers,

Cornelius

## ···

-------------------

Details:

At first I thought about linking to libpng, but I couldn't find an API

for raw encoding of single lines, which is what PDF needs. Therefore, I

implemented the code in plain Python (it makes heavy use of numpy to do

the number crunching). It still needs testing with Python 3, however...

I took care of the five filter types specified by RFC 2083 (PDF predictor values 10-14), plus the per-row "optimal" mode (15):

10 - noop

11 - Sub encoding

12 - Up encoding

13 - Avg encoding

14 - Paeth encoding

15 - Optimal encoding (*)

(*) For '15', I considered only modes 10-13, because my Paeth implementation

is not very fast (nor very clean).

The prediction is done by a static method in the 'Stream' class, which

is called from writeImages. My heuristics have shown (and

http://www.libpng.org/pub/png/book/chapter09.html also mentions it) that

gray-scale data usually does not benefit from stream prediction.

Therefore, the prediction is only applied to color images.

Finally, I think that the encoding mode should be made

adjustable by the user, so an rcParams['pdf.prediction'] (or

'filter' or 'pngcompression') setting would be justified. I didn't

touch any of this, because that's up to you devs.

# -------------- next part --------------

Index: lib/matplotlib/backends/backend_pdf.py

--- lib/matplotlib/backends/backend_pdf.py (revision 8989)

+++ lib/matplotlib/backends/backend_pdf.py (working copy)

@@ -351,6 +351,117 @@

compressed = self.compressobj.compress(data)

self.file.write(compressed)

+ @staticmethod

+ def pngPredict(data, height, width, predictor = 10):

+ """Applies a PNG stream predictor to the data stream

+

+ predictor, int, valid values are

+ 10, no stream prediction

+ 11, Sub predictor

+ 12, Up predictor

+ 13, Average predictor

+ 14, Paeth predictor

+ 15, optimal predictor, not implemented

+ """

+ if predictor < 10 or predictor > 15:

+ return data

+

+ bytesPerSample = len(data) // (width * height)

+ bytesPerRow = width * bytesPerSample

+

+ datat = np.fromstring(data, np.uint8)

+ datat.shape = (height, bytesPerRow)

+

+ pred = np.zeros(len(data) + height, np.uint8)

+ pred.shape = (height, bytesPerRow + 1)

+

+ uLine = np.zeros(bytesPerRow, np.uint8)

+ uShift = np.zeros(bytesPerRow, dtype=np.uint8)

+ work = np.zeros(bytesPerRow, dtype=np.int16) # n.b. int16!

+ for row in xrange(height):

+

+ # PDF standard allows per-line predictors

+ pred[row, 0] = predictor - 10

+

+ cLine = datat[row]

+

+ # No predictor

+ if predictor == 10:

+ pred[row, 1:] = cLine

+

+ # Sub predictor

+ if predictor == 11:

+ work[bytesPerSample:] = cLine[:-bytesPerSample]

+ pred[row, 1:] = cLine - work

+

+ # Up predictor

+ elif predictor == 12:

+ pred[row, 1:] = cLine - uLine

+ uLine = cLine

+

+ # Average predictor

+ elif predictor == 13:

+ work[:bytesPerSample] = 0 # cluttered below...

+ work[bytesPerSample:] = cLine[:-bytesPerSample]

+ work += uLine

+ work /= 2

+

+ pred[row, 1:] = cLine - work

+ uLine = cLine

+

+ # Paeth predictor

+ elif predictor == 14:

+ work[:bytesPerSample] = 0 # cluttered below...

+ work[bytesPerSample:] = cLine[:-bytesPerSample]

+ uShift[bytesPerSample:] = uLine[:-bytesPerSample]

+

+ p = work + uLine - uShift #n.b. int16!

+ pa = abs(p - work)

+ pb = abs(p - uLine)

+ pc = abs(p - uShift)

+

+ paethSelect = lambda a,b,c,pa,pb,pc: a if pa <= pb and pa <= pc else b if pb <= pc else c

+

+ work = np.array([paethSelect(a,b,c,pa,pb,pc) for a,b,c,pa,pb,pc in

+ zip(work, uLine, uShift, pa, pb, pc)])

+

+ pred[row, 1:] = cLine - work

+ uLine = cLine

+

+ elif predictor == 15: # possibly TODO: also check Paeth prediction

+ # minimum sum of absolute differences heuristic by

+ # Lee Daniel Crocker

+ entr_id = sum(abs(cLine.astype(np.int8)))

+

+ work[:bytesPerSample] = 0 # cluttered below...

+ work[bytesPerSample:] = cLine[:-bytesPerSample]

+ predSub = cLine - work

+ entrSub = sum(abs(predSub.astype(np.int8)))

+

+ predUp = cLine - uLine

+ entrUp = sum(abs(predUp.astype(np.int8)))

+

+ work += uLine

+ work /= 2

+ predAvg = cLine - work

+ entrAvg = sum(abs(predAvg.astype(np.int8)))

+

+ if entr_id <= entrSub and entr_id <= entrUp and entr_id <= entrAvg:

+ pred[row, 0] = 0

+ pred[row, 1:] = cLine

+ elif entrSub <= entrUp and entrSub <= entrAvg:

+ pred[row, 0] = 1

+ pred[row, 1:] = predSub

+ elif entrUp <= entrAvg:

+ pred[row, 0] = 2

+ pred[row, 1:] = predUp

+ else:

+ pred[row, 0] = 3

+ pred[row, 1:] = predAvg

+

+ uLine = cLine

+ return pred.tostring()

+

def _flush(self):

"""Flush the compression object."""

@@ -1185,8 +1296,16 @@

{'Type': Name('XObject'), 'Subtype': Name('Image'),

'Width': width, 'Height': height,

'ColorSpace': Name('DeviceRGB'), 'BitsPerComponent': 8,

- 'SMask': smaskObject})

- self.currentstream.write(data) # TODO: predictors (i.e., output png)

+ 'SMask': smaskObject, 'DecodeParms': {'Predictor': predictor,

+ 'BitsPerComponent': 8, 'Columns': width, 'Colors': 3}})

+ # TODO: predictors (i.e., output png)

+ # predictor = rcParams.get('pdf.predictor', 0)

+ predictor = 15 # optimized predictor

+ if predictor:

+ predicted = Stream.pngPredict(data, height, width, predictor)

+ self.currentstream.write(predicted)

+ else:

+ self.currentstream.write(data)

self.endStream()

img.flipud_out()