saving data to a file

belinda_thom · January 3, 2007, 3:57am

Hi,

Is there a way for me to keep appending the next row of a 2D array to a file via save? (MATLAB's save had a very useful -append option.)

I managed to get 2D load/save working, e.g.

   import numpy as N
   import pylab as P
   import bthom.utils as U
   # a numpy 2d array: the values 0..11 laid out as 3 rows x 4 columns
   # NOTE(review): N.float was an alias of the builtin float and is no longer
   # available in current NumPy releases; dtype=float is the portable spelling.
   a = N.arange(12,dtype=N.float).reshape((3,4))
   # overwrite the first element so at least one value has a fractional part
   a[0][0] = N.pi
   # checking out the matplotlib save/load stuff
   # write the whole array as text: 4 decimals per value, ';' between values
   P.save("data.csv", a, fmt="%.4f", delimiter=";")
   # read the file back into a single array
   aa = P.load("data.csv", delimiter= ";")
   # with unpack=True the result is unpacked into four names
   # (presumably one per column of the 3x4 array -- confirm against pylab docs)
   x,y,z,w = P.load("data.csv", delimiter=";", unpack=True)

The above took me longer than it perhaps should have because of advice I'd gotten elsewhere recommending trying to keep numpy and pylab separate when possible (to take advantage of all of numpy's features; it seems numpy doesn't even have the all-too-handy load/save functionality).

When I try similar tricks to write one row at a time, I'm hosed in that the shape is gone:

   # checking out a way to keep appending
   fname = "data1.csv"
   U.clobber_file(fname) #this thing just ensures 0 bytes in file
   # open in append mode so every save() call adds to the end of the file
   f = open(fname,"a")
   nrows,ncols = a.shape
   for i in range(nrows) :
       # a[i,:] is a 1-D slice; the resulting file holds one value per line
       # instead of one row per line (see the cat output below)
       P.save(f, a[i,:], fmt="%d", delimiter=";")
   f.close()
   # reading the file back therefore no longer yields the original 2-D shape
   aaa = P.load("data1.csv", delimiter= ";")

in particular:

   % cat data1.csv
   3
   1
   2
   4
   <snip>
   11

Thanks in advance,
--b

Steve_Schmerler · January 3, 2007, 10:01am

belinda thom wrote:

Hi,

Is there a way for me to keep appending the next row of a 2D array to a file via save? (MATLAB's save had a very useful -append option.)

I managed to get 2D load/save working, e.g.

   import numpy as N
   import pylab as P
   import bthom.utils as U
   # a numpy 2d array: the values 0..11 laid out as 3 rows x 4 columns
   # NOTE(review): N.float was an alias of the builtin float and is no longer
   # available in current NumPy releases; dtype=float is the portable spelling.
   a = N.arange(12,dtype=N.float).reshape((3,4))
   # overwrite the first element so at least one value has a fractional part
   a[0][0] = N.pi
   # checking out the matplotlib save/load stuff
   # write the whole array as text: 4 decimals per value, ';' between values
   P.save("data.csv", a, fmt="%.4f", delimiter=";")
   # read the file back into a single array
   aa = P.load("data.csv", delimiter= ";")
   # with unpack=True the result is unpacked into four names
   # (presumably one per column of the 3x4 array -- confirm against pylab docs)
   x,y,z,w = P.load("data.csv", delimiter=";", unpack=True)

The above took me longer than it perhaps should have because of advice I'd gotten elsewhere recommending trying to keep numpy and pylab separate when possible (to take advantage of all of numpy's features; it seems numpy doesn't even have the all-too-handy load/save functionality).

When I try similar tricks to write one row at a time, I'm hosed in that the shape is gone:

   # checking out a way to keep appending
   fname = "data1.csv"
   U.clobber_file(fname) #this thing just ensures 0 bytes in file
   # open in append mode so every save() call adds to the end of the file
   f = open(fname,"a")
   nrows,ncols = a.shape
   for i in range(nrows) :
       # a[i,:] is a 1-D slice; the resulting file holds one value per line
       # instead of one row per line (see the cat output below)
       P.save(f, a[i,:], fmt="%d", delimiter=";")
   f.close()
   # reading the file back therefore no longer yields the original 2-D shape
   aaa = P.load("data1.csv", delimiter= ";")

in particular:

   % cat data1.csv
   3
   1
   2
   4
   <snip>
   11

Thanks in advance,
--b

This is because pylab.save() writes every 1D-array (like a[i,:]) as a "column vector". In the definition of the save() function:

[...]
     if len(X.shape)==1:
         origShape = X.shape
         X.shape = len(X), 1
[...]

This reshapes the 1D-array (len(a[i,:].shape) == 1) to a 2D-array of shape Nx1 and a loop over the first axis writes the rows (in this case one element per row) to file.

There are several ways to do what you want:

···

#-----------------------------------------------------------------------#

# generate data: the values 0..11 as a 3x4 float array, with pi in the corner
# so that at least one value has a fractional part.
# The builtin float is used as dtype; the N.float alias has been removed from
# current NumPy releases.
a = N.arange(12, dtype=float).reshape((3, 4))
a[0, 0] = N.pi
# reference file: the whole array written in one call, one row per line
P.save("data.csv", a, fmt="%.4f", delimiter=";")

# (A)
# rather hackish way, define your own save(), not
# really useful, just to show that it works
def save2(fname, X, fmt='%.18e',delimiter=' '):
    """Write array X to fname as delimited text, one row per line.

    Variant of pylab.save() with a single behavioural difference: a 1-D
    input is written as ONE row (shape 1 x n) instead of one value per line
    (shape n x 1).  That makes it usable for appending a 2-D array to an
    open file one row at a time.

    Parameters:
        fname     -- a file name (a name ending in '.gz' is written through
                     gzip) or an already-open file-like object.
        X         -- 1-D or 2-D array-like of values to write.
        fmt       -- %-style format applied to every single value.
        delimiter -- string placed between the values of a row.

    Raises:
        ValueError -- if fname is neither string-like nor a file handle.

    A handle passed in by the caller is left open; a file opened here from
    a name is always closed again, even if writing fails.  The caller's
    array is never modified.
    """
    # An already-open handle is used as-is; real strings have no 'seek'.
    if hasattr(fname, 'seek'):
        fh = fname
        opened_here = False
    # NOTE(review): is_string_like is not defined in this snippet; it is
    # presumably matplotlib's helper and must be in scope -- confirm.
    elif is_string_like(fname):
        if fname.endswith('.gz'):
            import gzip
            # NOTE(review): a 'wb' gzip handle takes text on Python 2 only;
            # Python 3 would need the rows encoded to bytes.
            fh = gzip.open(fname, 'wb')
        else:
            fh = open(fname, 'w')
        opened_here = True
    else:
        raise ValueError('fname must be a string or file handle')

    try:
        X = N.asarray(X)
        if X.ndim == 1:
            # pylab.save() would use (len(X), 1) here, i.e. a column vector.
            # reshape() returns a new view, so the caller's array keeps its
            # original shape and nothing has to be restored afterwards.
            X = X.reshape((1, len(X)))
        for row in X:
            fh.write(delimiter.join([fmt % val for val in row]) + '\n')
    finally:
        if opened_here:
            fh.close()

# Append the array to data1.csv one row at a time through save2().
# Mode "a" means re-running the script adds further rows to an existing file.
fname = "data1.csv"
with open(fname, "a") as f:
    for row in a:
        # each row is a 1-D slice; save2() writes it as a single line
        save2(f, row, fmt="%f", delimiter=";")
# read the file back to check that the 2-D layout survived
aaa = P.load(fname, delimiter=";")
print(aaa)

print("-----------------------")

# (B)
# do it without a save() function
# Append the array to data2.csv one row at a time with plain file writes.
# Mode "a" means re-running the script adds further rows to an existing file.
fname = "data2.csv"
delim = ';'
fmt = '%f'
with open(fname, "a") as f:
    for row in a:
        # just like in pylab.save(): format every value, join, end the line
        f.write(delim.join([fmt % val for val in row]) + '\n')
# read the file back to check that the 2-D layout survived
aaa = P.load(fname, delimiter=";")
print(aaa)

print("-----------------------")

# (C)
# probably the best: save a 1xn "row vector" per line
# Append the array to data3.csv one row at a time with the stock P.save().
# Mode "a" means re-running the script adds further rows to an existing file.
fname = "data3.csv"
ncols = a.shape[1]
with open(fname, "a") as f:
    for row in a:
        # hand save() an explicit 1 x ncols array so it writes one line
        # rather than treating the 1-D slice as a column
        P.save(f, row.reshape((1, ncols)), fmt="%f", delimiter=";")
# read the file back to check that the 2-D layout survived
aaa = P.load(fname, delimiter=";")
print(aaa)

HTH

--
cheers,
steve

Random number generation is the art of producing pure gibberish as quickly as possible.