belinda thom wrote:
Hi,
Is there a way for me to keep adding the next row of a 2d array to a file via save? (matlab's save had a very useful -append option).
I managed to get 2d load/save working, e.g.
import numpy as N
import pylab as P
import bthom.utils as U
# a numpy 2d array (3 rows x 4 columns), first element replaced by pi
a = N.arange(12,dtype=N.float).reshape((3,4))
a[0][0] = N.pi
# checking out the matplotlib save/load stuff:
# write the whole array as ";"-separated text with 4 decimals per value
P.save("data.csv", a, fmt="%.4f", delimiter=";")
# read the file back as a single array
aa = P.load("data.csv", delimiter= ";")
# read it again, this time unpacked into four separate variables
x,y,z,w = P.load("data.csv", delimiter=";", unpack=True)
The above took me longer than it perhaps should have b/c of advice I'd gotten elsewhere recommending trying to keep numpy and pylab separate when possible (to take advantage of all of numpy's features; it seems numpy doesn't even have the all-too-handy load/save functionality).
When I try similar tricks to write one row at a time, I'm hosed in that the shape is gone:
# checking out a way to keep appending: one P.save() call per row
fname = "data1.csv"
U.clobber_file(fname)  # this thing just ensures 0 bytes in file
f = open(fname, "a")
nrows, ncols = a.shape
for i in range(nrows):
    # a[i,:] is a 1-D slice of the 2-D array
    P.save(f, a[i,:], fmt="%d", delimiter=";")
f.close()
# read everything back to inspect what was actually written
aaa = P.load("data1.csv", delimiter=";")
in particular:
% cat data1.csv
3
1
2
4
<snip>
11
Thanks in advance,
--b
This is because pylab.save() writes every 1D-array (like a[i,:]) as a "column vector". In the definition of the save() function:
[...]
if len(X.shape)==1:
origShape = X.shape
X.shape = len(X), 1
[...]
This reshapes the 1D-array (len(a[i,:].shape) == 1) to a 2D-array of shape Nx1 and a loop over the first axis writes the rows (in this case one element per row) to file.
There are several ways to do what you want:
···
#-----------------------------------------------------------------------#
# generate data: a 3x4 float array whose first element is pi
# (the builtin float is used as dtype; N.float was only an alias for it)
a = N.arange(12, dtype=float).reshape((3, 4))
a[0, 0] = N.pi
# write the whole array once as ";"-separated text with 4 decimals
P.save("data.csv", a, fmt="%.4f", delimiter=";")
# (A)
# rather hackish way, define your own save(), not
# really useful, just to show that it works
def _is_string_like(obj):
    """Return True if *obj* can be concatenated with a string."""
    try:
        obj + ''
    except (TypeError, ValueError):
        return False
    return True


def save2(fname, X, fmt='%.18e', delimiter=' '):
    """Write *X* to *fname* as delimited text, one array row per line.

    Unlike the pylab.save() excerpt this is modelled on, a 1-D input is
    written as a single line ("row vector") instead of one value per line.

    fname     -- a file name (a name ending in '.gz' is written through
                 gzip) or an already open file-like object that has a
                 ``seek`` attribute
    X         -- array-like data; 1-D input is treated as a 1 x N array
    fmt       -- %-style format applied to every value
    delimiter -- string placed between the values of one row

    A file opened here from a name is closed before returning; a handle
    passed in by the caller is left open so more rows can be appended.

    Raises ValueError if *fname* is neither a string nor a file handle.
    """
    if _is_string_like(fname):
        if fname.endswith('.gz'):
            import gzip
            # text mode, because the rows written below are str, not bytes
            fh = gzip.open(fname, 'wt')
        else:
            fh = open(fname, 'w')
        owns_handle = True
    elif hasattr(fname, 'seek'):
        fh = fname
        owns_handle = False
    else:
        raise ValueError('fname must be a string or file handle')

    X = N.asarray(X)
    if X.ndim == 1:
        # reshape() returns a 1 x N view, so the caller's array keeps its
        # own shape even if a write below fails
        X = X.reshape((1, len(X)))

    try:
        for row in X:
            fh.write(delimiter.join([fmt % val for val in row]) + '\n')
    finally:
        # only close what was opened here
        if owns_handle:
            fh.close()
# Append every row of `a` to data1.csv through save2(), then read it back.
fname = "data1.csv"
nrows, ncols = a.shape
# mode "a" appends; the with-block closes the file even if save2() raises
with open(fname, "a") as f:
    for i in range(nrows):
        save2(f, a[i, :], fmt="%f", delimiter=";")
aaa = P.load(fname, delimiter=";")
# single-argument print(...) gives the same output on Python 2 and 3
print(aaa)
print("-----------------------")
# (B)
# do it without a save() function
# Append every row of `a` to data2.csv by formatting the lines by hand.
fname = "data2.csv"
nrows, ncols = a.shape
delim = ';'
fmt = '%f'
# mode "a" appends; the with-block closes the file even if a write raises
with open(fname, "a") as f:
    for i in range(nrows):
        # just like in pylab.save(): format each value, join, add newline
        f.write(delim.join([fmt % val for val in a[i, :]]) + '\n')
aaa = P.load(fname, delimiter=";")
# single-argument print(...) gives the same output on Python 2 and 3
print(aaa)
print("-----------------------")
# (C)
# probably the best: save a 1xn "row vector" per line
# Append every row of `a` to data3.csv as an explicit 1 x ncols array.
fname = "data3.csv"
nrows, ncols = a.shape
# mode "a" appends; the with-block closes the file even if P.save() raises
with open(fname, "a") as f:
    for i in range(nrows):
        # reshaping to 2-D keeps P.save() from treating the row as a column
        P.save(f, a[i, :].reshape((1, ncols)), fmt="%f", delimiter=";")
aaa = P.load(fname, delimiter=";")
# single-argument print(...) gives the same output on Python 2 and 3
print(aaa)
HTH
--
cheers,
steve
Random number generation is the art of producing pure gibberish as quickly as possible.