def open_file(maindir):
    """
    Creates the digital RF reading object.

    Args:
        maindir (:obj:`str`): The directory where the data is located. A
            leading ``~`` is expanded to the user's home directory.

    Returns:
        drfObj (:obj:`DigitalRFReader`): Digital RF Reader object.
        chandict (:obj:`dict`): Dictionary keyed by channel name. Each entry
            holds 'sind' and 'eind' (the channel bounds), 'sps' (samples per
            second) and 'fo' (first center frequency).
        start_indx: Largest start bound over all channels, in samples
            (0 if there are no channels).
        end_indx: Smallest end bound over all channels, in samples
            (``sp.inf`` if there are no channels).
    """
    mainpath = os.path.expanduser(maindir)
    drfObj = drf.DigitalRFReader(mainpath)
    chans = drfObj.get_channels()

    chandict = {}
    start_indx, end_indx = [0, sp.inf]
    # Get channel info
    for ichan in chans:
        curdict = {}
        curdict['sind'], curdict['eind'] = drfObj.get_bounds(ichan)
        # Determine the read boundaries assuming the sampling is the same:
        # keep the running intersection of all channel bounds seen so far.
        start_indx = sp.maximum(curdict['sind'], start_indx)
        end_indx = sp.minimum(curdict['eind'], end_indx)

        dmetadict = drfObj.read_metadata(start_indx, end_indx, ichan)
        # Dictionary views cannot be indexed on Python 3, so materialise the
        # values first. An empty metadata dict still raises IndexError.
        first_meta = list(dmetadict.values())[0]
        curdict['sps'] = first_meta['samples_per_second']
        curdict['fo'] = first_meta['center_frequencies'].ravel()[0]
        chandict[ichan] = curdict

    return (drfObj, chandict, start_indx, end_indx)
def predict(tau, model, xT, yT):
    '''
    Score the model against reference labels for every value in tau.

    Input:
        tau : array of values, each passed to the model as its `tau` argument
        model : object exposing predict(xT, tau=...) whose first returned
                item is the array of predicted labels
        xT : the samples handed to model.predict
        yT : the reference labels
    Output:
        err : array with one entry per value of tau. Despite its name, each
              entry is the percentage (0-100) of predicted labels that are
              equal to the reference labels.
    '''
    reference = yT.ravel()
    err = sp.zeros(tau.size)
    for position, value in enumerate(tau):
        predicted = model.predict(xT, tau=value)[0]
        matches = sp.where(predicted.ravel() == reference)[0]
        err[position] = matches.size * 100.0 / yT.size
    return err
def learn(self, x, y):
    '''
    Estimate the per-class Gaussian parameters from training samples.

    Nothing is returned; the results are stored on the instance:
        ni : number of samples of each class, shape (C, 1)
        prop : proportion of each class, shape (C, 1)
        mean : mean vector of each class, shape (C, d)
        cov : covariance matrix of each class, shape (C, d, d)
        L : eigenvalues of each covariance, in decreasing order, shape (C, d)
        Q : matching eigenvectors stored as columns, shape (C, d, d)
        classnum : the original label of each class, stored as uint8

    Input:
        x : the training samples, one row per sample
        y : the labels
    '''
    labels = sp.unique(y)
    n_classes = labels.shape[0]
    n_samples = x.shape[0]
    n_vars = x.shape[1]

    ## Allocate the model parameters
    self.ni = sp.empty((n_classes, 1))
    self.prop = sp.empty((n_classes, 1))
    self.mean = sp.empty((n_classes, n_vars))
    self.cov = sp.empty((n_classes, n_vars, n_vars))
    self.Q = sp.empty((n_classes, n_vars, n_vars))
    self.L = sp.empty((n_classes, n_vars))
    self.classnum = sp.empty(n_classes).astype('uint8')

    ## Estimate the parameters class by class
    for c, label in enumerate(labels):
        rows = sp.where(y == label)[0]
        members = x[rows, :]

        self.classnum[c] = label  # Remember which label sits in slot c
        self.ni[c] = float(rows.size)
        self.prop[c] = self.ni[c] / n_samples
        self.mean[c, :] = sp.mean(members, axis=0)
        # bias=1 normalizes by the class size rather than (class size - 1)
        self.cov[c, :, :] = sp.cov(members, bias=1, rowvar=0)

        # Spectral decomposition, reordered to decreasing eigenvalues
        eigval, eigvec = linalg.eigh(self.cov[c, :, :])
        decreasing = eigval.argsort()[::-1]
        self.L[c, :] = eigval[decreasing]
        self.Q[c, :, :] = eigvec[:, decreasing]
def BIC(self, x, y, tau=None):
    '''
    Computes the Bayesian Information Criterion of the model.

    Input:
        x : the samples, one row per sample
        y : the labels; class c of the model is matched against the
            label c+1, so labels are assumed to run from 1 to C
        tau : value forwarded to compute_inverse_logdet; self.tau is used
              when it is None
    Output:
        the sum over all samples of (quadratic form + log-determinant
        - 2*log(proportion)) plus the penalty
        (C*d*(d+3)/2 + C-1) * log(n)
    '''
    n_classes, n_vars = self.mean.shape
    n_samples = x.shape[0]
    TAU = self.tau if tau is None else tau

    ## Penalization
    penalty = n_classes * (n_vars * (n_vars + 3) / 2) + (n_classes - 1)
    penalty *= sp.log(n_samples)

    ## Accumulate the data term class by class
    total = 0
    for c in range(n_classes):
        rows = sp.where(y == (c + 1))[0]
        centered = x[rows, :]  # Index-array selection copies, x stays intact
        invCov, logdet = self.compute_inverse_logdet(c, TAU)
        constant = logdet - 2 * sp.log(self.prop[c])
        centered -= self.mean[c, :]
        projected = sp.dot(invCov, centered.T).T
        total += sp.sum(sp.sum(centered * projected, axis=1) + constant)

    return total + penalty
def nms(dets, proba, T):
    """
    Greedy non-maximum suppression of bounding boxes.

    Boxes are visited from the highest to the lowest score. A visited box is
    kept, and every remaining box whose overlap ratio (intersection over
    union) with it is greater than T is discarded.

    Args:
        dets: Boxes, one per row, given as (x1, y1, x2, y2). Any array-like
            is accepted, including an empty one.
        proba: Score of each box, in the same order as `dets`.
        T: Overlap threshold; boxes with an overlap <= T survive.

    Returns:
        list: Indices of the kept boxes, in decreasing score order. An empty
        list when there are no boxes.
    """
    # Coerce first so that plain lists (including []) are supported; the
    # emptiness test must come before any column indexing.
    dets = sp.asarray(dets, dtype="float")
    if dets.size == 0:
        return []
    scores = sp.asarray(proba)

    x1 = dets[:, 0]
    y1 = dets[:, 1]
    x2 = dets[:, 2]
    y2 = dets[:, 3]

    # The +1 treats the coordinates as inclusive pixel indices.
    areas = (x2 - x1 + 1) * (y2 - y1 + 1)
    order = scores.argsort()[::-1]

    keep = []
    while order.size > 0:
        i = order[0]
        rest = order[1:]
        keep.append(i)

        # Intersection of the current box with every remaining box
        xx1 = sp.maximum(x1[i], x1[rest])
        yy1 = sp.maximum(y1[i], y1[rest])
        xx2 = sp.minimum(x2[i], x2[rest])
        yy2 = sp.minimum(y2[i], y2[rest])
        w = sp.maximum(0.0, xx2 - xx1 + 1)
        h = sp.maximum(0.0, yy2 - yy1 + 1)
        inter = w * h

        ovr = inter / (areas[i] + areas[rest] - inter)
        order = rest[sp.where(ovr <= T)[0]]

    return keep
def outlier_removed_fit(m, w = None, n_iter=10, polyord=7):
    """
    Remove outliers using fitted data.

    A weighted least-squares polynomial is fitted repeatedly. After each
    fit, the samples whose residual gradient exceeds two standard deviations
    are zeroed out of the system before the next fit.

    Args:
        m (:obj:`numpy array`): Phase curve. It is not modified.
        w (:obj:`numpy array`): Weight of each sample; its square root
            scales the matching row of the system. Defaults to all ones.
        n_iter (:obj:`int`): Number of outlier removal iterations.
        polyord (:obj:`int`): Number of polynomial terms used (powers 0 to
            polyord-1).

    Returns:
        fit (:obj:`numpy array`): Polynomial fit of the last iteration, or
        None when n_iter is 0.
    """
    if w is None:
        w = sp.ones_like(m)
    sqrt_w = sp.sqrt(w)

    m2 = sp.copy(m)
    tv = sp.linspace(-1, 1, num=len(m))
    A = sp.zeros([len(m), polyord])
    for j in range(polyord):
        A[:, j] = tv**(float(j))

    # Row scaling by sqrt(w); equivalent to multiplying by diag(sqrt(w))
    # without building the dense N x N matrix.
    A2 = A * sqrt_w[:, None]
    m2w = m2 * sqrt_w

    fit = None
    std = 0.0
    for _ in range(n_iter):
        xhat = sp.linalg.lstsq(A2, m2w)[0]
        fit = sp.dot(A, xhat)
        # use gradient for central finite differences which keeps order
        resid = sp.gradient(fit - m2)
        std = sp.std(resid)
        bidx = sp.where(sp.absolute(resid) > 2.0*std)[0]
        # Drop the flagged samples from the system
        A2[bidx, :] = 0.0
        m2[bidx] = 0.0
        m2w[bidx] = 0.0

    # debug_plot and plt are expected to be defined at module level.
    if debug_plot and fit is not None:
        plt.plot(m2,label="outlier removed")
        plt.plot(m,label="original")
        plt.plot(fit,label="fit")
        plt.legend()
        # Array reductions, not the element-wise binary minimum/maximum
        plt.ylim([fit.min()-std*3.0, fit.max()+std*3.0])
        plt.show()
    return(fit)
def split_data_class(self, y, v=5):
    '''
    Split the samples into v folds, class by class, so that the samples of
    each class are spread approximately evenly over the folds.

    For every fold, one list of training indices is appended to self.it and
    one list of testing indices is appended to self.iT.

    Input:
        y : the labels; classes are assumed to be labelled 1 to y.max()
        v : the number of folds
    Output:
        None
    '''
    if v < 1:
        # No fold to build
        return

    C = y.max().astype('int')

    # Shuffle the indices of each class once. The generator is seeded with
    # the class index, so the split is reproducible.
    per_class = []
    for i in range(C):
        t = sp.where(y == (i + 1))[0]  # All samples of the class
        nc = t.size
        stepc = nc // v  # Fold size for this class
        if stepc == 0:
            # The message reports the zero-based class index
            print("Not enough sample to build " + str(v) + " folds in class " + str(i))
        sp.random.seed(i)
        per_class.append((t[sp.random.permutation(nc)], stepc))

    for j in range(v):
        tempit = []
        tempiT = []
        for tc, stepc in per_class:
            start = j * stepc
            # The last fold also takes the remainder of the class
            end = (j + 1) * stepc if j < (v - 1) else tc.size
            tempiT.extend(tc[start:end])  # Testing
            # Training: every other fold, in fold order
            tempit.extend(tc[:start])
            tempit.extend(tc[end:])
        self.it.append(tempit)
        self.iT.append(tempiT)
def get_fddb_face_data(k = 12, on_drive = False):
    """
    Walk the FDDB ellipse annotation files and export the annotated faces.

    Every '*-ellipseList.txt' file found under the 'FDDB-folds' directory is
    parsed. Each ellipse is converted to a bounding box with
    util.ellipse2bbox; boxes whose top-left corner is not strictly positive
    are skipped. When on_drive is True, the samples produced by
    Datasets.data_augmentation for each box are written below
    'F:\\train_data\\pos\\'.

    Args:
        k: Not used by this function.
        on_drive (bool): Write the augmented positive samples to disk.

    Returns:
        X, y: No sample is ever collected in X, so both are returned as
        empty arrays.
    """
    root = 'F:\\datasets\\image_data_sets\\faces\\FDDB\\'
    iroot = os.path.join(root,'originalPics')
    eroot = os.path.join(root,'FDDB-folds')
    pattern = '-ellipseList.txt'
    c = 0
    X = []
    for path, subdirs, files in os.walk(eroot):
        for fname in files:
            if fname.find(pattern) <= 0:
                continue
            fpath = os.path.join(path,fname)
            print(fpath)
            with open(fpath) as f:
                lines = f.readlines()

            # An image path line is followed by the number of ellipses and
            # then by that many ellipse lines.
            paths_indx = [i for i, line in enumerate(lines) if line.find('/') > 0]
            paths = [lines[i].strip() for i in paths_indx]

            # Images can hold different numbers of faces, so the ellipses
            # stay in nested lists: a ragged numpy array cannot be built.
            ellipses = []
            for i in paths_indx:
                cnt = int(lines[i + 1])
                ellipses.append([
                    # The last field of an ellipse line is dropped
                    [float(num) for num in line.split()[:-1]]
                    for line in lines[i + 2:i + cnt + 2]
                ])

            for iname, ells in zip(paths, ellipses):
                ppath = os.path.join(iroot,iname.replace('/','\\')) + '.jpg'
                file_id = iname.split('/')[-1]
                frame = fr.get_frame(ppath)
                for ellipse in ells:
                    ra, rb, theta, cx, cy = ellipse
                    x1, y1, x2, y2 = util.ellipse2bbox(a = ra, b = rb, angle = theta, cx = cx, cy = cy)
                    h = abs(y2-y1)
                    w = abs(x2-x1)
                    print(file_id,(y1,x1,h,w))
                    if not (x1 > 0 and y1 > 0):
                        continue
                    if on_drive:
                        for sample in Datasets.data_augmentation(frame,y1,x1,w,h):
                            fr.write_frame('F:\\train_data\\pos\\' + str(c) + '_' + str(file_id) + '_pos',sample)
                            c += 1
    X = sp.array(X)
    y = sp.ones(len(X))
    return X,y