HPC Lab - Software - BiBench

Source code for bibench.util

####################################################################
###     ____  _ ____                  _                          ###
###    | __ )(_) __ )  ___ _ __   ___| |__                       ###
###    |  _ \| |  _ \ / _ \ '_ \ / __| '_ \                      ###
###    | |_) | | |_) |  __/ | | | (__| | | |                     ###
###    |____/|_|____/ \___|_| |_|\___|_| |_|                     ###
###                                                              ###
###--------------------------------------------------------------###
###                                                              ###
### This file is part of the BiBench package for biclustering    ###
### analysis.                                                    ###
###                                                              ###
### Copyright (c) 2011 by:                                       ###
###   * Kemal Eren,                                              ###
###   * Mehmet Deveci,                                           ###
###   * Umit V. Catalyurek                                       ###
###                                                              ###
###--------------------------------------------------------------###
###                                                              ###
### For license info, please see the README and LICENSE files    ###
### in the main directory.                                       ###
###                                                              ###
###--------------------------------------------------------------###

import os
import itertools
import bibench
import numpy as np
import zlib
import cPickle


try:
    _STRING_TYPES = basestring  # Python 2: covers both str and unicode
except NameError:
    _STRING_TYPES = str  # Python 3: basestring no longer exists


def isiterable(obj):
    """
    Tell whether an object can be iterated over.

    Strings are tested explicitly because Python 2 strings do not
    define ``__iter__``. Any other object counts as iterable when it
    has a truthy ``__iter__`` attribute.

    Args:
        * obj: any object

    Returns:
        True or False.

    >>> isiterable('abc'), isiterable([1, 2]), isiterable(5)
    (True, True, False)

    """
    if isinstance(obj, _STRING_TYPES):
        return True
    # bool() so callers get a real boolean, not the bound __iter__ method.
    return bool(getattr(obj, '__iter__', False))

def flatten(nested):
    """
    Concatenate the items of every sub-sequence into one flat list.

    Only one level of nesting is removed.

    >>> flatten([[1, 2, 3], [4, 5, 6]])
    [1, 2, 3, 4, 5, 6]

    """
    return list(itertools.chain.from_iterable(nested))
def grouper(iterable, n, fillvalue=None):
    """
    Iterate over a list in chunks.

    From 'http://stackoverflow.com/questions/434287/what-is-the-most-pythonic-way-to-iterate-over-a-list-in-chunks'

    Args:
        * iterable: the items to split into chunks
        * n: number of items per chunk
        * fillvalue: value used to pad the last chunk when the input
          length is not a multiple of n

    Returns:
        An iterator of n-tuples.

    >>> list(grouper([1, 2, 3, 4], 3, 'x'))
    [(1, 2, 3), (4, 'x', 'x')]

    """
    # izip_longest was renamed to zip_longest in Python 3; support both.
    try:
        zip_longest = itertools.izip_longest
    except AttributeError:
        zip_longest = itertools.zip_longest

    # The *same* iterator is repeated n times, so building one output
    # tuple consumes n consecutive items from the input.
    shared = [iter(iterable)] * n
    return zip_longest(*shared, fillvalue=fillvalue)
def which(program):
    """
    Check for an executable; return the path at which it was found.

    If `program` contains a directory component it is checked as given
    and the PATH is not searched. Otherwise each directory on the PATH
    is tried in order. The returned path is `program` itself or the
    PATH entry joined with it, so it is only absolute when those are.

    Taken from
    http://stackoverflow.com/questions/377017/test-if-executable-exists-in-python

    Args:
        * program: name of, or path to, the executable

    Returns:
        The path of the executable, or None if it was not found.

    """
    def is_exe(fpath):
        # isfile() rules out directories, which normally carry the
        # execute bit too and would otherwise be reported as programs.
        return os.path.isfile(fpath) and os.access(fpath, os.X_OK)

    if os.path.dirname(program):
        return program if is_exe(program) else None

    # Fall back to the platform default search path when PATH is unset
    # instead of failing with a KeyError.
    search_path = os.environ.get("PATH", os.defpath)
    for directory in search_path.split(os.pathsep):
        candidate = os.path.join(directory, program)
        if is_exe(candidate):
            return candidate
    return None
def dict_combinations(d):
    """
    Takes a dictionary containing lists. Generates all combinations of
    values from those lists.

    Useful for ranges of parameters for functions.

    An empty dictionary yields exactly one (empty) combination.

    Args:
        * d: dictionary mapping each key to a list of candidate values

    Yields:
        One dictionary per combination, mapping every key of `d` to a
        single value taken from its list.

    >>> [i for i in dict_combinations(dict(first=[1,2]))]
    [{'first': 1}, {'first': 2}]

    """
    # Fix the key order once so keys and value lists stay aligned;
    # unlike zip(*d.items()) this also works when d is empty.
    keys = list(d)
    value_lists = [d[key] for key in keys]
    for combo in itertools.product(*value_lists):
        yield dict(zip(keys, combo))
def make_index_map(mylist):
    """
    Build a dictionary from each item of the list to its position.

    When an item occurs more than once, its last position is kept.

    """
    return dict((item, position) for position, item in enumerate(mylist))
def bootstrap(data, size):
    """
    Bootstrap a new dataset, of any size, from the given dataset, with
    replacement.

    The input is treated as one flat pool of values: its original
    shape plays no role in the sampling.

    Args:
        * data: numpy.ndarray
        * size: int or sequence of ints

    Returns:
        A numpy.ndarray

    """
    pool = np.asarray(data).ravel()
    # randint's upper bound is exclusive, so this draws indices in
    # 0 .. pool.size - 1. It replaces the deprecated random_integers.
    choices = np.random.randint(0, pool.size, size=size)
    # Indexing with an index array returns a new array, so no explicit
    # copy of the pool is needed.
    return pool[choices]
def shuffle(data):
    """
    Randomly permute all elements of an array, across every axis.

    The input is left untouched; a new array with the same shape is
    returned.

    """
    original_shape = data.shape
    # flatten() copies, so the in-place shuffle cannot alter `data`.
    scrambled = data.flatten()
    np.random.shuffle(scrambled)
    return scrambled.reshape(original_shape)
def _ensure_dir(path):
    """Create `path` (and any missing parents) unless it already exists."""
    try:
        os.makedirs(path)
    except OSError:
        # The directory may already exist, or another process may have
        # created it first. Anything else (permissions, a regular file
        # in the way) is a real error and is passed on.
        if not os.path.isdir(path):
            raise


def get_hidden_dir(subdir=None):
    """
    Get the BiBench cache directory, and create it if necessary.

    The cache lives in '.bibench' under the user's home directory.

    Args:
        * subdir: a subdir to create if it does not exist. It may
          contain several path components.

    Returns:
        The path of the cache directory, or of `subdir` inside it.

    """
    # expanduser() honours $HOME on POSIX but, unlike os.getenv("HOME"),
    # does not hand back None when the variable is missing.
    destdir = os.path.join(os.path.expanduser("~"), '.bibench')
    if subdir is not None:
        destdir = os.path.join(destdir, subdir)
    _ensure_dir(destdir)
    return destdir
def zdumps(obj):
    """
    Serialize an object to a compact compressed string.

    The object is pickled with the highest available protocol and the
    result is compressed with zlib.

    """
    pickled = cPickle.dumps(obj, cPickle.HIGHEST_PROTOCOL)
    # Level 9 asks zlib for its strongest compression.
    return zlib.compress(pickled, 9)
def zloads(zstr):
    """
    Rebuild an object from a compressed string produced by `zdumps`.

    The string is decompressed with zlib and then unpickled. Because
    unpickling can execute arbitrary code, only pass data from a
    trusted source.

    """
    pickled = zlib.decompress(zstr)
    return cPickle.loads(pickled)