Module smearn.symbolic

The module smearn.symbolic contains the basics of the symbolic manipulation on which smearn is based: computation graphs with forward and backward propagation for computing values and gradients.

Source code
'''
The module `smearn.symbolic` contains the basics of the symbolic manipulation on which `smearn` is based:
computation graphs with forward and backward propagation for computing values and gradients.
'''

import numpy as np

#
# Exceptions
#

class ShapeError(Exception):
    '''
    An exception type raised when there is an error regarding the shapes of tensors -- for example, when trying to add two tensors of different (non-broadcastable) shapes.
    '''
    pass


#
# Symbolic manipulation
#

class Symbol:
    '''
    A `smearn.symbolic.Symbol` represents a node in a computation graph.
    '''

    def __init__(self, shape=None, parents=[], value=None, constant=False, regularization=None, value_initializers=None):
        '''
        `shape` is the shape of the tensor that the node will output.

        `value` will hold the value of the tensor (computed on demand if the symbol is not a constant). Note that the shape of `value` may (and usually will) differ from `shape`, because values may be computed for batches of more than one element at a time.

        `constant` determines whether the symbol represents a constant (which will not be optimized for during training).

        `parents` is a list of other symbols that will be used to compute the value of the current symbol.

        Any class inheriting from this one and representing a specific operation should overload the `_op_f` and `_op_b` methods.
        `_op_f` must return the value of the symbol (computed using the values of its parents).
        `_op_b` must take as arguments an integer `i` and the gradient tensor of some other symbol with respect to the current one, and return the gradient of that symbol with respect to the `i`-th parent of the current symbol.
        '''
        self.shape = shape
        self.value = value
        self.constant = constant
        self.parents = parents
        for parent in parents:
            parent.children.append(self)
        self.children = []
        self.gradient = None
        self.regularization = regularization
        self.value_initializers = value_initializers

    def set_value(self, value):
        if not self.constant:
            raise Exception("Setting the value of a non-constant symbol is not allowed.")

        # Strip trailing singleton dimensions from both shapes before comparing.
        s1, t1 = remove_trailing_ones(self.shape)
        s2, t2 = remove_trailing_ones(value.shape)

        # The trailing dimensions of `value` must match the symbol's shape;
        # any leading dimensions of `value` are treated as batch dimensions.
        if len(s1) > 0 and s1 != s2[-len(s1):]:
            raise ShapeError("Shape {} is not broadcastable to shape {}".format(self.shape, value.shape))

        # Pad or trim trailing singleton dimensions so that `value` carries
        # as many of them as the symbol's declared shape does.
        self.value = value.reshape(value.shape + (t1 - t2) * (1,)) if t1 >= t2 else value.reshape(value.shape[:t1-t2])

    def compute_value(self):
        if self.value is not None:
            return

        for parent in self.parents:
            parent.compute_value()

        self.value = self._op_f()

    def compute_gradients(self, batch_shape):
        if self.gradient is not None:
            return

        self.gradient = np.zeros(batch_shape + self.shape)

        for child in self.children:
            child.compute_gradients(batch_shape)
            v = child._op_b(child.parents.index(self), child.gradient)
            self.gradient += v

    def propagate_gradients(self, batch_shape=None):
        if batch_shape is None:
            self.gradient = np.ones(self.value.shape)
            batch_shape = self.value.shape[:-len(self.shape)]
        else:
            self.compute_gradients(batch_shape)

        for parent in self.parents:
            parent.propagate_gradients(batch_shape)

    def propagate_learning(self, optimizer):
        if not self.constant and len(self.parents) == 0:
            optimizer.apply_gradient(self)

        for parent in self.parents:
            parent.propagate_learning(optimizer)

    def initialize_optimizer_for_parents(self, optimizer):
        if not self.constant and len(self.parents) == 0:
            optimizer.initialize_symbol(self)

        for parent in self.parents:
            parent.initialize_optimizer_for_parents(optimizer)

    def reset_values_and_gradients(self, train=True):
        if len(self.parents) > 0:
            self.value = None
        elif self.value_initializers is not None:
            self.value = self.value_initializers[0 if train else 1](self.shape)

        self.gradient = None

        for parent in self.parents:
            parent.reset_values_and_gradients(train)

    def _op_f(self):
        return self.value

    def _op_b(self, idx, childs_gradient):
        return childs_gradient


#
# Helper functions
#

def ensure_tuple(d):
    '''
    This function returns the input if it is a tuple, and a tuple containing the input if the input is an integer.
    '''
    if type(d) is tuple:
        return d

    if type(d) is not int:
        raise Exception("Expected a tuple or an integer")

    return (d,)

def remove_trailing_ones(shape):
    '''
    Removes the trailing ones from a tuple and returns the trimmed tuple together with the number of ones removed.
    '''
    i = 0
    while len(shape) > 0 and shape[-1] == 1:
        shape = shape[:-1]
        i += 1
    return shape, i

def np_parallel_transpose(A):
    '''
    Given a stack of matrices, this function returns the stack of matrix transposes.
    '''
    return np.einsum("...ij->...ji", A)

Functions

def ensure_tuple(d)

This function returns the input if it is a tuple, and a tuple containing the input if the input is an integer.

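For example, in a doctest-style session:

    >>> ensure_tuple(3)
    (3,)
    >>> ensure_tuple((2, 5))
    (2, 5)
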
def np_parallel_transpose(A)

Given a stack of matrices, this function returns the stack of matrix transposes.

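For example, the last two axes are swapped while any leading (stack) axes are preserved (assuming numpy is imported as np, as in the module source):

    >>> A = np.zeros((10, 2, 3))        # a stack of ten 2x3 matrices
    >>> np_parallel_transpose(A).shape
    (10, 3, 2)
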
def remove_trailing_ones(shape)

Removes the trailing ones from a tuple and returns the trimmed tuple together with the number of ones removed.

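For example:

    >>> remove_trailing_ones((3, 1, 1))
    ((3,), 2)
    >>> remove_trailing_ones((4, 2))
    ((4, 2), 0)
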

Classes

class ShapeError (*args, **kwargs)

An exception type raised when there is an error regarding the shapes of tensors – for example, when trying to add two tensors of different (non-broadcastable) shapes.


Ancestors

  • builtins.Exception
  • builtins.BaseException
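For example, ShapeError is raised when a value whose trailing dimensions disagree with a symbol's declared shape is assigned to it (an illustrative doctest-style session; the exact traceback formatting may differ):

    >>> s = Symbol(shape=(3,), constant=True)
    >>> s.set_value(np.ones((2, 4)))
    Traceback (most recent call last):
        ...
    smearn.symbolic.ShapeError: Shape (3,) is not broadcastable to shape (2, 4)
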
class Symbol (shape=None, parents=[], value=None, constant=False, regularization=None, value_initializers=None)

A Symbol represents a node in a computation graph.

shape is the shape of the tensor that the node will output.

value will hold the value of the tensor (computed on demand if the symbol is not a constant). Note that the shape of value may (and usually will) differ from shape, because values may be computed for batches of more than one element at a time.

constant determines whether the symbol represents a constant (which will not be optimized for during training).

parents is a list of other symbols that will be used to compute the value of the current symbol.

Any class inheriting from this one and representing a specific operation should overload the _op_f and _op_b methods. _op_f must return the value of the symbol (computed using the values of its parents). _op_b must take as arguments an integer i and the gradient tensor of some other symbol with respect to the current one, and return the gradient of that symbol with respect to the i-th parent of the current symbol.

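As an illustration, here is a minimal sketch of how a custom operation and a forward/backward pass might look. The Add class below is hypothetical (it is not part of this module) and assumes both parents share the same shape, so the incoming gradient passes through to each parent unchanged:

    import numpy as np
    from smearn.symbolic import Symbol

    class Add(Symbol):
        # Hypothetical elementwise addition of two symbols of equal shape.
        def __init__(self, a, b):
            super().__init__(shape=a.shape, parents=[a, b])

        def _op_f(self):
            # Forward: the sum of the parents' values.
            return self.parents[0].value + self.parents[1].value

        def _op_b(self, i, childs_gradient):
            # Backward: d(a + b)/da = d(a + b)/db = identity.
            return childs_gradient

    x = Symbol(shape=(3,), constant=True)      # an input
    w = Symbol(shape=(3,), value=np.ones(3))   # a trainable leaf
    y = Add(x, w)

    x.set_value(np.arange(6.0).reshape(2, 3))  # a batch of two inputs
    y.compute_value()                          # forward pass: y.value.shape == (2, 3)
    y.propagate_gradients()                    # backward pass: fills x.gradient and w.gradient
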


Methods

def compute_gradients(self, batch_shape)

Computes this symbol's gradient by accumulating the contributions of its children: a zero tensor of shape batch_shape + self.shape is allocated, and each child's _op_b result is added to it. Returns immediately if the gradient has already been computed.
def compute_value(self)

Computes this symbol's value by first computing the values of its parents and then applying _op_f. Returns immediately if the value is already set.
def initialize_optimizer_for_parents(self, optimizer)

Walks the graph towards the leaves and calls optimizer.initialize_symbol on every non-constant leaf (i.e., every trainable parameter).
def propagate_gradients(self, batch_shape=None)

Runs backpropagation from this symbol. When called without batch_shape (i.e., on the root of the backward pass), this symbol's gradient is seeded with ones and the batch shape is inferred from its value; otherwise compute_gradients is called. The call then recurses into the parents.
def propagate_learning(self, optimizer)

Walks the graph towards the leaves and calls optimizer.apply_gradient on every non-constant leaf, updating the trainable parameters.
def reset_values_and_gradients(self, train=True)

Clears the stored gradient of every symbol in the graph, and clears the stored value of every non-leaf symbol. Leaf symbols that have value_initializers get a fresh value from the train-time or test-time initializer, depending on the train flag.
def set_value(self, value)

Assigns a value to a constant symbol (raising for non-constant symbols). The trailing dimensions of value must match the symbol's declared shape up to trailing singleton dimensions; any leading dimensions are treated as batch dimensions, and the value is reshaped so that it carries the same trailing singleton dimensions as the declared shape.
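For example, an illustrative doctest-style session showing the trailing-singleton handling:

    >>> s = Symbol(shape=(3, 1), constant=True)
    >>> s.set_value(np.ones((5, 3)))    # a batch of five length-3 vectors
    >>> s.value.shape
    (5, 3, 1)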