The Reduction tree class description is parameterized so that it will work for any tree which is unary to (N-1)-ary , and also so that the fan-in and the fan-out trees are of different ranks (i.e., one can have a binary fan-in tree and a quaternary fan-out tree.). Past research has shown that the ranks of the fan-in and fan-out tree have a significant effect on parallel synchronization performance, and should be determined based on the architecture, number of processors, and memory hierarchy of the parallel system [13].
template <class SizeType>struct FanInNode
{
typedef SizeType fanin_size_type;
enum { fanin_size = sizeof(fanin_size_type) };
bool ith_fanin_child_exists(const int i);
};
template <class SizeType>
struct FanOutNode
{
typedef SizeType fanout_size_type;
enum { fanout_size = sizeof(fanout_size_type) };
bool ith_fanout_child_exists(const int i);
};
struct Reduction
{
struct MyNode : public FanInNode<short>,
public FanOutNode<int>
{
};
typedef MyNode node_type;
Reduction(const int size); // constructor
virtual Reduction(); // destructor
// number of threads participating
int Size() const;
// get the node corresp. to the ith thread
node_type& get_node(const int i);
// get this thread's node
node_type& get_my_node();
// index of the fanin parent of the ith thread
static int fanin_parent(const int i);
// index of the fanout parent of the ith thread
static int fanout_parent(const int i);
// index of the kth fanin child of the ith thread
static int fanout_child(const int i, const int k);
// index of the kth fanout child of the ith thread
static int fanin_child(const int i, const int k);
};