/// \file p2p_precompute.hpp
/// \brief Precomputation of the P2P (near-field) operator into a sparse matrix
7 #ifndef NIHU_P2P_PRECOMPUTE_HPP_INCLUDED
8 #define NIHU_P2P_PRECOMPUTE_HPP_INCLUDED
9 
#include "cluster_tree.hpp"
#include "fmm_operator.hpp"
#include "lists.hpp"
#include "local_operator.hpp"
#include "util/eigen_utils.hpp"
#include "util/matrix_traits.hpp"

#include <Eigen/SparseCore>

#ifdef NIHU_FMM_PARALLEL
#include <omp.h>
#endif

#include <algorithm>	// std::find (used by sparse_index_computer)
#include <chrono>
#include <memory>
#include <type_traits>
#include <utility>
#include <vector>
28 
29 namespace NiHu
30 {
31 namespace fmm
32 {
33 
34 namespace internal
35 {
36 
/// \brief Computes the (receiver, source) index pairs of the sparse P2P matrix
/// \tparam Operator the P2P operator type
/// \tparam ClusterDerived the cluster type (CRTP derived) of the cluster tree
/// \tparam isLocal true if the operator is a local operator (couples a cluster only with itself)
template <class Operator, class ClusterDerived, bool isLocal>
class sparse_index_computer;
45 
47 template <class Operator, class ClusterDerived>
48 class sparse_index_computer<Operator, ClusterDerived, false>
49 {
50  typedef typename std::decay<Operator>::type operator_t;
51  typedef unsigned int index_t;
52  typedef std::vector<std::pair<index_t, index_t> > indices_t;
53  typedef ClusterDerived cluster_t;
54  typedef cluster_tree<cluster_t> tree_t;
55 
56 public:
57 
58  static indices_t eval(tree_t const &tree,
59  interaction_lists::list_t const &p2p_list)
60  {
61  indices_t indices;
62 
63  // determine number of source- receiver pairs
64  size_t s = 0;
65  for (unsigned to = 0; to < p2p_list.size(); ++to)
66  for (auto from : p2p_list[to])
67  s += tree[to].get_rec_node_idx().size() *
68  tree[from].get_src_node_idx().size();
69 
70  indices.reserve(s);
71 
72  for (unsigned to = 0; to < p2p_list.size(); ++to)
73  for (auto from : p2p_list[to])
74  for (auto i : tree[to].get_rec_node_idx()) // loop over receiver nodes
75  for (auto j : tree[from].get_src_node_idx()) // loop over source nodes
76  indices.push_back(std::make_pair(i, j));
77 
78  return indices;
79  }
80 };
81 
83 template <class Operator, class ClusterDerived>
84 class sparse_index_computer<Operator, ClusterDerived, true>
85 {
86  typedef typename std::decay<Operator>::type operator_t;
87  typedef std::vector<std::pair<unsigned int, unsigned int> > indices_t;
88  typedef ClusterDerived cluster_t;
89  typedef cluster_tree<cluster_t> tree_t;
90 
91 public:
92  static indices_t eval(tree_t const &tree,
93  interaction_lists::list_t const &p2p_list)
94  {
95  indices_t indices;
96 
97  // determine number of src receiver pairs
98  size_t s = 0;
99  for (unsigned to = 0; to < p2p_list.size(); ++to)
100  if (std::find(p2p_list[to].begin(), p2p_list[to].end(), to) != p2p_list[to].end())
101  s += tree[to].get_rec_node_idx().size();
102 
103  indices.reserve(s);
104 
105  for (unsigned to = 0; to < p2p_list.size(); ++to)
106  if (std::find(p2p_list[to].begin(), p2p_list[to].end(), to) != p2p_list[to].end())
107  for (auto i : tree[to].get_rec_node_idx())
108  indices.push_back(std::make_pair(i, i));
109 
110  return indices;
111  }
112 };
113 
/// \brief Assembles the sparse P2P matrix by evaluating the operator at each index pair
/// \tparam Operator the P2P operator type
/// \tparam ClusterDerived the cluster type (CRTP derived) of the cluster tree
/// \tparam isResultEigen true if op(i, j) returns an Eigen block matrix, false for a scalar
template <class Operator, class ClusterDerived, bool isResultEigen = is_eigen<
	typename std::decay<Operator>::type::result_t>::value>
class sparse_computer;
123 
124 
126 template <class Operator, class ClusterDerived>
127 class sparse_computer<Operator, ClusterDerived, true>
128 {
129  typedef typename std::decay<Operator>::type operator_t;
130  typedef typename operator_t::result_t result_t;
131  typedef typename scalar<result_t>::type scalar_t;
132  static size_t const rows = num_rows<result_t>::value;
133  static size_t const cols = num_cols<result_t>::value;
134  typedef std::ptrdiff_t storage_index_t;
135  typedef Eigen::SparseMatrix<scalar_t, Eigen::RowMajor, storage_index_t> sparse_t;
136  typedef ClusterDerived cluster_t;
137  typedef cluster_tree<cluster_t> tree_t;
138 
139  static bool const is_local = is_local_operator<operator_t>::value;
140  typedef sparse_index_computer<Operator, ClusterDerived, is_local> index_computer_t;
141 
142 public:
154  static sparse_t *eval(Operator &&op, tree_t const &tree, interaction_lists::list_t const &list, size_t &assembly_time)
155  {
156  auto tstart = std::chrono::steady_clock::now();
157 
158  auto indices = index_computer_t::eval(tree, list);
159  size_t s = indices.size();
160 
161  sparse_t *pmat = new sparse_t(rows * tree.get_n_rec_nodes(), cols * tree.get_n_src_nodes());
162  Eigen::Matrix<unsigned, Eigen::Dynamic, 1> num_nonzeros_per_row(pmat->rows(),1);
163  num_nonzeros_per_row.setZero();
164 
165  for (size_t isrcrec = 0; isrcrec < s; ++isrcrec)
166  {
167  auto i = indices[isrcrec].first;
168  for (int ii = 0; ii < rows; ++ii)
169  num_nonzeros_per_row(i * rows + ii) += cols;
170  }
171  pmat->reserve(num_nonzeros_per_row);
172 
173 #ifdef NIHU_FMM_PARALLEL
174 // #pragma omp parallel for
175 #endif
176  for (int isrcrec = 0; isrcrec < s; ++isrcrec)
177  {
178  auto i = indices[isrcrec].first;
179  auto j = indices[isrcrec].second;
180  result_t opmat = op(i, j);
181  for (size_t ii = 0; ii < rows; ++ii) // loop over matrix rows
182  for (size_t jj = 0; jj < cols; ++jj) // loop over matrix cols
183  pmat->insert(i * rows + ii, j * cols + jj) = opmat(ii, jj);
184  }
185 #ifdef NIHU_FMM_PARALLEL
186 // #pragma omp barrier
187 #endif
188  pmat->makeCompressed();
189 
190  auto tend = std::chrono::steady_clock::now();
191  assembly_time = std::chrono::duration_cast<std::chrono::microseconds>(tend - tstart).count();
192 
193  return pmat;
194  }
195 };
196 
198 template <class Operator, class ClusterDerived>
199 class sparse_computer<Operator, ClusterDerived, false>
200 {
201  typedef typename std::decay<Operator>::type operator_t;
202  typedef typename operator_t::result_t result_t;
203  using scalar_t = typename scalar<result_t>::type;
204  typedef std::ptrdiff_t storage_index_t;
205  typedef Eigen::SparseMatrix<scalar_t, Eigen::RowMajor, storage_index_t> sparse_t;
206  typedef ClusterDerived cluster_t;
207  typedef cluster_tree<cluster_t> tree_t;
208 
209  static bool const is_local = is_local_operator<operator_t>::value;
210  typedef sparse_index_computer<Operator, ClusterDerived, is_local> index_computer_t;
211 
212 public:
213 
214  static sparse_t *eval(
215  Operator &&op,
216  tree_t const &tree,
217  interaction_lists::list_t const &list,
218  size_t &assembly_time)
219  {
220  auto tstart = std::chrono::steady_clock::now();
221 
222  // i-j indices of final sparse matrix
223  auto indices = index_computer_t::eval(tree, list);
224  size_t s = indices.size();
225 
226  sparse_t *pmat = new sparse_t(tree.get_n_rec_nodes(), tree.get_n_src_nodes());
227  Eigen::Matrix<unsigned, Eigen::Dynamic, 1> num_nonzeros_per_row(pmat->rows(),1);
228  num_nonzeros_per_row.setZero();
229 
230  for (size_t isrcrec = 0; isrcrec < s; ++isrcrec)
231  num_nonzeros_per_row(indices[isrcrec].first)++;
232  pmat->reserve(num_nonzeros_per_row);
233 
234 #ifdef NIHU_FMM_PARALLEL
235 // #pragma omp parallel for
236 #endif
237  for (int isrcrec = 0; isrcrec < s; ++isrcrec)
238  {
239  size_t i = indices[isrcrec].first;
240  size_t j = indices[isrcrec].second;
241  pmat->insert(i, j) = op(i, j);
242  }
243 #ifdef NIHU_FMM_PARALLEL
244 // #pragma omp barrier
245 #endif
246  pmat->makeCompressed();
247 
248  auto tend = std::chrono::steady_clock::now();
249  assembly_time = std::chrono::duration_cast<std::chrono::microseconds>(tend - tstart).count();
250 
251  return pmat;
252  }
253 };
254 
255 
256 } // end of namespace internal
257 
264 template <class Scalar, size_t NumDofPerSrc, size_t NumDofPerRec>
266  : public fmm_operator<p2p_tag>
267 {
268 private:
269 
270 public:
271  typedef Scalar scalar_t;
272 
273  // RowMajor is needed to allow Eigen's parallelization of sparse matrix * dense vector
274  typedef Eigen::SparseMatrix<scalar_t, Eigen::RowMajor, std::ptrdiff_t> sparse_t;
275  typedef Eigen::Matrix<scalar_t, Eigen::Dynamic, 1> response_t;
276 
277  static size_t const num_dof_per_src = NumDofPerSrc;
278  static size_t const num_dof_per_rec = NumDofPerRec;
279 
286  template <class Operator, class ClusterDerived>
287  p2p_precompute(Operator &&op, cluster_tree<ClusterDerived> const &tree, interaction_lists::list_t const &list)
288  : m_pmat(internal::sparse_computer<Operator, ClusterDerived>::eval(std::forward<Operator>(op), tree, list, m_assembly_time))
289  {
290  }
291 
296  sparse_t const &get_sparse_matrix() const
297  {
298  return *m_pmat;
299  }
300 
305  size_t get_assembly_time() const
306  {
307  return m_assembly_time;
308  }
309 
316  template <class RHS>
317  response_t operator*(RHS const &rhs) const
318  {
319  return *m_pmat * rhs;
320  }
321 
322 private:
324  std::shared_ptr<sparse_t> m_pmat;
326  size_t m_assembly_time;
327 };
328 
338 template <class Operator, class ClusterDerived>
339 auto create_p2p_precompute(Operator &&op,
340  cluster_tree<ClusterDerived> const &tree,
341  interaction_lists::list_t const &list)
342 {
343  typedef typename std::decay<Operator>::type operator_t;
344  return p2p_precompute<
345  typename scalar<typename operator_t::result_t>::type,
346  operator_t::num_dof_per_src,
347  operator_t::num_dof_per_rec
348  >(std::forward<Operator>(op), tree, list);
349 }
350 
351 
352 } // end of namespace fmm
353 } // end of namespace NiHu
354 
355 #endif /* NIHU_P2P_PRECOMPUTE_HPP_INCLUDED */
fmm_operator.hpp
FMM operator types and tags.
NiHu::fmm::p2p_precompute
Precomputed P2P operator.
Definition: p2p_precompute.hpp:265
NiHu::fmm::chebyshev_cluster
Cluster class of the Black Box FMM.
Definition: chebyshev_cluster.hpp:28
eigen_utils.hpp
Implementation of Eigen related utility classes.
NiHu::fmm::cluster_tree
Class representing a cluster tree.
Definition: cluster_tree.hpp:33
cluster_tree.hpp
Implementation of class NiHu::fmm::cluster_tree.
NiHu::fmm::p2p_precompute::get_sparse_matrix
const sparse_t & get_sparse_matrix() const
Retrieve assembled sparse matrix.
Definition: p2p_precompute.hpp:296
lists.hpp
Definition of class NiHu::fmm::interaction_lists.
NiHu::fmm::fmm_operator
Operator defining its tag type.
Definition: fmm_operator.hpp:85
NiHu::fmm::p2p_precompute::get_assembly_time
size_t get_assembly_time() const
Retrieve matrix assembly time.
Definition: p2p_precompute.hpp:305
local_operator.hpp
Interface of local operator.
NiHu::fmm::p2p_precompute::operator*
response_t operator*(RHS const &rhs) const
Matrix-vector multiplication.
Definition: p2p_precompute.hpp:317
NiHu::fmm::create_p2p_precompute
auto create_p2p_precompute(Operator &&op, cluster_tree< ClusterDerived > const &tree, interaction_lists::list_t const &list)
Create function for a precomputed P2P operator.
Definition: p2p_precompute.hpp:339
NiHu::fmm::p2p_precompute::p2p_precompute
p2p_precompute(Operator &&op, cluster_tree< ClusterDerived > const &tree, interaction_lists::list_t const &list)
Constructor initializes and computes the sparse matrix.
Definition: p2p_precompute.hpp:287
matrix_traits.hpp
compile time properties of matrices