// Include guard for the FMM matrix header.
// NOTE(review): this listing is fragmentary — original source lines are
// missing between the numbered statements; comments are based only on the
// visible code.
7 #ifndef FMM_MATRIX_HPP_INCLUDED
8 #define FMM_MATRIX_HPP_INCLUDED
// MEX build: force text to appear in the MATLAB console by round-tripping
// the string through mexCallMATLAB, then destroying the temporary mxArrays
// so no MATLAB-managed memory is leaked. The scalar `out` read back from
// the call is otherwise unused. NOTE(review): the function invoked is
// "input", which normally blocks waiting for user input — presumably used
// here to force console flushing; confirm intent against the full source.
// (The macro's closing brace is outside this listing.)
20 #define mexForcedPrintf(text) { \
21 mxArray *new_number; \
22 mxArray *str = mxCreateString( (text) ); \
23 mexCallMATLAB(1,&new_number,1,&str,"input"); \
24 double out = mxGetScalar(new_number); \
26 mxDestroyArray(new_number); \
27 mxDestroyArray(str); \
// Non-MEX build: plain stdout logging. Presumably the #else branch of a
// MEX/non-MEX conditional not visible in this listing.
30 #define mexForcedPrintf(text) std::cout << text
// OpenMP parallelisation is compiled in only when NIHU_FMM_PARALLEL is
// defined (presumably guards an OpenMP header include here).
34 #ifdef NIHU_FMM_PARALLEL
// Fragment of the fmm_matrix class template's parameter list: one operator
// type per FMM transfer step (P2P is declared outside this view; then
// particle-to-multipole, particle-to-local, multipole-to-particle,
// local-to-particle, multipole-to-multipole, local-to-local and
// multipole-to-local). The template header and class head are not visible
// in this listing.
66 class P2M,
class P2L,
class M2P,
class L2P,
67 class M2M,
class L2L,
class M2L
// Decayed operator types: strip references/cv-qualifiers so the stored
// members are plain value types regardless of how the operators were
// passed (lvalue or rvalue).
72 typedef typename std::decay<P2P>::type p2p_t;
73 typedef typename std::decay<P2M>::type p2m_t;
74 typedef typename std::decay<P2L>::type p2l_t;
75 typedef typename std::decay<M2P>::type m2p_t;
76 typedef typename std::decay<L2P>::type l2p_t;
77 typedef typename std::decay<M2M>::type m2m_t;
78 typedef typename std::decay<L2L>::type l2l_t;
79 typedef typename std::decay<M2L>::type m2l_t;
// The cluster type is taken from the M2L operator; presumably all eight
// operators act on the same cluster tree type.
81 typedef typename m2l_t::cluster_t cluster_t;
// Scalar type is derived from the sparse near-field (P2P) matrix type;
// excitation and response are dense Eigen column vectors of that scalar.
85 typedef typename p2p_t::sparse_t sparse_t;
86 typedef typename scalar<sparse_t>::type scalar_t;
87 typedef Eigen::Matrix<scalar_t, Eigen::Dynamic, 1> excitation_t;
88 typedef Eigen::Matrix<scalar_t, Eigen::Dynamic, 1> response_t;
// Constructor fragment: perfectly forwards the eight FMM operators into
// the corresponding members, sizes the level timer by tree depth, and
// allocates one rhs/lhs segment slot per cluster. (The constructor head,
// further parameters and body braces are missing from this listing.)
109 P2M &&p2m, P2L &&p2l, M2P &&m2p, L2P &&l2p,
110 M2M &&m2m, L2L &&l2l, M2L &&m2l,
113 : m_p2p(std::forward<P2P>(
p2p))
114 , m_p2m(std::forward<P2M>(p2m))
115 , m_p2l(std::forward<P2L>(p2l))
116 , m_m2p(std::forward<M2P>(m2p))
117 , m_l2p(std::forward<L2P>(l2p))
118 , m_m2m(std::forward<M2M>(m2m))
119 , m_l2l(std::forward<L2L>(l2l))
120 , m_m2l(std::forward<M2L>(m2l))
// One timer slot per tree level; one excitation/response segment per
// cluster (only leaf segments are presumably ever filled — see below).
123 , m_timer(tree.get_n_levels())
124 , m_rhs_segments(tree.get_n_clusters())
125 , m_lhs_segments(tree.get_n_clusters())
// Announce construction (MATLAB console or stdout depending on build).
129 mexForcedPrintf(
"fmm_matrix constructor\n");
// Presumably pre-sizes the rhs segment of each source leaf cluster and the
// lhs segment of each receiver leaf cluster — the statements that consume
// `clus` are missing from this listing; TODO confirm against full source.
131 for (
auto c : m_tree.get_leaf_src_indices())
133 cluster_t
const &clus = m_tree[c];
136 for (
auto c : m_tree.get_leaf_rec_indices())
138 cluster_t
const &clus = m_tree[c];
// set_cut_ratio fragment: stores the heuristic clusters-per-thread ratio
// used to choose the DFS/BFS cut level.
149 m_cut_ratio = cut_ratio;
// Cut-level computation fragment: initial candidate is level 2 (the first
// level with well-separated clusters), clamped by tree depth.
// NOTE(review): surrounding search loop / return logic is missing from
// this listing; verify the std::min direction against the full source.
165 size_t cut_level = std::min<size_t>(2, m_tree.
get_n_levels() - 1);
// Parallel builds: pick the cut so a level holds at least m_cut_ratio
// clusters per available OpenMP thread.
167 #ifdef NIHU_FMM_PARALLEL
168 int max_num_threads = omp_get_max_threads();
169 size_t cut_num_clusters = max_num_threads * m_cut_ratio;
173 if (num_clusters > cut_num_clusters)
// rows()/cols() fragments: the FMM matrix inherits its dimensions from
// the assembled sparse near-field (P2P) matrix.
186 return m_p2p.get_sparse_matrix().rows();
193 return m_p2p.get_sparse_matrix().cols();
// reorder_excitation fragment: scatters a globally-indexed excitation
// vector (any Eigen expression via RhsDerived) into per-cluster
// m_rhs_segments. For each source leaf cluster, local node i maps to
// global DOF index ii through the cluster's source node index list.
// (The copy statements themselves are missing from this listing.)
200 template <
class RhsDerived>
203 for (
auto c : m_tree.get_leaf_src_indices())
205 cluster_t
const &clus = m_tree[c];
206 for (
size_t i = 0; i < clus.get_n_src_nodes(); ++i)
208 size_t ii = clus.get_src_node_idx()[i];
// upward_pass_dfs fragment: aggregate each source child's multipole into
// the parent (`root`) via the M2M translation; non-source children are
// presumably skipped by the visible is_source() test.
222 for (
auto c : m_tree[root].get_children())
224 if (m_tree[c].is_source())
227 multipoles[root] += m_m2m(root, c) * multipoles[c];
// downward_pass_dfs fragment for receiver cluster `to`:
//   1) M2L: translate interaction-list multipoles into the local;
//   2) L2L: pull down locals listed in the L2L interaction list;
//   3) recurse into children (recursive call missing from this listing).
// Non-receiver clusters are rejected early by the visible guard.
239 std::vector<multipole_t>
const &multipoles,
size_t to)
241 if (!m_tree[to].is_receiver())
244 for (
auto from : m_lists.
get_list(interaction_lists::M2L)[to])
245 locals[to] += m_m2l(to, from) * multipoles[from];
247 for (
auto from : m_lists.
get_list(interaction_lists::L2L)[to])
248 locals[to] += m_l2l(to, from) * locals[from];
250 for (
auto c : m_tree[to].get_children())
// Hybrid DFS driver fragments: the clusters of the cut level, indices
// [a, b) (`a` is defined outside this listing), are traversed depth-first
// — OpenMP-parallel when NIHU_FMM_PARALLEL is set (loop index is int as
// required by OpenMP 2.x) — and the levels above the cut are then
// finished breadth-first.
263 int b = int(m_tree.
level_end(cut_level));
265 #ifdef NIHU_FMM_PARALLEL
266 #pragma omp parallel for
268 for (
int to = a; to < b; ++to)
270 #ifdef NIHU_FMM_PARALLEL
// Finish the upward pass above the cut with the BFS variant.
275 upward_pass_bfs(multipoles, cut_level - 1);
// downward_pass_dfs driver: first propagate locals breadth-first down to
// the level just above the cut, then recurse depth-first per cut-level
// cluster (loop body missing from this listing).
278 void downward_pass_dfs(std::vector<local_t> &locals,
279 std::vector<multipole_t>
const &multipoles)
285 size_t max_to_level = cut_level - 1;
286 downward_pass_bfs(locals, multipoles, max_to_level);
292 #ifdef NIHU_FMM_PARALLEL
293 #pragma omp parallel for
295 for (
int to =
int(a); to < int(b); ++to)
// upward_pass_bfs (one-argument overload): presumably forwards to the
// two-argument form with the deepest source level; body not visible here.
299 void upward_pass_bfs(std::vector<multipole_t> &multipoles)
// Breadth-first M2M aggregation: walk levels upward from lowest_to_level
// to level 2, translating each source child's multipole into its parent.
// The per-level cluster loop is OpenMP-parallel when enabled (its header
// is missing from this listing). Note iLevel is size_t; the loop assumes
// lowest_to_level >= 2, otherwise it does not execute.
304 void upward_pass_bfs(std::vector<multipole_t> &multipoles,
size_t lowest_to_level)
307 for (
size_t iLevel = lowest_to_level; iLevel >= 2; --iLevel)
310 #ifdef NIHU_FMM_PARALLEL
311 #pragma omp parallel for
315 for (
auto from : m_tree[to].get_children())
// Children that are not sources contribute nothing and are skipped.
317 if (!m_tree[from].is_source())
319 multipoles[to] += m_m2m(to, from) * multipoles[from];
322 #ifdef NIHU_FMM_PARALLEL
// downward_pass_bfs (two-argument overload): forwards with max_to_level,
// presumably the deepest receiver level (its definition is not visible in
// this listing).
329 void downward_pass_bfs(std::vector<local_t> &locals,
330 std::vector<multipole_t>
const &multipoles)
333 downward_pass_bfs(locals, multipoles, max_to_level);
// Breadth-first local-expansion build, levels 2..max_to_level. Per level
// (indices [a, b), bounds defined outside this listing; int loop index
// for OpenMP 2.x compatibility):
//   phase 1 — M2L: far-field interaction-list multipoles into locals;
//   phase 2 — L2L: pull each receiver's parent local expansion down.
336 void downward_pass_bfs(std::vector<local_t> &locals,
337 std::vector<multipole_t>
const &multipoles,
340 for (
unsigned iLevel = 2; iLevel <= max_to_level; ++iLevel)
346 #ifdef NIHU_FMM_PARALLEL
347 #pragma omp parallel for
349 for (
int to =
int(a); to < int(b); ++to)
350 for (
size_t from : m_lists.
get_list(interaction_lists::M2L)[to])
351 locals[to] += m_m2l(to, from) * multipoles[from];
352 #ifdef NIHU_FMM_PARALLEL
362 #ifdef NIHU_FMM_PARALLEL
363 #pragma omp parallel for
365 for (
int to =
int(a); to < int(b); ++to)
// Non-receiver clusters have no local expansion to update; skip them.
367 if (!m_tree[to].is_receiver())
369 size_t from = m_tree[to].get_parent();
370 locals[to] += m_l2l(to, from) * locals[from];
372 #ifdef NIHU_FMM_PARALLEL
// reorder_response fragment: the mirror of reorder_excitation — gathers
// the per-cluster m_lhs_segments of each receiver leaf back into a
// globally-indexed response vector; local node i maps to global DOF index
// ii through the cluster's receiver node index list. (The copy statements
// themselves are missing from this listing.)
385 template <
class Derived>
388 for (
auto c : m_tree.get_leaf_rec_indices())
390 cluster_t
const &clus = m_tree[c];
391 for (
size_t i = 0; i < clus.get_n_rec_nodes(); ++i)
393 size_t ii = clus.get_rec_node_idx()[i];
// operator* fragment: full matrix-vector product (the FMM "matvec").
// Visible phases: P2P near field, buffer setup, excitation reorder, P2L,
// P2M, tree traversal (BFS or DFS, compile-time selected), L2P, M2P and
// response reorder. Each phase is bracketed by NIHU_DEBUGGING trace output
// (the matching #endif lines are missing from this listing, as are the
// timer calls presumably interleaved between phases).
403 template <
class ExcType>
406 #ifdef NIHU_DEBUGGING
407 std::cout <<
"starting operator * " << std::endl;
// Phase 1 — near field: dense response seed from the sparse P2P matrix.
412 #ifdef NIHU_DEBUGGING
413 std::cout <<
"Computing P2P " << std::endl;
417 response_t lhs = m_p2p * rhs;
419 #ifdef NIHU_DEBUGGING
420 std::cout <<
"Finished P2P " << std::endl;
// Phase 2 — allocate and zero-initialise one multipole per source cluster
// and one local per receiver cluster; levels 0 and 1 are skipped (no
// well-separated interactions there).
425 #ifdef NIHU_DEBUGGING
426 std::cout <<
"Instantiating local & multipole " << std::endl;
430 std::vector<multipole_t> multipoles(n_clusters);
431 std::vector<local_t> locals(n_clusters);
432 for (
size_t c = 0; c < n_clusters; ++c)
434 if (m_tree[c].get_level() < 2)
436 if (m_tree[c].is_source())
437 multipoles[c] = m_tree[c].zero_multipole();
438 if (m_tree[c].is_receiver())
439 locals[c] = m_tree[c].zero_local();
441 #ifdef NIHU_DEBUGGING
442 std::cout <<
"Instantiating local & multipole ready " << std::endl;
// Phase 3 — scatter the excitation into per-leaf segments (the
// reorder_excitation call is missing from this listing).
445 #ifdef NIHU_DEBUGGING
446 std::cout <<
"Reordering excitation " << std::endl;
451 #ifdef NIHU_DEBUGGING
452 std::cout <<
"Reorder ready " << std::endl;
// Phase 4 — P2L: direct particle-to-local contributions from the P2L
// interaction list of each receiver leaf.
456 #ifdef NIHU_DEBUGGING
457 std::cout <<
"Computing P2L " << std::endl;
462 auto const &P2Llist = m_lists.
get_list(interaction_lists::P2L);
463 for (
auto to : m_tree.get_leaf_rec_indices())
464 for (
auto from : P2Llist[to])
465 locals[to] += m_p2l(to, from) * m_rhs_segments[from];
468 #ifdef NIHU_DEBUGGING
469 std::cout <<
"Computing P2L ready " << std::endl;
// Phase 5 — P2M: form each source leaf's multipole from its excitation
// segment; independent per leaf, hence OpenMP-parallel when enabled (int
// loop index for OpenMP 2.x compatibility).
472 #ifdef NIHU_DEBUGGING
473 std::cout <<
"Computing P2M " << std::endl;
480 auto const &src_idx = m_tree.get_leaf_src_indices();
481 #ifdef NIHU_FMM_PARALLEL
482 #pragma omp parallel for
484 for (
int i = 0; i < int(src_idx.size()); ++i)
486 size_t to = src_idx[i];
487 multipoles[to] += m_p2m(to) * m_rhs_segments[to];
489 #ifdef NIHU_FMM_PARALLEL
493 #ifdef NIHU_DEBUGGING
494 std::cout <<
"Computing P2M ready " << std::endl;
// Phase 6 — tree traversal: compile-time choice between breadth-first
// and depth-first up/down passes; a hard #error forces the client to pick
// one explicitly.
498 #if defined NIHU_FMM_TRAVERSE_BFS
500 #ifdef NIHU_DEBUGGING
501 std::cout <<
"Starting upward pass BFS " << std::endl;
504 upward_pass_bfs(multipoles);
505 #ifdef NIHU_DEBUGGING
506 std::cout <<
"Upward pass BFS ready" << std::endl;
509 #ifdef NIHU_DEBUGGING
510 std::cout <<
"Starting downward pass BFS " << std::endl;
513 downward_pass_bfs(locals, multipoles);
514 #ifdef NIHU_DEBUGGING
515 std::cout <<
"Downward pass BFS ready" << std::endl;
518 #elif defined NIHU_FMM_TRAVERSE_DFS
520 downward_pass_dfs(locals, multipoles);
522 # error You need to explicitly define NIHU_FMM_TRAVERSE_BFS or NIHU_FMM_TRAVERSE_DFS tree traversing algorithm
// Phase 7 — L2P: evaluate each receiver leaf's local expansion into its
// response segment; independent per leaf, OpenMP-parallel when enabled.
// Note plain assignment here (=), later phases accumulate (+=).
526 #ifdef NIHU_DEBUGGING
527 std::cout <<
"Computing L2P " << std::endl;
532 auto const &rec_idx = m_tree.get_leaf_rec_indices();
533 #ifdef NIHU_FMM_PARALLEL
534 #pragma omp parallel for
536 for (
int i = 0; i < int(rec_idx.size()); ++i)
538 size_t to = rec_idx[i];
539 m_lhs_segments[to] = m_l2p(to) * locals[to];
541 #ifdef NIHU_FMM_PARALLEL
545 #ifdef NIHU_DEBUGGING
546 std::cout <<
"Computing L2P ready" << std::endl;
// Phase 8 — M2P: direct multipole-to-particle contributions from the M2P
// interaction list of each receiver leaf (sequential; writes race-free
// since each `to` owns its segment).
550 #ifdef NIHU_DEBUGGING
551 std::cout <<
"Computing M2P " << std::endl;
555 for (
auto to : m_tree.get_leaf_rec_indices())
556 for (
auto from : m_lists.
get_list(interaction_lists::M2P)[to])
557 m_lhs_segments[to] += m_m2p(to, from) * multipoles[from];
559 #ifdef NIHU_DEBUGGING
560 std::cout <<
"Computing M2P ready" << std::endl;
// Phase 9 — gather segments back into the global response (the
// reorder_response call and return statement are missing from this
// listing).
564 #ifdef NIHU_DEBUGGING
565 std::cout <<
"Reordering response" << std::endl;
569 #ifdef NIHU_DEBUGGING
570 std::cout <<
"Reordering response ready " << std::endl;
// Accessor/member fragments: mutable access to the per-level timer, the
// diagonal of the sparse near-field (P2P) matrix (presumably used for
// preconditioning by callers; confirm), and the per-cluster excitation /
// response segment storage.
581 fmm_timer &get_timer()
590 return m_p2p.get_sparse_matrix().diagonal();
605 std::vector<excitation_t> m_rhs_segments;
606 std::vector<response_t> m_lhs_segments;
// Factory fragment: deduces the eight operator types (plus the Cluster
// type of the tree) and perfectly forwards the operators into an
// fmm_matrix instance, preserving value category. The remaining
// parameters (tree, interaction lists) and the return-statement head are
// missing from this listing.
623 template <
class P2P,
class P2M,
class P2L,
class M2P,
class L2P,
624 class M2M,
class L2L,
class M2L,
class Cluster>
625 fmm_matrix<P2P, P2M, P2L, M2P, L2P, M2M, L2L, M2L>
628 P2M &&p2m, P2L &&p2l, M2P &&m2p, L2P &&l2p,
629 M2M &&m2m, L2L &&l2l, M2L &&m2l,
// Trace factory entry (MATLAB console or stdout depending on build).
634 mexForcedPrintf(
"create_fmm_matrix from separate\n");
637 std::forward<P2P>(
p2p),
638 std::forward<P2M>(p2m),
639 std::forward<P2L>(p2l),
640 std::forward<M2P>(m2p),
641 std::forward<L2P>(l2p),
642 std::forward<M2M>(m2m),
643 std::forward<L2L>(l2l),
644 std::forward<M2L>(m2l),
// Second factory overload fragment: builds an fmm_matrix from a variadic
// collection of operators (CollOps pack). The signature and body continue
// past the end of this listing.
659 template <
class Cluster,
class ...CollOps>
666 mexForcedPrintf(
"create_fmm_matrix from collection\n");