40#ifndef TPETRA_DETAILS_PACKCRSMATRIX_DEF_HPP
41#define TPETRA_DETAILS_PACKCRSMATRIX_DEF_HPP
43#include "TpetraCore_config.h"
44#include "Teuchos_Array.hpp"
45#include "Teuchos_ArrayView.hpp"
89namespace PackCrsMatrixImpl {
97template<
class OutputOffsetsViewType,
99 class InputOffsetsViewType,
100 class InputLocalRowIndicesViewType,
101 class InputLocalRowPidsViewType,
103#ifdef HAVE_TPETRA_DEBUG
// Element-type aliases pulled from the view template parameters.
111 typedef typename OutputOffsetsViewType::non_const_value_type output_offset_type;
112 typedef typename CountsViewType::non_const_value_type count_type;
113 typedef typename InputOffsetsViewType::non_const_value_type input_offset_type;
114 typedef typename InputLocalRowIndicesViewType::non_const_value_type local_row_index_type;
115 typedef typename InputLocalRowPidsViewType::non_const_value_type local_row_pid_type;
// The output offsets view decides which device type the rest of the class uses.
117 typedef typename OutputOffsetsViewType::device_type device_type;
// Compile-time requirements on the template parameters: matching execution
// spaces, Kokkos::View-ness, non-const value types, and integral entries.
118 static_assert (std::is_same<
typename CountsViewType::device_type::execution_space,
119 typename device_type::execution_space>::value,
120 "OutputOffsetsViewType and CountsViewType must have the same execution space.");
121 static_assert (Kokkos::is_view<OutputOffsetsViewType>::value,
122 "OutputOffsetsViewType must be a Kokkos::View.");
// value_type equals non_const_value_type only when the view's entries are
// not const-qualified; that is what this comparison tests.
123 static_assert (std::is_same<typename OutputOffsetsViewType::value_type, output_offset_type>::value,
124 "OutputOffsetsViewType must be a nonconst Kokkos::View.");
125 static_assert (std::is_integral<output_offset_type>::value,
126 "The type of each entry of OutputOffsetsViewType must be a built-in integer type.");
127 static_assert (Kokkos::is_view<CountsViewType>::value,
128 "CountsViewType must be a Kokkos::View.");
// NOTE(review): this check compares CountsViewType::value_type against
// output_offset_type, not count_type. As written it also forces the counts
// and offsets views to share an entry type, which the message does not
// mention. count_type looks like the intended operand -- confirm.
129 static_assert (std::is_same<typename CountsViewType::value_type, output_offset_type>::value,
130 "CountsViewType must be a nonconst Kokkos::View.");
131 static_assert (std::is_integral<count_type>::value,
132 "The type of each entry of CountsViewType must be a built-in integer type.");
133 static_assert (Kokkos::is_view<InputOffsetsViewType>::value,
134 "InputOffsetsViewType must be a Kokkos::View.");
135 static_assert (std::is_integral<input_offset_type>::value,
136 "The type of each entry of InputOffsetsViewType must be a built-in integer type.");
137 static_assert (Kokkos::is_view<InputLocalRowIndicesViewType>::value,
138 "InputLocalRowIndicesViewType must be a Kokkos::View.");
139 static_assert (std::is_integral<local_row_index_type>::value,
140 "The type of each entry of InputLocalRowIndicesViewType must be a built-in integer type.");
// Constructor (its first line is missing from this excerpt). Stores the views
// and the per-item byte sizes, then validates the view extents on the host.
143 const CountsViewType& counts,
144 const InputOffsetsViewType& rowOffsets,
145 const InputLocalRowIndicesViewType& lclRowInds,
146 const InputLocalRowPidsViewType& lclRowPids,
147 const count_type sizeOfLclCount,
148 const count_type sizeOfGblColInd,
149 const count_type sizeOfPid,
150 const count_type sizeOfValue) :
151 outputOffsets_ (outputOffsets),
153 rowOffsets_ (rowOffsets),
154 lclRowInds_ (lclRowInds),
155 lclRowPids_ (lclRowPids),
156 sizeOfLclCount_ (sizeOfLclCount),
157 sizeOfGblColInd_ (sizeOfGblColInd),
158 sizeOfPid_ (sizeOfPid),
159 sizeOfValue_ (sizeOfValue),
// One count is expected per row to pack ...
163 const size_t numRowsToPack =
static_cast<size_t> (lclRowInds_.extent (0));
165 if (numRowsToPack !=
static_cast<size_t> (counts_.extent (0))) {
166 std::ostringstream os;
167 os <<
"lclRowInds.extent(0) = " << numRowsToPack
168 <<
" != counts.extent(0) = " << counts_.extent (0)
// Unconditional throw: the enclosing `if` has already established the mismatch.
170 TEUCHOS_TEST_FOR_EXCEPTION(
true, std::invalid_argument, os.str ());
// ... and the offsets view must have exactly one more entry than that.
172 if (
static_cast<size_t> (numRowsToPack + 1) !=
173 static_cast<size_t> (outputOffsets_.extent (0))) {
174 std::ostringstream os;
175 os <<
"lclRowInds.extent(0) + 1 = " << (numRowsToPack + 1)
176 <<
" != outputOffsets.extent(0) = " << outputOffsets_.extent (0)
178 TEUCHOS_TEST_FOR_EXCEPTION(
true, std::invalid_argument, os.str ());
// Scan body, invoked once per index curInd with the running sum `update`.
// It writes the current running sum into outputOffsets_ and computes the byte
// count for row curInd into counts_. The statements that guard the write with
// `final`, and the one that adds to `update`, are not visible in this excerpt.
183 KOKKOS_INLINE_FUNCTION
void
184 operator() (
const local_row_index_type& curInd,
185 output_offset_type& update,
186 const bool final)
const
// Range checks on the scan index before it is used to index outputOffsets_.
// What happens on failure is not visible here.
189 if (curInd <
static_cast<local_row_index_type
> (0)) {
197 if (curInd >=
static_cast<local_row_index_type
> (outputOffsets_.extent (0))) {
202 outputOffsets_(curInd) = update;
// The scan range has one more index than there are rows (see the extent
// checks in the constructor), so the last index has no row to count.
205 if (curInd <
static_cast<local_row_index_type
> (counts_.extent (0))) {
206 const auto lclRow = lclRowInds_(curInd);
// Reject a local row index that would read rowOffsets_ out of bounds.
207 if (
static_cast<size_t> (lclRow + 1) >=
static_cast<size_t> (rowOffsets_.extent (0)) ||
208 static_cast<local_row_index_type
> (lclRow) <
static_cast<local_row_index_type
> (0)) {
// Number of entries in the row, taken from consecutive row offsets.
216 const count_type count =
217 static_cast<count_type
> (rowOffsets_(lclRow+1) - rowOffsets_(lclRow));
// A row with no entries contributes zero bytes. Otherwise the size is the
// entry-count header plus a per-entry cost; the PID term is included only
// when a PID view was supplied. The tail of this expression is missing from
// the excerpt.
223 const count_type numBytes = (count == 0) ?
224 static_cast<count_type
> (0) :
225 sizeOfLclCount_ + count * (sizeOfGblColInd_ +
226 (lclRowPids_.size() > 0 ? sizeOfPid_ : 0) +
230 counts_(curInd) = numBytes;
// Body of getError(): copies the device-side error flag into a host mirror.
// NOTE(review): this deep_copy overload takes an execution-space instance.
// Per the Kokkos documentation that form may return before the copy has
// completed; if error_h is read right after (the return statement is not
// visible here), confirm a fence or a blocking overload is in place.
242 typedef typename device_type::execution_space execution_space;
243 auto error_h = Kokkos::create_mirror_view (error_);
245 Kokkos::deep_copy (execution_space(), error_h, error_);
// Data members. The input views are held through their const_type aliases;
// the two output views (offsets, counts) stay writable.
250 OutputOffsetsViewType outputOffsets_;
251 CountsViewType counts_;
252 typename InputOffsetsViewType::const_type rowOffsets_;
253 typename InputLocalRowIndicesViewType::const_type lclRowInds_;
254 typename InputLocalRowPidsViewType::const_type lclRowPids_;
// Byte sizes used by operator() when computing a row's packed size.
255 count_type sizeOfLclCount_;
256 count_type sizeOfGblColInd_;
257 count_type sizeOfPid_;
258 count_type sizeOfValue_;
// Rank-0 device view holding the error code reported through getError().
259 Kokkos::View<int, device_type> error_;
// Compute per-row byte counts (`counts`) and their running offsets
// (`outputOffsets`) for the rows listed in `lclRowInds`, via a parallel scan.
//
// outputOffsets must have lclRowInds.extent(0) + 1 entries and counts must
// have lclRowInds.extent(0) entries; violations throw std::invalid_argument.
// A nonzero functor error code throws std::runtime_error.
// The value returned on the normal path is read from outputOffsets on the
// host; the index expression is cut off in this excerpt.
271template<
class OutputOffsetsViewType,
272 class CountsViewType,
273 class InputOffsetsViewType,
274 class InputLocalRowIndicesViewType,
275 class InputLocalRowPidsViewType>
276typename CountsViewType::non_const_value_type
277computeNumPacketsAndOffsets (
const OutputOffsetsViewType& outputOffsets,
278 const CountsViewType& counts,
279 const InputOffsetsViewType& rowOffsets,
280 const InputLocalRowIndicesViewType& lclRowInds,
281 const InputLocalRowPidsViewType& lclRowPids,
282 const typename CountsViewType::non_const_value_type sizeOfLclCount,
283 const typename CountsViewType::non_const_value_type sizeOfGblColInd,
284 const typename CountsViewType::non_const_value_type sizeOfPid,
285 const typename CountsViewType::non_const_value_type sizeOfValue)
// Tail of the functor_type typedef (its first line is missing here). The
// input views are passed to the functor as their const_type aliases.
288 CountsViewType,
typename InputOffsetsViewType::const_type,
289 typename InputLocalRowIndicesViewType::const_type,
290 typename InputLocalRowPidsViewType::const_type> functor_type;
291 typedef typename CountsViewType::non_const_value_type count_type;
292 typedef typename OutputOffsetsViewType::size_type size_type;
293 typedef typename OutputOffsetsViewType::execution_space execution_space;
294 typedef typename functor_type::local_row_index_type LO;
295 typedef Kokkos::RangePolicy<execution_space, LO> range_type;
296 const char prefix[] =
"computeNumPacketsAndOffsets: ";
298 count_type count = 0;
// NOTE(review): numRowsToPack has type count_type but is compared below with
// LO loop indices and size_type extents; check for signed/unsigned mismatch
// warnings for the types actually instantiated.
299 const count_type numRowsToPack = lclRowInds.extent (0);
// Nothing to pack: the early-exit body is not visible in this excerpt.
301 if (numRowsToPack == 0) {
// There are rows to pack, so the matrix must have at least one row,
// i.e. rowOffsets needs at least two entries.
305 TEUCHOS_TEST_FOR_EXCEPTION
306 (rowOffsets.extent (0) <=
static_cast<size_type
> (1),
307 std::invalid_argument, prefix <<
"There is at least one row to pack, "
308 "but the matrix has no rows. lclRowInds.extent(0) = " <<
309 numRowsToPack <<
", but rowOffsets.extent(0) = " <<
310 rowOffsets.extent (0) <<
" <= 1.");
311 TEUCHOS_TEST_FOR_EXCEPTION
312 (outputOffsets.extent (0) !=
313 static_cast<size_type
> (numRowsToPack + 1), std::invalid_argument,
314 prefix <<
"Output dimension does not match number of rows to pack. "
315 <<
"outputOffsets.extent(0) = " << outputOffsets.extent (0)
316 <<
" != lclRowInds.extent(0) + 1 = "
317 <<
static_cast<size_type
> (numRowsToPack + 1) <<
".");
318 TEUCHOS_TEST_FOR_EXCEPTION
319 (counts.extent (0) != numRowsToPack, std::invalid_argument,
320 prefix <<
"counts.extent(0) = " << counts.extent (0)
321 <<
" != numRowsToPack = " << numRowsToPack <<
".");
// Scan over numRowsToPack + 1 indices, one per entry of outputOffsets.
323 functor_type f (outputOffsets, counts, rowOffsets,
324 lclRowInds, lclRowPids, sizeOfLclCount,
325 sizeOfGblColInd, sizeOfPid, sizeOfValue);
326 Kokkos::parallel_scan (range_type (0, numRowsToPack + 1), f);
// Surface any error the functor recorded on the device.
329 const int errCode = f.getError ();
330 TEUCHOS_TEST_FOR_EXCEPTION
331 (errCode != 0, std::runtime_error, prefix <<
"parallel_scan error code "
332 << errCode <<
" != 0.");
// Consistency check: the last offset should equal the sum of the counts.
// NOTE(review): the statements declaring `total` and `errStr` are missing
// from this excerpt, and the `return {false, errStr};` below does not match
// this function's declared return type. This section is presumably disabled
// or belongs to a nested scope -- confirm against the full file.
336 for (LO k = 0; k < numRowsToPack; ++k) {
339 if (outputOffsets(numRowsToPack) != total) {
// The error stream is created only when the first problem is found.
340 if (errStr.get () == NULL) {
341 errStr = std::unique_ptr<std::ostringstream> (
new std::ostringstream ());
343 std::ostringstream& os = *errStr;
345 <<
"outputOffsets(numRowsToPack=" << numRowsToPack <<
") "
346 << outputOffsets(numRowsToPack) <<
" != sum of counts = "
347 << total <<
"." << std::endl;
348 if (numRowsToPack != 0) {
// Only dump the full arrays when they are short (fewer than 10 rows).
350 if (numRowsToPack <
static_cast<LO
> (10)) {
351 os <<
"outputOffsets: [";
352 for (LO i = 0; i <= numRowsToPack; ++i) {
353 os << outputOffsets(i);
354 if (
static_cast<LO
> (i + 1) <= numRowsToPack) {
358 os <<
"]" << std::endl;
360 for (LO i = 0; i < numRowsToPack; ++i) {
362 if (
static_cast<LO
> (i + 1) < numRowsToPack) {
366 os <<
"]" << std::endl;
369 os <<
"outputOffsets(" << (numRowsToPack-1) <<
") = "
370 << outputOffsets(numRowsToPack-1) <<
"." << std::endl;
373 count = outputOffsets(numRowsToPack);
374 return {
false, errStr};
// Normal return path: fetch one entry of outputOffsets on the host.
380 using Tpetra::Details::getEntryOnHost;
381 return static_cast<count_type
> (getEntryOnHost (outputOffsets,
// Pack one matrix row into `exports`, starting at byte `offset`.
//
// The output pointers below lay the row out as: entry count, then global
// column indices, then (only if pack_pids) process IDs, then values.
// Returns a pair (error code, number of bytes written):
//   0  success
//   10 at least one packing call reported an error
//   11 bytes written differ from the size computed from the layout
// The function-name line and several parameters are missing from this excerpt.
401template<
class ST,
class ColumnMap,
class BufferDeviceType>
403Kokkos::pair<int, size_t>
405 const Kokkos::View<char*, BufferDeviceType>& exports,
410 const size_t num_ent,
411 const size_t num_bytes_per_value,
412 const bool pack_pids)
414 using Kokkos::subview;
415 using LO =
typename ColumnMap::local_ordinal_type;
416 using GO =
typename ColumnMap::global_ordinal_type;
417 using return_type = Kokkos::pair<int, size_t>;
// Early exit with nothing written; the guarding condition is not visible here
// (presumably num_ent == 0 -- confirm).
421 return return_type (0, 0);
424 const LO num_ent_LO =
static_cast<LO
> (num_ent);
// Byte offsets of each section within `exports`. The *_len definitions for the
// count and index sections are on lines missing from this excerpt.
425 const size_t num_ent_beg = offset;
428 const size_t gids_beg = num_ent_beg + num_ent_len;
431 const size_t pids_beg = gids_beg + gids_len;
// The PID section has zero length when PIDs are not being packed.
432 const size_t pids_len = pack_pids ?
434 static_cast<size_t> (0);
436 const size_t vals_beg = gids_beg + gids_len + pids_len;
437 const size_t vals_len = num_ent * num_bytes_per_value;
439 char*
const num_ent_out = exports.data () + num_ent_beg;
440 char*
const gids_out = exports.data () + gids_beg;
441 char*
const pids_out = pack_pids ? exports.data () + pids_beg : NULL;
442 char*
const vals_out = exports.data () + vals_beg;
444 size_t num_bytes_out = 0;
// Column indices are converted from local to global before packing.
451 for (
size_t k = 0; k < num_ent; ++k) {
452 const LO lid = lids_in[k];
453 const GO gid = col_map.getGlobalElement (lid);
// The PID of each entry is looked up by its local column index.
458 for (
size_t k = 0; k < num_ent; ++k) {
459 const LO lid = lids_in[k];
460 const int pid = pids_in[lid];
466 error_code += p.first;
467 num_bytes_out += p.second;
470 if (error_code != 0) {
471 return return_type (10, num_bytes_out);
// Cross-check the running byte total against the layout computed above.
474 const size_t expected_num_bytes =
475 num_ent_len + gids_len + pids_len + vals_len;
476 if (num_bytes_out != expected_num_bytes) {
477 return return_type (11, num_bytes_out);
479 return return_type (0, num_bytes_out);
// Reduction functor that packs the rows listed in export_lids into `exports`.
// Its reduction value is a pair (error code, index); init() starts it at
// (0, invalid) and join() keeps an existing nonzero code in dst.
// Error codes assigned in operator():
//   1 export_lid is not less than the matrix's row count
//   2 the row's byte range does not fit inside the buffer
//   3 the bytes packed differ from num_packets_per_lid(i)
//   other: the code returned by packCrsMatrixRow
482template<
class LocalMatrix,
class LocalMap,
class BufferDeviceType>
483struct PackCrsMatrixFunctor {
484 typedef LocalMatrix local_matrix_device_type;
486 typedef typename local_matrix_device_type::value_type ST;
489 typedef typename local_matrix_device_type::device_type DT;
491 typedef Kokkos::View<const size_t*, BufferDeviceType>
492 num_packets_per_lid_view_type;
493 typedef Kokkos::View<const size_t*, BufferDeviceType> offsets_view_type;
494 typedef Kokkos::View<char*, BufferDeviceType> exports_view_type;
498 typedef typename num_packets_per_lid_view_type::non_const_value_type
500 typedef typename offsets_view_type::non_const_value_type
// Reduction result type: (error code, loop index that reported it).
502 typedef Kokkos::pair<int, LO> value_type;
504 static_assert (std::is_same<LO, typename local_matrix_device_type::ordinal_type>::value,
505 "local_map_type::local_ordinal_type and "
506 "local_matrix_device_type::ordinal_type must be the same.");
508 local_matrix_device_type local_matrix;
509 local_map_type local_col_map;
510 exports_view_type exports;
511 num_packets_per_lid_view_type num_packets_per_lid;
512 export_lids_view_type export_lids;
513 source_pids_view_type source_pids;
514 offsets_view_type offsets;
515 size_t num_bytes_per_value;
// Constructor: copies every argument into the matching member, then checks
// that a non-empty matrix has a row map with numRows + 1 entries.
518 PackCrsMatrixFunctor (
const local_matrix_device_type& local_matrix_in,
519 const local_map_type& local_col_map_in,
520 const exports_view_type& exports_in,
521 const num_packets_per_lid_view_type& num_packets_per_lid_in,
522 const export_lids_view_type& export_lids_in,
523 const source_pids_view_type& source_pids_in,
524 const offsets_view_type& offsets_in,
525 const size_t num_bytes_per_value_in,
526 const bool pack_pids_in) :
527 local_matrix (local_matrix_in),
528 local_col_map (local_col_map_in),
529 exports (exports_in),
530 num_packets_per_lid (num_packets_per_lid_in),
531 export_lids (export_lids_in),
532 source_pids (source_pids_in),
533 offsets (offsets_in),
534 num_bytes_per_value (num_bytes_per_value_in),
535 pack_pids (pack_pids_in)
537 const LO numRows = local_matrix_in.numRows ();
539 static_cast<LO
> (local_matrix.graph.row_map.extent (0));
540 TEUCHOS_TEST_FOR_EXCEPTION
541 (numRows != 0 && rowMapDim != numRows +
static_cast<LO
> (1),
542 std::logic_error,
"local_matrix.graph.row_map.extent(0) = "
543 << rowMapDim <<
" != numRows (= " << numRows <<
" ) + 1.");
// Reduction identity: error code 0 paired with the invalid ordinal.
546 KOKKOS_INLINE_FUNCTION
void init (value_type& dst)
const
548 using ::Tpetra::Details::OrdinalTraits;
549 dst = Kokkos::make_pair (0, OrdinalTraits<LO>::invalid ());
// Combines two partial results; acts only when src carries an error and dst
// does not (the statement inside the `if` is not visible in this excerpt).
552 KOKKOS_INLINE_FUNCTION
void
553 join (value_type& dst,
const value_type& src)
const
557 if (src.first != 0 && dst.first == 0) {
// Packs the row for loop index i and records an error code in dst on failure.
562 KOKKOS_INLINE_FUNCTION
563 void operator() (
const LO i, value_type& dst)
const
565 const size_t offset = offsets[i];
566 const LO export_lid = export_lids[i];
567 const size_t buf_size = exports.size();
568 const size_t num_bytes = num_packets_per_lid(i);
// NOTE(review): row_map is indexed with export_lid here, before the range
// check on export_lid further down. An out-of-range export_lid would be read
// out of bounds first -- confirm whether the check should come earlier.
569 const size_t num_ent =
570 static_cast<size_t> (local_matrix.graph.row_map[export_lid+1]
571 - local_matrix.graph.row_map[export_lid]);
// NOTE(review): every error assignment below is guarded by
// `if (dst.first != 0)`. init() sets dst.first to 0 and join() tests
// `dst.first == 0`, so as written an error is stored only when one is
// already present, and a first error would never be recorded. The guard
// looks inverted (`== 0` would keep the first error) -- confirm.
581 if (export_lid >= local_matrix.numRows ()) {
582 if (dst.first != 0) {
583 dst = Kokkos::make_pair (1, i);
// The row's bytes [offset, offset + num_bytes) must lie inside the buffer.
587 else if ((offset > buf_size || offset + num_bytes > buf_size)) {
588 if (dst.first != 0) {
589 dst = Kokkos::make_pair (2, i);
// Take subviews of this row's values and column indices and pack them.
599 const auto row_beg = local_matrix.graph.row_map[export_lid];
600 const auto row_end = local_matrix.graph.row_map[export_lid + 1];
601 auto vals_in = subview (local_matrix.values,
602 Kokkos::make_pair (row_beg, row_end));
603 auto lids_in = subview (local_matrix.graph.entries,
604 Kokkos::make_pair (row_beg, row_end));
605 typedef local_map_type LMT;
606 typedef BufferDeviceType BDT;
607 auto p = packCrsMatrixRow<ST, LMT, BDT> (local_col_map, exports, lids_in,
608 source_pids, vals_in, offset,
609 num_ent, num_bytes_per_value,
611 int error_code_this_row = p.first;
612 size_t num_bytes_packed_this_row = p.second;
613 if (error_code_this_row != 0) {
614 if (dst.first != 0) {
615 dst = Kokkos::make_pair (error_code_this_row, i);
// The packed size must match what was announced for this row.
618 else if (num_bytes_packed_this_row != num_bytes) {
619 if (dst.first != 0) {
620 dst = Kokkos::make_pair (3, i);
// Validate the argument extents, then run PackCrsMatrixFunctor over all rows
// to export with a parallel_reduce.
// Throws std::invalid_argument on inconsistent extents, and std::runtime_error
// if the functor reports a nonzero error code.
// Some parameters are on lines missing from this excerpt.
633template<
class LocalMatrix,
class LocalMap,
class BufferDeviceType>
635do_pack (
const LocalMatrix& local_matrix,
637 const Kokkos::View<char*, BufferDeviceType>& exports,
641 const Kokkos::View<const size_t*, BufferDeviceType>& offsets,
642 const size_t num_bytes_per_value,
643 const bool pack_pids)
646 using DT =
typename LocalMatrix::device_type;
647 using range_type = Kokkos::RangePolicy<typename DT::execution_space, LO>;
648 const char prefix[] =
"Tpetra::Details::do_pack: ";
// The extent checks apply only when there is at least one row to export.
650 if (export_lids.extent (0) != 0) {
651 TEUCHOS_TEST_FOR_EXCEPTION
652 (
static_cast<size_t> (offsets.extent (0)) !=
653 static_cast<size_t> (export_lids.extent (0) + 1),
654 std::invalid_argument, prefix <<
"offsets.extent(0) = "
655 << offsets.extent (0) <<
" != export_lids.extent(0) (= "
656 << export_lids.extent (0) <<
") + 1.");
657 TEUCHOS_TEST_FOR_EXCEPTION
658 (export_lids.extent (0) != num_packets_per_lid.extent (0),
659 std::invalid_argument, prefix <<
"export_lids.extent(0) = " <<
660 export_lids.extent (0) <<
" != num_packets_per_lid.extent(0) = "
661 << num_packets_per_lid.extent (0) <<
".");
// Packing PIDs into a non-empty buffer requires a non-empty PID view.
665 TEUCHOS_TEST_FOR_EXCEPTION
666 (pack_pids && exports.extent (0) != 0 &&
667 source_pids.extent (0) == 0, std::invalid_argument, prefix <<
668 "pack_pids is true, and exports.extent(0) = " <<
669 exports.extent (0) <<
" != 0, meaning that we need to pack at "
670 "least one matrix entry, but source_pids.extent(0) = 0.");
673 using pack_functor_type =
674 PackCrsMatrixFunctor<LocalMatrix, LocalMap, BufferDeviceType>;
675 pack_functor_type f (local_matrix, local_map, exports,
676 num_packets_per_lid, export_lids,
677 source_pids, offsets, num_bytes_per_value,
// One iteration per entry of num_packets_per_lid; `result` receives the
// reduced (error code, index) pair.
680 typename pack_functor_type::value_type result;
681 range_type range (0, num_packets_per_lid.extent (0));
682 Kokkos::parallel_reduce (range, f, result);
684 if (result.first != 0) {
687 TEUCHOS_TEST_FOR_EXCEPTION
688 (
true, std::runtime_error, prefix <<
"PackCrsMatrixFunctor "
689 "reported error code " << result.first <<
" for the first "
690 "bad row " << result.second <<
".");
// Implementation of the pack operation (PackCrsMatrixImpl::packCrsMatrix, per
// the `prefix` string below). Steps visible in this excerpt:
//   1. set constant_num_packets to 0 and validate the input extents;
//   2. agree on a bytes-per-value figure across the matrix's communicator;
//   3. compute per-row byte counts and offsets;
//   4. reallocate `exports` if it is too small;
//   5. call do_pack on the device view of `exports`.
// The function-name line and the sourceMatrix parameter are missing here.
723template<
typename ST,
typename LO,
typename GO,
typename NT,
typename BufferDeviceType>
726 Kokkos::DualView<char*, BufferDeviceType>& exports,
727 const Kokkos::View<size_t*, BufferDeviceType>& num_packets_per_lid,
728 const Kokkos::View<const LO*, BufferDeviceType>& export_lids,
729 const Kokkos::View<const int*, typename NT::device_type>& export_pids,
730 size_t& constant_num_packets,
731 const bool pack_pids)
734 "Tpetra::Details::PackCrsMatrixImpl::packCrsMatrix",
738 typedef BufferDeviceType DT;
739 typedef Kokkos::DualView<char*, BufferDeviceType> exports_view_type;
740 const char prefix[] =
"Tpetra::Details::PackCrsMatrixImpl::packCrsMatrix: ";
// Compile-time switch; the diagnostic prints to std::cerr further down are
// presumably guarded by it (the guards are not visible in this excerpt).
741 constexpr bool debug =
false;
744 auto local_col_map = sourceMatrix.
getColMap ()->getLocalMap ();
749 constant_num_packets = 0;
751 const size_t num_export_lids =
752 static_cast<size_t> (export_lids.extent (0));
753 TEUCHOS_TEST_FOR_EXCEPTION
755 static_cast<size_t> (num_packets_per_lid.extent (0)),
756 std::invalid_argument, prefix <<
"num_export_lids.extent(0) = "
757 << num_export_lids <<
" != num_packets_per_lid.extent(0) = "
758 << num_packets_per_lid.extent (0) <<
".");
// With rows to export, the per-row count view must point at real storage.
759 if (num_export_lids != 0) {
760 TEUCHOS_TEST_FOR_EXCEPTION
761 (num_packets_per_lid.data () == NULL, std::invalid_argument,
762 prefix <<
"num_export_lids = "<< num_export_lids <<
" != 0, but "
763 "num_packets_per_lid.data() = "
764 << num_packets_per_lid.data () <<
" == NULL.");
// The local figure is derived from the first stored value, if there is one,
// and combined across the communicator. The reduction operator is on a line
// missing from this excerpt.
771 size_t num_bytes_per_value = 0;
786 size_t num_bytes_per_value_l = 0;
787 if (local_matrix.values.extent(0) > 0) {
788 const ST& val = local_matrix.values(0);
791 using Teuchos::reduceAll;
792 reduceAll<int, size_t> (* (sourceMatrix.
getComm ()),
794 num_bytes_per_value_l,
795 Teuchos::outArg (num_bytes_per_value));
// No rows to export: hand back an empty buffer.
798 if (num_export_lids == 0) {
799 exports = exports_view_type (
"exports", 0);
// One offset per exported row, plus one.
804 Kokkos::View<size_t*, DT> offsets (
"offsets", num_export_lids + 1);
809 computeNumPacketsAndOffsets (offsets, num_packets_per_lid,
810 local_matrix.graph.row_map, export_lids,
812 num_bytes_per_lid, num_bytes_per_gid,
813 num_bytes_per_pid, num_bytes_per_value);
// Reallocate only when the required byte count exceeds the current extent.
816 if (count >
static_cast<size_t> (exports.extent (0))) {
817 exports = exports_view_type (
"exports", count);
819 std::ostringstream os;
820 os <<
"*** exports resized to " << count << std::endl;
821 std::cerr << os.str ();
825 std::ostringstream os;
826 os <<
"*** count: " << count <<
", exports.extent(0): "
827 << exports.extent (0) << std::endl;
828 std::cerr << os.str ();
// Packing PIDs into a non-empty buffer requires a non-empty PID view.
834 TEUCHOS_TEST_FOR_EXCEPTION
835 (pack_pids && exports.extent (0) != 0 &&
836 export_pids.extent (0) == 0, std::invalid_argument, prefix <<
837 "pack_pids is true, and exports.extent(0) = " <<
838 exports.extent (0) <<
" != 0, meaning that we need to pack at least "
839 "one matrix entry, but export_pids.extent(0) = 0.");
841 typedef typename std::decay<
decltype (local_matrix)>::type
842 local_matrix_device_type;
843 typedef typename std::decay<
decltype (local_col_map)>::type
// Mark the device side of the DualView as modified before writing through
// its device view.
846 exports.modify_device ();
847 auto exports_d = exports.view_device ();
848 do_pack<local_matrix_device_type, local_map_type, DT>
849 (local_matrix, local_col_map, exports_d, num_packets_per_lid,
850 export_lids, export_pids, offsets, num_bytes_per_value,
// Pack variant with Teuchos::Array / Teuchos::ArrayView arguments. It builds
// views from the raw host arrays, calls PackCrsMatrixImpl::packCrsMatrix with
// pack_pids = false, then copies the per-row counts and the packed bytes back
// into the caller's host arrays. The function-name line is missing here.
857template<
typename ST,
typename LO,
typename GO,
typename NT>
860 Teuchos::Array<char>& exports,
861 const Teuchos::ArrayView<size_t>& numPacketsPerLID,
862 const Teuchos::ArrayView<const LO>& exportLIDs,
863 size_t& constantNumPackets)
868 using host_exec_space =
typename Kokkos::View<size_t*, device_type>::HostMirror::execution_space;
869 using device_exec_space =
typename device_type::execution_space;
870 using host_dev_type = Kokkos::Device<host_exec_space, Kokkos::HostSpace>;
// The counts are an output, so the boolean argument (named `copy` in the
// create_mirror_view_from_raw_host_array signature) is false here.
876 Kokkos::View<size_t*, buffer_device_type> num_packets_per_lid_d =
878 numPacketsPerLID.getRawPtr (),
879 numPacketsPerLID.size (),
false,
880 "num_packets_per_lid");
// The row indices are an input, so the same argument is true here.
887 Kokkos::View<const LO*, buffer_device_type> export_lids_d =
889 exportLIDs.getRawPtr (),
890 exportLIDs.size (),
true,
// Default-constructed PID view: this variant does not pack PIDs.
893 Kokkos::View<int*, device_type> export_pids_d;
894 Kokkos::DualView<char*, buffer_device_type> exports_dv;
895 constexpr bool pack_pids =
false;
896 PackCrsMatrixImpl::packCrsMatrix<ST, LO, GO, NT, buffer_device_type> (
897 sourceMatrix, exports_dv, num_packets_per_lid_d, export_lids_d,
898 export_pids_d, constantNumPackets, pack_pids);
// Wrap the caller's host array and copy the counts back into it.
// NOTE(review): the deep_copy calls below take an execution-space instance.
// Per the Kokkos documentation that form may return before the copy
// completes; confirm the host arrays are not read before a fence.
902 Kokkos::View<size_t*, host_dev_type> num_packets_per_lid_h
903 (numPacketsPerLID.getRawPtr (),
904 numPacketsPerLID.size ());
906 Kokkos::deep_copy (device_exec_space(), num_packets_per_lid_h, num_packets_per_lid_d);
// Resize the caller's array to the packed size before copying into it.
913 if (
static_cast<size_t> (exports.size ()) !=
914 static_cast<size_t> (exports_dv.extent (0))) {
915 exports.resize (exports_dv.extent (0));
917 Kokkos::View<char*, host_dev_type> exports_h (exports.getRawPtr (),
920 Kokkos::deep_copy (device_exec_space(), exports_h, exports_dv.d_view);
// DualView-based pack variant (packCrsMatrixNew, per the profiling label
// below). It calls PackCrsMatrixImpl::packCrsMatrix with pack_pids = false and
// a zero-length PID view. Most of the signature is missing from this excerpt.
923template<
typename ST,
typename LO,
typename GO,
typename NT>
930 size_t& constantNumPackets)
936 Kokkos::View<int*, device_type> exportPIDs_d (
"exportPIDs", 0);
937 constexpr bool pack_pids =
false;
// The sync state is reset and the device side marked modified on a copy of
// the DualView, while the device view is taken from the original handle.
// NOTE(review): presumably both handles refer to the same storage and
// modification flags -- confirm.
940 auto numPacketsPerLID_nc = numPacketsPerLID;
941 numPacketsPerLID_nc.clear_sync_state ();
942 numPacketsPerLID_nc.modify_device ();
943 auto numPacketsPerLID_d = numPacketsPerLID.view_device ();
// The row indices must already be current on the device; no sync is done here.
946 TEUCHOS_ASSERT( ! exportLIDs.need_sync_device () );
947 auto exportLIDs_d = exportLIDs.view_device ();
950 "Tpetra::Details::packCrsMatrixNew",
953 PackCrsMatrixImpl::packCrsMatrix<ST,LO,GO,NT,buffer_device_type> (
954 sourceMatrix, exports, numPacketsPerLID_d, exportLIDs_d,
955 exportPIDs_d, constantNumPackets, pack_pids);
// Pack variant that also packs process IDs (pack_pids = true), named
// packCrsMatrixWithOwningPIDs per the message prefix below. It builds views
// from the raw host arrays, calls PackCrsMatrixImpl::packCrsMatrix, and copies
// the per-row counts back to the caller's array. Exceptions from the pack call
// and from the copy-back are caught and a message is written to std::cerr;
// what the handlers do afterwards is not visible in this excerpt.
958template<
typename ST,
typename LO,
typename GO,
typename NT>
962 const Teuchos::ArrayView<size_t>& numPacketsPerLID,
963 const Teuchos::ArrayView<const LO>& exportLIDs,
964 const Teuchos::ArrayView<const int>& sourcePIDs,
965 size_t& constantNumPackets)
969 typedef typename Kokkos::DualView<char*, buffer_device_type>::t_host::execution_space host_exec_space;
970 typedef Kokkos::Device<host_exec_space, Kokkos::HostSpace> host_dev_type;
972 typename local_matrix_device_type::device_type outputDevice;
973 typedef typename NT::execution_space execution_space;
// Message prefix, built only on the diagnostic path (the guard is not visible
// here). It is dereferenced by the handlers below.
977 std::unique_ptr<std::string> prefix;
// Rank lookup that tolerates a null Map or communicator; the fallback value
// returned in those cases is on lines missing from this excerpt.
979 const int myRank = [&] () {
980 auto map = sourceMatrix.
getMap ();
981 if (map.get () ==
nullptr) {
984 auto comm = map->getComm ();
985 if (comm.get () ==
nullptr) {
988 return comm->getRank ();
990 std::ostringstream os;
991 os <<
"Proc " << myRank <<
": packCrsMatrixWithOwningPIDs: ";
992 prefix = std::unique_ptr<std::string> (
new std::string (os.str ()));
994 std::ostringstream os2;
995 os2 << *prefix <<
"start" << std::endl;
996 std::cerr << os2.str ();
// The counts are an output, so the `copy` argument is false.
1003 auto num_packets_per_lid_d =
1005 numPacketsPerLID.getRawPtr (),
1006 numPacketsPerLID.size (),
false,
1007 "num_packets_per_lid");
// Row indices and source PIDs are inputs, so `copy` is true for both.
1011 auto export_lids_d =
1013 exportLIDs.getRawPtr (),
1014 exportLIDs.size (),
true,
1018 auto export_pids_d =
1020 sourcePIDs.getRawPtr (),
1021 sourcePIDs.size (),
true,
1023 constexpr bool pack_pids =
true;
1025 PackCrsMatrixImpl::packCrsMatrix
1026 (sourceMatrix, exports_dv, num_packets_per_lid_d, export_lids_d,
1027 export_pids_d, constantNumPackets, pack_pids);
// NOTE(review): the handlers dereference `prefix`. If they can run when
// `prefix` was never assigned, that is a null dereference -- confirm that the
// guards not visible here rule this out.
1029 catch (std::exception& e) {
1031 std::ostringstream os;
1032 os << *prefix <<
"PackCrsMatrixImpl::packCrsMatrix threw: "
1033 << e.what () << std::endl;
1034 std::cerr << os.str ();
1040 std::ostringstream os;
1041 os << *prefix <<
"PackCrsMatrixImpl::packCrsMatrix threw an exception "
1042 "not a subclass of std::exception" << std::endl;
1043 std::cerr << os.str ();
// Copy the counts back only when the caller's array is non-empty.
1048 if (numPacketsPerLID.size () != 0) {
1052 Kokkos::View<size_t*, host_dev_type> num_packets_per_lid_h
1053 (numPacketsPerLID.getRawPtr (), numPacketsPerLID.size ());
// NOTE(review): deep_copy with an execution-space instance may return before
// the copy completes (see the Kokkos documentation); confirm the caller's
// array is not read before a fence.
1055 Kokkos::deep_copy (execution_space(), num_packets_per_lid_h, num_packets_per_lid_d);
1057 catch (std::exception& e) {
1059 std::ostringstream os;
1060 os << *prefix <<
"Kokkos::deep_copy threw: " << e.what () << std::endl;
1061 std::cerr << os.str ();
1067 std::ostringstream os;
1068 os << *prefix <<
"Kokkos::deep_copy threw an exception not a subclass "
1069 "of std::exception" << std::endl;
1070 std::cerr << os.str ();
1077 std::ostringstream os;
1078 os << *prefix <<
"done" << std::endl;
1079 std::cerr << os.str ();
// Explicit-instantiation macro: for one (ST, LO, GO, NT) combination it names
// the Details::packCrsMatrix, Details::packCrsMatrixNew and
// Details::packCrsMatrixWithOwningPIDs specializations. Several continuation
// lines are missing from this excerpt. No comments are placed between the
// lines because each one ends in a backslash continuation.
1086#define TPETRA_DETAILS_PACKCRSMATRIX_INSTANT( ST, LO, GO, NT ) \
1088 Details::packCrsMatrix<ST, LO, GO, NT> (const CrsMatrix<ST, LO, GO, NT>&, \
1089 Teuchos::Array<char>&, \
1090 const Teuchos::ArrayView<size_t>&, \
1091 const Teuchos::ArrayView<const LO>&, \
1094 Details::packCrsMatrixNew<ST, LO, GO, NT> (const CrsMatrix<ST, LO, GO, NT>&, \
1095 Kokkos::DualView<char*, DistObject<char, LO, GO, NT>::buffer_device_type>&, \
1096 const Kokkos::DualView<size_t*, DistObject<char, LO, GO, NT>::buffer_device_type>&, \
1097 const Kokkos::DualView<const LO*, DistObject<char, LO, GO, NT>::buffer_device_type>&, \
1100 Details::packCrsMatrixWithOwningPIDs<ST, LO, GO, NT> (const CrsMatrix<ST, LO, GO, NT>&, \
1101 Kokkos::DualView<char*, DistObject<char, LO, GO, NT>::buffer_device_type>&, \
1102 const Teuchos::ArrayView<size_t>&, \
1103 const Teuchos::ArrayView<const LO>&, \
1104 const Teuchos::ArrayView<const int>&, \
Declaration of the Tpetra::CrsMatrix class.
Declaration of Tpetra::Details::Behavior, a class that describes Tpetra's behavior.
Import KokkosSparse::OrdinalTraits, a traits class for "invalid" (flag) values of integer types,...
Declaration and generic definition of traits class that tells Tpetra::CrsMatrix how to pack and unpack data.
Declaration of Tpetra::Details::Profiling, a scope guard for Kokkos Profiling.
Declaration and definition of Tpetra::Details::castAwayConstDualView, an implementation detail of Tpetra.
Functions that wrap Kokkos::create_mirror_view, in order to avoid deep copies when not necessary,...
Declaration and definition of Tpetra::Details::getEntryOnHost.
KOKKOS_FUNCTION Kokkos::pair< int, size_t > packCrsMatrixRow(const ColumnMap &col_map, const Kokkos::View< char *, BufferDeviceType > &exports, const typename PackTraits< typename ColumnMap::local_ordinal_type >::input_array_type &lids_in, const typename PackTraits< int >::input_array_type &pids_in, const typename PackTraits< ST >::input_array_type &vals_in, const size_t offset, const size_t num_ent, const size_t num_bytes_per_value, const bool pack_pids)
Packs a single row of the CrsMatrix.
Sparse matrix that presents a row-oriented interface that lets users read or modify entries.
Teuchos::RCP< const Teuchos::Comm< int > > getComm() const override
The communicator over which the matrix is distributed.
typename Node::device_type device_type
The Kokkos device type.
KokkosSparse::CrsMatrix< impl_scalar_type, local_ordinal_type, device_type, void, typename local_graph_device_type::size_type > local_matrix_device_type
The specialization of Kokkos::CrsMatrix that represents the part of the sparse matrix on each MPI process.
local_matrix_device_type getLocalMatrixDevice() const
The local sparse matrix.
Teuchos::RCP< const map_type > getColMap() const override
The Map that describes the column distribution in this matrix.
static bool verbose()
Whether Tpetra is in verbose mode.
"Local" part of Map suitable for Kokkos kernels.
LocalOrdinal local_ordinal_type
The type of local indices.
GlobalOrdinal global_ordinal_type
The type of global indices.
Compute the number of packets and offsets for the pack procedure.
int getError() const
Host function for getting the error.
Kokkos::Device< typename device_type::execution_space, buffer_memory_space > buffer_device_type
Kokkos::Device specialization for communication buffers.
virtual Teuchos::RCP< const map_type > getMap() const
The Map describing the parallel distribution of this object.
Implementation details of Tpetra.
void packCrsMatrix(const CrsMatrix< ST, LO, GO, NT > &sourceMatrix, Teuchos::Array< char > &exports, const Teuchos::ArrayView< size_t > &numPacketsPerLID, const Teuchos::ArrayView< const LO > &exportLIDs, size_t &constantNumPackets)
Pack specified entries of the given local sparse matrix for communication.
Impl::CreateMirrorViewFromUnmanagedHostArray< ValueType, OutputDeviceType >::output_view_type create_mirror_view_from_raw_host_array(const OutputDeviceType &, ValueType *inPtr, const size_t inSize, const bool copy=true, const char label[]="")
Variant of Kokkos::create_mirror_view that takes a raw host 1-d array as input.
void packCrsMatrixWithOwningPIDs(const CrsMatrix< ST, LO, GO, NT > &sourceMatrix, Kokkos::DualView< char *, typename DistObject< char, LO, GO, NT >::buffer_device_type > &exports_dv, const Teuchos::ArrayView< size_t > &numPacketsPerLID, const Teuchos::ArrayView< const LO > &exportLIDs, const Teuchos::ArrayView< const int > &sourcePIDs, size_t &constantNumPackets)
Pack specified entries of the given local sparse matrix for communication.
void packCrsMatrixNew(const CrsMatrix< ST, LO, GO, NT > &sourceMatrix, Kokkos::DualView< char *, typename DistObject< char, LO, GO, NT >::buffer_device_type > &exports, const Kokkos::DualView< size_t *, typename DistObject< char, LO, GO, NT >::buffer_device_type > &numPacketsPerLID, const Kokkos::DualView< const LO *, typename DistObject< char, LO, GO, NT >::buffer_device_type > &exportLIDs, size_t &constantNumPackets)
Pack specified entries of the given local sparse matrix for communication, for "new" DistObject interface.
Namespace Tpetra contains the class and methods constituting the Tpetra library.
Traits class for packing / unpacking data of type T.
static KOKKOS_INLINE_FUNCTION Kokkos::pair< int, size_t > packArray(char outBuf[], const value_type inBuf[], const size_t numEnt)
Pack the first numEnt entries of the given input buffer of value_type, into the output buffer of byte...
static KOKKOS_INLINE_FUNCTION size_t packValue(char outBuf[], const T &inVal)
Pack the given value of type value_type into the given output buffer of bytes (char).
static KOKKOS_INLINE_FUNCTION size_t packValueCount(const T &)
Number of bytes required to pack or unpack the given value of type value_type.
Kokkos::View< const value_type *, Kokkos::AnonymousSpace > input_array_type
The type of an input array of value_type.