peterrum / dealii Goto Github PK
View Code? Open in Web Editor NEWThis project forked from dealii/dealii
The development repository for the deal.II finite element library.
Home Page: https://www.dealii.org/
License: Other
This project forked from dealii/dealii
The development repository for the deal.II finite element library.
Home Page: https://www.dealii.org/
License: Other
Introduce a new vector (LinearAlgebra::SharedMPI::Vector) in deal.II in such a way that it can be used in deal.II-based DG applications (DG, ghost cells), in hyper.deal
(DG, ghost, faces), and in the CEED benchmarks (CG).
related to dealii#10872, hyperdeal/hyperdeal#18, and https://github.com/peterrum/ceed_benchmarks_dealii/tree/sm_vector_mf
LinearAlgebra::SharedMPI::Vector
-> storing the raw pointers and providing RW access to arrays and its entries + has a LinearAlgebra::SharedMPI::Partitioner
for default update_ghost_values()
/compress()
(aka. DataAccessOnFaces::unspecified
)LinearAlgebra::SharedMPI::Partitioner
implementation for communication
IndexSet
-> https://github.com/dealii/dealii/blob/067a4365c2848766320e07fc4f6c9ed7b880ac8f/include/deal.II/lac/la_sm_partitioner.h#L1-L483 and https://github.com/dealii/dealii/blob/067a4365c2848766320e07fc4f6c9ed7b880ac8f/source/lac/la_sm_partitioner.cc#L1-L784 -> CGglobal_cell_index
-> https://github.com/hyperdeal/hyperdeal/blob/0ae55a2dba9803cda3f8a40792309d1738178dde/include/hyper.deal/matrix_free/vector_partitioner.h#L1-L1499 with functions translating cells and faces to pairs (rank, offset): /**
* Return position of shared cell: cell -> (owner, offset)
*/
const std::map<dealii::types::global_dof_index,
std::pair<unsigned int, unsigned int>> &
get_maps() const; // cells
/**
* Return position of ghost face: (cell, no) -> (owner, offset)
*/
const std::map<std::pair<dealii::types::global_dof_index, unsigned int>,
std::pair<unsigned int, unsigned int>> &
get_maps_ghost() const; // ghost faces in the context of hyper.deal
DoFInfo
(owned by MatrixFree
) pre-computing and storing pair for each lane of macro cell using the above functions of the partitioner -> dof_indices_contiguous_ptr
FEEvaluation
uses DoFInfo::dof_indices_contiguous_ptr
DoFInfo
?DoFInfo::vector_partitioner_sm
(x1), DoFInfo::vector_partitioner_face_variants_sm
(x5)?MatrixFree::AdditionalData
?IndexSet
/dealii::Utilities::MPI::Partitioner
vs. DG -> global_cell_index
MatrixFree::get_vector_partitioner()
MatrixFree::initialize_dof_vector()
-> remove sm comm as argument?VectorDataExchange::update_ghost_values_start()
, update_ghost_values_finish()
, compress_start()
, compress_finish()
, reset_ghost_values()
VectorDataExchange::zero_vector_region()
-> use normal partitioner?via read_dof_values()
, distribute_local_to_global()
, gather_evaluate()
, integrate_scatter()
read_dof_values()
/distribute_local_to_global()
use VectorReader
/read_write_operation
(CG/DG) /read_write_operation_contiguous
(DG -> to be specialized?, rule out interleaved_contiguous
? - see code snippets below) if (n_filled_lanes == VectorizedArrayType::size() &&
n_lanes == VectorizedArrayType::size())
{
if (this->dof_info->index_storage_variants[ind][this->cell] ==
internal::MatrixFreeFunctions::DoFInfo::IndexStorageVariants::
contiguous)
{
if (n_components == 1 || n_fe_components == 1)
for (unsigned int comp = 0; comp < n_components; ++comp)
operation.process_dofs_vectorized_transpose(
this->data->dofs_per_component_on_cell,
dof_indices,
*src[comp],
values_dofs[comp],
vector_selector);
else
operation.process_dofs_vectorized_transpose(
this->data->dofs_per_component_on_cell * n_components,
dof_indices,
*src[0],
&values_dofs[0][0],
vector_selector);
}
for (unsigned int comp = 0; comp < n_components; ++comp)
{
for (unsigned int i = 0; i < this->data->dofs_per_component_on_cell;
++i)
operation.process_empty(values_dofs[comp][i]);
if (this->dof_info->index_storage_variants[ind][this->cell] ==
internal::MatrixFreeFunctions::DoFInfo::IndexStorageVariants::
contiguous)
{
if (n_components == 1 || n_fe_components == 1)
{
for (unsigned int v = 0; v < n_filled_lanes; ++v)
if (mask[v] == true)
for (unsigned int i = 0;
i < this->data->dofs_per_component_on_cell;
++i)
operation.process_dof(dof_indices[v] + i,
*src[comp],
values_dofs[comp][i][v]);
}
else
{
for (unsigned int v = 0; v < n_filled_lanes; ++v)
if (mask[v] == true)
for (unsigned int i = 0;
i < this->data->dofs_per_component_on_cell;
++i)
operation.process_dof(
dof_indices[v] + i +
comp * this->data->dofs_per_component_on_cell,
*src[0],
values_dofs[comp][i][v]);
}
}
for cell gather_evaluate()
/integrate_scatter()
call read_dof_values()
/distribute_local_to_global()
for faces gather_evaluate()
/integrate_scatter()
-> fe_face_evaluation_process_and_io()
:
MatrixFreeFunctions::tensor_symmetric_hermite
+ do_gradients
: n_face_orientations
/n_vectorization_lanes_filled
do_gradients
: n_face_orientations
/n_vectorization_lanes_filled
// case 4: contiguous indices without interleaving
else if (n_face_orientations > 1 ||
dof_info.index_storage_variants[dof_access_index][cell] ==
MatrixFreeFunctions::DoFInfo::IndexStorageVariants::
contiguous)
{
const unsigned int *indices =
&dof_info.dof_indices_contiguous[dof_access_index]
[cell *
VectorizedArrayType::size()];
Number2_ *vector_ptr =
global_vector_ptr + comp * static_dofs_per_component +
dof_info
.component_dof_indices_offset[active_fe_index]
[first_selected_component];
if (do_gradients == true &&
data.element_type ==
MatrixFreeFunctions::tensor_symmetric_hermite)
{
if (n_face_orientations == 1 &&
dof_info.n_vectorization_lanes_filled[dof_access_index]
[cell] ==
VectorizedArrayType::size())
for (unsigned int i = 0; i < dofs_per_face; ++i)
{
const unsigned int ind1 =
index_array_hermite[0][2 * i];
const unsigned int ind2 =
index_array_hermite[0][2 * i + 1];
const unsigned int i_ = reorientate(0, i);
proc.function_2a(temp1[i_],
temp1[i_ + dofs_per_face],
vector_ptr + ind1,
vector_ptr + ind2,
grad_weight,
indices,
indices);
}
else if (n_face_orientations == 1)
for (unsigned int i = 0; i < dofs_per_face; ++i)
{
const unsigned int ind1 =
index_array_hermite[0][2 * i];
const unsigned int ind2 =
index_array_hermite[0][2 * i + 1];
const unsigned int i_ = reorientate(0, i);
const unsigned int n_filled_lanes =
dof_info
.n_vectorization_lanes_filled[dof_access_index]
[cell];
for (unsigned int v = 0; v < n_filled_lanes; ++v)
proc.function_3a(temp1[i_][v],
temp1[i_ + dofs_per_face][v],
vector_ptr[ind1 + indices[v]],
vector_ptr[ind2 + indices[v]],
grad_weight[v]);
if (integrate == false)
for (unsigned int v = n_filled_lanes;
v < VectorizedArrayType::size();
++v)
{
temp1[i_][v] = 0.0;
temp1[i_ + dofs_per_face][v] = 0.0;
}
}
else
{
Assert(false, ExcNotImplemented());
const unsigned int n_filled_lanes =
dof_info
.n_vectorization_lanes_filled[dof_access_index]
[cell];
for (unsigned int v = 0; v < n_filled_lanes; ++v)
for (unsigned int i = 0; i < dofs_per_face; ++i)
proc.function_3a(
temp1[reorientate(v, i)][v],
temp1[reorientate(v, i) + dofs_per_face][v],
vector_ptr[index_array_hermite[v][2 * i] +
indices[v]],
vector_ptr[index_array_hermite[v][2 * i + 1] +
indices[v]],
grad_weight[v]);
}
}
else
{
if (n_face_orientations == 1 &&
dof_info.n_vectorization_lanes_filled[dof_access_index]
[cell] ==
VectorizedArrayType::size())
for (unsigned int i = 0; i < dofs_per_face; ++i)
{
const unsigned int ind = index_array_nodal[0][i];
const unsigned int i_ = reorientate(0, i);
proc.function_2b(temp1[i_],
vector_ptr + ind,
indices);
}
else if (n_face_orientations == 1)
for (unsigned int i = 0; i < dofs_per_face; ++i)
{
const unsigned int ind = index_array_nodal[0][i];
const unsigned int i_ = reorientate(0, i);
const unsigned int n_filled_lanes =
dof_info
.n_vectorization_lanes_filled[dof_access_index]
[cell];
for (unsigned int v = 0; v < n_filled_lanes; ++v)
proc.function_3b(temp1[i_][v],
vector_ptr[ind + indices[v]]);
if (integrate == false)
for (unsigned int v = n_filled_lanes;
v < VectorizedArrayType::size();
++v)
temp1[i_][v] = 0.0;
}
else
for (unsigned int i = 0; i < dofs_per_face; ++i)
{
for (unsigned int v = 0;
v < VectorizedArrayType::size();
++v)
if (cells[v] != numbers::invalid_unsigned_int)
proc.function_3b(
temp1[reorientate(v, i)][v],
vector_ptr[index_array_nodal[v][i] +
dof_info.dof_indices_contiguous
[dof_access_index][cells[v]]]);
}
}
}
else
{
// case 5: default vector access
AssertDimension(n_face_orientations, 1);
// for the integrate_scatter path (integrate == true), we
// need to only prepare the data in this function for all
// components to later call distribute_local_to_global();
// for the gather_evaluate path (integrate == false), we
// instead want to leave early because we need to get the
// vector data from somewhere else
proc.function_5(temp1, comp);
if (integrate)
accesses_global_vector = false;
else
return false;
}
}
/**
 * Case 2a, read direction: fetch two vectorized values and turn the second
 * one into a weighted difference.
 *
 * Calls do_vectorized_gather() once with (@p src_ptr_1, @p indices_1) to fill
 * @p temp_1 and once with (@p src_ptr_2, @p indices_2) to fill @p temp_2.
 * Afterwards @p temp_2 is overwritten by grad_weight * (temp_1 - temp_2),
 * while @p temp_1 keeps the first gathered value.
 *
 * NOTE(review): do_vectorized_gather() is defined outside this excerpt; it
 * presumably loads one entry per lane at src_ptr + indices[lane] -- confirm.
 */
template <typename T0, typename T1, typename T2, typename T3>
void
function_2a(T0 &      temp_1,
            T0 &      temp_2,
            const T1  src_ptr_1,
            const T1  src_ptr_2,
            const T2 &grad_weight,
            const T3 &indices_1,
            const T3 &indices_2)
{
  // case 2a): gather both operands first ...
  do_vectorized_gather(src_ptr_1, indices_1, temp_1);
  do_vectorized_gather(src_ptr_2, indices_2, temp_2);

  // ... then replace the second operand by the weighted difference
  temp_2 = grad_weight * (temp_1 - temp_2);
}
/**
 * Case 2b, read direction: fill @p temp with a single call to
 * do_vectorized_gather(), passing @p src_ptr and @p indices through
 * unchanged. No further arithmetic is applied to the result.
 *
 * NOTE(review): do_vectorized_gather() is defined outside this excerpt;
 * its exact addressing semantics should be confirmed there.
 */
template <typename T0, typename T1, typename T2>
void
function_2b(T0 &      temp,
            const T1  src_ptr,
            const T2 &indices)
{
  // case 2b): plain gather, values only
  do_vectorized_gather(src_ptr, indices, temp);
}
/**
 * Case 3a, read direction: element-wise counterpart of case 2a that works
 * on values handed in by reference instead of gathering them.
 *
 * @param[out] temp_1      Receives a copy of @p src_ptr_1.
 * @param[out] temp_2      Receives grad_weight * (temp_1 - src_ptr_2),
 *                         evaluated after @p temp_1 has been assigned.
 * @param[in]  src_ptr_1   First source value.
 * @param[in]  src_ptr_2   Second source value. It shares the template type
 *                         T2 with @p grad_weight, so both must deduce to the
 *                         same type.
 * @param[in]  grad_weight Factor applied to the difference.
 */
template <typename T0, typename T1, typename T2>
void
function_3a(T0 &      temp_1,
            T0 &      temp_2,
            const T1 &src_ptr_1,
            const T2 &src_ptr_2,
            const T2 &grad_weight)
{
  // case 3a): copy the first value ...
  temp_1 = src_ptr_1;

  // ... and derive the second entry from the weighted difference
  temp_2 = grad_weight * (temp_1 - src_ptr_2);
}
/**
 * Case 3b, read direction: copy a single source value.
 *
 * @param[out] temp    Destination; overwritten with @p src_ptr.
 * @param[in]  src_ptr Value to copy. Despite the name it is taken by
 *                     reference and assigned directly, not dereferenced.
 */
template <typename T1, typename T2>
void
function_3b(T1 &      temp,
            const T2 &src_ptr)
{
  // case 3b): plain assignment
  temp = src_ptr;
}
template <typename T0, typename T1, typename T2, typename T3>
void
function_2a(const T0 &temp_1,
const T0 &temp_2,
T1 dst_ptr_1,
T1 dst_ptr_2,
const T2 &grad_weight,
const T3 &indices_1,
const T3 &indices_2)
{
// case 2a)
const VectorizedArrayType val = temp_1 - grad_weight * temp_2;
const VectorizedArrayType grad = grad_weight * temp_2;
do_vectorized_scatter_add(val, indices_1, dst_ptr_1);
do_vectorized_scatter_add(grad, indices_2, dst_ptr_2);
}
/**
 * Case 2b, write direction: forward @p temp, @p indices and @p dst_ptr to
 * do_vectorized_scatter_add() without modifying the value.
 *
 * NOTE(review): do_vectorized_scatter_add() is defined outside this
 * excerpt; judging by its name it accumulates into the destination, which
 * should be confirmed at its definition.
 */
template <typename T0, typename T1, typename T2>
void
function_2b(const T0 &temp,
            T1        dst_ptr,
            const T2 &indices)
{
  // case 2b): values only, no weighting
  do_vectorized_scatter_add(temp, indices, dst_ptr);
}
template <typename T0, typename T1, typename T2>
void
function_3a(const T0 &temp_1,
const T0 &temp_2,
T1 & dst_ptr_1,
T1 & dst_ptr_2,
const T2 &grad_weight)
{
// case 3a)
const Number val = temp_1 - grad_weight * temp_2;
const Number grad = grad_weight * temp_2;
dst_ptr_1 += val;
dst_ptr_2 += grad;
}
/**
 * Case 3b, write direction: accumulate a single value.
 *
 * @param[in]     temp    Value to add.
 * @param[in,out] dst_ptr Destination; incremented by @p temp. Despite the
 *                        name it is taken by reference and updated directly,
 *                        not dereferenced.
 */
template <typename T0, typename T1>
void
function_3b(const T0 &temp,
            T1 &      dst_ptr)
{
  // case 3b): plain accumulation
  dst_ptr += temp;
}
A declarative, efficient, and flexible JavaScript library for building user interfaces.
🖖 Vue.js is a progressive, incrementally-adoptable JavaScript framework for building UI on the web.
TypeScript is a superset of JavaScript that compiles to clean JavaScript output.
An Open Source Machine Learning Framework for Everyone
The Web framework for perfectionists with deadlines.
A PHP framework for web artisans
Bring data to life with SVG, Canvas and HTML. 📊📈🎉
JavaScript (JS) is a lightweight interpreted programming language with first-class functions.
Some thing interesting about web. New door for the world.
A server is a program made to process requests and deliver data to clients.
Machine learning is a way of modeling and interpreting data that allows a piece of software to respond intelligently.
Some thing interesting about visualization, use data art
Some thing interesting about game, make everyone happy.
We are working to build community through open source technology. NB: members must have two-factor auth.
Open source projects and samples from Microsoft.
Google ❤️ Open Source for everyone.
Alibaba Open Source for everyone
Data-Driven Documents codes.
China tencent open source team.