diff --git a/src/core/common/include/context.h b/src/core/common/include/context.h index f2377bde..bde85fa0 100644 --- a/src/core/common/include/context.h +++ b/src/core/common/include/context.h @@ -1,70 +1,143 @@ #pragma once +#include + +#include +#include #include #include #include -#include -#include #include "common/include/md_types.h" #include "common/include/nonbonded_14_mode.h" +#include "common/include/vdw_rules.h" +#include "host_device_buffer.h" class Context { public: - static Context& instance() { static Context ctx; return ctx; } - /* ============================================= - * == GENERAL + * == CONFIG * ============================================= */ - int n_atoms = 0; - int n_atoms_solute = 0; + std::string base_folder; + bool run_gpu = false; + + int n_atoms = 0; // the total number of atoms + int n_atoms_solute = 0; // the total number of solute number, in our system [0, n_atoms_solute) are solute, [n_atoms_solute, n_atoms) are water atoms int n_patoms = 0; int n_qatoms = 0; int n_waters = 0; int n_molecules = 0; - - std::string base_folder; double dt = 0.0; double tau_T = 0.0; + md_t md; - bool run_gpu = false; + /* + + */ + std::unique_ptr> coords; + std::unique_ptr> velocities; + std::unique_ptr> dvelocities; - /* ============================================= - * == FROM MD FILE - * ============================================= - */ - md_t md; - bool separate_scaling = false; - - /* ============================================= - * == FROM TOPOLOGY FILE - * ============================================= - */ + /* + */ + int n_angles = 0; + int n_angles_solute = 0; + int n_cangles = 0; + std::unique_ptr> angles; + std::unique_ptr> cangles; - int n_coords = 0; int n_bonds = 0; int n_bonds_solute = 0; int n_cbonds = 0; - int n_angles = 0; - int n_angles_solute = 0; - int n_cangles = 0; + std::unique_ptr> bonds; + std::unique_ptr> cbonds; int n_torsions = 0; int n_torsions_solute = 0; int n_ctorsions = 0; + std::unique_ptr> torsions; + std::unique_ptr> ctorsions; int n_impropers = 0; int n_impropers_solute = 0; int n_cimpropers = 0; + std::unique_ptr> impropers; + std::unique_ptr> cimpropers; + + int n_restrspos = 0; + std::unique_ptr> restrspos; + + int n_restrangs = 0; + std::unique_ptr> restrangs; + + + int n_restrdists = 0; + std::unique_ptr> restrdists; + + int n_restrseqs = 0; + std::unique_ptr> restrseqs; + int n_restrwalls = 0; + std::unique_ptr> restrwalls; + + /* + Atom Info + */ int n_charges = 0; int n_ccharges = 0; + std::unique_ptr> charges; + std::unique_ptr> ccharges; int n_atypes = 0; int n_catypes = 0; + std::unique_ptr> atypes; + std::unique_ptr> catypes; + + std::unique_ptr> heavy; + std::unique_ptr> coords_init; + + std::unique_ptr> excluded; + + std::unique_ptr> winv; + + std::unique_ptr> shell; + /* + Pair + */ + std::unique_ptr> LJ_matrix; + + /* + Shake + */ + int n_shake_constraints = 0; + std::unique_ptr> mol_n_shakes; + std::unique_ptr> shake_bonds; + std::unique_ptr> xcoords; // todo: It's just a temporary variables... + /* + Water + */ + + std::unique_ptr> wshells; + + + + /* + FEP + */ + std::unique_ptr> lambdas; // Actually length is only 2.. + + /* + Energy + */ + + std::unique_ptr> EQ_restraint; + + /* + */ + int n_ngbrs23 = 0; int n_ngbrs14 = 0; int n_excluded = 0; @@ -72,30 +145,35 @@ class Context { int n_cgrps_solvent = 0; int iuse_switch_atom = 0; - std::vector coords_top; - std::vector bonds; - std::vector cbonds; - std::vector angles; - std::vector cangles; - std::vector torsions; - std::vector ctorsions; - std::vector impropers; - std::vector cimpropers; - std::vector charges; - std::vector ccharges; - std::vector atypes; - std::vector catypes; std::vector atom_to_qi; - std::vector unified_ccharges; - std::vector unified_catypes; - std::vector LJ_matrix; + std::unique_ptr> unified_ccharges; + std::unique_ptr> unified_catypes; + std::unique_ptr> ngbrs_14; + std::vector ngbrs_14_builder; + + std::unique_ptr> p_atoms_list; + std::unique_ptr> w_atoms_list; + std::unique_ptr> q_atoms_list; + + std::unique_ptr> charge_table_all; + std::unique_ptr> charge_pair_products; + std::unique_ptr> p_charge_types; + std::unique_ptr> w_charge_types; + std::unique_ptr> q_charge_types; + + std::unique_ptr> catype_table_all; + std::unique_ptr> catype_pair_params; + std::unique_ptr> p_catype_types; + std::unique_ptr> w_catype_types; + std::unique_ptr> q_catype_types; + + std::map, int> catype_to_type_host; + int n_charge_types = 0; + int zero_charge_type = -1; + int n_catype_types = 0; + int zero_catype_type = -1; - std::vector ngbrs_14; - - std::unique_ptr excluded; - std::unique_ptr heavy; std::vector molecules; - std::vector winv; std::vector charge_groups; topo_t topo = {}; @@ -106,7 +184,6 @@ class Context { */ int n_lambdas = 0; - std::vector lambdas; int n_qangcouples = 0; int n_qangles = 0; @@ -143,7 +220,7 @@ class Context { std::vector q_atypes; std::vector q_bonds; std::vector q_charges; - std::vector q_elscales; + std::unique_ptr> q_elscales; std::vector q_exclpairs; std::vector q_impropers; std::vector q_shakes; @@ -155,20 +232,6 @@ class Context { * ============================================= */ - int n_restrseqs = 0; - int n_restrspos = 0; - int n_restrdists = 0; - int n_restrangs = 0; - int n_restrwalls = 0; - - std::vector restrseqs; - std::vector restrspos; - std::vector restrdists; - std::vector restrangs; - std::vector restrwalls; - - std::unique_ptr shell; - /* ============================================= * == SHELLS / SOLVENT * ============================================= @@ -185,16 +248,12 @@ class Context { int n_shells = 0; std::vector> list_sh; std::vector> nsort; - std::vector wshells; /* ============================================= * == SHAKE * ============================================= */ - int n_shake_constraints = 0; - std::vector mol_n_shakes; - std::vector shake_bonds; /* ============================================= * == CALCULATED IN THE INTEGRATION @@ -202,10 +261,6 @@ class Context { */ std::vector p_atoms; - std::vector coords; - std::vector xcoords; - std::vector velocities; - std::vector dvelocities; energy_t E_total = {}; std::vector EQ_total; @@ -225,7 +280,6 @@ class Context { std::vector EQ_nonbond_qx; E_restraint_t E_restraint = {}; - std::vector EQ_restraint; double Temp = 0.0; double Tfree = 0.0; @@ -244,6 +298,7 @@ class Context { * ============================================= */ + bool separate_scaling = false; double Ndegf = 0.0; double Ndegfree = 0.0; double Ndegf_solvent = 0.0; @@ -270,7 +325,7 @@ class Context { } const ccharge_t& unified_ccharge_by_code(int code) const { - return unified_ccharges[code - 1]; + return unified_ccharges->cpu_data_p[code - 1]; } const ccharge_t& unified_ccharge(int atom_idx, int state) const { @@ -282,21 +337,27 @@ class Context { } const catype_t& unified_catype_by_code(int code) const { - return unified_catypes[code - 1]; + return unified_catypes->cpu_data_p[code - 1]; } const catype_t& unified_catype(int atom_idx, int state) const { return unified_catype_by_code(unified_atype_code(atom_idx, state)); } + void cuda_initialize_helpers(); + void cuda_free_helpers(); + void cuda_reset_energies(); + void cuda_sync_all_to_device(); + private: Context() = default; ~Context() {} + void cuda_initialize_atom_lists_host(); + void cuda_initialize_ngbrs14_host(); + void cuda_initialize_charge_tables_host(); + void cuda_initialize_catype_tables_host(); + Context(const Context&) = delete; Context& operator=(const Context&) = delete; - - - - }; diff --git a/src/core/common/include/cuda_runtime_utility.h b/src/core/common/include/cuda_runtime_utility.h new file mode 100644 index 00000000..ea019882 --- /dev/null +++ b/src/core/common/include/cuda_runtime_utility.h @@ -0,0 +1,17 @@ +#pragma once + +#include + +#include +#include + +inline void check_cuda(cudaError_t status) { + if (status != cudaSuccess) { + std::printf(">>> FATAL: CUDA call failed with error code %d: %s\n", status, cudaGetErrorString(status)); + std::exit(EXIT_FAILURE); + } +} + +inline void check_cudaMalloc(void** dev_ptr, size_t size) { + check_cuda(cudaMalloc(dev_ptr, size)); +} diff --git a/src/core/common/include/host_device_buffer.h b/src/core/common/include/host_device_buffer.h new file mode 100644 index 00000000..d0590f8c --- /dev/null +++ b/src/core/common/include/host_device_buffer.h @@ -0,0 +1,149 @@ +#pragma once + +#include +#include +#include + +#include "cuda_runtime_utility.h" + +template +class HostDeviceBuffer { + public: + size_t length = 0; // number of elements + bool cpu_mem_flag = true; // whether host memory exists + bool gpu_mem_flag = true; // whether device memory exists + + T* cpu_data_p = nullptr; // CPU pointer + T* gpu_data_p = nullptr; // GPU pointer + + HostDeviceBuffer(int length, bool cpu_mem_flag = true, bool gpu_mem_flag = true); + HostDeviceBuffer(unsigned int length, bool cpu_mem_flag = true, bool gpu_mem_flag = true); + HostDeviceBuffer(size_t length, bool cpu_mem_flag = true, bool gpu_mem_flag = true); + + ~HostDeviceBuffer(); + + void allocate(); + void deallocate(); + + // copy CPU -> GPU + void upload(T* buffer = nullptr); + + // copy GPU -> CPU + void download(T* buffer = nullptr); +}; + +template +HostDeviceBuffer::HostDeviceBuffer(int length, bool cpu_mem_flag, bool gpu_mem_flag) + : length(length), cpu_mem_flag(cpu_mem_flag), gpu_mem_flag(gpu_mem_flag) { + allocate(); +} + +template +HostDeviceBuffer::HostDeviceBuffer(unsigned int length, bool cpu_mem_flag, bool gpu_mem_flag) + : length(length), cpu_mem_flag(cpu_mem_flag), gpu_mem_flag(gpu_mem_flag) { + allocate(); +} + +template +HostDeviceBuffer::HostDeviceBuffer(size_t length, bool cpu_mem_flag, bool gpu_mem_flag) + : length(length), cpu_mem_flag(cpu_mem_flag), gpu_mem_flag(gpu_mem_flag) { + allocate(); +} + +template +HostDeviceBuffer::~HostDeviceBuffer() { + deallocate(); +} + +template +void HostDeviceBuffer::allocate() { + if (length == 0) { + return; + } + + const size_t bytes = length * sizeof(T); + if (cpu_mem_flag) { + cpu_data_p = new T[length]; + std::memset(cpu_data_p, 0, bytes); + } + + if (gpu_mem_flag) { + check_cudaMalloc((void**)&gpu_data_p, bytes); + check_cuda(cudaMemset(gpu_data_p, 0, bytes)); + } +} + +template +void HostDeviceBuffer::deallocate() { + if (cpu_data_p != nullptr) { + delete[] cpu_data_p; + cpu_data_p = nullptr; + } + + if (gpu_data_p != nullptr) { + check_cuda(cudaFree(gpu_data_p)); + gpu_data_p = nullptr; + } + + length = 0; +} + +//--------------------------------------------------------------------------------------------- +// Upload: CPU -> GPU +// +// If buffer != nullptr, copy from buffer to device. +// Otherwise copy from internal cpu_data_p. +// +// Requires device memory. +//--------------------------------------------------------------------------------------------- +template +void HostDeviceBuffer::upload(T* buffer) { + if (length == 0) { + return; + } + + if (gpu_data_p == nullptr) { + throw std::runtime_error("HostDeviceBuffer::upload failed: GPU memory is not allocated."); + } + + T* src = buffer; + if (src == nullptr) { + src = cpu_data_p; + } + + if (src == nullptr) { + throw std::runtime_error("HostDeviceBuffer::upload failed: no CPU source buffer available."); + } + + check_cuda(cudaMemcpy(gpu_data_p, src, length * sizeof(T), cudaMemcpyHostToDevice)); +} + +//--------------------------------------------------------------------------------------------- +// Download: GPU -> CPU +// +// If buffer != nullptr, copy into buffer. +// Otherwise copy into internal cpu_data_p. +// +// Requires device memory. +//--------------------------------------------------------------------------------------------- +template +void HostDeviceBuffer::download(T* buffer) { + if (length == 0) { + return; + } + + if (gpu_data_p == nullptr) { + throw std::runtime_error("HostDeviceBuffer::download failed: GPU memory is not allocated."); + } + + T* dst = buffer; + if (dst == nullptr) { + dst = cpu_data_p; + } + + if (dst == nullptr) { + throw std::runtime_error("HostDeviceBuffer::download failed: no CPU destination buffer available."); + } + + check_cuda(cudaMemcpy(dst, gpu_data_p, length * sizeof(T), cudaMemcpyDeviceToHost)); +} diff --git a/src/core/common/include/host_device_memory_info.h b/src/core/common/include/host_device_memory_info.h new file mode 100644 index 00000000..8848341d --- /dev/null +++ b/src/core/common/include/host_device_memory_info.h @@ -0,0 +1,16 @@ +#pragma once +class HostDeviceMemoryInfo { + public: + long long total_cpu_memory; + long long total_gpu_memory; + + static HostDeviceMemoryInfo& Instance() { + static HostDeviceMemoryInfo instance; + return instance; + } + + protected: + HostDeviceMemoryInfo() { + total_cpu_memory = total_gpu_memory = 0; + } +}; \ No newline at end of file diff --git a/src/core/common/include/init.h b/src/core/common/include/init.h index 23a62a14..9c1e9950 100644 --- a/src/core/common/include/init.h +++ b/src/core/common/include/init.h @@ -64,5 +64,4 @@ void clean_variables(); // void write_energies(int iteration); void calc_integration(); -void calc_integration_step(int iteration); diff --git a/src/core/common/src/handler.cpp b/src/core/common/src/handler.cpp index 85a68b13..3fdd1341 100644 --- a/src/core/common/src/handler.cpp +++ b/src/core/common/src/handler.cpp @@ -44,8 +44,10 @@ void Handler::run(int num_iterations) { void Handler::update_energy_totals() { auto& host = Context::instance(); + auto *lambdas = host.lambdas->cpu_data_p; + auto *EQ_restraint = host.EQ_restraint->cpu_data_p; for (int state = 0; state < host.n_lambdas; state++) { - if (host.lambdas[state] == 0) { + if (lambdas[state] == 0) { host.EQ_bond[state].Uangle = 0; host.EQ_bond[state].Ubond = 0; host.EQ_bond[state].Utor = 0; @@ -56,24 +58,24 @@ void Handler::update_energy_totals() { host.EQ_nonbond_qp[state].Uvdw = 0; host.EQ_nonbond_qw[state].Ucoul = 0; host.EQ_nonbond_qw[state].Uvdw = 0; - host.EQ_restraint[state].Urestr = 0; + EQ_restraint[state].Urestr = 0; } host.EQ_nonbond_qx[state].Ucoul = host.EQ_nonbond_qq[state].Ucoul + host.EQ_nonbond_qp[state].Ucoul + host.EQ_nonbond_qw[state].Ucoul; host.EQ_nonbond_qx[state].Uvdw = host.EQ_nonbond_qq[state].Uvdw + host.EQ_nonbond_qp[state].Uvdw + host.EQ_nonbond_qw[state].Uvdw; host.EQ_total[state].Utot = host.EQ_bond[state].Ubond + host.EQ_bond[state].Uangle + host.EQ_bond[state].Utor + host.EQ_bond[state].Uimp + - host.EQ_nonbond_qx[state].Ucoul + host.EQ_nonbond_qx[state].Uvdw + host.EQ_restraint[state].Urestr; + host.EQ_nonbond_qx[state].Ucoul + host.EQ_nonbond_qx[state].Uvdw + EQ_restraint[state].Urestr; - host.E_bond_q.Ubond += host.EQ_bond[state].Ubond * host.lambdas[state]; - host.E_bond_q.Uangle += host.EQ_bond[state].Uangle * host.lambdas[state]; - host.E_bond_q.Utor += host.EQ_bond[state].Utor * host.lambdas[state]; - host.E_bond_q.Uimp += host.EQ_bond[state].Uimp * host.lambdas[state]; - host.E_nonbond_qx.Ucoul += host.EQ_nonbond_qx[state].Ucoul * host.lambdas[state]; - host.E_nonbond_qx.Uvdw += host.EQ_nonbond_qx[state].Uvdw * host.lambdas[state]; + host.E_bond_q.Ubond += host.EQ_bond[state].Ubond * lambdas[state]; + host.E_bond_q.Uangle += host.EQ_bond[state].Uangle * lambdas[state]; + host.E_bond_q.Utor += host.EQ_bond[state].Utor * lambdas[state]; + host.E_bond_q.Uimp += host.EQ_bond[state].Uimp * lambdas[state]; + host.E_nonbond_qx.Ucoul += host.EQ_nonbond_qx[state].Ucoul * lambdas[state]; + host.E_nonbond_qx.Uvdw += host.EQ_nonbond_qx[state].Uvdw * lambdas[state]; // Update protein restraint energies with an average of all states - host.E_restraint.Upres += host.EQ_restraint[state].Urestr * host.lambdas[state]; + host.E_restraint.Upres += EQ_restraint[state].Urestr * lambdas[state]; } // Update totals @@ -94,6 +96,8 @@ void Handler::print_outputs(int iteration) { void Handler::reset_energies() { auto& host = Context::instance(); + auto &dvelocities = host.dvelocities->cpu_data_p; + auto *EQ_restraint = host.EQ_restraint->cpu_data_p; host.E_total.Upot = 0; host.E_bond_p.Uangle = 0; host.E_bond_p.Ubond = 0; @@ -121,7 +125,8 @@ void Handler::reset_energies() { host.E_restraint.Ushell = 0; host.E_restraint.Upres = 0; host.E_restraint.Urestr = 0; - for (auto& dvel : host.dvelocities) { + for (int i = 0; i < host.n_atoms; i++) { + auto& dvel = dvelocities[i]; dvel.x = 0; dvel.y = 0; dvel.z = 0; @@ -140,6 +145,6 @@ void Handler::reset_energies() { host.EQ_nonbond_qx[state].Ucoul = 0; host.EQ_nonbond_qx[state].Uvdw = 0; host.EQ_total[state].Utot = 0; - host.EQ_restraint[state].Urestr = 0; + EQ_restraint[state].Urestr = 0; } } diff --git a/src/core/common/src/init.cpp b/src/core/common/src/init.cpp index 0fc9cdb2..0b48cc87 100644 --- a/src/core/common/src/init.cpp +++ b/src/core/common/src/init.cpp @@ -17,36 +17,38 @@ static void finalize_ngbrs14_host() { auto& ctx = Context::instance(); + auto &LJ_matrix = ctx.LJ_matrix->cpu_data_p; + auto &ngbrs_14 = ctx.ngbrs_14_builder; - for (auto& pair : ctx.ngbrs_14) { + for (auto& pair : ngbrs_14) { if (pair.x > pair.y) { std::swap(pair.x, pair.y); } } - ctx.ngbrs_14.erase( - std::remove_if(ctx.ngbrs_14.begin(), ctx.ngbrs_14.end(), [&ctx](const int3& pair) { + ngbrs_14.erase( + std::remove_if(ngbrs_14.begin(), ngbrs_14.end(), [&ctx, &LJ_matrix](const int3& pair) { if (pair.x < 0 || pair.y < 0 || pair.x >= ctx.n_atoms_solute || pair.y >= ctx.n_atoms_solute) { return true; } if (pair.x == pair.y) { return true; } - return ctx.LJ_matrix[pair.x * ctx.n_atoms_solute + pair.y] != 1; + return LJ_matrix[pair.x * ctx.n_atoms_solute + pair.y] != 1; }), - ctx.ngbrs_14.end()); + ngbrs_14.end()); - std::sort(ctx.ngbrs_14.begin(), ctx.ngbrs_14.end(), [](const int3& lhs, const int3& rhs) { + std::sort(ngbrs_14.begin(), ngbrs_14.end(), [](const int3& lhs, const int3& rhs) { if (lhs.x != rhs.x) return lhs.x < rhs.x; return lhs.y < rhs.y; }); - ctx.ngbrs_14.erase( - std::unique(ctx.ngbrs_14.begin(), ctx.ngbrs_14.end(), [](const int3& lhs, const int3& rhs) { + ngbrs_14.erase( + std::unique(ngbrs_14.begin(), ngbrs_14.end(), [](const int3& lhs, const int3& rhs) { return lhs.x == rhs.x && lhs.y == rhs.y; }), - ctx.ngbrs_14.end()); + ngbrs_14.end()); - for (auto& pair : ctx.ngbrs_14) { + for (auto& pair : ngbrs_14) { const bool ai_is_q = ctx.atom_to_qi[pair.x] != -1; const bool aj_is_q = ctx.atom_to_qi[pair.y] != -1; if (ai_is_q && aj_is_q) { @@ -57,7 +59,12 @@ static void finalize_ngbrs14_host() { pair.z = NONBONDED_14_PP; } } - ctx.n_ngbrs14 = static_cast(ctx.ngbrs_14.size()); + ctx.ngbrs_14 = std::make_unique>(ngbrs_14.size(), true, ctx.run_gpu); + if (!ngbrs_14.empty()) { + std::copy(ngbrs_14.begin(), ngbrs_14.end(), ctx.ngbrs_14->cpu_data_p); + } + ctx.n_ngbrs14 = static_cast(ctx.ngbrs_14->length); + ngbrs_14.clear(); } // Remove bonds, angles, torsions and impropers which are excluded or changed in the FEP file @@ -69,14 +76,19 @@ void exclude_qatom_definitions() { excluded = 0; int solute_excluded = 0; + + auto &bonds = ctx.bonds->cpu_data_p; + auto &angles = ctx.angles->cpu_data_p; + auto &torsions = ctx.torsions->cpu_data_p; + if (ctx.n_qangles > 0) { for (int i = 0; i < ctx.n_angles; i++) { - if (ctx.angles[i].ai == ctx.q_angles[qai].ai && ctx.angles[i].aj == ctx.q_angles[qai].aj && ctx.angles[i].ak == ctx.q_angles[qai].ak) { + if (angles[i].ai == ctx.q_angles[qai].ai && angles[i].aj == ctx.q_angles[qai].aj && angles[i].ak == ctx.q_angles[qai].ak) { qai++; excluded++; if (i < ctx.n_angles_solute) solute_excluded++; } else { - ctx.angles[ai] = ctx.angles[i]; + angles[ai] = angles[i]; ai++; } } @@ -88,12 +100,12 @@ void exclude_qatom_definitions() { solute_excluded = 0; if (ctx.n_qbonds > 0) { for (int i = 0; i < ctx.n_bonds; i++) { - if (ctx.bonds[i].ai == ctx.q_bonds[qbi].ai && ctx.bonds[i].aj == ctx.q_bonds[qbi].aj) { + if (bonds[i].ai == ctx.q_bonds[qbi].ai && bonds[i].aj == ctx.q_bonds[qbi].aj) { qbi++; excluded++; if (i < ctx.n_bonds_solute) solute_excluded++; } else { - ctx.bonds[bi] = ctx.bonds[i]; + bonds[bi] = bonds[i]; bi++; } } @@ -121,12 +133,12 @@ void exclude_qatom_definitions() { solute_excluded = 0; if (ctx.n_qtorsions > 0) { for (int i = 0; i < ctx.n_torsions; i++) { - if (ctx.torsions[i].ai == ctx.q_torsions[qti].ai && ctx.torsions[i].aj == ctx.q_torsions[qti].aj && ctx.torsions[i].ak == ctx.q_torsions[qti].ak && ctx.torsions[i].al == ctx.q_torsions[qti].al) { + if (torsions[i].ai == ctx.q_torsions[qti].ai && torsions[i].aj == ctx.q_torsions[qti].aj && torsions[i].ak == ctx.q_torsions[qti].ak && torsions[i].al == ctx.q_torsions[qti].al) { qti++; excluded++; if (i < ctx.n_torsions_solute) solute_excluded++; } else { - ctx.torsions[ti] = ctx.torsions[i]; + torsions[ti] = torsions[i]; ti++; } } @@ -139,8 +151,11 @@ void exclude_qatom_definitions() { void exclude_all_atoms_excluded_definitions() { auto& ctx = Context::instance(); + auto *excluded = ctx.excluded->cpu_data_p; int n_excl; int ai = 0, bi = 0, ii = 0, ti = 0; + auto &impropers = ctx.impropers->cpu_data_p; + auto &torsions = ctx.torsions->cpu_data_p; // n_excl = 0; // for (int i = 0; i < n_angles; i++) { @@ -173,10 +188,10 @@ void exclude_all_atoms_excluded_definitions() { n_excl = 0; for (int i = 0; i < ctx.n_impropers; i++) { - if (ctx.excluded[ctx.impropers[i].ai - 1] && ctx.excluded[ctx.impropers[i].aj - 1] && ctx.excluded[ctx.impropers[i].ak - 1] && ctx.excluded[ctx.impropers[i].al - 1]) { + if (excluded[impropers[i].ai - 1] && excluded[impropers[i].aj - 1] && excluded[impropers[i].ak - 1] && excluded[impropers[i].al - 1]) { n_excl++; } else { - ctx.impropers[ii] = ctx.impropers[i]; + impropers[ii] = impropers[i]; ii++; } } @@ -185,10 +200,10 @@ void exclude_all_atoms_excluded_definitions() { n_excl = 0; for (int i = 0; i < ctx.n_torsions; i++) { - if (ctx.excluded[ctx.torsions[i].ai - 1] && ctx.excluded[ctx.torsions[i].aj - 1] && ctx.excluded[ctx.torsions[i].ak - 1] && ctx.excluded[ctx.torsions[i].al - 1]) { + if (excluded[torsions[i].ai - 1] && excluded[torsions[i].aj - 1] && excluded[torsions[i].ak - 1] && excluded[torsions[i].al - 1]) { n_excl++; } else { - ctx.torsions[ti] = ctx.torsions[i]; + torsions[ti] = torsions[i]; ti++; } } @@ -204,17 +219,19 @@ void exclude_shaken_definitions() { int si = 0; int ang_i = 0; int ai, aj; + auto &shake_bonds = ctx.shake_bonds->cpu_data_p; excluded = 0; solute_excluded = 0; + auto &bonds = ctx.bonds->cpu_data_p; if (ctx.n_shake_constraints > 0) { for (int i = 0; i < ctx.n_bonds; i++) { - if (ctx.bonds[i].ai == ctx.shake_bonds[si].ai && ctx.bonds[i].aj == ctx.shake_bonds[si].aj) { + if (bonds[i].ai == shake_bonds[si].ai && bonds[i].aj == shake_bonds[si].aj) { si++; excluded++; if (i < ctx.n_bonds_solute) solute_excluded++; } else { - ctx.bonds[bi] = ctx.bonds[i]; + bonds[bi] = bonds[i]; bi++; } } @@ -224,25 +241,26 @@ void exclude_shaken_definitions() { excluded = 0; solute_excluded = 0; + auto &angles = ctx.angles->cpu_data_p; if (ctx.n_shake_constraints > 0) { // Mark angles whose terminal atoms (i and k) match a shaken bond for (int i = 0; i < ctx.n_shake_constraints; i++) { - ai = ctx.shake_bonds[i].ai; - aj = ctx.shake_bonds[i].aj; + ai = shake_bonds[i].ai; + aj = shake_bonds[i].aj; for (int j = 0; j < ctx.n_angles; j++) { - if ((ctx.angles[j].ai == ai && ctx.angles[j].ak == aj) || (ctx.angles[j].ai == aj && ctx.angles[j].ak == ai)) { - ctx.angles[j].code = 0; + if ((angles[j].ai == ai && angles[j].ak == aj) || (angles[j].ai == aj && angles[j].ak == ai)) { + angles[j].code = 0; break; } } } for (int i = 0; i < ctx.n_angles; i++) { - if (ctx.angles[i].code == 0) { + if (angles[i].code == 0) { excluded++; if (i < ctx.n_angles_solute) solute_excluded++; } else { - ctx.angles[ang_i] = ctx.angles[i]; + angles[ang_i] = angles[i]; ang_i++; } } @@ -253,36 +271,50 @@ void exclude_shaken_definitions() { void init_velocities() { auto& ctx = Context::instance(); - ctx.velocities.resize(ctx.n_atoms); + auto &atypes = ctx.atypes->cpu_data_p; + auto &catypes = ctx.catypes->cpu_data_p; + ctx.velocities = std::make_unique>(ctx.n_atoms, true, ctx.run_gpu); + auto &velocities = ctx.velocities->cpu_data_p; // If not previous value set, use a Maxwell distribution to fill velocities double kT = Boltz * ctx.md.initial_temperature; double sd, mass; for (int i = 0; i < ctx.n_atoms; i++) { - mass = ctx.catypes[ctx.atypes[i].code - 1].m; + mass = catypes[atypes[i].code - 1].m; sd = sqrt(kT / mass); - ctx.velocities[i].x = gauss(0, sd); - ctx.velocities[i].y = gauss(0, sd); - ctx.velocities[i].z = gauss(0, sd); + velocities[i].x = gauss(0, sd); + velocities[i].y = gauss(0, sd); + velocities[i].z = gauss(0, sd); + } + + if (ctx.run_gpu) { + ctx.velocities->upload(); } } void init_dvelocities() { auto& ctx = Context::instance(); - ctx.dvelocities.assign(ctx.n_atoms, {}); + ctx.dvelocities = std::make_unique>(ctx.n_atoms, true, ctx.run_gpu); } void init_xcoords() { auto& ctx = Context::instance(); - ctx.xcoords.resize(ctx.n_atoms); + ctx.xcoords = std::make_unique>(ctx.n_atoms, true, ctx.run_gpu); } void init_inv_mass() { auto& ctx = Context::instance(); - ctx.winv.resize(ctx.n_atoms); + auto &atypes = ctx.atypes->cpu_data_p; + auto &catypes = ctx.catypes->cpu_data_p; + ctx.winv = std::make_unique>(ctx.n_atoms, true, ctx.run_gpu); + auto *winv = ctx.winv->cpu_data_p; for (int ai = 0; ai < ctx.n_atoms; ai++) { - ctx.winv[ai] = 1 / ctx.catypes[ctx.atypes[ai].code - 1].m; + winv[ai] = 1 / catypes[atypes[ai].code - 1].m; + } + + if (ctx.run_gpu) { + ctx.winv->upload(); } } @@ -304,10 +336,14 @@ void init_wshells() { auto& ctx = Context::instance(); int n_inshell; double drs, router, ri, dr, Vshell, rshell; + auto &bonds = ctx.bonds->cpu_data_p; + auto &cbonds = ctx.cbonds->cpu_data_p; + auto &angles = ctx.angles->cpu_data_p; + auto &cangles = ctx.cangles->cpu_data_p; if (ctx.mu_w == 0) { // Get water properties from first water molecule - cbond_t cbondw = ctx.cbonds[ctx.bonds[ctx.n_atoms_solute].code - 1]; - cangle_t canglew = ctx.cangles[ctx.angles[ctx.n_atoms_solute].code - 1]; + cbond_t cbondw = cbonds[bonds[ctx.n_atoms_solute].code - 1]; + cangle_t canglew = cangles[angles[ctx.n_atoms_solute].code - 1]; ctx.crg_ow = ctx.unified_ccharge(ctx.n_atoms_solute, 0).charge; @@ -317,7 +353,8 @@ void init_wshells() { drs = wpolr_layer / drouter; ctx.n_shells = (int)floor(-0.5 + sqrt(2 * drs + 0.25)); - ctx.wshells.resize(ctx.n_shells); + ctx.wshells = std::make_unique>(ctx.n_shells, true, ctx.run_gpu); + auto *wshells = ctx.wshells->cpu_data_p; printf("n_shells = %d\n", ctx.n_shells); @@ -325,12 +362,12 @@ void init_wshells() { ctx.n_max_inshell = 0; for (int i = 0; i < ctx.n_shells; i++) { - ctx.wshells[i].avtheta = 0; - ctx.wshells[i].avn_inshell = 0; - ctx.wshells[i].router = router; + wshells[i].avtheta = 0; + wshells[i].avn_inshell = 0; + wshells[i].router = router; dr = drouter * (i + 1); ri = router - dr; - ctx.wshells[i].dr = dr; + wshells[i].dr = dr; Vshell = pow(router, 3) - pow(ri, 3); n_inshell = (int)floor(4 * M_PI / 3 * Vshell * rho_water); if (n_inshell > ctx.n_max_inshell) { @@ -339,13 +376,13 @@ void init_wshells() { rshell = pow(0.5 * (pow(router, 3) + pow(ri, 3)), 1.0 / 3.0); // --- Note below: 0.98750 = (1-1/epsilon) for water - ctx.wshells[i].cstb = ctx.crgQtot * 0.98750 / (rho_water * ctx.mu_w * 4 * M_PI * pow(rshell, 2)); + wshells[i].cstb = ctx.crgQtot * 0.98750 / (rho_water * ctx.mu_w * 4 * M_PI * pow(rshell, 2)); router -= dr; } // rc > wshells[n_shells-1].router - wshells[n_shells-1].dr - printf("shell 0: (%f, %f). shell 1: (%f, %f). shell 2: (%f, %f).\n", ctx.wshells[0].router, ctx.wshells[0].router - ctx.wshells[0].dr, ctx.wshells[1].router, ctx.wshells[1].router - ctx.wshells[1].dr, ctx.wshells[2].router, ctx.wshells[2].router - ctx.wshells[2].dr); + printf("shell 0: (%f, %f). shell 1: (%f, %f). shell 2: (%f, %f).\n", wshells[0].router, wshells[0].router - wshells[0].dr, wshells[1].router, wshells[1].router - wshells[1].dr, wshells[2].router, wshells[2].router - wshells[2].dr); ctx.n_max_inshell = ctx.n_waters; // Make largest a little bigger just in case @@ -356,55 +393,73 @@ void init_wshells() { ctx.list_sh.assign(ctx.n_max_inshell, std::vector(ctx.n_shells, 0)); ctx.nsort.assign(ctx.n_max_inshell, std::vector(ctx.n_shells, 0)); + + if (ctx.run_gpu) { + ctx.wshells->upload(); + } } void init_pshells() { auto& ctx = Context::instance(); + auto &atypes = ctx.atypes->cpu_data_p; + auto &catypes = ctx.catypes->cpu_data_p; + auto &coords_init = ctx.coords_init->cpu_data_p; + auto *excluded = ctx.excluded->cpu_data_p; double mass, r2, rin2; - ctx.heavy = std::make_unique(ctx.n_atoms); - ctx.shell = std::make_unique(ctx.n_atoms); + ctx.heavy = std::make_unique>(ctx.n_atoms, true, ctx.run_gpu); + auto *heavy = ctx.heavy->cpu_data_p; + ctx.shell = std::make_unique>(ctx.n_atoms, true, ctx.run_gpu); + auto *shell = ctx.shell->cpu_data_p; for (int i = 0; i < ctx.n_atoms; ++i) { - ctx.heavy[i] = false; - ctx.shell[i] = false; + heavy[i] = false; + shell[i] = false; } rin2 = pow(shell_default * ctx.topo.exclusion_radius, 2); int n_heavy = 0, n_inshell = 0; for (int i = 0; i < ctx.n_atoms; i++) { - mass = ctx.catypes[ctx.atypes[i].code - 1].m; + mass = catypes[atypes[i].code - 1].m; if (mass < 4.0) { - ctx.heavy[i] = false; + heavy[i] = false; } else { - ctx.heavy[i] = true; + heavy[i] = true; n_heavy++; } - if (ctx.heavy[i] && !ctx.excluded[i] && i < ctx.n_atoms_solute) { - r2 = pow(ctx.coords_top[i].x - ctx.topo.solute_center.x, 2) + pow(ctx.coords_top[i].y - ctx.topo.solute_center.y, 2) + pow(ctx.coords_top[i].z - ctx.topo.solute_center.z, 2); + if (heavy[i] && !excluded[i] && i < ctx.n_atoms_solute) { + r2 = pow(coords_init[i].x - ctx.topo.solute_center.x, 2) + pow(coords_init[i].y - ctx.topo.solute_center.y, 2) + pow(coords_init[i].z - ctx.topo.solute_center.z, 2); if (r2 > rin2) { - ctx.shell[i] = true; + shell[i] = true; n_inshell++; } else { - ctx.shell[i] = false; + shell[i] = false; } } } + if (ctx.run_gpu) { + ctx.heavy->upload(); + ctx.shell->upload(); + } + printf("n_heavy = %d, n_inshell = %d\n", n_heavy, n_inshell); } // Marks heavy atoms for the shell/excluded arrays. // Shared between switch-atom and centroid shell init paths. static int mark_heavy_atoms(Context& ctx) { + auto &atypes = ctx.atypes->cpu_data_p; + auto &catypes = ctx.catypes->cpu_data_p; + auto *heavy = ctx.heavy->cpu_data_p; int n_heavy = 0; for (int i = 0; i < ctx.n_atoms; i++) { - double mass = ctx.catypes[ctx.atypes[i].code - 1].m; + double mass = catypes[atypes[i].code - 1].m; if (mass < 4.0) { - ctx.heavy[i] = false; + heavy[i] = false; } else { - ctx.heavy[i] = true; + heavy[i] = true; n_heavy++; } } @@ -415,13 +470,18 @@ static int mark_heavy_atoms(Context& ctx) { // Used when iuse_switch_atom == 1. void init_pshells_with_switch_atoms() { auto& ctx = Context::instance(); + auto &coords_init = ctx.coords_init->cpu_data_p; + auto *excluded = ctx.excluded->cpu_data_p; + bool *heavy = nullptr; double r2, rin2; - ctx.heavy = std::make_unique(ctx.n_atoms); - ctx.shell = std::make_unique(ctx.n_atoms); + ctx.heavy = std::make_unique>(ctx.n_atoms, true, ctx.run_gpu); + heavy = ctx.heavy->cpu_data_p; + ctx.shell = std::make_unique>(ctx.n_atoms, true, ctx.run_gpu); + auto *shell = ctx.shell->cpu_data_p; for (int i = 0; i < ctx.n_atoms; ++i) { - ctx.heavy[i] = false; - ctx.shell[i] = false; + heavy[i] = false; + shell[i] = false; } rin2 = pow(ctx.md.shell_radius, 2); @@ -431,11 +491,11 @@ void init_pshells_with_switch_atoms() { for (int grp = 0; grp < ctx.n_cgrps_solute; grp++) { cgrp_t cgrp = ctx.charge_groups[grp]; int i = cgrp.iswitch - 1; - if (ctx.heavy[i] && !ctx.excluded[i] && i < ctx.n_atoms_solute) { - r2 = pow(ctx.coords_top[i].x - ctx.topo.solute_center.x, 2) + pow(ctx.coords_top[i].y - ctx.topo.solute_center.y, 2) + pow(ctx.coords_top[i].z - ctx.topo.solute_center.z, 2); + if (heavy[i] && !excluded[i] && i < ctx.n_atoms_solute) { + r2 = pow(coords_init[i].x - ctx.topo.solute_center.x, 2) + pow(coords_init[i].y - ctx.topo.solute_center.y, 2) + pow(coords_init[i].z - ctx.topo.solute_center.z, 2); bool in_shell = r2 > rin2; for (int j = 0; j < cgrp.n_atoms; j++) { - ctx.shell[cgrp.a[j] - 1] = in_shell; + shell[cgrp.a[j] - 1] = in_shell; if (in_shell) { n_inshell++; } @@ -443,6 +503,11 @@ void init_pshells_with_switch_atoms() { } } + if (ctx.run_gpu) { + ctx.heavy->upload(); + ctx.shell->upload(); + } + printf("(switch atoms): n_heavy = %d, n_inshell = %d\n", n_heavy, n_inshell); } @@ -450,13 +515,18 @@ void init_pshells_with_switch_atoms() { // Used when iuse_switch_atom == 0. void init_pshells_with_centroids() { auto& ctx = Context::instance(); + auto &coords_init = ctx.coords_init->cpu_data_p; + auto *excluded = ctx.excluded->cpu_data_p; + bool *heavy = nullptr; double r2, rin2; - ctx.heavy = std::make_unique(ctx.n_atoms); - ctx.shell = std::make_unique(ctx.n_atoms); + ctx.heavy = std::make_unique>(ctx.n_atoms, true, ctx.run_gpu); + heavy = ctx.heavy->cpu_data_p; + ctx.shell = std::make_unique>(ctx.n_atoms, true, ctx.run_gpu); + auto *shell = ctx.shell->cpu_data_p; for (int i = 0; i < ctx.n_atoms; ++i) { - ctx.heavy[i] = false; - ctx.shell[i] = false; + heavy[i] = false; + shell[i] = false; } rin2 = pow(ctx.md.shell_radius, 2); @@ -466,14 +536,14 @@ void init_pshells_with_centroids() { for (int grp = 0; grp < ctx.n_cgrps_solute; grp++) { cgrp_t cgrp = ctx.charge_groups[grp]; int i = cgrp.iswitch - 1; - if (ctx.heavy[i] && !ctx.excluded[i] && i < ctx.n_atoms_solute) { + if (heavy[i] && !excluded[i] && i < ctx.n_atoms_solute) { // Compute centroid of charge group double cx = 0, cy = 0, cz = 0; for (int j = 0; j < cgrp.n_atoms; j++) { int ai = cgrp.a[j] - 1; - cx += ctx.coords_top[ai].x; - cy += ctx.coords_top[ai].y; - cz += ctx.coords_top[ai].z; + cx += coords_init[ai].x; + cy += coords_init[ai].y; + cz += coords_init[ai].z; } cx /= cgrp.n_atoms; cy /= cgrp.n_atoms; @@ -482,7 +552,7 @@ void init_pshells_with_centroids() { r2 = pow(cx - ctx.topo.solute_center.x, 2) + pow(cy - ctx.topo.solute_center.y, 2) + pow(cz - ctx.topo.solute_center.z, 2); bool in_shell = r2 > rin2; for (int j = 0; j < cgrp.n_atoms; j++) { - ctx.shell[cgrp.a[j] - 1] = in_shell; + shell[cgrp.a[j] - 1] = in_shell; if (in_shell) { n_inshell++; } @@ -490,13 +560,18 @@ void init_pshells_with_centroids() { } } + if (ctx.run_gpu) { + ctx.heavy->upload(); + ctx.shell->upload(); + } + printf("(centroids): n_heavy = %d, n_inshell = %d\n", n_heavy, n_inshell); } void init_restrseqs() { auto& ctx = Context::instance(); ctx.n_restrseqs = 1; - ctx.restrseqs.resize(1); + ctx.restrseqs = std::make_unique>(1, true, ctx.run_gpu); restrseq_t seq; seq.ai = 1; @@ -505,7 +580,11 @@ void init_restrseqs() { seq.ih = 0; seq.to_center = 2; - ctx.restrseqs[0] = seq; + ctx.restrseqs->cpu_data_p[0] = seq; + + if (ctx.run_gpu) { + ctx.restrseqs->upload(); + } } /* ============================================= @@ -515,56 +594,67 @@ void init_restrseqs() { void init_shake() { auto& ctx = Context::instance(); + auto *heavy = ctx.heavy->cpu_data_p; + auto *excluded = ctx.excluded->cpu_data_p; int ai, aj; int mol = 0; int shake; int n_solute_shake_constraints = 0; double excl_shake = 0; + auto &bonds = ctx.bonds->cpu_data_p; + auto &cbonds = ctx.cbonds->cpu_data_p; ctx.n_shake_constraints = 0; - ctx.mol_n_shakes.assign(ctx.n_molecules, 0); + ctx.mol_n_shakes = std::make_unique>(ctx.n_molecules, true, ctx.run_gpu); + auto *mol_n_shakes = ctx.mol_n_shakes->cpu_data_p; for (int bi = 0; bi < ctx.n_bonds; bi++) { - ai = ctx.bonds[bi].ai - 1; - aj = ctx.bonds[bi].aj - 1; + ai = bonds[bi].ai - 1; + aj = bonds[bi].aj - 1; while (mol + 1 < ctx.n_molecules && ai + 1 >= ctx.molecules[mol + 1]) { // new molecule mol += 1; } - if ((ctx.md.shake_hydrogens && (!ctx.heavy[ai] || !ctx.heavy[aj])) || (ctx.md.shake_solute && ai + 1 <= ctx.n_atoms_solute) || (ctx.md.shake_solvent && ai + 1 > ctx.n_atoms_solute)) { - ctx.mol_n_shakes[mol]++; + if ((ctx.md.shake_hydrogens && (!heavy[ai] || !heavy[aj])) || (ctx.md.shake_solute && ai + 1 <= ctx.n_atoms_solute) || (ctx.md.shake_solvent && ai + 1 > ctx.n_atoms_solute)) { + mol_n_shakes[mol]++; ctx.n_shake_constraints++; - if (ctx.excluded[ai]) excl_shake += 0.5; - if (ctx.excluded[aj]) excl_shake += 0.5; + if (excluded[ai]) excl_shake += 0.5; + if (excluded[aj]) excl_shake += 0.5; } } - ctx.shake_bonds.resize(ctx.n_shake_constraints); + ctx.shake_bonds = std::make_unique>(ctx.n_shake_constraints, true, ctx.run_gpu); + auto *shake_bonds = ctx.shake_bonds->cpu_data_p; mol = 0; shake = 0; for (int bi = 0; bi < ctx.n_bonds; bi++) { - ai = ctx.bonds[bi].ai - 1; - aj = ctx.bonds[bi].aj - 1; + ai = bonds[bi].ai - 1; + aj = bonds[bi].aj - 1; while (mol + 1 < ctx.n_molecules && ai + 1 >= ctx.molecules[mol + 1]) { // new molecule mol += 1; } - if ((ctx.md.shake_hydrogens && (!ctx.heavy[ai] || !ctx.heavy[aj])) || (ctx.md.shake_solute && ai + 1 <= ctx.n_atoms_solute) || (ctx.md.shake_solvent && ai + 1 > ctx.n_atoms_solute)) { - ctx.shake_bonds[shake].ai = ai + 1; - ctx.shake_bonds[shake].aj = aj + 1; - ctx.shake_bonds[shake].dist2 = pow(ctx.cbonds[ctx.bonds[bi].code - 1].b0, 2); + if ((ctx.md.shake_hydrogens && (!heavy[ai] || !heavy[aj])) || (ctx.md.shake_solute && ai + 1 <= ctx.n_atoms_solute) || (ctx.md.shake_solvent && ai + 1 > ctx.n_atoms_solute)) { + shake_bonds[shake].ai = ai + 1; + shake_bonds[shake].aj = aj + 1; + shake_bonds[shake].dist2 = pow(cbonds[bonds[bi].code - 1].b0, 2); shake++; } } // Get total number of shake constraints in solute (used for separate scaling of temperatures) for (int i = 0; i < ctx.n_molecules - ctx.n_waters; i++) { - n_solute_shake_constraints += ctx.mol_n_shakes[i]; + n_solute_shake_constraints += mol_n_shakes[i]; + } + + if (ctx.run_gpu) { + ctx.mol_n_shakes->upload(); + ctx.shake_bonds->upload(); } ctx.Ndegf = 3 * ctx.n_atoms - ctx.n_shake_constraints; @@ -614,6 +704,10 @@ void init_patoms() { // states above 0 get appended as extra codes. static void init_unified_atom_parameters() { auto& ctx = Context::instance(); + auto &charges = ctx.charges->cpu_data_p; + auto &ccharges = ctx.ccharges->cpu_data_p; + auto &atypes = ctx.atypes->cpu_data_p; + auto &catypes = ctx.catypes->cpu_data_p; const int n_states = ctx.n_parameter_states(); ctx.atom_to_qi.assign(ctx.n_atoms, -1); @@ -623,8 +717,10 @@ static void init_unified_atom_parameters() { const int n_extra_q_states = n_states > 0 ? n_states - 1 : 0; const int n_codes = ctx.n_atoms + ctx.n_qatoms * n_extra_q_states; - ctx.unified_ccharges.resize(n_codes); - ctx.unified_catypes.resize(n_codes); + ctx.unified_ccharges = std::make_unique>(n_codes, true, ctx.run_gpu); + ctx.unified_catypes = std::make_unique>(n_codes, true, ctx.run_gpu); + auto *unified_ccharges = ctx.unified_ccharges->cpu_data_p; + auto *unified_catypes = ctx.unified_catypes->cpu_data_p; for (int atom_idx = 0; atom_idx < ctx.n_atoms; atom_idx++) { const int unified_code = atom_idx + 1; @@ -641,13 +737,13 @@ static void init_unified_atom_parameters() { resolved_type = ctx.q_catypes[ctx.q_atypes[qi].code - 1]; resolved_type.code = unified_code; } else { - resolved_charge.charge = ctx.ccharges[ctx.charges[atom_idx].code - 1].charge; - resolved_type = ctx.catypes[ctx.atypes[atom_idx].code - 1]; + resolved_charge.charge = ccharges[charges[atom_idx].code - 1].charge; + resolved_type = catypes[atypes[atom_idx].code - 1]; resolved_type.code = unified_code; } - ctx.unified_ccharges[unified_code - 1] = resolved_charge; - ctx.unified_catypes[unified_code - 1] = resolved_type; + unified_ccharges[unified_code - 1] = resolved_charge; + unified_catypes[unified_code - 1] = resolved_type; } for (int state = 1; state < n_states; state++) { @@ -661,8 +757,8 @@ static void init_unified_atom_parameters() { catype_t resolved_type = ctx.q_catypes[ctx.q_atypes[qi + state * ctx.n_qatoms].code - 1]; resolved_type.code = unified_code; - ctx.unified_ccharges[unified_code - 1] = resolved_charge; - ctx.unified_catypes[unified_code - 1] = resolved_type; + unified_ccharges[unified_code - 1] = resolved_charge; + unified_catypes[unified_code - 1] = resolved_type; } } } @@ -781,7 +877,7 @@ void init_variables() { ctx.EQ_nonbond_qp.assign(ctx.n_lambdas, {}); ctx.EQ_nonbond_qw.assign(ctx.n_lambdas, {}); ctx.EQ_nonbond_qx.assign(ctx.n_lambdas, {}); - ctx.EQ_restraint.assign(ctx.n_lambdas, {}); + ctx.EQ_restraint = std::make_unique>(ctx.n_lambdas, true, ctx.run_gpu); if (ctx.n_shake_constraints > 0) { initial_shaking(); @@ -798,6 +894,11 @@ void init_variables() { void clean_variables() { auto& ctx = Context::instance(); + ctx.angles.reset(); + ctx.cangles.reset(); + ctx.bonds.reset(); + ctx.cbonds.reset(); + ctx.coords_init.reset(); for (auto& charge_group : ctx.charge_groups) { delete[] charge_group.a; @@ -805,26 +906,42 @@ void clean_variables() { } ctx.charge_groups.clear(); - ctx.angles.clear(); - ctx.atypes.clear(); - ctx.bonds.clear(); - ctx.cangles.clear(); - ctx.catypes.clear(); - ctx.cbonds.clear(); - ctx.ccharges.clear(); - ctx.charges.clear(); + ctx.atypes.reset(); + ctx.catypes.reset(); + ctx.ccharges.reset(); + ctx.charges.reset(); ctx.atom_to_qi.clear(); - ctx.unified_ccharges.clear(); - ctx.unified_catypes.clear(); - ctx.cimpropers.clear(); - ctx.coords.clear(); - ctx.coords_top.clear(); - ctx.ctorsions.clear(); + ctx.unified_ccharges.reset(); + ctx.unified_catypes.reset(); + ctx.ngbrs_14.reset(); + ctx.ngbrs_14_builder.clear(); + ctx.p_atoms_list.reset(); + ctx.w_atoms_list.reset(); + ctx.q_atoms_list.reset(); + ctx.charge_table_all.reset(); + ctx.charge_pair_products.reset(); + ctx.p_charge_types.reset(); + ctx.w_charge_types.reset(); + ctx.q_charge_types.reset(); + ctx.catype_table_all.reset(); + ctx.catype_pair_params.reset(); + ctx.p_catype_types.reset(); + ctx.w_catype_types.reset(); + ctx.q_catype_types.reset(); + ctx.catype_to_type_host.clear(); + ctx.n_charge_types = 0; + ctx.zero_charge_type = -1; + ctx.n_catype_types = 0; + ctx.zero_catype_type = -1; + ctx.cimpropers.reset(); + ctx.coords.reset(); + ctx.ctorsions.reset(); ctx.excluded.reset(); ctx.heavy.reset(); - ctx.impropers.clear(); - ctx.torsions.clear(); - ctx.LJ_matrix.clear(); + ctx.impropers.reset(); + ctx.winv.reset(); + ctx.torsions.reset(); + ctx.LJ_matrix.reset(); ctx.molecules.clear(); ctx.q_angcouples.clear(); ctx.q_atoms.clear(); @@ -841,30 +958,35 @@ void clean_variables() { ctx.q_charges.clear(); ctx.q_angles.clear(); ctx.q_bonds.clear(); - ctx.q_elscales.clear(); + ctx.q_elscales.reset(); ctx.q_exclpairs.clear(); ctx.q_impropers.clear(); ctx.q_shakes.clear(); ctx.q_softcores.clear(); ctx.q_torsions.clear(); - ctx.wshells.clear(); + ctx.lambdas.reset(); + ctx.wshells.reset(); ctx.theta.clear(); ctx.theta0.clear(); ctx.tdum.clear(); ctx.list_sh.clear(); ctx.nsort.clear(); - ctx.restrseqs.clear(); + ctx.restrseqs.reset(); + ctx.restrangs.reset(); + ctx.restrdists.reset(); + ctx.restrspos.reset(); + ctx.restrwalls.reset(); ctx.shell.reset(); - ctx.velocities.clear(); - ctx.dvelocities.clear(); - ctx.xcoords.clear(); - ctx.mol_n_shakes.clear(); - ctx.shake_bonds.clear(); + ctx.velocities.reset(); + ctx.dvelocities.reset(); + ctx.xcoords.reset(); + ctx.mol_n_shakes.reset(); + ctx.shake_bonds.reset(); ctx.EQ_total.clear(); ctx.EQ_bond.clear(); ctx.EQ_nonbond_qq.clear(); ctx.EQ_nonbond_qp.clear(); ctx.EQ_nonbond_qw.clear(); ctx.EQ_nonbond_qx.clear(); - ctx.EQ_restraint.clear(); + ctx.EQ_restraint.reset(); } diff --git a/src/core/common/src/output.cpp b/src/core/common/src/output.cpp index ea734cfd..d70d086a 100644 --- a/src/core/common/src/output.cpp +++ b/src/core/common/src/output.cpp @@ -3,6 +3,8 @@ void print_energies() { auto& host = Context::instance(); + auto *lambdas = host.lambdas->cpu_data_p; + auto *EQ_restraint = host.EQ_restraint->cpu_data_p; printf("[temperature]\n"); printf("Temp\t%f\n", host.Temp); printf("\n"); @@ -34,8 +36,8 @@ void print_energies() { printf("[q-energies]\n"); printf("lambda\tSUM\tUbond\tUangle\tUtor\tUimp\tUcoul\tUvdw\tUrestr\n"); for (int state = 0; state < host.n_lambdas; state++) { - printf("%f\t%f\t%f\t%f\t%f\t%f\t%f\t%f\t%f\n", host.lambdas[state], host.EQ_total[state].Utot, host.EQ_bond[state].Ubond, - host.EQ_bond[state].Uangle, host.EQ_bond[state].Utor, host.EQ_bond[state].Uimp, host.EQ_nonbond_qx[state].Ucoul, host.EQ_nonbond_qx[state].Uvdw, host.EQ_restraint[state].Urestr); + printf("%f\t%f\t%f\t%f\t%f\t%f\t%f\t%f\t%f\n", lambdas[state], host.EQ_total[state].Utot, host.EQ_bond[state].Ubond, + host.EQ_bond[state].Uangle, host.EQ_bond[state].Utor, host.EQ_bond[state].Uimp, host.EQ_nonbond_qx[state].Ucoul, host.EQ_nonbond_qx[state].Uvdw, EQ_restraint[state].Urestr); } printf("\n"); @@ -64,6 +66,7 @@ void write_header(const char* filename) { // Write header (number of atoms & lambdas) to output file void write_energy_header() { auto& ctx = Context::instance(); + auto *lambdas = ctx.lambdas->cpu_data_p; FILE* fp; char path[1024]; @@ -77,7 +80,7 @@ void write_energy_header() { fprintf(fp, "%d\n", ctx.n_lambdas); for (int state = 0; state < ctx.n_lambdas; state++) { - fprintf(fp, "%f\n", ctx.lambdas[state]); + fprintf(fp, "%f\n", lambdas[state]); } fclose(fp); @@ -86,6 +89,7 @@ void write_energy_header() { // Write step number, coordinates of atoms to coordinate output file void write_coords(int iteration) { auto& ctx = Context::instance(); + auto &coords = ctx.coords->cpu_data_p; if (iteration % ctx.md.trajectory != 0) return; FILE* fp; int i; @@ -97,7 +101,7 @@ void write_coords(int iteration) { fprintf(fp, "%d\n", iteration / ctx.md.trajectory); for (i = 0; i < ctx.n_atoms; i++) { - fprintf(fp, "%f;%f;%f\n", ctx.coords[i].x, ctx.coords[i].y, ctx.coords[i].z); + fprintf(fp, "%f;%f;%f\n", coords[i].x, coords[i].y, coords[i].z); } fclose(fp); @@ -106,6 +110,7 @@ void write_coords(int iteration) { // Write step number, velocities of atoms to coordinate output file void write_velocities(int iteration) { auto& ctx = Context::instance(); + auto &velocities = ctx.velocities->cpu_data_p; if (iteration % ctx.md.trajectory != 0) return; FILE* fp; int i; @@ -117,7 +122,7 @@ void write_velocities(int iteration) { fprintf(fp, "%d\n", iteration / ctx.md.trajectory); for (i = 0; i < ctx.n_atoms; i++) { - fprintf(fp, "%f;%f;%f\n", ctx.velocities[i].x, ctx.velocities[i].y, ctx.velocities[i].z); + fprintf(fp, "%f;%f;%f\n", velocities[i].x, velocities[i].y, velocities[i].z); } fclose(fp); @@ -126,6 +131,8 @@ void write_velocities(int iteration) { // Write step number, energies of atoms to coordinate output file void write_energies(int iteration) { auto& ctx = Context::instance(); + auto *lambdas = ctx.lambdas->cpu_data_p; + auto *EQ_restraint = ctx.EQ_restraint->cpu_data_p; if (iteration % ctx.md.energy != 0) return; FILE* fp; @@ -167,8 +174,8 @@ void write_energies(int iteration) { fprintf(fp, "[q-energies]\n"); fprintf(fp, "lambda\tSUM\tUbond\tUangle\tUtor\tUimp\tUcoul\tUvdw\tUrestr\n"); for (int state = 0; state < ctx.n_lambdas; state++) { - fprintf(fp, "%f\t%f\t%f\t%f\t%f\t%f\t%f\t%f\t%f\n", ctx.lambdas[state], ctx.EQ_total[state].Utot, ctx.EQ_bond[state].Ubond, - ctx.EQ_bond[state].Uangle, ctx.EQ_bond[state].Utor, ctx.EQ_bond[state].Uimp, ctx.EQ_nonbond_qx[state].Ucoul, ctx.EQ_nonbond_qx[state].Uvdw, ctx.EQ_restraint[state].Urestr); + fprintf(fp, "%f\t%f\t%f\t%f\t%f\t%f\t%f\t%f\t%f\n", lambdas[state], ctx.EQ_total[state].Utot, ctx.EQ_bond[state].Ubond, + ctx.EQ_bond[state].Uangle, ctx.EQ_bond[state].Utor, ctx.EQ_bond[state].Uimp, ctx.EQ_nonbond_qx[state].Ucoul, ctx.EQ_nonbond_qx[state].Uvdw, EQ_restraint[state].Urestr); } fprintf(fp, "\n"); @@ -179,4 +186,4 @@ void write_energies(int iteration) { fprintf(fp, "\n"); fclose(fp); -} \ No newline at end of file +} diff --git a/src/core/common/src/parse.cpp b/src/core/common/src/parse.cpp index 8a6f3b00..fc0ca7b6 100644 --- a/src/core/common/src/parse.cpp +++ b/src/core/common/src/parse.cpp @@ -215,18 +215,23 @@ void init_md(const char* filename) { #ifdef VERBOSE printf("reading in %d lambdas (%s in file)\n", ctx.n_lambdas, file.buffer[k][1]); #endif - ctx.lambdas.resize(ctx.n_lambdas); + ctx.lambdas = std::make_unique>(ctx.n_lambdas, true, ctx.run_gpu); + auto *lambdas = ctx.lambdas->cpu_data_p; k++; for (int i = 0; i < ctx.n_lambdas; i++) { - ctx.lambdas[i] = strtod(file.buffer[k][0], &eptr); + lambdas[i] = strtod(file.buffer[k][0], &eptr); k++; } + if (ctx.run_gpu) { + ctx.lambdas->upload(); + } // [sequence_restraints] printf("k = %d\n", k); ctx.n_restrseqs = atoi(file.buffer[k][0]); printf("reading in %d sequence restraints (%s in file)\n", ctx.n_restrseqs, file.buffer[k][1]); - ctx.restrseqs.resize(ctx.n_restrseqs); + ctx.restrseqs = std::make_unique>(ctx.n_restrseqs, true, ctx.run_gpu); + auto &restrseqs = ctx.restrseqs->cpu_data_p; k++; for (int i = 0; i < ctx.n_restrseqs; i++) { restrseq_t restrseq; @@ -237,14 +242,19 @@ void init_md(const char* filename) { restrseq.ih = strcmp(file.buffer[k][3], "1") == 0; restrseq.to_center = atoi(file.buffer[k][4]); - ctx.restrseqs[i] = restrseq; + restrseqs[i] = restrseq; k++; } + if (ctx.run_gpu) { + ctx.restrseqs->upload(); + } + // [position_restraints] ctx.n_restrspos = atoi(file.buffer[k][0]); printf("reading in %d position restraints\n (%s in file )", ctx.n_restrspos, file.buffer[k][1]); - ctx.restrspos.resize(ctx.n_restrspos); + ctx.restrspos = std::make_unique>(ctx.n_restrspos, true, ctx.run_gpu); + auto &restrspos = ctx.restrspos->cpu_data_p; k++; for (int i = 0; i < ctx.n_restrspos; i++) { restrpos_t restrpos; @@ -264,13 +274,18 @@ void init_md(const char* filename) { restrpos.x = r_x; restrpos.k = r_k; - ctx.restrspos[i] = restrpos; + restrspos[i] = restrpos; k++; } + if (ctx.run_gpu) { + ctx.restrspos->upload(); + } + // [distance_restraints] ctx.n_restrdists = atoi(file.buffer[k][0]); - ctx.restrdists.resize(ctx.n_restrdists); + ctx.restrdists = std::make_unique>(ctx.n_restrdists, true, ctx.run_gpu); + auto &restrdists = ctx.restrdists->cpu_data_p; printf("reading in %d distance restraints (%s in file)\n", ctx.n_restrdists, file.buffer[k][1]); k++; for (int i = 0; i < ctx.n_restrdists; i++) { @@ -283,13 +298,18 @@ void init_md(const char* filename) { restrdist.k = strtod(file.buffer[k][4], &eptr); restrdist.ipsi = atoi(file.buffer[k][5]); - ctx.restrdists[i] = restrdist; + restrdists[i] = restrdist; k++; } + if (ctx.run_gpu) { + ctx.restrdists->upload(); + } + // [angle_restraints] ctx.n_restrangs = atoi(file.buffer[k][0]); - ctx.restrangs.resize(ctx.n_restrangs); + ctx.restrangs = std::make_unique>(ctx.n_restrangs, true, ctx.run_gpu); + auto &restrangs = ctx.restrangs->cpu_data_p; printf("reading in %d angle restraints (%s in file)\n", ctx.n_restrangs, file.buffer[k][1]); k++; for (int i = 0; i < ctx.n_restrangs; i++) { @@ -302,13 +322,18 @@ void init_md(const char* filename) { restrang.ang = strtod(file.buffer[k][4], &eptr); restrang.k = strtod(file.buffer[k][5], &eptr); - ctx.restrangs[i] = restrang; + restrangs[i] = restrang; k++; } + if (ctx.run_gpu) { + ctx.restrangs->upload(); + } + // [wall_restraints] ctx.n_restrwalls = atoi(file.buffer[k][0]); - ctx.restrwalls.resize(ctx.n_restrwalls); + ctx.restrwalls = std::make_unique>(ctx.n_restrwalls, true, ctx.run_gpu); + auto &restrwalls = ctx.restrwalls->cpu_data_p; printf("reading in %d wall restraints (%s in file)\n", ctx.n_restrwalls, file.buffer[k][1]); k++; for (int i = 0; i < ctx.n_restrwalls; i++) { @@ -322,10 +347,14 @@ void init_md(const char* filename) { restrwall.aMorse = strtod(file.buffer[k][5], &eptr); restrwall.ih = strcmp(file.buffer[k][6], "1") == 0; - ctx.restrwalls[i] = restrwall; + restrwalls[i] = restrwall; k++; } + if (ctx.run_gpu) { + ctx.restrwalls->upload(); + } + clean_csv(file); } @@ -377,7 +406,6 @@ void init_coords(const char* filename) { auto& ctx = Context::instance(); csvfile_t file = read_csv(filename, 1, ctx.base_folder.c_str()); - ctx.n_coords = 0; ctx.n_atoms = 0; ctx.n_atoms_solute = 0; @@ -386,22 +414,28 @@ void init_coords(const char* filename) { return; } - ctx.n_coords = atoi(file.buffer[0][0]); - ctx.n_atoms = ctx.n_coords; + ctx.n_atoms = atoi(file.buffer[0][0]); ctx.n_atoms_solute = atoi(file.buffer[1][0]); - ctx.coords.resize(ctx.n_atoms); - ctx.coords_top.resize(ctx.n_atoms); + ctx.coords_init = std::make_unique>(ctx.n_atoms, true, ctx.run_gpu); + ctx.coords = std::make_unique>(ctx.n_atoms, true, ctx.run_gpu); + auto &coords = ctx.coords->cpu_data_p; + auto &coords_init = ctx.coords_init->cpu_data_p; for (int i = 0; i < file.n_lines; i++) { char* eptr; - ctx.coords[i].x = strtod(file.buffer[i + 2][0], &eptr); - ctx.coords[i].y = strtod(file.buffer[i + 2][1], &eptr); - ctx.coords[i].z = strtod(file.buffer[i + 2][2], &eptr); + coords[i].x = strtod(file.buffer[i + 2][0], &eptr); + coords[i].y = strtod(file.buffer[i + 2][1], &eptr); + coords[i].z = strtod(file.buffer[i + 2][2], &eptr); + + coords_init[i] = coords[i]; + } - ctx.coords_top[i] = ctx.coords[i]; + if (ctx.run_gpu) { + ctx.coords->upload(); + ctx.coords_init->upload(); } clean_csv(file); @@ -421,8 +455,8 @@ void init_bonds(const char* filename) { ctx.n_bonds = atoi(file.buffer[0][0]); ctx.n_bonds_solute = atoi(file.buffer[1][0]); - - ctx.bonds.resize(ctx.n_bonds); + ctx.bonds = std::make_unique>(ctx.n_bonds, true, ctx.run_gpu); + auto &bonds = ctx.bonds->cpu_data_p; for (int i = 0; i < ctx.n_bonds; i++) { bond_t bond; @@ -431,7 +465,11 @@ void init_bonds(const char* filename) { bond.aj = atoi(file.buffer[i + 2][1]); bond.code = atoi(file.buffer[i + 2][2]); - ctx.bonds[i] = bond; + bonds[i] = bond; + } + + if (ctx.run_gpu) { + ctx.bonds->upload(); } clean_csv(file); @@ -449,7 +487,8 @@ void init_cbonds(const char* filename) { } ctx.n_cbonds = atoi(file.buffer[0][0]); - ctx.cbonds.resize(ctx.n_cbonds); + ctx.cbonds = std::make_unique>(ctx.n_cbonds, true, ctx.run_gpu); + auto &cbonds = ctx.cbonds->cpu_data_p; for (int i = 0; i < ctx.n_cbonds; i++) { cbond_t cbond; @@ -459,7 +498,11 @@ void init_cbonds(const char* filename) { cbond.kb = strtod(file.buffer[i + 1][1], &eptr); cbond.b0 = strtod(file.buffer[i + 1][2], &eptr); - ctx.cbonds[i] = cbond; + cbonds[i] = cbond; + } + + if (ctx.run_gpu) { + ctx.cbonds->upload(); } clean_csv(file); @@ -479,8 +522,8 @@ void init_angles(const char* filename) { ctx.n_angles = atoi(file.buffer[0][0]); ctx.n_angles_solute = atoi(file.buffer[1][0]); - - ctx.angles.resize(ctx.n_angles); + ctx.angles = std::make_unique>(ctx.n_angles, true, ctx.run_gpu); + auto &angles = ctx.angles->cpu_data_p; for (int i = 0; i < ctx.n_angles; i++) { angle_t angle; @@ -490,9 +533,14 @@ void init_angles(const char* filename) { angle.ak = atoi(file.buffer[i + 2][2]); angle.code = atoi(file.buffer[i + 2][3]); - ctx.angles[i] = angle; + angles[i] = angle; } + if (ctx.run_gpu) { + ctx.angles->upload(); + } + + clean_csv(file); } @@ -508,7 +556,8 @@ void init_cangles(const char* filename) { } ctx.n_cangles = atoi(file.buffer[0][0]); - ctx.cangles.resize(ctx.n_cangles); + ctx.cangles = std::make_unique>(ctx.n_cangles, true, ctx.run_gpu); + auto &cangles = ctx.cangles->cpu_data_p; for (int i = 0; i < ctx.n_cangles; i++) { cangle_t cangle; @@ -518,7 +567,11 @@ void init_cangles(const char* filename) { cangle.kth = strtod(file.buffer[i + 1][1], &eptr); cangle.th0 = strtod(file.buffer[i + 1][2], &eptr); - ctx.cangles[i] = cangle; + cangles[i] = cangle; + } + + if (ctx.run_gpu) { + ctx.cangles->upload(); } clean_csv(file); @@ -526,9 +579,10 @@ void init_cangles(const char* filename) { void init_excluded(const char* filename) { auto& ctx = Context::instance(); - ctx.excluded = std::make_unique(ctx.n_atoms); + ctx.excluded = std::make_unique>(ctx.n_atoms, true, ctx.run_gpu); + auto *excluded = ctx.excluded->cpu_data_p; for (int i = 0; i < ctx.n_atoms; ++i) { - ctx.excluded[i] = false; + excluded[i] = false; } ctx.n_excluded = 0; @@ -562,10 +616,14 @@ void init_excluded(const char* filename) { ctx.n_excluded = 0; for (int i = 0; i < ctx.n_atoms && i < read_len; i++) { bool excl = (line[i] == '1'); - ctx.excluded[i] = excl; + excluded[i] = excl; if (excl) ctx.n_excluded++; } + if (ctx.run_gpu) { + ctx.excluded->upload(); + } + printf("Number of excluded atoms: %d\n", ctx.n_excluded); free(line); @@ -586,8 +644,8 @@ void init_torsions(const char* filename) { ctx.n_torsions = atoi(file.buffer[0][0]); ctx.n_torsions_solute = atoi(file.buffer[1][0]); - - ctx.torsions.resize(ctx.n_torsions); + ctx.torsions = std::make_unique>(ctx.n_torsions, true, ctx.run_gpu); + auto &torsions = ctx.torsions->cpu_data_p; for (int i = 0; i < ctx.n_torsions; i++) { torsion_t torsion; @@ -598,7 +656,11 @@ void init_torsions(const char* filename) { torsion.al = atoi(file.buffer[i + 2][3]); torsion.code = atoi(file.buffer[i + 2][4]); - ctx.torsions[i] = torsion; + torsions[i] = torsion; + } + + if (ctx.run_gpu) { + ctx.torsions->upload(); } clean_csv(file); @@ -616,8 +678,8 @@ void init_ctorsions(const char* filename) { } ctx.n_ctorsions = atoi(file.buffer[0][0]); - - ctx.ctorsions.resize(ctx.n_ctorsions); + ctx.ctorsions = std::make_unique>(ctx.n_ctorsions, true, ctx.run_gpu); + auto &ctorsions = ctx.ctorsions->cpu_data_p; for (int i = 0; i < ctx.n_ctorsions; i++) { ctorsion_t ctorsion; @@ -630,7 +692,11 @@ void init_ctorsions(const char* filename) { ctorsion.paths = strtod(file.buffer[i + 1][4], &eptr); ctorsion.paths = 1.0 / (ctorsion.paths); - ctx.ctorsions[i] = ctorsion; + ctorsions[i] = ctorsion; + } + + if (ctx.run_gpu) { + ctx.ctorsions->upload(); } clean_csv(file); @@ -650,8 +716,8 @@ void init_impropers(const char* filename) { ctx.n_impropers = atoi(file.buffer[0][0]); ctx.n_impropers_solute = atoi(file.buffer[1][0]); - - ctx.impropers.resize(ctx.n_impropers); + ctx.impropers = std::make_unique>(ctx.n_impropers, true, ctx.run_gpu); + auto &impropers = ctx.impropers->cpu_data_p; for (int i = 0; i < ctx.n_impropers; i++) { improper_t improper; @@ -662,7 +728,11 @@ void init_impropers(const char* filename) { improper.al = atoi(file.buffer[i + 2][3]); improper.code = atoi(file.buffer[i + 2][4]); - ctx.impropers[i] = improper; + impropers[i] = improper; + } + + if (ctx.run_gpu) { + ctx.impropers->upload(); } clean_csv(file); @@ -680,8 +750,8 @@ void init_cimpropers(const char* filename) { } ctx.n_cimpropers = atoi(file.buffer[0][0]); - - ctx.cimpropers.resize(ctx.n_cimpropers); + ctx.cimpropers = std::make_unique>(ctx.n_cimpropers, true, ctx.run_gpu); + auto &cimpropers = ctx.cimpropers->cpu_data_p; for (int i = 0; i < ctx.n_cimpropers; i++) { cimproper_t cimproper; @@ -691,7 +761,11 @@ void init_cimpropers(const char* filename) { cimproper.k = strtod(file.buffer[i + 1][1], &eptr); cimproper.phi0 = strtod(file.buffer[i + 1][2], &eptr); - ctx.cimpropers[i] = cimproper; + cimpropers[i] = cimproper; + } + + if (ctx.run_gpu) { + ctx.cimpropers->upload(); } clean_csv(file); @@ -709,8 +783,8 @@ void init_charges(const char* filename) { } ctx.n_charges = atoi(file.buffer[0][0]); - - ctx.charges.resize(ctx.n_charges); + ctx.charges = std::make_unique>(ctx.n_charges, true, ctx.run_gpu); + auto &charges = ctx.charges->cpu_data_p; for (int i = 0; i < ctx.n_charges; i++) { charge_t charge; @@ -718,7 +792,11 @@ void init_charges(const char* filename) { charge.a = atoi(file.buffer[i + 1][0]); charge.code = atoi(file.buffer[i + 1][1]); - ctx.charges[i] = charge; + charges[i] = charge; + } + + if (ctx.run_gpu) { + ctx.charges->upload(); } clean_csv(file); @@ -736,8 +814,8 @@ void init_ccharges(const char* filename) { } ctx.n_ccharges = atoi(file.buffer[0][0]); - - ctx.ccharges.resize(ctx.n_ccharges); + ctx.ccharges = std::make_unique>(ctx.n_ccharges, true, ctx.run_gpu); + auto &ccharges = ctx.ccharges->cpu_data_p; for (int i = 0; i < ctx.n_ccharges; i++) { ccharge_t ccharge; @@ -746,7 +824,11 @@ void init_ccharges(const char* filename) { ccharge.code = atoi(file.buffer[i + 1][0]); ccharge.charge = strtod(file.buffer[i + 1][1], &eptr); - ctx.ccharges[i] = ccharge; + ccharges[i] = ccharge; + } + + if (ctx.run_gpu) { + ctx.ccharges->upload(); } clean_csv(file); @@ -754,6 +836,7 @@ void init_ccharges(const char* filename) { void init_ngbrs14(const char* filename) { auto& ctx = Context::instance(); + auto &LJ_matrix = ctx.LJ_matrix->cpu_data_p; FILE* fp; char path[1024]; @@ -787,10 +870,10 @@ void init_ngbrs14(const char* filename) { int ix = lineI; int jx = (lineI + i + 1) % lines; // if (ix < 100 && jx < 100) printf("i = %d j = %d\n", ix+1, jx+1); - ctx.LJ_matrix[ix * ctx.n_atoms_solute + jx] = 1; - ctx.LJ_matrix[jx * ctx.n_atoms_solute + ix] = 1; + LJ_matrix[ix * ctx.n_atoms_solute + jx] = 1; + LJ_matrix[jx * ctx.n_atoms_solute + ix] = 1; - ctx.ngbrs_14.push_back({ix, jx, NONBONDED_14_PP}); // the type may is wrong, just set in here + ctx.ngbrs_14_builder.push_back({ix, jx, NONBONDED_14_PP}); // the type may is wrong, just set in here } } lineI++; @@ -801,6 +884,7 @@ void init_ngbrs14(const char* filename) { void init_ngbrs23(const char* filename) { auto& ctx = Context::instance(); + auto &LJ_matrix = ctx.LJ_matrix->cpu_data_p; FILE* fp; char path[1024]; @@ -834,8 +918,8 @@ void init_ngbrs23(const char* filename) { int ix = lineI; int jx = (lineI + i + 1) % lines; // if (ix < 100 && jx < 100) printf("i = %d j = %d\n", ix+1, jx+1); - ctx.LJ_matrix[ix * ctx.n_atoms_solute + jx] = 3; - ctx.LJ_matrix[jx * ctx.n_atoms_solute + ix] = 3; + LJ_matrix[ix * ctx.n_atoms_solute + jx] = 3; + LJ_matrix[jx * ctx.n_atoms_solute + ix] = 3; } } lineI++; @@ -846,6 +930,7 @@ void init_ngbrs23(const char* filename) { void init_ngbrs14_long(const char* filename) { auto& ctx = Context::instance(); + auto &LJ_matrix = ctx.LJ_matrix->cpu_data_p; csvfile_t file = read_csv(filename, 0, ctx.base_folder.c_str()); if (file.n_lines < 1) { @@ -858,13 +943,13 @@ void init_ngbrs14_long(const char* filename) { for (int i = 0; i < n_ngbrs14_long; i++) { int ix = atoi(file.buffer[i + 1][0]) - 1; int jx = atoi(file.buffer[i + 1][1]) - 1; - ctx.LJ_matrix[ix * ctx.n_atoms_solute + jx] = 1; - ctx.LJ_matrix[jx * ctx.n_atoms_solute + ix] = 1; + LJ_matrix[ix * ctx.n_atoms_solute + jx] = 1; + LJ_matrix[jx * ctx.n_atoms_solute + ix] = 1; int mi_x = std::min(ix, jx); int mx_x = std::max(ix, jx); - ctx.ngbrs_14.push_back({mi_x, mx_x, NONBONDED_14_PP}); // the type may is wrong, just set in here + ctx.ngbrs_14_builder.push_back({mi_x, mx_x, NONBONDED_14_PP}); // the type may is wrong, just set in here } @@ -873,6 +958,7 @@ void init_ngbrs14_long(const char* filename) { void init_ngbrs23_long(const char* filename) { auto& ctx = Context::instance(); + auto &LJ_matrix = ctx.LJ_matrix->cpu_data_p; csvfile_t file = read_csv(filename, 0, ctx.base_folder.c_str()); if (file.n_lines < 1) { @@ -885,8 +971,8 @@ void init_ngbrs23_long(const char* filename) { for (int i = 0; i < n_ngbrs23_long; i++) { int ix = atoi(file.buffer[i + 1][0]) - 1; int jx = atoi(file.buffer[i + 1][1]) - 1; - ctx.LJ_matrix[ix * ctx.n_atoms_solute + jx] = 3; - ctx.LJ_matrix[jx * ctx.n_atoms_solute + ix] = 3; + LJ_matrix[ix * ctx.n_atoms_solute + jx] = 3; + LJ_matrix[jx * ctx.n_atoms_solute + ix] = 3; } clean_csv(file); @@ -894,7 +980,7 @@ void init_ngbrs23_long(const char* filename) { void init_LJ_matrix() { auto& ctx = Context::instance(); - ctx.LJ_matrix.assign(ctx.n_atoms_solute * ctx.n_atoms_solute, 0); + ctx.LJ_matrix = std::make_unique>(ctx.n_atoms_solute * ctx.n_atoms_solute, true, ctx.run_gpu); } void init_catypes(const char* filename) { @@ -909,7 +995,8 @@ void init_catypes(const char* filename) { } ctx.n_catypes = atoi(file.buffer[0][0]); - ctx.catypes.resize(ctx.n_catypes); + ctx.catypes = std::make_unique>(ctx.n_catypes, true, ctx.run_gpu); + auto &catypes = ctx.catypes->cpu_data_p; for (int i = 0; i < ctx.n_catypes; i++) { catype_t catype; @@ -925,21 +1012,25 @@ void init_catypes(const char* filename) { catype.aii_1_4 = strtod(file.buffer[i + 1][6], &eptr); catype.bii_1_4 = strtod(file.buffer[i + 1][7], &eptr); - ctx.catypes[i] = catype; + catypes[i] = catype; } // Preprocess bii parameters for arithmetic rule: convert ε to √ε // This matches Fortran md.f90:14747-14752 preprocessing if (ctx.topo.vdw_rule == VDW_ARITHMETIC) { for (int i = 0; i < ctx.n_catypes; i++) { - ctx.catypes[i].bii_normal = sqrt(fabs(ctx.catypes[i].bii_normal)); - ctx.catypes[i].bii_1_4 = sqrt(fabs(ctx.catypes[i].bii_1_4)); + catypes[i].bii_normal = sqrt(fabs(catypes[i].bii_normal)); + catypes[i].bii_1_4 = sqrt(fabs(catypes[i].bii_1_4)); } #ifdef VERBOSE printf("Preprocessed catypes bii parameters for arithmetic vdW rule\n"); #endif } + if (ctx.run_gpu) { + ctx.catypes->upload(); + } + clean_csv(file); } @@ -956,14 +1047,19 @@ void init_atypes(const char* filename) { ctx.n_atypes = atoi(file.buffer[0][0]); - ctx.atypes.resize(ctx.n_atypes); + ctx.atypes = std::make_unique>(ctx.n_atypes, true, ctx.run_gpu); + auto &atypes = ctx.atypes->cpu_data_p; for (int i = 0; i < ctx.n_atypes; i++) { atype_t atype; atype.a = atoi(file.buffer[i + 1][0]); atype.code = atoi(file.buffer[i + 1][1]); - ctx.atypes[i] = atype; + atypes[i] = atype; + } + + if (ctx.run_gpu) { + ctx.atypes->upload(); } clean_csv(file); @@ -1398,22 +1494,29 @@ void init_qelscales(const char* filename) { csvfile_t file = read_csv(filename, 0, ctx.base_folder.c_str()); if (file.n_lines < 1) { + ctx.n_qelscales = 0; + ctx.q_elscales = std::make_unique>(0, true, ctx.run_gpu); clean_csv(file); return; } ctx.n_qelscales = atoi(file.buffer[0][0]) / ctx.n_lambdas; - ctx.q_elscales.resize(ctx.n_qelscales * ctx.n_lambdas); + ctx.q_elscales = std::make_unique>(ctx.n_qelscales * ctx.n_lambdas, true, ctx.run_gpu); + auto *q_elscales = ctx.q_elscales->cpu_data_p; for (int i = 0; i < ctx.n_qelscales; i++) { for (int j = 0; j < ctx.n_lambdas; j++) { char* eptr; - ctx.q_elscales[i + j * ctx.n_qelscales].qi = atoi(file.buffer[i + j * ctx.n_qelscales + 1][0]); - ctx.q_elscales[i + j * ctx.n_qelscales].qj = atoi(file.buffer[i + j * ctx.n_qelscales + 1][1]); - ctx.q_elscales[i + j * ctx.n_qelscales].mu = strtod(file.buffer[i + j * ctx.n_qelscales + 1][2], &eptr); + q_elscales[i + j * ctx.n_qelscales].qi = atoi(file.buffer[i + j * ctx.n_qelscales + 1][0]); + q_elscales[i + j * ctx.n_qelscales].qj = atoi(file.buffer[i + j * ctx.n_qelscales + 1][1]); + q_elscales[i + j * ctx.n_qelscales].mu = strtod(file.buffer[i + j * ctx.n_qelscales + 1][2], &eptr); } } + if (ctx.run_gpu) { + ctx.q_elscales->upload(); + } + clean_csv(file); } @@ -1552,11 +1655,12 @@ void init_icoords(const char* filename) { return; } + auto &coords = ctx.coords->cpu_data_p; for (int i = 0; i < ctx.n_atoms; i++) { char* eptr; - ctx.coords[i].x = strtod(file.buffer[i + 1][0], &eptr); - ctx.coords[i].y = strtod(file.buffer[i + 1][1], &eptr); - ctx.coords[i].z = strtod(file.buffer[i + 1][2], &eptr); + coords[i].x = strtod(file.buffer[i + 1][0], &eptr); + coords[i].y = strtod(file.buffer[i + 1][1], &eptr); + coords[i].z = strtod(file.buffer[i + 1][2], &eptr); } clean_csv(file); @@ -1571,11 +1675,12 @@ void init_ivelocities(const char* filename) { return; } + auto &velocities = ctx.velocities->cpu_data_p; for (int i = 0; i < ctx.n_atoms; i++) { char* eptr; - ctx.velocities[i].x = strtod(file.buffer[i + 1][0], &eptr); - ctx.velocities[i].y = strtod(file.buffer[i + 1][1], &eptr); - ctx.velocities[i].z = strtod(file.buffer[i + 1][2], &eptr); + velocities[i].x = strtod(file.buffer[i + 1][0], &eptr); + velocities[i].y = strtod(file.buffer[i + 1][1], &eptr); + velocities[i].z = strtod(file.buffer[i + 1][2], &eptr); } clean_csv(file); diff --git a/src/core/cpu/src/cpu_angle_force.cpp b/src/core/cpu/src/cpu_angle_force.cpp index cd49f36c..a9c29c1e 100644 --- a/src/core/cpu/src/cpu_angle_force.cpp +++ b/src/core/cpu/src/cpu_angle_force.cpp @@ -7,6 +7,8 @@ double calc_angle_forces(int start, int end) { auto& ctx = Context::instance(); + auto &coords = ctx.coords->cpu_data_p; + auto &dvelocities = ctx.dvelocities->cpu_data_p; int aii, aji, aki; coord_t ai, aj, ak; @@ -19,15 +21,18 @@ double calc_angle_forces(int start, int end) { double ener; double angle = 0; + auto &angles = ctx.angles->cpu_data_p; + auto &cangles = ctx.cangles->cpu_data_p; + for (int i = start; i < end; i++) { - aii = ctx.angles[i].ai - 1; - aji = ctx.angles[i].aj - 1; - aki = ctx.angles[i].ak - 1; - ai = ctx.coords[aii]; - aj = ctx.coords[aji]; - ak = ctx.coords[aki]; + aii = angles[i].ai - 1; + aji = angles[i].aj - 1; + aki = angles[i].ak - 1; + ai = coords[aii]; + aj = coords[aji]; + ak = coords[aki]; - cangle = ctx.cangles[ctx.angles[i].code - 1]; + cangle = cangles[angles[i].code - 1]; rji.x = ai.x - aj.x; rji.y = ai.y - aj.y; @@ -78,17 +83,17 @@ double calc_angle_forces(int start, int end) { dk.y = f1 * (rji.y * bjiinv * bjkinv - cos_th * rjk.y * bjk2inv); dk.z = f1 * (rji.z * bjiinv * bjkinv - cos_th * rjk.z * bjk2inv); - ctx.dvelocities[aii].x += dv * di.x; - ctx.dvelocities[aii].y += dv * di.y; - ctx.dvelocities[aii].z += dv * di.z; + dvelocities[aii].x += dv * di.x; + dvelocities[aii].y += dv * di.y; + dvelocities[aii].z += dv * di.z; - ctx.dvelocities[aki].x += dv * dk.x; - ctx.dvelocities[aki].y += dv * dk.y; - ctx.dvelocities[aki].z += dv * dk.z; + dvelocities[aki].x += dv * dk.x; + dvelocities[aki].y += dv * dk.y; + dvelocities[aki].z += dv * dk.z; - ctx.dvelocities[aji].x -= dv * (di.x + dk.x); - ctx.dvelocities[aji].y -= dv * (di.y + dk.y); - ctx.dvelocities[aji].z -= dv * (di.z + dk.z); + dvelocities[aji].x -= dv * (di.x + dk.x); + dvelocities[aji].y -= dv * (di.y + dk.y); + dvelocities[aji].z -= dv * (di.z + dk.z); // printf("ANGLE ener = %f\n", ener); } diff --git a/src/core/cpu/src/cpu_bond_force.cpp b/src/core/cpu/src/cpu_bond_force.cpp index 38ad3862..2a539f90 100644 --- a/src/core/cpu/src/cpu_bond_force.cpp +++ b/src/core/cpu/src/cpu_bond_force.cpp @@ -6,6 +6,10 @@ double calc_bond_forces(int start, int end) { auto& ctx = Context::instance(); + auto &bonds = ctx.bonds->cpu_data_p; + auto &cbonds = ctx.cbonds->cpu_data_p; + auto &coords = ctx.coords->cpu_data_p; + auto &dvelocities = ctx.dvelocities->cpu_data_p; int aii, aji; coord_t ai, aj, dx; cbond_t cbond; @@ -13,12 +17,12 @@ double calc_bond_forces(int start, int end) { double bond = 0; for (int i = start; i < end; i++) { - aii = ctx.bonds[i].ai - 1; - aji = ctx.bonds[i].aj - 1; - ai = ctx.coords[aii]; - aj = ctx.coords[aji]; + aii = bonds[i].ai - 1; + aji = bonds[i].aj - 1; + ai = coords[aii]; + aj = coords[aji]; - cbond = ctx.cbonds[ctx.bonds[i].code - 1]; + cbond = cbonds[bonds[i].code - 1]; // Calculate distance vector, norm of distance vector dx.x = aj.x - ai.x; @@ -36,13 +40,13 @@ double calc_bond_forces(int start, int end) { // Update forces ampl = cbond.kb * ddx / dx1; - ctx.dvelocities[aji].x += ampl * dx.x; - ctx.dvelocities[aji].y += ampl * dx.y; - ctx.dvelocities[aji].z += ampl * dx.z; + dvelocities[aji].x += ampl * dx.x; + dvelocities[aji].y += ampl * dx.y; + dvelocities[aji].z += ampl * dx.z; - ctx.dvelocities[aii].x -= ampl * dx.x; - ctx.dvelocities[aii].y -= ampl * dx.y; - ctx.dvelocities[aii].z -= ampl * dx.z; + dvelocities[aii].x -= ampl * dx.x; + dvelocities[aii].y -= ampl * dx.y; + dvelocities[aii].z -= ampl * dx.z; } return bond; diff --git a/src/core/cpu/src/cpu_improper2_force.cpp b/src/core/cpu/src/cpu_improper2_force.cpp index c632ec9c..af73a9cc 100644 --- a/src/core/cpu/src/cpu_improper2_force.cpp +++ b/src/core/cpu/src/cpu_improper2_force.cpp @@ -7,6 +7,10 @@ double calc_improper2_forces(int start, int end) { auto& ctx = Context::instance(); + auto &impropers = ctx.impropers->cpu_data_p; + auto &cimpropers = ctx.cimpropers->cpu_data_p; + auto &coords = ctx.coords->cpu_data_p; + auto &dvelocities = ctx.dvelocities->cpu_data_p; int aii, aji, aki, ali; coord_t ai, aj, ak, al; @@ -20,18 +24,18 @@ double calc_improper2_forces(int start, int end) { double improper = 0; for (int i = start; i < end; i++) { - imp = ctx.impropers[i]; - cimp = ctx.cimpropers[imp.code - 1]; + imp = impropers[i]; + cimp = cimpropers[imp.code - 1]; aii = imp.ai - 1; aji = imp.aj - 1; aki = imp.ak - 1; ali = imp.al - 1; - ai = ctx.coords[aii]; - aj = ctx.coords[aji]; - ak = ctx.coords[aki]; - al = ctx.coords[ali]; + ai = coords[aii]; + aj = coords[aji]; + ak = coords[aki]; + al = coords[ali]; rji.x = ai.x - aj.x; rji.y = ai.y - aj.y; @@ -110,21 +114,21 @@ double calc_improper2_forces(int start, int end) { // Update energy and forces improper += ener; - ctx.dvelocities[aii].x += dv * dpi.x; - ctx.dvelocities[aii].y += dv * dpi.y; - ctx.dvelocities[aii].z += dv * dpi.z; + dvelocities[aii].x += dv * dpi.x; + dvelocities[aii].y += dv * dpi.y; + dvelocities[aii].z += dv * dpi.z; - ctx.dvelocities[aji].x += dv * dpj.x; - ctx.dvelocities[aji].y += dv * dpj.y; - ctx.dvelocities[aji].z += dv * dpj.z; + dvelocities[aji].x += dv * dpj.x; + dvelocities[aji].y += dv * dpj.y; + dvelocities[aji].z += dv * dpj.z; - ctx.dvelocities[aki].x += dv * dpk.x; - ctx.dvelocities[aki].y += dv * dpk.y; - ctx.dvelocities[aki].z += dv * dpk.z; + dvelocities[aki].x += dv * dpk.x; + dvelocities[aki].y += dv * dpk.y; + dvelocities[aki].z += dv * dpk.z; - ctx.dvelocities[ali].x += dv * dpl.x; - ctx.dvelocities[ali].y += dv * dpl.y; - ctx.dvelocities[ali].z += dv * dpl.z; + dvelocities[ali].x += dv * dpl.x; + dvelocities[ali].y += dv * dpl.y; + dvelocities[ali].z += dv * dpl.z; } return improper; diff --git a/src/core/cpu/src/cpu_leapfrog.cpp b/src/core/cpu/src/cpu_leapfrog.cpp index 31b96bb7..9d1ff43a 100644 --- a/src/core/cpu/src/cpu_leapfrog.cpp +++ b/src/core/cpu/src/cpu_leapfrog.cpp @@ -5,49 +5,55 @@ void calc_leapfrog() { auto& ctx = Context::instance(); + auto &atypes = ctx.atypes->cpu_data_p; + auto &catypes = ctx.catypes->cpu_data_p; + auto &coords = ctx.coords->cpu_data_p; + auto &velocities = ctx.velocities->cpu_data_p; + auto &dvelocities = ctx.dvelocities->cpu_data_p; + auto *xcoords = ctx.xcoords->cpu_data_p; double mass_i; double winv_i; for (int i = 0; i < ctx.n_atoms_solute; i++) { - mass_i = ctx.catypes[ctx.atypes[i].code - 1].m; + mass_i = catypes[atypes[i].code - 1].m; winv_i = 1 / mass_i; - ctx.velocities[i].x = (ctx.velocities[i].x - ctx.dvelocities[i].x * ctx.dt * winv_i) * ctx.Tscale_solute; - ctx.velocities[i].y = (ctx.velocities[i].y - ctx.dvelocities[i].y * ctx.dt * winv_i) * ctx.Tscale_solute; - ctx.velocities[i].z = (ctx.velocities[i].z - ctx.dvelocities[i].z * ctx.dt * winv_i) * ctx.Tscale_solute; + velocities[i].x = (velocities[i].x - dvelocities[i].x * ctx.dt * winv_i) * ctx.Tscale_solute; + velocities[i].y = (velocities[i].y - dvelocities[i].y * ctx.dt * winv_i) * ctx.Tscale_solute; + velocities[i].z = (velocities[i].z - dvelocities[i].z * ctx.dt * winv_i) * ctx.Tscale_solute; - ctx.xcoords[i].x = ctx.coords[i].x; - ctx.xcoords[i].y = ctx.coords[i].y; - ctx.xcoords[i].z = ctx.coords[i].z; + xcoords[i].x = coords[i].x; + xcoords[i].y = coords[i].y; + xcoords[i].z = coords[i].z; - ctx.coords[i].x += ctx.velocities[i].x * ctx.dt; - ctx.coords[i].y += ctx.velocities[i].y * ctx.dt; - ctx.coords[i].z += ctx.velocities[i].z * ctx.dt; + coords[i].x += velocities[i].x * ctx.dt; + coords[i].y += velocities[i].y * ctx.dt; + coords[i].z += velocities[i].z * ctx.dt; } for (int i = ctx.n_atoms_solute; i < ctx.n_atoms; i++) { - mass_i = ctx.catypes[ctx.atypes[i].code - 1].m; + mass_i = catypes[atypes[i].code - 1].m; winv_i = 1 / mass_i; - ctx.velocities[i].x = (ctx.velocities[i].x - ctx.dvelocities[i].x * ctx.dt * winv_i) * ctx.Tscale_solvent; - ctx.velocities[i].y = (ctx.velocities[i].y - ctx.dvelocities[i].y * ctx.dt * winv_i) * ctx.Tscale_solvent; - ctx.velocities[i].z = (ctx.velocities[i].z - ctx.dvelocities[i].z * ctx.dt * winv_i) * ctx.Tscale_solvent; + velocities[i].x = (velocities[i].x - dvelocities[i].x * ctx.dt * winv_i) * ctx.Tscale_solvent; + velocities[i].y = (velocities[i].y - dvelocities[i].y * ctx.dt * winv_i) * ctx.Tscale_solvent; + velocities[i].z = (velocities[i].z - dvelocities[i].z * ctx.dt * winv_i) * ctx.Tscale_solvent; - ctx.xcoords[i].x = ctx.coords[i].x; - ctx.xcoords[i].y = ctx.coords[i].y; - ctx.xcoords[i].z = ctx.coords[i].z; + xcoords[i].x = coords[i].x; + xcoords[i].y = coords[i].y; + xcoords[i].z = coords[i].z; - ctx.coords[i].x += ctx.velocities[i].x * ctx.dt; - ctx.coords[i].y += ctx.velocities[i].y * ctx.dt; - ctx.coords[i].z += ctx.velocities[i].z * ctx.dt; + coords[i].x += velocities[i].x * ctx.dt; + coords[i].y += velocities[i].y * ctx.dt; + coords[i].z += velocities[i].z * ctx.dt; } if (ctx.n_shake_constraints > 0) { - calc_shake_constraints(ctx.coords.data(), ctx.xcoords.data()); + calc_shake_constraints(coords, xcoords); for (int i = 0; i < ctx.n_atoms; i++) { - ctx.velocities[i].x = (ctx.coords[i].x - ctx.xcoords[i].x) / ctx.dt; - ctx.velocities[i].y = (ctx.coords[i].y - ctx.xcoords[i].y) / ctx.dt; - ctx.velocities[i].z = (ctx.coords[i].z - ctx.xcoords[i].z) / ctx.dt; + velocities[i].x = (coords[i].x - xcoords[i].x) / ctx.dt; + velocities[i].y = (coords[i].y - xcoords[i].y) / ctx.dt; + velocities[i].z = (coords[i].z - xcoords[i].z) / ctx.dt; } } } diff --git a/src/core/cpu/src/cpu_nonbonded_pp_force.cpp b/src/core/cpu/src/cpu_nonbonded_pp_force.cpp index 302586f0..ce744ad0 100644 --- a/src/core/cpu/src/cpu_nonbonded_pp_force.cpp +++ b/src/core/cpu/src/cpu_nonbonded_pp_force.cpp @@ -8,6 +8,10 @@ void calc_nonbonded_pp_forces() { auto& ctx = Context::instance(); + auto &coords = ctx.coords->cpu_data_p; + auto &dvelocities = ctx.dvelocities->cpu_data_p; + auto &LJ_matrix = ctx.LJ_matrix->cpu_data_p; + auto *excluded = ctx.excluded->cpu_data_p; bool bond14, bond23; double scaling; coord_t da; @@ -21,11 +25,11 @@ void calc_nonbonded_pp_forces() { for (int pj = pi + 1; pj < ctx.n_patoms; pj++) { i = ctx.p_atoms[pi]; j = ctx.p_atoms[pj]; - bond23 = ctx.LJ_matrix[i * ctx.n_atoms_solute + j] == 3; - bond14 = ctx.LJ_matrix[i * ctx.n_atoms_solute + j] == 1; + bond23 = LJ_matrix[i * ctx.n_atoms_solute + j] == 3; + bond14 = LJ_matrix[i * ctx.n_atoms_solute + j] == 1; if (bond23) continue; - if (ctx.excluded[i] || ctx.excluded[j]) continue; + if (excluded[i] || excluded[j]) continue; scaling = bond14 ? ctx.topo.el14_scale : 1; @@ -35,9 +39,9 @@ void calc_nonbonded_pp_forces() { const catype_t& ai_type = ctx.unified_catype(i, 0); const catype_t& aj_type = ctx.unified_catype(j, 0); - da.x = ctx.coords[j].x - ctx.coords[i].x; - da.y = ctx.coords[j].y - ctx.coords[i].y; - da.z = ctx.coords[j].z - ctx.coords[i].z; + da.x = coords[j].x - coords[i].x; + da.y = coords[j].y - coords[i].y; + da.z = coords[j].z - coords[i].z; r2a = 1 / (std::pow(da.x, 2) + std::pow(da.y, 2) + std::pow(da.z, 2)); ra = sqrt(r2a); r6a = r2a * r2a * r2a; @@ -56,13 +60,13 @@ void calc_nonbonded_pp_forces() { } dva = r2a * (-Vela - 12 * V_a + 6 * V_b); - ctx.dvelocities[i].x -= dva * da.x; - ctx.dvelocities[i].y -= dva * da.y; - ctx.dvelocities[i].z -= dva * da.z; + dvelocities[i].x -= dva * da.x; + dvelocities[i].y -= dva * da.y; + dvelocities[i].z -= dva * da.z; - ctx.dvelocities[j].x += dva * da.x; - ctx.dvelocities[j].y += dva * da.y; - ctx.dvelocities[j].z += dva * da.z; + dvelocities[j].x += dva * da.x; + dvelocities[j].y += dva * da.y; + dvelocities[j].z += dva * da.z; ctx.E_nonbond_pp.Ucoul += Vela; ctx.E_nonbond_pp.Uvdw += (V_a - V_b); diff --git a/src/core/cpu/src/cpu_nonbonded_pw_force.cpp b/src/core/cpu/src/cpu_nonbonded_pw_force.cpp index 881f7d54..6bf2c27e 100644 --- a/src/core/cpu/src/cpu_nonbonded_pw_force.cpp +++ b/src/core/cpu/src/cpu_nonbonded_pw_force.cpp @@ -8,6 +8,9 @@ void calc_nonbonded_pw_forces() { auto& ctx = Context::instance(); + auto &coords = ctx.coords->cpu_data_p; + auto &dvelocities = ctx.dvelocities->cpu_data_p; + auto *excluded = ctx.excluded->cpu_data_p; if (ctx.n_waters == 0 || ctx.n_patoms == 0) { return; } @@ -15,7 +18,7 @@ void calc_nonbonded_pw_forces() { for (int pi = 0; pi < ctx.n_patoms; ++pi) { const int atom_i = ctx.p_atoms[pi]; for (int atom_j = ctx.n_atoms_solute; atom_j < ctx.n_atoms; ++atom_j) { - if (ctx.excluded[atom_i] || ctx.excluded[atom_j]) { + if (excluded[atom_i] || excluded[atom_j]) { continue; } @@ -27,9 +30,9 @@ void calc_nonbonded_pw_forces() { double v_a = 0.0; double v_b = 0.0; - const double dx = ctx.coords[atom_j].x - ctx.coords[atom_i].x; - const double dy = ctx.coords[atom_j].y - ctx.coords[atom_i].y; - const double dz = ctx.coords[atom_j].z - ctx.coords[atom_i].z; + const double dx = coords[atom_j].x - coords[atom_i].x; + const double dy = coords[atom_j].y - coords[atom_i].y; + const double dz = coords[atom_j].z - coords[atom_i].z; const double r2inv = 1.0 / (dx * dx + dy * dy + dz * dz); const double rinv = std::sqrt(r2inv); const double r6inv = r2inv * r2inv * r2inv; @@ -55,13 +58,13 @@ void calc_nonbonded_pw_forces() { const double scale = r2inv * (-ecoul - 12.0 * v_a + 6.0 * v_b); - ctx.dvelocities[atom_i].x -= scale * dx; - ctx.dvelocities[atom_i].y -= scale * dy; - ctx.dvelocities[atom_i].z -= scale * dz; + dvelocities[atom_i].x -= scale * dx; + dvelocities[atom_i].y -= scale * dy; + dvelocities[atom_i].z -= scale * dz; - ctx.dvelocities[atom_j].x += scale * dx; - ctx.dvelocities[atom_j].y += scale * dy; - ctx.dvelocities[atom_j].z += scale * dz; + dvelocities[atom_j].x += scale * dx; + dvelocities[atom_j].y += scale * dy; + dvelocities[atom_j].z += scale * dz; ctx.E_nonbond_pw.Ucoul += ecoul; ctx.E_nonbond_pw.Uvdw += (v_a - v_b); diff --git a/src/core/cpu/src/cpu_nonbonded_qp_force.cpp b/src/core/cpu/src/cpu_nonbonded_qp_force.cpp index c12c81ae..65a74a6c 100644 --- a/src/core/cpu/src/cpu_nonbonded_qp_force.cpp +++ b/src/core/cpu/src/cpu_nonbonded_qp_force.cpp @@ -8,6 +8,11 @@ void calc_nonbonded_qp_forces() { auto& ctx = Context::instance(); + auto *lambdas = ctx.lambdas->cpu_data_p; + auto &coords = ctx.coords->cpu_data_p; + auto &dvelocities = ctx.dvelocities->cpu_data_p; + auto &LJ_matrix = ctx.LJ_matrix->cpu_data_p; + auto *excluded = ctx.excluded->cpu_data_p; int i, j; coord_t da; double r2, r6, r; @@ -20,17 +25,17 @@ void calc_nonbonded_qp_forces() { i = ctx.q_atoms[qi]; j = ctx.p_atoms[pj]; - bond23 = ctx.LJ_matrix[i * ctx.n_atoms_solute + j] == 3; - bond14 = ctx.LJ_matrix[i * ctx.n_atoms_solute + j] == 1; + bond23 = LJ_matrix[i * ctx.n_atoms_solute + j] == 3; + bond14 = LJ_matrix[i * ctx.n_atoms_solute + j] == 1; if (bond23) continue; - if (ctx.excluded[i] || ctx.excluded[j]) continue; + if (excluded[i] || excluded[j]) continue; scaling = bond14 ? ctx.topo.el14_scale : 1; - da.x = ctx.coords[j].x - ctx.coords[i].x; - da.y = ctx.coords[j].y - ctx.coords[i].y; - da.z = ctx.coords[j].z - ctx.coords[i].z; + da.x = coords[j].x - coords[i].x; + da.y = coords[j].y - coords[i].y; + da.z = coords[j].z - coords[i].z; r2 = pow(da.x, 2) + pow(da.y, 2) + pow(da.z, 2); @@ -54,15 +59,15 @@ void calc_nonbonded_qp_forces() { } else { calc_vdw_arithmetic(ai_aii, aj_aii, ai_bii, aj_bii, r6inv, &V_a, &V_b); } - dv = r2 * (-Vel - (12 * V_a - 6 * V_b)) * ctx.lambdas[state]; + dv = r2 * (-Vel - (12 * V_a - 6 * V_b)) * lambdas[state]; // Update forces - ctx.dvelocities[i].x -= dv * da.x; - ctx.dvelocities[i].y -= dv * da.y; - ctx.dvelocities[i].z -= dv * da.z; - ctx.dvelocities[j].x += dv * da.x; - ctx.dvelocities[j].y += dv * da.y; - ctx.dvelocities[j].z += dv * da.z; + dvelocities[i].x -= dv * da.x; + dvelocities[i].y -= dv * da.y; + dvelocities[i].z -= dv * da.z; + dvelocities[j].x += dv * da.x; + dvelocities[j].y += dv * da.y; + dvelocities[j].z += dv * da.z; // Update Q totals ctx.EQ_nonbond_qp[state].Ucoul += Vel; diff --git a/src/core/cpu/src/cpu_nonbonded_qq_force.cpp b/src/core/cpu/src/cpu_nonbonded_qq_force.cpp index f4fcfc26..2b062d48 100644 --- a/src/core/cpu/src/cpu_nonbonded_qq_force.cpp +++ b/src/core/cpu/src/cpu_nonbonded_qq_force.cpp @@ -8,6 +8,12 @@ void calc_nonbonded_qq_forces() { auto& ctx = Context::instance(); + auto *lambdas = ctx.lambdas->cpu_data_p; + auto &coords = ctx.coords->cpu_data_p; + auto &dvelocities = ctx.dvelocities->cpu_data_p; + auto &LJ_matrix = ctx.LJ_matrix->cpu_data_p; + auto *excluded = ctx.excluded->cpu_data_p; + auto *q_elscales = ctx.q_elscales->cpu_data_p; int ai, aj; double crg_i, crg_j; double elscale, scaling; @@ -27,27 +33,27 @@ void calc_nonbonded_qq_forces() { crg_i = ctx.unified_ccharge(ai, state).charge; crg_j = ctx.unified_ccharge(aj, state).charge; - bond23 = ctx.LJ_matrix[ai * ctx.n_atoms_solute + aj] == 3; - bond14 = ctx.LJ_matrix[ai * ctx.n_atoms_solute + aj] == 1; + bond23 = LJ_matrix[ai * ctx.n_atoms_solute + aj] == 3; + bond14 = LJ_matrix[ai * ctx.n_atoms_solute + aj] == 1; if (bond23) continue; - if (ctx.excluded[ai] || ctx.excluded[aj]) continue; + if (excluded[ai] || excluded[aj]) continue; scaling = bond14 ? ctx.topo.el14_scale : 1; elscale = 1; for (int k = 0; k < ctx.n_qelscales; k++) { - if (ctx.q_elscales[k + ctx.n_qelscales * state].qi == qi + 1 && ctx.q_elscales[k + ctx.n_qelscales * state].qj == qj + 1) { - elscale = ctx.q_elscales[k + ctx.n_qelscales * state].mu; + if (q_elscales[k + ctx.n_qelscales * state].qi == qi + 1 && q_elscales[k + ctx.n_qelscales * state].qj == qj + 1) { + elscale = q_elscales[k + ctx.n_qelscales * state].mu; } } const catype_t& qi_type = ctx.unified_catype(ai, state); const catype_t& qj_type = ctx.unified_catype(aj, state); - da.x = ctx.coords[aj].x - ctx.coords[ai].x; - da.y = ctx.coords[aj].y - ctx.coords[ai].y; - da.z = ctx.coords[aj].z - ctx.coords[ai].z; + da.x = coords[aj].x - coords[ai].x; + da.y = coords[aj].y - coords[ai].y; + da.z = coords[aj].z - coords[ai].z; r2a = 1 / (pow(da.x, 2) + pow(da.y, 2) + pow(da.z, 2)); ra = sqrt(r2a); r6a = r2a * r2a * r2a; @@ -64,15 +70,15 @@ void calc_nonbonded_qq_forces() { } else { calc_vdw_arithmetic(ai_aii, aj_aii, ai_bii, aj_bii, r6a, &V_a, &V_b); } - dva = r2a * (-Vela - 12 * V_a + 6 * V_b) * ctx.lambdas[state]; + dva = r2a * (-Vela - 12 * V_a + 6 * V_b) * lambdas[state]; - ctx.dvelocities[ai].x -= dva * da.x; - ctx.dvelocities[ai].y -= dva * da.y; - ctx.dvelocities[ai].z -= dva * da.z; + dvelocities[ai].x -= dva * da.x; + dvelocities[ai].y -= dva * da.y; + dvelocities[ai].z -= dva * da.z; - ctx.dvelocities[aj].x += dva * da.x; - ctx.dvelocities[aj].y += dva * da.y; - ctx.dvelocities[aj].z += dva * da.z; + dvelocities[aj].x += dva * da.x; + dvelocities[aj].y += dva * da.y; + dvelocities[aj].z += dva * da.z; ctx.EQ_nonbond_qq[state].Ucoul += Vela; ctx.EQ_nonbond_qq[state].Uvdw += (V_a - V_b); diff --git a/src/core/cpu/src/cpu_nonbonded_qw_force.cpp b/src/core/cpu/src/cpu_nonbonded_qw_force.cpp index 7b9b2f23..71d07247 100644 --- a/src/core/cpu/src/cpu_nonbonded_qw_force.cpp +++ b/src/core/cpu/src/cpu_nonbonded_qw_force.cpp @@ -7,6 +7,10 @@ #include "vdw_rules.h" void calc_nonbonded_qw_forces() { auto& ctx = Context::instance(); + auto *lambdas = ctx.lambdas->cpu_data_p; + auto &coords = ctx.coords->cpu_data_p; + auto &dvelocities = ctx.dvelocities->cpu_data_p; + auto *excluded = ctx.excluded->cpu_data_p; int i; coord_t dO, dH1, dH2; double r2O, rH1, rH2, r6O, rO, r2H1, r2H2; @@ -25,16 +29,16 @@ void calc_nonbonded_qw_forces() { for (int j = ctx.n_atoms_solute; j < ctx.n_atoms; j += 3) { for (int qi = 0; qi < ctx.n_qatoms; qi++) { i = ctx.q_atoms[qi]; - if (ctx.excluded[i] || ctx.excluded[j]) continue; - dO.x = ctx.coords[j].x - ctx.coords[i].x; - dO.y = ctx.coords[j].y - ctx.coords[i].y; - dO.z = ctx.coords[j].z - ctx.coords[i].z; - dH1.x = ctx.coords[j + 1].x - ctx.coords[i].x; - dH1.y = ctx.coords[j + 1].y - ctx.coords[i].y; - dH1.z = ctx.coords[j + 1].z - ctx.coords[i].z; - dH2.x = ctx.coords[j + 2].x - ctx.coords[i].x; - dH2.y = ctx.coords[j + 2].y - ctx.coords[i].y; - dH2.z = ctx.coords[j + 2].z - ctx.coords[i].z; + if (excluded[i] || excluded[j]) continue; + dO.x = coords[j].x - coords[i].x; + dO.y = coords[j].y - coords[i].y; + dO.z = coords[j].z - coords[i].z; + dH1.x = coords[j + 1].x - coords[i].x; + dH1.y = coords[j + 1].y - coords[i].y; + dH1.z = coords[j + 1].z - coords[i].z; + dH2.x = coords[j + 2].x - coords[i].x; + dH2.y = coords[j + 2].y - coords[i].y; + dH2.z = coords[j + 2].z - coords[i].z; r2O = pow(dO.x, 2) + pow(dO.y, 2) + pow(dO.z, 2); rH1 = sqrt(1.0 / (pow(dH1.x, 2) + pow(dH1.y, 2) + pow(dH1.z, 2))); rH2 = sqrt(1.0 / (pow(dH2.x, 2) + pow(dH2.y, 2) + pow(dH2.z, 2))); @@ -69,9 +73,9 @@ void calc_nonbonded_qw_forces() { // if (state == 0 && qi == 1) printf("j = %d ai__aii = %f A_O = %f B_O = %f V_a = %f V_b = %f r6O = %f\n", j, ai_aii, A_O, B_O, V_a, V_b, r6O); - dvO += r2O * (-VelO - (12 * V_a - 6 * V_b)) * ctx.lambdas[state]; - dvH1 -= r2H1 * VelH1 * ctx.lambdas[state]; - dvH2 -= r2H2 * VelH2 * ctx.lambdas[state]; + dvO += r2O * (-VelO - (12 * V_a - 6 * V_b)) * lambdas[state]; + dvH1 -= r2H1 * VelH1 * lambdas[state]; + dvH2 -= r2H2 * VelH2 * lambdas[state]; ctx.EQ_nonbond_qw[state].Ucoul += (VelO + VelH1 + VelH2); ctx.EQ_nonbond_qw[state].Uvdw += (V_a - V_b); @@ -80,20 +84,20 @@ void calc_nonbonded_qw_forces() { // Note r6O is not the usual 1/rO^6, but rather rO^6. be careful!!! // Update forces on Q-atom - ctx.dvelocities[i].x -= (dvO * dO.x + dvH1 * dH1.x + dvH2 * dH2.x); - ctx.dvelocities[i].y -= (dvO * dO.y + dvH1 * dH1.y + dvH2 * dH2.y); - ctx.dvelocities[i].z -= (dvO * dO.z + dvH1 * dH1.z + dvH2 * dH2.z); + dvelocities[i].x -= (dvO * dO.x + dvH1 * dH1.x + dvH2 * dH2.x); + dvelocities[i].y -= (dvO * dO.y + dvH1 * dH1.y + dvH2 * dH2.y); + dvelocities[i].z -= (dvO * dO.z + dvH1 * dH1.z + dvH2 * dH2.z); // Update forces on water - ctx.dvelocities[j].x += dvO * dO.x; - ctx.dvelocities[j].y += dvO * dO.y; - ctx.dvelocities[j].z += dvO * dO.z; - ctx.dvelocities[j + 1].x += dvH1 * dH1.x; - ctx.dvelocities[j + 1].y += dvH1 * dH1.y; - ctx.dvelocities[j + 1].z += dvH1 * dH1.z; - ctx.dvelocities[j + 2].x += dvH2 * dH2.x; - ctx.dvelocities[j + 2].y += dvH2 * dH2.y; - ctx.dvelocities[j + 2].z += dvH2 * dH2.z; + dvelocities[j].x += dvO * dO.x; + dvelocities[j].y += dvO * dO.y; + dvelocities[j].z += dvO * dO.z; + dvelocities[j + 1].x += dvH1 * dH1.x; + dvelocities[j + 1].y += dvH1 * dH1.y; + dvelocities[j + 1].z += dvH1 * dH1.z; + dvelocities[j + 2].x += dvH2 * dH2.x; + dvelocities[j + 2].y += dvH2 * dH2.y; + dvelocities[j + 2].z += dvH2 * dH2.z; } } diff --git a/src/core/cpu/src/cpu_nonbonded_ww_force.cpp b/src/core/cpu/src/cpu_nonbonded_ww_force.cpp index e094ffca..cf823e0e 100644 --- a/src/core/cpu/src/cpu_nonbonded_ww_force.cpp +++ b/src/core/cpu/src/cpu_nonbonded_ww_force.cpp @@ -41,9 +41,11 @@ void accumulate_pair_force(Context& ctx, double vdw_a, double vdw_b, E_nonbonded_t& energy) { - const double dx = ctx.coords[atom_j].x - ctx.coords[atom_i].x; - const double dy = ctx.coords[atom_j].y - ctx.coords[atom_i].y; - const double dz = ctx.coords[atom_j].z - ctx.coords[atom_i].z; + auto &coords = ctx.coords->cpu_data_p; + auto &dvelocities = ctx.dvelocities->cpu_data_p; + const double dx = coords[atom_j].x - coords[atom_i].x; + const double dy = coords[atom_j].y - coords[atom_i].y; + const double dz = coords[atom_j].z - coords[atom_i].z; const double r2inv = 1.0 / (dx * dx + dy * dy + dz * dz); const double rinv = std::sqrt(r2inv); @@ -61,13 +63,13 @@ void accumulate_pair_force(Context& ctx, const double scale = r2inv * dva; - ctx.dvelocities[atom_i].x -= scale * dx; - ctx.dvelocities[atom_i].y -= scale * dy; - ctx.dvelocities[atom_i].z -= scale * dz; + dvelocities[atom_i].x -= scale * dx; + dvelocities[atom_i].y -= scale * dy; + dvelocities[atom_i].z -= scale * dz; - ctx.dvelocities[atom_j].x += scale * dx; - ctx.dvelocities[atom_j].y += scale * dy; - ctx.dvelocities[atom_j].z += scale * dz; + dvelocities[atom_j].x += scale * dx; + dvelocities[atom_j].y += scale * dy; + dvelocities[atom_j].z += scale * dz; energy.Ucoul += ecoul; energy.Uvdw += evdw; diff --git a/src/core/cpu/src/cpu_polx_water_force.cpp b/src/core/cpu/src/cpu_polx_water_force.cpp index 44bfa1c4..9d0e4711 100644 --- a/src/core/cpu/src/cpu_polx_water_force.cpp +++ b/src/core/cpu/src/cpu_polx_water_force.cpp @@ -8,6 +8,9 @@ void calc_polx_w_forces(int iteration) { auto& ctx = Context::instance(); + auto &coords = ctx.coords->cpu_data_p; + auto &dvelocities = ctx.dvelocities->cpu_data_p; + auto *wshells = ctx.wshells->cpu_data_p; int wi, imin, jw, ii, iis, jmin; double tmin; @@ -18,9 +21,9 @@ void calc_polx_w_forces(int iteration) { double ener; for (int is = 0; is < ctx.n_shells; is++) { - ctx.wshells[is].n_inshell = 0; + wshells[is].n_inshell = 0; if (iteration == 0) { - ctx.wshells[is].theta_corr = 0; + wshells[is].theta_corr = 0; } } @@ -30,9 +33,9 @@ void calc_polx_w_forces(int iteration) { wi = ctx.n_atoms_solute + 3 * i; - rmu.x = ctx.coords[wi + 1].x + ctx.coords[wi + 2].x - 2 * ctx.coords[wi].x; - rmu.y = ctx.coords[wi + 1].y + ctx.coords[wi + 2].y - 2 * ctx.coords[wi].y; - rmu.z = ctx.coords[wi + 1].z + ctx.coords[wi + 2].z - 2 * ctx.coords[wi].z; + rmu.x = coords[wi + 1].x + coords[wi + 2].x - 2 * coords[wi].x; + rmu.y = coords[wi + 1].y + coords[wi + 2].y - 2 * coords[wi].y; + rmu.z = coords[wi + 1].z + coords[wi + 2].z - 2 * coords[wi].z; rm = sqrt(pow(rmu.x, 2) + pow(rmu.y, 2) + pow(rmu.z, 2)); @@ -40,9 +43,9 @@ void calc_polx_w_forces(int iteration) { rmu.y /= rm; rmu.z /= rm; - rcu.x = ctx.coords[wi].x - ctx.topo.solvent_center.x; - rcu.y = ctx.coords[wi].y - ctx.topo.solvent_center.y; - rcu.z = ctx.coords[wi].z - ctx.topo.solvent_center.z; + rcu.x = coords[wi].x - ctx.topo.solvent_center.x; + rcu.y = coords[wi].y - ctx.topo.solvent_center.y; + rcu.z = coords[wi].z - ctx.topo.solvent_center.z; rc = sqrt(pow(rcu.x, 2) + pow(rcu.y, 2) + pow(rcu.z, 2)); rcu.x /= rc; rcu.y /= rc; @@ -58,23 +61,23 @@ void calc_polx_w_forces(int iteration) { ctx.theta[i] = acos(cos_th); ctx.tdum[i] = ctx.theta[i]; - if (rc > ctx.wshells[ctx.n_shells - 1].router - ctx.wshells[ctx.n_shells - 1].dr) { + if (rc > wshells[ctx.n_shells - 1].router - wshells[ctx.n_shells - 1].dr) { for (iis = ctx.n_shells - 1; iis > 0; iis--) { - if (rc <= ctx.wshells[iis].router) { + if (rc <= wshells[iis].router) { break; } } - ctx.wshells[iis].n_inshell += 1; - ctx.list_sh[ctx.wshells[iis].n_inshell - 1][iis] = i; + wshells[iis].n_inshell += 1; + ctx.list_sh[wshells[iis].n_inshell - 1][iis] = i; } } for (int is = 0; is < ctx.n_shells; is++) { imin = 0; - for (int il = 0; il < ctx.wshells[is].n_inshell; il++) { + for (int il = 0; il < wshells[is].n_inshell; il++) { tmin = 2 * M_PI; - for (int jl = 0; jl < ctx.wshells[is].n_inshell; jl++) { + for (int jl = 0; jl < wshells[is].n_inshell; jl++) { jw = ctx.list_sh[jl][is]; if (ctx.tdum[jw] < tmin) { jmin = jw; @@ -90,29 +93,29 @@ void calc_polx_w_forces(int iteration) { if (iteration != 0 && iteration % itdis_update == 0) { for (int is = 0; is < ctx.n_shells; is++) { printf("SHELL %d\n", is); - ctx.wshells[is].avtheta /= (double)itdis_update; - ctx.wshells[is].avn_inshell /= (double)itdis_update; - ctx.wshells[is].theta_corr = - ctx.wshells[is].theta_corr + ctx.wshells[is].avtheta - acos(ctx.wshells[is].cstb); + wshells[is].avtheta /= (double)itdis_update; + wshells[is].avn_inshell /= (double)itdis_update; + wshells[is].theta_corr = + wshells[is].theta_corr + wshells[is].avtheta - acos(wshells[is].cstb); printf("average theta = %f, average in shell = %f, theta_corr = %f\n", - ctx.wshells[is].avtheta * 180 / M_PI, ctx.wshells[is].avn_inshell, - ctx.wshells[is].theta_corr * 180 / M_PI); - ctx.wshells[is].avtheta = 0; - ctx.wshells[is].avn_inshell = 0; + wshells[is].avtheta * 180 / M_PI, wshells[is].avn_inshell, + wshells[is].theta_corr * 180 / M_PI); + wshells[is].avtheta = 0; + wshells[is].avn_inshell = 0; } } for (int is = 0; is < ctx.n_shells; is++) { - if (ctx.wshells[is].n_inshell == 0) { + if (wshells[is].n_inshell == 0) { continue; } avtdum = 0; - for (int il = 0; il < ctx.wshells[is].n_inshell; il++) { + for (int il = 0; il < wshells[is].n_inshell; il++) { ii = ctx.nsort[il][is]; - arg = 1 + ((1 - 2 * (double)(il + 1)) / (double)ctx.wshells[is].n_inshell); + arg = 1 + ((1 - 2 * (double)(il + 1)) / (double)wshells[is].n_inshell); ctx.theta0[il] = acos(arg); - ctx.theta0[il] = ctx.theta0[il] - 3 * sin(ctx.theta0[il]) * ctx.wshells[is].cstb / 2; + ctx.theta0[il] = ctx.theta0[il] - 3 * sin(ctx.theta0[il]) * wshells[is].cstb / 2; if (ctx.theta0[il] < 0) { ctx.theta0[il] = 0; } @@ -122,16 +125,16 @@ void calc_polx_w_forces(int iteration) { avtdum += ctx.theta[ii]; ener = .5 * ctx.md.polarisation_force * - pow(ctx.theta[ii] - ctx.theta0[il] + ctx.wshells[is].theta_corr, 2); + pow(ctx.theta[ii] - ctx.theta0[il] + wshells[is].theta_corr, 2); ctx.E_restraint.Upolx += ener; - dv = ctx.md.polarisation_force * (ctx.theta[ii] - ctx.theta0[il] + ctx.wshells[is].theta_corr); + dv = ctx.md.polarisation_force * (ctx.theta[ii] - ctx.theta0[il] + wshells[is].theta_corr); wi = ctx.n_atoms_solute + 3 * ii; - rmu.x = ctx.coords[wi + 1].x + ctx.coords[wi + 2].x - 2 * ctx.coords[wi].x; - rmu.y = ctx.coords[wi + 1].y + ctx.coords[wi + 2].y - 2 * ctx.coords[wi].y; - rmu.z = ctx.coords[wi + 1].z + ctx.coords[wi + 2].z - 2 * ctx.coords[wi].z; + rmu.x = coords[wi + 1].x + coords[wi + 2].x - 2 * coords[wi].x; + rmu.y = coords[wi + 1].y + coords[wi + 2].y - 2 * coords[wi].y; + rmu.z = coords[wi + 1].z + coords[wi + 2].z - 2 * coords[wi].z; rm = sqrt(pow(rmu.x, 2) + pow(rmu.y, 2) + pow(rmu.z, 2)); @@ -139,9 +142,9 @@ void calc_polx_w_forces(int iteration) { rmu.y /= rm; rmu.z /= rm; - rcu.x = ctx.coords[wi].x - ctx.topo.solvent_center.x; - rcu.y = ctx.coords[wi].y - ctx.topo.solvent_center.y; - rcu.z = ctx.coords[wi].z - ctx.topo.solvent_center.z; + rcu.x = coords[wi].x - ctx.topo.solvent_center.x; + rcu.y = coords[wi].y - ctx.topo.solvent_center.y; + rcu.z = coords[wi].z - ctx.topo.solvent_center.z; rc = sqrt(pow(rcu.x, 2) + pow(rcu.y, 2) + pow(rcu.z, 2)); rcu.x /= rc; rcu.y /= rc; @@ -175,18 +178,18 @@ void calc_polx_w_forces(int iteration) { f2.y = (rmu.y - rcu.y * cos_th) / rc; f2.z = (rmu.z - rcu.z * cos_th) / rc; - ctx.dvelocities[wi].x += f0 * (f1O.x + f2.x); - ctx.dvelocities[wi].y += f0 * (f1O.y + f2.y); - ctx.dvelocities[wi].z += f0 * (f1O.z + f2.z); - ctx.dvelocities[wi + 1].x += f0 * f1H1.x; - ctx.dvelocities[wi + 1].y += f0 * f1H1.y; - ctx.dvelocities[wi + 1].z += f0 * f1H1.z; - ctx.dvelocities[wi + 2].x += f0 * f1H2.x; - ctx.dvelocities[wi + 2].y += f0 * f1H2.y; - ctx.dvelocities[wi + 2].z += f0 * f1H2.z; + dvelocities[wi].x += f0 * (f1O.x + f2.x); + dvelocities[wi].y += f0 * (f1O.y + f2.y); + dvelocities[wi].z += f0 * (f1O.z + f2.z); + dvelocities[wi + 1].x += f0 * f1H1.x; + dvelocities[wi + 1].y += f0 * f1H1.y; + dvelocities[wi + 1].z += f0 * f1H1.z; + dvelocities[wi + 2].x += f0 * f1H2.x; + dvelocities[wi + 2].y += f0 * f1H2.y; + dvelocities[wi + 2].z += f0 * f1H2.z; } - ctx.wshells[is].avtheta += avtdum / (double)ctx.wshells[is].n_inshell; - ctx.wshells[is].avn_inshell += ctx.wshells[is].n_inshell; + wshells[is].avtheta += avtdum / (double)wshells[is].n_inshell; + wshells[is].avn_inshell += wshells[is].n_inshell; } } diff --git a/src/core/cpu/src/cpu_pshell_force.cpp b/src/core/cpu/src/cpu_pshell_force.cpp index eef4e4d6..9ff083cc 100644 --- a/src/core/cpu/src/cpu_pshell_force.cpp +++ b/src/core/cpu/src/cpu_pshell_force.cpp @@ -7,34 +7,38 @@ void calc_pshell_forces() { auto& ctx = Context::instance(); + auto &coords = ctx.coords->cpu_data_p; + auto &dvelocities = ctx.dvelocities->cpu_data_p; + auto *excluded = ctx.excluded->cpu_data_p; + auto *shell = ctx.shell->cpu_data_p; coord_t dr; double k, r2, ener; for (int i = 0; i < ctx.n_atoms_solute; i++) { - if (ctx.shell[i] || ctx.excluded[i]) { - if (ctx.excluded[i]) { + if (shell[i] || excluded[i]) { + if (excluded[i]) { k = k_fix; } else { k = k_pshell; } - dr.x = ctx.coords[i].x - ctx.coords_top[i].x; - dr.y = ctx.coords[i].y - ctx.coords_top[i].y; - dr.z = ctx.coords[i].z - ctx.coords_top[i].z; + dr.x = coords[i].x - ctx.coords_init->cpu_data_p[i].x; + dr.y = coords[i].y - ctx.coords_init->cpu_data_p[i].y; + dr.z = coords[i].z - ctx.coords_init->cpu_data_p[i].z; r2 = pow(dr.x, 2) + pow(dr.y, 2) + pow(dr.z, 2); ener = 0.5 * k * r2; - if (ctx.excluded[i]) { + if (excluded[i]) { ctx.E_restraint.Ufix += ener; } - if (ctx.shell[i]) { + if (shell[i]) { ctx.E_restraint.Ushell += ener; } - ctx.dvelocities[i].x += k * dr.x; - ctx.dvelocities[i].y += k * dr.y; - ctx.dvelocities[i].z += k * dr.z; + dvelocities[i].x += k * dr.x; + dvelocities[i].y += k * dr.y; + dvelocities[i].z += k * dr.z; } } } diff --git a/src/core/cpu/src/cpu_q_angle_force.cpp b/src/core/cpu/src/cpu_q_angle_force.cpp index 9cd70f4e..c9c2ea65 100644 --- a/src/core/cpu/src/cpu_q_angle_force.cpp +++ b/src/core/cpu/src/cpu_q_angle_force.cpp @@ -8,6 +8,9 @@ void calc_qangle_forces(int state) { auto& ctx = Context::instance(); + auto &coords = ctx.coords->cpu_data_p; + auto &dvelocities = ctx.dvelocities->cpu_data_p; + auto *lambdas = ctx.lambdas->cpu_data_p; int ic; int ai, aj, ak; coord_t rji, rjk; @@ -28,13 +31,13 @@ void calc_qangle_forces(int state) { aj = ctx.q_angles[i + ctx.n_qangles * state].aj - 1; ak = ctx.q_angles[i + ctx.n_qangles * state].ak - 1; - rji.x = ctx.coords[ai].x - ctx.coords[aj].x; - rji.y = ctx.coords[ai].y - ctx.coords[aj].y; - rji.z = ctx.coords[ai].z - ctx.coords[aj].z; + rji.x = coords[ai].x - coords[aj].x; + rji.y = coords[ai].y - coords[aj].y; + rji.z = coords[ai].z - coords[aj].z; - rjk.x = ctx.coords[ak].x - ctx.coords[aj].x; - rjk.y = ctx.coords[ak].y - ctx.coords[aj].y; - rjk.z = ctx.coords[ak].z - ctx.coords[aj].z; + rjk.x = coords[ak].x - coords[aj].x; + rjk.y = coords[ak].y - coords[aj].y; + rjk.z = coords[ak].z - coords[aj].z; bji = sqrt(pow(rji.x, 2) + pow(rji.y, 2) + pow(rji.z, 2)); bjk = sqrt(pow(rjk.x, 2) + pow(rjk.y, 2) + pow(rjk.z, 2)); @@ -51,7 +54,7 @@ void calc_qangle_forces(int state) { ener = .5 * ctx.q_cangles[ic].kth * pow(dth, 2); ctx.EQ_bond[state].Uangle += ener; - dv = ctx.q_cangles[ic].kth * dth * ctx.lambdas[state]; + dv = ctx.q_cangles[ic].kth * dth * lambdas[state]; f1 = sin(th); if (abs(f1) < 1E-12) { f1 = 1E-12; @@ -65,14 +68,14 @@ void calc_qangle_forces(int state) { dk.y = f1 * (rji.y / (bji * bjk) - cos_th * rjk.y / pow(bjk, 2)); dk.z = f1 * (rji.z / (bji * bjk) - cos_th * rjk.z / pow(bjk, 2)); - ctx.dvelocities[ai].x += dv * di.x; - ctx.dvelocities[ai].y += dv * di.y; - ctx.dvelocities[ai].z += dv * di.z; - ctx.dvelocities[ak].x += dv * dk.x; - ctx.dvelocities[ak].y += dv * dk.y; - ctx.dvelocities[ak].z += dv * dk.z; - ctx.dvelocities[aj].x -= dv * (di.x + dk.x); - ctx.dvelocities[aj].y -= dv * (di.y + dk.y); - ctx.dvelocities[aj].z -= dv * (di.z + dk.z); + dvelocities[ai].x += dv * di.x; + dvelocities[ai].y += dv * di.y; + dvelocities[ai].z += dv * di.z; + dvelocities[ak].x += dv * dk.x; + dvelocities[ak].y += dv * dk.y; + dvelocities[ak].z += dv * dk.z; + dvelocities[aj].x -= dv * (di.x + dk.x); + dvelocities[aj].y -= dv * (di.y + dk.y); + dvelocities[aj].z -= dv * (di.z + dk.z); } } diff --git a/src/core/cpu/src/cpu_q_bond_force.cpp b/src/core/cpu/src/cpu_q_bond_force.cpp index 35d86abe..5f2f7203 100644 --- a/src/core/cpu/src/cpu_q_bond_force.cpp +++ b/src/core/cpu/src/cpu_q_bond_force.cpp @@ -6,6 +6,9 @@ void calc_qbond_forces(int state) { auto& ctx = Context::instance(); + auto &coords = ctx.coords->cpu_data_p; + auto &dvelocities = ctx.dvelocities->cpu_data_p; + auto *lambdas = ctx.lambdas->cpu_data_p; int ic; int ai, aj; double b, db, ener, dv; @@ -21,22 +24,22 @@ void calc_qbond_forces(int state) { ai = ctx.q_bonds[i + ctx.n_qbonds * state].ai - 1; aj = ctx.q_bonds[i + ctx.n_qbonds * state].aj - 1; - rij.x = ctx.coords[aj].x - ctx.coords[ai].x; - rij.y = ctx.coords[aj].y - ctx.coords[ai].y; - rij.z = ctx.coords[aj].z - ctx.coords[ai].z; + rij.x = coords[aj].x - coords[ai].x; + rij.y = coords[aj].y - coords[ai].y; + rij.z = coords[aj].z - coords[ai].z; b = sqrt(pow(rij.x, 2) + pow(rij.y, 2) + pow(rij.z, 2)); db = b - ctx.q_cbonds[ic].b0; ener = 0.5 * ctx.q_cbonds[ic].kb * pow(db, 2); ctx.EQ_bond[state].Ubond += ener; - dv = db * ctx.q_cbonds[ic].kb * ctx.lambdas[state] / b; - - ctx.dvelocities[ai].x -= dv * rij.x; - ctx.dvelocities[ai].y -= dv * rij.y; - ctx.dvelocities[ai].z -= dv * rij.z; - ctx.dvelocities[aj].x += dv * rij.x; - ctx.dvelocities[aj].y += dv * rij.y; - ctx.dvelocities[aj].z += dv * rij.z; + dv = db * ctx.q_cbonds[ic].kb * lambdas[state] / b; + + dvelocities[ai].x -= dv * rij.x; + dvelocities[ai].y -= dv * rij.y; + dvelocities[ai].z -= dv * rij.z; + dvelocities[aj].x += dv * rij.x; + dvelocities[aj].y += dv * rij.y; + dvelocities[aj].z += dv * rij.z; } } diff --git a/src/core/cpu/src/cpu_q_torsion_force.cpp b/src/core/cpu/src/cpu_q_torsion_force.cpp index 35ab4b14..be309347 100644 --- a/src/core/cpu/src/cpu_q_torsion_force.cpp +++ b/src/core/cpu/src/cpu_q_torsion_force.cpp @@ -7,6 +7,9 @@ void calc_qtorsion_forces(int state) { auto& ctx = Context::instance(); + auto &coords = ctx.coords->cpu_data_p; + auto &dvelocities = ctx.dvelocities->cpu_data_p; + auto *lambdas = ctx.lambdas->cpu_data_p; int ic; int ai, aj, ak, al; coord_t rji, rjk, rkl, rnj, rnk, rki, rlj; @@ -29,15 +32,15 @@ void calc_qtorsion_forces(int state) { ak = ctx.q_torsions[i + ctx.n_qtorsions * state].ak - 1; al = ctx.q_torsions[i + ctx.n_qtorsions * state].al - 1; - rji.x = ctx.coords[ai].x - ctx.coords[aj].x; - rji.y = ctx.coords[ai].y - ctx.coords[aj].y; - rji.z = ctx.coords[ai].z - ctx.coords[aj].z; - rjk.x = ctx.coords[ak].x - ctx.coords[aj].x; - rjk.y = ctx.coords[ak].y - ctx.coords[aj].y; - rjk.z = ctx.coords[ak].z - ctx.coords[aj].z; - rkl.x = ctx.coords[al].x - ctx.coords[ak].x; - rkl.y = ctx.coords[al].y - ctx.coords[ak].y; - rkl.z = ctx.coords[al].z - ctx.coords[ak].z; + rji.x = coords[ai].x - coords[aj].x; + rji.y = coords[ai].y - coords[aj].y; + rji.z = coords[ai].z - coords[aj].z; + rjk.x = coords[ak].x - coords[aj].x; + rjk.y = coords[ak].y - coords[aj].y; + rjk.z = coords[ak].z - coords[aj].z; + rkl.x = coords[al].x - coords[ak].x; + rkl.y = coords[al].y - coords[ak].y; + rkl.z = coords[al].z - coords[ak].z; rnj.x = rji.y * rjk.z - rji.z * rjk.y; rnj.y = rji.z * rjk.x - rji.x * rjk.z; rnj.z = rji.x * rjk.y - rji.y * rjk.x; @@ -69,7 +72,7 @@ void calc_qtorsion_forces(int state) { // Energy arg = ctx.q_ctorsions[ic].n * phi - to_radians(ctx.q_ctorsions[ic].d); ener = ctx.q_ctorsions[ic].k * (1 + cos(arg)); - dv = -ctx.q_ctorsions[ic].n * ctx.q_ctorsions[ic].k * sin(arg) * ctx.lambdas[state]; + dv = -ctx.q_ctorsions[ic].n * ctx.q_ctorsions[ic].k * sin(arg) * lambdas[state]; // Forces f1 = sin(phi); @@ -108,20 +111,20 @@ void calc_qtorsion_forces(int state) { // Update energy and forces ctx.EQ_bond[state].Utor += ener; - ctx.dvelocities[ai].x += dv * dpi.x; - ctx.dvelocities[ai].y += dv * dpi.y; - ctx.dvelocities[ai].z += dv * dpi.z; + dvelocities[ai].x += dv * dpi.x; + dvelocities[ai].y += dv * dpi.y; + dvelocities[ai].z += dv * dpi.z; - ctx.dvelocities[aj].x += dv * dpj.x; - ctx.dvelocities[aj].y += dv * dpj.y; - ctx.dvelocities[aj].z += dv * dpj.z; + dvelocities[aj].x += dv * dpj.x; + dvelocities[aj].y += dv * dpj.y; + dvelocities[aj].z += dv * dpj.z; - ctx.dvelocities[ak].x += dv * dpk.x; - ctx.dvelocities[ak].y += dv * dpk.y; - ctx.dvelocities[ak].z += dv * dpk.z; + dvelocities[ak].x += dv * dpk.x; + dvelocities[ak].y += dv * dpk.y; + dvelocities[ak].z += dv * dpk.z; - ctx.dvelocities[al].x += dv * dpl.x; - ctx.dvelocities[al].y += dv * dpl.y; - ctx.dvelocities[al].z += dv * dpl.z; + dvelocities[al].x += dv * dpl.x; + dvelocities[al].y += dv * dpl.y; + dvelocities[al].z += dv * dpl.z; } } diff --git a/src/core/cpu/src/cpu_radix_water_force.cpp b/src/core/cpu/src/cpu_radix_water_force.cpp index 217b343f..a887ad31 100644 --- a/src/core/cpu/src/cpu_radix_water_force.cpp +++ b/src/core/cpu/src/cpu_radix_water_force.cpp @@ -7,6 +7,8 @@ void calc_radix_w_forces() { auto& ctx = Context::instance(); + auto &coords = ctx.coords->cpu_data_p; + auto &dvelocities = ctx.dvelocities->cpu_data_p; double b, db, ener, dv, fexp; coord_t dr; @@ -19,9 +21,9 @@ void calc_radix_w_forces() { } for (int i = ctx.n_atoms_solute; i < ctx.n_atoms; i += 3) { - dr.x = ctx.coords[i].x - ctx.topo.solvent_center.x; - dr.y = ctx.coords[i].y - ctx.topo.solvent_center.y; - dr.z = ctx.coords[i].z - ctx.topo.solvent_center.z; + dr.x = coords[i].x - ctx.topo.solvent_center.x; + dr.y = coords[i].y - ctx.topo.solvent_center.y; + dr.z = coords[i].z - ctx.topo.solvent_center.z; b = sqrt(pow(dr.x, 2) + pow(dr.y, 2) + pow(dr.z, 2)); db = b - (ctx.topo.solvent_radius - shift); @@ -38,8 +40,8 @@ void calc_radix_w_forces() { } ctx.E_restraint.Uradx += ener; - ctx.dvelocities[i].x += dv * dr.x; - ctx.dvelocities[i].y += dv * dr.y; - ctx.dvelocities[i].z += dv * dr.z; + dvelocities[i].x += dv * dr.x; + dvelocities[i].y += dv * dr.y; + dvelocities[i].z += dv * dr.z; } } diff --git a/src/core/cpu/src/cpu_restrang_force.cpp b/src/core/cpu/src/cpu_restrang_force.cpp index baaf2385..d809a9c1 100644 --- a/src/core/cpu/src/cpu_restrang_force.cpp +++ b/src/core/cpu/src/cpu_restrang_force.cpp @@ -7,6 +7,11 @@ void calc_restrang_forces() { auto& ctx = Context::instance(); + auto &coords = ctx.coords->cpu_data_p; + auto &dvelocities = ctx.dvelocities->cpu_data_p; + auto &restrangs = ctx.restrangs->cpu_data_p; + auto *lambdas = ctx.lambdas->cpu_data_p; + auto *EQ_restraint = ctx.EQ_restraint->cpu_data_p; int state, i, j, k; coord_t dr, dr2, di, dk; @@ -14,21 +19,21 @@ void calc_restrang_forces() { double dth, dv, ener, f1; for (int ir = 0; ir < ctx.n_restrangs; ir++) { - state = ctx.restrangs[ir].ipsi - 1; - i = ctx.restrangs[ir].ai - 1; - j = ctx.restrangs[ir].aj - 1; - k = ctx.restrangs[ir].ak - 1; + state = restrangs[ir].ipsi - 1; + i = restrangs[ir].ai - 1; + j = restrangs[ir].aj - 1; + k = restrangs[ir].ak - 1; - dr.x = ctx.coords[i].x - ctx.coords[j].x; - dr.y = ctx.coords[i].y - ctx.coords[j].y; - dr.z = ctx.coords[i].z - ctx.coords[j].z; + dr.x = coords[i].x - coords[j].x; + dr.y = coords[i].y - coords[j].y; + dr.z = coords[i].z - coords[j].z; - dr2.x = ctx.coords[k].x - ctx.coords[j].x; - dr2.y = ctx.coords[k].y - ctx.coords[j].y; - dr2.z = ctx.coords[k].z - ctx.coords[j].z; + dr2.x = coords[k].x - coords[j].x; + dr2.y = coords[k].y - coords[j].y; + dr2.z = coords[k].z - coords[j].z; - if (ctx.restrangs[ir].ipsi != 0) { - lambda = ctx.lambdas[state]; + if (restrangs[ir].ipsi != 0) { + lambda = lambdas[state]; } else { lambda = 1; } @@ -50,10 +55,10 @@ void calc_restrang_forces() { } th = acos(cos_th); - dth = th - to_radians(ctx.restrangs[ir].ang); + dth = th - to_radians(restrangs[ir].ang); - ener = .5 * ctx.restrangs[ir].k * pow(dth, 2); - dv = lambda * ctx.restrangs[ir].k * dth; + ener = .5 * restrangs[ir].k * pow(dth, 2); + dv = lambda * restrangs[ir].k * dth; f1 = sin(th); if (fabs(f1) < 1E-12) { @@ -69,25 +74,25 @@ void calc_restrang_forces() { dk.y = f1 * (dr.y / (rij * rjk) - cos_th * dr2.y / r2jk); dk.z = f1 * (dr.z / (rij * rjk) - cos_th * dr2.z / r2jk); - ctx.dvelocities[i].x += dv * di.x; - ctx.dvelocities[i].y += dv * di.y; - ctx.dvelocities[i].z += dv * di.z; - ctx.dvelocities[k].x += dv * dk.x; - ctx.dvelocities[k].y += dv * dk.y; - ctx.dvelocities[k].z += dv * dk.z; - ctx.dvelocities[j].x -= dv * (di.x + dk.x); - ctx.dvelocities[j].y -= dv * (di.y + dk.y); - ctx.dvelocities[j].z -= dv * (di.z + dk.z); - - if (ctx.restrangs[ir].ipsi == 0) { + dvelocities[i].x += dv * di.x; + dvelocities[i].y += dv * di.y; + dvelocities[i].z += dv * di.z; + dvelocities[k].x += dv * dk.x; + dvelocities[k].y += dv * dk.y; + dvelocities[k].z += dv * dk.z; + dvelocities[j].x -= dv * (di.x + dk.x); + dvelocities[j].y -= dv * (di.y + dk.y); + dvelocities[j].z -= dv * (di.z + dk.z); + + if (restrangs[ir].ipsi == 0) { for (int lambda_idx = 0; lambda_idx < ctx.n_lambdas; lambda_idx++) { - ctx.EQ_restraint[lambda_idx].Urestr += ener; + EQ_restraint[lambda_idx].Urestr += ener; } if (ctx.n_lambdas == 0) { ctx.E_restraint.Upres += ener; } } else { - ctx.EQ_restraint[state].Urestr += ener; + EQ_restraint[state].Urestr += ener; } } } diff --git a/src/core/cpu/src/cpu_restrdis_force.cpp b/src/core/cpu/src/cpu_restrdis_force.cpp index 8c6346b2..c15cbef7 100644 --- a/src/core/cpu/src/cpu_restrdis_force.cpp +++ b/src/core/cpu/src/cpu_restrdis_force.cpp @@ -6,54 +6,59 @@ void calc_restrdis_forces() { auto& ctx = Context::instance(); + auto &coords = ctx.coords->cpu_data_p; + auto &dvelocities = ctx.dvelocities->cpu_data_p; + auto &restrdists = ctx.restrdists->cpu_data_p; + auto *lambdas = ctx.lambdas->cpu_data_p; + auto *EQ_restraint = ctx.EQ_restraint->cpu_data_p; int state, i, j; coord_t dr; double lambda, b, db, dv, ener; for (int ir = 0; ir < ctx.n_restrdists; ir++) { - state = ctx.restrdists[ir].ipsi - 1; - i = ctx.restrdists[ir].ai - 1; - j = ctx.restrdists[ir].aj - 1; + state = restrdists[ir].ipsi - 1; + i = restrdists[ir].ai - 1; + j = restrdists[ir].aj - 1; - dr.x = ctx.coords[j].x - ctx.coords[i].x; - dr.y = ctx.coords[j].y - ctx.coords[i].y; - dr.z = ctx.coords[j].z - ctx.coords[i].z; + dr.x = coords[j].x - coords[i].x; + dr.y = coords[j].y - coords[i].y; + dr.z = coords[j].z - coords[i].z; - if (ctx.restrdists[ir].ipsi != 0) { - lambda = ctx.lambdas[state]; + if (restrdists[ir].ipsi != 0) { + lambda = lambdas[state]; } else { lambda = 1; } b = sqrt(pow(dr.x, 2) + pow(dr.y, 2) + pow(dr.z, 2)); - if (b < ctx.restrdists[ir].d1) { - db = b - ctx.restrdists[ir].d1; - } else if (b > ctx.restrdists[ir].d2) { - db = b - ctx.restrdists[ir].d2; + if (b < restrdists[ir].d1) { + db = b - restrdists[ir].d1; + } else if (b > restrdists[ir].d2) { + db = b - restrdists[ir].d2; } else { continue; } - ener = .5 * ctx.restrdists[ir].k * pow(db, 2); - dv = lambda * ctx.restrdists[ir].k * db / b; + ener = .5 * restrdists[ir].k * pow(db, 2); + dv = lambda * restrdists[ir].k * db / b; - ctx.dvelocities[j].x += dr.x * dv; - ctx.dvelocities[j].y += dr.y * dv; - ctx.dvelocities[j].z += dr.z * dv; - ctx.dvelocities[i].x -= dr.x * dv; - ctx.dvelocities[i].y -= dr.y * dv; - ctx.dvelocities[i].z -= dr.z * dv; + dvelocities[j].x += dr.x * dv; + dvelocities[j].y += dr.y * dv; + dvelocities[j].z += dr.z * dv; + dvelocities[i].x -= dr.x * dv; + dvelocities[i].y -= dr.y * dv; + dvelocities[i].z -= dr.z * dv; - if (ctx.restrdists[ir].ipsi == 0) { + if (restrdists[ir].ipsi == 0) { for (int k = 0; k < ctx.n_lambdas; k++) { - ctx.EQ_restraint[k].Urestr += ener; + EQ_restraint[k].Urestr += ener; } if (ctx.n_lambdas == 0) { ctx.E_restraint.Upres += ener; } } else { - ctx.EQ_restraint[state].Urestr += ener; + EQ_restraint[state].Urestr += ener; } } } diff --git a/src/core/cpu/src/cpu_restrpos_force.cpp b/src/core/cpu/src/cpu_restrpos_force.cpp index 98e78352..6db044b4 100644 --- a/src/core/cpu/src/cpu_restrpos_force.cpp +++ b/src/core/cpu/src/cpu_restrpos_force.cpp @@ -6,21 +6,26 @@ void calc_restrpos_forces() { auto& ctx = Context::instance(); + auto &coords = ctx.coords->cpu_data_p; + auto &dvelocities = ctx.dvelocities->cpu_data_p; + auto &restrspos = ctx.restrspos->cpu_data_p; + auto *lambdas = ctx.lambdas->cpu_data_p; + auto *EQ_restraint = ctx.EQ_restraint->cpu_data_p; int state, i; coord_t dr; double lambda, ener, x2, y2, z2; for (int ir = 0; ir < ctx.n_restrspos; ir++) { - state = ctx.restrspos[ir].ipsi - 1; - i = ctx.restrspos[ir].a - 1; + state = restrspos[ir].ipsi - 1; + i = restrspos[ir].a - 1; - dr.x = ctx.coords[i].x - ctx.restrspos[ir].x.x; - dr.y = ctx.coords[i].y - ctx.restrspos[ir].x.y; - dr.z = ctx.coords[i].z - ctx.restrspos[ir].x.z; + dr.x = coords[i].x - restrspos[ir].x.x; + dr.y = coords[i].y - restrspos[ir].x.y; + dr.z = coords[i].z - restrspos[ir].x.z; - if (ctx.restrspos[ir].ipsi != 0) { - lambda = ctx.lambdas[state]; + if (restrspos[ir].ipsi != 0) { + lambda = lambdas[state]; } else { lambda = 1; } @@ -29,21 +34,21 @@ void calc_restrpos_forces() { y2 = pow(dr.y, 2); z2 = pow(dr.z, 2); - ener = .5 * ctx.restrspos[ir].k.x * x2 + .5 * ctx.restrspos[ir].k.y * y2 + .5 * ctx.restrspos[ir].k.z * z2; + ener = .5 * restrspos[ir].k.x * x2 + .5 * restrspos[ir].k.y * y2 + .5 * restrspos[ir].k.z * z2; - ctx.dvelocities[i].x += ctx.restrspos[ir].k.x * dr.x * lambda; - ctx.dvelocities[i].y += ctx.restrspos[ir].k.y * dr.y * lambda; - ctx.dvelocities[i].z += ctx.restrspos[ir].k.z * dr.z * lambda; + dvelocities[i].x += restrspos[ir].k.x * dr.x * lambda; + dvelocities[i].y += restrspos[ir].k.y * dr.y * lambda; + dvelocities[i].z += restrspos[ir].k.z * dr.z * lambda; - if (ctx.restrspos[ir].ipsi == 0) { + if (restrspos[ir].ipsi == 0) { for (int k = 0; k < ctx.n_lambdas; k++) { - ctx.EQ_restraint[k].Urestr += ener; + EQ_restraint[k].Urestr += ener; } if (ctx.n_lambdas == 0) { ctx.E_restraint.Upres += ener; } } else { - ctx.EQ_restraint[state].Urestr += ener; + EQ_restraint[state].Urestr += ener; } } } diff --git a/src/core/cpu/src/cpu_restrseq_force.cpp b/src/core/cpu/src/cpu_restrseq_force.cpp index e8da11d2..296762e8 100644 --- a/src/core/cpu/src/cpu_restrseq_force.cpp +++ b/src/core/cpu/src/cpu_restrseq_force.cpp @@ -6,13 +6,19 @@ void calc_restrseq_forces() { auto& ctx = Context::instance(); + auto &atypes = ctx.atypes->cpu_data_p; + auto &catypes = ctx.catypes->cpu_data_p; + auto &coords = ctx.coords->cpu_data_p; + auto &dvelocities = ctx.dvelocities->cpu_data_p; + auto &restrseqs = ctx.restrseqs->cpu_data_p; + auto *heavy = ctx.heavy->cpu_data_p; double k, mass, totmass; coord_t dr; double r2, ener; for (int s = 0; s < ctx.n_restrseqs; s++) { - k = ctx.restrseqs[s].k; + k = restrseqs[s].k; dr.x = 0; dr.y = 0; @@ -20,13 +26,13 @@ void calc_restrseq_forces() { int n_ctr = 0; totmass = 0; - if (ctx.restrseqs[s].to_center == 1) { - for (int i = ctx.restrseqs[s].ai - 1; i < ctx.restrseqs[s].aj - 1; i++) { - if (ctx.heavy[i] || ctx.restrseqs[s].ih) { + if (restrseqs[s].to_center == 1) { + for (int i = restrseqs[s].ai - 1; i < restrseqs[s].aj - 1; i++) { + if (heavy[i] || restrseqs[s].ih) { n_ctr++; - dr.x += (ctx.coords[i].x - ctx.coords_top[i].x); - dr.y += (ctx.coords[i].y - ctx.coords_top[i].y); - dr.z += (ctx.coords[i].z - ctx.coords_top[i].z); + dr.x += (coords[i].x - ctx.coords_init->cpu_data_p[i].x); + dr.y += (coords[i].y - ctx.coords_init->cpu_data_p[i].y); + dr.z += (coords[i].z - ctx.coords_init->cpu_data_p[i].z); } } @@ -38,23 +44,23 @@ void calc_restrseq_forces() { ener = .5 * k * r2; ctx.E_restraint.Upres += ener; - for (int i = ctx.restrseqs[s].ai - 1; i < ctx.restrseqs[s].aj - 1; i++) { - if (ctx.heavy[i] || ctx.restrseqs[s].ih) { - mass = ctx.catypes[ctx.atypes[i].code - 1].m; - ctx.dvelocities[i].x += (k * dr.x * mass / 12.010); - ctx.dvelocities[i].y += (k * dr.y * mass / 12.010); - ctx.dvelocities[i].z += (k * dr.z * mass / 12.010); + for (int i = restrseqs[s].ai - 1; i < restrseqs[s].aj - 1; i++) { + if (heavy[i] || restrseqs[s].ih) { + mass = catypes[atypes[i].code - 1].m; + dvelocities[i].x += (k * dr.x * mass / 12.010); + dvelocities[i].y += (k * dr.y * mass / 12.010); + dvelocities[i].z += (k * dr.z * mass / 12.010); } } } - } else if (ctx.restrseqs[s].to_center == 2) { - for (int i = ctx.restrseqs[s].ai - 1; i < ctx.restrseqs[s].aj - 1; i++) { - if (ctx.heavy[i] || ctx.restrseqs[s].ih) { - mass = ctx.catypes[ctx.atypes[i].code - 1].m; + } else if (restrseqs[s].to_center == 2) { + for (int i = restrseqs[s].ai - 1; i < restrseqs[s].aj - 1; i++) { + if (heavy[i] || restrseqs[s].ih) { + mass = catypes[atypes[i].code - 1].m; totmass += mass; - dr.x += (ctx.coords[i].x - ctx.coords_top[i].x) * mass; - dr.y += (ctx.coords[i].y - ctx.coords_top[i].y) * mass; - dr.z += (ctx.coords[i].z - ctx.coords_top[i].z) * mass; + dr.x += (coords[i].x - ctx.coords_init->cpu_data_p[i].x) * mass; + dr.y += (coords[i].y - ctx.coords_init->cpu_data_p[i].y) * mass; + dr.z += (coords[i].z - ctx.coords_init->cpu_data_p[i].z) * mass; } } @@ -66,28 +72,28 @@ void calc_restrseq_forces() { ener = .5 * k * r2; ctx.E_restraint.Upres += ener; - for (int i = ctx.restrseqs[s].ai - 1; i < ctx.restrseqs[s].aj - 1; i++) { - if (ctx.heavy[i] || ctx.restrseqs[s].ih) { - ctx.dvelocities[i].x += k * dr.x; - ctx.dvelocities[i].y += k * dr.y; - ctx.dvelocities[i].z += k * dr.z; + for (int i = restrseqs[s].ai - 1; i < restrseqs[s].aj - 1; i++) { + if (heavy[i] || restrseqs[s].ih) { + dvelocities[i].x += k * dr.x; + dvelocities[i].y += k * dr.y; + dvelocities[i].z += k * dr.z; } } } } else { - for (int i = ctx.restrseqs[s].ai - 1; i < ctx.restrseqs[s].aj - 1; i++) { - if (ctx.heavy[i] || ctx.restrseqs[s].ih) { - dr.x = ctx.coords[i].x - ctx.coords_top[i].x; - dr.y = ctx.coords[i].y - ctx.coords_top[i].y; - dr.z = ctx.coords[i].z - ctx.coords_top[i].z; + for (int i = restrseqs[s].ai - 1; i < restrseqs[s].aj - 1; i++) { + if (heavy[i] || restrseqs[s].ih) { + dr.x = coords[i].x - ctx.coords_init->cpu_data_p[i].x; + dr.y = coords[i].y - ctx.coords_init->cpu_data_p[i].y; + dr.z = coords[i].z - ctx.coords_init->cpu_data_p[i].z; r2 = pow(dr.x, 2) + pow(dr.y, 2) + pow(dr.z, 2); ener = .5 * k * r2; ctx.E_restraint.Upres += ener; - ctx.dvelocities[i].x += k * dr.x; - ctx.dvelocities[i].y += k * dr.y; - ctx.dvelocities[i].z += k * dr.z; + dvelocities[i].x += k * dr.x; + dvelocities[i].y += k * dr.y; + dvelocities[i].z += k * dr.z; } } } diff --git a/src/core/cpu/src/cpu_restrwall_force.cpp b/src/core/cpu/src/cpu_restrwall_force.cpp index 0c57038d..fd49749a 100644 --- a/src/core/cpu/src/cpu_restrwall_force.cpp +++ b/src/core/cpu/src/cpu_restrwall_force.cpp @@ -6,34 +6,38 @@ void calc_restrwall_forces() { auto& ctx = Context::instance(); + auto &coords = ctx.coords->cpu_data_p; + auto &dvelocities = ctx.dvelocities->cpu_data_p; + auto &restrwalls = ctx.restrwalls->cpu_data_p; + auto *heavy = ctx.heavy->cpu_data_p; double k, b, db, ener, dv, fexp; coord_t dr; for (int ir = 0; ir < ctx.n_restrwalls; ir++) { - k = ctx.restrwalls[ir].k; - for (int i = ctx.restrwalls[ir].ai - 1; i < ctx.restrwalls[ir].aj - 1; i++) { - if (ctx.heavy[i] || ctx.restrwalls[ir].ih) { - dr.x = ctx.coords[i].x - ctx.topo.solvent_center.x; - dr.y = ctx.coords[i].y - ctx.topo.solvent_center.y; - dr.z = ctx.coords[i].z - ctx.topo.solvent_center.z; + k = restrwalls[ir].k; + for (int i = restrwalls[ir].ai - 1; i < restrwalls[ir].aj - 1; i++) { + if (heavy[i] || restrwalls[ir].ih) { + dr.x = coords[i].x - ctx.topo.solvent_center.x; + dr.y = coords[i].y - ctx.topo.solvent_center.y; + dr.z = coords[i].z - ctx.topo.solvent_center.z; b = sqrt(pow(dr.x, 2) + pow(dr.y, 2) + pow(dr.z, 2)); - db = b - ctx.restrwalls[ir].d; + db = b - restrwalls[ir].d; if (db > 0) { - ener = .5 * k * pow(db, 2) - ctx.restrwalls[ir].dMorse; + ener = .5 * k * pow(db, 2) - restrwalls[ir].dMorse; dv = k * db / b; } else { - fexp = exp(ctx.restrwalls[ir].aMorse * db); - ener = ctx.restrwalls[ir].dMorse * (fexp * fexp - 2 * fexp); - dv = -2 * ctx.restrwalls[ir].dMorse * ctx.restrwalls[ir].aMorse * (fexp - fexp * fexp) / b; + fexp = exp(restrwalls[ir].aMorse * db); + ener = restrwalls[ir].dMorse * (fexp * fexp - 2 * fexp); + dv = -2 * restrwalls[ir].dMorse * restrwalls[ir].aMorse * (fexp - fexp * fexp) / b; } ctx.E_restraint.Upres += ener; - ctx.dvelocities[i].x += dv * dr.x; - ctx.dvelocities[i].y += dv * dr.y; - ctx.dvelocities[i].z += dv * dr.z; + dvelocities[i].x += dv * dr.x; + dvelocities[i].y += dv * dr.y; + dvelocities[i].z += dv * dr.z; } } } diff --git a/src/core/cpu/src/cpu_shake.cpp b/src/core/cpu/src/cpu_shake.cpp index 7bc98b73..cb29a0f0 100644 --- a/src/core/cpu/src/cpu_shake.cpp +++ b/src/core/cpu/src/cpu_shake.cpp @@ -9,23 +9,26 @@ int calc_shake_constraints(coord_t* coords, coord_t* xcoords) { auto& ctx = Context::instance(); + auto *winv = ctx.winv->cpu_data_p; + auto *mol_n_shakes = ctx.mol_n_shakes->cpu_data_p; + auto *shake_bonds = ctx.shake_bonds->cpu_data_p; int total_iterations = 0; int shake = 0; for (int mol = 0; mol < ctx.n_molecules; mol++) { - if (ctx.mol_n_shakes[mol] == 0) { + if (mol_n_shakes[mol] == 0) { continue; } int n_iterations = 0; bool converged = false; do { - for (int i = 0; i < ctx.mol_n_shakes[mol]; i++) { - ctx.shake_bonds[shake + i].ready = false; + for (int i = 0; i < mol_n_shakes[mol]; i++) { + shake_bonds[shake + i].ready = false; } - for (int i = 0; i < ctx.mol_n_shakes[mol]; i++) { - shake_bond_t& shake_bond = ctx.shake_bonds[shake + i]; + for (int i = 0; i < mol_n_shakes[mol]; i++) { + shake_bond_t& shake_bond = shake_bonds[shake + i]; if (!shake_bond.ready) { const int ai = shake_bond.ai - 1; const int aj = shake_bond.aj - 1; @@ -45,22 +48,22 @@ int calc_shake_constraints(coord_t* coords, coord_t* xcoords) { xxij.y = xcoords[ai].y - xcoords[aj].y; xxij.z = xcoords[ai].z - xcoords[aj].z; scp = xij.x * xxij.x + xij.y * xxij.y + xij.z * xxij.z; - corr = diff / (2.0 * scp * (ctx.winv[ai] + ctx.winv[aj])); - - coords[ai].x += xxij.x * corr * ctx.winv[ai]; - coords[ai].y += xxij.y * corr * ctx.winv[ai]; - coords[ai].z += xxij.z * corr * ctx.winv[ai]; - coords[aj].x -= xxij.x * corr * ctx.winv[aj]; - coords[aj].y -= xxij.y * corr * ctx.winv[aj]; - coords[aj].z -= xxij.z * corr * ctx.winv[aj]; + corr = diff / (2.0 * scp * (winv[ai] + winv[aj])); + + coords[ai].x += xxij.x * corr * winv[ai]; + coords[ai].y += xxij.y * corr * winv[ai]; + coords[ai].z += xxij.z * corr * winv[ai]; + coords[aj].x -= xxij.x * corr * winv[aj]; + coords[aj].y -= xxij.y * corr * winv[aj]; + coords[aj].z -= xxij.z * corr * winv[aj]; } } n_iterations++; converged = true; - for (int i = 0; i < ctx.mol_n_shakes[mol]; i++) { - if (!ctx.shake_bonds[shake + i].ready) { + for (int i = 0; i < mol_n_shakes[mol]; i++) { + if (!shake_bonds[shake + i].ready) { converged = false; break; } @@ -68,9 +71,9 @@ int calc_shake_constraints(coord_t* coords, coord_t* xcoords) { } while (n_iterations < shake_max_iter && !converged); if (!converged) { - for (int i = 0; i < ctx.mol_n_shakes[mol]; i++) { - const int ai = ctx.shake_bonds[shake + i].ai - 1; - const int aj = ctx.shake_bonds[shake + i].aj - 1; + for (int i = 0; i < mol_n_shakes[mol]; i++) { + const int ai = shake_bonds[shake + i].ai - 1; + const int aj = shake_bonds[shake + i].aj - 1; coord_t xxij; double xxij2; @@ -79,12 +82,12 @@ int calc_shake_constraints(coord_t* coords, coord_t* xcoords) { xxij.z = xcoords[ai].z - xcoords[aj].z; xxij2 = std::pow(xxij.x, 2) + std::pow(xxij.y, 2) + std::pow(xxij.z, 2); std::printf(">>> Shake failed, i = %d,j = %d, d = %f, d0 = %f", ai, aj, std::sqrt(xxij2), - ctx.shake_bonds[shake + i].dist2); + shake_bonds[shake + i].dist2); } std::exit(EXIT_FAILURE); } - shake += ctx.mol_n_shakes[mol]; + shake += mol_n_shakes[mol]; total_iterations += n_iterations; } @@ -94,37 +97,43 @@ int calc_shake_constraints(coord_t* coords, coord_t* xcoords) { void initial_shaking() { auto& ctx = Context::instance(); + auto &coords = ctx.coords->cpu_data_p; + auto &velocities = ctx.velocities->cpu_data_p; + auto *xcoords = ctx.xcoords->cpu_data_p; for (int i = 0; i < ctx.n_atoms; i++) { - ctx.xcoords[i].x = ctx.coords[i].x; - ctx.xcoords[i].y = ctx.coords[i].y; - ctx.xcoords[i].z = ctx.coords[i].z; + xcoords[i].x = coords[i].x; + xcoords[i].y = coords[i].y; + xcoords[i].z = coords[i].z; } - calc_shake_constraints(ctx.coords.data(), ctx.xcoords.data()); + calc_shake_constraints(coords, xcoords); for (int i = 0; i < ctx.n_atoms; i++) { - ctx.xcoords[i].x = ctx.coords[i].x - ctx.dt * ctx.velocities[i].x; - ctx.xcoords[i].y = ctx.coords[i].y - ctx.dt * ctx.velocities[i].y; - ctx.xcoords[i].z = ctx.coords[i].z - ctx.dt * ctx.velocities[i].z; + xcoords[i].x = coords[i].x - ctx.dt * velocities[i].x; + xcoords[i].y = coords[i].y - ctx.dt * velocities[i].y; + xcoords[i].z = coords[i].z - ctx.dt * velocities[i].z; } - calc_shake_constraints(ctx.xcoords.data(), ctx.coords.data()); + calc_shake_constraints(xcoords, coords); for (int i = 0; i < ctx.n_atoms; i++) { - ctx.velocities[i].x = (ctx.coords[i].x - ctx.xcoords[i].x) / ctx.dt; - ctx.velocities[i].y = (ctx.coords[i].y - ctx.xcoords[i].y) / ctx.dt; - ctx.velocities[i].z = (ctx.coords[i].z - ctx.xcoords[i].z) / ctx.dt; + velocities[i].x = (coords[i].x - xcoords[i].x) / ctx.dt; + velocities[i].y = (coords[i].y - xcoords[i].y) / ctx.dt; + velocities[i].z = (coords[i].z - xcoords[i].z) / ctx.dt; } } void stop_cm_translation() { auto& ctx = Context::instance(); + auto &atypes = ctx.atypes->cpu_data_p; + auto &catypes = ctx.catypes->cpu_data_p; + auto &velocities = ctx.velocities->cpu_data_p; double total_mass = 0; coord_t vcm = {}; for (int ai = 0; ai < ctx.n_atoms; ai++) { - const double rmass = ctx.catypes[ctx.atypes[ai].code - 1].m; + const double rmass = catypes[atypes[ai].code - 1].m; total_mass += rmass; - vcm.x += ctx.velocities[ai].x * rmass; - vcm.y += ctx.velocities[ai].y; - vcm.z += ctx.velocities[ai].z; + vcm.x += velocities[ai].x * rmass; + vcm.y += velocities[ai].y; + vcm.z += velocities[ai].z; } vcm.x /= total_mass; @@ -132,8 +141,8 @@ void stop_cm_translation() { vcm.z /= total_mass; for (int ai = 0; ai < ctx.n_atoms; ai++) { - ctx.velocities[ai].x -= vcm.x; - ctx.velocities[ai].y -= vcm.y; - ctx.velocities[ai].z -= vcm.z; + velocities[ai].x -= vcm.x; + velocities[ai].y -= vcm.y; + velocities[ai].z -= vcm.z; } } diff --git a/src/core/cpu/src/cpu_temperature.cpp b/src/core/cpu/src/cpu_temperature.cpp index 0542b418..f2080635 100644 --- a/src/core/cpu/src/cpu_temperature.cpp +++ b/src/core/cpu/src/cpu_temperature.cpp @@ -11,6 +11,10 @@ void calc_temperature() { auto& ctx = Context::instance(); + auto &atypes = ctx.atypes->cpu_data_p; + auto &catypes = ctx.catypes->cpu_data_p; + auto &velocities = ctx.velocities->cpu_data_p; + auto *excluded = ctx.excluded->cpu_data_p; printf("Ndegf = %f, Ndegfree = %f, n_excluded = %d, Ndegfree_solvent = %f, Ndegfree_solute = %f\n", ctx.Ndegf, ctx.Ndegfree, ctx.n_excluded, ctx.Ndegfree_solvent, ctx.Ndegfree_solute); ctx.Temp = 0; @@ -23,10 +27,10 @@ void calc_temperature() { ctx.Temp = 0; for (int i = 0; i < ctx.n_atoms_solute; i++) { - mass_i = ctx.catypes[ctx.atypes[i].code - 1].m; - ener = .5 * mass_i * (pow(ctx.velocities[i].x, 2) + pow(ctx.velocities[i].y, 2) + pow(ctx.velocities[i].z, 2)); + mass_i = catypes[atypes[i].code - 1].m; + ener = .5 * mass_i * (pow(velocities[i].x, 2) + pow(velocities[i].y, 2) + pow(velocities[i].z, 2)); Temp_solute += ener; - if (!ctx.excluded[i]) { + if (!excluded[i]) { Tfree_solute += ener; } else { Texcl_solute += ener; @@ -37,10 +41,10 @@ void calc_temperature() { } for (int i = ctx.n_atoms_solute; i < ctx.n_atoms; i++) { - mass_i = ctx.catypes[ctx.atypes[i].code - 1].m; - ener = .5 * mass_i * (pow(ctx.velocities[i].x, 2) + pow(ctx.velocities[i].y, 2) + pow(ctx.velocities[i].z, 2)); + mass_i = catypes[atypes[i].code - 1].m; + ener = .5 * mass_i * (pow(velocities[i].x, 2) + pow(velocities[i].y, 2) + pow(velocities[i].z, 2)); Temp_solvent += ener; - if (!ctx.excluded[i]) { + if (!excluded[i]) { Tfree_solvent += ener; } else { Texcl_solvent += ener; @@ -66,4 +70,4 @@ void calc_temperature() { ctx.Tscale_solute = ctx.Tscale_solvent; } printf("Tscale = %f, tau_T = %f, Temp = %f, Tfree = %f\n", ctx.Tscale_solvent, ctx.tau_T, ctx.Temp, ctx.Tfree); -} \ No newline at end of file +} diff --git a/src/core/cpu/src/cpu_torsion_force.cpp b/src/core/cpu/src/cpu_torsion_force.cpp index b352a31f..e8aaa2a3 100644 --- a/src/core/cpu/src/cpu_torsion_force.cpp +++ b/src/core/cpu/src/cpu_torsion_force.cpp @@ -7,6 +7,10 @@ double calc_torsion_forces(int start, int end) { auto& ctx = Context::instance(); + auto &torsions = ctx.torsions->cpu_data_p; + auto &ctorsions = ctx.ctorsions->cpu_data_p; + auto &coords = ctx.coords->cpu_data_p; + auto &dvelocities = ctx.dvelocities->cpu_data_p; int aii, aji, aki, ali; coord_t ai, aj, ak, al; @@ -23,18 +27,18 @@ double calc_torsion_forces(int start, int end) { ctorsion_t ctors; for (int i = start; i < end; i++) { - t = ctx.torsions[i]; - ctors = ctx.ctorsions[t.code - 1]; + t = torsions[i]; + ctors = ctorsions[t.code - 1]; aii = t.ai - 1; aji = t.aj - 1; aki = t.ak - 1; ali = t.al - 1; - ai = ctx.coords[aii]; - aj = ctx.coords[aji]; - ak = ctx.coords[aki]; - al = ctx.coords[ali]; + ai = coords[aii]; + aj = coords[aji]; + ak = coords[aki]; + al = coords[ali]; rji.x = ai.x - aj.x; rji.y = ai.y - aj.y; @@ -119,21 +123,21 @@ double calc_torsion_forces(int start, int end) { // Update energy and forces torsion += ener; - ctx.dvelocities[aii].x += dv * dpi.x; - ctx.dvelocities[aii].y += dv * dpi.y; - ctx.dvelocities[aii].z += dv * dpi.z; + dvelocities[aii].x += dv * dpi.x; + dvelocities[aii].y += dv * dpi.y; + dvelocities[aii].z += dv * dpi.z; - ctx.dvelocities[aji].x += dv * dpj.x; - ctx.dvelocities[aji].y += dv * dpj.y; - ctx.dvelocities[aji].z += dv * dpj.z; + dvelocities[aji].x += dv * dpj.x; + dvelocities[aji].y += dv * dpj.y; + dvelocities[aji].z += dv * dpj.z; - ctx.dvelocities[aki].x += dv * dpk.x; - ctx.dvelocities[aki].y += dv * dpk.y; - ctx.dvelocities[aki].z += dv * dpk.z; + dvelocities[aki].x += dv * dpk.x; + dvelocities[aki].y += dv * dpk.y; + dvelocities[aki].z += dv * dpk.z; - ctx.dvelocities[ali].x += dv * dpl.x; - ctx.dvelocities[ali].y += dv * dpl.y; - ctx.dvelocities[ali].z += dv * dpl.z; + dvelocities[ali].x += dv * dpl.x; + dvelocities[ali].y += dv * dpl.y; + dvelocities[ali].z += dv * dpl.z; } return torsion; diff --git a/src/core/cuda/include/cuda_context.cuh b/src/core/cuda/include/cuda_context.cuh deleted file mode 100644 index c3aad50c..00000000 --- a/src/core/cuda/include/cuda_context.cuh +++ /dev/null @@ -1,187 +0,0 @@ -#pragma once - -#include - -#include -#include -#include - -#include "common/include/md_types.h" - -#include "cuda_utility.cuh" - -class CudaContext { - public: - /* - Common data - */ - coord_t* d_coords = nullptr; - dvel_t* d_dvelocities = nullptr; - vel_t* d_velocities = nullptr; - - /* - Used in cuda_angle_force.cu - */ - angle_t* d_angles = nullptr; - cangle_t* d_cangles = nullptr; - - /* - Used in cuda_bond_force.cu - */ - bond_t* d_bonds = nullptr; - cbond_t* d_cbonds = nullptr; - - /* - Used in cuda_improper2_force.cu - */ - improper_t* d_impropers = nullptr; - cimproper_t* d_cimpropers = nullptr; - - /* - Used in cuda_leapfrog.cu - */ - atype_t* d_atypes = nullptr; - catype_t* d_catypes = nullptr; - - /* - Used in cuda_shake_constraints.cu - */ - int* d_mol_n_shakes = nullptr; - shake_bond_t* d_shake_bonds = nullptr; - double* d_winv = nullptr; - coord_t* d_xcoords = nullptr; - - /* - Used in cuda_nonbonded_qq_force.cu - */ - int* d_q_atoms = nullptr; - ccharge_t* d_q_charges = nullptr; - int* d_LJ_matrix = nullptr; - bool* d_excluded = nullptr; - q_elscale_t* d_q_elscales = nullptr; - atype_t* d_q_atypes = nullptr; - E_nonbonded_t* d_EQ_nonbond_qq = nullptr; - double* d_lambdas = nullptr; - - /* - Used in cuda_polx_water_force.cu - */ - shell_t* d_wshells = nullptr; - /* - Used in cuda_pshell_force.cu - */ - bool* d_shell = nullptr; - coord_t* d_coords_top = nullptr; - - /* - Used in cuda_restrang_force.cu - */ - restrang_t* d_restrangs = nullptr; - E_restraint_t* d_EQ_restraint = nullptr; - restrdis_t* d_restrdists = nullptr; - - /* - Used in cuda_restrpos_force.cu - */ - restrpos_t* d_restrspos = nullptr; - - /* - Used in cuda_restrseq_force.cu - */ - restrseq_t* d_restrseqs = nullptr; - bool* d_heavy = nullptr; - - /* - Used in cuda_restrwall_force.cu - */ - restrwall_t* d_restrwalls = nullptr; - - /* - Used in cuda_torsion_force.cu - */ - torsion_t* d_torsions = nullptr; - ctorsion_t* d_ctorsions = nullptr; - - /* - Used in cuda_nonbonded_pp_force.cu - */ - ccharge_t* d_ccharges; - charge_t* d_charges; - int* d_p_atoms = nullptr; - - /* - Other helper arrays - */ - ccharge_t* d_charge_table_all; // Device copy of h_charge_table_all - double* d_charge_pair_products = nullptr; - catype_t* d_catype_table_all; // Device copy of h_catype_table_all - vdw_pair_param_t* d_catype_pair_params = nullptr; - ccharge_t* d_unified_ccharges = nullptr; - catype_t* d_unified_catypes = nullptr; - int3* d_ngbrs_14 = nullptr; - std::map, int> catype_to_type_host; - int n_charge_types = 0; - int zero_charge_type = -1; - int n_catype_types = 0; - int zero_catype_type = -1; - int* d_p_charge_types; - int* d_w_charge_types; - int* d_q_charge_types; // [0, lambdas * n_qatoms) is the normal q_charge_type, [lambdas * n_qatoms, ... ) is the lambda-scaled q_charge_type] - - int* d_p_catype_types; - int* d_w_catype_types; - int* d_q_catype_types; // [0, lambdas * n_qatoms) is the normal q_catype_type, [lambdas * n_qatoms, ... ) is the lambda-scaled q_catype_type] - - int *d_p_atoms_list; - int *d_w_atoms_list; - int *d_q_atoms_list; - std::vector h_p_atoms_list; - std::vector h_w_atoms_list; - std::vector h_q_atoms_list; - - static CudaContext& instance() { - static CudaContext ctx; - return ctx; - } - void init(); - - template - void sync_array_to_device(T* dst, const T* src, int count); - - template - void sync_array_to_host(T* dst, const T* src, int count); - - void sync_all_to_device(); - void sync_all_to_host(); - void reset_energies(); - - std::array get_catype_key(const catype_t& catype) { - return { - catype.aii_normal, - catype.bii_normal, - catype.aii_1_4, - catype.bii_1_4}; - } - - private: - CudaContext() = default; - - void free(); - - ~CudaContext() { free(); } - CudaContext(const CudaContext&) = delete; - CudaContext& operator=(const CudaContext&) = delete; - - void initialize_charge_tables_host(); - void initialize_catype_tables_host(); - void initialize_atom_lists_host(); - void initialize_ngbrs14_host(); -}; -template -void CudaContext::sync_array_to_device(T* dst, const T* src, int count) { - check_cuda(cudaMemcpy(dst, src, count * sizeof(T), cudaMemcpyHostToDevice)); -} -template -void CudaContext::sync_array_to_host(T* dst, const T* src, int count) { - check_cuda(cudaMemcpy(dst, src, count * sizeof(T), cudaMemcpyDeviceToHost)); -} diff --git a/src/core/cuda/include/cuda_handler.cuh b/src/core/cuda/include/cuda_handler.cuh index ec34d0f4..68325c5b 100644 --- a/src/core/cuda/include/cuda_handler.cuh +++ b/src/core/cuda/include/cuda_handler.cuh @@ -1,7 +1,6 @@ #pragma once #include "handler.h" -#include "cuda_context.cuh" class CudaHandler : public Handler { public: @@ -19,7 +18,6 @@ class CudaHandler : public Handler { protected: bool initialized_ = false; - CudaContext& ctx_ = CudaContext::instance(); void calc_internal_forces(int iteration) override; void calc_nonbonded_forces() override; diff --git a/src/core/cuda/include/cuda_utility.cuh b/src/core/cuda/include/cuda_utility.cuh index 784cebf2..36767be0 100644 --- a/src/core/cuda/include/cuda_utility.cuh +++ b/src/core/cuda/include/cuda_utility.cuh @@ -1,12 +1,9 @@ #pragma once -#include - #include +#include "common/include/cuda_runtime_utility.h" + __device__ inline double to_radians_device(double degrees) { return degrees * (M_PI / 180.0); } - -void check_cudaMalloc(void** devPtr, size_t size); -void check_cuda(cudaError_t status); diff --git a/src/core/cuda/src/cuda_angle_force.cu b/src/core/cuda/src/cuda_angle_force.cu index 3ed51957..7c49cffb 100644 --- a/src/core/cuda/src/cuda_angle_force.cu +++ b/src/core/cuda/src/cuda_angle_force.cu @@ -1,6 +1,6 @@ #include "cuda/include/cuda_angle_force.cuh" -#include "cuda/include/cuda_context.cuh" #include "cuda/include/cuda_utility.cuh" +#include "context.h" namespace CudaAngleForce { bool is_initialized = false; @@ -77,11 +77,11 @@ double calc_angle_forces_host(int start, int end) { int blockSize = 256; int numBlocks = (N + blockSize - 1) / blockSize; - CudaContext& ctx = CudaContext::instance(); - auto d_angles = ctx.d_angles; - auto d_coords = ctx.d_coords; - auto d_cangles = ctx.d_cangles; - auto d_dvelocities = ctx.d_dvelocities; + auto &host_ctx = Context::instance(); + auto d_angles = host_ctx.angles->gpu_data_p; + auto d_coords = host_ctx.coords->gpu_data_p; + auto d_cangles = host_ctx.cangles->gpu_data_p; + auto d_dvelocities = host_ctx.dvelocities->gpu_data_p; // todo: now have to do that, after moving all to CudaContext, can remove it // ctx.sync_all_to_device(); diff --git a/src/core/cuda/src/cuda_bond_force.cu b/src/core/cuda/src/cuda_bond_force.cu index 77fab9a8..9b31a660 100644 --- a/src/core/cuda/src/cuda_bond_force.cu +++ b/src/core/cuda/src/cuda_bond_force.cu @@ -1,5 +1,5 @@ #include "cuda/include/cuda_bond_force.cuh" -#include "cuda/include/cuda_context.cuh" +#include "context.h" #include "cuda_utility.cuh" namespace CudaBondForce { bool is_initialized = false; @@ -43,11 +43,11 @@ double calc_bond_forces_host(int start, int end) { double energy = 0.0; cudaMemcpy(d_energy_sum, &energy, sizeof(double), cudaMemcpyHostToDevice); - CudaContext& context = CudaContext::instance(); - bond_t* d_bonds = context.d_bonds; - coord_t* d_coords = context.d_coords; - cbond_t* d_cbonds = context.d_cbonds; - dvel_t* d_dvelocities = context.d_dvelocities; + auto& host_ctx = Context::instance(); + bond_t* d_bonds = host_ctx.bonds->gpu_data_p; + coord_t* d_coords = host_ctx.coords->gpu_data_p; + cbond_t* d_cbonds = host_ctx.cbonds->gpu_data_p; + dvel_t* d_dvelocities = host_ctx.dvelocities->gpu_data_p; calc_bond_forces_kernel<<>>(start, end, d_bonds, d_coords, d_cbonds, d_dvelocities, d_energy_sum); cudaDeviceSynchronize(); diff --git a/src/core/cuda/src/cuda_context.cu b/src/core/cuda/src/cuda_context.cu index 7092d017..a1d95e5c 100644 --- a/src/core/cuda/src/cuda_context.cu +++ b/src/core/cuda/src/cuda_context.cu @@ -1,327 +1,153 @@ -#include "cuda_context.cuh" +#include +#include +#include + #include "context.h" #include "common/include/constants.h" #include "common/include/vdw_rules.h" -void CudaContext::init() { - auto& host = Context::instance(); - - check_cudaMalloc((void**)&d_coords, sizeof(coord_t) * host.n_atoms); - check_cudaMalloc((void**)&d_dvelocities, sizeof(dvel_t) * host.n_atoms); - check_cudaMalloc((void**)&d_velocities, sizeof(vel_t) * host.n_atoms); - check_cudaMalloc((void**)&d_angles, sizeof(angle_t) * host.n_angles); - check_cudaMalloc((void**)&d_cangles, sizeof(cangle_t) * host.n_cangles); - check_cudaMalloc((void**)&d_bonds, sizeof(bond_t) * host.n_bonds); - check_cudaMalloc((void**)&d_cbonds, sizeof(cbond_t) * host.n_cbonds); - check_cudaMalloc((void**)&d_impropers, sizeof(improper_t) * host.n_impropers); - check_cudaMalloc((void**)&d_cimpropers, sizeof(cimproper_t) * host.n_cimpropers); - - check_cudaMalloc((void**)&d_mol_n_shakes, sizeof(int) * host.n_molecules); - check_cudaMalloc((void**)&d_shake_bonds, sizeof(shake_bond_t) * host.n_shake_constraints); - check_cudaMalloc((void**)&d_winv, sizeof(double) * host.n_atoms); - check_cudaMalloc((void**)&d_xcoords, sizeof(coord_t) * host.n_atoms); - - check_cudaMalloc((void**)&d_atypes, sizeof(atype_t) * host.n_atypes); - check_cudaMalloc((void**)&d_catypes, sizeof(catype_t) * host.n_catypes); - - check_cudaMalloc((void**)&d_q_atoms, sizeof(int) * host.n_qatoms); - check_cudaMalloc((void**)&d_q_charges, sizeof(ccharge_t) * host.n_qatoms * host.n_lambdas); - check_cudaMalloc((void**)&d_LJ_matrix, sizeof(int) * host.n_atoms_solute * host.n_atoms_solute); - check_cudaMalloc((void**)&d_excluded, sizeof(bool) * host.n_atoms); - check_cudaMalloc((void**)&d_q_elscales, sizeof(q_elscale_t) * host.n_qelscales); - check_cudaMalloc((void**)&d_q_atypes, sizeof(atype_t) * host.n_qatoms * host.n_lambdas); - check_cudaMalloc((void**)&d_EQ_nonbond_qq, sizeof(E_nonbonded_t) * host.n_lambdas); - check_cudaMalloc((void**)&d_lambdas, sizeof(double) * host.n_lambdas); - - check_cudaMalloc((void**)&d_wshells, host.n_shells * sizeof(shell_t)); - check_cudaMalloc((void**)&d_shell, sizeof(bool) * host.n_atoms); - check_cudaMalloc((void**)&d_coords_top, sizeof(coord_t) * host.n_atoms); - - check_cudaMalloc((void**)&d_restrangs, sizeof(restrang_t) * host.n_restrangs); - check_cudaMalloc((void**)&d_EQ_restraint, sizeof(E_restraint_t) * host.n_lambdas); - check_cudaMalloc((void**)&d_restrdists, sizeof(restrdis_t) * host.n_restrdists); - - check_cudaMalloc((void**)&d_restrspos, sizeof(restrpos_t) * host.n_restrspos); - - check_cudaMalloc((void**)&d_restrseqs, sizeof(restrseq_t) * host.n_restrseqs); - check_cudaMalloc((void**)&d_heavy, sizeof(bool) * host.n_atoms); - - check_cudaMalloc((void**)&d_restrwalls, sizeof(restrwall_t) * host.n_restrwalls); - - check_cudaMalloc((void**)&d_torsions, sizeof(torsion_t) * host.n_torsions); - check_cudaMalloc((void**)&d_ctorsions, sizeof(ctorsion_t) * host.n_ctorsions); - - check_cudaMalloc((void**)&d_ccharges, sizeof(ccharge_t) * host.n_ccharges); - check_cudaMalloc((void**)&d_charges, sizeof(charge_t) * host.n_charges); - check_cudaMalloc((void**)&d_p_atoms, sizeof(int) * host.n_patoms); - check_cudaMalloc((void**)&d_unified_ccharges, sizeof(ccharge_t) * host.unified_ccharges.size()); - check_cudaMalloc((void**)&d_unified_catypes, sizeof(catype_t) * host.unified_catypes.size()); - - initialize_atom_lists_host(); - initialize_ngbrs14_host(); - initialize_charge_tables_host(); - initialize_catype_tables_host(); - - sync_all_to_device(); +namespace { +template +std::unique_ptr> make_host_device_buffer_from_vector( + const std::vector& src, + bool run_gpu) { + auto buffer = std::make_unique>(src.size(), true, run_gpu); + if (!src.empty()) { + std::copy(src.begin(), src.end(), buffer->cpu_data_p); + } + return buffer; } - -void CudaContext::sync_all_to_device() { - auto& host = Context::instance(); - - sync_array_to_device(d_coords, host.coords.data(), host.n_atoms); - sync_array_to_device(d_dvelocities, host.dvelocities.data(), host.n_atoms); - sync_array_to_device(d_velocities, host.velocities.data(), host.n_atoms); - sync_array_to_device(d_angles, host.angles.data(), host.n_angles); - sync_array_to_device(d_cangles, host.cangles.data(), host.n_cangles); - sync_array_to_device(d_bonds, host.bonds.data(), host.n_bonds); - sync_array_to_device(d_cbonds, host.cbonds.data(), host.n_cbonds); - sync_array_to_device(d_impropers, host.impropers.data(), host.n_impropers); - sync_array_to_device(d_cimpropers, host.cimpropers.data(), host.n_cimpropers); - - sync_array_to_device(d_mol_n_shakes, host.mol_n_shakes.data(), host.n_molecules); - sync_array_to_device(d_shake_bonds, host.shake_bonds.data(), host.n_shake_constraints); - sync_array_to_device(d_winv, host.winv.data(), host.n_atoms); - sync_array_to_device(d_xcoords, host.xcoords.data(), host.n_atoms); - - sync_array_to_device(d_atypes, host.atypes.data(), host.n_atypes); - sync_array_to_device(d_catypes, host.catypes.data(), host.n_catypes); - - sync_array_to_device(d_q_atoms, host.q_atoms.data(), host.n_qatoms); - sync_array_to_device(d_q_charges, host.q_charges.data(), host.n_qatoms * host.n_lambdas); - sync_array_to_device(d_LJ_matrix, host.LJ_matrix.data(), host.n_atoms_solute * host.n_atoms_solute); - sync_array_to_device(d_excluded, host.excluded.get(), host.n_atoms); - sync_array_to_device(d_q_elscales, host.q_elscales.data(), host.n_qelscales); - sync_array_to_device(d_q_atypes, host.q_atypes.data(), host.n_qatoms * host.n_lambdas); - sync_array_to_device(d_EQ_nonbond_qq, host.EQ_nonbond_qq.data(), host.n_lambdas); - sync_array_to_device(d_lambdas, host.lambdas.data(), host.n_lambdas); - sync_array_to_device(d_wshells, host.wshells.data(), host.n_shells); - - sync_array_to_device(d_shell, host.shell.get(), host.n_atoms); - sync_array_to_device(d_coords_top, host.coords_top.data(), host.n_atoms); - - sync_array_to_device(d_restrangs, host.restrangs.data(), host.n_restrangs); - sync_array_to_device(d_EQ_restraint, host.EQ_restraint.data(), host.n_lambdas); - sync_array_to_device(d_restrdists, host.restrdists.data(), host.n_restrdists); - - sync_array_to_device(d_restrspos, host.restrspos.data(), host.n_restrspos); - - sync_array_to_device(d_restrseqs, host.restrseqs.data(), host.n_restrseqs); - sync_array_to_device(d_heavy, host.heavy.get(), host.n_atoms); - sync_array_to_device(d_restrwalls, host.restrwalls.data(), host.n_restrwalls); - - sync_array_to_device(d_torsions, host.torsions.data(), host.n_torsions); - sync_array_to_device(d_ctorsions, host.ctorsions.data(), host.n_ctorsions); - - sync_array_to_device(d_ccharges, host.ccharges.data(), host.n_ccharges); - sync_array_to_device(d_charges, host.charges.data(), host.n_charges); - sync_array_to_device(d_p_atoms, host.p_atoms.data(), host.n_patoms); - sync_array_to_device(d_unified_ccharges, host.unified_ccharges.data(), host.unified_ccharges.size()); - sync_array_to_device(d_unified_catypes, host.unified_catypes.data(), host.unified_catypes.size()); +} // namespace + +void Context::cuda_initialize_helpers() { + cuda_initialize_atom_lists_host(); + cuda_initialize_ngbrs14_host(); + cuda_initialize_charge_tables_host(); + cuda_initialize_catype_tables_host(); + cuda_sync_all_to_device(); } -void CudaContext::sync_all_to_host() { - auto& host = Context::instance(); - - sync_array_to_host(host.coords.data(), d_coords, host.n_atoms); - sync_array_to_host(host.dvelocities.data(), d_dvelocities, host.n_atoms); - sync_array_to_host(host.velocities.data(), d_velocities, host.n_atoms); - sync_array_to_host(host.angles.data(), d_angles, host.n_angles); - sync_array_to_host(host.cangles.data(), d_cangles, host.n_cangles); - sync_array_to_host(host.bonds.data(), d_bonds, host.n_bonds); - sync_array_to_host(host.cbonds.data(), d_cbonds, host.n_cbonds); - sync_array_to_host(host.impropers.data(), d_impropers, host.n_impropers); - sync_array_to_host(host.cimpropers.data(), d_cimpropers, host.n_cimpropers); - - sync_array_to_host(host.mol_n_shakes.data(), d_mol_n_shakes, host.n_molecules); - sync_array_to_host(host.shake_bonds.data(), d_shake_bonds, host.n_shake_constraints); - sync_array_to_host(host.winv.data(), d_winv, host.n_atoms); - sync_array_to_host(host.xcoords.data(), d_xcoords, host.n_atoms); - - sync_array_to_host(host.atypes.data(), d_atypes, host.n_atypes); - sync_array_to_host(host.catypes.data(), d_catypes, host.n_catypes); - - sync_array_to_host(host.q_atoms.data(), d_q_atoms, host.n_qatoms); - sync_array_to_host(host.q_charges.data(), d_q_charges, host.n_qatoms * host.n_lambdas); - sync_array_to_host(host.LJ_matrix.data(), d_LJ_matrix, host.n_atoms_solute * host.n_atoms_solute); - sync_array_to_host(host.excluded.get(), d_excluded, host.n_atoms); - sync_array_to_host(host.q_elscales.data(), d_q_elscales, host.n_qelscales); - sync_array_to_host(host.q_atypes.data(), d_q_atypes, host.n_qatoms * host.n_lambdas); - sync_array_to_host(host.EQ_nonbond_qq.data(), d_EQ_nonbond_qq, host.n_lambdas); - sync_array_to_host(host.lambdas.data(), d_lambdas, host.n_lambdas); - sync_array_to_host(host.wshells.data(), d_wshells, host.n_shells); - sync_array_to_host(host.shell.get(), d_shell, host.n_atoms); - sync_array_to_host(host.coords_top.data(), d_coords_top, host.n_atoms); - - sync_array_to_host(host.restrangs.data(), d_restrangs, host.n_restrangs); - sync_array_to_host(host.EQ_restraint.data(), d_EQ_restraint, host.n_lambdas); - sync_array_to_host(host.restrdists.data(), d_restrdists, host.n_restrdists); - - sync_array_to_host(host.restrspos.data(), d_restrspos, host.n_restrspos); - - sync_array_to_host(host.restrseqs.data(), d_restrseqs, host.n_restrseqs); - sync_array_to_host(host.heavy.get(), d_heavy, host.n_atoms); - sync_array_to_host(host.restrwalls.data(), d_restrwalls, host.n_restrwalls); - - sync_array_to_host(host.torsions.data(), d_torsions, host.n_torsions); - sync_array_to_host(host.ctorsions.data(), d_ctorsions, host.n_ctorsions); - - sync_array_to_host(host.ccharges.data(), d_ccharges, host.n_ccharges); - sync_array_to_host(host.charges.data(), d_charges, host.n_charges); - sync_array_to_host(host.p_atoms.data(), d_p_atoms, host.n_patoms); - sync_array_to_host(host.unified_ccharges.data(), d_unified_ccharges, host.unified_ccharges.size()); - sync_array_to_host(host.unified_catypes.data(), d_unified_catypes, host.unified_catypes.size()); +void Context::cuda_sync_all_to_device() { + coords->upload(); + dvelocities->upload(); + velocities->upload(); + LJ_matrix->upload(); + xcoords->upload(); + winv->upload(); + mol_n_shakes->upload(); + shake_bonds->upload(); + heavy->upload(); + excluded->upload(); + q_elscales->upload(); + shell->upload(); + wshells->upload(); + lambdas->upload(); + EQ_restraint->upload(); + unified_ccharges->upload(); + unified_catypes->upload(); + ngbrs_14->upload(); + p_atoms_list->upload(); + w_atoms_list->upload(); + q_atoms_list->upload(); + charge_table_all->upload(); + charge_pair_products->upload(); + p_charge_types->upload(); + w_charge_types->upload(); + q_charge_types->upload(); + catype_table_all->upload(); + catype_pair_params->upload(); + p_catype_types->upload(); + w_catype_types->upload(); + q_catype_types->upload(); } -void CudaContext::free() { - cudaFree(d_coords); - cudaFree(d_dvelocities); - cudaFree(d_velocities); - cudaFree(d_angles); - cudaFree(d_cangles); - cudaFree(d_bonds); - cudaFree(d_cbonds); - cudaFree(d_impropers); - cudaFree(d_cimpropers); - - cudaFree(d_atypes); - cudaFree(d_catypes); - - cudaFree(d_mol_n_shakes); - cudaFree(d_shake_bonds); - cudaFree(d_winv); - cudaFree(d_xcoords); - - cudaFree(d_q_atoms); - cudaFree(d_q_charges); - cudaFree(d_LJ_matrix); - cudaFree(d_excluded); - cudaFree(d_q_elscales); - cudaFree(d_q_atypes); - cudaFree(d_EQ_nonbond_qq); - cudaFree(d_lambdas); - - cudaFree(d_wshells); - cudaFree(d_shell); - cudaFree(d_coords_top); - - cudaFree(d_restrangs); - cudaFree(d_EQ_restraint); - cudaFree(d_restrdists); - - cudaFree(d_restrspos); - - cudaFree(d_restrseqs); - cudaFree(d_heavy); - - cudaFree(d_restrwalls); - - cudaFree(d_torsions); - cudaFree(d_ctorsions); - - cudaFree(d_ccharges); - cudaFree(d_charges); - cudaFree(d_p_atoms); - cudaFree(d_charge_table_all); - cudaFree(d_charge_pair_products); - cudaFree(d_catype_table_all); - cudaFree(d_catype_pair_params); - cudaFree(d_unified_ccharges); - cudaFree(d_unified_catypes); - cudaFree(d_ngbrs_14); - cudaFree(d_p_charge_types); - cudaFree(d_w_charge_types); - cudaFree(d_q_charge_types); - cudaFree(d_p_catype_types); - cudaFree(d_w_catype_types); - cudaFree(d_q_catype_types); - cudaFree(d_p_atoms_list); - cudaFree(d_w_atoms_list); - cudaFree(d_q_atoms_list); - - d_charge_table_all = nullptr; - d_charge_pair_products = nullptr; - d_catype_table_all = nullptr; - d_catype_pair_params = nullptr; - d_unified_ccharges = nullptr; - d_unified_catypes = nullptr; - d_ngbrs_14 = nullptr; + +void Context::cuda_free_helpers() { + unified_ccharges.reset(); + unified_catypes.reset(); + ngbrs_14.reset(); + p_atoms_list.reset(); + w_atoms_list.reset(); + q_atoms_list.reset(); + charge_table_all.reset(); + charge_pair_products.reset(); + p_charge_types.reset(); + w_charge_types.reset(); + q_charge_types.reset(); + catype_table_all.reset(); + catype_pair_params.reset(); + p_catype_types.reset(); + w_catype_types.reset(); + q_catype_types.reset(); + catype_to_type_host.clear(); n_charge_types = 0; zero_charge_type = -1; n_catype_types = 0; zero_catype_type = -1; - d_p_charge_types = nullptr; - d_w_charge_types = nullptr; - d_q_charge_types = nullptr; - d_p_catype_types = nullptr; - d_w_catype_types = nullptr; - d_q_catype_types = nullptr; - d_p_atoms_list = nullptr; - d_w_atoms_list = nullptr; - d_q_atoms_list = nullptr; } -void CudaContext::reset_energies() { - auto& host = Context::instance(); - cudaMemset(d_dvelocities, 0, sizeof(dvel_t) * host.n_atoms); - cudaMemset(d_EQ_nonbond_qq, 0, sizeof(E_nonbonded_t) * host.n_lambdas); - cudaMemset(d_EQ_restraint, 0, sizeof(E_restraint_t) * host.n_lambdas); +void Context::cuda_reset_energies() { + cudaMemset(dvelocities->gpu_data_p, 0, sizeof(dvel_t) * n_atoms); + cudaMemset(EQ_restraint->gpu_data_p, 0, sizeof(E_restraint_t) * n_lambdas); } -void CudaContext::initialize_atom_lists_host() { - auto& host = Context::instance(); +void Context::cuda_initialize_atom_lists_host() { + auto *excluded = this->excluded->cpu_data_p; - h_p_atoms_list.clear(); - for (int i = 0; i < host.n_patoms; i++) { - int id = host.p_atoms[i]; - if (!host.excluded[id]) { + std::vector h_p_atoms_list; + h_p_atoms_list.reserve(n_patoms); + for (int i = 0; i < n_patoms; i++) { + int id = p_atoms[i]; + if (!excluded[id]) { h_p_atoms_list.push_back(id); } } - h_w_atoms_list.clear(); - for (int i = host.n_atoms_solute; i < host.n_atoms; i++) { - int id = i; - if (!host.excluded[id]) { - h_w_atoms_list.push_back(id); + std::vector h_w_atoms_list; + h_w_atoms_list.reserve(n_atoms - n_atoms_solute); + for (int i = n_atoms_solute; i < n_atoms; i++) { + if (!excluded[i]) { + h_w_atoms_list.push_back(i); } } - printf("Number of water atoms: %d, number of all water atoms: %d\n", (int)h_w_atoms_list.size(), host.n_atoms - host.n_atoms_solute); + printf("Number of water atoms: %d, number of all water atoms: %d\n", (int)h_w_atoms_list.size(), n_atoms - n_atoms_solute); - h_q_atoms_list.clear(); - for (int i = 0; i < host.n_qatoms; i++) { - int id = host.q_atoms[i]; - if (!host.excluded[id]) { + std::vector h_q_atoms_list; + h_q_atoms_list.reserve(n_qatoms); + for (int i = 0; i < n_qatoms; i++) { + int id = q_atoms[i]; + if (!excluded[id]) { h_q_atoms_list.push_back(id); } } - check_cudaMalloc((void**)&d_p_atoms_list, sizeof(int) * h_p_atoms_list.size()); - cudaMemcpy(d_p_atoms_list, h_p_atoms_list.data(), sizeof(int) * h_p_atoms_list.size(), cudaMemcpyHostToDevice); - check_cudaMalloc((void**)&d_w_atoms_list, sizeof(int) * h_w_atoms_list.size()); - cudaMemcpy(d_w_atoms_list, h_w_atoms_list.data(), sizeof(int) * h_w_atoms_list.size(), cudaMemcpyHostToDevice); - check_cudaMalloc((void**)&d_q_atoms_list, sizeof(int) * h_q_atoms_list.size()); - cudaMemcpy(d_q_atoms_list, h_q_atoms_list.data(), sizeof(int) * h_q_atoms_list.size(), cudaMemcpyHostToDevice); + p_atoms_list = make_host_device_buffer_from_vector(h_p_atoms_list, run_gpu); + w_atoms_list = make_host_device_buffer_from_vector(h_w_atoms_list, run_gpu); + q_atoms_list = make_host_device_buffer_from_vector(h_q_atoms_list, run_gpu); } -void CudaContext::initialize_ngbrs14_host() { - auto& host = Context::instance(); - - if (!host.ngbrs_14.empty()) { - check_cudaMalloc((void**)&d_ngbrs_14, sizeof(int3) * host.ngbrs_14.size()); - cudaMemcpy(d_ngbrs_14, host.ngbrs_14.data(), sizeof(int3) * host.ngbrs_14.size(), cudaMemcpyHostToDevice); +void Context::cuda_initialize_ngbrs14_host() { + if (ngbrs_14 == nullptr) { + ngbrs_14 = std::make_unique>(0, true, run_gpu); } } -void CudaContext::initialize_charge_tables_host() { - auto& host = Context::instance(); +void Context::cuda_initialize_charge_tables_host() { + auto *excluded = this->excluded->cpu_data_p; + auto &charges = this->charges->cpu_data_p; + auto &ccharges = this->ccharges->cpu_data_p; + auto *lambda_values = lambdas->cpu_data_p; + auto *p_atoms_cpu = p_atoms_list->cpu_data_p; + auto *w_atoms_cpu = w_atoms_list->cpu_data_p; + auto *q_atoms_cpu = q_atoms_list->cpu_data_p; std::map charge_to_type_host; - std::vector h_charge_table_all; // h_charge_table_all[charge type] = (code, charge) + std::vector h_charge_table_all; auto add_charge = [&](double charge) -> int { if (charge_to_type_host.count(charge) == 0) { - int sz = h_charge_table_all.size(); - ccharge_t new_ccharge; + int sz = static_cast(h_charge_table_all.size()); + ccharge_t new_ccharge = {}; new_ccharge.code = sz; new_ccharge.charge = charge; h_charge_table_all.push_back(new_ccharge); @@ -330,16 +156,14 @@ void CudaContext::initialize_charge_tables_host() { return charge_to_type_host[charge]; }; - for (int i = 0; i < host.n_ccharges; i++) { - double charge = host.ccharges[i].charge; - add_charge(charge); + for (int i = 0; i < n_ccharges; i++) { + add_charge(ccharges[i].charge); } - for (int state = 0; state < host.n_lambdas; state++) { - for (int i = 0; i < host.n_qatoms; i++) { - double charge = host.q_charges[i + host.n_qatoms * state].charge; - add_charge(charge); - charge *= host.lambdas[state]; + for (int state = 0; state < n_lambdas; state++) { + for (int i = 0; i < n_qatoms; i++) { + double charge = q_charges[i + n_qatoms * state].charge; add_charge(charge); + add_charge(charge * lambda_values[state]); } } @@ -353,61 +177,63 @@ void CudaContext::initialize_charge_tables_host() { } } - std::vector p_charge_types(h_p_atoms_list.size()); - for (int i = 0; i < static_cast(h_p_atoms_list.size()); i++) { - int id = h_p_atoms_list[i]; - double charge = host.ccharges[host.charges[id].code - 1].charge; - p_charge_types[i] = charge_to_type_host[charge]; + std::vector p_charge_types_cpu(p_atoms_list->length); + for (int i = 0; i < static_cast(p_atoms_list->length); i++) { + const int id = p_atoms_cpu[i]; + const double charge = ccharges[charges[id].code - 1].charge; + p_charge_types_cpu[i] = charge_to_type_host[charge]; } - int h_q_atoms_size = h_q_atoms_list.size(); - std::vector q_charge_types(h_q_atoms_size * host.n_lambdas); + std::vector q_charge_types_cpu(q_atoms_list->length * n_lambdas); std::map q_idx; - for (int i = 0; i < host.n_qatoms; i++) { - int id = host.q_atoms[i]; - if (!host.excluded[id]) { + for (int i = 0; i < n_qatoms; i++) { + int id = q_atoms[i]; + if (!excluded[id]) { q_idx[id] = i; } } - for (int state = 0; state < host.n_lambdas; state++) { - for (int i = 0; i < h_q_atoms_size; i++) { - int id = h_q_atoms_list[i]; - double charge = host.q_charges[q_idx[id] + host.n_qatoms * state].charge; - q_charge_types[state * h_q_atoms_size + i] = charge_to_type_host[charge]; + for (int state = 0; state < n_lambdas; state++) { + for (int i = 0; i < static_cast(q_atoms_list->length); i++) { + const int id = q_atoms_cpu[i]; + const double charge = q_charges[q_idx[id] + n_qatoms * state].charge; + q_charge_types_cpu[state * q_atoms_list->length + i] = charge_to_type_host[charge]; } } - int h_w_atoms_size = h_w_atoms_list.size(); - std::vector w_charge_types(h_w_atoms_size); - for (int i = 0; i < h_w_atoms_size; i++) { - int id = h_w_atoms_list[i]; - double charge = host.ccharges[host.charges[id].code - 1].charge; - w_charge_types[i] = charge_to_type_host[charge]; + std::vector w_charge_types_cpu(w_atoms_list->length); + for (int i = 0; i < static_cast(w_atoms_list->length); i++) { + const int id = w_atoms_cpu[i]; + const double charge = ccharges[charges[id].code - 1].charge; + w_charge_types_cpu[i] = charge_to_type_host[charge]; } - check_cudaMalloc((void**)&d_charge_table_all, sizeof(ccharge_t) * h_charge_table_all.size()); - cudaMemcpy(d_charge_table_all, h_charge_table_all.data(), sizeof(ccharge_t) * h_charge_table_all.size(), cudaMemcpyHostToDevice); - check_cudaMalloc((void**)&d_charge_pair_products, sizeof(double) * h_charge_pair_products.size()); - cudaMemcpy(d_charge_pair_products, h_charge_pair_products.data(), sizeof(double) * h_charge_pair_products.size(), cudaMemcpyHostToDevice); - check_cudaMalloc((void**)&d_p_charge_types, sizeof(int) * p_charge_types.size()); - cudaMemcpy(d_p_charge_types, p_charge_types.data(), sizeof(int) * p_charge_types.size(), cudaMemcpyHostToDevice); - check_cudaMalloc((void**)&d_q_charge_types, sizeof(int) * q_charge_types.size()); - cudaMemcpy(d_q_charge_types, q_charge_types.data(), sizeof(int) * q_charge_types.size(), cudaMemcpyHostToDevice); - check_cudaMalloc((void**)&d_w_charge_types, sizeof(int) * w_charge_types.size()); - cudaMemcpy(d_w_charge_types, w_charge_types.data(), sizeof(int) * w_charge_types.size(), cudaMemcpyHostToDevice); + charge_table_all = make_host_device_buffer_from_vector(h_charge_table_all, run_gpu); + charge_pair_products = make_host_device_buffer_from_vector(h_charge_pair_products, run_gpu); + p_charge_types = make_host_device_buffer_from_vector(p_charge_types_cpu, run_gpu); + q_charge_types = make_host_device_buffer_from_vector(q_charge_types_cpu, run_gpu); + w_charge_types = make_host_device_buffer_from_vector(w_charge_types_cpu, run_gpu); } -void CudaContext::initialize_catype_tables_host() { - auto& host = Context::instance(); +void Context::cuda_initialize_catype_tables_host() { + auto *excluded = this->excluded->cpu_data_p; + auto &atypes = this->atypes->cpu_data_p; + auto &catypes = this->catypes->cpu_data_p; + auto *p_atoms_cpu = p_atoms_list->cpu_data_p; + auto *w_atoms_cpu = w_atoms_list->cpu_data_p; + auto *q_atoms_cpu = q_atoms_list->cpu_data_p; - std::vector h_catype_table_all; // h_catype_table_all[catype code] = catype_t + std::vector h_catype_table_all; catype_to_type_host.clear(); auto add_catype = [&](catype_t catype) -> int { - auto key = get_catype_key(catype); + const std::array key = { + catype.aii_normal, + catype.bii_normal, + catype.aii_1_4, + catype.bii_1_4}; if (catype_to_type_host.count(key) == 0) { - int sz = h_catype_table_all.size(); + int sz = static_cast(h_catype_table_all.size()); catype.code = sz; h_catype_table_all.push_back(catype); catype_to_type_host[key] = sz; @@ -415,14 +241,14 @@ void CudaContext::initialize_catype_tables_host() { return catype_to_type_host[key]; }; - for (int i = 0; i < host.n_catypes; i++) { - add_catype(host.catypes[i]); + for (int i = 0; i < n_catypes; i++) { + add_catype(catypes[i]); } - for (int state = 0; state < host.n_lambdas; state++) { - for (int i = 0; i < host.n_qatoms; i++) { - const atype_t& qat = host.q_atypes[i + host.n_qatoms * state]; - add_catype(host.q_catypes[qat.code - 1]); + for (int state = 0; state < n_lambdas; state++) { + for (int i = 0; i < n_qatoms; i++) { + const atype_t& qat = q_atypes[i + n_qatoms * state]; + add_catype(q_catypes[qat.code - 1]); } } @@ -436,7 +262,7 @@ void CudaContext::initialize_catype_tables_host() { const catype_t& ci = h_catype_table_all[i]; const catype_t& cj = h_catype_table_all[j]; vdw_pair_param_t pair_param = {}; - if (host.topo.vdw_rule == VDW_GEOMETRIC) { + if (topo.vdw_rule == VDW_GEOMETRIC) { calc_vdw_geometric(ci.aii_normal, cj.aii_normal, ci.bii_normal, cj.bii_normal, 1.0, &pair_param.a, &pair_param.b); } else { calc_vdw_arithmetic(ci.aii_normal, cj.aii_normal, ci.bii_normal, cj.bii_normal, 1.0, &pair_param.a, &pair_param.b); @@ -445,53 +271,45 @@ void CudaContext::initialize_catype_tables_host() { } } - std::vector p_catype_types(h_p_atoms_list.size()); - for (int i = 0; i < static_cast(h_p_atoms_list.size()); i++) { - int id = h_p_atoms_list[i]; - auto catype = host.catypes[host.atypes[id].code - 1]; - auto key = get_catype_key(catype); - p_catype_types[i] = catype_to_type_host[key]; + std::vector p_catype_types_cpu(p_atoms_list->length); + for (int i = 0; i < static_cast(p_atoms_list->length); i++) { + const int id = p_atoms_cpu[i]; + const catype_t catype = catypes[atypes[id].code - 1]; + const std::array key = {catype.aii_normal, catype.bii_normal, catype.aii_1_4, catype.bii_1_4}; + p_catype_types_cpu[i] = catype_to_type_host[key]; } - int h_q_atoms_size = h_q_atoms_list.size(); - std::vector q_catype_types(h_q_atoms_size * host.n_lambdas); + std::vector q_catype_types_cpu(q_atoms_list->length * n_lambdas); std::map q_idx; - for (int i = 0; i < host.n_qatoms; i++) { - int id = host.q_atoms[i]; - if (!host.excluded[id]) { + for (int i = 0; i < n_qatoms; i++) { + int id = q_atoms[i]; + if (!excluded[id]) { q_idx[id] = i; } } - - for (int state = 0; state < host.n_lambdas; state++) { - for (int i = 0; i < h_q_atoms_size; i++) { - int id = h_q_atoms_list[i]; - const atype_t& qat = host.q_atypes[q_idx[id] + host.n_qatoms * state]; - auto key_normal = get_catype_key(host.q_catypes[qat.code - 1]); - q_catype_types[state * h_q_atoms_size + i] = catype_to_type_host[key_normal]; + for (int state = 0; state < n_lambdas; state++) { + for (int i = 0; i < static_cast(q_atoms_list->length); i++) { + const int id = q_atoms_cpu[i]; + const atype_t& qat = q_atypes[q_idx[id] + n_qatoms * state]; + const catype_t& qcatype = q_catypes[qat.code - 1]; + const std::array key = {qcatype.aii_normal, qcatype.bii_normal, qcatype.aii_1_4, qcatype.bii_1_4}; + q_catype_types_cpu[state * q_atoms_list->length + i] = catype_to_type_host[key]; } } - int h_w_atoms_size = h_w_atoms_list.size(); - std::vector w_catype_types(h_w_atoms_size); - for (int i = 0; i < h_w_atoms_size; i++) { - int id = h_w_atoms_list[i]; - auto catype = host.catypes[host.atypes[id].code - 1]; - auto key = get_catype_key(catype); - w_catype_types[i] = catype_to_type_host[key]; - // printf("Water atom %d: catype code %d mapped to %d, data: m=%f, aii_normal=%f, bii_normal=%f, aii_polar=%f, bii_polar=%f, aii_1_4=%f, bii_1_4=%f\n", id, atypes[id].code, w_catype_types[i], catype.m, catype.aii_normal, catype.bii_normal, catype.aii_polar, catype.bii_polar, catype.aii_1_4, catype.bii_1_4); + std::vector w_catype_types_cpu(w_atoms_list->length); + for (int i = 0; i < static_cast(w_atoms_list->length); i++) { + const int id = w_atoms_cpu[i]; + const catype_t catype = catypes[atypes[id].code - 1]; + const std::array key = {catype.aii_normal, catype.bii_normal, catype.aii_1_4, catype.bii_1_4}; + w_catype_types_cpu[i] = catype_to_type_host[key]; } - printf("Total water atom number: %d, w_catype_types size: %lu\n", h_w_atoms_size, w_catype_types.size()); - - check_cudaMalloc((void**)&d_catype_table_all, sizeof(catype_t) * h_catype_table_all.size()); - cudaMemcpy(d_catype_table_all, h_catype_table_all.data(), sizeof(catype_t) * h_catype_table_all.size(), cudaMemcpyHostToDevice); - check_cudaMalloc((void**)&d_catype_pair_params, sizeof(vdw_pair_param_t) * h_catype_pair_params.size()); - cudaMemcpy(d_catype_pair_params, h_catype_pair_params.data(), sizeof(vdw_pair_param_t) * h_catype_pair_params.size(), cudaMemcpyHostToDevice); - check_cudaMalloc((void**)&d_p_catype_types, sizeof(int) * p_catype_types.size()); - cudaMemcpy(d_p_catype_types, p_catype_types.data(), sizeof(int) * p_catype_types.size(), cudaMemcpyHostToDevice); - check_cudaMalloc((void**)&d_q_catype_types, sizeof(int) * q_catype_types.size()); - cudaMemcpy(d_q_catype_types, q_catype_types.data(), sizeof(int) * q_catype_types.size(), cudaMemcpyHostToDevice); - check_cudaMalloc((void**)&d_w_catype_types, sizeof(int) * w_catype_types.size()); - cudaMemcpy(d_w_catype_types, w_catype_types.data(), sizeof(int) * w_catype_types.size(), cudaMemcpyHostToDevice); + printf("Total water atom number: %lu, w_catype_types size: %lu\n", w_atoms_list->length, w_catype_types_cpu.size()); + + catype_table_all = make_host_device_buffer_from_vector(h_catype_table_all, run_gpu); + catype_pair_params = make_host_device_buffer_from_vector(h_catype_pair_params, run_gpu); + p_catype_types = make_host_device_buffer_from_vector(p_catype_types_cpu, run_gpu); + q_catype_types = make_host_device_buffer_from_vector(q_catype_types_cpu, run_gpu); + w_catype_types = make_host_device_buffer_from_vector(w_catype_types_cpu, run_gpu); } diff --git a/src/core/cuda/src/cuda_handler.cu b/src/core/cuda/src/cuda_handler.cu index bf45497c..b38c7376 100644 --- a/src/core/cuda/src/cuda_handler.cu +++ b/src/core/cuda/src/cuda_handler.cu @@ -3,7 +3,6 @@ #include "common/include/context.h" #include "cuda/include/cuda_angle_force.cuh" #include "cuda/include/cuda_bond_force.cuh" -#include "cuda/include/cuda_context.cuh" #include "cuda/include/cuda_handler.cuh" #include "cuda/include/cuda_improper2_force.cuh" #include "cuda/include/cuda_leapfrog.cuh" @@ -29,7 +28,7 @@ void CudaHandler::initialize() { if (!initialized_) { - ctx_.init(); + Context::instance().cuda_initialize_helpers(); init_angle_force_kernel_data(); init_bond_force_kernel_data(); init_improper2_force_kernel_data(); @@ -82,6 +81,7 @@ void CudaHandler::shutdown() { cleanup_shake_constraints(); cleanup_temperature(); cleanup_torsion_force(); + Context::instance().cuda_free_helpers(); initialized_ = false; } } @@ -135,5 +135,5 @@ void CudaHandler::calc_leapfrog() { void CudaHandler::reset_energies() { Handler::reset_energies(); - ctx_.reset_energies(); + Context::instance().cuda_reset_energies(); } diff --git a/src/core/cuda/src/cuda_improper2_force.cu b/src/core/cuda/src/cuda_improper2_force.cu index 7e8cea14..e44678e0 100644 --- a/src/core/cuda/src/cuda_improper2_force.cu +++ b/src/core/cuda/src/cuda_improper2_force.cu @@ -1,6 +1,6 @@ -#include "cuda/include/cuda_context.cuh" #include "cuda/include/cuda_improper2_force.cuh" #include "cuda/include/cuda_utility.cuh" +#include "context.h" namespace CudaImproper2Force { bool is_initialized = false; @@ -134,12 +134,11 @@ double calc_improper2_forces_host(int start, int end) { double energy = 0.0; cudaMemcpy(d_energy_sum, &energy, sizeof(double), cudaMemcpyHostToDevice); - CudaContext& context = CudaContext::instance(); - // context.sync_all_to_device(); - coord_t* d_coords = context.d_coords; - dvel_t* d_dvelocities = context.d_dvelocities; - improper_t* d_impropers = context.d_impropers; - cimproper_t* d_cimpropers = context.d_cimpropers; + auto& host_ctx = Context::instance(); + coord_t* d_coords = host_ctx.coords->gpu_data_p; + dvel_t* d_dvelocities = host_ctx.dvelocities->gpu_data_p; + improper_t* d_impropers = host_ctx.impropers->gpu_data_p; + cimproper_t* d_cimpropers = host_ctx.cimpropers->gpu_data_p; calc_improper2_forces_kernel<<>>(start, end, d_impropers, d_cimpropers, d_coords, d_dvelocities, d_energy_sum); cudaDeviceSynchronize(); @@ -161,4 +160,4 @@ void cleanup_improper2_force() { cudaFree(d_energy_sum); is_initialized = false; } -} \ No newline at end of file +} diff --git a/src/core/cuda/src/cuda_leapfrog.cu b/src/core/cuda/src/cuda_leapfrog.cu index 6b68ef99..49312337 100644 --- a/src/core/cuda/src/cuda_leapfrog.cu +++ b/src/core/cuda/src/cuda_leapfrog.cu @@ -1,7 +1,6 @@ #include #include "common/include/context.h" -#include "cuda/include/cuda_context.cuh" #include "cuda/include/cuda_leapfrog.cuh" #include "cuda/include/cuda_shake_constraints.cuh" #include "cuda_utility.cuh" @@ -48,13 +47,12 @@ __global__ void calc_leapfrog_kernel( void calc_leapfrog_host() { auto& host = Context::instance(); - auto& ctx = CudaContext::instance(); - auto d_atypes = ctx.d_atypes; - auto d_catypes = ctx.d_catypes; - auto d_velocities = ctx.d_velocities; - auto d_dvelocities = ctx.d_dvelocities; - auto d_coords = ctx.d_coords; - auto d_xcoords = ctx.d_xcoords; + auto d_atypes = host.atypes->gpu_data_p; + auto d_catypes = host.catypes->gpu_data_p; + auto d_velocities = host.velocities->gpu_data_p; + auto d_dvelocities = host.dvelocities->gpu_data_p; + auto d_coords = host.coords->gpu_data_p; + auto d_xcoords = host.xcoords->gpu_data_p; int blockSize = 256; int numBlocks = (host.n_atoms + blockSize - 1) / blockSize; @@ -72,20 +70,23 @@ void calc_leapfrog_host() { host.dt); check_cuda(cudaDeviceSynchronize()); - check_cuda(cudaMemcpy(host.velocities.data(), d_velocities, sizeof(vel_t) * host.n_atoms, cudaMemcpyDeviceToHost)); - check_cuda(cudaMemcpy(host.dvelocities.data(), d_dvelocities, sizeof(dvel_t) * host.n_atoms, cudaMemcpyDeviceToHost)); - check_cuda(cudaMemcpy(host.coords.data(), d_coords, sizeof(coord_t) * host.n_atoms, cudaMemcpyDeviceToHost)); - check_cuda(cudaMemcpy(host.xcoords.data(), d_xcoords, sizeof(coord_t) * host.n_atoms, cudaMemcpyDeviceToHost)); + host.velocities->download(); + host.dvelocities->download(); + host.coords->download(); + host.xcoords->download(); // shake // todo: Here is some problem, it writes into cpu memory, but we use gpu.. printf("n_shake_constraints: %d\n", host.n_shake_constraints); if (host.n_shake_constraints > 0) { calc_shake_constraints_host(); + auto &velocities = host.velocities->cpu_data_p; + auto &coords = host.coords->cpu_data_p; + auto *xcoords = host.xcoords->cpu_data_p; for (int i = 0; i < host.n_atoms; i++) { - host.velocities[i].x = (host.coords[i].x - host.xcoords[i].x) / host.dt; - host.velocities[i].y = (host.coords[i].y - host.xcoords[i].y) / host.dt; - host.velocities[i].z = (host.coords[i].z - host.xcoords[i].z) / host.dt; + velocities[i].x = (coords[i].x - xcoords[i].x) / host.dt; + velocities[i].y = (coords[i].y - xcoords[i].y) / host.dt; + velocities[i].z = (coords[i].z - xcoords[i].z) / host.dt; } } } diff --git a/src/core/cuda/src/cuda_nonbonded_14_force.cu b/src/core/cuda/src/cuda_nonbonded_14_force.cu index 4d5c1cca..fa404ee7 100644 --- a/src/core/cuda/src/cuda_nonbonded_14_force.cu +++ b/src/core/cuda/src/cuda_nonbonded_14_force.cu @@ -1,7 +1,6 @@ #include "constants.h" #include "common/include/context.h" #include "common/include/vdw_rules.h" -#include "cuda/include/cuda_context.cuh" #include "cuda/include/cuda_nonbonded_14_force.cuh" #include "cuda_utility.cuh" @@ -153,7 +152,6 @@ static Nonbonded14EnergyBuckets calc_nonbonded_14_force_state_host( using namespace CudaNonbonded14Force; auto& host = Context::instance(); - auto& context = CudaContext::instance(); const int n_ngbrs_14 = host.n_ngbrs14; Nonbonded14EnergyBuckets energies = {}; if (n_ngbrs_14 == 0) return energies; @@ -166,14 +164,14 @@ static Nonbonded14EnergyBuckets calc_nonbonded_14_force_state_host( calc_nonbonded_14_force_kernel<<>>( n_ngbrs_14, - context.d_ngbrs_14, + host.ngbrs_14->gpu_data_p, host.topo, - context.d_excluded, + host.excluded->gpu_data_p, d_atom_to_qi, - context.d_unified_ccharges, - context.d_unified_catypes, - context.d_coords, - context.d_dvelocities, + host.unified_ccharges->gpu_data_p, + host.unified_catypes->gpu_data_p, + host.coords->gpu_data_p, + host.dvelocities->gpu_data_p, d_evdw_totals, d_ecoul_totals, include_pp, @@ -192,11 +190,12 @@ static Nonbonded14EnergyBuckets calc_nonbonded_14_force_state_host( void calc_nonbonded_14_forces_host() { auto& host = Context::instance(); + auto *lambdas = host.lambdas->cpu_data_p; if (host.n_ngbrs14 == 0) return; for (int state = 0; state < host.n_lambdas; state++) { - const double lambda = host.lambdas[state]; + const double lambda = lambdas[state]; const bool include_pp = (state == 0); Nonbonded14EnergyBuckets energies = calc_nonbonded_14_force_state_host(state, lambda, include_pp); diff --git a/src/core/cuda/src/cuda_nonbonded_force.cu b/src/core/cuda/src/cuda_nonbonded_force.cu index f2ed84ad..432a7137 100644 --- a/src/core/cuda/src/cuda_nonbonded_force.cu +++ b/src/core/cuda/src/cuda_nonbonded_force.cu @@ -1,7 +1,6 @@ #include #include "context.h" -#include "cuda_context.cuh" #include "cuda_nonbonded_force.cuh" #include "constants.h" #include "cuda_utility.cuh" @@ -293,7 +292,6 @@ std::pair calc_nonbonded_force_host( const bool disable_water_h_lj, const double lambda) { using namespace CudaNonbondedForce; Context& host = Context::instance(); - CudaContext& context = CudaContext::instance(); const int thread_num = 256; dim3 block_sz = dim3(thread_num); @@ -315,29 +313,29 @@ std::pair calc_nonbonded_force_host( ny, x_charges_types, y_charges_types, - context.d_charge_pair_products, + host.charge_pair_products->gpu_data_p, x_atypes_types, y_atypes_types, - context.d_catype_pair_params, + host.catype_pair_params->gpu_data_p, host.topo, - context.d_excluded, - context.d_LJ_matrix, + host.excluded->gpu_data_p, + host.LJ_matrix->gpu_data_p, x_idx_list, y_idx_list, - context.d_coords, - context.d_dvelocities, + host.coords->gpu_data_p, + host.dvelocities->gpu_data_p, d_evdw_total, d_ecoul_total, symmetric, disable_water_h_lj, host.n_atoms_solute, - context.n_charge_types, - context.zero_charge_type, - context.n_catype_types, - context.zero_catype_type, + host.n_charge_types, + host.zero_charge_type, + host.n_catype_types, + host.zero_catype_type, host.n_qelscales, lambda, - context.d_q_elscales); + host.q_elscales->gpu_data_p); cudaDeviceSynchronize(); diff --git a/src/core/cuda/src/cuda_nonbonded_pp_force.cu b/src/core/cuda/src/cuda_nonbonded_pp_force.cu index 4e483c81..76992aeb 100644 --- a/src/core/cuda/src/cuda_nonbonded_pp_force.cu +++ b/src/core/cuda/src/cuda_nonbonded_pp_force.cu @@ -2,7 +2,6 @@ #include #include "common/include/context.h" -#include "cuda/include/cuda_context.cuh" #include "cuda/include/cuda_nonbonded_force.cuh" #include "cuda/include/cuda_nonbonded_pp_force.cuh" #include "cuda_utility.cuh" @@ -20,22 +19,21 @@ void init_nonbonded_pp_force_kernel_data() { } void calc_nonbonded_pp_forces_host_v2() { - int nx = CudaContext::instance().h_p_atoms_list.size(); + Context& host = Context::instance(); + int nx = static_cast(host.p_atoms_list->length); int ny = nx; auto result = calc_nonbonded_force_host( nx, ny, - CudaContext::instance().d_p_atoms_list, - CudaContext::instance().d_p_atoms_list, + host.p_atoms_list->gpu_data_p, + host.p_atoms_list->gpu_data_p, true, // symmetric - CudaContext::instance().d_p_charge_types, - CudaContext::instance().d_p_charge_types, - CudaContext::instance().d_p_catype_types, - CudaContext::instance().d_p_catype_types, + host.p_charge_types->gpu_data_p, + host.p_charge_types->gpu_data_p, + host.p_catype_types->gpu_data_p, + host.p_catype_types->gpu_data_p, false); - - Context& host = Context::instance(); host.E_nonbond_pp.Uvdw = result.first; host.E_nonbond_pp.Ucoul = result.second; } diff --git a/src/core/cuda/src/cuda_nonbonded_pw_force.cu b/src/core/cuda/src/cuda_nonbonded_pw_force.cu index efcf0865..461e46e2 100644 --- a/src/core/cuda/src/cuda_nonbonded_pw_force.cu +++ b/src/core/cuda/src/cuda_nonbonded_pw_force.cu @@ -2,7 +2,6 @@ #include #include "common/include/context.h" -#include "cuda/include/cuda_context.cuh" #include "cuda/include/cuda_nonbonded_force.cuh" #include "cuda/include/cuda_nonbonded_pw_force.cuh" @@ -14,21 +13,20 @@ bool is_initialized = false; void calc_nonbonded_pw_forces_host_v2() { using namespace CudaNonbondedPWForce; - int nx = CudaContext::instance().h_p_atoms_list.size(); - int ny = CudaContext::instance().h_w_atoms_list.size(); + Context& host = Context::instance(); + int nx = static_cast(host.p_atoms_list->length); + int ny = static_cast(host.w_atoms_list->length); auto result = calc_nonbonded_force_host( nx, ny, - CudaContext::instance().d_p_atoms_list, - CudaContext::instance().d_w_atoms_list, + host.p_atoms_list->gpu_data_p, + host.w_atoms_list->gpu_data_p, false, - CudaContext::instance().d_p_charge_types, - CudaContext::instance().d_w_charge_types, - CudaContext::instance().d_p_catype_types, - CudaContext::instance().d_w_catype_types, + host.p_charge_types->gpu_data_p, + host.w_charge_types->gpu_data_p, + host.p_catype_types->gpu_data_p, + host.w_catype_types->gpu_data_p, false); // printf("Nonbonded PW Force (Host) - VdW: %f, Coulomb: %f\n", result.first, result.second); - - Context& host = Context::instance(); host.E_nonbond_pw.Uvdw = result.first; host.E_nonbond_pw.Ucoul = result.second; } diff --git a/src/core/cuda/src/cuda_nonbonded_qp_force.cu b/src/core/cuda/src/cuda_nonbonded_qp_force.cu index 79d2ea34..37e6d1e5 100644 --- a/src/core/cuda/src/cuda_nonbonded_qp_force.cu +++ b/src/core/cuda/src/cuda_nonbonded_qp_force.cu @@ -3,7 +3,6 @@ #include #include "common/include/context.h" -#include "cuda/include/cuda_context.cuh" #include "cuda/include/cuda_nonbonded_force.cuh" #include "cuda/include/cuda_nonbonded_qp_force.cuh" #include "cuda_utility.cuh" @@ -15,25 +14,25 @@ bool is_initialized = false; void calc_nonbonded_qp_forces_host_v2() { using namespace CudaNonbondedQPForce; - int nx = CudaContext::instance().h_q_atoms_list.size(); - int ny = CudaContext::instance().h_p_atoms_list.size(); - Context& host = Context::instance(); + int nx = static_cast(host.q_atoms_list->length); + int ny = static_cast(host.p_atoms_list->length); + auto *lambdas = host.lambdas->cpu_data_p; for (int state = 0; state < host.n_lambdas; state++) { auto result = calc_nonbonded_force_host( nx, ny, - CudaContext::instance().d_q_atoms_list, - CudaContext::instance().d_p_atoms_list, + host.q_atoms_list->gpu_data_p, + host.p_atoms_list->gpu_data_p, false, - CudaContext::instance().d_q_charge_types + state * nx, - CudaContext::instance().d_p_charge_types, - CudaContext::instance().d_q_catype_types + state * nx, - CudaContext::instance().d_p_catype_types, - false, host.lambdas[state]); + host.q_charge_types->gpu_data_p + state * nx, + host.p_charge_types->gpu_data_p, + host.q_catype_types->gpu_data_p + state * nx, + host.p_catype_types->gpu_data_p, + false, lambdas[state]); - host.EQ_nonbond_qp[state].Uvdw = result.first / host.lambdas[state]; - host.EQ_nonbond_qp[state].Ucoul = result.second / host.lambdas[state]; + host.EQ_nonbond_qp[state].Uvdw = result.first / lambdas[state]; + host.EQ_nonbond_qp[state].Ucoul = result.second / lambdas[state]; // printf("Nonbonded QP Force State %d: Uvdw = %f, Ucoul = %f\n", state, result.first, result.second); } } diff --git a/src/core/cuda/src/cuda_nonbonded_qq_force.cu b/src/core/cuda/src/cuda_nonbonded_qq_force.cu index 25d82b05..b71a5fa8 100644 --- a/src/core/cuda/src/cuda_nonbonded_qq_force.cu +++ b/src/core/cuda/src/cuda_nonbonded_qq_force.cu @@ -2,7 +2,6 @@ #include #include "common/include/context.h" -#include "cuda/include/cuda_context.cuh" #include "cuda/include/cuda_nonbonded_force.cuh" #include "cuda/include/cuda_nonbonded_qq_force.cuh" #include "cuda_utility.cuh" @@ -16,22 +15,23 @@ void calc_nonbonded_qq_forces_host() { using namespace CudaNonbondedQQForce; Context& host = Context::instance(); - int n = CudaContext::instance().h_q_atoms_list.size(); + auto *lambdas = host.lambdas->cpu_data_p; + int n = static_cast(host.q_atoms_list->length); for (int state = 0; state < host.n_lambdas; state++) { auto result = calc_nonbonded_force_host( n, n, - CudaContext::instance().d_q_atoms_list, - CudaContext::instance().d_q_atoms_list, + host.q_atoms_list->gpu_data_p, + host.q_atoms_list->gpu_data_p, true, - CudaContext::instance().d_q_charge_types + state * n, - CudaContext::instance().d_q_charge_types + state * n, - CudaContext::instance().d_q_catype_types + state * n, - CudaContext::instance().d_q_catype_types + state * n, - false, host.lambdas[state]); - - host.EQ_nonbond_qq[state].Uvdw = result.first / host.lambdas[state]; - host.EQ_nonbond_qq[state].Ucoul = result.second / host.lambdas[state]; + host.q_charge_types->gpu_data_p + state * n, + host.q_charge_types->gpu_data_p + state * n, + host.q_catype_types->gpu_data_p + state * n, + host.q_catype_types->gpu_data_p + state * n, + false, lambdas[state]); + + host.EQ_nonbond_qq[state].Uvdw = result.first / lambdas[state]; + host.EQ_nonbond_qq[state].Ucoul = result.second / lambdas[state]; } } diff --git a/src/core/cuda/src/cuda_nonbonded_qw_force.cu b/src/core/cuda/src/cuda_nonbonded_qw_force.cu index bb826cc5..32978e19 100644 --- a/src/core/cuda/src/cuda_nonbonded_qw_force.cu +++ b/src/core/cuda/src/cuda_nonbonded_qw_force.cu @@ -2,7 +2,6 @@ #include #include "common/include/context.h" -#include "cuda/include/cuda_context.cuh" #include "cuda/include/cuda_nonbonded_force.cuh" #include "cuda/include/cuda_nonbonded_qw_force.cuh" #include "cuda_utility.cuh" @@ -14,25 +13,26 @@ bool is_initialized = false; void calc_nonbonded_qw_forces_host_v2() { using namespace CudaNonbondedQWForce; - int nx = CudaContext::instance().h_q_atoms_list.size(); - int ny = CudaContext::instance().h_w_atoms_list.size(); Context& host = Context::instance(); + int nx = static_cast(host.q_atoms_list->length); + int ny = static_cast(host.w_atoms_list->length); + auto *lambdas = host.lambdas->cpu_data_p; for (int state = 0; state < host.n_lambdas; state++) { auto result = calc_nonbonded_force_host( nx, ny, - CudaContext::instance().d_q_atoms_list, - CudaContext::instance().d_w_atoms_list, + host.q_atoms_list->gpu_data_p, + host.w_atoms_list->gpu_data_p, false, - CudaContext::instance().d_q_charge_types + state * nx, - CudaContext::instance().d_w_charge_types, - CudaContext::instance().d_q_catype_types + state * nx, - CudaContext::instance().d_w_catype_types, - true, host.lambdas[state]); - - host.EQ_nonbond_qw[state].Uvdw = result.first / host.lambdas[state]; - host.EQ_nonbond_qw[state].Ucoul = result.second / host.lambdas[state]; + host.q_charge_types->gpu_data_p + state * nx, + host.w_charge_types->gpu_data_p, + host.q_catype_types->gpu_data_p + state * nx, + host.w_catype_types->gpu_data_p, + true, lambdas[state]); + + host.EQ_nonbond_qw[state].Uvdw = result.first / lambdas[state]; + host.EQ_nonbond_qw[state].Ucoul = result.second / lambdas[state]; // printf("Nonbonded QW Force State %d: Uvdw = %f, Ucoul = %f\n", state, result.first, result.second); } } diff --git a/src/core/cuda/src/cuda_nonbonded_ww_force.cu b/src/core/cuda/src/cuda_nonbonded_ww_force.cu index 304fa21a..dc5dcbb1 100644 --- a/src/core/cuda/src/cuda_nonbonded_ww_force.cu +++ b/src/core/cuda/src/cuda_nonbonded_ww_force.cu @@ -2,7 +2,6 @@ #include #include "common/include/context.h" -#include "cuda/include/cuda_context.cuh" #include "cuda/include/cuda_nonbonded_force.cuh" #include "cuda/include/cuda_nonbonded_ww_force.cuh" namespace CudaNonbondedWWForce { @@ -12,19 +11,19 @@ bool is_initialized = false; void calc_nonbonded_ww_forces_host_v2() { using namespace CudaNonbondedWWForce; - int nx = CudaContext::instance().h_w_atoms_list.size(); + Context& host = Context::instance(); + int nx = static_cast(host.w_atoms_list->length); int ny = nx; auto result = calc_nonbonded_force_host( nx, ny, - CudaContext::instance().d_w_atoms_list, - CudaContext::instance().d_w_atoms_list, + host.w_atoms_list->gpu_data_p, + host.w_atoms_list->gpu_data_p, true, - CudaContext::instance().d_w_charge_types, - CudaContext::instance().d_w_charge_types, - CudaContext::instance().d_w_catype_types, - CudaContext::instance().d_w_catype_types, + host.w_charge_types->gpu_data_p, + host.w_charge_types->gpu_data_p, + host.w_catype_types->gpu_data_p, + host.w_catype_types->gpu_data_p, true); - Context& host = Context::instance(); host.E_nonbond_ww.Uvdw = result.first; host.E_nonbond_ww.Ucoul = result.second; } diff --git a/src/core/cuda/src/cuda_polx_water_force.cu b/src/core/cuda/src/cuda_polx_water_force.cu index ac2347aa..9b0eb667 100644 --- a/src/core/cuda/src/cuda_polx_water_force.cu +++ b/src/core/cuda/src/cuda_polx_water_force.cu @@ -3,7 +3,6 @@ #include "common/include/context.h" #include "common/include/constants.h" -#include "cuda/include/cuda_context.cuh" #include "cuda/include/cuda_polx_water_force.cuh" #include "cuda_utility.cuh" @@ -171,16 +170,17 @@ __global__ void calc_polx_water_forces_kernel( void sort_waters() { using namespace CudaPolxWaterForce; auto& ctx = Context::instance(); + auto *wshells = ctx.wshells->cpu_data_p; int imin, jmin, jw; double tmin; // Sort the waters according to theta for (int is = 0; is < ctx.n_shells; is++) { imin = 0; - for (int il = 0; il < ctx.wshells[is].n_inshell; il++) { + for (int il = 0; il < wshells[is].n_inshell; il++) { tmin = 2 * M_PI; - for (int jl = 0; jl < ctx.wshells[is].n_inshell; jl++) { - // printf("Searching water %d in shell %d, total number: %d\n", jl, is, ctx.wshells[is].n_inshell); + for (int jl = 0; jl < wshells[is].n_inshell; jl++) { + // printf("Searching water %d in shell %d, total number: %d\n", jl, is, wshells[is].n_inshell); jw = polx_list_sh[jl * ctx.n_shells + is]; // printf("Water %d in shell %d has theta %f\n", jw, is, ctx.tdum[jw] * 180 / M_PI); if (ctx.tdum[jw] < tmin) { @@ -198,23 +198,21 @@ void sort_waters() { } void calc_polx_water_forces_host(int iteration) { - CudaContext& cuda_ctx = CudaContext::instance(); auto& ctx = Context::instance(); + auto *wshells = ctx.wshells->cpu_data_p; for (int is = 0; is < ctx.n_shells; is++) { - ctx.wshells[is].n_inshell = 0; + wshells[is].n_inshell = 0; if (iteration == 0) { - ctx.wshells[is].theta_corr = 0; + wshells[is].theta_corr = 0; } } using namespace CudaPolxWaterForce; - // cuda_ctx.sync_all_to_device(); - cudaMemcpy(cuda_ctx.d_wshells, ctx.wshells.data(), ctx.n_shells * sizeof(shell_t), cudaMemcpyHostToDevice); - - coord_t* d_coords = cuda_ctx.d_coords; - dvel_t* d_dvelocities = cuda_ctx.d_dvelocities; - shell_t* d_wshells = cuda_ctx.d_wshells; + coord_t* d_coords = ctx.coords->gpu_data_p; + dvel_t* d_dvelocities = ctx.dvelocities->gpu_data_p; + shell_t* d_wshells = ctx.wshells->gpu_data_p; + ctx.wshells->upload(); int blockSize = 256; int numBlocks = (ctx.n_waters + blockSize - 1) / blockSize; @@ -224,7 +222,7 @@ void calc_polx_water_forces_host(int iteration) { // printf("Calculated theta for %d waters in %d shells\n", ctx.n_waters, ctx.n_shells); // todo: sort in cpu now.. - cudaMemcpy(ctx.wshells.data(), d_wshells, ctx.n_shells * sizeof(shell_t), cudaMemcpyDeviceToHost); + ctx.wshells->download(); cudaMemcpy(ctx.tdum.data(), d_tdum, ctx.n_waters * sizeof(double), cudaMemcpyDeviceToHost); cudaMemcpy(polx_list_sh, d_list_sh, ctx.n_max_inshell * ctx.n_shells * sizeof(int), cudaMemcpyDeviceToHost); @@ -245,15 +243,15 @@ void calc_polx_water_forces_host(int iteration) { if (iteration != 0 && iteration % itdis_update == 0) { for (int is = 0; is < ctx.n_shells; is++) { printf("SHELL %d\n", is); - ctx.wshells[is].avtheta /= (double)itdis_update; - ctx.wshells[is].avn_inshell /= (double)itdis_update; - ctx.wshells[is].theta_corr = ctx.wshells[is].theta_corr + ctx.wshells[is].avtheta - acos(ctx.wshells[is].cstb); + wshells[is].avtheta /= (double)itdis_update; + wshells[is].avn_inshell /= (double)itdis_update; + wshells[is].theta_corr = wshells[is].theta_corr + wshells[is].avtheta - acos(wshells[is].cstb); printf("average theta = %f, average in shell = %f, theta_corr = %f\n", - ctx.wshells[is].avtheta * 180 / M_PI, ctx.wshells[is].avn_inshell, ctx.wshells[is].theta_corr * 180 / M_PI); - ctx.wshells[is].avtheta = 0; - ctx.wshells[is].avn_inshell = 0; + wshells[is].avtheta * 180 / M_PI, wshells[is].avn_inshell, wshells[is].theta_corr * 180 / M_PI); + wshells[is].avtheta = 0; + wshells[is].avn_inshell = 0; } - cudaMemcpy(d_wshells, ctx.wshells.data(), ctx.n_shells * sizeof(shell_t), cudaMemcpyHostToDevice); + ctx.wshells->upload(); } // Calculate energy and force @@ -264,7 +262,7 @@ void calc_polx_water_forces_host(int iteration) { double energy; cudaMemcpy(&energy, d_energy, sizeof(double), cudaMemcpyDeviceToHost); ctx.E_restraint.Upolx += energy; - cudaMemcpy(ctx.wshells.data(), d_wshells, ctx.n_shells * sizeof(shell_t), cudaMemcpyDeviceToHost); + ctx.wshells->download(); // Copy back forces for all atoms (solute + solvent); water forces were being dropped. } diff --git a/src/core/cuda/src/cuda_pshell_force.cu b/src/core/cuda/src/cuda_pshell_force.cu index 39fb4ff3..a01fb536 100644 --- a/src/core/cuda/src/cuda_pshell_force.cu +++ b/src/core/cuda/src/cuda_pshell_force.cu @@ -1,4 +1,3 @@ -#include "cuda/include/cuda_context.cuh" #include "cuda/include/cuda_pshell_force.cuh" #include "common/include/constants.h" #include "common/include/context.h" @@ -15,7 +14,7 @@ __global__ void calc_pshell_force_kernel( bool* shell, bool* excluded, coord_t* coords, - coord_t* coords_top, + coord_t* coords_init, double* ufix_energy, double* ushell_energy, dvel_t* dvelocities) { @@ -32,9 +31,9 @@ __global__ void calc_pshell_force_kernel( } else { k = k_pshell; } - dr.x = coords[i].x - coords_top[i].x; - dr.y = coords[i].y - coords_top[i].y; - dr.z = coords[i].z - coords_top[i].z; + dr.x = coords[i].x - coords_init[i].x; + dr.y = coords[i].y - coords_init[i].y; + dr.z = coords[i].z - coords_init[i].z; r2 = pow(dr.x, 2) + pow(dr.y, 2) + pow(dr.z, 2); ener = 0.5 * k * r2; // printf("dr = %f %f %f\n", dr.x, dr.y, dr.z); @@ -49,15 +48,14 @@ __global__ void calc_pshell_force_kernel( } void calc_pshell_forces_host() { - CudaContext& ctx = CudaContext::instance(); auto& host = Context::instance(); using namespace CudaPshellForce; - auto d_shell = ctx.d_shell; - auto d_excluded = ctx.d_excluded; - auto d_coords = ctx.d_coords; - auto d_coords_top = ctx.d_coords_top; - auto d_dvelocities = ctx.d_dvelocities; + auto d_shell = host.shell->gpu_data_p; + auto d_excluded = host.excluded->gpu_data_p; + auto d_coords = host.coords->gpu_data_p; + auto d_coords_init = host.coords_init->gpu_data_p; + auto d_dvelocities = host.dvelocities->gpu_data_p; cudaMemset(d_ufix_energy, 0, sizeof(double)); cudaMemset(d_ushell_energy, 0, sizeof(double)); @@ -69,7 +67,7 @@ void calc_pshell_forces_host() { d_shell, d_excluded, d_coords, - d_coords_top, + d_coords_init, d_ufix_energy, d_ushell_energy, d_dvelocities); @@ -100,4 +98,4 @@ void cleanup_pshell_force() { cudaFree(d_ushell_energy); is_initialized = false; } -} \ No newline at end of file +} diff --git a/src/core/cuda/src/cuda_radix_water_force.cu b/src/core/cuda/src/cuda_radix_water_force.cu index c911574d..06f5f5a3 100644 --- a/src/core/cuda/src/cuda_radix_water_force.cu +++ b/src/core/cuda/src/cuda_radix_water_force.cu @@ -2,7 +2,6 @@ #include "common/include/constants.h" #include "common/include/context.h" -#include "cuda/include/cuda_context.cuh" #include "cuda/include/cuda_radix_water_force.cuh" #include "cuda/include/cuda_utility.cuh" namespace CudaRadixWaterForce { @@ -69,11 +68,8 @@ void calc_radix_water_forces_host() { int oxygen_atoms = water_atoms / 3; int numBlocks = (oxygen_atoms + blockSize - 1) / blockSize; - CudaContext& ctx = CudaContext::instance(); - // ctx.sync_all_to_device(); - - auto d_coords = ctx.d_coords; - auto d_dvelocities = ctx.d_dvelocities; + auto d_coords = host.coords->gpu_data_p; + auto d_dvelocities = host.dvelocities->gpu_data_p; check_cuda(cudaMemset(d_energy, 0, sizeof(double))); double shift; @@ -95,7 +91,7 @@ void calc_radix_water_forces_host() { d_dvelocities, d_energy); check_cuda(cudaDeviceSynchronize()); - check_cuda(cudaMemcpy(host.dvelocities.data(), d_dvelocities, sizeof(dvel_t) * host.n_atoms, cudaMemcpyDeviceToHost)); + host.dvelocities->download(); check_cuda(cudaMemcpy(&energy, d_energy, sizeof(double), cudaMemcpyDeviceToHost)); host.E_restraint.Uradx += energy; } diff --git a/src/core/cuda/src/cuda_restrang_force.cu b/src/core/cuda/src/cuda_restrang_force.cu index 76cc436f..eb0813f5 100644 --- a/src/core/cuda/src/cuda_restrang_force.cu +++ b/src/core/cuda/src/cuda_restrang_force.cu @@ -1,4 +1,3 @@ -#include "cuda/include/cuda_context.cuh" #include "cuda/include/cuda_restrang_force.cuh" #include "cuda/include/cuda_utility.cuh" #include "common/include/context.h" @@ -104,13 +103,12 @@ void calc_restrang_force_host() { auto& host = Context::instance(); if (host.n_restrangs == 0) return; using namespace CudaRestrangForce; - CudaContext& ctx = CudaContext::instance(); - auto d_restrangs = ctx.d_restrangs; - auto d_coords = ctx.d_coords; - auto d_lambdas = ctx.d_lambdas; - auto d_dvelocities = ctx.d_dvelocities; - auto d_EQ_restraint = ctx.d_EQ_restraint; + auto d_restrangs = host.restrangs->gpu_data_p; + auto d_coords = host.coords->gpu_data_p; + auto d_lambdas = host.lambdas->gpu_data_p; + auto d_dvelocities = host.dvelocities->gpu_data_p; + auto d_EQ_restraint = host.EQ_restraint->gpu_data_p; double val = 0; cudaMemcpy(d_E_restraint, &val, sizeof(double), cudaMemcpyHostToDevice); @@ -127,7 +125,7 @@ void calc_restrang_force_host() { d_EQ_restraint, d_E_restraint); cudaDeviceSynchronize(); - cudaMemcpy(host.EQ_restraint.data(), d_EQ_restraint, sizeof(E_restraint_t) * host.n_lambdas, cudaMemcpyDeviceToHost); + host.EQ_restraint->download(); cudaMemcpy(&val, d_E_restraint, sizeof(double), cudaMemcpyDeviceToHost); host.E_restraint.Upres += val; } diff --git a/src/core/cuda/src/cuda_restrdis_force.cu b/src/core/cuda/src/cuda_restrdis_force.cu index d6fb0f3c..9aacf977 100644 --- a/src/core/cuda/src/cuda_restrdis_force.cu +++ b/src/core/cuda/src/cuda_restrdis_force.cu @@ -1,6 +1,5 @@ #include -#include "cuda/include/cuda_context.cuh" #include "cuda/include/cuda_restrdis_force.cuh" #include "cuda/include/cuda_utility.cuh" #include "common/include/context.h" @@ -77,12 +76,11 @@ void calc_restrdis_forces_host() { auto& host = Context::instance(); if (host.n_restrdists == 0) return; using namespace CudaRestrdisForce; - CudaContext& ctx = CudaContext::instance(); - auto d_restrdists = ctx.d_restrdists; - auto d_coords = ctx.d_coords; - auto d_lambdas = ctx.d_lambdas; - auto d_dvelocities = ctx.d_dvelocities; - auto d_EQ_restraint = ctx.d_EQ_restraint; + auto d_restrdists = host.restrdists->gpu_data_p; + auto d_coords = host.coords->gpu_data_p; + auto d_lambdas = host.lambdas->gpu_data_p; + auto d_dvelocities = host.dvelocities->gpu_data_p; + auto d_EQ_restraint = host.EQ_restraint->gpu_data_p; cudaMemset(d_E_restraint, 0, sizeof(double)); @@ -98,7 +96,7 @@ void calc_restrdis_forces_host() { d_EQ_restraint, d_E_restraint); cudaDeviceSynchronize(); - cudaMemcpy(host.EQ_restraint.data(), d_EQ_restraint, sizeof(E_restraint_t) * host.n_lambdas, cudaMemcpyDeviceToHost); + host.EQ_restraint->download(); double ener; cudaMemcpy(&ener, d_E_restraint, sizeof(double), cudaMemcpyDeviceToHost); printf("Energy restraint: %f\n", ener); diff --git a/src/core/cuda/src/cuda_restrpos_force.cu b/src/core/cuda/src/cuda_restrpos_force.cu index 8409ee61..5f479364 100644 --- a/src/core/cuda/src/cuda_restrpos_force.cu +++ b/src/core/cuda/src/cuda_restrpos_force.cu @@ -1,6 +1,5 @@ #include -#include "cuda/include/cuda_context.cuh" #include "cuda/include/cuda_restrpos_force.cuh" #include "cuda/include/cuda_utility.cuh" #include "common/include/context.h" @@ -68,12 +67,11 @@ void calc_restrpos_forces_host() { double val = 0.0; cudaMemcpy(d_E_restraint, &val, sizeof(double), cudaMemcpyHostToDevice); - CudaContext& ctx = CudaContext::instance(); - auto d_restrspos = ctx.d_restrspos; - auto d_coords = ctx.d_coords; - auto d_lambdas = ctx.d_lambdas; - auto d_EQ_restraint = ctx.d_EQ_restraint; - auto d_dvelocities = ctx.d_dvelocities; + auto d_restrspos = host.restrspos->gpu_data_p; + auto d_coords = host.coords->gpu_data_p; + auto d_lambdas = host.lambdas->gpu_data_p; + auto d_EQ_restraint = host.EQ_restraint->gpu_data_p; + auto d_dvelocities = host.dvelocities->gpu_data_p; int blockSize = 256; int numBlocks = (host.n_restrspos + blockSize - 1) / blockSize; @@ -89,7 +87,7 @@ void calc_restrpos_forces_host() { cudaDeviceSynchronize(); cudaMemcpy(&val, d_E_restraint, sizeof(double), cudaMemcpyDeviceToHost); host.E_restraint.Upres += val; - cudaMemcpy(host.EQ_restraint.data(), d_EQ_restraint, sizeof(E_restraint_t) * host.n_lambdas, cudaMemcpyDeviceToHost); + host.EQ_restraint->download(); } void init_restrpos_force_kernel_data() { diff --git a/src/core/cuda/src/cuda_restrseq_force.cu b/src/core/cuda/src/cuda_restrseq_force.cu index 488354e3..b5db3552 100644 --- a/src/core/cuda/src/cuda_restrseq_force.cu +++ b/src/core/cuda/src/cuda_restrseq_force.cu @@ -1,4 +1,3 @@ -#include "cuda/include/cuda_context.cuh" #include "cuda/include/cuda_restrseq_force.cuh" #include "common/include/context.h" #include "iostream" @@ -11,7 +10,7 @@ __global__ void calc_restrseq_forces_kernel( int n_restrseqs, restrseq_t* restrseqs, coord_t* coords, - coord_t* coords_top, + coord_t* coords_init, atype_t* atypes, catype_t* catypes, bool* heavy, @@ -37,9 +36,9 @@ __global__ void calc_restrseq_forces_kernel( for (int i = restrseqs[s].ai - 1; i < restrseqs[s].aj - 1; i++) { if (heavy[i] || restrseqs[s].ih) { n_ctr++; - dr.x += (coords[i].x - coords_top[i].x); - dr.y += (coords[i].y - coords_top[i].y); - dr.z += (coords[i].z - coords_top[i].z); + dr.x += (coords[i].x - coords_init[i].x); + dr.y += (coords[i].y - coords_init[i].y); + dr.z += (coords[i].z - coords_init[i].z); } } @@ -68,9 +67,9 @@ __global__ void calc_restrseq_forces_kernel( if (heavy[i] || restrseqs[s].ih) { mass = catypes[atypes[i].code - 1].m; totmass += mass; - dr.x += (coords[i].x - coords_top[i].x) * mass; - dr.y += (coords[i].y - coords_top[i].y) * mass; - dr.z += (coords[i].z - coords_top[i].z) * mass; + dr.x += (coords[i].x - coords_init[i].x) * mass; + dr.y += (coords[i].y - coords_init[i].y) * mass; + dr.z += (coords[i].z - coords_init[i].z) * mass; } } @@ -97,9 +96,9 @@ __global__ void calc_restrseq_forces_kernel( else { for (int i = restrseqs[s].ai - 1; i < restrseqs[s].aj - 1; i++) { if (heavy[i] || restrseqs[s].ih) { - dr.x = coords[i].x - coords_top[i].x; - dr.y = coords[i].y - coords_top[i].y; - dr.z = coords[i].z - coords_top[i].z; + dr.x = coords[i].x - coords_init[i].x; + dr.y = coords[i].y - coords_init[i].y; + dr.z = coords[i].z - coords_init[i].z; r2 = pow(dr.x, 2) + pow(dr.y, 2) + pow(dr.z, 2); ener = .5 * k * r2; @@ -117,14 +116,13 @@ void calc_restrseq_forces_host() { auto& host = Context::instance(); if (host.n_restrseqs == 0) return; using namespace CudaRestrseqForce; - CudaContext& ctx = CudaContext::instance(); - auto d_restrseq = ctx.d_restrseqs; - auto d_coords = ctx.d_coords; - auto d_coords_top = ctx.d_coords_top; - auto d_atypes = ctx.d_atypes; - auto d_catypes = ctx.d_catypes; - auto d_heavy = ctx.d_heavy; - auto d_dvelocities = ctx.d_dvelocities; + auto d_restrseq = host.restrseqs->gpu_data_p; + auto d_coords = host.coords->gpu_data_p; + auto d_coords_init = host.coords_init->gpu_data_p; + auto d_atypes = host.atypes->gpu_data_p; + auto d_catypes = host.catypes->gpu_data_p; + auto d_heavy = host.heavy->gpu_data_p; + auto d_dvelocities = host.dvelocities->gpu_data_p; cudaMemset(d_upres_energy, 0, sizeof(double)); // ctx.sync_all_to_device(); @@ -134,7 +132,7 @@ void calc_restrseq_forces_host() { host.n_restrseqs, d_restrseq, d_coords, - d_coords_top, + d_coords_init, d_atypes, d_catypes, d_heavy, diff --git a/src/core/cuda/src/cuda_restrwall_force.cu b/src/core/cuda/src/cuda_restrwall_force.cu index 24183427..12d890ad 100644 --- a/src/core/cuda/src/cuda_restrwall_force.cu +++ b/src/core/cuda/src/cuda_restrwall_force.cu @@ -1,6 +1,5 @@ #include -#include "cuda/include/cuda_context.cuh" #include "cuda/include/cuda_restrwall_force.cuh" #include "common/include/context.h" @@ -55,11 +54,10 @@ void calc_restrwall_forces_host() { auto& host = Context::instance(); if (host.n_restrwalls == 0) return; using namespace CudaRestrwallForce; - CudaContext& ctx = CudaContext::instance(); - auto d_restrwalls = ctx.d_restrwalls; - auto d_coords = ctx.d_coords; - auto d_dvelocities = ctx.d_dvelocities; - auto d_heavy = ctx.d_heavy; + auto d_restrwalls = host.restrwalls->gpu_data_p; + auto d_coords = host.coords->gpu_data_p; + auto d_dvelocities = host.dvelocities->gpu_data_p; + auto d_heavy = host.heavy->gpu_data_p; cudaMemset(d_energies, 0, sizeof(double)); int blockSize = 256; diff --git a/src/core/cuda/src/cuda_shake_constraints.cu b/src/core/cuda/src/cuda_shake_constraints.cu index 8c68a92e..e9dfd051 100644 --- a/src/core/cuda/src/cuda_shake_constraints.cu +++ b/src/core/cuda/src/cuda_shake_constraints.cu @@ -1,6 +1,5 @@ #include -#include "cuda/include/cuda_context.cuh" #include "cuda/include/cuda_shake_constraints.cuh" #include "common/include/constants.h" #include "common/include/context.h" @@ -99,13 +98,14 @@ __global__ void calc_shake_constraints_kernel( void init_shake_constraints_kernel_data() { auto& host = Context::instance(); + auto *mol_n_shakes = host.mol_n_shakes->cpu_data_p; using namespace CudaShakeConstraints; if (!is_initialized) { if (host.n_molecules > 0) { int* mol_shake_offset_host = (int*)malloc(sizeof(int) * host.n_molecules); mol_shake_offset_host[0] = 0; for (int i = 1; i < host.n_molecules; i++) { - mol_shake_offset_host[i] = mol_shake_offset_host[i - 1] + host.mol_n_shakes[i - 1]; + mol_shake_offset_host[i] = mol_shake_offset_host[i - 1] + mol_n_shakes[i - 1]; } check_cudaMalloc((void**)&d_mol_shake_offset, sizeof(int) * host.n_molecules); cudaMemcpy(d_mol_shake_offset, mol_shake_offset_host, sizeof(int) * host.n_molecules, cudaMemcpyHostToDevice); @@ -137,12 +137,11 @@ int calc_shake_constraints_host() { int blocks = host.n_molecules; int threads = 32; - CudaContext& ctx = CudaContext::instance(); - auto d_mol_n_shakes = ctx.d_mol_n_shakes; - auto d_shake_bonds = ctx.d_shake_bonds; - auto d_coords = ctx.d_coords; - auto d_xcoords = ctx.d_xcoords; - auto d_winv = ctx.d_winv; + auto d_mol_n_shakes = host.mol_n_shakes->gpu_data_p; + auto d_shake_bonds = host.shake_bonds->gpu_data_p; + auto d_coords = host.coords->gpu_data_p; + auto d_xcoords = host.xcoords->gpu_data_p; + auto d_winv = host.winv->gpu_data_p; calc_shake_constraints_kernel<<>>( host.n_molecules, @@ -155,6 +154,6 @@ int calc_shake_constraints_host() { d_mol_shake_offset); cudaDeviceSynchronize(); cudaMemcpy(&total_iterations_host, d_total_iterations, sizeof(int), cudaMemcpyDeviceToHost); - cudaMemcpy(host.coords.data(), d_coords, sizeof(coord_t) * host.n_atoms, cudaMemcpyDeviceToHost); + host.coords->download(); return host.n_molecules == 0 ? 0 : total_iterations_host / host.n_molecules; } diff --git a/src/core/cuda/src/cuda_temperature.cu b/src/core/cuda/src/cuda_temperature.cu index 138fa617..22b4dc82 100644 --- a/src/core/cuda/src/cuda_temperature.cu +++ b/src/core/cuda/src/cuda_temperature.cu @@ -1,4 +1,3 @@ -#include "cuda/include/cuda_context.cuh" #include "cuda/include/cuda_temperature.cuh" #include "cuda/include/cuda_utility.cuh" #include "common/include/constants.h" @@ -58,11 +57,10 @@ void calc_temperature_host() { cudaMemcpy(d_Tfree_solvent, &h_Tfree_solvent, sizeof(double), cudaMemcpyHostToDevice); cudaMemcpy(d_Texcl_solvent, &h_Texcl_solvent, sizeof(double), cudaMemcpyHostToDevice); - CudaContext& ctx = CudaContext::instance(); - atype_t* d_atypes = ctx.d_atypes; - catype_t* d_catypes = ctx.d_catypes; - vel_t* d_velocities = ctx.d_velocities; - bool* d_excluded = ctx.d_excluded; + atype_t* d_atypes = host.atypes->gpu_data_p; + catype_t* d_catypes = host.catypes->gpu_data_p; + vel_t* d_velocities = host.velocities->gpu_data_p; + bool* d_excluded = host.excluded->gpu_data_p; int blockSize = 256; int numBlocks = (host.n_atoms + blockSize - 1) / blockSize; diff --git a/src/core/cuda/src/cuda_torsion_force.cu b/src/core/cuda/src/cuda_torsion_force.cu index 3d6d3174..6ef7cd45 100644 --- a/src/core/cuda/src/cuda_torsion_force.cu +++ b/src/core/cuda/src/cuda_torsion_force.cu @@ -1,6 +1,6 @@ -#include "cuda/include/cuda_context.cuh" #include "cuda/include/cuda_torsion_force.cuh" #include "cuda/include/cuda_utility.cuh" +#include "context.h" namespace CudaTorsionForce { bool is_initialized = false; @@ -133,11 +133,11 @@ double calc_torsion_forces_host(int start, int end) { double zero = 0.0; cudaMemcpy(d_energy_sum, &zero, sizeof(double), cudaMemcpyHostToDevice); - CudaContext& ctx = CudaContext::instance(); - coord_t* d_coords = ctx.d_coords; - dvel_t* d_dvelocities = ctx.d_dvelocities; - torsion_t* d_torsions = ctx.d_torsions; - ctorsion_t* d_ctorsions = ctx.d_ctorsions; + auto& host_ctx = Context::instance(); + coord_t* d_coords = host_ctx.coords->gpu_data_p; + dvel_t* d_dvelocities = host_ctx.dvelocities->gpu_data_p; + torsion_t* d_torsions = host_ctx.torsions->gpu_data_p; + ctorsion_t* d_ctorsions = host_ctx.ctorsions->gpu_data_p; calc_torsion_forces_kernel<<>>(start, end, d_torsions, d_ctorsions, d_coords, d_dvelocities, d_energy_sum); cudaDeviceSynchronize(); @@ -161,4 +161,4 @@ void cleanup_torsion_force() { cudaFree(d_energy_sum); is_initialized = false; } -} \ No newline at end of file +} diff --git a/src/core/cuda/src/cuda_utility.cu b/src/core/cuda/src/cuda_utility.cu index ab1cca7c..3430736f 100644 --- a/src/core/cuda/src/cuda_utility.cu +++ b/src/core/cuda/src/cuda_utility.cu @@ -1,19 +1 @@ #include "cuda/include/cuda_utility.cuh" - -#include -#include - -void check_cudaMalloc(void** devPtr, size_t size) { - cudaError_t error = cudaMalloc((void**)devPtr, size); - if (error != cudaSuccess) { - printf(">>> FATAL: cudaMalloc failed with error code %d: %s\n", error, cudaGetErrorString(error)); - exit(EXIT_FAILURE); - } -} - -void check_cuda(cudaError_t status) { - if (status != cudaSuccess) { - printf(">>> FATAL: CUDA call failed with error code %d: %s\n", status, cudaGetErrorString(status)); - exit(EXIT_FAILURE); - } -}