diff --git a/docs/documentation/case.md b/docs/documentation/case.md index 5acd85a7a3..883d6619ab 100644 --- a/docs/documentation/case.md +++ b/docs/documentation/case.md @@ -312,6 +312,7 @@ This is enabled by adding ``'elliptic_smoothing': "T",`` and ``'elliptic_smoothi | Parameter | Type | Description | | ---: | :----: | :--- | | `num_ibs` | Integer | Number of immersed boundary patches | +| `ib_neighborhood_radius` | Integer | Parameter that controls the neighborhood size for IB detection. | | `geometry` | Integer | Geometry configuration of the patch.| | `x[y,z]_centroid` | Real | Centroid of the applied geometry in the [x,y,z]-direction. | | `length_x[y,z]` | Real | Length, if applicable, in the [x,y,z]-direction. | @@ -373,6 +374,8 @@ Additional details on this specification can be found in [NACA airfoil](https:// - `ib_coefficient_of_friction` is the coefficient of friction used in IB collisions. +- `ib_neighborhood_radius` controls the size of the neighborhood size. This value defaults to 1, which indicates that any given rank is aware of IB's up to 1 ranks away. This parameter is required to strong-scale a case when IB's eventually grow to be larger than one full processor domain wide. + ### 5. Fluid Material's {#sec-fluid-materials} | Parameter | Type | Description | @@ -638,7 +641,7 @@ To restart the simulation from $k$-th time step, see @ref running "Restarting Ca | `alpha_wrt(i)` | Logical | Add the volume fraction of fluid $i$ to the database | | `gamma_wrt` | Logical | Add the specific heat ratio function to the database | | `heat_ratio_wrt` | Logical | Add the specific heat ratio to the database | -| `ib_state_wrt` | Logical | Write IB state and loads to a datafile at each time step | +| `ib_state_wrt` | Logical | Parameter to handle writing IB state on saves and outputting the state as a point mesh to SILO files. 
| | `pi_inf_wrt` | Logical | Add the liquid stiffness function to the database | | `pres_inf_wrt` | Logical | Add the liquid stiffness to the formatted database | | `c_wrt` | Logical | Add the sound speed to the database | @@ -706,7 +709,7 @@ If `file_per_process` is true, then pre_process, simulation, and post_process mu - `probe_wrt` activates the output of state variables at coordinates specified by `probe(i)%[x;y,z]`. -- `ib_state_wrt` activates the output of data specified by patch_ib(i)%force(:) (and torque, vel, angular_vel, angles, [x,y,z]_centroid) into a single binary datafile for all IBs at all timesteps. During post_processing, this file is converted into separate time histories for each IB. +- `ib_state_wrt` is used to trigger post-processing of the IB state to be written out as a point mesh in the SILO files. When no IBs are moving, it also triggers force and torque calculation so that those values may be written to the output state files. - `output_partial_domain` activates the output of part of the domain specified by `[x,y,z]_output%%beg` and `[x,y,z]_output%%end`. This is useful for large domains where only a portion of the domain is of interest. 
diff --git a/examples/2D_mibm_shock_cylinder/case.py b/examples/2D_mibm_shock_cylinder/case.py index 69aca0cc9b..9fbc86829d 100644 --- a/examples/2D_mibm_shock_cylinder/case.py +++ b/examples/2D_mibm_shock_cylinder/case.py @@ -87,7 +87,7 @@ "precision": 2, "prim_vars_wrt": "T", "E_wrt": "T", - "ib_state_wrt": "T", + "ib_state_wrt": "F", "parallel_io": "T", # Patch: Constant Tube filled with air # Specify the cylindrical air tube grid geometry diff --git a/src/common/m_constants.fpp b/src/common/m_constants.fpp index cacc50f528..d66fb10614 100644 --- a/src/common/m_constants.fpp +++ b/src/common/m_constants.fpp @@ -25,6 +25,7 @@ module m_constants integer, parameter :: num_probes_max = 10 !< Maximum number of flow probes in the simulation integer, parameter :: num_patches_max = 10 !< Maximum number of IC patches integer, parameter :: num_ib_patches_max = 50000 !< Maximum number of immersed boundary patches (patch_ib) + integer, parameter :: num_local_ibs_max = 2000 !< Maximum number of immersed boundary patches (patch_ib) integer, parameter :: num_bc_patches_max = 10 !< Maximum number of boundary condition patches integer, parameter :: max_2d_fourier_modes = 10 !< Max Fourier mode index for 2D modal patch (geometry 13) integer, parameter :: max_sph_harm_degree = 5 !< Max degree L for 3D spherical harmonic patch (geometry 14) diff --git a/src/common/m_derived_types.fpp b/src/common/m_derived_types.fpp index 6ee2e836a5..c4e5873ead 100644 --- a/src/common/m_derived_types.fpp +++ b/src/common/m_derived_types.fpp @@ -202,12 +202,10 @@ module m_derived_types type :: t_model_array ! 
Original CPU-side fields (unchanged) - type(t_model), allocatable :: model !< STL/OBJ geometry model - real(wp), allocatable, dimension(:,:,:) :: boundary_v !< Boundary vertices - real(wp), allocatable, dimension(:,:) :: interpolated_boundary_v !< Interpolated boundary vertices - integer :: boundary_edge_count !< Number of boundary edges - integer :: total_vertices !< Total vertex count - integer :: interpolate !< Interpolation flag + type(t_model), allocatable :: model !< STL/OBJ geometry model + real(wp), allocatable, dimension(:,:,:) :: boundary_v !< Boundary vertices + integer :: boundary_edge_count !< Number of boundary edges + integer :: total_vertices !< Total vertex count ! GPU-friendly flattened arrays integer :: ntrs !< Copy of model%ntrs @@ -272,9 +270,10 @@ module m_derived_types end type ic_patch_parameters type ib_patch_parameters - integer :: geometry !< Type of geometry for the patch + integer :: gbl_patch_id real(wp) :: x_centroid, y_centroid, z_centroid !< Geometric center coordinates of the patch + !> Centroid locations of intermediate steps in the time_stepper module real(wp) :: step_x_centroid, step_y_centroid, step_z_centroid real(wp), dimension(1:3) :: centroid_offset !< offset of center of mass from computed cell center for odd-shaped IBs diff --git a/src/common/m_model.fpp b/src/common/m_model.fpp index 0dab036f9b..a526eb1266 100644 --- a/src/common/m_model.fpp +++ b/src/common/m_model.fpp @@ -983,12 +983,13 @@ contains dx_local = minval(dx); dy_local = minval(dy) if (p /= 0) dz_local = minval(dz) + num_gbl_ibs = num_ibs allocate (stl_bounding_boxes(num_ibs,1:3,1:3)) do patch_id = 1, num_ibs if (patch_ib(patch_id)%geometry == 5 .or. 
patch_ib(patch_id)%geometry == 12) then allocate (models(patch_id)%model) - print *, " * Reading model: " // trim(patch_ib(patch_id)%model_filepath) + if (proc_rank == 0) print *, " * Reading model: " // trim(patch_ib(patch_id)%model_filepath) model = f_model_read(patch_ib(patch_id)%model_filepath) params%scale(:) = patch_ib(patch_id)%model_scale(:) @@ -1001,9 +1002,7 @@ contains params%scale(:) = 1._wp end if - if (proc_rank == 0) then - print *, " * Transforming model." - end if + if (proc_rank == 0) print *, " * Transforming model." ! Get the model center before transforming the model bbox_old = f_create_bbox(model) diff --git a/src/common/m_mpi_common.fpp b/src/common/m_mpi_common.fpp index 7d4b92705c..9cfdd82e14 100644 --- a/src/common/m_mpi_common.fpp +++ b/src/common/m_mpi_common.fpp @@ -94,6 +94,8 @@ contains proc_rank = 0 #endif + $:GPU_UPDATE(device='[num_procs, proc_rank]') + end subroutine s_mpi_initialize !> Set up MPI I/O data views and variable pointers for parallel file output. 
diff --git a/src/post_process/m_data_output.fpp b/src/post_process/m_data_output.fpp index aece31ff8d..3a3a7bb10c 100644 --- a/src/post_process/m_data_output.fpp +++ b/src/post_process/m_data_output.fpp @@ -1396,48 +1396,42 @@ contains end do close (file_unit) - end if - - call MPI_BCAST(ib_data, nBodies*NFIELDS_PER_IB, mpi_p, 0, MPI_COMM_WORLD, ierr) - - do i = 1, nBodies - force_x(i) = ib_data(i, 2); force_y(i) = ib_data(i, 3); force_z(i) = ib_data(i, 4) - torque_x(i) = ib_data(i, 5); torque_y(i) = ib_data(i, 6); torque_z(i) = ib_data(i, 7) - vel_x(i) = ib_data(i, 8); vel_y(i) = ib_data(i, 9); vel_z(i) = ib_data(i, 10) - omega_x(i) = ib_data(i, 11); omega_y(i) = ib_data(i, 12); omega_z(i) = ib_data(i, 13) - angle_x(i) = ib_data(i, 14); angle_y(i) = ib_data(i, 15); angle_z(i) = ib_data(i, 16) - px(i) = ib_data(i, 17); py(i) = ib_data(i, 18); pz(i) = ib_data(i, 19) - ib_diameter(i) = ib_data(i, 20)*2.0_wp - end do - if (proc_rank == 0) then - do i = 1, num_procs - write (meshnames(i), '(A,I0,A,I0,A)') '../p', i - 1, '/', t_step, '.silo:ib_bodies' - meshtypes(i) = DB_POINTMESH + do i = 1, nBodies + force_x(i) = ib_data(i, 2); force_y(i) = ib_data(i, 3); force_z(i) = ib_data(i, 4) + torque_x(i) = ib_data(i, 5); torque_y(i) = ib_data(i, 6); torque_z(i) = ib_data(i, 7) + vel_x(i) = ib_data(i, 8); vel_y(i) = ib_data(i, 9); vel_z(i) = ib_data(i, 10) + omega_x(i) = ib_data(i, 11); omega_y(i) = ib_data(i, 12); omega_z(i) = ib_data(i, 13) + angle_x(i) = ib_data(i, 14); angle_y(i) = ib_data(i, 15); angle_z(i) = ib_data(i, 16) + px(i) = ib_data(i, 17); py(i) = ib_data(i, 18); pz(i) = ib_data(i, 19) + ib_diameter(i) = ib_data(i, 20)*2.0_wp end do + + write (meshnames(1), '(A,I0,A)') '../p0/', t_step, '.silo:ib_bodies' + meshtypes(1) = DB_POINTMESH err = DBSET2DSTRLEN(len(meshnames(1))) - err = DBPUTMMESH(dbroot, 'ib_bodies', 16, num_procs, meshnames, len_trim(meshnames), meshtypes, DB_F77NULL, ierr) + err = DBPUTMMESH(dbroot, 'ib_bodies', 16, 1, meshnames, len_trim(meshnames), 
meshtypes, DB_F77NULL, ierr) + + err = DBPUTPM(dbfile, 'ib_bodies', 9, 3, px, py, pz, nBodies, DB_DOUBLE, DB_F77NULL, ierr) + + call s_write_ib_variable('ib_force_x', t_step, force_x, nBodies) + call s_write_ib_variable('ib_force_y', t_step, force_y, nBodies) + call s_write_ib_variable('ib_force_z', t_step, force_z, nBodies) + call s_write_ib_variable('ib_torque_x', t_step, torque_x, nBodies) + call s_write_ib_variable('ib_torque_y', t_step, torque_y, nBodies) + call s_write_ib_variable('ib_torque_z', t_step, torque_z, nBodies) + call s_write_ib_variable('ib_vel_x', t_step, vel_x, nBodies) + call s_write_ib_variable('ib_vel_y', t_step, vel_y, nBodies) + call s_write_ib_variable('ib_vel_z', t_step, vel_z, nBodies) + call s_write_ib_variable('ib_omega_x', t_step, omega_x, nBodies) + call s_write_ib_variable('ib_omega_y', t_step, omega_y, nBodies) + call s_write_ib_variable('ib_omega_z', t_step, omega_z, nBodies) + call s_write_ib_variable('ib_angle_x', t_step, angle_x, nBodies) + call s_write_ib_variable('ib_angle_y', t_step, angle_y, nBodies) + call s_write_ib_variable('ib_angle_z', t_step, angle_z, nBodies) + call s_write_ib_variable('ib_diameter', t_step, ib_diameter, nBodies) end if - err = DBPUTPM(dbfile, 'ib_bodies', 9, 3, px, py, pz, nBodies, DB_DOUBLE, DB_F77NULL, ierr) - - call s_write_ib_variable('ib_force_x', t_step, force_x, nBodies) - call s_write_ib_variable('ib_force_y', t_step, force_y, nBodies) - call s_write_ib_variable('ib_force_z', t_step, force_z, nBodies) - call s_write_ib_variable('ib_torque_x', t_step, torque_x, nBodies) - call s_write_ib_variable('ib_torque_y', t_step, torque_y, nBodies) - call s_write_ib_variable('ib_torque_z', t_step, torque_z, nBodies) - call s_write_ib_variable('ib_vel_x', t_step, vel_x, nBodies) - call s_write_ib_variable('ib_vel_y', t_step, vel_y, nBodies) - call s_write_ib_variable('ib_vel_z', t_step, vel_z, nBodies) - call s_write_ib_variable('ib_omega_x', t_step, omega_x, nBodies) - call 
s_write_ib_variable('ib_omega_y', t_step, omega_y, nBodies) - call s_write_ib_variable('ib_omega_z', t_step, omega_z, nBodies) - call s_write_ib_variable('ib_angle_x', t_step, angle_x, nBodies) - call s_write_ib_variable('ib_angle_y', t_step, angle_y, nBodies) - call s_write_ib_variable('ib_angle_z', t_step, angle_z, nBodies) - call s_write_ib_variable('ib_diameter', t_step, ib_diameter, nBodies) - deallocate (ib_data, px, py, pz, force_x, force_y, force_z) deallocate (torque_x, torque_y, torque_z, vel_x, vel_y, vel_z) deallocate (omega_x, omega_y, omega_z, angle_x, angle_y, angle_z) @@ -1450,23 +1444,18 @@ contains !> Write a single IB point-variable to the Silo database slave and master files. subroutine s_write_ib_variable(varname, t_step, data, nBodies) - character(len=*), intent(in) :: varname - integer, intent(in) :: t_step - real(wp), dimension(:), intent(in) :: data - integer, intent(in) :: nBodies - character(len=4*name_len), dimension(num_procs) :: var_names - integer, dimension(num_procs) :: var_types - integer :: ierr, i - - if (proc_rank == 0) then - do i = 1, num_procs - write (var_names(i), '(A,I0,A,I0,A)') '../p', i - 1, '/', t_step, '.silo:' // trim(varname) - var_types(i) = DB_POINTVAR - end do - err = DBSET2DSTRLEN(len(var_names(1))) - err = DBPUTMVAR(dbroot, trim(varname), len_trim(varname), num_procs, var_names, len_trim(var_names), var_types, & - & DB_F77NULL, ierr) - end if + character(len=*), intent(in) :: varname + integer, intent(in) :: t_step + real(wp), dimension(:), intent(in) :: data + integer, intent(in) :: nBodies + character(len=4*name_len) :: var_name_entry + integer :: var_type_entry, ierr + + write (var_name_entry, '(A,I0,A)') '../p0/', t_step, '.silo:' // trim(varname) + var_type_entry = DB_POINTVAR + err = DBSET2DSTRLEN(len(var_name_entry)) + err = DBPUTMVAR(dbroot, trim(varname), len_trim(varname), 1, var_name_entry, len_trim(var_name_entry), var_type_entry, & + & DB_F77NULL, ierr) err = DBPUTPV1(dbfile, trim(varname), 
len_trim(varname), 'ib_bodies', 9, data, nBodies, DB_DOUBLE, DB_F77NULL, ierr) diff --git a/src/simulation/m_collisions.fpp b/src/simulation/m_collisions.fpp index fe860cb055..aec23af491 100644 --- a/src/simulation/m_collisions.fpp +++ b/src/simulation/m_collisions.fpp @@ -19,7 +19,8 @@ module m_collisions implicit none - private; public :: s_apply_collision_forces, s_initialize_collisions_module, s_finalize_collisions_module + private; public :: s_apply_collision_forces, s_initialize_collisions_module, s_finalize_collisions_module, & + & f_local_rank_owns_location, f_neighborhood_ranks_own_location ! overlap distances for computing collisions integer, allocatable, dimension(:,:) :: collision_lookup real(wp), allocatable, dimension(:,:) :: wall_overlap_distances @@ -113,7 +114,7 @@ contains overlap_distance = patch_ib(pid1)%radius + patch_ib(pid2)%radius - norm2(normal_vector) if (overlap_distance > 0._wp) then ! if the two patches are close enough to collide normal_vector = normal_vector/norm2(normal_vector) - if (f_local_rank_owns_collision(centroid_1)) then + if (f_local_rank_owns_location(centroid_1)) then ! compute constants of the collision effective_mass = 1.0_wp/((1.0_wp/patch_ib(pid1)%mass) + (1._wp/(patch_ib(pid2)%mass))) k = spring_stiffness*effective_mass @@ -192,7 +193,7 @@ contains ! 
ensure the local rank owns that collision before proceeding collision_location = [patch_ib(patch_id)%x_centroid, patch_ib(patch_id)%y_centroid, 0._wp] if (num_dims == 3) collision_location(3) = patch_ib(patch_id)%z_centroid - if (f_local_rank_owns_collision(collision_location)) then + if (f_local_rank_owns_location(collision_location)) then k = spring_stiffness*patch_ib(patch_id)%mass eta = damping_parameter*patch_ib(patch_id)%mass @@ -397,45 +398,78 @@ contains end subroutine s_detect_wall_collisions !> @brief function checks if this local MPI processor owns this specific collision - function f_local_rank_owns_collision(collision_location) result(owns_collision) + function f_local_rank_owns_location(location) result(owns_collision) $:GPU_ROUTINE(parallelism='[seq]') - real(wp), dimension(3), intent(in) :: collision_location + real(wp), dimension(3), intent(in) :: location logical :: owns_collision real(wp), dimension(3) :: projected_location - #:if defined('MFC_MPI') - if (num_procs == 1) then - owns_collision = .true. - else - projected_location(:) = collision_location(:) - - ! catch the edge case where th collision lies just outside the computational domain - #:for X, ID in [('x', 1), ('y', 2), ('z', 3)] - if (num_dims >= ${ID}$) then - if (ib_bc_${X}$%beg /= BC_PERIODIC) then - ! if it is outside the domain in one direction, project it somewhere inside so at least one rank owns it - if (collision_location(${ID}$) < ${X}$_domain%beg) then - projected_location(${ID}$) = ${X}$_domain%beg - else if (${X}$_domain%end < collision_location(${ID}$)) then - projected_location(${ID}$) = ${X}$_domain%end - 1.0e-10_wp - end if + owns_collision = .true. + +#ifdef MFC_MPI + if (num_procs > 1) then + projected_location(:) = location(:) + + ! catch the edge case where th collision lies just outside the computational domain + #:for X, ID, DIM in [('x', 1, 'm'), ('y', 2, 'n'), ('z', 3, 'p')] + if (num_dims >= ${ID}$) then + if (ib_bc_${X}$%beg /= BC_PERIODIC) then + ! 
if it is outside the domain in one direction, project it somewhere inside so at least one rank owns it + if (location(${ID}$) < ${X}$_domain%beg) then + projected_location(${ID}$) = ${X}$_domain%beg + else if (${X}$_domain%end < location(${ID}$)) then + projected_location(${ID}$) = ${X}$_domain%end - 1.0e-10_wp end if end if - #:endfor + owns_collision = owns_collision .and. ${X}$_cb(-1) <= projected_location(${ID}$) & + & .and. projected_location(${ID}$) < ${X}$_cb(${DIM}$) + end if + #:endfor + end if +#endif - ! the object that contains the collision location owns the collisions - owns_collision = x_cb(-1) <= projected_location(1) .and. projected_location(1) < x_cb(m) - owns_collision = owns_collision .and. y_cb(-1) <= projected_location(2) .and. projected_location(2) < y_cb(n) - if (num_dims == 3) owns_collision = owns_collision .and. z_cb(-1) <= projected_location(3) & - & .and. projected_location(3) < z_cb(p) - end if - #:else + end function f_local_rank_owns_location + + !> @brief function checks if this local MPI processor owns this specific collision + function f_neighborhood_ranks_own_location(location) result(owns_collision) + + $:GPU_ROUTINE(parallelism='[seq]') + + real(wp), dimension(3), intent(in) :: location + logical :: owns_collision, periodic_owner + real(wp) :: temp_neighbor_domain + integer :: i + + owns_collision = .true. + +#ifdef MFC_MPI + if (num_procs > 2) then + ! catch the edge case where th collision lies just outside the computational domain owns_collision = .true. - #:endif + #:for X, ID in [('x', 1), ('y', 2,), ('z', 3,)] + if (num_dims >= ${ID}$) then + if (ib_bc_${X}$%beg == BC_PERIODIC .and. neighbor_domain_${X}$%beg >= neighbor_domain_${X}$%end) then + ! project right side to the left + temp_neighbor_domain = neighbor_domain_${X}$%end + (${X}$_domain%end - ${X}$_domain%beg) + periodic_owner = neighbor_domain_${X}$%beg <= location(${ID}$) .and. location(${ID}$) < temp_neighbor_domain + ! 
project the left side to the right + temp_neighbor_domain = neighbor_domain_${X}$%beg - (${X}$_domain%end - ${X}$_domain%beg) + periodic_owner = periodic_owner .or. (temp_neighbor_domain <= location(${ID}$) .and. location(${ID}$) & + & < neighbor_domain_${X}$%end) + + owns_collision = owns_collision .and. periodic_owner + else + owns_collision = owns_collision .and. neighbor_domain_${X}$%beg <= location(${ID}$) .and. location(${ID}$) & + & < neighbor_domain_${X}$%end + end if + end if + #:endfor + end if +#endif - end function f_local_rank_owns_collision + end function f_neighborhood_ranks_own_location subroutine s_finalize_collisions_module() diff --git a/src/simulation/m_compute_levelset.fpp b/src/simulation/m_compute_levelset.fpp index 96fdebb357..ac0071db73 100644 --- a/src/simulation/m_compute_levelset.fpp +++ b/src/simulation/m_compute_levelset.fpp @@ -88,8 +88,8 @@ contains radius = patch_ib(ib_patch_id)%radius - dist_vec(1) = x_cc(i) - patch_ib(ib_patch_id)%x_centroid - real(gp%x_periodicity, wp)*(x_domain%end - x_domain%beg) - dist_vec(2) = y_cc(j) - patch_ib(ib_patch_id)%y_centroid - real(gp%y_periodicity, wp)*(y_domain%end - y_domain%beg) + dist_vec(1) = x_cc(i) - (patch_ib(ib_patch_id)%x_centroid + real(gp%x_periodicity, wp)*(x_domain%end - x_domain%beg)) + dist_vec(2) = y_cc(j) - (patch_ib(ib_patch_id)%y_centroid + real(gp%y_periodicity, wp)*(y_domain%end - y_domain%beg)) dist_vec(3) = 0._wp dist = sqrt(sum(dist_vec**2)) diff --git a/src/simulation/m_data_output.fpp b/src/simulation/m_data_output.fpp index a5fdb3ef81..effc020325 100644 --- a/src/simulation/m_data_output.fpp +++ b/src/simulation/m_data_output.fpp @@ -909,7 +909,7 @@ contains integer :: ifile, ierr integer, dimension(MPI_STATUS_SIZE) :: status logical :: file_exist - integer :: i + integer :: i, ib_idx integer, parameter :: NFIELDS_PER_IB = 20 real(wp) :: ib_buf(NFIELDS_PER_IB) @@ -923,19 +923,6 @@ contains end if call s_mpi_barrier() - ! 
Divide num_ibs across num_procs - nibs_per_rank = num_ibs/num_procs - remainder = mod(num_ibs, num_procs) - - ! Ranks < remainder get one extra IB - if (proc_rank < remainder) then - ib_start = proc_rank*(nibs_per_rank + 1) + 1 - ib_end = ib_start + nibs_per_rank ! nibs_per_rank + 1 total - else - ib_start = remainder*(nibs_per_rank + 1) + (proc_rank - remainder)*nibs_per_rank + 1 - ib_end = ib_start + nibs_per_rank - 1 - end if - write (file_loc, '(A,I0,A)') '/restart_data/ib_state_', t_step, '.dat' file_loc = trim(case_dir) // trim(file_loc) @@ -947,20 +934,21 @@ contains call MPI_FILE_OPEN(MPI_COMM_WORLD, file_loc, ior(MPI_MODE_WRONLY, MPI_MODE_CREATE), mpi_info_int, ifile, ierr) - do i = ib_start, ib_end + do i = 1, num_local_ibs + ib_idx = local_ib_patch_ids(i) ib_buf(1) = mytime - ib_buf(2:4) = patch_ib(i)%force(1:3) - ib_buf(5:7) = patch_ib(i)%torque(1:3) - ib_buf(8:10) = patch_ib(i)%vel(1:3) - ib_buf(11:13) = patch_ib(i)%angular_vel(1:3) - ib_buf(14:16) = patch_ib(i)%angles(1:3) - ib_buf(17) = patch_ib(i)%x_centroid - ib_buf(18) = patch_ib(i)%y_centroid - ib_buf(19) = patch_ib(i)%z_centroid - ib_buf(20) = patch_ib(i)%radius + ib_buf(2:4) = patch_ib(ib_idx)%force(1:3) + ib_buf(5:7) = patch_ib(ib_idx)%torque(1:3) + ib_buf(8:10) = patch_ib(ib_idx)%vel(1:3) + ib_buf(11:13) = patch_ib(ib_idx)%angular_vel(1:3) + ib_buf(14:16) = patch_ib(ib_idx)%angles(1:3) + ib_buf(17) = patch_ib(ib_idx)%x_centroid + ib_buf(18) = patch_ib(ib_idx)%y_centroid + ib_buf(19) = patch_ib(ib_idx)%z_centroid + ib_buf(20) = patch_ib(ib_idx)%radius ! 
Global IB index (i) determines position in file - disp = int(i - 1, MPI_OFFSET_KIND)*int(NFIELDS_PER_IB, MPI_OFFSET_KIND)*WP_MOK + disp = int(patch_ib(ib_idx)%gbl_patch_id - 1, MPI_OFFSET_KIND)*int(NFIELDS_PER_IB, MPI_OFFSET_KIND)*WP_MOK call MPI_FILE_WRITE_AT(ifile, disp, ib_buf, NFIELDS_PER_IB, mpi_p, status, ierr) end do diff --git a/src/simulation/m_global_parameters.fpp b/src/simulation/m_global_parameters.fpp index 7fb3353e2c..071fdc49be 100644 --- a/src/simulation/m_global_parameters.fpp +++ b/src/simulation/m_global_parameters.fpp @@ -28,6 +28,7 @@ module m_global_parameters integer :: t_step_old !< Existing IC/grid folder ! Computational Domain Parameters integer :: proc_rank !< Rank of the local processor + $:GPU_DECLARE(create='[num_procs, proc_rank]') !> @name Number of cells in the x-, y- and z-directions, respectively !> @{ integer :: m, n, p @@ -229,7 +230,8 @@ module m_global_parameters $:GPU_DECLARE(create='[ib_bc_x, ib_bc_y, ib_bc_z]') #endif type(bounds_info) :: x_domain, y_domain, z_domain - $:GPU_DECLARE(create='[x_domain, y_domain, z_domain]') + type(bounds_info) :: neighbor_domain_x, neighbor_domain_y, neighbor_domain_z + $:GPU_DECLARE(create='[x_domain, y_domain, z_domain, neighbor_domain_x, neighbor_domain_y, neighbor_domain_z]') real(wp) :: x_a, y_a, z_a real(wp) :: x_b, y_b, z_b logical :: parallel_io !< Format of the data files @@ -336,18 +338,23 @@ module m_global_parameters !> @name Immersed Boundaries !> @{ - logical :: ib - integer :: num_ibs - integer :: collision_model - real(wp) :: coefficient_of_restitution - real(wp) :: collision_time - real(wp) :: ib_coefficient_of_friction - logical :: ib_state_wrt - type(ib_patch_parameters), dimension(num_ib_patches_max) :: patch_ib !< Immersed boundary patch parameters - type(vec3_dt), allocatable, dimension(:) :: airfoil_grid_u, airfoil_grid_l - integer :: Np - - $:GPU_DECLARE(create='[ib, num_ibs, patch_ib, Np, airfoil_grid_u, airfoil_grid_l]') + logical :: ib + integer :: num_ibs !< 
number of IBs that the current processor is aware of + integer :: num_gbl_ibs !< number of IBs in the overall simulation + integer :: num_local_ibs !< number of IBs that lie inside the processor domain + integer :: ib_neighborhood_radius !< neighborhood radius in ranks (1 = immediate neighbors) + integer :: collision_model + real(wp) :: coefficient_of_restitution + real(wp) :: collision_time + real(wp) :: ib_coefficient_of_friction + logical :: ib_state_wrt + type(ib_patch_parameters), allocatable, dimension(:) :: patch_ib !< Immersed boundary patch parameters + integer, dimension(num_local_ibs_max) :: local_ib_patch_ids !< lookup table of IBs in the local compute domain + integer, allocatable, dimension(:,:,:) :: ib_neighbor_ranks !< MPI ranks of neighborhood domains, indexed (-N:N,-N:N,-N:N) + type(vec3_dt), allocatable, dimension(:) :: airfoil_grid_u, airfoil_grid_l + integer :: Np + + $:GPU_DECLARE(create='[ib, num_ibs, num_gbl_ibs, num_local_ibs, patch_ib, Np, airfoil_grid_u, airfoil_grid_l, local_ib_patch_ids]') $:GPU_DECLARE(create='[ib_coefficient_of_friction]') !> @} @@ -634,6 +641,7 @@ contains ! Immersed Boundaries ib = .false. num_ibs = dflt_int + ib_neighborhood_radius = 1 collision_model = 0 coefficient_of_restitution = dflt_real collision_time = dflt_real @@ -787,7 +795,9 @@ contains relativity = .false. #:endif + allocate (patch_ib(num_ib_patches_max)) do i = 1, num_ib_patches_max + patch_ib(i)%gbl_patch_id = i patch_ib(i)%geometry = dflt_int patch_ib(i)%x_centroid = 0._wp patch_ib(i)%y_centroid = 0._wp diff --git a/src/simulation/m_ib_patches.fpp b/src/simulation/m_ib_patches.fpp index cf3ad9ecfa..9bdcc9dee7 100644 --- a/src/simulation/m_ib_patches.fpp +++ b/src/simulation/m_ib_patches.fpp @@ -109,7 +109,7 @@ contains radius = patch_ib(patch_id)%radius ! 
encode the periodicity information into the patch_id - call s_encode_patch_periodicity(patch_id, xp, yp, 0, encoded_patch_id) + call s_encode_patch_periodicity(patch_ib(patch_id)%gbl_patch_id, xp, yp, 0, encoded_patch_id) ! find the indices to the left and right of the IB in i, j, k il = -gp_layers - 1 @@ -221,7 +221,7 @@ contains end if ! encode the periodicity information into the patch_id - call s_encode_patch_periodicity(patch_id, xp, yp, 0, encoded_patch_id) + call s_encode_patch_periodicity(patch_ib(patch_id)%gbl_patch_id, xp, yp, 0, encoded_patch_id) ! find the indices to the left and right of the IB in i, j, k il = -gp_layers - 1 @@ -376,7 +376,7 @@ contains end if ! encode the periodicity information into the patch_id - call s_encode_patch_periodicity(patch_id, xp, yp, zp, encoded_patch_id) + call s_encode_patch_periodicity(patch_ib(patch_id)%gbl_patch_id, xp, yp, zp, encoded_patch_id) ! find the indices to the left and right of the IB in i, j, k il = -gp_layers - 1 @@ -466,7 +466,7 @@ contains inverse_rotation(:,:) = patch_ib(patch_id)%rotation_matrix_inverse(:,:) ! encode the periodicity information into the patch_id - call s_encode_patch_periodicity(patch_id, xp, yp, 0, encoded_patch_id) + call s_encode_patch_periodicity(patch_ib(patch_id)%gbl_patch_id, xp, yp, 0, encoded_patch_id) ! find the indices to the left and right of the IB in i, j, k il = -gp_layers - 1 @@ -510,16 +510,12 @@ contains real(wp) :: radius real(wp), dimension(1:3) :: center - ! Variables to initialize the pressure field that corresponds to the bubble-collapse test case found in Tiwari et al. (2013) - - ! 
Transferring spherical patch's radius, centroid, smoothing patch identity and smoothing coefficient information - center(1) = patch_ib(patch_id)%x_centroid + real(xp, wp)*(x_domain%end - x_domain%beg) center(2) = patch_ib(patch_id)%y_centroid + real(yp, wp)*(y_domain%end - y_domain%beg) center(3) = patch_ib(patch_id)%z_centroid + real(zp, wp)*(z_domain%end - z_domain%beg) radius = patch_ib(patch_id)%radius - ! completely skip particles no in the domain + ! completely skip particles not in the domain if (center(1) - radius > x_cc(m + gp_layers + 1) .or. center(1) + radius < x_cc(-gp_layers - 1) .or. center(2) & & - radius > y_cc(n + gp_layers + 1) .or. center(2) + radius < y_cc(-gp_layers - 1) .or. center(3) - radius > z_cc(p & & + gp_layers + 1) .or. center(3) + radius < z_cc(-gp_layers - 1)) then @@ -527,7 +523,7 @@ contains end if ! encode the periodicity information into the patch_id - call s_encode_patch_periodicity(patch_id, xp, yp, zp, encoded_patch_id) + call s_encode_patch_periodicity(patch_ib(patch_id)%gbl_patch_id, xp, yp, zp, encoded_patch_id) ! find the indices to the left and right of the IB in i, j, k il = -gp_layers - 1 @@ -542,18 +538,12 @@ contains ! Checking whether the sphere covers a particular cell in the domain and verifying whether the current patch has permission ! to write to that cell. If both queries check out, the primitive variables of the current patch are assigned to this cell. - $:GPU_PARALLEL_LOOP(private='[i, j, k, cart_y, cart_z]', copyin='[encoded_patch_id, center, radius]', collapse=3) + $:GPU_PARALLEL_LOOP(private='[i, j, k]', copyin='[encoded_patch_id, center, radius]', collapse=3) do k = kl, kr do j = jl, jr do i = il, ir - if (grid_geometry == 3) then - call s_convert_cylindrical_to_cartesian_coord(y_cc(j), z_cc(k)) - else - cart_y = y_cc(j) - cart_z = z_cc(k) - end if ! 
Updating the patch identities bookkeeping variable - if (((x_cc(i) - center(1))**2 + (cart_y - center(2))**2 + (cart_z - center(3))**2 <= radius**2)) then + if (((x_cc(i) - center(1))**2 + (y_cc(j) - center(2))**2 + (z_cc(k) - center(3))**2 <= radius**2)) then ib_markers%sf(i, j, k) = encoded_patch_id end if end do @@ -586,7 +576,7 @@ contains inverse_rotation(:,:) = patch_ib(patch_id)%rotation_matrix_inverse(:,:) ! encode the periodicity information into the patch_id - call s_encode_patch_periodicity(patch_id, xp, yp, zp, encoded_patch_id) + call s_encode_patch_periodicity(patch_ib(patch_id)%gbl_patch_id, xp, yp, zp, encoded_patch_id) ! find the indices to the left and right of the IB in i, j, k il = -gp_layers - 1 @@ -656,7 +646,7 @@ contains inverse_rotation(:,:) = patch_ib(patch_id)%rotation_matrix_inverse(:,:) ! encode the periodicity information into the patch_id - call s_encode_patch_periodicity(patch_id, xp, yp, zp, encoded_patch_id) + call s_encode_patch_periodicity(patch_ib(patch_id)%gbl_patch_id, xp, yp, zp, encoded_patch_id) il = -gp_layers - 1 jl = -gp_layers - 1 @@ -724,7 +714,7 @@ contains inverse_rotation(:,:) = patch_ib(patch_id)%rotation_matrix_inverse(:,:) ! encode the periodicity information into the patch_id - call s_encode_patch_periodicity(patch_id, xp, yp, 0, encoded_patch_id) + call s_encode_patch_periodicity(patch_ib(patch_id)%gbl_patch_id, xp, yp, 0, encoded_patch_id) ! 
find the indices to the left and right of the IB in i, j, k il = -gp_layers - 1 @@ -762,7 +752,7 @@ contains integer, intent(in) :: xp, yp !< integers containing the periodicity projection information integer :: i, j, il, ir, jl, jr !< Generic loop iterators integer :: spc, encoded_patch_id - integer :: cx, cy + integer :: cx, cy, gbl_patch_id real(wp) :: lx(2), ly(2) real(wp), dimension(1:2) :: bbox_min, bbox_max real(wp), dimension(1:3) :: local_corner, world_corner @@ -771,6 +761,7 @@ contains real(wp), dimension(1:3) :: center, xy_local real(wp), dimension(1:3,1:3) :: inverse_rotation, rotation + gbl_patch_id = patch_ib(patch_id)%gbl_patch_id center = 0._wp center(1) = patch_ib(patch_id)%x_centroid + real(xp, wp)*(x_domain%end - x_domain%beg) center(2) = patch_ib(patch_id)%y_centroid + real(yp, wp)*(y_domain%end - y_domain%beg) @@ -781,7 +772,7 @@ contains threshold = patch_ib(patch_id)%model_threshold ! encode the periodicity information into the patch_id - call s_encode_patch_periodicity(patch_id, xp, yp, 0, encoded_patch_id) + call s_encode_patch_periodicity(patch_ib(patch_id)%gbl_patch_id, xp, yp, 0, encoded_patch_id) il = -gp_layers - 1 jl = -gp_layers - 1 @@ -789,10 +780,10 @@ contains jr = n + gp_layers + 1 ! 
Local-space bounding box extents (min=1, max=2 in the third index) - lx(1) = stl_bounding_boxes(patch_id, 1, 1) + offset(1) - lx(2) = stl_bounding_boxes(patch_id, 1, 3) + offset(1) - ly(1) = stl_bounding_boxes(patch_id, 2, 1) + offset(2) - ly(2) = stl_bounding_boxes(patch_id, 2, 3) + offset(2) + lx(1) = stl_bounding_boxes(gbl_patch_id, 1, 1) + offset(1) + lx(2) = stl_bounding_boxes(gbl_patch_id, 1, 3) + offset(1) + ly(1) = stl_bounding_boxes(gbl_patch_id, 2, 1) + offset(2) + ly(2) = stl_bounding_boxes(gbl_patch_id, 2, 3) + offset(2) bbox_min = 1e12 bbox_max = -1e12 @@ -819,7 +810,7 @@ contains xy_local = matmul(inverse_rotation, xy_local) xy_local = xy_local - offset - eta = f_model_is_inside_flat(gpu_ntrs(patch_id), patch_id, xy_local) + eta = f_model_is_inside_flat(gpu_ntrs(gbl_patch_id), gbl_patch_id, xy_local) ! Reading STL boundary vertices and compute the levelset and levelset_norm if (eta > threshold) then @@ -838,7 +829,7 @@ contains type(integer_field), intent(inout) :: ib_markers integer, intent(in) :: xp, yp, zp !< integers containing the periodicity projection information integer :: i, j, k, il, ir, jl, jr, kl, kr !< Generic loop iterators - integer :: spc, encoded_patch_id + integer :: spc, encoded_patch_id, gbl_patch_id real(wp) :: eta, threshold real(wp), dimension(1:3) :: offset real(wp), dimension(1:3) :: center, xyz_local @@ -847,6 +838,7 @@ contains real(wp) :: lx(2), ly(2), lz(2) real(wp), dimension(1:3) :: bbox_min, bbox_max, local_corner, world_corner + gbl_patch_id = patch_ib(patch_id)%gbl_patch_id center = 0._wp center(1) = patch_ib(patch_id)%x_centroid + real(xp, wp)*(x_domain%end - x_domain%beg) center(2) = patch_ib(patch_id)%y_centroid + real(yp, wp)*(y_domain%end - y_domain%beg) @@ -858,7 +850,7 @@ contains rotation(:,:) = patch_ib(patch_id)%rotation_matrix(:,:) ! 
encode the periodicity information into the patch_id - call s_encode_patch_periodicity(patch_id, xp, yp, zp, encoded_patch_id) + call s_encode_patch_periodicity(patch_ib(patch_id)%gbl_patch_id, xp, yp, zp, encoded_patch_id) il = -gp_layers - 1 jl = -gp_layers - 1 @@ -868,12 +860,12 @@ contains kr = p + gp_layers + 1 ! Local-space bounding box extents (min=1, max=2 in the third index) - lx(1) = stl_bounding_boxes(patch_id, 1, 1) + offset(1) - lx(2) = stl_bounding_boxes(patch_id, 1, 3) + offset(1) - ly(1) = stl_bounding_boxes(patch_id, 2, 1) + offset(2) - ly(2) = stl_bounding_boxes(patch_id, 2, 3) + offset(2) - lz(1) = stl_bounding_boxes(patch_id, 3, 1) + offset(3) - lz(2) = stl_bounding_boxes(patch_id, 3, 3) + offset(3) + lx(1) = stl_bounding_boxes(gbl_patch_id, 1, 1) + offset(1) + lx(2) = stl_bounding_boxes(gbl_patch_id, 1, 3) + offset(1) + ly(1) = stl_bounding_boxes(gbl_patch_id, 2, 1) + offset(2) + ly(2) = stl_bounding_boxes(gbl_patch_id, 2, 3) + offset(2) + lz(1) = stl_bounding_boxes(gbl_patch_id, 3, 1) + offset(3) + lz(2) = stl_bounding_boxes(gbl_patch_id, 3, 3) + offset(3) bbox_min = 1e12 bbox_max = -1e12 @@ -906,7 +898,7 @@ contains xyz_local = matmul(inverse_rotation, xyz_local) xyz_local = xyz_local - offset - eta = f_model_is_inside_flat(gpu_ntrs(patch_id), patch_id, xyz_local) + eta = f_model_is_inside_flat(gpu_ntrs(gbl_patch_id), gbl_patch_id, xyz_local) if (eta > patch_ib(patch_id)%model_threshold) then ib_markers%sf(i, j, k) = encoded_patch_id @@ -1039,7 +1031,7 @@ contains temp_y_per = y_periodicity; if (y_periodicity == -1) temp_y_per = 2 temp_z_per = z_periodicity; if (z_periodicity == -1) temp_z_per = 2 - offset = (num_ibs + 1)*temp_x_per + 3*(num_ibs + 1)*temp_y_per + 9*(num_ibs + 1)*temp_z_per + offset = (num_gbl_ibs + 1)*temp_x_per + 3*(num_gbl_ibs + 1)*temp_y_per + 9*(num_gbl_ibs + 1)*temp_z_per encoded_patch_id = patch_id + offset end subroutine s_encode_patch_periodicity @@ -1054,7 +1046,7 @@ contains integer, intent(out), optional :: 
x_periodicity, y_periodicity, z_periodicity integer :: offset, remainder, xp, yp, zp, base - base = num_ibs + 1 + base = num_gbl_ibs + 1 patch_id = mod(encoded_patch_id - 1, base) + 1 offset = (encoded_patch_id - patch_id)/base diff --git a/src/simulation/m_ibm.fpp b/src/simulation/m_ibm.fpp index 5f865cffeb..7544277af3 100644 --- a/src/simulation/m_ibm.fpp +++ b/src/simulation/m_ibm.fpp @@ -71,25 +71,20 @@ contains call nvtxStartRange("SETUP-IBM-MODULE") ! do all set up for moving immersed boundaries - moving_immersed_boundary_flag = .false. do i = 1, num_ibs if (patch_ib(i)%moving_ibm /= 0) then call s_compute_moment_of_inertia(i, patch_ib(i)%angular_vel) - moving_immersed_boundary_flag = .true. end if call s_update_ib_rotation_matrix(i) end do $:GPU_UPDATE(device='[patch_ib(1:num_ibs)]') ! GPU routines require updated cell centers - $:GPU_UPDATE(device='[num_ibs, x_cc, y_cc, dx, dy, x_domain, y_domain, ib_bc_x%beg, ib_bc_y%beg]') + $:GPU_UPDATE(device='[num_ibs, num_gbl_ibs, x_cc, y_cc, dx, dy, x_domain, y_domain, ib_bc_x%beg, ib_bc_y%beg]') if (p /= 0) then $:GPU_UPDATE(device='[z_cc, dz, z_domain, ib_bc_z%beg]') end if - ! allocate STL models - call s_instantiate_STL_models() - ! recompute the new ib_patch locations and broadcast them. ib_markers%sf = 0._wp $:GPU_UPDATE(device='[ib_markers%sf]') @@ -123,6 +118,8 @@ contains call nvtxEndRange + ! print *, proc_rank, num_local_ibs, num_ibs, num_gbl_ibs + end subroutine s_ibm_setup !> Update the conservative variables at the ghost points @@ -247,6 +244,8 @@ contains q_prim_vf(eqn_idx%E)%sf(j, k, l) = q_prim_vf(eqn_idx%E)%sf(j, k, & & l) + pres_IP/(1._wp - 2._wp*abs(gp%levelset*alpha_rho_IP(q)/pres_IP) & & *dot_product(patch_ib(patch_id) %force/patch_ib(patch_id)%mass, gp%levelset_norm)) + ! q_prim_vf(eqn_idx%E)%sf(j, k, l) = q_prim_vf(eqn_idx%E)%sf(j, k, & & l) + pres_IP/(1._wp - + ! 2._wp*abs(gp%levelset*alpha_rho_IP(q)/pres_IP)) ! 
TODO :: REMOVE ME end do end if @@ -535,7 +534,7 @@ contains integer :: i, j, k, ii, jj, kk, gp_layers_z !< Iterator variables integer :: xp, yp, zp !< periodicities integer :: count, count_i, local_idx - integer :: patch_id, encoded_patch_id + integer :: patch_id, encoded_patch_id, neighborhood_patch_id logical :: is_gp count = 0 @@ -543,9 +542,9 @@ contains gp_layers_z = gp_layers if (p == 0) gp_layers_z = 0 - $:GPU_PARALLEL_LOOP(private='[i, j, k, ii, jj, kk, is_gp, local_idx, patch_id, encoded_patch_id, xp, yp, zp]', & - & copyin='[count, count_i, x_domain, y_domain, z_domain]', firstprivate='[gp_layers, gp_layers_z]', & - & collapse=3) + $:GPU_PARALLEL_LOOP(private='[i, j, k, ii, jj, kk, is_gp, local_idx, patch_id, encoded_patch_id, neighborhood_patch_id, & + & xp, yp, zp]', copyin='[count, count_i, x_domain, y_domain, z_domain]', firstprivate='[gp_layers, & + & gp_layers_z]', collapse=3) do i = 0, m do j = 0, n do k = 0, p @@ -572,11 +571,12 @@ contains ghost_points_in(local_idx)%loc = [i, j, k] encoded_patch_id = ib_markers%sf(i, j, k) call s_decode_patch_periodicity(encoded_patch_id, patch_id, xp, yp, zp) - ghost_points_in(local_idx)%ib_patch_id = patch_id + call s_get_neighborhood_idx(patch_id, neighborhood_patch_id) + ghost_points_in(local_idx)%ib_patch_id = neighborhood_patch_id ghost_points_in(local_idx)%x_periodicity = xp ghost_points_in(local_idx)%y_periodicity = yp ghost_points_in(local_idx)%z_periodicity = zp - ghost_points_in(local_idx)%slip = patch_ib(patch_id)%slip + ghost_points_in(local_idx)%slip = patch_ib(neighborhood_patch_id)%slip if ((x_cc(i) - dx(i)) < x_domain%beg) then ghost_points_in(local_idx)%DB(1) = -1 @@ -719,9 +719,10 @@ contains !> Interpolate primitive variables to a ghost point's image point using bilinear or trilinear interpolation subroutine s_interpolate_image_point(q_prim_vf, gp, alpha_rho_IP, alpha_IP, pres_IP, vel_IP, c_IP, r_IP, v_IP, pb_IP, mv_IP, & + & nmom_IP, pb_in, mv_in, presb_IP, massv_IP) - & nmom_IP, pb_in, 
mv_in, presb_IP, massv_IP) $:GPU_ROUTINE(parallelism='[seq]') + type(scalar_field), dimension(sys_size), intent(in) :: q_prim_vf !< Primitive Variables real(stp), optional, dimension(idwbuff(1)%beg:,idwbuff(2)%beg:,idwbuff(3)%beg:,1:,1:), intent(in) :: pb_in, mv_in type(ghost_point), intent(in) :: gp @@ -1010,9 +1011,8 @@ contains call s_apply_collision_forces(ghost_points, num_gps, ib_markers, forces, torques) - ! reduce the forces across all MPI ranks - call s_mpi_allreduce_vectors_sum(forces, forces, num_ibs, 3) - call s_mpi_allreduce_vectors_sum(torques, torques, num_ibs, 3) + ! reduce the forces across local neighborhood ranks + call s_communicate_ib_forces(forces, torques) ! consider body forces after reducing to avoid double counting do i = 1, num_ibs @@ -1108,35 +1108,33 @@ contains end subroutine s_compute_centroid_offset !> Computes the moment of inertia for an immersed boundary - subroutine s_compute_moment_of_inertia(ib_marker, axis) + subroutine s_compute_moment_of_inertia(ib_idx, axis) real(wp), dimension(3), intent(in) :: axis !< the axis about which we compute the moment. Only required in 3D. - integer, intent(in) :: ib_marker + integer, intent(in) :: ib_idx real(wp) :: moment, distance_to_axis, cell_volume real(wp), dimension(3) :: position, closest_point_along_axis, vector_to_axis, normal_axis - integer :: i, j, k, count + integer :: i, j, k, count, ib_marker if (p == 0) then normal_axis = [0, 0, 1] else if (sqrt(sum(axis**2)) < sgm_eps) then ! if the object is not actually rotating at this time, return a dummy value and exit - patch_ib(ib_marker)%moment = 1._wp + patch_ib(ib_idx)%moment = 1._wp return else normal_axis = axis/sqrt(sum(axis)) end if ! if the IB is in 2D or a 3D sphere, we can compute this exactly - if (patch_ib(ib_marker)%geometry == 2) then ! circle - patch_ib(ib_marker)%moment = 0.5_wp*patch_ib(ib_marker)%mass*(patch_ib(ib_marker)%radius)**2 - else if (patch_ib(ib_marker)%geometry == 3) then ! 
rectangle - patch_ib(ib_marker)%moment = patch_ib(ib_marker)%mass*(patch_ib(ib_marker)%length_x**2 + patch_ib(ib_marker) & - & %length_y**2)/6._wp - else if (patch_ib(ib_marker)%geometry == 6) then ! ellipse - patch_ib(ib_marker)%moment = 0.0625_wp*patch_ib(ib_marker)%mass*(patch_ib(ib_marker)%length_x**2 + patch_ib(ib_marker) & - & %length_y**2) - else if (patch_ib(ib_marker)%geometry == 8) then ! sphere - patch_ib(ib_marker)%moment = 0.4*patch_ib(ib_marker)%mass*(patch_ib(ib_marker)%radius)**2 + if (patch_ib(ib_idx)%geometry == 2) then ! circle + patch_ib(ib_idx)%moment = 0.5_wp*patch_ib(ib_idx)%mass*(patch_ib(ib_idx)%radius)**2 + else if (patch_ib(ib_idx)%geometry == 3) then ! rectangle + patch_ib(ib_idx)%moment = patch_ib(ib_idx)%mass*(patch_ib(ib_idx)%length_x**2 + patch_ib(ib_idx) %length_y**2)/6._wp + else if (patch_ib(ib_idx)%geometry == 6) then ! ellipse + patch_ib(ib_idx)%moment = 0.0625_wp*patch_ib(ib_idx)%mass*(patch_ib(ib_idx)%length_x**2 + patch_ib(ib_idx) %length_y**2) + else if (patch_ib(ib_idx)%geometry == 8) then ! sphere + patch_ib(ib_idx)%moment = 0.4*patch_ib(ib_idx)%mass*(patch_ib(ib_idx)%radius)**2 else ! we do not have an analytic moment of inertia calculation and need to approximate it directly via a sum count = 0 moment = 0._wp @@ -1146,6 +1144,8 @@ contains cell_volume = cell_volume*(z_cc(1) - z_cc(0)) end if + ib_marker = patch_ib(ib_idx)%gbl_patch_id + $:GPU_PARALLEL_LOOP(private='[position, closest_point_along_axis, vector_to_axis, distance_to_axis]', copy='[moment, & & count]', copyin='[ib_marker, cell_volume, normal_axis]', collapse=3) do i = 0, m @@ -1156,12 +1156,12 @@ contains count = count + 1 ! increment the count of total cells in the boundary ! 
get the position in local coordinates so that the axis passes through 0, 0, 0 - if (p == 0) then - position = [x_cc(i), y_cc(j), 0._wp] - [patch_ib(ib_marker)%x_centroid, & - & patch_ib(ib_marker)%y_centroid, 0._wp] + if (num_dims < 3) then + position = [x_cc(i), y_cc(j), 0._wp] - [patch_ib(ib_idx)%x_centroid, patch_ib(ib_idx)%y_centroid, & + & 0._wp] else - position = [x_cc(i), y_cc(j), z_cc(k)] - [patch_ib(ib_marker)%x_centroid, & - & patch_ib(ib_marker)%y_centroid, patch_ib(ib_marker)%z_centroid] + position = [x_cc(i), y_cc(j), z_cc(k)] - [patch_ib(ib_idx)%x_centroid, & + & patch_ib(ib_idx)%y_centroid, patch_ib(ib_idx)%z_centroid] end if ! project the position along the axis to find the closest distance to the rotation axis @@ -1179,8 +1179,7 @@ contains $:END_GPU_PARALLEL_LOOP() ! write the final moment assuming the points are all uniform density - patch_ib(ib_marker)%moment = moment*patch_ib(ib_marker)%mass/(count*cell_volume) - $:GPU_UPDATE(device='[patch_ib(ib_marker)%moment]') + patch_ib(ib_idx)%moment = moment*patch_ib(ib_idx)%mass/(count*cell_volume) end if end subroutine s_compute_moment_of_inertia @@ -1225,4 +1224,269 @@ contains end subroutine s_wrap_periodic_ibs + !> @brief reassesses ownership of IBs and passes ownership of IBs to neighbor processors + !> Reduces forces and torques across the local neighborhood without a global allreduce. Accumulation phase: 2 passes per + !! dimension receiving from the low-index (-X) neighbor. Pass 1: add received values; save what was received as recv_snap. Pass + !! 2: send current (post-pass-1) values; add received; subtract recv_snap to remove double-counting of the direct contribution + !! already added in pass 1. Back-propagation phase: 2 passes per dimension receiving from the high-index (+X) neighbor, each + !! overwriting local forces with the neighbor's accumulated total. 
+ subroutine s_communicate_ib_forces(forces, torques) + + real(wp), dimension(num_ibs, 3), intent(inout) :: forces, torques + +#ifdef MFC_MPI + integer :: i, j, k, pack_pos, unpack_pos, buf_size, ierr + integer :: send_neighbor, recv_neighbor, recv_count, pid, tag + real(wp), dimension(3) :: fval, tval + real(wp), allocatable :: recv_forces_snap(:,:), recv_torques_snap(:,:) + character(len=1), allocatable :: send_buf(:), recv_buf(:) + + if (num_procs == 1) return + + buf_size = storage_size(0)/8 + (storage_size(0)/8 + 6*storage_size(0._wp)/8)*size(patch_ib) + allocate (send_buf(buf_size), recv_buf(buf_size), recv_forces_snap(num_ibs, 3), recv_torques_snap(num_ibs, 3)) + + ! Accumulation phase: propagate contributions toward the high-index corner. + #:for X, ID in [('x', 1), ('y', 2), ('z', 3)] + if (num_dims >= ${ID}$) then + send_neighbor = merge(bc_${X}$%end, MPI_PROC_NULL, bc_${X}$%end >= 0) + recv_neighbor = merge(bc_${X}$%beg, MPI_PROC_NULL, bc_${X}$%beg >= 0) + + recv_forces_snap = 0._wp + recv_torques_snap = 0._wp + tag = 300 + + do k = 1, 2*ib_neighborhood_radius + ! send forces to +${X}$ neighbor; receive from -${X}$ neighbor. 
Add received values then record what was received as recv_snap. + pack_pos = 0 + call MPI_PACK(num_ibs, 1, MPI_INTEGER, send_buf, buf_size, pack_pos, MPI_COMM_WORLD, ierr) + do i = 1, num_ibs + call MPI_PACK(patch_ib(i)%gbl_patch_id, 1, MPI_INTEGER, send_buf, buf_size, pack_pos, MPI_COMM_WORLD, ierr) + fval(:) = forces(i,:); tval(:) = torques(i,:) + call MPI_PACK(fval, 3, mpi_p, send_buf, buf_size, pack_pos, MPI_COMM_WORLD, ierr) + call MPI_PACK(tval, 3, mpi_p, send_buf, buf_size, pack_pos, MPI_COMM_WORLD, ierr) + end do + call MPI_SENDRECV(send_buf, pack_pos, MPI_PACKED, send_neighbor, tag, recv_buf, buf_size, MPI_PACKED, & + & recv_neighbor, tag, MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) + if (recv_neighbor /= MPI_PROC_NULL) then + unpack_pos = 0 + call MPI_UNPACK(recv_buf, buf_size, unpack_pos, recv_count, 1, MPI_INTEGER, MPI_COMM_WORLD, ierr) + do i = 1, recv_count + call MPI_UNPACK(recv_buf, buf_size, unpack_pos, pid, 1, MPI_INTEGER, MPI_COMM_WORLD, ierr) + call MPI_UNPACK(recv_buf, buf_size, unpack_pos, fval, 3, mpi_p, MPI_COMM_WORLD, ierr) + call MPI_UNPACK(recv_buf, buf_size, unpack_pos, tval, 3, mpi_p, MPI_COMM_WORLD, ierr) + call s_get_neighborhood_idx(pid, j) + if (j > 0) then + ! add forces and subtract recv_snap to prevent double-counting + forces(j,:) = forces(j,:) + fval(:) - recv_forces_snap(j,:) + torques(j,:) = torques(j,:) + tval(:) - recv_torques_snap(j,:) + recv_forces_snap(j,:) = fval(:) + recv_torques_snap(j,:) = tval(:) + end if + end do + end if + tag = tag + 2 + end do + end if + #:endfor + + ! 
Send final sums back to neighbors in -X direction + #:for X, ID in [('x', 1), ('y', 2), ('z', 3)] + if (num_dims >= ${ID}$) then + send_neighbor = merge(bc_${X}$%beg, MPI_PROC_NULL, bc_${X}$%beg >= 0) + recv_neighbor = merge(bc_${X}$%end, MPI_PROC_NULL, bc_${X}$%end >= 0) + + do k = 1, 2*ib_neighborhood_radius + pack_pos = 0 + call MPI_PACK(num_ibs, 1, MPI_INTEGER, send_buf, buf_size, pack_pos, MPI_COMM_WORLD, ierr) + do i = 1, num_ibs + call MPI_PACK(patch_ib(i)%gbl_patch_id, 1, MPI_INTEGER, send_buf, buf_size, pack_pos, MPI_COMM_WORLD, ierr) + fval(:) = forces(i,:); tval(:) = torques(i,:) + call MPI_PACK(fval, 3, mpi_p, send_buf, buf_size, pack_pos, MPI_COMM_WORLD, ierr) + call MPI_PACK(tval, 3, mpi_p, send_buf, buf_size, pack_pos, MPI_COMM_WORLD, ierr) + end do + call MPI_SENDRECV(send_buf, pack_pos, MPI_PACKED, send_neighbor, tag, recv_buf, buf_size, MPI_PACKED, & + & recv_neighbor, tag, MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) + if (recv_neighbor /= MPI_PROC_NULL) then + unpack_pos = 0 + call MPI_UNPACK(recv_buf, buf_size, unpack_pos, recv_count, 1, MPI_INTEGER, MPI_COMM_WORLD, ierr) + do i = 1, recv_count + call MPI_UNPACK(recv_buf, buf_size, unpack_pos, pid, 1, MPI_INTEGER, MPI_COMM_WORLD, ierr) + call MPI_UNPACK(recv_buf, buf_size, unpack_pos, fval, 3, mpi_p, MPI_COMM_WORLD, ierr) + call MPI_UNPACK(recv_buf, buf_size, unpack_pos, tval, 3, mpi_p, MPI_COMM_WORLD, ierr) + call s_get_neighborhood_idx(pid, j) + if (j > 0) then + forces(j,:) = fval(:) + torques(j,:) = tval(:) + end if + end do + end if + tag = tag + 2 + end do + end if + #:endfor + + deallocate (send_buf, recv_buf, recv_forces_snap, recv_torques_snap) +#endif + + end subroutine s_communicate_ib_forces + + subroutine s_handoff_ib_ownership() + + integer :: i, j, k, output_idx, local_output_idx + integer :: old_num_local_ibs + integer :: new_count, recv_count + integer :: pack_pos, unpack_pos, buf_size, patch_bytes + integer :: send_neighbor, recv_neighbor, ierr + integer :: dx, dy, dz, tag, 
nbr_idx, nreqs + real(wp), dimension(3) :: centroid + logical :: is_new + type(ib_patch_parameters) :: tmp_patch + integer, dimension(num_local_ibs_max) :: local_ib_idx_old + ! 26 neighbors max in 3D (8 in 2D); each gets its own recv buffer + integer, parameter :: max_nbrs = 26 + character(len=1), allocatable :: send_buf(:), recv_bufs(:,:) + integer, dimension(2*max_nbrs) :: requests + integer, dimension(max_nbrs) :: recv_neighbor_list + +#ifdef MFC_MPI + if (num_procs > 1) then + ! save a copy of the local IB's global indices to cross-reference for later. + local_ib_idx_old = 0 + old_num_local_ibs = num_local_ibs + do i = 1, num_local_ibs + local_ib_idx_old(i) = patch_ib(local_ib_patch_ids(i))%gbl_patch_id + end do + + ! delete any particles that no longer need to be tracked and coalesce the array + output_idx = 0 + local_output_idx = 0 + do i = 1, num_ibs + centroid = [patch_ib(i)%x_centroid, patch_ib(i)%y_centroid, 0._wp] + if (num_dims == 3) centroid(3) = patch_ib(i)%z_centroid + + ! delete if not in neighborhood + if (f_neighborhood_ranks_own_location(centroid)) then + output_idx = output_idx + 1 + if (i /= output_idx) patch_ib(output_idx) = patch_ib(i) + + ! check if in local domain + if (f_local_rank_owns_location(centroid)) then + local_output_idx = local_output_idx + 1 + local_ib_patch_ids(local_output_idx) = output_idx + end if + end if + end do + num_ibs = output_idx + num_local_ibs = local_output_idx + + ! Broadcast newly-owned patches to all neighborhood neighbors + patch_bytes = storage_size(tmp_patch)/8 + buf_size = storage_size(0)/8 + patch_bytes*num_local_ibs_max + allocate (send_buf(buf_size), recv_bufs(buf_size, max_nbrs)) + + ! Write placeholder count at position 0 + pack_pos = 0 + call MPI_PACK(0, 1, MPI_INTEGER, send_buf, buf_size, pack_pos, MPI_COMM_WORLD, ierr) + + ! pack new patches and count them + new_count = 0 + do i = 1, num_local_ibs + k = local_ib_patch_ids(i) + is_new = .true. 
+ do j = 1, old_num_local_ibs + if (patch_ib(k)%gbl_patch_id == local_ib_idx_old(j)) then + is_new = .false. + exit + end if + end do + if (is_new) then + print *, proc_rank, " New Owner ", patch_ib(k)%gbl_patch_id ! TODO :: REMOVE THIS DEBUG PRINT + call MPI_PACK(patch_ib(k), patch_bytes, MPI_BYTE, send_buf, buf_size, pack_pos, MPI_COMM_WORLD, ierr) + new_count = new_count + 1 + end if + end do + + ! Overwrite the placeholder with the real count + pack_pos = 0 + call MPI_PACK(new_count, 1, MPI_INTEGER, send_buf, buf_size, pack_pos, MPI_COMM_WORLD, ierr) + pack_pos = storage_size(0)/8 + new_count*patch_bytes + + ! Post all receives first, then sends + nreqs = 0 + nbr_idx = 0 + do dz = merge(-1, 0, num_dims == 3), merge(1, 0, num_dims == 3) + do dy = -1, 1 + do dx = -1, 1 + if (dx == 0 .and. dy == 0 .and. dz == 0) cycle + nbr_idx = nbr_idx + 1 + tag = 200 + (dx + 1)*9 + (dy + 1)*3 + (dz + 1) + recv_neighbor = ib_neighbor_ranks(-dx, -dy, -dz) + recv_neighbor_list(nbr_idx) = MPI_PROC_NULL + if (recv_neighbor < 0) cycle + recv_neighbor_list(nbr_idx) = recv_neighbor + nreqs = nreqs + 1 + call MPI_IRECV(recv_bufs(:,nbr_idx), buf_size, MPI_PACKED, recv_neighbor, tag, MPI_COMM_WORLD, & + & requests(nreqs), ierr) + end do + end do + end do + + do dz = merge(-1, 0, num_dims == 3), merge(1, 0, num_dims == 3) + do dy = -1, 1 + do dx = -1, 1 + if (dx == 0 .and. dy == 0 .and. dz == 0) cycle + tag = 200 + (dx + 1)*9 + (dy + 1)*3 + (dz + 1) + send_neighbor = ib_neighbor_ranks(dx, dy, dz) + if (send_neighbor < 0) cycle + nreqs = nreqs + 1 + call MPI_ISEND(send_buf, pack_pos, MPI_PACKED, send_neighbor, tag, MPI_COMM_WORLD, requests(nreqs), ierr) + end do + end do + end do + + call MPI_WAITALL(nreqs, requests, MPI_STATUSES_IGNORE, ierr) + + ! 
Unpack all received buffers + do nbr_idx = 1, merge(26, 8, num_dims == 3) + if (recv_neighbor_list(nbr_idx) == MPI_PROC_NULL) cycle + unpack_pos = 0 + call MPI_UNPACK(recv_bufs(:,nbr_idx), buf_size, unpack_pos, recv_count, 1, MPI_INTEGER, MPI_COMM_WORLD, ierr) + do i = 1, recv_count + call MPI_UNPACK(recv_bufs(:,nbr_idx), buf_size, unpack_pos, tmp_patch, patch_bytes, MPI_BYTE, MPI_COMM_WORLD, & + & ierr) + call s_get_neighborhood_idx(tmp_patch%gbl_patch_id, j) + if (j < 0) then + num_ibs = num_ibs + 1 + @:ASSERT(num_ibs <= size(patch_ib), 'patch_ib overflow in neighborhood handoff') + patch_ib(num_ibs) = tmp_patch + end if + end do + end do + + deallocate (send_buf, recv_bufs) + end if +#endif + + end subroutine s_handoff_ib_ownership + + subroutine s_get_neighborhood_idx(gbl_idx, neighborhood_idx) + + $:GPU_ROUTINE(parallelism='[seq]') + + integer, intent(in) :: gbl_idx + integer, intent(out) :: neighborhood_idx + integer :: i + + neighborhood_idx = -1 + + do i = 1, num_ibs + if (patch_ib(i)%gbl_patch_id == gbl_idx) then + neighborhood_idx = i + exit + end if + end do + + end subroutine s_get_neighborhood_idx + end module m_ibm diff --git a/src/simulation/m_mpi_proxy.fpp b/src/simulation/m_mpi_proxy.fpp index cf08cbc709..e67fcc41a8 100644 --- a/src/simulation/m_mpi_proxy.fpp +++ b/src/simulation/m_mpi_proxy.fpp @@ -76,7 +76,7 @@ contains & 'num_probes', 'num_integrals', 'bubble_model', 'thermal', & & 'num_source', 'relax_model', 'num_ibs', 'n_start', & & 'num_bc_patches', 'num_igr_iters', 'num_igr_warm_start_iters', & - & 'adap_dt_max_iters', 'collision_model' ] + & 'adap_dt_max_iters', 'collision_model', 'ib_neighborhood_radius' ] call MPI_BCAST(${VAR}$, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) #:endfor diff --git a/src/simulation/m_start_up.fpp b/src/simulation/m_start_up.fpp index 310595249b..02ce17c6f4 100644 --- a/src/simulation/m_start_up.fpp +++ b/src/simulation/m_start_up.fpp @@ -39,6 +39,8 @@ module m_start_up use m_nvtx use m_ibm + use m_model + use 
m_collisions use m_compile_specific use m_checker_common use m_checker @@ -90,7 +92,7 @@ contains x_a, y_a, z_a, x_b, y_b, z_b, & x_domain, y_domain, z_domain, & hypoelasticity, & - ib, num_ibs, patch_ib, & + ib, num_ibs, ib_neighborhood_radius, patch_ib, & collision_model, coefficient_of_restitution, collision_time, & ib_coefficient_of_friction, ib_state_wrt, & fluid_pp, bub_pp, probe_wrt, prim_vars_wrt, & @@ -919,6 +921,8 @@ contains else if (t_step_start > 0) then call s_read_ib_restart_data(t_step_start) end if + call s_instantiate_STL_models() + call s_reduce_ib_patch_array() call s_ibm_setup() if (t_step_start == 0 .or. (cfl_dt .and. n_start == 0)) then call s_write_ib_data_file(0) @@ -1014,6 +1018,8 @@ contains #else "on CPUs" #endif + else + allocate (patch_ib(num_ib_patches_max)) end if call s_mpi_bcast_user_inputs() @@ -1196,4 +1202,274 @@ contains end subroutine s_read_ib_restart_data + !> @brief takes the patch_ib struct array that contains all global IB patches and reduces to only contain patches that are in + !! the local computational domain. + subroutine s_reduce_ib_patch_array() + + type(ib_patch_parameters), dimension(num_ib_patches_max) :: patch_ib_gbl + real(wp), dimension(3) :: centroid + integer :: i, j + integer :: num_aware_ibs + logical :: is_in_neighborhood, is_local + + ! do all set up for moving immersed boundaries + + moving_immersed_boundary_flag = .false. + do i = 1, num_ibs + if (patch_ib(i)%moving_ibm /= 0) then + moving_immersed_boundary_flag = .true. + exit + end if + end do + + patch_ib_gbl(1:num_ibs) = patch_ib(1:num_ibs) + call get_neighbor_bounds() ! make sure the bounds of the neighbors are correctly set up + call s_compute_ib_neighbor_ranks() ! build lookup of all neighbor MPI ranks + + deallocate (patch_ib) + num_aware_ibs = min(num_local_ibs_max*(2*ib_neighborhood_radius + 1)**num_dims, num_ib_patches_max) + allocate (patch_ib(num_aware_ibs)) + + ! 
assign defaults to all values + num_gbl_ibs = num_ibs + num_local_ibs = num_ibs + do i = 1, num_local_ibs_max + local_ib_patch_ids(i) = i + end do + +#ifdef MFC_MPI + ! fallback for 1-rank case + if (num_procs == 1) then + patch_ib(:) = patch_ib_gbl(1:num_aware_ibs) + else + ! determine the set of patches owned by local rank + num_local_ibs = 0 + num_ibs = 0 + do i = 1, num_gbl_ibs + ! catch the edge case where the collision lies just outside the computational domain + is_in_neighborhood = .true. + is_local = .true. + centroid = [patch_ib_gbl(i)%x_centroid, patch_ib_gbl(i)%y_centroid, 0._wp] + if (num_dims == 3) centroid(3) = patch_ib_gbl(i)%z_centroid + + if (f_neighborhood_ranks_own_location(centroid)) then + num_ibs = num_ibs + 1 + patch_ib(num_ibs) = patch_ib_gbl(i) + patch_ib(num_ibs)%gbl_patch_id = i + if (f_local_rank_owns_location(centroid)) then + num_local_ibs = num_local_ibs + 1 + local_ib_patch_ids(num_local_ibs) = num_ibs + end if + end if + end do + end if +#else + ! reduce the size of the array for local simulation in no-MPI case + patch_ib(:) = patch_ib_gbl(1:num_aware_ibs) +#endif + + end subroutine s_reduce_ib_patch_array + + !> Build ib_neighbor_ranks(-1:1,-1:1,-1:1): MPI ranks of all neighbor domains. Uses two rounds of MPI_SENDRECV cascades - face + !! neighbors are known from bc_*, edge neighbors are obtained in round 1, and (3D) corner neighbors in round 2. + subroutine s_compute_ib_neighbor_ranks() + + integer :: ax, k, nbr_idx, nreqs, sx, sy, sz, dx, dy, dz + integer, allocatable :: send_table(:,:,:), recv_tables(:,:,:,:) + integer, dimension(52) :: requests + +#ifdef MFC_MPI + integer :: ierr + integer, dimension(4) :: buf4, rbuf4 + integer, dimension(2) :: buf2, rbuf2 + + ax = ib_neighborhood_radius + + if (allocated(ib_neighbor_ranks)) deallocate (ib_neighbor_ranks) + allocate (ib_neighbor_ranks(-ax:ax,-ax:ax,-ax:ax)) + ib_neighbor_ranks = MPI_PROC_NULL + ib_neighbor_ranks(0, 0, 0) = proc_rank + + ! 
Fill radius-1 entries: face neighbors are known from domain decomposition + ib_neighbor_ranks(-1, 0, 0) = bc_x%beg + ib_neighbor_ranks(+1, 0, 0) = bc_x%end + if (num_dims >= 2) then + ib_neighbor_ranks(0, -1, 0) = bc_y%beg + ib_neighbor_ranks(0, +1, 0) = bc_y%end + end if + if (num_dims == 3) then + ib_neighbor_ranks(0, 0, -1) = bc_z%beg + ib_neighbor_ranks(0, 0, +1) = bc_z%end + end if + + if (num_dims >= 2) then + ! Round 1a: exchange y/z face ranks with +/-x face neighbors -> xy and xz edge ranks + buf4 = [bc_y%beg, bc_y%end, bc_z%beg, bc_z%end] + + ! Send to -x, receive from +x -> edges (+1,+/-1,0) and (+1,0,+/-1) + call MPI_SENDRECV(buf4, 4, MPI_INTEGER, merge(bc_x%beg, MPI_PROC_NULL, bc_x%beg >= 0), 310, rbuf4, 4, MPI_INTEGER, & + & merge(bc_x%end, MPI_PROC_NULL, bc_x%end >= 0), 310, MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) + if (bc_x%end >= 0) then + ib_neighbor_ranks(+1, -1, 0) = rbuf4(1) + ib_neighbor_ranks(+1, +1, 0) = rbuf4(2) + ib_neighbor_ranks(+1, 0, -1) = rbuf4(3) + ib_neighbor_ranks(+1, 0, +1) = rbuf4(4) + end if + + call MPI_SENDRECV(buf4, 4, MPI_INTEGER, merge(bc_x%end, MPI_PROC_NULL, bc_x%end >= 0), 311, rbuf4, 4, MPI_INTEGER, & + & merge(bc_x%beg, MPI_PROC_NULL, bc_x%beg >= 0), 311, MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) + if (bc_x%beg >= 0) then + ib_neighbor_ranks(-1, -1, 0) = rbuf4(1) + ib_neighbor_ranks(-1, +1, 0) = rbuf4(2) + ib_neighbor_ranks(-1, 0, -1) = rbuf4(3) + ib_neighbor_ranks(-1, 0, +1) = rbuf4(4) + end if + end if + + if (num_dims == 3) then + ! 
Round 1b: exchange z face ranks with +/-y face neighbors -> yz edge ranks + buf2 = [bc_z%beg, bc_z%end] + + call MPI_SENDRECV(buf2, 2, MPI_INTEGER, merge(bc_y%beg, MPI_PROC_NULL, bc_y%beg >= 0), 312, rbuf2, 2, MPI_INTEGER, & + & merge(bc_y%end, MPI_PROC_NULL, bc_y%end >= 0), 312, MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) + if (bc_y%end >= 0) then + ib_neighbor_ranks(0, +1, -1) = rbuf2(1) + ib_neighbor_ranks(0, +1, +1) = rbuf2(2) + end if + + call MPI_SENDRECV(buf2, 2, MPI_INTEGER, merge(bc_y%end, MPI_PROC_NULL, bc_y%end >= 0), 313, rbuf2, 2, MPI_INTEGER, & + & merge(bc_y%beg, MPI_PROC_NULL, bc_y%beg >= 0), 313, MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) + if (bc_y%beg >= 0) then + ib_neighbor_ranks(0, -1, -1) = rbuf2(1) + ib_neighbor_ranks(0, -1, +1) = rbuf2(2) + end if + + ! Round 2: exchange z face ranks with xy-diagonal edge neighbors -> corner ranks. Each of the 4 xy diagonals gives 2 + ! corners (the +/-z variants). Pattern: send buf2 to mirror diagonal, receive from this diagonal -> that edge's z face + ! ranks. + #:for DX, DY, MDX, MDY, TAG in [(1,1,-1,-1,320), (1,-1,-1,1,321), (-1,1,1,-1,322), (-1,-1,1,1,323)] + call MPI_SENDRECV(buf2, 2, MPI_INTEGER, merge(ib_neighbor_ranks(${MDX}$, ${MDY}$, 0), MPI_PROC_NULL, & + & ib_neighbor_ranks(${MDX}$, ${MDY}$, 0) >= 0), ${TAG}$, rbuf2, 2, MPI_INTEGER, & + & merge(ib_neighbor_ranks(${DX}$, ${DY}$, 0), MPI_PROC_NULL, ib_neighbor_ranks(${DX}$, ${DY}$, & + & 0) >= 0), ${TAG}$, MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) + if (ib_neighbor_ranks(${DX}$, ${DY}$, 0) >= 0) then + ib_neighbor_ranks(${DX}$, ${DY}$, -1) = rbuf2(1) + ib_neighbor_ranks(${DX}$, ${DY}$, +1) = rbuf2(2) + end if + #:endfor + end if + + ! For radius > 1: extend the table by iterative 26-neighbor full-table exchanges. In each round, every rank broadcasts its + ! current table to all 26 immediate neighbors. Their entry at offset (dx,dy,dz) from them = our entry at + ! (dx+sx,dy+sy,dz+sz). 
One extension round fills the entire next shell, so ax-1 rounds suffice. + if (ax > 1) then + allocate (send_table(-ax:ax,-ax:ax,-ax:ax)) + allocate (recv_tables(-ax:ax,-ax:ax,-ax:ax,1:26)) + + do k = 2, ax + send_table = ib_neighbor_ranks + + nreqs = 0 + nbr_idx = 0 + do sz = -1, 1 + do sy = -1, 1 + do sx = -1, 1 + if (sx == 0 .and. sy == 0 .and. sz == 0) cycle + nbr_idx = nbr_idx + 1 + if (ib_neighbor_ranks(sx, sy, sz) < 0) cycle + nreqs = nreqs + 1 + call MPI_IRECV(recv_tables(:,:,:,nbr_idx), (2*ax + 1)**3, MPI_INTEGER, ib_neighbor_ranks(sx, sy, sz), & + & 400, MPI_COMM_WORLD, requests(nreqs), ierr) + end do + end do + end do + + do sz = -1, 1 + do sy = -1, 1 + do sx = -1, 1 + if (sx == 0 .and. sy == 0 .and. sz == 0) cycle + if (ib_neighbor_ranks(sx, sy, sz) < 0) cycle + nreqs = nreqs + 1 + call MPI_ISEND(send_table, (2*ax + 1)**3, MPI_INTEGER, ib_neighbor_ranks(sx, sy, sz), 400, & + & MPI_COMM_WORLD, requests(nreqs), ierr) + end do + end do + end do + + call MPI_WAITALL(nreqs, requests, MPI_STATUSES_IGNORE, ierr) + + nbr_idx = 0 + do sz = -1, 1 + do sy = -1, 1 + do sx = -1, 1 + if (sx == 0 .and. sy == 0 .and. sz == 0) cycle + nbr_idx = nbr_idx + 1 + if (ib_neighbor_ranks(sx, sy, sz) < 0) cycle + do dz = -ax, ax + do dy = -ax, ax + do dx = -ax, ax + if (recv_tables(dx, dy, dz, nbr_idx) == MPI_PROC_NULL) cycle + if (dx + sx < -ax .or. dx + sx > ax) cycle + if (dy + sy < -ax .or. dy + sy > ax) cycle + if (dz + sz < -ax .or. dz + sz > ax) cycle + if (ib_neighbor_ranks(dx + sx, dy + sy, dz + sz) /= MPI_PROC_NULL) cycle + ib_neighbor_ranks(dx + sx, dy + sy, dz + sz) = recv_tables(dx, dy, dz, nbr_idx) + end do + end do + end do + end do + end do + end do + end do + + deallocate (send_table, recv_tables) + end if +#endif + + end subroutine s_compute_ib_neighbor_ranks + + subroutine get_neighbor_bounds() + + real(wp) :: beg_val, end_val, recv_val + integer :: k, send_neighbor, recv_neighbor, ierr + + ! 
Default: unbounded in all directions (covers single-rank and no-MPI cases) + + neighbor_domain_x%beg = -huge(0._wp) + neighbor_domain_x%end = huge(0._wp) + neighbor_domain_y%beg = -huge(0._wp) + neighbor_domain_y%end = huge(0._wp) + neighbor_domain_z%beg = -huge(0._wp) + neighbor_domain_z%end = huge(0._wp) + +#ifdef MFC_MPI + ! For each direction, propagate the left/right boundary edges outward ib_neighborhood_radius hops. After k rounds: beg_val = + ! left edge of the rank k hops to the left; end_val = right edge of the rank k hops to the right. + #:for X, ID, TAG, DIM in [('x', 1, 100, 'm'), ('y', 2, 102, 'n'), ('z', 3, 104, 'p')] + if (num_dims >= ${ID}$) then + beg_val = ${X}$_cb(-1) + end_val = ${X}$_cb(${DIM}$) + do k = 1, ib_neighborhood_radius + send_neighbor = merge(bc_${X}$%end, MPI_PROC_NULL, bc_${X}$%end >= 0) + recv_neighbor = merge(bc_${X}$%beg, MPI_PROC_NULL, bc_${X}$%beg >= 0) + recv_val = -huge(0._wp) + call MPI_SENDRECV(beg_val, 1, mpi_p, send_neighbor, ${TAG}$, recv_val, 1, mpi_p, recv_neighbor, ${TAG}$, & + & MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) + beg_val = recv_val + + send_neighbor = merge(bc_${X}$%beg, MPI_PROC_NULL, bc_${X}$%beg >= 0) + recv_neighbor = merge(bc_${X}$%end, MPI_PROC_NULL, bc_${X}$%end >= 0) + recv_val = huge(0._wp) + call MPI_SENDRECV(end_val, 1, mpi_p, send_neighbor, ${TAG + 1}$, recv_val, 1, mpi_p, recv_neighbor, & + & ${TAG + 1}$, MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) + end_val = recv_val + end do + neighbor_domain_${X}$%beg = beg_val + neighbor_domain_${X}$%end = end_val + end if + #:endfor +#endif + + end subroutine get_neighbor_bounds + end module m_start_up diff --git a/src/simulation/m_time_steppers.fpp b/src/simulation/m_time_steppers.fpp index ce0ae48c4b..701d7a3cb0 100644 --- a/src/simulation/m_time_steppers.fpp +++ b/src/simulation/m_time_steppers.fpp @@ -552,7 +552,6 @@ contains ! 
check if any IBMS are moving, and if so, update the markers, ghost points, levelsets, and levelset norms if (moving_immersed_boundary_flag) then call s_propagate_immersed_boundaries(s) - ! compute ib forces for fixed immersed boundaries if requested for output end if ! update the ghost fluid properties point values based on IB state @@ -564,10 +563,11 @@ contains end if end do - ! if (ib) then - if (moving_immersed_boundary_flag) call s_wrap_periodic_ibs() - if (ib_state_wrt .and. (.not. moving_immersed_boundary_flag)) then + if (moving_immersed_boundary_flag) then + call s_wrap_periodic_ibs() ! wraps the positions of IBs to the local proc + call s_handoff_ib_ownership() ! recomputes which ranks own which IBs and communicate to neighbors + else if (ib_state_wrt) then call s_compute_ib_forces(q_prim_vf, fluid_pp) end if end if @@ -708,11 +708,11 @@ contains integer, intent(in) :: s integer :: i - logical :: forces_computed + integer :: gbl_id ! used for analytic ib patch motion call nvtxStartRange("PROPAGATE-IMMERSED-BOUNDARIES") - forces_computed = .false. + if (moving_immersed_boundary_flag) call s_compute_ib_forces(q_prim_vf, fluid_pp) do i = 1, num_ibs if (s == 1) then @@ -726,11 +726,6 @@ contains ! Compute forces BEFORE the RK velocity blend so the device copy of patch_ib%vel matches the host (pre-blend) when ! velocity-dependent collision damping forces are evaluated on the GPU. - if (patch_ib(i)%moving_ibm == 2 .and. .not. forces_computed) then - call s_compute_ib_forces(q_prim_vf, fluid_pp) - forces_computed = .true. - end if - if (patch_ib(i)%moving_ibm > 0) then patch_ib(i)%vel = (rk_coef(s, 1)*patch_ib(i)%step_vel + rk_coef(s, 2)*patch_ib(i)%vel)/rk_coef(s, 4) patch_ib(i)%angular_vel = (rk_coef(s, 1)*patch_ib(i)%step_angular_vel + rk_coef(s, & @@ -748,8 +743,7 @@ contains & 3)*dt*patch_ib(i)%torque/rk_coef(s, 4)) ! add the torque to the angular momentum call s_compute_moment_of_inertia(i, patch_ib(i)%angular_vel) ! 
update the moment of inertia to be based on the direction of the angular momentum - patch_ib(i)%angular_vel = patch_ib(i)%angular_vel/patch_ib(i) & - & %moment ! convert back to angular velocity with the new moment of inertia + patch_ib(i)%angular_vel = patch_ib(i)%angular_vel/patch_ib(i)%moment end if ! Update the angle of the IB diff --git a/toolchain/mfc/case.py b/toolchain/mfc/case.py index 65c49f8304..7aed9212dc 100644 --- a/toolchain/mfc/case.py +++ b/toolchain/mfc/case.py @@ -295,7 +295,7 @@ def rhs_replace(match): # each element separated by new line characters. Then write those # new lines as a fully concatenated string with fortran syntax srcs.append(f"""\ - if (i == {pid}) then + if (gbl_id == {pid}) then {f"{chr(10)}".join(lines)} end if\ """) @@ -305,6 +305,7 @@ def rhs_replace(match): ! parameterize the velocity and rotation rate of a moving IB. #:def mib_analytical() +gbl_id = patch_ib(i)%gbl_patch_id {f"{chr(10)}{chr(10)}".join(srcs)} #:enddef """ diff --git a/toolchain/mfc/params/definitions.py b/toolchain/mfc/params/definitions.py index ef04ad73c2..d9bd8a0016 100644 --- a/toolchain/mfc/params/definitions.py +++ b/toolchain/mfc/params/definitions.py @@ -226,6 +226,7 @@ def _fc(name: str) -> int: "hyperelasticity": "Enable hyperelastic model", "relativity": "Enable special relativity", "ib": "Enable immersed boundaries", + "ib_neighborhood_radius": "Neighborhood radius in ranks for IB awareness", "collision_model": "Collision model for immersed boundaries (0=none, 1=soft sphere)", "coefficient_of_restitution": "Coefficient of restitution for IB collisions", "collision_time": "Characteristic collision time for IB collisions", @@ -664,6 +665,7 @@ def get_value_label(param_name: str, value: int) -> str: "num_fluids": {"min": 1, "max": NF}, "num_patches": {"min": 0, "max": NUM_PATCHES_MAX}, "num_ibs": {"min": 0}, + "ib_neighborhood_radius": {"min": 1}, "num_source": {"min": 1}, "num_probes": {"min": 1}, "num_integrals": {"min": 1}, @@ -915,6 +917,7 @@ 
def _load(): # Immersed boundary _r("num_ibs", INT, {"ib"}) + _r("ib_neighborhood_radius", INT, {"ib"}) _r("ib", LOG, {"ib"}) _r("collision_model", INT, {"ib"}) _r("coefficient_of_restitution", REAL, {"ib"}) diff --git a/toolchain/mfc/params/descriptions.py b/toolchain/mfc/params/descriptions.py index dd542a8d1a..eebaf6d58a 100644 --- a/toolchain/mfc/params/descriptions.py +++ b/toolchain/mfc/params/descriptions.py @@ -132,6 +132,7 @@ # Immersed boundaries "ib": "Enable immersed boundary method", "num_ibs": "Number of immersed boundary patches", + "ib_neighborhood_radius": "Neighborhood radius in ranks for IB awareness", # Acoustic sources "acoustic_source": "Enable acoustic source terms", "num_source": "Number of acoustic sources",