LCOV - code coverage report
Current view: top level - mtx/private - bcyclic.f90 (source / functions) Coverage Total Hit
Test: coverage.info Lines: 78.2 % 174 136
Test Date: 2025-05-08 18:23:42 Functions: 80.0 % 10 8

            Line data    Source code
       1              : ! ***********************************************************************
       2              : !
       3              : !   Copyright (C) 2012  The MESA Team
       4              : !
       5              : !   This program is free software: you can redistribute it and/or modify
       6              : !   it under the terms of the GNU Lesser General Public License
       7              : !   as published by the Free Software Foundation,
       8              : !   either version 3 of the License, or (at your option) any later version.
       9              : !
      10              : !   This program is distributed in the hope that it will be useful,
      11              : !   but WITHOUT ANY WARRANTY; without even the implied warranty of
      12              : !   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
      13              : !   See the GNU Lesser General Public License for more details.
      14              : !
      15              : !   You should have received a copy of the GNU Lesser General Public License
      16              : !   along with this program. If not, see <https://www.gnu.org/licenses/>.
      17              : !
      18              : ! ***********************************************************************
      19              : 
      20              : ! derived from BCYCLIC as described in hirshman et. al.
      21              : ! S.P.Hirshman, K.S.Perumalla, V.E.Lynch, & R.Sanchez,
      22              : ! BCYCLIC: A parallel block tridiagonal matrix cyclic solver,
      23              : ! J. Computational Physics, 229 (2010) 6392-6404.
      24              : 
      25              :       module bcyclic
      26              : 
      27              :       use const_def, only: dp
      28              :       use my_lapack95
      29              :       use utils_lib, only: set_nan, mesa_error
      30              : 
      31              :       implicit none
      32              : 
      33              :       type ulstore
      34              :          integer :: ul_size    ! size of umat1 & lmat1 (0 if not allocated)
      35              :          real(dp), pointer :: umat1(:), lmat1(:)
      36              :       end type ulstore
      37              : 
      38              :       type(ulstore), pointer :: odd_storage(:) => null()
      39              :       logical, parameter :: dbg = .false.
      40              :       logical, parameter :: do_fill_with_NaNs = .false.
      41              : 
      42              :       contains
      43              : 
      44            1 :       subroutine bcyclic_factor ( &
      45              :             lblk1, dblk1, ublk1, ipivot1, brhs1, nvar, nz, sparse, &
      46              :             lrd, rpar_decsol, lid, ipar_decsol, ierr)
      47              :          real(dp), pointer :: lblk1(:)  ! row section of lower block
      48              :          real(dp), pointer :: dblk1(:)  ! row section of diagonal block
      49              :          real(dp), pointer :: ublk1(:)  ! row section of upper block
      50              :          integer, pointer :: ipivot1(:)  ! row section of pivot array for block factorization
      51              :          real(dp), pointer :: brhs1(:)  ! row section of rhs
      52              :          integer, intent(in) :: nvar  ! linear size of each block
      53              :          integer, intent(in) :: nz  ! number of block rows
      54              :          logical, intent(in) :: sparse
      55              :          integer, intent(in) :: lrd, lid
      56              :          real(dp), pointer, intent(inout) :: rpar_decsol(:)  ! (lrd)
      57              :          integer, pointer, intent(inout) :: ipar_decsol(:)  ! (lid)
      58              :          integer, intent(out) :: ierr
      59              : 
      60            1 :          integer, pointer :: nslevel(:), ipivot(:)
      61              :          integer :: ncycle, nstemp, maxlevels, nlevel
      62              :          logical :: have_odd_storage
      63            1 :          real(dp), pointer, dimension(:,:) :: dmat
      64            1 :          real(dp) :: dlamch, sfmin
      65              : 
      66              :          include 'formats'
      67              : 
      68            1 :          ierr = 0
      69              : 
      70              :          if (dbg) write(*,*) 'start bcyclic_factor'
      71              : 
      72              :          ! compute number of cyclic reduction levels
      73            1 :          ncycle = 1
      74            1 :          maxlevels = 0
      75            6 :          do while (ncycle < nz)
      76            5 :             ncycle = 2*ncycle
      77            5 :             maxlevels = maxlevels+1
      78              :          end do
      79            1 :          maxlevels = max(1, maxlevels)
      80              : 
      81            1 :          have_odd_storage = associated(odd_storage)
      82            1 :          if (have_odd_storage) then
      83            0 :             if (size(odd_storage) < maxlevels) then
      84            0 :                call clear_storage
      85              :                have_odd_storage = .false.
      86              :             end if
      87              :          end if
      88              : 
      89            1 :          if (.not. have_odd_storage) then
      90            1 :             allocate (odd_storage(maxlevels+3), stat=ierr)
      91            1 :             if (ierr /= 0) then
      92            0 :                write(*,*) 'alloc failed for odd_storage in bcyclic'
      93            0 :                return
      94              :             end if
      95            9 :             do nlevel = 1, size(odd_storage)
      96            9 :                odd_storage(nlevel)% ul_size = 0
      97              :             end do
      98              :          end if
      99              : 
     100            1 :          allocate (nslevel(maxlevels), stat=ierr)
     101            1 :          if (ierr /= 0) return
     102              : 
     103            1 :          if (sparse) then
     104            0 :             write(*,*) 'no support for sparse matrix in bcyclic'
     105            0 :             ierr = -1
     106            0 :             return
     107              :          end if
     108              : 
     109            1 :          ncycle = 1
     110            1 :          nstemp = nz
     111            1 :          nlevel = 1
     112              : 
     113              :          if (dbg) write(*,*) 'start factor_cycle'
     114              : 
     115            5 :          factor_cycle: do  ! perform cyclic-reduction factorization
     116              : 
     117            5 :             nslevel(nlevel) = nstemp
     118              : 
     119              :             if (dbg) write(*,2) 'call cycle_onestep', nstemp
     120              : 
     121              :             call cycle_onestep( &
     122              :                nvar, nz, nstemp, ncycle, nlevel, sparse, &
     123            5 :                lblk1, dblk1, ublk1, ipivot1, ierr)
     124            5 :             if (ierr /= 0) then
     125              :                !write(*,*) 'cycle_onestep failed'
     126            0 :                call dealloc
     127            0 :                return
     128              :             end if
     129              : 
     130            5 :             if (nstemp == 1) exit factor_cycle
     131              : 
     132            5 :             nstemp = (nstemp+1)/2
     133            5 :             nlevel = nlevel+1
     134            5 :             ncycle = 2*ncycle
     135              : 
     136            5 :             if (nlevel > maxlevels) exit factor_cycle
     137              : 
     138              :          end do factor_cycle
     139              : 
     140              :          if (dbg) write(*,*) 'done factor_cycle'
     141              : 
     142              :          ! factor row 1
     143            1 :          dmat(1:nvar,1:nvar) => dblk1(1:nvar*nvar)
     144            1 :          sfmin = dlamch('S')
     145            1 :          ipivot(1:nvar) => ipivot1(1:nvar)
     146            1 :          call my_getf2(nvar, dmat, nvar, ipivot, sfmin, ierr)
     147            1 :          if (ierr /= 0) then
     148            0 :             write(*,*) 'row 1 factor failed in bcyclic_factor'
     149            0 :             call dealloc
     150            0 :             return
     151              :          end if
     152              : 
     153            1 :          call dealloc
     154              : 
     155              : 
     156            1 :          if (dbg) write(*,*) 'done bcyclic_factor'
     157              : 
     158              :          contains
     159              : 
     160            1 :          subroutine dealloc
     161            1 :             deallocate (nslevel)
     162            1 :          end subroutine dealloc
     163              : 
     164              : 
     165              :       end subroutine bcyclic_factor
     166              : 
     167              : 
     168            1 :       subroutine bcyclic_solve ( &
     169              :             lblk1, dblk1, ublk1, ipivot1, brhs1, nvar, nz, sparse, &
     170              :             lrd, rpar_decsol, lid, ipar_decsol, ierr)
     171              :          real(dp), pointer :: lblk1(:)  ! row section of lower block
     172              :          real(dp), pointer :: dblk1(:)  ! row section of diagonal block
     173              :          real(dp), pointer :: ublk1(:)  ! row section of upper block
     174              :          integer, pointer :: ipivot1(:)  ! row section of pivot array for block factorization
     175              :          real(dp), pointer :: brhs1(:)   ! row section of rhs
     176              :          integer, intent(in) :: nvar  ! linear size of each block
     177              :          integer, intent(in) :: nz     ! number of block rows
     178              :          logical, intent(in) :: sparse
     179              :          integer, intent(in) :: lrd, lid
     180              :          real(dp), pointer, intent(inout) :: rpar_decsol(:)  ! (lrd)
     181              :          integer, pointer, intent(inout) :: ipar_decsol(:)  ! (lid)
     182              :          integer, intent(out) :: ierr
     183              : 
     184            1 :          integer, pointer :: nslevel(:), ipivot(:)
     185              :          integer :: ncycle, nstemp, maxlevels, nlevel, nvar2
     186            1 :          real(dp), pointer, dimension(:,:) :: dmat, bptr2
     187              : 
     188              :          include 'formats'
     189              : 
     190              : 
     191              :          if (dbg) write(*,*) 'start bcyclic_solve'
     192              : 
     193            1 :          ierr = 0
     194            1 :          nvar2 = nvar*nvar
     195            1 :          ncycle = 1
     196            1 :          maxlevels = 0
     197            6 :          do while (ncycle < nz)
     198            5 :             ncycle = 2*ncycle
     199            5 :             maxlevels = maxlevels+1
     200              :          end do
     201            1 :          maxlevels = max(1, maxlevels)
     202              : 
     203            1 :          allocate (nslevel(maxlevels), stat=ierr)
     204            1 :          if (ierr /= 0) return
     205              : 
     206            1 :          ncycle = 1
     207            1 :          nstemp = nz
     208            1 :          nlevel = 1
     209              : 
     210              :          if (dbg) write(*,*) 'start forward_cycle'
     211              : 
     212            5 :          forward_cycle: do
     213              : 
     214            5 :             nslevel(nlevel) = nstemp
     215              :             if (dbg) write(*,2) 'call cycle_rhs', nstemp
     216              :             call cycle_rhs( &
     217            5 :                nstemp, nvar, ncycle, nlevel, sparse, dblk1, brhs1, ipivot1, ierr)
     218            5 :             if (ierr /= 0) then
     219            0 :                call dealloc
     220            0 :                return
     221              :             end if
     222              : 
     223            5 :             if (nstemp == 1) exit forward_cycle
     224              : 
     225            5 :             nstemp = (nstemp+1)/2
     226            5 :             nlevel = nlevel+1
     227            5 :             ncycle = 2*ncycle
     228              : 
     229            5 :             if (nlevel > maxlevels) exit forward_cycle
     230              : 
     231              :          end do forward_cycle
     232              : 
     233              :          if (dbg) write(*,*) 'done forward_cycle'
     234              : 
     235            1 :          ipivot(1:nvar) => ipivot1(1:nvar)
     236            1 :          dmat(1:nvar,1:nvar) => dblk1(1:nvar2)
     237            1 :          bptr2(1:nvar,1:1) => brhs1(1:nvar)
     238            1 :          call my_getrs(nvar, 1, dmat, nvar, ipivot, bptr2, nvar, ierr)
     239            1 :          if (ierr /= 0) then
     240            0 :             write(*,*) 'failed in bcyclic_solve'
     241            0 :             call dealloc
     242            0 :             return
     243              :          end if
     244              : 
     245              :          ! back solve for even x's
     246            6 :          back_cycle: do while (ncycle > 1)
     247            5 :             ncycle = ncycle/2
     248            5 :             nlevel = nlevel-1
     249            5 :             if (nlevel < 1) then
     250            0 :                ierr = -1
     251            0 :                exit back_cycle
     252              :             end if
     253            5 :             nstemp = nslevel(nlevel)
     254              :             call cycle_solve( &
     255            5 :                nvar, nz, ncycle, nstemp, nlevel, sparse, lblk1, ublk1, brhs1)
     256              :          end do back_cycle
     257              : 
     258            1 :          call dealloc
     259              : 
     260            1 :          if (dbg) write(*,*) 'done bcyclic_solve'
     261              : 
     262              : 
     263              :          contains
     264              : 
     265            1 :          subroutine dealloc
     266            1 :             deallocate (nslevel)
     267            1 :          end subroutine dealloc
     268              : 
     269              : 
     270              :       end subroutine bcyclic_solve
     271              : 
     272              : 
     273            0 :       subroutine clear_storage
     274              :          integer :: nlevel
     275            0 :          nlevel = size(odd_storage)
     276            0 :          do while (nlevel > 0)
     277            0 :             if (odd_storage(nlevel)% ul_size > 0) then
     278            0 :                deallocate(odd_storage(nlevel)% umat1)
     279            0 :                deallocate(odd_storage(nlevel)% lmat1)
     280              :             end if
     281            0 :             nlevel = nlevel-1
     282              :          end do
     283            0 :          deallocate(odd_storage)
     284              :          nullify(odd_storage)
     285            0 :       end subroutine clear_storage
     286              : 
     287              : 
     288            5 :       subroutine cycle_onestep( &
     289              :             nvar, nz, nblk, ncycle, nlevel, sparse, &
     290              :             lblk1, dblk1, ublk1, ipivot1, ierr)
     291              :          integer, intent(in) :: nvar, nz, nblk, ncycle, nlevel
     292              :          logical, intent(in) :: sparse
     293              :          real(dp), pointer, intent(inout) :: lblk1(:), dblk1(:), ublk1(:)
     294              :          integer, pointer, intent(inout) :: ipivot1(:)
     295              :          integer, intent(out) :: ierr
     296              : 
     297            5 :          integer, pointer :: ipivot(:)
     298            5 :          real(dp), pointer, dimension(:,:) :: dmat, umat, lmat, umat0, lmat0
     299            5 :          real(dp), pointer, dimension(:,:) :: lnext, unext, lprev, uprev
     300            5 :          real(dp), pointer :: mat1(:)
     301              :          integer :: i, shift, min_sz, new_sz, shift1, shift2, nvar2, &
     302              :             ns, ierr_loc, nmin, kcount, k
     303            5 :          real(dp) :: dlamch, sfmin
     304              : 
     305              :          include 'formats'
     306              : 
     307            5 :          ierr = 0
     308           10 :          sfmin = dlamch('S')
     309            5 :          nvar2 = nvar*nvar
     310            5 :          nmin = 1
     311            5 :          kcount = 1+(nblk-nmin)/2
     312            5 :          min_sz = nvar2*kcount
     313            5 :          if (odd_storage(nlevel)% ul_size < min_sz) then
     314            5 :             if (odd_storage(nlevel)% ul_size > 0) &
     315            0 :                deallocate(odd_storage(nlevel)% umat1, odd_storage(nlevel)% lmat1)
     316            5 :             new_sz = FLOOR(min_sz*1.1) + 100
     317            5 :             odd_storage(nlevel)% ul_size = new_sz
     318              :             allocate (odd_storage(nlevel)% umat1(new_sz), &
     319            5 :                       odd_storage(nlevel)% lmat1(new_sz), stat=ierr)
     320            5 :             if (ierr /= 0) then
     321            0 :                write(*,*) 'allocation error in cycle_onestep'
     322            0 :                return
     323              :             end if
     324              :          end if
     325              : 
     326            5 : !$omp parallel do private(ns,kcount,shift,shift2,i)
     327              :          do ns = nmin, nblk, 2  ! copy umat and lmat
     328              :             kcount = (ns-nmin)/2 + 1
     329              :             shift = nvar2*(kcount-1)
     330              :             shift2 = nvar2*ncycle*(ns-1)
     331              :             do i=1,nvar2
     332              :                odd_storage(nlevel)% umat1(shift+i) = ublk1(shift2+i)
     333              :                odd_storage(nlevel)% lmat1(shift+i) = lblk1(shift2+i)
     334              :             end do
     335              :          end do
     336              : !$omp end parallel do
     337              : 
     338            5 :          if (nvar2*kcount > odd_storage(nlevel)% ul_size) then
     339            0 :             write(*,*) 'nvar2*kcount > ul_size in cycle_onestep'
     340            0 :             ierr = -1
     341            0 :             return
     342              :          end if
     343              : 
     344              :          if (dbg) write(*,*) 'start lu factorization'
     345              :          ! compute lu factorization of even diagonal blocks
     346            5 :          nmin = 2
     347              : !$omp parallel do schedule(static,3) &
     348            5 : !$omp private(ipivot,dmat,ns,ierr_loc,shift1,shift2,k)
     349              :          do ns = nmin, nblk, 2
     350              :             k = ncycle*(ns-1) + 1
     351              :             shift1 = nvar*(k-1)
     352              :             shift2 = nvar*shift1
     353              :             dmat(1:nvar,1:nvar) => dblk1(shift2+1:shift2+nvar2)
     354              :             ierr_loc = 0
     355              :             ipivot(1:nvar) => ipivot1(shift1+1:shift1+nvar)
     356              :             call my_getf2(nvar, dmat, nvar, ipivot, sfmin, ierr_loc)
     357              :             if (ierr_loc /= 0) then
     358              :                ierr = ierr_loc
     359              :             end if
     360              :          end do
     361              : !$omp end parallel do
     362            5 :          if (ierr /= 0) then
     363              :             !write(*,*) 'factorization failed in bcyclic'
     364              :             return
     365              :          end if
     366              : 
     367              :          if (dbg) write(*,*) 'done lu factorization; start solve'
     368              : 
     369              : !$omp parallel do schedule(static,3) &
     370            5 : !$omp private(ns,k,shift1,shift2,ipivot,dmat,umat,lmat,mat1,ierr_loc)
     371              :          do ns = nmin, nblk, 2
     372              :             ! compute new l=-d[-1]l, u=-d[-1]u for even blocks
     373              :             k = ncycle*(ns-1) + 1
     374              :             shift1 = nvar*(k-1)
     375              :             shift2 = nvar*shift1
     376              :             lmat(1:nvar,1:nvar) => lblk1(shift2+1:shift2+nvar2)
     377              :             ipivot(1:nvar) => ipivot1(shift1+1:shift1+nvar)
     378              :             dmat(1:nvar,1:nvar) => dblk1(shift2+1:shift2+nvar2)
     379              :             call my_getrs(nvar, nvar, dmat, nvar, ipivot, lmat, nvar, ierr_loc)
     380              :             if (ierr_loc /= 0) ierr = ierr_loc
     381              :             lmat = -lmat
     382              :             umat(1:nvar,1:nvar) => ublk1(shift2+1:shift2+nvar2)
     383              :             call my_getrs(nvar, nvar, dmat, nvar, ipivot, umat, nvar, ierr_loc)
     384              :             if (ierr_loc /= 0) ierr = ierr_loc
     385              :             umat = -umat
     386              :          end do
     387              : !$omp end parallel do
     388              :          if (dbg) write(*,*) 'done solve'
     389              : 
     390            5 :          if (ierr /= 0) return
     391              : 
     392              :          ! compute new odd blocks in terms of even block factors
     393              :          ! compute odd hatted matrix elements except at boundaries
     394            5 :          nmin = 1
     395              : !$omp parallel do schedule(static,3) &
     396            5 : !$omp private(i,ns,shift2,dmat,umat,lmat,lnext,unext,lprev,uprev,kcount,shift,umat0,lmat0,k)
     397              :          do i = 1, 3*(1+(nblk-nmin)/2)
     398              : 
     399              :             ns = 2*((i-1)/3) + nmin
     400              :             k = ncycle*(ns-1) + 1
     401              :             shift2 = nvar2*(k-1)
     402              :             dmat(1:nvar,1:nvar) => dblk1(shift2+1:shift2+nvar2)
     403              :             umat(1:nvar,1:nvar) => ublk1(shift2+1:shift2+nvar2)
     404              :             lmat(1:nvar,1:nvar) => lblk1(shift2+1:shift2+nvar2)
     405              : 
     406              :             if (ns < nblk) then
     407              :                shift2 = nvar2*ncycle*ns
     408              :                lnext(1:nvar,1:nvar) => lblk1(shift2+1:shift2+nvar2)
     409              :                unext(1:nvar,1:nvar) => ublk1(shift2+1:shift2+nvar2)
     410              :             end if
     411              : 
     412              :             if (ns > 1) then
     413              :                shift2 = nvar2*ncycle*(ns-2)
     414              :                lprev(1:nvar,1:nvar) => lblk1(shift2+1:shift2+nvar2)
     415              :                uprev(1:nvar,1:nvar) => ublk1(shift2+1:shift2+nvar2)
     416              :             end if
     417              : 
     418              :             kcount = 1+(ns-nmin)/2
     419              :             shift = nvar2*(kcount-1)
     420              :             lmat0(1:nvar,1:nvar) => odd_storage(nlevel)% lmat1(shift+1:shift+nvar2)
     421              :             umat0(1:nvar,1:nvar) => odd_storage(nlevel)% umat1(shift+1:shift+nvar2)
     422              : 
     423              :             select case(mod(i-1,3))
     424              :             case (0)
     425              :                if (ns > 1) then
     426              :                   ! lmat = matmul(lmat0, lprev)
     427              :                   call my_gemm0_p1(nvar,nvar,nvar,lmat0,nvar,lprev,nvar,lmat,nvar)
     428              :                end if
     429              :             case (1)
     430              :                if (ns < nblk) then
     431              :                   ! umat = matmul(umat0, unext)
     432              :                   call my_gemm0_p1(nvar,nvar,nvar,umat0,nvar,unext,nvar,umat,nvar)
     433              :                end if
     434              :             case (2)
     435              :                if (ns < nblk) then
     436              :                   if (ns > 1) then
     437              :                      ! dmat = dmat + matmul(umat0, lnext) + matmul(lmat0,uprev)
     438              :                      call my_gemm_plus_mm(nvar,nvar,nvar,umat0,lnext,lmat0,uprev,dmat)
     439              :                   else
     440              :                      ! dmat = dmat + matmul(umat0, lnext)
     441              :                      call my_gemm_p1(nvar,nvar,nvar,umat0,nvar,lnext,nvar,dmat,nvar)
     442              :                   end if
     443              :                else if (ns > 1) then
     444              :                   ! dmat = dmat + matmul(lmat0,uprev)
     445              :                   call my_gemm_p1(nvar,nvar,nvar,lmat0,nvar,uprev,nvar,dmat,nvar)
     446              :                end if
     447              :             end select
     448              : 
     449              :          end do
     450              : !$omp end parallel do
     451              :          if (dbg) write(*,*) 'done cycle_onestep'
     452              : 
     453            5 :       end subroutine cycle_onestep
     454              : 
     455              : 
     456            5 :       subroutine cycle_rhs( &
     457              :             nblk, nvar, ncycle, nlevel, sparse, dblk1, brhs1, ipivot1, ierr)
     458              :          integer, intent(in) :: nblk, nvar, ncycle, nlevel
     459              :          logical, intent(in) :: sparse
     460              :          real(dp), pointer, intent(in) :: dblk1(:)
     461              :          real(dp), pointer, intent(inout) :: brhs1(:)
     462              :          integer, pointer, intent(in) :: ipivot1(:)
     463              :          integer, intent(out) :: ierr
     464              : 
     465              :          integer :: k, ns, ierr_loc, nmin, kcount, shift, shift1, shift2, nvar2
     466            5 :          integer, pointer :: ipivot(:)
     467            5 :          real(dp), pointer, dimension(:,:) :: dmat, umat, lmat, bptr2
     468            5 :          real(dp), pointer, dimension(:) :: bprev, bnext, bptr
     469              : 
     470              :          include 'formats'
     471              : 
     472            5 :          ierr = 0
     473            5 :          nvar2 = nvar*nvar
     474              :          ! compute dblk[-1]*brhs for even indices and store in brhs(even)
     475            5 :          nmin = 2
     476            5 :          ierr_loc = 0
     477              : !$omp parallel do schedule(static,3) &
     478            5 : !$omp private(ns,shift1,ipivot,shift2,k,dmat,bptr2,bptr,ierr_loc)
     479              :          do ns = nmin, nblk, 2
     480              :             k = ncycle*(ns-1) + 1
     481              :             shift1 = nvar*(k-1)
     482              :             shift2 = nvar*shift1
     483              :             ipivot(1:nvar) => ipivot1(shift1+1:shift1+nvar)
     484              :             dmat(1:nvar,1:nvar) => dblk1(shift2+1:shift2+nvar2)
     485              :             bptr2(1:nvar,1:1) => brhs1(shift1+1:shift1+nvar)
     486              :             call my_getrs(nvar, 1, dmat, nvar, ipivot, bptr2, nvar, ierr_loc)
     487              :             if (ierr_loc /= 0) ierr = ierr_loc
     488              :          end do
     489              : !$omp end parallel do
     490            5 :         if (ierr /= 0) return
     491              : 
     492              :         ! compute odd (hatted) sources (b-hats) for interior rows
     493            5 :          nmin = 1
     494            5 :          kcount = 0
     495              : !$omp parallel do schedule(static,3) &
     496            5 : !$omp private(ns,shift1,bptr,kcount,shift,umat,lmat,bnext,bprev)
     497              :          do ns = nmin, nblk, 2
     498              :             shift1 = nvar*ncycle*(ns-1)
     499              :             bptr(1:nvar) => brhs1(shift1+1:shift1+nvar)
     500              :             kcount = 1+(ns-nmin)/2
     501              :             shift = nvar2*(kcount-1)
     502              :             umat(1:nvar,1:nvar) => odd_storage(nlevel)% umat1(shift+1:shift+nvar2)
     503              :             lmat(1:nvar,1:nvar) => odd_storage(nlevel)% lmat1(shift+1:shift+nvar2)
     504              :             if (ns > 1) then
     505              :                shift1 = nvar*ncycle*(ns-2)
     506              :                bprev => brhs1(shift1+1:shift1+nvar)
     507              :             end if
     508              :             if (ns < nblk) then
     509              :                shift1 = nvar*ncycle*ns
     510              :                bnext => brhs1(shift1+1:shift1+nvar)
     511              :                if (ns > 1) then
     512              :                   ! bptr = bptr - matmul(umat,bnext) - matmul(lmat,bprev)
     513              :                   call my_gemv_mv(nvar,nvar,umat,bnext,lmat,bprev,bptr)
     514              :                else
     515              :                   ! bptr = bptr - matmul(umat,bnext)
     516              :                   call my_gemv(nvar,nvar,umat,nvar,bnext,bptr)
     517              :                end if
     518              :             else if (ns > 1) then
     519              :                ! bptr = bptr - matmul(lmat,bprev)
     520              :                call my_gemv(nvar,nvar,lmat,nvar,bprev,bptr)
     521              :             end if
     522              :          end do
     523              : !$omp end parallel do
     524              : 
     525            5 :          if (nvar2*kcount > odd_storage(nlevel)% ul_size) then
     526            0 :             write(*,*) 'nvar2*kcount > ul_size in cycle_rhs'
     527            0 :             ierr = -1
     528            0 :             return
     529              :          end if
     530              : 
     531            5 :       end subroutine cycle_rhs
     532              : 
     533              : 
     534              :       ! computes even index solution from the computed (at previous,higher level)
     535              :       ! odd index solutions at this level.
     536              :       ! note at this point, the odd brhs values have been replaced (at the highest cycle)
     537              :       ! with the solution values (x), at subsequent (lower) cycles, the
     538              :       ! odd values are replaced by the even solutions at the next highest cycle. the even
     539              :       ! brhs values were multiplied by d[-1] and stored in cycle_rhs
     540              :       ! solve for even index values in terms of (computed at this point) odd index values
     541            5 :       subroutine cycle_solve( &
     542              :             nvar, nz, ncycle, nblk, nlevel, sparse, lblk1, ublk1, brhs1)
     543              :          integer, intent(in) :: nvar, nz, ncycle, nblk, nlevel
     544              :          logical, intent(in) :: sparse
     545              :          real(dp), pointer, intent(in) :: lblk1(:), ublk1(:)
     546              :          real(dp), pointer, intent(inout) :: brhs1(:)
     547              : 
     548            5 :          real(dp), pointer :: umat(:,:), lmat(:,:), bprev(:), bnext(:), bptr(:)
     549              :          integer :: shift1, shift2, nvar2, ns, nmin
     550              : 
     551            5 :          nvar2 = nvar*nvar
     552            5 :          nmin = 2
     553              : !$omp parallel do schedule(static,3) &
     554            5 : !$omp private(ns,shift1,bptr,shift2,lmat,bprev,umat,bnext)
     555              :          do ns = nmin, nblk, 2
     556              :             shift1 = ncycle*nvar*(ns-1)
     557              :             bptr(1:nvar) => brhs1(shift1+1:shift1+nvar)
     558              :             shift2 = nvar*shift1
     559              :             lmat(1:nvar,1:nvar) => lblk1(shift2+1:shift2+nvar2)
     560              :             if (ns > 1) then
     561              :                shift1 = ncycle*nvar*(ns-2)
     562              :                bprev(1:nvar) => brhs1(shift1+1:shift1+nvar)
     563              :             end if
     564              :             if (ns < nblk) then
     565              :                umat(1:nvar,1:nvar) => ublk1(shift2+1:shift2+nvar2)
     566              :                shift1 = ncycle*nvar*ns
     567              :                bnext(1:nvar) => brhs1(shift1+1:shift1+nvar)
     568              :                if (ns > 1) then
     569              :                   ! bptr = bptr + matmul(umat,bnext) + matmul(lmat,bprev)
     570              :                   call my_gemv_p_mv(nvar,nvar,umat,bnext,lmat,bprev,bptr)
     571              :                else
     572              :                   ! bptr = bptr + matmul(umat,bnext)
     573              :                   call my_gemv_p1(nvar,nvar,umat,nvar,bnext,bptr)
     574              :                end if
     575              :             else if (ns > 1) then
     576              :                ! bptr = bptr + matmul(lmat,bprev)
     577              :                call my_gemv_p1(nvar,nvar,lmat,nvar,bprev,bptr)
     578              :             end if
     579              :          end do
     580              : !$omp end parallel do
     581              : 
     582            5 :       end subroutine cycle_solve
     583              : 
     584              : 
     585            1 :       subroutine bcyclic_deallocate ( &
     586              :             lblk1, dblk1, ublk1, ipivot1, brhs1, nvar, nz, sparse, &
     587              :             lrd, rpar_decsol, lid, ipar_decsol, ierr)
     588              :          real(dp), pointer :: lblk1(:)  ! row section of lower block
     589              :          real(dp), pointer :: dblk1(:)  ! row section of diagonal block
     590              :          real(dp), pointer :: ublk1(:)  ! row section of upper block
     591              :          integer, pointer :: ipivot1(:)  ! row section of pivot array for block factorization
     592              :          real(dp), pointer :: brhs1(:)  ! row section of rhs
     593              :          integer, intent(in) :: nvar  ! linear size of each block
     594              :          integer, intent(in) :: nz  ! number of block rows
     595              :          logical, intent(in) :: sparse
     596              :          integer, intent(in) :: lrd, lid
     597              :          real(dp), pointer, intent(inout) :: rpar_decsol(:)  ! (lrd)
     598              :          integer, pointer, intent(inout) :: ipar_decsol(:)  ! (lid)
     599              :          integer, intent(out) :: ierr
     600            1 :          ierr = 0
     601            1 :       end subroutine bcyclic_deallocate
     602              : 
     603            0 :       end module bcyclic
        

Generated by: LCOV version 2.0-1