Line data Source code
1 : ! ***********************************************************************
2 : !
3 : ! Copyright (C) 2012 The MESA Team
4 : !
5 : ! This program is free software: you can redistribute it and/or modify
6 : ! it under the terms of the GNU Lesser General Public License
7 : ! as published by the Free Software Foundation,
8 : ! either version 3 of the License, or (at your option) any later version.
9 : !
10 : ! This program is distributed in the hope that it will be useful,
11 : ! but WITHOUT ANY WARRANTY; without even the implied warranty of
12 : ! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
13 : ! See the GNU Lesser General Public License for more details.
14 : !
15 : ! You should have received a copy of the GNU Lesser General Public License
16 : ! along with this program. If not, see <https://www.gnu.org/licenses/>.
17 : !
18 : ! ***********************************************************************
19 :
20 : ! derived from BCYCLIC as described in hirshman et. al.
21 : ! S.P.Hirshman, K.S.Perumalla, V.E.Lynch, & R.Sanchez,
22 : ! BCYCLIC: A parallel block tridiagonal matrix cyclic solver,
23 : ! J. Computational Physics, 229 (2010) 6392-6404.
24 :
25 : module bcyclic
26 :
27 : use const_def, only: dp
28 : use my_lapack95
29 : use utils_lib, only: set_nan, mesa_error
30 :
31 : implicit none
32 :
33 : type ulstore
34 : integer :: ul_size ! size of umat1 & lmat1 (0 if not allocated)
35 : real(dp), pointer :: umat1(:), lmat1(:)
36 : end type ulstore
37 :
38 : type(ulstore), pointer :: odd_storage(:) => null()
39 : logical, parameter :: dbg = .false.
40 : logical, parameter :: do_fill_with_NaNs = .false.
41 :
42 : contains
43 :
44 1 : subroutine bcyclic_factor ( &
45 : lblk1, dblk1, ublk1, ipivot1, brhs1, nvar, nz, sparse, &
46 : lrd, rpar_decsol, lid, ipar_decsol, ierr)
47 : real(dp), pointer :: lblk1(:) ! row section of lower block
48 : real(dp), pointer :: dblk1(:) ! row section of diagonal block
49 : real(dp), pointer :: ublk1(:) ! row section of upper block
50 : integer, pointer :: ipivot1(:) ! row section of pivot array for block factorization
51 : real(dp), pointer :: brhs1(:) ! row section of rhs
52 : integer, intent(in) :: nvar ! linear size of each block
53 : integer, intent(in) :: nz ! number of block rows
54 : logical, intent(in) :: sparse
55 : integer, intent(in) :: lrd, lid
56 : real(dp), pointer, intent(inout) :: rpar_decsol(:) ! (lrd)
57 : integer, pointer, intent(inout) :: ipar_decsol(:) ! (lid)
58 : integer, intent(out) :: ierr
59 :
60 1 : integer, pointer :: nslevel(:), ipivot(:)
61 : integer :: ncycle, nstemp, maxlevels, nlevel
62 : logical :: have_odd_storage
63 1 : real(dp), pointer, dimension(:,:) :: dmat
64 1 : real(dp) :: dlamch, sfmin
65 :
66 : include 'formats'
67 :
68 1 : ierr = 0
69 :
70 : if (dbg) write(*,*) 'start bcyclic_factor'
71 :
72 : ! compute number of cyclic reduction levels
73 1 : ncycle = 1
74 1 : maxlevels = 0
75 6 : do while (ncycle < nz)
76 5 : ncycle = 2*ncycle
77 5 : maxlevels = maxlevels+1
78 : end do
79 1 : maxlevels = max(1, maxlevels)
80 :
81 1 : have_odd_storage = associated(odd_storage)
82 1 : if (have_odd_storage) then
83 0 : if (size(odd_storage) < maxlevels) then
84 0 : call clear_storage
85 : have_odd_storage = .false.
86 : end if
87 : end if
88 :
89 1 : if (.not. have_odd_storage) then
90 1 : allocate (odd_storage(maxlevels+3), stat=ierr)
91 1 : if (ierr /= 0) then
92 0 : write(*,*) 'alloc failed for odd_storage in bcyclic'
93 0 : return
94 : end if
95 9 : do nlevel = 1, size(odd_storage)
96 9 : odd_storage(nlevel)% ul_size = 0
97 : end do
98 : end if
99 :
100 1 : allocate (nslevel(maxlevels), stat=ierr)
101 1 : if (ierr /= 0) return
102 :
103 1 : if (sparse) then
104 0 : write(*,*) 'no support for sparse matrix in bcyclic'
105 0 : ierr = -1
106 0 : return
107 : end if
108 :
109 1 : ncycle = 1
110 1 : nstemp = nz
111 1 : nlevel = 1
112 :
113 : if (dbg) write(*,*) 'start factor_cycle'
114 :
115 5 : factor_cycle: do ! perform cyclic-reduction factorization
116 :
117 5 : nslevel(nlevel) = nstemp
118 :
119 : if (dbg) write(*,2) 'call cycle_onestep', nstemp
120 :
121 : call cycle_onestep( &
122 : nvar, nz, nstemp, ncycle, nlevel, sparse, &
123 5 : lblk1, dblk1, ublk1, ipivot1, ierr)
124 5 : if (ierr /= 0) then
125 : !write(*,*) 'cycle_onestep failed'
126 0 : call dealloc
127 0 : return
128 : end if
129 :
130 5 : if (nstemp == 1) exit factor_cycle
131 :
132 5 : nstemp = (nstemp+1)/2
133 5 : nlevel = nlevel+1
134 5 : ncycle = 2*ncycle
135 :
136 5 : if (nlevel > maxlevels) exit factor_cycle
137 :
138 : end do factor_cycle
139 :
140 : if (dbg) write(*,*) 'done factor_cycle'
141 :
142 : ! factor row 1
143 1 : dmat(1:nvar,1:nvar) => dblk1(1:nvar*nvar)
144 1 : sfmin = dlamch('S')
145 1 : ipivot(1:nvar) => ipivot1(1:nvar)
146 1 : call my_getf2(nvar, dmat, nvar, ipivot, sfmin, ierr)
147 1 : if (ierr /= 0) then
148 0 : write(*,*) 'row 1 factor failed in bcyclic_factor'
149 0 : call dealloc
150 0 : return
151 : end if
152 :
153 1 : call dealloc
154 :
155 :
156 1 : if (dbg) write(*,*) 'done bcyclic_factor'
157 :
158 : contains
159 :
160 1 : subroutine dealloc
161 1 : deallocate (nslevel)
162 1 : end subroutine dealloc
163 :
164 :
165 : end subroutine bcyclic_factor
166 :
167 :
168 1 : subroutine bcyclic_solve ( &
169 : lblk1, dblk1, ublk1, ipivot1, brhs1, nvar, nz, sparse, &
170 : lrd, rpar_decsol, lid, ipar_decsol, ierr)
171 : real(dp), pointer :: lblk1(:) ! row section of lower block
172 : real(dp), pointer :: dblk1(:) ! row section of diagonal block
173 : real(dp), pointer :: ublk1(:) ! row section of upper block
174 : integer, pointer :: ipivot1(:) ! row section of pivot array for block factorization
175 : real(dp), pointer :: brhs1(:) ! row section of rhs
176 : integer, intent(in) :: nvar ! linear size of each block
177 : integer, intent(in) :: nz ! number of block rows
178 : logical, intent(in) :: sparse
179 : integer, intent(in) :: lrd, lid
180 : real(dp), pointer, intent(inout) :: rpar_decsol(:) ! (lrd)
181 : integer, pointer, intent(inout) :: ipar_decsol(:) ! (lid)
182 : integer, intent(out) :: ierr
183 :
184 1 : integer, pointer :: nslevel(:), ipivot(:)
185 : integer :: ncycle, nstemp, maxlevels, nlevel, nvar2
186 1 : real(dp), pointer, dimension(:,:) :: dmat, bptr2
187 :
188 : include 'formats'
189 :
190 :
191 : if (dbg) write(*,*) 'start bcyclic_solve'
192 :
193 1 : ierr = 0
194 1 : nvar2 = nvar*nvar
195 1 : ncycle = 1
196 1 : maxlevels = 0
197 6 : do while (ncycle < nz)
198 5 : ncycle = 2*ncycle
199 5 : maxlevels = maxlevels+1
200 : end do
201 1 : maxlevels = max(1, maxlevels)
202 :
203 1 : allocate (nslevel(maxlevels), stat=ierr)
204 1 : if (ierr /= 0) return
205 :
206 1 : ncycle = 1
207 1 : nstemp = nz
208 1 : nlevel = 1
209 :
210 : if (dbg) write(*,*) 'start forward_cycle'
211 :
212 5 : forward_cycle: do
213 :
214 5 : nslevel(nlevel) = nstemp
215 : if (dbg) write(*,2) 'call cycle_rhs', nstemp
216 : call cycle_rhs( &
217 5 : nstemp, nvar, ncycle, nlevel, sparse, dblk1, brhs1, ipivot1, ierr)
218 5 : if (ierr /= 0) then
219 0 : call dealloc
220 0 : return
221 : end if
222 :
223 5 : if (nstemp == 1) exit forward_cycle
224 :
225 5 : nstemp = (nstemp+1)/2
226 5 : nlevel = nlevel+1
227 5 : ncycle = 2*ncycle
228 :
229 5 : if (nlevel > maxlevels) exit forward_cycle
230 :
231 : end do forward_cycle
232 :
233 : if (dbg) write(*,*) 'done forward_cycle'
234 :
235 1 : ipivot(1:nvar) => ipivot1(1:nvar)
236 1 : dmat(1:nvar,1:nvar) => dblk1(1:nvar2)
237 1 : bptr2(1:nvar,1:1) => brhs1(1:nvar)
238 1 : call my_getrs(nvar, 1, dmat, nvar, ipivot, bptr2, nvar, ierr)
239 1 : if (ierr /= 0) then
240 0 : write(*,*) 'failed in bcyclic_solve'
241 0 : call dealloc
242 0 : return
243 : end if
244 :
245 : ! back solve for even x's
246 6 : back_cycle: do while (ncycle > 1)
247 5 : ncycle = ncycle/2
248 5 : nlevel = nlevel-1
249 5 : if (nlevel < 1) then
250 0 : ierr = -1
251 0 : exit back_cycle
252 : end if
253 5 : nstemp = nslevel(nlevel)
254 : call cycle_solve( &
255 5 : nvar, nz, ncycle, nstemp, nlevel, sparse, lblk1, ublk1, brhs1)
256 : end do back_cycle
257 :
258 1 : call dealloc
259 :
260 1 : if (dbg) write(*,*) 'done bcyclic_solve'
261 :
262 :
263 : contains
264 :
265 1 : subroutine dealloc
266 1 : deallocate (nslevel)
267 1 : end subroutine dealloc
268 :
269 :
270 : end subroutine bcyclic_solve
271 :
272 :
273 0 : subroutine clear_storage
274 : integer :: nlevel
275 0 : nlevel = size(odd_storage)
276 0 : do while (nlevel > 0)
277 0 : if (odd_storage(nlevel)% ul_size > 0) then
278 0 : deallocate(odd_storage(nlevel)% umat1)
279 0 : deallocate(odd_storage(nlevel)% lmat1)
280 : end if
281 0 : nlevel = nlevel-1
282 : end do
283 0 : deallocate(odd_storage)
284 : nullify(odd_storage)
285 0 : end subroutine clear_storage
286 :
287 :
288 5 : subroutine cycle_onestep( &
289 : nvar, nz, nblk, ncycle, nlevel, sparse, &
290 : lblk1, dblk1, ublk1, ipivot1, ierr)
291 : integer, intent(in) :: nvar, nz, nblk, ncycle, nlevel
292 : logical, intent(in) :: sparse
293 : real(dp), pointer, intent(inout) :: lblk1(:), dblk1(:), ublk1(:)
294 : integer, pointer, intent(inout) :: ipivot1(:)
295 : integer, intent(out) :: ierr
296 :
297 5 : integer, pointer :: ipivot(:)
298 5 : real(dp), pointer, dimension(:,:) :: dmat, umat, lmat, umat0, lmat0
299 5 : real(dp), pointer, dimension(:,:) :: lnext, unext, lprev, uprev
300 5 : real(dp), pointer :: mat1(:)
301 : integer :: i, shift, min_sz, new_sz, shift1, shift2, nvar2, &
302 : ns, ierr_loc, nmin, kcount, k
303 5 : real(dp) :: dlamch, sfmin
304 :
305 : include 'formats'
306 :
307 5 : ierr = 0
308 10 : sfmin = dlamch('S')
309 5 : nvar2 = nvar*nvar
310 5 : nmin = 1
311 5 : kcount = 1+(nblk-nmin)/2
312 5 : min_sz = nvar2*kcount
313 5 : if (odd_storage(nlevel)% ul_size < min_sz) then
314 5 : if (odd_storage(nlevel)% ul_size > 0) &
315 0 : deallocate(odd_storage(nlevel)% umat1, odd_storage(nlevel)% lmat1)
316 5 : new_sz = FLOOR(min_sz*1.1) + 100
317 5 : odd_storage(nlevel)% ul_size = new_sz
318 : allocate (odd_storage(nlevel)% umat1(new_sz), &
319 5 : odd_storage(nlevel)% lmat1(new_sz), stat=ierr)
320 5 : if (ierr /= 0) then
321 0 : write(*,*) 'allocation error in cycle_onestep'
322 0 : return
323 : end if
324 : end if
325 :
326 5 : !$omp parallel do private(ns,kcount,shift,shift2,i)
327 : do ns = nmin, nblk, 2 ! copy umat and lmat
328 : kcount = (ns-nmin)/2 + 1
329 : shift = nvar2*(kcount-1)
330 : shift2 = nvar2*ncycle*(ns-1)
331 : do i=1,nvar2
332 : odd_storage(nlevel)% umat1(shift+i) = ublk1(shift2+i)
333 : odd_storage(nlevel)% lmat1(shift+i) = lblk1(shift2+i)
334 : end do
335 : end do
336 : !$omp end parallel do
337 :
338 5 : if (nvar2*kcount > odd_storage(nlevel)% ul_size) then
339 0 : write(*,*) 'nvar2*kcount > ul_size in cycle_onestep'
340 0 : ierr = -1
341 0 : return
342 : end if
343 :
344 : if (dbg) write(*,*) 'start lu factorization'
345 : ! compute lu factorization of even diagonal blocks
346 5 : nmin = 2
347 : !$omp parallel do schedule(static,3) &
348 5 : !$omp private(ipivot,dmat,ns,ierr_loc,shift1,shift2,k)
349 : do ns = nmin, nblk, 2
350 : k = ncycle*(ns-1) + 1
351 : shift1 = nvar*(k-1)
352 : shift2 = nvar*shift1
353 : dmat(1:nvar,1:nvar) => dblk1(shift2+1:shift2+nvar2)
354 : ierr_loc = 0
355 : ipivot(1:nvar) => ipivot1(shift1+1:shift1+nvar)
356 : call my_getf2(nvar, dmat, nvar, ipivot, sfmin, ierr_loc)
357 : if (ierr_loc /= 0) then
358 : ierr = ierr_loc
359 : end if
360 : end do
361 : !$omp end parallel do
362 5 : if (ierr /= 0) then
363 : !write(*,*) 'factorization failed in bcyclic'
364 : return
365 : end if
366 :
367 : if (dbg) write(*,*) 'done lu factorization; start solve'
368 :
369 : !$omp parallel do schedule(static,3) &
370 5 : !$omp private(ns,k,shift1,shift2,ipivot,dmat,umat,lmat,mat1,ierr_loc)
371 : do ns = nmin, nblk, 2
372 : ! compute new l=-d[-1]l, u=-d[-1]u for even blocks
373 : k = ncycle*(ns-1) + 1
374 : shift1 = nvar*(k-1)
375 : shift2 = nvar*shift1
376 : lmat(1:nvar,1:nvar) => lblk1(shift2+1:shift2+nvar2)
377 : ipivot(1:nvar) => ipivot1(shift1+1:shift1+nvar)
378 : dmat(1:nvar,1:nvar) => dblk1(shift2+1:shift2+nvar2)
379 : call my_getrs(nvar, nvar, dmat, nvar, ipivot, lmat, nvar, ierr_loc)
380 : if (ierr_loc /= 0) ierr = ierr_loc
381 : lmat = -lmat
382 : umat(1:nvar,1:nvar) => ublk1(shift2+1:shift2+nvar2)
383 : call my_getrs(nvar, nvar, dmat, nvar, ipivot, umat, nvar, ierr_loc)
384 : if (ierr_loc /= 0) ierr = ierr_loc
385 : umat = -umat
386 : end do
387 : !$omp end parallel do
388 : if (dbg) write(*,*) 'done solve'
389 :
390 5 : if (ierr /= 0) return
391 :
392 : ! compute new odd blocks in terms of even block factors
393 : ! compute odd hatted matrix elements except at boundaries
394 5 : nmin = 1
395 : !$omp parallel do schedule(static,3) &
396 5 : !$omp private(i,ns,shift2,dmat,umat,lmat,lnext,unext,lprev,uprev,kcount,shift,umat0,lmat0,k)
397 : do i = 1, 3*(1+(nblk-nmin)/2)
398 :
399 : ns = 2*((i-1)/3) + nmin
400 : k = ncycle*(ns-1) + 1
401 : shift2 = nvar2*(k-1)
402 : dmat(1:nvar,1:nvar) => dblk1(shift2+1:shift2+nvar2)
403 : umat(1:nvar,1:nvar) => ublk1(shift2+1:shift2+nvar2)
404 : lmat(1:nvar,1:nvar) => lblk1(shift2+1:shift2+nvar2)
405 :
406 : if (ns < nblk) then
407 : shift2 = nvar2*ncycle*ns
408 : lnext(1:nvar,1:nvar) => lblk1(shift2+1:shift2+nvar2)
409 : unext(1:nvar,1:nvar) => ublk1(shift2+1:shift2+nvar2)
410 : end if
411 :
412 : if (ns > 1) then
413 : shift2 = nvar2*ncycle*(ns-2)
414 : lprev(1:nvar,1:nvar) => lblk1(shift2+1:shift2+nvar2)
415 : uprev(1:nvar,1:nvar) => ublk1(shift2+1:shift2+nvar2)
416 : end if
417 :
418 : kcount = 1+(ns-nmin)/2
419 : shift = nvar2*(kcount-1)
420 : lmat0(1:nvar,1:nvar) => odd_storage(nlevel)% lmat1(shift+1:shift+nvar2)
421 : umat0(1:nvar,1:nvar) => odd_storage(nlevel)% umat1(shift+1:shift+nvar2)
422 :
423 : select case(mod(i-1,3))
424 : case (0)
425 : if (ns > 1) then
426 : ! lmat = matmul(lmat0, lprev)
427 : call my_gemm0_p1(nvar,nvar,nvar,lmat0,nvar,lprev,nvar,lmat,nvar)
428 : end if
429 : case (1)
430 : if (ns < nblk) then
431 : ! umat = matmul(umat0, unext)
432 : call my_gemm0_p1(nvar,nvar,nvar,umat0,nvar,unext,nvar,umat,nvar)
433 : end if
434 : case (2)
435 : if (ns < nblk) then
436 : if (ns > 1) then
437 : ! dmat = dmat + matmul(umat0, lnext) + matmul(lmat0,uprev)
438 : call my_gemm_plus_mm(nvar,nvar,nvar,umat0,lnext,lmat0,uprev,dmat)
439 : else
440 : ! dmat = dmat + matmul(umat0, lnext)
441 : call my_gemm_p1(nvar,nvar,nvar,umat0,nvar,lnext,nvar,dmat,nvar)
442 : end if
443 : else if (ns > 1) then
444 : ! dmat = dmat + matmul(lmat0,uprev)
445 : call my_gemm_p1(nvar,nvar,nvar,lmat0,nvar,uprev,nvar,dmat,nvar)
446 : end if
447 : end select
448 :
449 : end do
450 : !$omp end parallel do
451 : if (dbg) write(*,*) 'done cycle_onestep'
452 :
453 5 : end subroutine cycle_onestep
454 :
455 :
456 5 : subroutine cycle_rhs( &
457 : nblk, nvar, ncycle, nlevel, sparse, dblk1, brhs1, ipivot1, ierr)
458 : integer, intent(in) :: nblk, nvar, ncycle, nlevel
459 : logical, intent(in) :: sparse
460 : real(dp), pointer, intent(in) :: dblk1(:)
461 : real(dp), pointer, intent(inout) :: brhs1(:)
462 : integer, pointer, intent(in) :: ipivot1(:)
463 : integer, intent(out) :: ierr
464 :
465 : integer :: k, ns, ierr_loc, nmin, kcount, shift, shift1, shift2, nvar2
466 5 : integer, pointer :: ipivot(:)
467 5 : real(dp), pointer, dimension(:,:) :: dmat, umat, lmat, bptr2
468 5 : real(dp), pointer, dimension(:) :: bprev, bnext, bptr
469 :
470 : include 'formats'
471 :
472 5 : ierr = 0
473 5 : nvar2 = nvar*nvar
474 : ! compute dblk[-1]*brhs for even indices and store in brhs(even)
475 5 : nmin = 2
476 5 : ierr_loc = 0
477 : !$omp parallel do schedule(static,3) &
478 5 : !$omp private(ns,shift1,ipivot,shift2,k,dmat,bptr2,bptr,ierr_loc)
479 : do ns = nmin, nblk, 2
480 : k = ncycle*(ns-1) + 1
481 : shift1 = nvar*(k-1)
482 : shift2 = nvar*shift1
483 : ipivot(1:nvar) => ipivot1(shift1+1:shift1+nvar)
484 : dmat(1:nvar,1:nvar) => dblk1(shift2+1:shift2+nvar2)
485 : bptr2(1:nvar,1:1) => brhs1(shift1+1:shift1+nvar)
486 : call my_getrs(nvar, 1, dmat, nvar, ipivot, bptr2, nvar, ierr_loc)
487 : if (ierr_loc /= 0) ierr = ierr_loc
488 : end do
489 : !$omp end parallel do
490 5 : if (ierr /= 0) return
491 :
492 : ! compute odd (hatted) sources (b-hats) for interior rows
493 5 : nmin = 1
494 5 : kcount = 0
495 : !$omp parallel do schedule(static,3) &
496 5 : !$omp private(ns,shift1,bptr,kcount,shift,umat,lmat,bnext,bprev)
497 : do ns = nmin, nblk, 2
498 : shift1 = nvar*ncycle*(ns-1)
499 : bptr(1:nvar) => brhs1(shift1+1:shift1+nvar)
500 : kcount = 1+(ns-nmin)/2
501 : shift = nvar2*(kcount-1)
502 : umat(1:nvar,1:nvar) => odd_storage(nlevel)% umat1(shift+1:shift+nvar2)
503 : lmat(1:nvar,1:nvar) => odd_storage(nlevel)% lmat1(shift+1:shift+nvar2)
504 : if (ns > 1) then
505 : shift1 = nvar*ncycle*(ns-2)
506 : bprev => brhs1(shift1+1:shift1+nvar)
507 : end if
508 : if (ns < nblk) then
509 : shift1 = nvar*ncycle*ns
510 : bnext => brhs1(shift1+1:shift1+nvar)
511 : if (ns > 1) then
512 : ! bptr = bptr - matmul(umat,bnext) - matmul(lmat,bprev)
513 : call my_gemv_mv(nvar,nvar,umat,bnext,lmat,bprev,bptr)
514 : else
515 : ! bptr = bptr - matmul(umat,bnext)
516 : call my_gemv(nvar,nvar,umat,nvar,bnext,bptr)
517 : end if
518 : else if (ns > 1) then
519 : ! bptr = bptr - matmul(lmat,bprev)
520 : call my_gemv(nvar,nvar,lmat,nvar,bprev,bptr)
521 : end if
522 : end do
523 : !$omp end parallel do
524 :
525 5 : if (nvar2*kcount > odd_storage(nlevel)% ul_size) then
526 0 : write(*,*) 'nvar2*kcount > ul_size in cycle_rhs'
527 0 : ierr = -1
528 0 : return
529 : end if
530 :
531 5 : end subroutine cycle_rhs
532 :
533 :
534 : ! computes even index solution from the computed (at previous,higher level)
535 : ! odd index solutions at this level.
536 : ! note at this point, the odd brhs values have been replaced (at the highest cycle)
537 : ! with the solution values (x), at subsequent (lower) cycles, the
538 : ! odd values are replaced by the even solutions at the next highest cycle. the even
539 : ! brhs values were multiplied by d[-1] and stored in cycle_rhs
540 : ! solve for even index values in terms of (computed at this point) odd index values
541 5 : subroutine cycle_solve( &
542 : nvar, nz, ncycle, nblk, nlevel, sparse, lblk1, ublk1, brhs1)
543 : integer, intent(in) :: nvar, nz, ncycle, nblk, nlevel
544 : logical, intent(in) :: sparse
545 : real(dp), pointer, intent(in) :: lblk1(:), ublk1(:)
546 : real(dp), pointer, intent(inout) :: brhs1(:)
547 :
548 5 : real(dp), pointer :: umat(:,:), lmat(:,:), bprev(:), bnext(:), bptr(:)
549 : integer :: shift1, shift2, nvar2, ns, nmin
550 :
551 5 : nvar2 = nvar*nvar
552 5 : nmin = 2
553 : !$omp parallel do schedule(static,3) &
554 5 : !$omp private(ns,shift1,bptr,shift2,lmat,bprev,umat,bnext)
555 : do ns = nmin, nblk, 2
556 : shift1 = ncycle*nvar*(ns-1)
557 : bptr(1:nvar) => brhs1(shift1+1:shift1+nvar)
558 : shift2 = nvar*shift1
559 : lmat(1:nvar,1:nvar) => lblk1(shift2+1:shift2+nvar2)
560 : if (ns > 1) then
561 : shift1 = ncycle*nvar*(ns-2)
562 : bprev(1:nvar) => brhs1(shift1+1:shift1+nvar)
563 : end if
564 : if (ns < nblk) then
565 : umat(1:nvar,1:nvar) => ublk1(shift2+1:shift2+nvar2)
566 : shift1 = ncycle*nvar*ns
567 : bnext(1:nvar) => brhs1(shift1+1:shift1+nvar)
568 : if (ns > 1) then
569 : ! bptr = bptr + matmul(umat,bnext) + matmul(lmat,bprev)
570 : call my_gemv_p_mv(nvar,nvar,umat,bnext,lmat,bprev,bptr)
571 : else
572 : ! bptr = bptr + matmul(umat,bnext)
573 : call my_gemv_p1(nvar,nvar,umat,nvar,bnext,bptr)
574 : end if
575 : else if (ns > 1) then
576 : ! bptr = bptr + matmul(lmat,bprev)
577 : call my_gemv_p1(nvar,nvar,lmat,nvar,bprev,bptr)
578 : end if
579 : end do
580 : !$omp end parallel do
581 :
582 5 : end subroutine cycle_solve
583 :
584 :
585 1 : subroutine bcyclic_deallocate ( &
586 : lblk1, dblk1, ublk1, ipivot1, brhs1, nvar, nz, sparse, &
587 : lrd, rpar_decsol, lid, ipar_decsol, ierr)
588 : real(dp), pointer :: lblk1(:) ! row section of lower block
589 : real(dp), pointer :: dblk1(:) ! row section of diagonal block
590 : real(dp), pointer :: ublk1(:) ! row section of upper block
591 : integer, pointer :: ipivot1(:) ! row section of pivot array for block factorization
592 : real(dp), pointer :: brhs1(:) ! row section of rhs
593 : integer, intent(in) :: nvar ! linear size of each block
594 : integer, intent(in) :: nz ! number of block rows
595 : logical, intent(in) :: sparse
596 : integer, intent(in) :: lrd, lid
597 : real(dp), pointer, intent(inout) :: rpar_decsol(:) ! (lrd)
598 : integer, pointer, intent(inout) :: ipar_decsol(:) ! (lid)
599 : integer, intent(out) :: ierr
600 1 : ierr = 0
601 1 : end subroutine bcyclic_deallocate
602 :
603 0 : end module bcyclic
|