Line data Source code
1 : ! ***********************************************************************
2 : !
3 : ! Copyright (C) 2012-2019 The MESA Team
4 : !
5 : ! This program is free software: you can redistribute it and/or modify
6 : ! it under the terms of the GNU Lesser General Public License
7 : ! as published by the Free Software Foundation,
8 : ! either version 3 of the License, or (at your option) any later version.
9 : !
10 : ! This program is distributed in the hope that it will be useful,
11 : ! but WITHOUT ANY WARRANTY; without even the implied warranty of
12 : ! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
13 : ! See the GNU Lesser General Public License for more details.
14 : !
15 : ! You should have received a copy of the GNU Lesser General Public License
16 : ! along with this program. If not, see <https://www.gnu.org/licenses/>.
17 : !
18 : ! ***********************************************************************
19 :
20 : module mod_newton
21 :
22 : use const_def, only: dp, qp, i8
23 : use math_lib
24 : use utils_lib, only: is_bad, mesa_error
25 : use num_def
26 : use mtx_def
27 : use mtx_lib, only: band_multiply_xa, lapack_work_sizes, block_multiply_xa, multiply_xa
28 :
29 : implicit none
30 :
31 : contains
32 :
33 10 : subroutine do_newton_wrapper(nz, nvar, x, xold, matrix_type, mljac, mujac, &
34 : decsol, decsolblk, lrd, rpar_decsol, lid, ipar_decsol, which_decsol, &
35 : tol_correction_norm, set_primaries, set_secondaries, set_xscale, &
36 : Bdomain, xdomain, eval_equations, size_equ, sizeb, inspectB, &
37 : enter_setmatrix, exit_setmatrix, failed_in_setmatrix, force_another_iteration, &
38 10 : xscale, equ, ldy, nsec, y, work, lwork, iwork, liwork, &
39 10 : AF, lrpar, rpar, lipar, ipar, convergence_failure, ierr)
40 :
41 : ! the primary variables
42 : integer, intent(in) :: nz ! number of zones
43 : integer, intent(in) :: nvar ! number of variables per zone
44 : ! the total number of primary variables is neq
45 : real(dp), pointer, dimension(:) :: x ! =(nvar,nz)
46 : ! new vector of primaries
47 : real(dp), pointer, dimension(:) :: xold ! =(nvar,nz)
48 : ! old vector of primaries
49 :
50 : ! information about the jacobian matrix
51 : integer, intent(in) :: matrix_type ! see num_def.f for values
52 : ! if matrix_type == banded_matrix_type, mljac and mujac give the bandwidths.
53 : ! if matrix_type /= banded_matrix_type, mljac and mujac are not used.
54 :
55 : integer, intent(in) :: mljac ! number of subdiagonals within the band of the jacobian
56 : integer, intent(in) :: mujac ! number of superdiagonals
57 : ! for example if you have a centered 3 zone stencil,
58 : ! then you will have mljac and mujac both equal to 2*nvar-1
59 : ! mljac and mujac are only used for matrix_type == banded matrix type
60 :
61 : ! matrix routines
62 : ! there are implementations of the matrix routines available in mesa/mtx.
63 : ! for example, the LAPACK versions are called lapack_dec, lapack_sol, etc.
64 : interface
65 : #include "mtx_decsol.dek"
66 : #include "mtx_decsolblk_dble.dek"
67 : end interface
68 : ! these arrays provide optional extra working storage for the matrix routines.
69 : ! the implementations in mesa/mtx include routines to determine the sizes.
70 : ! for example, the LAPACK version is called lapack_work_sizes.
71 : integer, intent(in) :: lrd, lid
72 : integer, intent(inout), pointer :: ipar_decsol(:) ! (lid)
73 : real(dp), intent(inout), pointer :: rpar_decsol(:) ! (lrd)
74 : integer, intent(in) :: which_decsol
75 :
76 : real(dp), pointer, dimension(:) :: xscale ! =(nvar,nz)
77 : ! typical values for x. set by set_xscale.
78 : real(dp), pointer, dimension(:) :: equ ! =(nvar,nz)
79 : ! equ(i) has the residual for equation i, i.e., the difference between
80 : ! the left and right hand sides of the equation.
81 :
82 : ! the secondary variables
83 : ! the "secondaries" for zone k depend only on the primaries of zone k
84 : ! and therefore need not be recomputed is the zone k primaries have not been modified.
85 : ! using this information can significantly accelerate the computation of numerical jacobians.
86 : ! for stellar evolution, the secondaries include such expensive-to-compute items
87 : ! as equation of state,
88 : ! nuclear reaction rates, and opacities.
89 : integer, intent(in) :: ldy ! leading dimension of y, >= nz
90 : integer, intent(in) :: nsec ! number of secondaries per zone
91 : real(dp), pointer, dimension(:) :: y ! the values. =(ldy,nsec)
92 :
93 : ! work arrays. required sizes provided by the routine newton_work_sizes.
94 : ! for standard use, set work and iwork to 0 before calling.
95 : ! NOTE: these arrays contain some optional parameter settings and outputs.
96 : ! see num_def for details.
97 : integer, intent(in) :: lwork, liwork
98 : real(dp), intent(inout), target :: work(:) ! (lwork)
99 : integer, intent(inout), target :: iwork(:) ! (liwork)
100 : real(dp), pointer, dimension(:) :: AF ! for factored jacobian
101 : ! will be allocated or reallocated as necessary.
102 :
103 : ! convergence criteria
104 : real(dp), intent(in) :: tol_correction_norm
105 : ! a trial solution is considered to have converged if
106 : ! max_correction <= tol_max_correction and
107 : !
108 : ! either
109 : ! (correction_norm <= tol_correction_norm)
110 : ! .and. (residual_norm <= tol_residual_norm)
111 : ! or
112 : ! (correction_norm*residual_norm <= tol_corr_resid_product)
113 : ! .and. (abs(slope) <= tol_abs_slope_min)
114 : !
115 : ! where "slope" is slope of the line for line search in the newton solver,
116 : ! and is analogous to the slope of df/dx in a 1D newton root finder.
117 :
118 : ! parameters for caller-supplied routines
119 : integer, intent(in) :: lrpar, lipar
120 : real(dp), intent(inout) :: rpar(:) ! (lrpar)
121 : integer, intent(inout) :: ipar(:) ! (lipar)
122 :
123 : ! output
124 : logical, intent(out) :: convergence_failure
125 : integer, intent(out) :: ierr ! 0 means okay.
126 :
127 : ! the following routines implement the problem-specific aspects of the newton solver.
128 : ! see num/include/newton_procs.dek for documentation.
129 : ! there are default implementations for most of the routines (see below).
130 : ! the only one without a default is the "eval_equations" routine that computes
131 : ! the equation residuals for your particular problem.
132 : interface
133 : #include "newton_procs.dek"
134 : end interface
135 :
136 : integer :: ldAF, neqns
137 10 : real(dp), pointer :: AF_copy(:) ! =(ldAF, neq)
138 :
139 : ! for sparse
140 : integer :: n, nzmax, need_lrd, need_lid
141 :
142 : integer(i8) :: test_time0, test_time1, clock_rate
143 : logical :: do_test_timing
144 :
145 : include 'formats'
146 :
147 10 : do_test_timing = (work(r_test_time) /= 0)
148 10 : work(r_test_time) = 0
149 :
150 10 : ierr = 0
151 :
152 10 : nzmax = 0
153 20 : if (which_decsol == lapack) then
154 10 : call lapack_work_sizes(n, need_lrd, need_lid)
155 : else
156 0 : write (*, *) 'newton: unknown value for matrix solver option'
157 0 : ierr = -1
158 0 : return
159 : end if
160 :
161 10 : if (need_lrd > lrd .or. need_lid > lid) then
162 0 : write (*, *) 'bad lrd or lid for newton'
163 0 : write (*, 2) 'need_lrd', need_lrd
164 0 : write (*, 2) ' lrd', lrd
165 0 : write (*, 2) 'need_lid', need_lid
166 0 : write (*, 2) ' lid', lid
167 0 : ierr = -1
168 0 : return
169 : end if
170 :
171 10 : neqns = nvar*nz
172 10 : if (matrix_type == block_tridiag_dble_matrix_type) then
173 0 : ldAF = 3*nvar
174 10 : else if (matrix_type == square_matrix_type) then
175 0 : ldAF = neqns
176 : else
177 10 : ldAF = 2*mljac + mujac + 1
178 : end if
179 :
180 10 : if (associated(AF)) then
181 0 : if (size(AF, dim=1) < ldAF*neqns) then
182 0 : deallocate (AF)
183 : nullify (AF)
184 : end if
185 : end if
186 :
187 10 : if (.not. associated(AF)) then
188 10 : allocate (AF((ldAF + 2)*(neqns + 200)), stat=ierr)
189 10 : if (ierr /= 0) return
190 : end if
191 :
192 10 : AF_copy => AF
193 :
194 10 : if (do_test_timing) call system_clock(test_time0, clock_rate)
195 :
196 : call do_newton(nz, nvar, x, xold, AF_copy, ldAF, neqns, &
197 : matrix_type, mljac, mujac, &
198 : decsol, decsolblk, &
199 : lrd, rpar_decsol, lid, ipar_decsol, &
200 : tol_correction_norm, &
201 : set_primaries, set_secondaries, set_xscale, Bdomain, xdomain, eval_equations, &
202 : size_equ, sizeb, inspectB, &
203 : enter_setmatrix, exit_setmatrix, failed_in_setmatrix, force_another_iteration, &
204 : xscale, equ, ldy, nsec, y, work, lwork, iwork, liwork, &
205 10 : lrpar, rpar, lipar, ipar, convergence_failure, ierr)
206 :
207 20 : if (do_test_timing) then
208 0 : call system_clock(test_time1, clock_rate)
209 0 : work(r_test_time) = work(r_test_time) + dble(test_time1 - test_time0)/clock_rate
210 : end if
211 :
212 : contains
213 :
214 : logical function bad_isize(a, sz, str)
215 : integer :: a(:)
216 : integer, intent(in) :: sz
217 : character(len=*), intent(in) :: str
218 : bad_isize = (size(a, dim=1) < sz)
219 : if (.not. bad_isize) return
220 : ierr = -1
221 : return
222 : end function bad_isize
223 :
224 : logical function bad_size(a, sz, str)
225 : real(dp) :: a(:)
226 : integer, intent(in) :: sz
227 : character(len=*), intent(in) :: str
228 : bad_size = (size(a, dim=1) < sz)
229 : if (.not. bad_size) return
230 : ierr = -1
231 : return
232 : end function bad_size
233 :
234 : logical function bad_size_dble(a, sz, str)
235 : real(dp) :: a(:)
236 : integer, intent(in) :: sz
237 : character(len=*), intent(in) :: str
238 : bad_size_dble = (size(a, dim=1) < sz)
239 : if (.not. bad_size_dble) return
240 : ierr = -1
241 : return
242 : end function bad_size_dble
243 :
244 : logical function bad_sizes(a, sz1, sz2, str)
245 : real(dp) :: a(:, :)
246 : integer, intent(in) :: sz1, sz2
247 : character(len=*), intent(in) :: str
248 : bad_sizes = (size(a, dim=1) < sz1 .or. size(a, dim=2) < sz2)
249 : if (.not. bad_sizes) return
250 : ierr = -1
251 : return
252 : end function bad_sizes
253 :
254 : end subroutine do_newton_wrapper
255 :
256 20 : subroutine do_newton(nz, nvar, x1, xold1, AF1, ldAF, neq, matrix_type, mljac, mujac, &
257 : decsol, decsolblk, lrd, rpar_decsol, lid, ipar_decsol, &
258 : tol_correction_norm, set_primaries, set_secondaries, set_xscale, &
259 : Bdomain, xdomain, eval_equations, size_equ, sizeb, inspectB, &
260 : enter_setmatrix, exit_setmatrix, failed_in_setmatrix, force_another_iteration, &
261 10 : xscale1, equ1, ldy, nsec, y_in1, work, lwork, iwork, liwork, &
262 10 : lrpar, rpar, lipar, ipar, convergence_failure, ierr)
263 :
264 : integer, intent(in) :: nz, nvar, mljac, mujac, ldy, nsec, ldAF, neq
265 :
266 : integer, intent(in) :: matrix_type
267 :
268 : real(dp), pointer, dimension(:) :: AF1 ! =(ldAF, neq), neq = neq
269 : real(dp), pointer, dimension(:) :: x1, xold1, equ1, xscale1
270 : real(dp), pointer, dimension(:) :: y_in1 ! the values. =(ldy,nsec)
271 :
272 : ! matrix routines
273 : interface
274 : #include "mtx_decsol.dek"
275 : #include "mtx_decsolblk_dble.dek"
276 : end interface
277 : integer, intent(in) :: lrd, lid
278 : integer, intent(inout), pointer :: ipar_decsol(:) ! (lid)
279 : real(dp), intent(inout), pointer :: rpar_decsol(:) ! (lrd)
280 :
281 : ! controls
282 : real(dp), intent(in) :: tol_correction_norm
283 :
284 : ! parameters for caller-supplied routines
285 : integer, intent(in) :: lrpar, lipar
286 : real(dp), intent(inout) :: rpar(:) ! (lrpar)
287 : integer, intent(inout) :: ipar(:) ! (lipar)
288 :
289 : ! work arrays
290 : integer, intent(in) :: lwork, liwork
291 : real(dp), intent(inout), target :: work(:) ! (lwork)
292 : integer, intent(inout), target :: iwork(:) ! (liwork)
293 :
294 : ! output
295 : logical, intent(out) :: convergence_failure
296 : integer, intent(out) :: ierr
297 :
298 : ! procedures
299 : interface
300 : #include "newton_procs.dek"
301 : end interface
302 :
303 : ! info saved in work and iwork
304 10 : real(dp), dimension(:, :), pointer :: A, Acopy
305 10 : real(dp), dimension(:), pointer :: A1, Acopy1
306 :
307 10 : real(dp), dimension(:, :), pointer :: xsave, dxsave, B, grad_f, B_init
308 10 : real(dp), dimension(:), pointer :: xsave1, dxsave1, B1, B_init1, grad_f1
309 10 : real(dp), dimension(:, :), pointer :: rhs
310 10 : integer, dimension(:), pointer :: ipiv1
311 10 : real(dp), dimension(:, :), pointer :: dx, xgg, dxd, dxdd, xder, equsave
312 10 : real(dp), dimension(:, :), pointer :: y1, y2
313 :
314 10 : real(dp), dimension(:), pointer :: lblk1, dblk1, ublk1
315 10 : real(dp), dimension(:), pointer :: lblkF1, dblkF1, ublkF1
316 10 : integer, dimension(:), pointer :: ipiv_blk1
317 :
318 : ! locals
319 10 : real(dp) :: coeff, f, slope, residual_norm, max_residual, &
320 10 : corr_norm_min, resid_norm_min, correction_factor, &
321 10 : residual_norm_save, corr_norm_min_save, resid_norm_min_save, correction_factor_save, &
322 10 : correction_norm, max_correction, &
323 10 : tol_max_correction, tol_residual_norm, tol_abs_slope_min, tol_corr_resid_product, &
324 10 : min_corr_coeff, tol_max_residual, max_corr_min, max_resid_min
325 : integer :: iiter, max_tries, ndiag, zone, idiag, tiny_corr_cnt, ldA, i, k, info, &
326 : last_jac_iter, max_iterations_for_jacobian, force_iter_value, &
327 : max_iter_for_enforce_resid_tol, max_iter_for_resid_tol2, max_iter_for_resid_tol3, caller_id
328 : integer(i8) :: time0, time1, clock_rate
329 : character(len=256) :: err_msg
330 : logical :: first_try, dbg_msg, passed_tol_tests, overlay_AF, do_mtx_timing, doing_extra
331 : integer, parameter :: num_tol_msgs = 15
332 : character(len=32) :: tol_msg(num_tol_msgs)
333 : character(len=64) :: message
334 :
335 : ! set pointers to 1D data
336 : real(dp), pointer, dimension(:, :) :: x, xold, equ, xscale ! (nvar,nz)
337 10 : real(dp), pointer, dimension(:, :) :: y ! (ldy,nsec)
338 : real(dp), pointer, dimension(:, :) :: AF ! (ldAF,neq)
339 10 : real(dp), pointer, dimension(:, :, :) :: ublk, dblk, lblk ! (nvar,nvar,nz)
340 10 : real(dp), dimension(:, :, :), pointer :: lblkF, dblkF, ublkF ! (nvar,nvar,nz)
341 :
342 : include 'formats'
343 :
344 10 : x(1:nvar, 1:nz) => x1(1:neq)
345 10 : xold(1:nvar, 1:nz) => xold1(1:neq)
346 10 : equ(1:nvar, 1:nz) => equ1(1:neq)
347 10 : xscale(1:nvar, 1:nz) => xscale1(1:neq)
348 10 : AF(1:ldAF, 1:neq) => AF1(1:ldAF*neq)
349 10 : y(1:ldy, 1:nsec) => y_in1(1:ldy*nsec)
350 :
351 10 : do_mtx_timing = (work(r_mtx_time) /= 0)
352 10 : work(r_mtx_time) = 0
353 :
354 10 : tol_msg(1) = 'avg corr'
355 10 : tol_msg(2) = 'max corr '
356 10 : tol_msg(3) = 'avg+max corr'
357 10 : tol_msg(4) = 'avg resid'
358 10 : tol_msg(5) = 'avg corr+resid'
359 10 : tol_msg(6) = 'max corr, avg resid'
360 10 : tol_msg(7) = 'avg+max corr, avg resid'
361 10 : tol_msg(8) = 'max resid'
362 10 : tol_msg(9) = 'avg corr, max resid'
363 10 : tol_msg(10) = 'max corr+resid'
364 10 : tol_msg(11) = 'avg+max corr, max resid'
365 10 : tol_msg(12) = 'avg+max resid'
366 10 : tol_msg(13) = 'avg corr, avg+max resid'
367 10 : tol_msg(14) = 'max corr, avg+max resid'
368 10 : tol_msg(15) = 'avg+max corr+resid'
369 :
370 10 : ierr = 0
371 10 : iiter = 0
372 :
373 10 : call set_param_defaults
374 10 : dbg_msg = (iwork(i_debug) /= 0)
375 10 : tol_residual_norm = work(r_tol_residual_norm)
376 10 : tol_max_residual = work(r_tol_max_residual)
377 10 : tol_max_correction = work(r_tol_max_correction)
378 10 : tol_abs_slope_min = work(r_tol_abs_slope_min)
379 10 : tol_corr_resid_product = work(r_tol_corr_resid_product)
380 10 : min_corr_coeff = work(r_min_corr_coeff)
381 :
382 10 : max_iter_for_enforce_resid_tol = iwork(i_max_iter_for_enforce_resid_tol)
383 10 : max_iter_for_resid_tol2 = iwork(i_max_iter_for_resid_tol2)
384 10 : max_iter_for_resid_tol3 = iwork(i_max_iter_for_resid_tol3)
385 :
386 10 : caller_id = iwork(i_caller_id)
387 :
388 10 : if (ldy < nz .and. nsec > 0) then
389 0 : ierr = -1
390 0 : return
391 : end if
392 :
393 10 : idiag = 1
394 10 : if (matrix_type == block_tridiag_dble_matrix_type) then
395 0 : ndiag = 3*nvar
396 10 : else if (matrix_type == square_matrix_type) then
397 0 : ndiag = neq
398 : else
399 10 : idiag = mujac + 1
400 10 : ndiag = mljac + mujac + 1
401 : end if
402 :
403 10 : ldA = ndiag
404 10 : call pointers(ierr)
405 10 : if (ierr /= 0) return
406 :
407 10 : if (iwork(i_do_core_dump) /= 0) then
408 0 : call newton_core_dump(x, dx, xold)
409 0 : return
410 : end if
411 :
412 10 : doing_extra = .false.
413 10 : passed_tol_tests = .false. ! goes true when pass the tests
414 10 : convergence_failure = .false. ! goes true when time to give up
415 10 : coeff = 1.
416 30040 : xscale = 1.
417 :
418 10 : residual_norm = 0
419 10 : max_residual = 0
420 10 : corr_norm_min = 1d99
421 10 : max_corr_min = 1d99
422 10 : max_resid_min = 1d99
423 10 : resid_norm_min = 1d99
424 10 : correction_factor = 0
425 :
426 10020 : do k = 1, nz
427 30040 : do i = 1, nvar
428 30030 : dx(i, k) = x(i, k) - xold(i, k)
429 : end do
430 : end do
431 :
432 10 : call xdomain(iiter, nvar, nz, x, dx, xold, lrpar, rpar, lipar, ipar, ierr)
433 10 : if (ierr /= 0) then
434 0 : if (dbg_msg) write (*, *) 'newton failure: xdomain returned ierr', ierr
435 0 : convergence_failure = .true.
436 0 : return
437 : end if
438 10 : call set_xscale(nvar, nz, xold, xscale, lrpar, rpar, lipar, ipar, ierr) ! set xscale
439 10 : if (ierr /= 0) then
440 0 : if (dbg_msg) write (*, *) 'newton failure: set_xscale returned ierr', ierr
441 0 : convergence_failure = .true.
442 0 : return
443 : end if
444 10 : call setequ(nvar, nz, x, equ, lrpar, rpar, lipar, ipar, ierr)
445 10 : if (ierr /= 0) then
446 0 : if (dbg_msg) write (*, *) 'newton failure: setequ returned ierr', ierr
447 0 : convergence_failure = .true.
448 0 : return
449 : end if
450 10 : call size_equ(iiter, nvar, nz, equ, residual_norm, max_residual, lrpar, rpar, lipar, ipar, ierr)
451 10 : if (ierr /= 0) then
452 0 : if (dbg_msg) write (*, *) 'newton failure: size_equ returned ierr', ierr
453 0 : convergence_failure = .true.
454 0 : return
455 : end if
456 :
457 10 : first_try = .true.
458 10 : iiter = 1
459 10 : max_tries = abs(iwork(i_max_tries))
460 10 : last_jac_iter = 0
461 10 : tiny_corr_cnt = 0
462 :
463 10 : if (iwork(i_max_iterations_for_jacobian) == 0) then
464 : max_iterations_for_jacobian = 1000000
465 : else
466 : max_iterations_for_jacobian = iwork(i_max_iterations_for_jacobian)
467 : end if
468 :
469 40 : do while (.not. passed_tol_tests)
470 :
471 20 : if (dbg_msg .and. first_try) write (*, *)
472 :
473 20 : if (iiter >= max_iter_for_enforce_resid_tol) then
474 20 : if (iiter >= max_iter_for_resid_tol2) then
475 20 : if (iiter >= max_iter_for_resid_tol3) then ! shut down
476 20 : tol_residual_norm = 1d200
477 20 : tol_max_residual = 1d200
478 : else ! >= max_iter_for_resid_tol2 and but < max_iter_for_resid_tol3
479 0 : tol_residual_norm = work(r_tol_residual_norm3)
480 0 : tol_max_residual = work(r_tol_max_residual3)
481 : end if
482 : else ! >= max_iter_for_enforce_resid_tol but < max_iter_for_resid_tol2
483 0 : tol_residual_norm = work(r_tol_residual_norm2)
484 0 : tol_max_residual = work(r_tol_max_residual2)
485 : end if
486 : end if
487 :
488 : overlay_AF = (min_corr_coeff == 1) .and. &
489 20 : (matrix_type == banded_matrix_type .or. matrix_type == block_tridiag_dble_matrix_type)
490 :
491 : ! NOTE: for banded matrix, the jacobian A is a part of the array AF
492 : ! AF has extra rows for storing banded LU factored matrix.
493 20 : if (overlay_AF) then
494 0 : A1 => AF1
495 0 : A => AF
496 0 : ldA = ldAF
497 0 : if (matrix_type == banded_matrix_type) then
498 0 : idiag = mljac + mujac + 1
499 0 : else if (matrix_type == block_tridiag_dble_matrix_type) then
500 0 : ublk1 => ublkF1
501 0 : dblk1 => dblkF1
502 0 : lblk1 => lblkF1
503 0 : lblk(1:nvar, 1:nvar, 1:nz) => lblk1(1:nvar*neq)
504 0 : dblk(1:nvar, 1:nvar, 1:nz) => dblk1(1:nvar*neq)
505 0 : ublk(1:nvar, 1:nvar, 1:nz) => ublk1(1:nvar*neq)
506 : else
507 0 : stop 'confusion about matrix_type'
508 : end if
509 : else
510 20 : idiag = mujac + 1
511 : end if
512 :
513 20 : call setmatrix(neq, x, dx, xscale, xsave, dxsave, lrpar, rpar, lipar, ipar, ierr)
514 20 : if (ierr /= 0) then
515 0 : if (any(dx /= 0)) then
516 0 : call write_msg('setmatrix returned ierr /= 0; retry with dx = 0.')
517 0 : do i = 1, neq
518 0 : x1(i) = xold1(i)
519 : end do
520 0 : dx = 0
521 0 : iiter = iiter + 1
522 0 : first_try = .false.
523 0 : cycle
524 : end if
525 0 : call write_msg('setmatrix returned ierr /= 0; dx already = 0, so give up.')
526 0 : convergence_failure = .true.; exit
527 : end if
528 20 : iwork(i_num_jacobians) = iwork(i_num_jacobians) + 1
529 20 : last_jac_iter = iiter
530 :
531 20 : if (.not. solve_equ()) then ! either singular or horribly ill-conditioned
532 0 : write (err_msg, '(a, i5, 3x, a)') 'info', ierr, 'bad_matrix'
533 0 : call oops(err_msg)
534 0 : exit
535 : end if
536 20 : iwork(i_num_solves) = iwork(i_num_solves) + 1
537 :
538 : ! inform caller about the correction
539 20 : call inspectB(iiter, nvar, nz, x, B, xscale, lrpar, rpar, lipar, ipar, ierr)
540 20 : if (ierr /= 0) then
541 0 : call oops('inspectB returned ierr')
542 0 : exit
543 : end if
544 :
545 : ! compute size of scaled correction B
546 20 : call sizeB(iiter, nvar, nz, x, B, xscale, max_correction, correction_norm, lrpar, rpar, lipar, ipar, ierr)
547 20 : if (ierr /= 0) then
548 0 : call oops('correction rejected by sizeB')
549 0 : exit
550 : end if
551 20 : correction_norm = abs(correction_norm)
552 20 : max_correction = abs(max_correction)
553 20 : corr_norm_min = min(correction_norm, corr_norm_min)
554 20 : max_corr_min = min(max_correction, max_corr_min)
555 :
556 20 : if (is_bad(correction_norm) .or. is_bad(max_correction)) then
557 : ! bad news -- bogus correction
558 0 : call oops('bad result from sizeb -- correction info either NaN or Inf')
559 0 : exit
560 : end if
561 :
562 20 : if ((correction_norm > work(r_corr_param_factor)*work(r_scale_correction_norm)) .and. (iwork(i_try_really_hard) == 0)) then
563 0 : call oops('avg corr too large')
564 0 : exit
565 : end if
566 :
567 : ! shrink the correction if it is too large
568 20 : correction_factor = 1
569 :
570 20 : if (correction_norm*correction_factor > work(r_scale_correction_norm)) then
571 0 : correction_factor = work(r_scale_correction_norm)/correction_norm
572 : end if
573 :
574 20 : if (max_correction*correction_factor > work(r_scale_max_correction)) then
575 0 : correction_factor = work(r_scale_max_correction)/max_correction
576 : end if
577 :
578 : ! fix B if out of definition domain
579 20 : call Bdomain(iiter, nvar, nz, B, x, xscale, correction_factor, lrpar, rpar, lipar, ipar, ierr)
580 20 : if (ierr /= 0) then ! correction cannot be fixed
581 0 : call oops('correction rejected by Bdomain')
582 0 : exit
583 : end if
584 :
585 : ! save previous
586 20 : residual_norm_save = residual_norm
587 20 : corr_norm_min_save = corr_norm_min
588 20 : resid_norm_min_save = resid_norm_min
589 20 : correction_factor_save = correction_factor
590 :
591 20 : if (min_corr_coeff < 1) then
592 : ! compute gradient of f = equ<dot>jacobian
593 : ! NOTE: NOT jacobian<dot>equ
594 20 : if (matrix_type == block_tridiag_dble_matrix_type) then
595 0 : call block_multiply_xa(nvar, nz, lblk1, dblk1, ublk1, equ1, grad_f1)
596 20 : else if (matrix_type == square_matrix_type) then
597 0 : call multiply_xa(neq, A1, equ1, grad_f1)
598 : else
599 20 : call band_multiply_xa(neq, mljac, mujac, A1, ldA, equ1, grad_f1)
600 : end if
601 :
602 20 : slope = eval_slope(nvar, nz, grad_f, B)
603 : !write(*,*) 'slope', slope
604 : !if (is_bad(slope)) then
605 : ! call oops('bad slope value')
606 : ! exit
607 : !end if
608 20 : if (is_bad(slope) .or. slope > 0) then ! a bad sign
609 : ! but give it a chance before give up
610 : !write(*,*) 'slope', slope
611 0 : slope = 0
612 0 : min_corr_coeff = 1
613 : end if
614 :
615 : else
616 :
617 0 : slope = 0
618 :
619 : end if
620 :
621 : call adjust_correction(min_corr_coeff, correction_factor, grad_f1, f, slope, coeff, err_msg, &
622 20 : lrpar, rpar, lipar, ipar, ierr)
623 20 : if (ierr /= 0) then
624 0 : call oops(err_msg)
625 0 : exit
626 : end if
627 :
628 : ! coeff is factor by which adjust_correction rescaled the correction vector
629 20 : if (coeff > work(r_tiny_corr_factor)*min_corr_coeff) then
630 : tiny_corr_cnt = 0
631 : else
632 6 : tiny_corr_cnt = tiny_corr_cnt + 1
633 : end if
634 :
635 : ! check the residuals for the equations
636 20 : call size_equ(iiter, nvar, nz, equ, residual_norm, max_residual, lrpar, rpar, lipar, ipar, ierr)
637 20 : if (ierr /= 0) then
638 0 : call oops('size_equ returned ierr')
639 0 : exit
640 : end if
641 20 : if (is_bad(residual_norm)) then
642 0 : call oops('residual_norm is a a bad number (NaN or Infinity)')
643 0 : exit
644 : end if
645 20 : if (is_bad(max_residual)) then
646 0 : call oops('max_residual is a a bad number (NaN or Infinity)')
647 0 : exit
648 : end if
649 20 : residual_norm = abs(residual_norm)
650 20 : max_residual = abs(max_residual)
651 20 : resid_norm_min = min(residual_norm, resid_norm_min)
652 20 : max_resid_min = min(max_residual, max_resid_min)
653 :
654 20 : if (max_correction > tol_max_correction*coeff .or. max_residual > tol_max_residual*coeff) then
655 : passed_tol_tests = .false.
656 : else
657 : passed_tol_tests = (correction_norm <= tol_correction_norm*coeff .and. &
658 : residual_norm <= tol_residual_norm*coeff) .or. &
659 : (abs(slope) <= tol_abs_slope_min .and. &
660 20 : correction_norm*residual_norm <= tol_corr_resid_product*coeff*coeff)
661 : end if
662 :
663 : if (.not. passed_tol_tests) then
664 10 : if (iiter >= max_tries) then
665 0 : if (dbg_msg) then
666 0 : call get_message
667 0 : message = trim(message)//' -- give up'
668 0 : call write_msg(message)
669 : end if
670 0 : convergence_failure = .true.; exit
671 10 : else if (iwork(i_try_really_hard) == 0) then
672 0 : if (coeff < min(min_corr_coeff, correction_factor)) then
673 0 : call oops('coeff too small')
674 0 : exit
675 : else if (correction_norm > tol_correction_norm*coeff .and. &
676 : (correction_norm > work(r_corr_norm_jump_limit)*corr_norm_min) &
677 0 : .and. (.not. first_try)) then
678 0 : call oops('avg correction jumped')
679 0 : exit
680 : else if (residual_norm > tol_residual_norm*coeff .and. &
681 : (residual_norm > work(r_resid_norm_jump_limit)*resid_norm_min) &
682 0 : .and. (.not. first_try)) then
683 0 : call oops('avg residual jumped')
684 0 : exit
685 : else if (max_correction > tol_max_correction*coeff .and. &
686 : (max_correction > work(r_max_corr_jump_limit)*max_corr_min) &
687 0 : .and. (.not. first_try)) then
688 0 : call oops('max correction jumped')
689 0 : exit
690 : else if (residual_norm > tol_residual_norm*coeff .and. &
691 : (max_residual > work(r_max_resid_jump_limit)*max_resid_min) &
692 0 : .and. (.not. first_try)) then
693 0 : call oops('max residual jumped')
694 0 : exit
695 0 : else if (tiny_corr_cnt >= iwork(i_tiny_min_corr_coeff) .and. min_corr_coeff < 1) then
696 0 : call oops('tiny corrections')
697 0 : exit
698 : end if
699 : end if
700 : end if
701 :
702 20 : if (dbg_msg) then
703 0 : if (.not. passed_tol_tests) then
704 0 : call get_message
705 0 : call write_msg(message)
706 0 : else if (iiter < iwork(i_itermin)) then
707 0 : call write_msg('iiter < itermin')
708 : else
709 0 : call write_msg('okay!')
710 : end if
711 : end if
712 :
713 20 : if (passed_tol_tests .and. (iiter + 1 < max_tries)) then
714 : ! about to declare victory... but may want to do another iteration
715 10 : force_iter_value = force_another_iteration(iiter, iwork(i_itermin), lrpar, rpar, lipar, ipar)
716 10 : if (force_iter_value > 0) then
717 : passed_tol_tests = .false. ! force another
718 : tiny_corr_cnt = 0 ! reset the counter
719 : corr_norm_min = 1d99
720 : resid_norm_min = 1d99
721 : max_corr_min = 1d99
722 : max_resid_min = 1d99
723 10 : else if (force_iter_value < 0) then ! failure
724 0 : call oops('force iter')
725 0 : convergence_failure = .true.
726 0 : exit
727 : end if
728 : end if
729 :
730 20 : iiter = iiter + 1
731 20 : first_try = .false.
732 :
733 : end do
734 :
735 : contains
736 :
737 0 : subroutine get_message
738 : include 'formats'
739 0 : i = 0
740 0 : if (correction_norm > tol_correction_norm*coeff) i = i + 1
741 0 : if (max_correction > tol_max_correction*coeff) i = i + 2
742 0 : if (residual_norm > tol_residual_norm*coeff) i = i + 4
743 0 : if (max_residual > tol_max_residual*coeff) i = i + 8
744 0 : if (i == 0) then
745 0 : message = 'out of tries'
746 : else
747 0 : message = tol_msg(i)
748 : end if
749 0 : end subroutine get_message
750 :
751 10 : subroutine set_param_defaults
752 :
753 10 : if (iwork(i_itermin) == 0) iwork(i_itermin) = 2
754 10 : if (iwork(i_max_tries) == 0) iwork(i_max_tries) = 50
755 10 : if (iwork(i_tiny_min_corr_coeff) == 0) iwork(i_tiny_min_corr_coeff) = 25
756 :
757 10 : if (work(r_tol_residual_norm) == 0) work(r_tol_residual_norm) = 1d99
758 10 : if (work(r_tol_max_residual) == 0) work(r_tol_max_residual) = 1d99
759 10 : if (work(r_tol_max_correction) == 0) work(r_tol_max_correction) = 1d99
760 10 : if (work(r_target_corr_factor) == 0) work(r_target_corr_factor) = 0.9d0
761 10 : if (work(r_scale_correction_norm) == 0) work(r_scale_correction_norm) = 2d0
762 10 : if (work(r_corr_param_factor) == 0) work(r_corr_param_factor) = 10d0
763 10 : if (work(r_scale_max_correction) == 0) work(r_scale_max_correction) = 1d99
764 10 : if (work(r_corr_norm_jump_limit) == 0) work(r_corr_norm_jump_limit) = 1d99
765 10 : if (work(r_max_corr_jump_limit) == 0) work(r_max_corr_jump_limit) = 1d99
766 10 : if (work(r_resid_norm_jump_limit) == 0) work(r_resid_norm_jump_limit) = 1d99
767 10 : if (work(r_max_resid_jump_limit) == 0) work(r_max_resid_jump_limit) = 1d99
768 10 : if (work(r_min_corr_coeff) == 0) work(r_min_corr_coeff) = 1d-3
769 10 : if (work(r_slope_alert_level) == 0) work(r_slope_alert_level) = 1d0
770 10 : if (work(r_slope_crisis_level) == 0) work(r_slope_crisis_level) = 1d0
771 10 : if (work(r_tiny_corr_factor) == 0) work(r_tiny_corr_factor) = 2d0
772 :
773 10 : end subroutine set_param_defaults
774 :
775 0 : subroutine oops(msg)
776 : character(len=*), intent(in) :: msg
777 : character(len=256) :: full_msg
778 0 : full_msg = trim(msg)//' -- give up'
779 0 : call write_msg(full_msg)
780 0 : convergence_failure = .true.
781 0 : end subroutine oops
782 :
783 61 : subroutine setequ(nvar, nz, x, equ, lrpar, rpar, lipar, ipar, ierr)
784 : integer, intent(in) :: nvar, nz
785 : real(dp), pointer :: x(:, :) ! (nvar, nz)
786 : real(dp), pointer :: equ(:, :) ! (nvar, nz)
787 : integer, intent(in) :: lrpar, lipar
788 : real(dp), intent(inout) :: rpar(:) ! (lrpar)
789 : integer, intent(inout) :: ipar(:) ! (lipar)
790 : integer, intent(out) :: ierr
791 61 : call set_primaries(nvar, nz, x, lrpar, rpar, lipar, ipar, ierr); if (ierr /= 0) return
792 61 : call set_secondaries(0, lrpar, rpar, lipar, ipar, ierr); if (ierr /= 0) return
793 61 : call eval_equations(iiter, nvar, nz, x, xscale, equ, lrpar, rpar, lipar, ipar, ierr)
794 : if (ierr /= 0) return
795 : end subroutine setequ
796 :
797 20 : subroutine adjust_correction(min_corr_coeff_in, max_corr_coeff, grad_f, f, slope, coeff, err_msg, &
798 20 : lrpar, rpar, lipar, ipar, ierr)
799 : real(dp), intent(in) :: min_corr_coeff_in
800 : real(dp), intent(in) :: max_corr_coeff
801 : real(dp), intent(in) :: grad_f(:) ! (neq) ! gradient df/dx at xold
802 : real(dp), intent(out) :: f ! 1/2 fvec^2. minimize this.
803 : real(dp), intent(in) :: slope
804 : real(dp), intent(out) :: coeff
805 :
806 : ! the new correction is coeff*xscale*B
807 : ! with min_corr_coeff <= coeff <= max_corr_coeff
808 : ! if all goes well, the new x will give an improvement in f
809 :
810 : character(len=*), intent(out) :: err_msg
811 : integer, intent(in) :: lrpar, lipar
812 : real(dp), intent(inout) :: rpar(:) ! (lrpar)
813 : integer, intent(inout) :: ipar(:) ! (lipar)
814 : integer, intent(out) :: ierr
815 :
816 : integer :: i, k, iter
817 : logical :: first_time
818 20 : real(dp) :: a1, alam, alam2, a2, disc, f2, tmp1, rhs1, rhs2, tmplam, fold, min_corr_coeff
819 20 : real(dp) :: f_target
820 : logical :: skip_eval_f
821 :
822 : real(dp), parameter :: alf = 1d-2 ! ensures sufficient decrease in f
823 :
824 : real(dp), parameter :: alam_factor = 0.2d0
825 :
826 : include 'formats'
827 :
828 20 : ierr = 0
829 20 : coeff = 0
830 :
831 20 : skip_eval_f = (min_corr_coeff_in == 1)
832 20 : if (skip_eval_f) then
833 0 : f = 0
834 : else
835 20040 : do k = 1, nz
836 60080 : do i = 1, nvar
837 40040 : xsave(i, k) = x(i, k)
838 60060 : dxsave(i, k) = dx(i, k)
839 : end do
840 : end do
841 20 : f = eval_f(nvar, nz, equ)
842 20 : if (is_bad(f)) then
843 0 : ierr = -1
844 0 : write (err_msg, *) 'adjust_correction failed in eval_f'
845 0 : if (dbg_msg) write (*, *) 'adjust_correction: eval_f(nvar,nz,equ)', eval_f(nvar, nz, equ)
846 20 : return
847 : end if
848 : end if
849 20 : fold = f
850 :
851 20 : min_corr_coeff = min(min_corr_coeff_in, max_corr_coeff) ! make sure min <= max
852 20 : alam = max_corr_coeff
853 20 : first_time = .true.
854 20 : f2 = 0
855 20 : alam2 = 0
856 :
857 51 : search_loop: do iter = 1, 1000
858 :
859 51 : coeff = max(min_corr_coeff, alam)
860 :
861 51 : call apply_coeff(nvar, nz, x, xsave, B, xscale, coeff, skip_eval_f)
862 51102 : do k = 1, nz
863 153204 : do i = 1, nvar
864 153153 : dx(i, k) = x(i, k) - xold(i, k)
865 : end do
866 : end do
867 51 : call xdomain(iiter, nvar, nz, x, dx, xold, lrpar, rpar, lipar, ipar, ierr)
868 51 : if (ierr /= 0) then
869 0 : write (err_msg, *) 'adjust_correction failed in xdomain'
870 0 : if (dbg_msg) write (*, *) 'adjust_correction failed in xdomain: alam', alam
871 0 : if (alam <= min_corr_coeff) return
872 0 : ierr = 0
873 0 : alam = max(alam*alam_factor, min_corr_coeff)
874 0 : cycle search_loop
875 : end if
876 :
877 51 : call setequ(nvar, nz, x, equ, lrpar, rpar, lipar, ipar, ierr)
878 51 : if (ierr /= 0) then
879 0 : if (alam > min_corr_coeff) then
880 0 : alam = max(alam/10, min_corr_coeff)
881 0 : ierr = 0
882 0 : cycle search_loop
883 : end if
884 0 : ierr = -1
885 0 : write (err_msg, *) 'adjust_correction failed in setequ'
886 0 : if (dbg_msg) write (*, *) 'adjust_correction: setequ returned ierr', ierr
887 : exit search_loop
888 : end if
889 :
890 51 : if (min_corr_coeff == 1) return
891 :
892 51 : f = eval_f(nvar, nz, equ)
893 51 : if (is_bad(f)) then
894 0 : if (alam > min_corr_coeff) then
895 0 : alam = max(alam/10, min_corr_coeff)
896 0 : ierr = 0
897 0 : cycle search_loop
898 : end if
899 0 : err_msg = 'equ norm is NaN or other bad num'
900 0 : ierr = -1
901 0 : exit search_loop
902 : end if
903 :
904 51 : f_target = max(fold/2, fold + alf*coeff*slope)
905 51 : if (f <= f_target) then
906 : return ! sufficient decrease in f
907 : end if
908 :
909 37 : if (alam <= min_corr_coeff) then
910 : return ! time to give up
911 : end if
912 :
913 : ! reduce alam and try again
914 31 : if (first_time) then
915 7 : tmplam = -slope/(2*(f - fold - slope))
916 7 : first_time = .false.
917 : else ! have two prior f values to work with
918 24 : rhs1 = f - fold - alam*slope
919 24 : rhs2 = f2 - fold - alam2*slope
920 24 : tmp1 = 1d0/(alam2*alam2*(alam - alam2))
921 24 : a1 = (rhs1 - rhs2)*tmp1
922 24 : a2 = (-alam2*rhs1 + alam*rhs2)*tmp1
923 24 : if (a1 == 0) then
924 0 : tmplam = -slope/(2*a2)
925 : else
926 24 : disc = a2*a2 - 3*a1*slope
927 24 : if (disc < 0) then
928 0 : tmplam = alam*alam_factor
929 24 : else if (a2 <= 0) then
930 18 : tmplam = (-a2 + sqrt(disc))/(3*a1)
931 : else
932 6 : tmplam = -slope/(a2 + sqrt(disc))
933 : end if
934 : end if
935 24 : if (tmplam > alam*alam_factor) tmplam = alam*alam_factor
936 : end if
937 :
938 31 : alam2 = alam
939 31 : f2 = f
940 31 : alam = max(tmplam, alam*alam_factor, min_corr_coeff)
941 :
942 : end do search_loop
943 :
944 0 : do k = 1, nz
945 0 : do i = 1, nvar
946 0 : x(i, k) = xsave(i, k)
947 0 : dx(i, k) = dxsave(i, k)
948 : end do
949 : end do
950 :
951 20 : end subroutine adjust_correction
952 :
953 51 : subroutine apply_coeff(nvar, nz, x, xsave, B, xscale, coeff, just_use_x)
954 : integer, intent(in) :: nvar, nz
955 : real(dp), intent(inout), dimension(:, :) :: x
956 : real(dp), intent(in), dimension(:, :) :: xsave, B, xscale
957 : real(dp), intent(in) :: coeff
958 : logical, intent(in) :: just_use_x
959 : integer :: i, k
960 : include 'formats'
961 51 : if (just_use_x) then
962 : !write(*,1) 'apply_coeff just_use_x', coeff
963 0 : if (coeff == 1d0) then
964 0 : do k = 1, nz
965 0 : do i = 1, nvar
966 : !if (i==1 .and. x(i,k) + xscale(i,k)*B(i,k) < -1d-10) then
967 : ! write(*,3) 'x(i,k)', i, k, x(i,k), xscale(i,k), B(i,k)
968 : ! stop 'apply_coeff'
969 : !end if
970 0 : x(i, k) = x(i, k) + xscale(i, k)*B(i, k)
971 : end do
972 : end do
973 : else
974 0 : do k = 1, nz
975 0 : do i = 1, nvar
976 0 : x(i, k) = x(i, k) + coeff*xscale(i, k)*B(i, k)
977 : end do
978 : end do
979 : end if
980 0 : return
981 : end if
982 : ! else use xsave instead of x
983 51 : if (coeff == 1d0) then
984 20040 : do k = 1, nz
985 60080 : do i = 1, nvar
986 60060 : x(i, k) = xsave(i, k) + xscale(i, k)*B(i, k)
987 : end do
988 : end do
989 : return
990 : end if
991 31062 : do k = 1, nz
992 93124 : do i = 1, nvar
993 93093 : x(i, k) = xsave(i, k) + coeff*xscale(i, k)*B(i, k)
994 : end do
995 : end do
996 : end subroutine apply_coeff
997 :
998 40 : logical function solve_equ()
999 : integer :: nrhs, ldafb, ldb, ldx, lda, i, n, sprs_nz
1000 :
1001 : include 'formats'
1002 :
1003 20 : solve_equ = .true.
1004 20040 : do k = 1, nz
1005 60080 : do i = 1, nvar
1006 60060 : b(i, k) = -equ(i, k)
1007 : end do
1008 : end do
1009 20 : n = nvar*nz
1010 :
1011 20 : nrhs = 1
1012 20 : lda = mljac + 1 + mujac
1013 20 : ldafb = 2*mljac + mujac + 1
1014 20 : ldb = n
1015 20 : ldx = n
1016 :
1017 20 : info = 0
1018 20 : if (do_mtx_timing) call system_clock(time0, clock_rate)
1019 20 : call factor_mtx(n, ldafb, sprs_nz)
1020 20 : if (info == 0) call solve_mtx(n, ldafb, sprs_nz)
1021 20 : if (do_mtx_timing) then
1022 0 : call system_clock(time1, clock_rate)
1023 0 : work(r_mtx_time) = work(r_mtx_time) + dble(time1 - time0)/clock_rate
1024 : end if
1025 :
1026 20 : if (info /= 0) then
1027 0 : solve_equ = .false.
1028 0 : b(1:nvar, 1:nz) = 0
1029 : end if
1030 :
1031 20 : end function solve_equ
1032 :
1033 20 : subroutine factor_mtx(n, ldafb, sprs_nz)
1034 : integer, intent(in) :: n, ldafb
1035 : integer, intent(out) :: sprs_nz
1036 : integer :: i, j, k, info_dealloc
1037 : include 'formats'
1038 20 : sprs_nz = 0
1039 20 : if (matrix_type == block_tridiag_dble_matrix_type) then
1040 0 : if (.not. overlay_AF) then
1041 0 : do k = 1, nvar*neq
1042 0 : lblkF1(k) = lblk1(k)
1043 0 : dblkF1(k) = dblk1(k)
1044 0 : ublkF1(k) = ublk1(k)
1045 : end do
1046 : end if
1047 0 : call decsolblk(0, caller_id, nvar, nz, lblkF1, dblkF1, ublkF1, B1, ipiv_blk1, lrd, rpar_decsol, lid, ipar_decsol, info)
1048 0 : if (info /= 0) then
1049 : call decsolblk(2, caller_id, nvar, nz, lblkF1, dblkF1, ublkF1, &
1050 0 : B1, ipiv_blk1, lrd, rpar_decsol, lid, ipar_decsol, info_dealloc)
1051 : end if
1052 20 : else if (matrix_type == square_matrix_type) then
1053 0 : do k = 1, n*n
1054 0 : AF1(k) = A1(k)
1055 : end do
1056 0 : call decsol(0, n, n, AF1, n, n, B1, ipiv1, lrd, rpar_decsol, lid, ipar_decsol, info)
1057 : else ! banded_matrix_type
1058 20 : if (.not. overlay_AF) then
1059 40060 : do j = 1, neq
1060 320340 : do i = 1, ldA
1061 320320 : AF(mljac + i, j) = A(i, j)
1062 : end do
1063 : end do
1064 : end if
1065 20 : call decsol(0, n, ldafb, AF1, mljac, mujac, B1, ipiv1, lrd, rpar_decsol, lid, ipar_decsol, info)
1066 : end if
1067 20 : end subroutine factor_mtx
1068 :
1069 20 : subroutine solve_mtx(n, ldafb, sprs_nz)
1070 : integer, intent(in) :: n, ldafb, sprs_nz
1071 : character(1) :: trans
1072 : integer :: info_solve, info_dealloc
1073 20 : info = 0; info_solve = 0; info_dealloc = 0
1074 20 : trans = 'N'
1075 20 : if (matrix_type == block_tridiag_dble_matrix_type) then
1076 : call decsolblk(1, caller_id, nvar, nz, lblkF1, dblkF1, ublkF1, &
1077 0 : B1, ipiv_blk1, lrd, rpar_decsol, lid, ipar_decsol, info_solve)
1078 : call decsolblk(2, caller_id, nvar, nz, lblkF1, dblkF1, ublkF1, &
1079 0 : B1, ipiv_blk1, lrd, rpar_decsol, lid, ipar_decsol, info_dealloc)
1080 20 : else if (matrix_type == square_matrix_type) then
1081 0 : call decsol(1, n, n, AF1, n, n, B1, ipiv1, lrd, rpar_decsol, lid, ipar_decsol, info_solve)
1082 0 : call decsol(2, n, n, AF1, n, n, B1, ipiv1, lrd, rpar_decsol, lid, ipar_decsol, info_dealloc)
1083 : else ! banded_matrix_type
1084 20 : call decsol(1, n, ldafb, AF1, mljac, mujac, B1, ipiv1, lrd, rpar_decsol, lid, ipar_decsol, info_solve)
1085 20 : call decsol(2, n, ldafb, AF1, mljac, mujac, B1, ipiv1, lrd, rpar_decsol, lid, ipar_decsol, info_dealloc)
1086 : end if
1087 20 : if (info_solve /= 0 .or. info_dealloc /= 0) info = -1
1088 20 : end subroutine solve_mtx
1089 :
1090 20 : logical function do_enter_setmatrix(neq, x, dx, xscale, lrpar, rpar, lipar, ipar, ierr)
1091 : ! create jacobian by using numerical differences to approximate the partial derivatives
1092 : implicit none
1093 : integer, intent(in) :: neq
1094 : real(dp), pointer, dimension(:, :) :: x, dx, xscale
1095 : integer, intent(in) :: lrpar, lipar
1096 : real(dp), intent(inout) :: rpar(:) ! (lrpar)
1097 : integer, intent(inout) :: ipar(:) ! (lipar)
1098 : integer, intent(out) :: ierr
1099 : logical :: need_solver_to_eval_jacobian
1100 : include 'formats'
1101 : need_solver_to_eval_jacobian = .true.
1102 : call enter_setmatrix(iiter, nvar, nz, neq, x, xold, xscale, xder, need_solver_to_eval_jacobian, &
1103 20 : size(A, dim=1), A1, idiag, lrpar, rpar, lipar, ipar, ierr)
1104 20 : do_enter_setmatrix = need_solver_to_eval_jacobian
1105 20 : end function do_enter_setmatrix
1106 :
1107 20 : subroutine setmatrix(neq, x, dx, xscale, xsave, dxsave, lrpar, rpar, lipar, ipar, ierr)
1108 : ! create jacobian by using numerical differences to approximate the partial derivatives
1109 : implicit none
1110 : integer, intent(in) :: neq
1111 : real(dp), pointer, dimension(:, :) :: x, dx, xscale, xsave, dxsave
1112 : integer, intent(in) :: lrpar, lipar
1113 : real(dp), intent(inout) :: rpar(:) ! (lrpar)
1114 : integer, intent(inout) :: ipar(:) ! (lipar)
1115 : integer, intent(out) :: ierr
1116 :
1117 : integer :: i, j, ii, k, kk, ij, ik, ivar, ideb, ifin
1118 20 : integer, dimension(nvar) :: nskip, gskip, dskip
1119 20 : real(dp) :: partial
1120 : logical :: need_solver_to_eval_jacobian
1121 :
1122 : include 'formats'
1123 :
1124 : ierr = 0
1125 :
1126 20 : need_solver_to_eval_jacobian = do_enter_setmatrix(neq, x, dx, xscale, lrpar, rpar, lipar, ipar, ierr)
1127 20 : if (ierr /= 0) return
1128 20 : if (.not. need_solver_to_eval_jacobian) return
1129 :
1130 0 : if (matrix_type == block_tridiag_dble_matrix_type) then
1131 0 : write (*, '(a)') 'sorry: newton numerical jacobian does '//'not support numerical block triangular jacobians.'
1132 0 : write (*, *) 'requested matrix_type', matrix_type
1133 0 : write (*, *) 'try using a banded matrix instead'
1134 0 : ierr = -1
1135 0 : return
1136 : end if
1137 :
1138 : ! allocate working arrays for numerical jacobian calculation
1139 0 : allocate (xgg(nvar, nz), dxd(nvar, nz), dxdd(nvar, nz), equsave(nvar, nz), stat=ierr)
1140 :
1141 0 : do k = 1, nz
1142 0 : do j = 1, nvar
1143 0 : xsave(j, k) = x(j, k)
1144 0 : dxsave(j, k) = dx(j, k)
1145 0 : equsave(j, k) = equ(j, k)
1146 : end do
1147 : end do
1148 0 : if (nsec > 0) call mesa_error(__FILE__, __LINE__) ! y1=y
1149 :
1150 : ! some info about the stencil
1151 : ! gskip zones on left
1152 : ! dskip zones on right
1153 : ! nskip zones in total
1154 0 : gskip = mljac/nvar
1155 0 : dskip = mujac/nvar
1156 0 : nskip = 1 + dskip + gskip
1157 :
1158 0 : A = 0
1159 : ! loop on variables
1160 0 : do ivar = 1, nvar
1161 0 : do k = 1, nz
1162 0 : do j = 1, nvar
1163 0 : dxd(j, k) = dxsave(j, k)
1164 : end do
1165 : end do
1166 0 : do k = 1, nz
1167 0 : do ii = 1, 20 ! may need to increase xder
1168 0 : dxd(ivar, k) = dxd(ivar, k) + xder(ivar, k)
1169 0 : if (dxd(ivar, k) - dxsave(ivar, k) /= 0) exit
1170 0 : xder(ivar, k) = xder(ivar, k)*2
1171 : end do
1172 : end do
1173 0 : do k = 1, nz
1174 0 : do j = 1, nvar
1175 0 : dx(j, k) = dxd(j, k)
1176 0 : x(j, k) = xold(j, k) + dx(j, k)
1177 0 : dx(j, k) = x(j, k) - xold(j, k)
1178 : end do
1179 : end do
1180 0 : call xdomain(iiter, nvar, nz, x, dx, xold, lrpar, rpar, lipar, ipar, ierr)
1181 0 : if (ierr /= 0) then
1182 0 : if (nsec > 0) call mesa_error(__FILE__, __LINE__) ! y = y1
1183 0 : call cleanup_after_setmatrix
1184 0 : call failed_in_setmatrix(0, lrpar, rpar, lipar, ipar, ierr)
1185 0 : return
1186 : end if
1187 0 : do k = 1, nz
1188 0 : do j = 1, nvar
1189 0 : dxd(j, k) = dx(j, k)
1190 : end do
1191 : end do
1192 0 : call set_primaries(nvar, nz, x, lrpar, rpar, lipar, ipar, ierr)
1193 0 : if (ierr /= 0) then
1194 0 : if (nsec > 0) call mesa_error(__FILE__, __LINE__) ! y = y1
1195 0 : call cleanup_after_setmatrix
1196 0 : call failed_in_setmatrix(0, lrpar, rpar, lipar, ipar, ierr)
1197 0 : return
1198 : end if
1199 : ! compute secondary variables for modified primaries
1200 0 : call set_secondaries(ivar, lrpar, rpar, lipar, ipar, ierr)
1201 0 : if (ierr /= 0) then
1202 0 : if (nsec > 0) call mesa_error(__FILE__, __LINE__) ! y = y1
1203 0 : call cleanup_after_setmatrix
1204 0 : call failed_in_setmatrix(0, lrpar, rpar, lipar, ipar, ierr)
1205 0 : return
1206 : end if
1207 0 : if (nsec > 0) call mesa_error(__FILE__, __LINE__) ! y2 = y
1208 :
1209 : ! now use the modified primaries and secondaries to get modified equations
1210 0 : do kk = 0, nskip(ivar) - 1
1211 :
1212 0 : if (nsec > 0) call mesa_error(__FILE__, __LINE__) ! y = y1
1213 0 : do k = 1, nz
1214 0 : do j = 1, nvar
1215 0 : x(j, k) = dxsave(j, k)
1216 : end do
1217 : end do
1218 : ! primaries are changed only on the zones of the comb
1219 0 : do k = 1 + kk, nz, nskip(ivar)
1220 0 : dx(ivar, k) = dxd(ivar, k)
1221 : end do
1222 0 : do k = 1, nz
1223 0 : do j = 1, nvar
1224 0 : x(j, k) = xold(j, k) + dx(j, k)
1225 0 : dxdd(j, k) = dx(j, k)
1226 : end do
1227 : end do
1228 0 : call set_primaries(nvar, nz, x, lrpar, rpar, lipar, ipar, ierr)
1229 0 : if (ierr /= 0) then
1230 0 : if (nsec > 0) call mesa_error(__FILE__, __LINE__) ! y = y1
1231 0 : call cleanup_after_setmatrix
1232 0 : call failed_in_setmatrix(0, lrpar, rpar, lipar, ipar, ierr)
1233 0 : return
1234 : end if
1235 :
1236 0 : if (nsec > 0) call mesa_error(__FILE__, __LINE__) ! then
1237 : ! note that we can use the previously computed secondaries
1238 : ! since, by definition, they depend only on the primaries of their own zone.
1239 : !do j=1+kk, nz, nskip(ivar)
1240 : ! y(j, 1:nsec)=y2(j, 1:nsec)
1241 : !end do
1242 : !end if
1243 :
1244 : ! compute the equations using these primaries and secondaries
1245 0 : call eval_equations(iiter, nvar, nz, x, xscale, equ, lrpar, rpar, lipar, ipar, ierr)
1246 0 : if (ierr /= 0) then
1247 0 : if (nsec > 0) call mesa_error(__FILE__, __LINE__) ! y = y1
1248 0 : call cleanup_after_setmatrix
1249 0 : call failed_in_setmatrix(0, lrpar, rpar, lipar, ipar, ierr)
1250 0 : return
1251 : end if
1252 :
1253 : ! compute derivatives
1254 0 : do j = ivar + kk*nvar, neq, nvar*nskip(ivar)
1255 0 : zone = (j - 1)/nvar + 1
1256 0 : if (dxdd(ivar, zone) == dxsave(ivar, zone)) then
1257 : ! can happen if the xdomain routine changed dx in a bad way.
1258 0 : ierr = -1
1259 0 : write (*, '(a, i5, 99e20.10)') 'failed trying to create numerical derivative for variable ', &
1260 0 : j, dxsave(ivar, zone), xsave(ivar, zone), xder(ivar, zone)
1261 0 : if (nsec > 0) call mesa_error(__FILE__, __LINE__) ! y = y1
1262 0 : call cleanup_after_setmatrix
1263 0 : call failed_in_setmatrix(j, lrpar, rpar, lipar, ipar, ierr)
1264 0 : return
1265 : end if
1266 0 : ideb = max(1, (zone - gskip(ivar) - 1)*nvar + 1)
1267 0 : ifin = min(neq, (zone + dskip(ivar))*nvar)
1268 0 : ideb = max(ideb, j - mljac)
1269 0 : ifin = min(ifin, j + mujac)
1270 0 : do i = ideb, ifin
1271 0 : ik = (i - 1)/nvar + 1
1272 0 : ij = i - (ik - 1)*nvar
1273 0 : partial = xscale(ivar, zone)*(equ(ij, ik) - equsave(ij, ik))/(dxdd(ivar, zone) - dxsave(ivar, zone))
1274 0 : if (matrix_type == square_matrix_type) then
1275 0 : A(i, j) = partial
1276 : else
1277 0 : A(i - j + idiag, j) = partial
1278 : end if
1279 : end do
1280 : end do
1281 :
1282 0 : if (nsec > 0) then ! restore the secondaries that correspond to the unmodified primaries
1283 : !do j=1+kk, nz, nskip(ivar)
1284 : ! y(j, 1:nsec)=y1(j, 1:nsec)
1285 : !end do
1286 : end if
1287 :
1288 : end do
1289 :
1290 : end do
1291 :
1292 0 : if (nsec > 0) call mesa_error(__FILE__, __LINE__) ! y = y1
1293 0 : call cleanup_after_setmatrix
1294 :
1295 0 : call exit_setmatrix(iiter, nvar, nz, neq, dx, ldA, A1, idiag, xscale, lrpar, rpar, lipar, ipar, ierr)
1296 :
1297 : end subroutine setmatrix
1298 :
1299 0 : subroutine cleanup_after_setmatrix
1300 : integer :: i, k
1301 0 : do k = 1, nz
1302 0 : do i = 1, nvar
1303 0 : x(i, k) = xsave(i, k)
1304 0 : dx(i, k) = dxsave(i, k)
1305 0 : equ(i, k) = equsave(i, k)
1306 : end do
1307 : end do
1308 0 : deallocate (xgg, dxd, dxdd, equsave)
1309 0 : end subroutine cleanup_after_setmatrix
1310 :
1311 0 : subroutine write_msg(msg)
1312 : real(dp), parameter :: secyer = 3.1558149984d7 ! seconds per year
1313 : character(*) :: msg
1314 0 : if (.not. dbg_msg) return
1315 :
1316 : 1 format(i6, 2x, i3, 2x, a, f8.4, 6(2x, a, 1x, e10.3), 2x, a, f6.2, 2x, a)
1317 0 : write (*, 1) iwork(i_model_number), iiter, 'coeff', coeff, 'slope', slope, 'f', f, &
1318 0 : 'avg resid', residual_norm, 'max resid', max_residual, 'avg corr', correction_norm, 'max corr', max_correction, &
1319 0 : 'lg dt/yr', log10(max(1d-99, work(r_dt)/secyer)), trim(msg)
1320 : end subroutine write_msg
1321 :
1322 0 : subroutine newton_core_dump(x, dx, xold)
1323 : real(dp), dimension(:, :) :: x
1324 : ! new vector of primaries, x = xold+dx
1325 : real(dp), dimension(:, :) :: dx
1326 : ! increment vector from previous vector of primaries.
1327 : real(dp), dimension(:, :) :: xold
1328 : ! xold = x-dx. xold is kept constant; x and dx change.
1329 : integer :: j, k
1330 :
1331 : 1 format(a20, i16) ! integers
1332 : 2 format(a20, 1pe26.16) ! reals
1333 : 3 format(a20, i6, 1x, 1pe26.16) ! 1 index reals
1334 : 4 format(a20, 2(i6, 1x), 1pe26.16) ! 2 index reals
1335 : 5 format(a20, i6, 1x, i16) ! 1 index integers
1336 :
1337 : ! only printout args and things that are carried over from one call to next
1338 : ! e.g., skip work arrays that are written on each call before they are read
1339 :
1340 0 : write (*, *) 'newton core dump'
1341 0 : write (*, 1) 'nz', nz
1342 0 : write (*, 1) 'nvar', nvar
1343 0 : write (*, 1) 'mljac', mljac
1344 0 : write (*, 1) 'mujac', mujac
1345 0 : write (*, 1) 'liwork', liwork
1346 0 : write (*, 1) 'lwork', lwork
1347 0 : write (*, 1) 'ldy', ldy
1348 0 : write (*, 1) 'nsec', nsec
1349 0 : write (*, 1) 'ldAF', ldAF
1350 0 : write (*, 1) 'ndiag', ndiag
1351 :
1352 0 : write (*, 2) 'tol_correction_norm', tol_correction_norm
1353 :
1354 0 : do j = 1, ndiag
1355 0 : do k = 1, nz
1356 0 : write (*, 4) 'A', j, k, A(j, k)
1357 : end do
1358 : end do
1359 :
1360 0 : do j = 1, ldAF
1361 0 : do k = 1, nz
1362 0 : write (*, 4) 'AF', j, k, AF(j, k)
1363 : end do
1364 : end do
1365 :
1366 0 : do k = 1, nz
1367 0 : write (*, 5) 'ipiv1', k, ipiv1(k)
1368 : end do
1369 :
1370 0 : do k = 1, nz
1371 0 : do j = 1, nvar
1372 0 : write (*, 4) 'x', j, k, x(j, k)
1373 0 : write (*, 4) 'dx', j, k, x(j, k)
1374 0 : write (*, 4) 'xold', j, k, x(j, k)
1375 : end do
1376 : end do
1377 :
1378 0 : end subroutine newton_core_dump
1379 :
1380 10 : subroutine pointers(ierr)
1381 : integer, intent(out) :: ierr
1382 :
1383 : integer :: i, j
1384 :
1385 10 : ierr = 0
1386 : i = num_work_params + 1
1387 :
1388 10 : A1(1:ndiag*neq) => work(i:i + ndiag*neq - 1); i = i + ndiag*neq
1389 10 : A(1:ndiag, 1:neq) => A1(1:ndiag*neq)
1390 10 : Acopy1 => A1
1391 10 : Acopy => A
1392 :
1393 10 : xsave1(1:neq) => work(i:i + neq - 1); i = i + neq
1394 10 : xsave(1:nvar, 1:nz) => xsave1(1:neq)
1395 :
1396 10 : dxsave1(1:neq) => work(i:i + neq - 1); i = i + neq
1397 10 : dxsave(1:nvar, 1:nz) => dxsave1(1:neq)
1398 :
1399 10 : B1 => work(i:i + neq - 1); i = i + neq
1400 10 : B(1:nvar, 1:nz) => B1(1:neq)
1401 :
1402 10 : B_init1 => work(i:i + neq - 1); i = i + neq
1403 10 : B_init(1:nvar, 1:nz) => B_init1(1:neq)
1404 :
1405 10 : grad_f1(1:neq) => work(i:i + neq - 1); i = i + neq
1406 10 : grad_f(1:nvar, 1:nz) => grad_f1(1:neq)
1407 :
1408 10 : rhs(1:nvar, 1:nz) => work(i:i + neq - 1); i = i + neq
1409 :
1410 10 : xder(1:nvar, 1:nz) => work(i:i + neq - 1); i = i + neq
1411 :
1412 10 : dx(1:nvar, 1:nz) => work(i:i + neq - 1); i = i + neq
1413 :
1414 10 : if (nsec > 0) then
1415 : !y1(1:nvar,1:nz) => work(i:i+nsec*neq-1); i = i+nsec*neq
1416 : !y2(1:nvar,1:nz) => work(i:i+nsec*neq-1); i = i+nsec*neq
1417 : else
1418 10 : nullify (y1)
1419 10 : nullify (y2)
1420 : end if
1421 :
1422 10 : if (i - 1 > lwork) then
1423 0 : ierr = -1
1424 0 : write (*, '(a, i6, a, 99i6)') 'newton: lwork is too small. must be at least', i - 1, &
1425 0 : ' but is only ', lwork, neq, ndiag, ldAF, nsec
1426 0 : return
1427 : end if
1428 :
1429 : i = num_iwork_params + 1
1430 10 : ipiv1(1:neq) => iwork(i:i + neq - 1); i = i + neq
1431 10 : if (i - 1 > liwork) then
1432 0 : ierr = -1
1433 0 : write (*, '(a, i6, a, i6)') 'newton: liwork is too small. must be at least', i, &
1434 0 : ' but is only ', liwork
1435 0 : return
1436 : end if
1437 :
1438 10 : if (matrix_type == block_tridiag_dble_matrix_type) then
1439 :
1440 0 : ipiv_blk1(1:neq) => ipiv1(1:neq)
1441 0 : ublk1(1:nvar*neq) => A1(1:nvar*neq)
1442 0 : dblk1(1:nvar*neq) => A1(1 + nvar*neq:2*nvar*neq)
1443 0 : lblk1(1:nvar*neq) => A1(1 + 2*nvar*neq:3*nvar*neq)
1444 0 : lblk(1:nvar, 1:nvar, 1:nz) => lblk1(1:nvar*neq)
1445 0 : dblk(1:nvar, 1:nvar, 1:nz) => dblk1(1:nvar*neq)
1446 0 : ublk(1:nvar, 1:nvar, 1:nz) => ublk1(1:nvar*neq)
1447 :
1448 : ! testing
1449 0 : k = 2*nvar*neq - nvar*nvar
1450 0 : do i = 1, nvar
1451 0 : do j = 1, nvar
1452 0 : dblk(i, j, nz) = i*100 + j
1453 0 : if (dblk(i, j, nz) /= A1(k + nvar*(j - 1) + i)) then
1454 0 : call mesa_error(__FILE__, __LINE__)
1455 : end if
1456 : !if (dblk1(nvar*nvar*(nz-1)+j) /= dblk(i,j,nz)) then
1457 : ! call mesa_error(__FILE__,__LINE__)
1458 : !end if
1459 : end do
1460 : end do
1461 :
1462 : end if
1463 :
1464 10 : if (matrix_type == block_tridiag_dble_matrix_type) then
1465 :
1466 0 : ublkF1(1:nvar*neq) => AF1(1:nvar*neq)
1467 0 : dblkF1(1:nvar*neq) => AF1(1 + nvar*neq:2*nvar*neq)
1468 0 : lblkF1(1:nvar*neq) => AF1(1 + 2*nvar*neq:3*nvar*neq)
1469 :
1470 0 : lblkF(1:nvar, 1:nvar, 1:nz) => lblkF1(1:nvar*neq)
1471 0 : dblkF(1:nvar, 1:nvar, 1:nz) => dblkF1(1:nvar*neq)
1472 0 : ublkF(1:nvar, 1:nvar, 1:nz) => ublkF1(1:nvar*neq)
1473 :
1474 : end if
1475 :
1476 : end subroutine pointers
1477 :
1478 20 : real(dp) function eval_slope(nvar, nz, grad_f, B)
1479 : integer, intent(in) :: nvar, nz
1480 : real(dp), intent(in), dimension(:, :) :: grad_f, B
1481 : integer :: i
1482 20 : eval_slope = 0
1483 60 : do i = 1, nvar
1484 40100 : eval_slope = eval_slope + dot_product(grad_f(i, 1:nz), B(i, 1:nz))
1485 : end do
1486 20 : end function eval_slope
1487 :
1488 71 : real(dp) function eval_f(nvar, nz, equ)
1489 : integer, intent(in) :: nvar, nz
1490 : real(dp), intent(in), dimension(:, :) :: equ
1491 : integer :: k, i
1492 71 : real(dp) :: q
1493 : include 'formats'
1494 71 : eval_f = 0
1495 71142 : do k = 1, nz
1496 213284 : do i = 1, nvar
1497 142142 : q = equ(i, k)
1498 213213 : eval_f = eval_f + q*q
1499 : end do
1500 : end do
1501 71 : eval_f = eval_f/2
1502 : !write(*,1) 'do_newton: eval_f', eval_f
1503 71 : end function eval_f
1504 :
1505 : end subroutine do_newton
1506 :
1507 10 : subroutine get_newton_work_sizes(mljac, mujac, nvar, nz, nsec, matrix_type, lwork, liwork, ierr)
1508 : integer, intent(in) :: mljac, mujac, nvar, nz, nsec
1509 : integer, intent(in) :: matrix_type
1510 : integer, intent(out) :: lwork, liwork
1511 : integer, intent(out) :: ierr
1512 :
1513 : integer :: ndiag, ldAF, neq
1514 :
1515 : include 'formats'
1516 :
1517 10 : ierr = 0
1518 10 : neq = nvar*nz
1519 :
1520 10 : if (matrix_type == square_matrix_type) then
1521 : ndiag = neq
1522 10 : ldAF = ndiag
1523 10 : else if (matrix_type == block_tridiag_dble_matrix_type) then
1524 0 : ndiag = 3*nvar
1525 0 : ldAF = ndiag
1526 : else
1527 10 : ndiag = mljac + mujac + 1
1528 10 : ldAF = mljac + ndiag
1529 : end if
1530 :
1531 10 : liwork = num_iwork_params + neq
1532 10 : lwork = num_work_params + neq*(ndiag + 9 + 2*nsec)
1533 :
1534 10 : end subroutine get_newton_work_sizes
1535 :
1536 : end module mod_newton
|