"""Direct minimisation of the Hamiltonian
"""
from __future__ import division
from __future__ import print_function
from __future__ import absolute_import
import numpy as np
import scipy.linalg as spla
machine_epsilon = np.finfo(float).eps
[docs]class CGMinimizer(object):
"""Performs conjugate gradient minimization
Performs Pulay mixing with Kerker preconditioner,
as described on pp 1071 of [Payne1992]_
"""
[docs] def __init__(self, pm, total_energy=None, nstates=None, cg_restart=3, line_fit='quadratic'):
"""Initializes variables
parameters
----------
pm: object
input parameters
total_energy: callable
function f(pm, waves) that returns total energy
nstates: int
how many states to retain.
currently, this must equal the number of occupied states
(for unoccupied states need to re-diagonalize)
cg_restart: int
cg history is restarted every cg_restart steps
line_fit: str
select method for line-search
'quadratic' (default), 'quadratic-der' or 'trigonometric'
"""
self.pm = pm
self.dx = pm.sys.deltax
self.sqdx = np.sqrt(self.dx)
self.dx2 = (self.dx)**2
self.NE = pm.sys.NE
if nstates is None:
## this is CASTEP's rule based on free-electron arguments
## + some heuristics
#n_add = int(2 * np.sqrt(pm.sys.NE))
#n_add = np.minimum(4, n_add)
#self.nstates = pm.sys.NE + n_add
self.nstates = pm.sys.NE
else:
self.nstates = nstates
self.cg_dirs = np.zeros((pm.sys.grid, self.nstates))
self.steepest_prods = np.ones((self.nstates))
if total_energy is None:
raise ValueError("Need to provide total_energy function that computes total energy from given set of single-particle wave functions.")
else:
self._total_energy = total_energy
self.cg_restart = cg_restart
self.cg_counter = 1
self.line_fit = line_fit
[docs] def exact_dirs(self, wfs, H):
r"""Search direction from exact diagonalization
Just for testing purposes (you can easily end up with
multiple minima along the exact search directions)
"""
eigv, eigf = spla.eigh(H)
dirs = eigf[:,:self.nstates] - wfs
overlaps = np.dot(wfs.conj().T, dirs)
dirs = dirs - np.dot(wfs, overlaps)
return dirs
[docs] def step(self, wfs, H):
r"""Performs one cg step
After each step, the Hamiltonian should be recomputed using the updated
wave functions.
Note that we currently don't enforce the wave functions to remain
eigenfunctions of the Hamiltonian. This should not matter for the total
energy but means we need to performa a diagonalisation at the very end.
parameters
----------
wfs: array_like
(grid, nwf) input wave functions
H: array_like
input Hamiltonian
returns
-------
wfs: array_like
(grid, nwf) updated wave functions
"""
# internally work with dx=1
wfs *= self.sqdx
wfs = wfs[:, :self.nstates]
#wfs = orthonormalize(wfs)
# subspace diagonalisation also makes sure that wfs are orthonormal
wfs = self.subspace_diagonalization(wfs,H)
exact = False
if exact:
conjugate_dirs = self.exact_dirs(wfs, H)
else:
steepest_dirs = self.steepest_dirs(wfs, H)
conjugate_dirs = self.conjugate_directions(steepest_dirs, wfs)
wfs_new = self.line_search(wfs, conjugate_dirs, H, mode=self.line_fit)
return wfs_new / self.sqdx
[docs] def line_search(self, wfs, dirs, H, mode):
r"""Performs a line search along cg direction
Trying to minimize total energy
.. math ::
E(s) = \langle \psi(s) | H[\psi(s)] | \psi(s) \rangle
"""
E_0 = self.total_energy(wfs)
dE_ds_0 = 2 * np.sum(self.braket(dirs[:,:self.NE], H, wfs[:,:self.NE]).real)
if dE_ds_0 > 0:
s = "CG: Warning: Energy derivative along conjugate gradient direction"
s += "\nis positive: dE/dlambda = {:.3e}".format(dE_ds_0)
self.pm.sprint(s)
#raise ValueError(s)
# Use only E_0 and dE_ds_0, assume constant H
# This is useful, if the energy landscape is flat up to machine precision
if mode == 'quadratic-der':
H_dirs = np.dot(H, dirs)
a = (wfs.conj() * H_dirs).sum()
b = (dirs.conj() * H_dirs).sum()
s_opt = -a/b
wfs_new = wfs + s_opt * dirs
# Use E_0, dE_ds_0 and E_1 + perform parabolic fit
elif mode == 'quadratic':
s_1 = 0.7
new_wfs = lambda s: wfs + dirs * s
b = lambda s: dE_ds_0 * s
a = lambda E_1, s: (E_1 - E_0 - b(s))/ s**2
s_min = lambda E_1, s: -0.5 * b(s) / (a(E_1,s) * s)
wfs_1 = new_wfs(s_1)
wfs_1 = orthonormalize(wfs_1)
E_1 = self.total_energy(wfs_1)
eval_max = 10
epsilon = 10 * machine_epsilon * np.abs(E_0)
for i in range(eval_max):
if E_1 < E_0 + epsilon:
break
else:
s_1 /= 2.0
#print(s_1)
wfs_1 = new_wfs(s_1)
wfs_1 = orthonormalize(wfs_1)
E_1 = self.total_energy(wfs_1)
i += 1
# if energy landscape is just noise,
# rely on gradient information only
if np.abs(E_0 - E_1) < epsilon: # and np.abs(dE_ds_0) < epsilon:
# this is non-selfconsistent for the moment
# (would need to recompute H from wfs_1 otherwise)
dE_ds_1 = 2 * np.sum(self.braket(dirs[:,:self.NE], H, wfs_1[:,:self.NE]).real)
if dE_ds_1 < 0:
s = "CG: Warning: Energy derivative along conjugate gradient direction"
s += "\n at E1 is negative: dE/dlambda = {:.3e}".format(dE_ds_1)
self.pm.sprint(s)
s_opt = - dE_ds_0/(dE_ds_1 - dE_ds_0) * s_1
#dE_ds_1 =
#H_dirs = np.dot(H, dirs)
#a = (wfs.conj() * H_dirs).sum()
#b = (dirs.conj() * H_dirs).sum()
#s_opt = -a/b
#dE_ds_1 = 2 * np.sum(self.braket(dirs[:,:self.NE], H, wfs_1[:,:self.NE]).real)
print(s_1)
print(s_opt)
# else use energy E_1
else:
# eqn (5.30)
s_opt = s_min(E_1, s_1)
# we don't want the step to get too large
s_opt = np.minimum(1.5,s_opt)
#s_opt = np.maximum(0,s_opt)
if s_opt < 0:
self.pm.sprint("CG: Warning: s_opt = {:.3e} < 0".format(s_opt))
wfs_new = new_wfs(s_opt)
# [Payne1992]_ method for (single-band) cg
# Not sure this is really appropriate here
# note: Payne et al. actually normalize the cg direction (?)
elif mode == 'trigonometric':
s_1 = np.pi/30
wfs_1 = wfs * np.cos(s_1) + dirs * np.sin(s_1)
wfs_1 = orthonormalize(wfs_1)
E_1 = self.total_energy(wfs_1)
# eqns (5.28-5.29)
B_1 = 0.5 * dE_ds_0
A_1 = (E_0 - E_1 + B_1)/ (1 - np.cos(2*s_1))
s_opt = 0.5 * np.arctan(B_1/ A_1)
# we don't want the step to get too large
s_opt = np.minimum(1.5,s_opt)
wfs_new = wfs * np.cos(s_opt) + dirs * np.sin(s_opt)
else:
raise ValueError("CG: Unrecognised line-search mode '{}'".format(mode))
wfs_new = orthonormalize(wfs_new)
return wfs_new
[docs] def braket(self, bra=None, O=None, ket=None):
r"""Compute braket with operator O
bra and ket may hold multiple vectors or may be empty.
Variants:
.. math:
\lambda_i = \langle \psi_i | O | \psi_i \rangle
\varphi_i = O | \psi_i \rangle
\varphi_i = <psi_i | O
parameters
----------
bra: array_like
(grid, nwf) lhs of braket
O: array_like
(grid, grid) operator. defaults to identity matrix
ket: array_like
(grid, nwf) rhs of braket
"""
if O is None:
if bra is None:
return ket
elif ket is None:
return bra.conj().T
else:
return np.dot(bra.conj().T, ket)
else:
if bra is None:
return np.dot(O, ket)
elif ket is None:
return np.dot(bra.conj().T, O)
else:
O_ket = np.dot(O, ket)
return (bra.conj() * O_ket).sum(0)
[docs] def steepest_dirs(self, wfs, H):
r"""Compute steepest descent directions
Compute steepest descent directions and project out components pointing
along other orbitals
(equivalent to steepest descent with the proper Lagrange multipliers).
.. math:
\zeta^{'m}_i = -(H \psi_i^m + \sum_j \langle \psi_j^m | H | \psi_i^m\rangle \psi_j^m
See eqns (5.10), (5.12) in [Payne1992]_
parameters
----------
H: array_like
Hamiltonian matrix (grid,grid)
wavefunctions: array_like
wave function array (grid, nwf)
returns
-------
steepest_orth: array_like
steepest descent directions (grid, nwf)
"""
nwf = wfs.shape[-1]
# \zeta_i = - H v_i
steepest = - self.braket(None, H, wfs)
# overlaps = (iwf, idir)
overlaps = np.dot(wfs.conj().T, steepest)
# \zeta_i += \sum_j <v_j|H|v_i> v_j
steepest_orth = steepest - np.dot(wfs,overlaps)
return steepest_orth
[docs] def conjugate_directions(self, steepest_dirs, wfs):
r"""Compute conjugate gradient descent for one state
Updates internal arrays accordingly
.. math:
d^m = g^m + \gamma^m d^{m-1}
\gamma^m = \frac{g^m\cdot g^m}{g^{m-1}\cdot g^{m-1}}
See eqns (5.8-9) in [Payne1992]_
parameters
----------
steepest_dirs: array_like
steepest-descent directions (grid, nwf)
wfs: array_like
wave functions (grid, nwf)
returns
-------
cg_dirs: array_like
conjugate directions (grid, nwf)
"""
steepest_prods = np.linalg.norm(steepest_dirs)**2
gamma = np.sum(steepest_prods)/ np.sum(self.steepest_prods)
#gamma = steepest_prods / self.steepest_prods
self.steepest_prods = steepest_prods
if self.cg_counter == self.cg_restart:
self.cg_dirs[:] = 0
self.cg_counter = 0
# cg_dirs = (grid, nwf)
#cg_dirs = steepest_dirs + np.dot(gamma, self.cg_dirs)
cg_dirs = steepest_dirs + gamma * self.cg_dirs
#print(gamma)
# orthogonalize to wfs vector
# note that wfs vector is normalised to #electrons!
#cg_dirs = cg_dirs - np.sum(np.dot(cg_dirs.conj(), wfs.T))/self.nstates * wfs
# overlaps: 1st index wf, 2nd index cg dir
overlaps = np.dot(wfs.conj().T, cg_dirs)
cg_dirs = cg_dirs - np.dot(wfs, overlaps)
self.cg_dirs = cg_dirs
self.cg_counter += 1
return cg_dirs
[docs] def total_energy(self, wfs):
r"""Compute total energy for given wave function
This method must be provided by the calling module
and is initialized in the constructor.
"""
return self._total_energy(self.pm, wfs/self.sqdx)
[docs] def subspace_diagonalization(self, v, H):
"""Diagonalise suspace of wfs
parameters
----------
v: array_like
(grid, nwf) array of orthonormal vectors
H: array_like
(grid,grid) Hamiltonian matrix
returns
-------
v_rot: array_like
(grid, nwf) array of orthonormal eigenvectors of H
(or at least close to eigenvectors)
"""
# overlap matrix
S = np.dot(v.conj().T, np.dot(H, v))
# eigf = (nwf_old, nwf_new)
eigv, eigf = np.linalg.eigh(S)
v_rot = np.dot(v, eigf)
# need to rotate cg_dirs as well!
self.cg_dirs = np.dot(self.cg_dirs, eigf)
return v_rot
[docs]def orthonormalize(v):
r"""Return orthonormalized set of vectors
Return orthonormal set of vectors that spans the same space
as the input vectors.
parameters
----------
v: array_like
(n, m) array of m vectors in n-dimensional space
"""
#orth = spla.orth(vecs.T)
#orth /= np.linalg.norm(orth, axis=0)
#return orth.T
# vectors need to be columns
Q, R = spla.qr(v, pivoting=False, mode='economic')
# required to enforce positive signs of R's diagonal
# without this, the signs of the orthonormalised vectors in Q are random
# See https://mail.python.org/pipermail/scipy-user/2014-September/035990.html
Q = Q * np.sign(np.diag(R))
# Q contains orthonormalised vectors as columns
return Q
[docs]class DiagMinimizer(object):
"""Performs minimization using exact diagonalisation
This would be too slow for ab initio codes but is something we can afford
in the iDEA world.
Not yet fully implemented though (still missing analytic derivatives and a
proper line search algorithm).
"""
[docs] def __init__(self, pm, total_energy=None):
"""Initializes variables
parameters
----------
pm: object
input parameters
total_energy: callable
function f(pm, waves) that returns total energy
nstates: int
how many states to retain.
currently, this must equal the number of occupied states
(for unoccupied states need to re-diagonalize)
"""
self.pm = pm
self.dx = pm.sys.deltax
self.sqdx = np.sqrt(self.dx)
self.dx2 = (self.dx)**2
self.nstates = pm.sys.NE
if total_energy is None:
raise ValueError("Need to provide total_energy function that computes total energy from given set of single-particle wave functions.")
else:
self._total_energy = total_energy
[docs] def total_energy(self, wfs):
r"""Compute total energy for given wave function
This method must be provided by the calling module
and is initialized in the constructor.
"""
return self._total_energy(self.pm, wfs/self.sqdx)
[docs] def h_step(self, H0, H1):
r"""Performs one minimisation step
parameters
----------
H0: array_like
input Hamiltonian to be mixed (banded form)
H1: array_like
output Hamiltonian to be mixed (banded form)
returns
-------
H: array_like
mixed hamiltonian (banded form)
"""
## TODO: implement analytic derivative
#dE_dtheta_0 = 2 * np.sum(self.braket(dirs, H, wfs).real)
# For the moment, we simply fit the parabola through 3 points
npt = 3
lambdas = np.linspace(0, 1, npt)
energies = np.zeros(npt)
# self-consistent variant
for i in range(npt):
l = lambdas[i]
Hl = (1-l) * H0 + l * H1
enl, wfsl = spla.eig_banded(Hl, lower=True)
energies[i] = self.total_energy(wfsl)
# improve numerical stability
energies -= energies[0]
p = np.polyfit(lambdas, energies,2)
a,b,c = p
l = -b/ (2*a)
# don't want step to get too large
l = np.minimum(1,l)
#import matplotlib.pyplot as plt
#fig, axs = plt.subplots(1,1)
#plt.plot(lambdas, energies)
##axs.set_ylim(np.min(total_energies), np.max(total_energies))
#plt.show()
Hl = (1-l) * H0 + l * H1
return Hl