Source code for iDEA.minimize

"""Direct minimisation of the Hamiltonian
"""
from __future__ import division
from __future__ import print_function
from __future__ import absolute_import

import numpy as np
import scipy.linalg as spla

machine_epsilon = np.finfo(float).eps


[docs]class CGMinimizer(object): """Performs conjugate gradient minimization Performs Pulay mixing with Kerker preconditioner, as described on pp 1071 of [Payne1992]_ """
[docs] def __init__(self, pm, total_energy=None, nstates=None, cg_restart=3, line_fit='quadratic'): """Initializes variables parameters ---------- pm: object input parameters total_energy: callable function f(pm, waves) that returns total energy nstates: int how many states to retain. currently, this must equal the number of occupied states (for unoccupied states need to re-diagonalize) cg_restart: int cg history is restarted every cg_restart steps line_fit: str select method for line-search 'quadratic' (default), 'quadratic-der' or 'trigonometric' """ self.pm = pm self.dx = pm.sys.deltax self.sqdx = np.sqrt(self.dx) self.dx2 = (self.dx)**2 self.NE = pm.sys.NE if nstates is None: ## this is CASTEP's rule based on free-electron arguments ## + some heuristics #n_add = int(2 * np.sqrt(pm.sys.NE)) #n_add = np.minimum(4, n_add) #self.nstates = pm.sys.NE + n_add self.nstates = pm.sys.NE else: self.nstates = nstates self.cg_dirs = np.zeros((pm.sys.grid, self.nstates)) self.steepest_prods = np.ones((self.nstates)) if total_energy is None: raise ValueError("Need to provide total_energy function that computes total energy from given set of single-particle wave functions.") else: self._total_energy = total_energy self.cg_restart = cg_restart self.cg_counter = 1 self.line_fit = line_fit
[docs] def exact_dirs(self, wfs, H): r"""Search direction from exact diagonalization Just for testing purposes (you can easily end up with multiple minima along the exact search directions) """ eigv, eigf = spla.eigh(H) dirs = eigf[:,:self.nstates] - wfs overlaps = np.dot(wfs.conj().T, dirs) dirs = dirs - np.dot(wfs, overlaps) return dirs
[docs] def step(self, wfs, H): r"""Performs one cg step After each step, the Hamiltonian should be recomputed using the updated wave functions. Note that we currently don't enforce the wave functions to remain eigenfunctions of the Hamiltonian. This should not matter for the total energy but means we need to performa a diagonalisation at the very end. parameters ---------- wfs: array_like (grid, nwf) input wave functions H: array_like input Hamiltonian returns ------- wfs: array_like (grid, nwf) updated wave functions """ # internally work with dx=1 wfs *= self.sqdx wfs = wfs[:, :self.nstates] #wfs = orthonormalize(wfs) # subspace diagonalisation also makes sure that wfs are orthonormal wfs = self.subspace_diagonalization(wfs,H) exact = False if exact: conjugate_dirs = self.exact_dirs(wfs, H) else: steepest_dirs = self.steepest_dirs(wfs, H) conjugate_dirs = self.conjugate_directions(steepest_dirs, wfs) wfs_new = self.line_search(wfs, conjugate_dirs, H, mode=self.line_fit) return wfs_new / self.sqdx
[docs] def braket(self, bra=None, O=None, ket=None): r"""Compute braket with operator O bra and ket may hold multiple vectors or may be empty. Variants: .. math: \lambda_i = \langle \psi_i | O | \psi_i \rangle \varphi_i = O | \psi_i \rangle \varphi_i = <psi_i | O parameters ---------- bra: array_like (grid, nwf) lhs of braket O: array_like (grid, grid) operator. defaults to identity matrix ket: array_like (grid, nwf) rhs of braket """ if O is None: if bra is None: return ket elif ket is None: return bra.conj().T else: return np.dot(bra.conj().T, ket) else: if bra is None: return np.dot(O, ket) elif ket is None: return np.dot(bra.conj().T, O) else: O_ket = np.dot(O, ket) return (bra.conj() * O_ket).sum(0)
[docs] def steepest_dirs(self, wfs, H): r"""Compute steepest descent directions Compute steepest descent directions and project out components pointing along other orbitals (equivalent to steepest descent with the proper Lagrange multipliers). .. math: \zeta^{'m}_i = -(H \psi_i^m + \sum_j \langle \psi_j^m | H | \psi_i^m\rangle \psi_j^m See eqns (5.10), (5.12) in [Payne1992]_ parameters ---------- H: array_like Hamiltonian matrix (grid,grid) wavefunctions: array_like wave function array (grid, nwf) returns ------- steepest_orth: array_like steepest descent directions (grid, nwf) """ nwf = wfs.shape[-1] # \zeta_i = - H v_i steepest = - self.braket(None, H, wfs) # overlaps = (iwf, idir) overlaps = np.dot(wfs.conj().T, steepest) # \zeta_i += \sum_j <v_j|H|v_i> v_j steepest_orth = steepest - np.dot(wfs,overlaps) return steepest_orth
[docs] def conjugate_directions(self, steepest_dirs, wfs): r"""Compute conjugate gradient descent for one state Updates internal arrays accordingly .. math: d^m = g^m + \gamma^m d^{m-1} \gamma^m = \frac{g^m\cdot g^m}{g^{m-1}\cdot g^{m-1}} See eqns (5.8-9) in [Payne1992]_ parameters ---------- steepest_dirs: array_like steepest-descent directions (grid, nwf) wfs: array_like wave functions (grid, nwf) returns ------- cg_dirs: array_like conjugate directions (grid, nwf) """ steepest_prods = np.linalg.norm(steepest_dirs)**2 gamma = np.sum(steepest_prods)/ np.sum(self.steepest_prods) #gamma = steepest_prods / self.steepest_prods self.steepest_prods = steepest_prods if self.cg_counter == self.cg_restart: self.cg_dirs[:] = 0 self.cg_counter = 0 # cg_dirs = (grid, nwf) #cg_dirs = steepest_dirs + np.dot(gamma, self.cg_dirs) cg_dirs = steepest_dirs + gamma * self.cg_dirs #print(gamma) # orthogonalize to wfs vector # note that wfs vector is normalised to #electrons! #cg_dirs = cg_dirs - np.sum(np.dot(cg_dirs.conj(), wfs.T))/self.nstates * wfs # overlaps: 1st index wf, 2nd index cg dir overlaps = np.dot(wfs.conj().T, cg_dirs) cg_dirs = cg_dirs - np.dot(wfs, overlaps) self.cg_dirs = cg_dirs self.cg_counter += 1 return cg_dirs
[docs] def total_energy(self, wfs): r"""Compute total energy for given wave function This method must be provided by the calling module and is initialized in the constructor. """ return self._total_energy(self.pm, wfs/self.sqdx)
[docs] def subspace_diagonalization(self, v, H): """Diagonalise suspace of wfs parameters ---------- v: array_like (grid, nwf) array of orthonormal vectors H: array_like (grid,grid) Hamiltonian matrix returns ------- v_rot: array_like (grid, nwf) array of orthonormal eigenvectors of H (or at least close to eigenvectors) """ # overlap matrix S = np.dot(v.conj().T, np.dot(H, v)) # eigf = (nwf_old, nwf_new) eigv, eigf = np.linalg.eigh(S) v_rot = np.dot(v, eigf) # need to rotate cg_dirs as well! self.cg_dirs = np.dot(self.cg_dirs, eigf) return v_rot
[docs]def orthonormalize(v): r"""Return orthonormalized set of vectors Return orthonormal set of vectors that spans the same space as the input vectors. parameters ---------- v: array_like (n, m) array of m vectors in n-dimensional space """ #orth = spla.orth(vecs.T) #orth /= np.linalg.norm(orth, axis=0) #return orth.T # vectors need to be columns Q, R = spla.qr(v, pivoting=False, mode='economic') # required to enforce positive signs of R's diagonal # without this, the signs of the orthonormalised vectors in Q are random # See https://mail.python.org/pipermail/scipy-user/2014-September/035990.html Q = Q * np.sign(np.diag(R)) # Q contains orthonormalised vectors as columns return Q
[docs]class DiagMinimizer(object): """Performs minimization using exact diagonalisation This would be too slow for ab initio codes but is something we can afford in the iDEA world. Not yet fully implemented though (still missing analytic derivatives and a proper line search algorithm). """
[docs] def __init__(self, pm, total_energy=None): """Initializes variables parameters ---------- pm: object input parameters total_energy: callable function f(pm, waves) that returns total energy nstates: int how many states to retain. currently, this must equal the number of occupied states (for unoccupied states need to re-diagonalize) """ self.pm = pm self.dx = pm.sys.deltax self.sqdx = np.sqrt(self.dx) self.dx2 = (self.dx)**2 self.nstates = pm.sys.NE if total_energy is None: raise ValueError("Need to provide total_energy function that computes total energy from given set of single-particle wave functions.") else: self._total_energy = total_energy
[docs] def total_energy(self, wfs): r"""Compute total energy for given wave function This method must be provided by the calling module and is initialized in the constructor. """ return self._total_energy(self.pm, wfs/self.sqdx)
[docs] def h_step(self, H0, H1): r"""Performs one minimisation step parameters ---------- H0: array_like input Hamiltonian to be mixed (banded form) H1: array_like output Hamiltonian to be mixed (banded form) returns ------- H: array_like mixed hamiltonian (banded form) """ ## TODO: implement analytic derivative #dE_dtheta_0 = 2 * np.sum(self.braket(dirs, H, wfs).real) # For the moment, we simply fit the parabola through 3 points npt = 3 lambdas = np.linspace(0, 1, npt) energies = np.zeros(npt) # self-consistent variant for i in range(npt): l = lambdas[i] Hl = (1-l) * H0 + l * H1 enl, wfsl = spla.eig_banded(Hl, lower=True) energies[i] = self.total_energy(wfsl) # improve numerical stability energies -= energies[0] p = np.polyfit(lambdas, energies,2) a,b,c = p l = -b/ (2*a) # don't want step to get too large l = np.minimum(1,l) #import matplotlib.pyplot as plt #fig, axs = plt.subplots(1,1) #plt.plot(lambdas, energies) ##axs.set_ylim(np.min(total_energies), np.max(total_energies)) #plt.show() Hl = (1-l) * H0 + l * H1 return Hl