Kernel Quantum Probability Library
The KQP library aims at providing tools for working with quantums probabilities
qp_approach.hpp
1 /*
2  This file is part of the Kernel Quantum Probability library (KQP).
3 
4  KQP is free software: you can redistribute it and/or modify
5  it under the terms of the GNU General Public License as published by
6  the Free Software Foundation, either version 3 of the License, or
7  (at your option) any later version.
8 
9  KQP is distributed in the hope that it will be useful,
10  but WITHOUT ANY WARRANTY; without even the implied warranty of
11  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12  GNU General Public License for more details.
13 
14  You should have received a copy of the GNU General Public License
15  along with KQP. If not, see <http://www.gnu.org/licenses/>.
16  */
17 
18 #ifndef __KQP_REDUCED_QP_APPROACH_H__
19 #define __KQP_REDUCED_QP_APPROACH_H__
20 
21 #include <iostream>
22 #include <functional>
23 #include <list>
24 #include <boost/type_traits/is_complex.hpp>
25 #include <numeric>
26 
27 #include <kqp/kqp.hpp>
28 #include <Eigen/Eigenvalues>
29 
30 #include <kqp/evd_utils.hpp>
31 #include <kqp/cleanup.hpp>
32 #include <kqp/cleaning/null_space.hpp>
33 #include <kqp/feature_matrix.hpp>
34 #include <kqp/coneprog.hpp>
35 
36 
37 
38 namespace kqp {
39 
40 #ifndef SWIG
41 # include <kqp/define_header_logger.hpp>
42  DEFINE_KQP_HLOGGER("kqp.qp-approach");
43 
54  template<typename Scalar>
55  void solve_qp(int r, KQP_REAL_OF(Scalar) lambda, const KQP_MATRIX(Scalar) &gramMatrix, const KQP_MATRIX(Scalar) &alpha,
56  const KQP_VECTOR(KQP_REAL_OF(Scalar)) &nu, kqp::cvxopt::ConeQPReturn<KQP_REAL_OF(Scalar)> &result,
57  const cvxopt::ConeQPOptions< KQP_REAL_OF(Scalar) >& options = cvxopt::ConeQPOptions< KQP_REAL_OF(Scalar) >());
58 
60  template<typename Scalar>
61  struct LambdaError {
62  Scalar deltaMin, deltaMax;
63  Scalar maxa;
64  Index j;
65 
66  LambdaError() : deltaMin(0), deltaMax(0), maxa(0) {}
67  LambdaError(Scalar deltaMin, Scalar deltaMax, Scalar maxa, Index j) : deltaMin(deltaMin), deltaMax(deltaMax), maxa(maxa), j(j) {}
68 
69  LambdaError operator+(const LambdaError &other) {
70  LambdaError<Scalar> r;
71  r.deltaMin = this->deltaMin + other.deltaMin;
72  r.deltaMax = this->deltaMax + other.deltaMax;
73  r.maxa = this->maxa + other.maxa;
74  // std::cerr << "[[" << *this << " + " << other << "]] = " << r << std::endl;
75  return r;
76  }
77 
78  inline Scalar delta() const {
79  return (deltaMax + deltaMin) / 2.;
80  }
81 
82  struct Comparator {
83  bool operator() (const LambdaError &e1, const LambdaError &e2) {
84  if (e1.maxa < epsilon())
85  return e1.maxa < e2.maxa;
86  return e1.delta() * e2.maxa < e2.delta() * e1.maxa;
87  }
88  };
89 
90  };
91 
92  template<typename Scalar>
93  std::ostream &operator<<(std::ostream &out, const LambdaError<Scalar> &l) {
94  return out << boost::format("<delta=%g, maxa=%g, index=%d>") % l.delta %l.maxa %l.j;
95  }
96 
97  template<class IndexedComparable>
98  struct QPResultComparator {
99  const IndexedComparable &x;
100  Index r,n;
101 
102  bool isComplex;
103  Index size;
104 
105  typedef typename Eigen::internal::remove_all<decltype(x[0])>::type Real;
106 
107  QPResultComparator(bool isComplex, const IndexedComparable &x, Index r, Index n) : x(x), r(r), n(n), isComplex(isComplex),
108  size(isComplex ? 2*n*r : n*r) {}
109 
110  inline Real getXi(int i) {
111  return std::abs(x[size+i]);
112  }
113 
114  inline Real getAbsSum(int i) {
115  Real sum = 0;
116  for(Index j = 0; j < r; j++) {
117  if (isComplex) sum += Eigen::internal::abs2(x[2*i*n + j]) + Eigen::internal::abs2(x[2*i*n + n + j]);
118  else sum += Eigen::internal::abs2(x[i*n + j]);
119 
120  }
121  return std::sqrt(sum);
122  }
123 
124  inline Real getAbsMax(int i) {
125  Real max = 0;
126  for(Index j = 0; j < r; j++) {
127  if (isComplex) max = std::max(max, std::max(std::abs(x[2*i*n + j]),std::abs(x[2*i*n + n + j])));
128  else max = std::max(max, std::abs(x[i*n + j]));
129 
130  }
131  return max;
132  }
133 
134 
135  bool operator() (int i1, int i2) {
136  return getAbsSum(i1) < getAbsSum(i2);
137  }
138  };
139 
140  template<typename Derived> QPResultComparator<Derived> getQPResultComparator(bool isComplex, const Derived &x, Index r, Index n) {
141  return QPResultComparator<Derived>(isComplex, x, r, n);
142  }
143 
144 
145 
146 
147  template <typename Scalar>
148  struct ReducedSetWithQP {
149  KQP_SCALAR_TYPEDEFS(Scalar);
150 
151 
153  FMatrixPtr new_mF;
154  ScalarMatrix new_mY;
155  RealVector new_mD;
156 
157 
158  //
159  const FMatrixPtr &getFeatureMatrix() { return new_mF; }
160  ScalarMatrix &getMixtureMatrix() { return new_mY; }
161  RealVector &getEigenValues() { return new_mD; }
162 
174  void run(Index target, const FSpaceCPtr &fs, const FMatrixCPtr &_mF, const ScalarAltMatrix &_mY, const RealVector &mD) {
175  KQP_LOG_ASSERT_F(main_logger(), _mY.rows() == _mF->size(), "Incompatible dimensions (%d vs %d)", %_mY.rows() %_mF->size());
176 
177  // Diagonal won't change
178  new_mD = mD;
179 
180  // Early stop
181  if (target >= _mF->size()) {
182  new_mY = _mY;
183  new_mF = _mF->copy();
184  return;
185  }
186 
187  KQP_HLOG_INFO_F("QP approach : reduce rank from %d to %d", %_mF->size() %target);
188 
189  //
190  // (0) Compute the EVD (used by lambda computation), and use it to remove the null space
191  // so that we are sure that we won't have a singular KKT
192  //
193 
194  // Compute the eigenvalue decomposition of the Gram matrix
195  // (assumes the eigenvalues are sorted by increasing order)
196 
197  // Remove using the null space approach if we can
198  bool useNew = false;
199  ScalarAltMatrix new_alt_mY;
200  {
201 
202  ScalarMatrix eigenvectors;
203  RealVector eigenvalues;
204  boost::shared_ptr<ScalarMatrix> kernel(new ScalarMatrix());
205  Real threshold = fs->k(_mF).squaredNorm() * std::sqrt(Eigen::NumTraits<Real>::epsilon());
206  kqp::ThinEVD<ScalarMatrix>::run(Eigen::SelfAdjointEigenSolver<ScalarMatrix>(fs->k(_mF).template selfadjointView<Eigen::Lower>()),
207  eigenvectors, eigenvalues, kernel.get(), threshold);
208 
209 
210  if (kernel->cols() > 0) {
211  Eigen::PermutationMatrix<Dynamic, Dynamic, Index> mP;
212  RealVector weights = _mY.rowwise().squaredNorm().array() * fs->k(_mF).diagonal().array().abs();
213 
214  // kernel will contain a matrix such that *kernel * mP * mY
215  ReducedSetNullSpace<Scalar> nullSpaceCleaner;
216  new_mF = nullSpaceCleaner.remove(_mF, *kernel, mP, weights);
217 
218  // Y <- (Id A) P Y
219  ScalarMatrix mY2(_mY); // FIXME: .topRows() should be defined in AltMatrix expressions
220  ScalarMatrix mY3 = (mP * mY2).topRows(new_mF->size()) + *kernel * (mP * mY2).bottomRows(_mY.rows() - new_mF->size());
221 
222  KQP_HLOG_INFO_F("Reduced rank to %d [target %d] with null space (kernel size: %d)", %new_mF->size() %target %kernel->cols())
223  if (target >= new_mF->size()) {
224  new_mY = std::move(mY3);
225  return;
226  }
227 
228  useNew = true;
229  new_alt_mY = std::move(mY3);
230  }
231 
232  // --- Set some variables before next operations
233 
234  }
235  const FMatrixCPtr &mF = useNew ? new_mF : _mF;
236  const ScalarAltMatrix &mY = useNew ? new_alt_mY : _mY;
237  const ScalarMatrix &gram = fs->k(mF);
238 
239  // Dimension of the basis
240  Index r = mY.cols();
241 
242  // Number of pre-images
243  Index n = gram.rows();
244 
245  //
246  // (1) Estimate the regularization coefficient \f$lambda\f$
247  //
248 
249  // mAS_.j = sigma_j^1/2 A_.j
250  ScalarMatrix mAS = mY * mD.cwiseAbs().cwiseSqrt().asDiagonal();
251 
252  // We first order the pre-images
253  std::vector< LambdaError<Real> > errors;
254  for(Index i = 0; i < n; i++) {
255  Real maxa = mAS.cwiseAbs().row(i).maxCoeff();
256  Real deltaMax = gram(i,i) * mAS.cwiseAbs2().row(i).dot(mD.cwiseAbs().cwiseSqrt());
257  Real deltaMin = deltaMax; //std::min(sum_minDelta[i], deltaMax);
258  KQP_HLOG_DEBUG_F("[%d] We have max = %.3g/ delta = (%.3g,%.3g)", %i %maxa %deltaMin %deltaMax);
259  errors.push_back(LambdaError<Real>(deltaMin, deltaMax, maxa, i));
260  }
261 
262 
263  typedef typename LambdaError<Real>::Comparator LambdaComparator;
264  std::sort(errors.begin(), errors.end(), LambdaComparator());
265  LambdaError<Real> acc_lambda = std::accumulate(errors.begin(), errors.begin() + n - target, LambdaError<Real>());
266 
267  // for(Index j = 0; j < n; j++)
268  // std::cerr << boost::format("[%d] delta=(%g,%g) and maxa=%g\n") %j %errors[j].deltaMin %errors[j].deltaMax %errors[j].maxa;
269  // std::cerr << boost::format("delta=(%g,%g) and maxa=%g\n") %acc_lambda.deltaMin %acc_lambda.deltaMax % acc_lambda.maxa;
270 
271  // Check for a trivial solution
272  if (acc_lambda.maxa <= epsilon() * acc_lambda.delta()) {
273  KQP_HLOG_WARN("There are only trivial solutions, calling cleanUnused");
274  if (!useNew) {
275  new_mY = mY;
276  new_mF = mF->copy();
277  }
278  new_mD = mD;
279  CleanerUnused<Scalar>::run(new_mF, new_mY);
280  if (target >= new_mF->size())
281  return;
282  KQP_THROW_EXCEPTION_F(arithmetic_exception, "Trivial solutions were not found (target %d, size %d)", %target %new_mF->size());
283  }
284 
285  // Now, we compute a more accurate lambda
286  Real lambda = acc_lambda.delta() / acc_lambda.maxa;
287 
288 
289  KQP_HLOG_INFO_F("Lambda = %g", %lambda);
290 
291  //
292  // (2) Solve the cone quadratic problem
293  //
295 
296  RealVector nu = mD.cwiseAbs().cwiseInverse().cwiseSqrt();
297 
298  do {
299  solve_qp<Scalar>(r, lambda, gram, mAS, nu, result);
300 
301  if (result.status == cvxopt::OPTIMAL) break;
302 
303  if (result.status == cvxopt::SINGULAR_KKT_MATRIX)
304  KQP_HLOG_INFO("QP approach did not converge (singular KKT matrix)");
305 
306  Real oldLambda = lambda;
307  lambda /= 2.;
308  KQP_HLOG_INFO_F("Halving lambda to %g", %lambda);
309  if (std::abs(lambda - oldLambda) < Eigen::NumTraits<Real>::epsilon())
310  KQP_THROW_EXCEPTION(arithmetic_exception, "Lambda is too small");
311  } while (true);
312 
313 
314  //
315  // (3) Get the subset
316  //
317 
318  // In result.x,
319  // - the r mixture vectors (of size n or 2n in the complex case) are stored one after each other
320  // - the last n components are the maximum of the norm of the coefficients for each pre-image
321  // we use the <target> first
322  std::vector<Index> indices(n);
323  for(Index i = 0; i < n; i++)
324  indices[i] = i;
325 
326  // Sort by increasing order: we will keep only the target last vectors
327  auto comparator = getQPResultComparator(boost::is_complex<Scalar>::value, result.x, n, r);
328  std::sort(indices.begin(), indices.end(), comparator);
329 
330  Real lowest = result.x.tail(n)[indices[0]];
331  Real last_nsel = n-target-1 >= 0 ? result.x.tail(n)[indices[n-target-1]] : -1;
332  Real first_sel = result.x.tail(n)[indices[n-target]];
333  Real highest = result.x.tail(n)[indices[n-1]];
334  KQP_HLOG_INFO_F("Lambda values [%d/%d]: lowest=%g, last non selected=%g, first selected=%g, highest=%g [ratios %g and %g]",
335  %target %n %lowest %last_nsel %first_sel %highest %(last_nsel/first_sel) %(last_nsel/highest));
336 
337  if (KQP_IS_DEBUG_ENABLED(KQP_HLOGGER)) {
338  for(Index i = 0; i < n; i++) {
339  Index j = indices[i];
340  KQP_HLOG_DEBUG_F(boost::format("[%d] absXi=%g absSum=%g absMax=%g"), % indices[i]
341  % comparator.getXi(j) %comparator.getAbsSum(j) %comparator.getAbsMax(j));
342  }
343  }
344 
345  // Construct a sub-view of the initial set of indices
346  std::vector<bool> to_keep(n, false);
347  for(Index i = n-target; i < n; i++) {
348  to_keep[indices[i]] = true;
349  }
350  FMatrixPtr _new_mF = mF->subset(to_keep.begin(), to_keep.end());
351 
352 
353  //
354  // (4) Project onto the new subspace
355  //
356 
357  // Compute new_mY so that new_mF Y is orthonormal, ie new_mY' new_mF' new_mF new_mY is the identity
358 
359  Eigen::SelfAdjointEigenSolver<ScalarMatrix> evd(fs->k(_new_mF).template selfadjointView<Eigen::Lower>());
360 
361  new_mY.swap(evd.eigenvectors());
362 
363  new_mY *= evd.eigenvalues().cwiseAbs().cwiseSqrt().cwiseInverse().asDiagonal();
364 
365  // Project onto new_mF new_mY
366  KQP_HLOG_DEBUG_F("Update new_mY <- new_Y [%dx%d] * new_Y'[%dx%d] x new_F'[%dxF] x mF[Fx%d] x mY[%dx%d]",
367  %new_mY.rows() %new_mY.cols()
368  %new_mY.cols() %new_mY.rows()
369  %_new_mF->size()
370  %mF->size()
371  %mY.rows() %mY.cols()
372  );
373  new_mY *= fs->k(_new_mF, new_mY, mF, mY);
374  new_mF = std::move(_new_mF);
375  }
376  };
377 #endif
378 
379  template <typename Scalar>
380  class CleanerQP : public Cleaner<Scalar> {
381  public:
382  KQP_SCALAR_TYPEDEFS(Scalar);
383 
384  CleanerQP() :
385  m_preImageRatios(std::make_pair(0, std::numeric_limits<Real>::infinity())),
386  m_preImagesRange(std::make_pair(0, std::numeric_limits<Index>::max()))
387  {}
388 
390  void setPreImagesPerRank(float reset, float maximum) {
391  this->m_preImageRatios = std::make_pair(reset, maximum);
392  }
393 
394  void setRankRange(Index reset, Index maximum) {
395  this->m_preImagesRange = std::make_pair(reset, maximum);
396  }
397 
398  virtual void cleanup(Decomposition<Scalar> &d) const override {
399  // Sets the target value
400  Index target = d.mX->size();
401 
402  if (d.mX->size() > this->m_preImageRatios.second * d.mD.rows())
403  target = std::min(target, (Index)(this->m_preImageRatios.first * d.mD.rows()));
404 
405  if (d.mX->size() > this->m_preImagesRange.second)
406  target = std::min(target, this->m_preImagesRange.first);
407 
408  // Ensure there is one pre-image per rank at least
409  target = std::max(target, d.mD.rows());
410 
411  // --- Ensure we have a small enough number of pre-images
412  if (d.mX->size() > target) {
413  if (d.fs->canLinearlyCombine()) {
414  // Easy case: we can linearly combine pre-images
415  d.mX = d.fs->linearCombination(d.mX, d.mY);
416  d.mY = Eigen::Identity<Scalar>(d.mX->size(), d.mX->size());
417  } else {
418  // Use QP approach
419  ReducedSetWithQP<Scalar> qp_rs;
420  qp_rs.run(target, d.fs, d.mX, d.mY, d.mD);
421 
422  // Get the decomposition
423  d.mX = std::move(qp_rs.getFeatureMatrix());
424  d.mY = std::move(qp_rs.getMixtureMatrix());
425  d.mD = std::move(qp_rs.getEigenValues());
426 
427  // The decomposition is not orthonormal anymore
428  d.orthonormal = false;
429  }
430 
431  }
432  }
433 
434  virtual picojson::value save() const override {
435  picojson::object json;
436  json["name"] = picojson::value("selector");
437  json["scalar"] = picojson::value(ScalarInfo<Scalar>::name());
438 
439  picojson::object ratio;
440  ratio["reset"] = picojson::value(m_preImageRatios.first);
441  ratio["max"] = picojson::value(m_preImageRatios.second);
442  json["ratio"] = picojson::value(ratio);
443 
444  picojson::object range;
445  range["reset"] = picojson::value(m_preImageRatios.first);
446  range["max"] = picojson::value(m_preImageRatios.second);
447  json["range"] = picojson::value(range);
448 
449  return picojson::value(json);
450  }
451 
452 
453  private:
458  std::pair<float,float> m_preImageRatios;
459  std::pair<Index,Index> m_preImagesRange;
460  };
461 
462 
463 #ifndef SWIG
464 
468  template<typename Scalar>
469  class KQP_KKTPreSolver : public cvxopt::KKTPreSolver<typename Eigen::NumTraits<Scalar>::Real> {
470  public:
471  typedef typename Eigen::NumTraits<Scalar>::Real Real;
472 
473  KQP_KKTPreSolver(const KQP_MATRIX(Scalar)& gramMatrix, const KQP_VECTOR(Real) &nu);
474 
475  cvxopt::KKTSolver<Real> *get(const cvxopt::ScalingMatrix<Real> &w);
476  private:
477  Eigen::LLT<KQP_MATRIX(Real)> lltOfK;
478  KQP_MATRIX(Real) B, BBT;
479  KQP_VECTOR(Real) nu;
480 
481  };
482 
483 
484 #endif
485 }
486 
487 #include <kqp/cleaning/qp_approach.inc.hpp>
488 
489 #endif