/*
 * omp_SOR_itrOddPoints.cpp
 *
 *  Created on: Aug 26, 2019
 *      Author: chuanli
 */

#include "solver.h"

/*
 * -------------------- iterate over odd points (Sequential/OMP version) --------------------
 */
void CDelphiSolver::omp_SOR_itrOddPoints(const int& forWhom, const int& flag)
{
	delphi_integer n, ix, iy, iz;
	delphi_integer star, fin;
	delphi_real temp1, temp2, temp3, temp4;
	delphi_integer itemp1, itemp2, itemp3, itemp4;

	int omp_num_threads,omp_thread_id;

	/*
	 * set number of threads = number of processors
	 */
	//omp_set_num_threads(2);
	omp_set_num_threads(omp_get_max_threads());

	#pragma omp parallel default(shared) private(omp_thread_id,n,ix,iy,star,fin,temp1,temp2,temp3)

	{
		delphi_integer omp_index;

		omp_thread_id = omp_get_thread_num();

		if (0 == omp_thread_id) omp_num_threads = omp_get_num_threads();

		//cout << "thread " << omp_thread_id << " of " << omp_num_threads << " is alive\n";

		/* the following loops are about four times faster than the original loop over all grid points for
		 * several reasons, the biggest being that we are only solving laplace's equation (unless salt is present),
		 * which numerically much simpler, hence faster. we put all we leave out, back in below, ending up with
		 * an equivalent calculation, but much faster.
		 */
		if (fZero < abs(fIonStrength))  //----- the main loop is as below:
		{
			#pragma omp for schedule(auto)

			for (n = 1; n < iGrid - 1; n++)
			{
				star = sta1[n];
				fin = fi1[n];
				for (ix = star; ix <= fin; ix++)
				{
					temp1 = phimap2[ix - 1] + phimap2[(ix - 1) - 1];
					temp2 = phimap2[(ix - 1) + lat1] + phimap2[(ix - 1) - lat2];
					temp3 = phimap2[(ix - 1) + long1] + phimap2[(ix - 1) - long2];
					//phimap1[ix-1] = phimap1[ix-1]*om1 + (qmap1[ix-1]+temp1+temp2+temp3)*prgfSaltMap1[ix-1];
					phimap1[ix - 1] = phimap1[ix - 1] * om1 + (qmap1[ix - 1] + temp1 + temp2 + temp3) * prgfSaltMap1[ix - 1];
				}
			}
		}
		else //----- if there is no salt then the main loop is executed without sf saving about 15% in execution time
		{
			#pragma omp for schedule(auto)

			for (n = 1; n < iGrid - 1; n++)
			{
				star = sta1[n];
				fin = fi1[n];
				for (ix = star; ix <= fin; ix++)
				{
					temp1 = phimap2[ix - 1] + phimap2[(ix - 1) - 1];
					temp2 = phimap2[(ix - 1) + lat1] + phimap2[(ix - 1) - lat2];
					temp3 = phimap2[(ix - 1) + long1] + phimap2[(ix - 1) - long2];
					phimap1[ix - 1] = phimap1[ix - 1] * om1 + (temp1 + temp2 + temp3) * sixth;
				}
			}
		}


		//#pragma omp barrier

		/*
		 * first we add back the dielectric boundary points, by recalculating them individually. note this is still
		 * vectorised by means of a gathering load by the compiler.
		 */
		if (iGaussian != 0)
		{
			if (fZero < abs(fIonStrength)) // If there is ion, Gaussian
			{
				#pragma omp for schedule(auto)

				for (n = 0; n < iDielecBndyEven; n++)
				{
					ix = prgiBndyDielecIndex[n];

					//We need to recalculate the boudary points
					//Here we only calculate the pure linear part, and then add back the nonliear part

					delphi_real eps1 = gaussianBoundaryDielec[n][0];
					delphi_real eps2 = gaussianBoundaryDielec[n][1];
					delphi_real eps3 = gaussianBoundaryDielec[n][2];
					delphi_real eps4 = gaussianBoundaryDielec[n][3];
					delphi_real eps5 = gaussianBoundaryDielec[n][4];
					delphi_real eps6 = gaussianBoundaryDielec[n][5];

					delphi_real phi1 = phimap2[(ix - 1) - 1];
					delphi_real phi2 = phimap2[ix - 1];
					delphi_real phi3 = phimap2[(ix - 1) - lat2];
					delphi_real phi4 = phimap2[(ix - 1) + lat1];
					delphi_real phi5 = phimap2[(ix - 1) - long2];
					delphi_real phi6 = phimap2[(ix - 1) + long1];

	                temp1 = phi1 + phi2;
	                temp2 = phi3 + phi4;
	                temp3 = phi5 + phi6;

					delphi_real myLastPhi = phimap1[ix - 1] - (qmap1[ix - 1] + temp1 + temp2 + temp3) * prgfSaltMap1[ix - 1];

					delphi_real myDensity = gaussianBoundaryDensity[n];

					delphi_real myExpSolvE = SOR_calcExpSolvE(myDensity);

					delphi_real myNonlinearCorrection = gaussianBoundaryNonlinear[n];

					delphi_real numerator = (eps1 * phi1 + eps2 * phi2 + eps3 * phi3 + eps4 * phi4 + eps5 * phi5 + eps6 * phi6) / fEPKT;
					delphi_real demonimator = (eps1 + eps2 + eps3 + eps4 + eps5 + eps6) / fEPKT + fDebFct * myExpSolvE;

					phimap1[ix - 1] = myLastPhi + (numerator / demonimator + myNonlinearCorrection) * (1 - (om1));
				}
			}
			else  //if there is no ion, Gaussian
			{
				#pragma omp for schedule(auto)

				for (n = 0; n < iDielecBndyEven; n++)
				{
					ix = prgiBndyDielecIndex[n];

					//We need to recalculate the boudary points
					//Here we only calculate the pure linear part, and then add back the nonliear part

					delphi_real eps1 = gaussianBoundaryDielec[n][0];
					delphi_real eps2 = gaussianBoundaryDielec[n][1];
					delphi_real eps3 = gaussianBoundaryDielec[n][2];
					delphi_real eps4 = gaussianBoundaryDielec[n][3];
					delphi_real eps5 = gaussianBoundaryDielec[n][4];
					delphi_real eps6 = gaussianBoundaryDielec[n][5];

					delphi_real phi1 = phimap2[(ix - 1) - 1];
					delphi_real phi2 = phimap2[ix - 1];
					delphi_real phi3 = phimap2[(ix - 1) - lat2];
					delphi_real phi4 = phimap2[(ix - 1) + lat1];
					delphi_real phi5 = phimap2[(ix - 1) - long2];
					delphi_real phi6 = phimap2[(ix - 1) + long1];

					delphi_real myLastPhi = phimap1[ix - 1] - (phi1 + phi2 + phi3 + phi4 + phi5 + phi6) * sixth;

					delphi_real numerator   = eps1 * phi1 + eps2 * phi2 + eps3 * phi3 + eps4 * phi4 + eps5 * phi5 + eps6 * phi6;
					delphi_real demonimator = eps1 + eps2 + eps3 + eps4 + eps5 + eps6;

					phimap1[ix - 1] = myLastPhi + (numerator / demonimator) * (1 - (om1));
				}
			}
		}
		else // if not Gaussian
		{
			#pragma omp for schedule(auto)

			for (n = 0; n < iDielecBndyEven; n++)
			{
				ix = prgiBndyDielecIndex[n];
				temp1 = phimap2[(ix - 1) - 1] * prgfBndyDielec[n][0] + phimap2[ix - 1] * prgfBndyDielec[n][1];
				temp2 = phimap2[(ix - 1) - lat2] * prgfBndyDielec[n][2] + phimap2[(ix - 1) + lat1] * prgfBndyDielec[n][3];
				temp3 = phimap2[(ix - 1) - long2] * prgfBndyDielec[n][4] + phimap2[(ix - 1) + long1] * prgfBndyDielec[n][5];
				phimap1[ix - 1] += temp1 + temp2 + temp3;
			}
		}

		/*
		 * Now reset boundary values altered in above loops.
		 */
		star = (iGrid+1)/2; fin = (iGrid*(iGrid-1)-2)/2; omp_index = iGrid*(iGrid+1)/2-iGrid+1; //iy = iGrid*(iGrid+1)/2-iGrid+1;

		#pragma omp for schedule(auto)

		for (n = 0; n < fin-star+1; n++)
		{
			iy = omp_index+(n+1)*iGrid;
			phimap1[iy-1] = bndx1[n];
			phimap1[iy+((iGrid+1)/2-1)-1] = bndx2[n];
		}

		/*
		 * next we add back an adjustment to all the charged grid points due to the charge assigned. the compiler
		 * directive just reassures the vector compiler that all is well as far as recurrence is concerned, i.e. it
		 * would think there is a recurrence below, where as in fact there is none.
		 */
		if (0 != forWhom)
		{
			if (iGaussian != 0)
			{
				if (fZero < abs(fIonStrength)) // If there is ion, Gaussian
				{
					#pragma omp for schedule(auto)

					for (n = 0; n < iCrgedGridEven; n++)
					{
						ix = prgiCrgPose[n];

						delphi_real eps1 = gaussianChargeDielec[n][0];
						delphi_real eps2 = gaussianChargeDielec[n][1];
						delphi_real eps3 = gaussianChargeDielec[n][2];
						delphi_real eps4 = gaussianChargeDielec[n][3];
						delphi_real eps5 = gaussianChargeDielec[n][4];
						delphi_real eps6 = gaussianChargeDielec[n][5];

						delphi_real myDensity = gaussianChargeDensity[n];
						delphi_real myCharge  = prgfCrgValG[n];

						delphi_real myExpSolvE = SOR_calcExpSolvE(myDensity);

						delphi_real myNonlinearCorrection = gaussianChargeNonlinear[n];

						delphi_real numerator   = myCharge * f4Pi * fScale;
						delphi_real demonimator = (eps1 + eps2 + eps3 + eps4 + eps5 + eps6) / fEPKT + fDebFct * myExpSolvE;

						phimap1[ix - 1] = phimap1[ix - 1] + (numerator / demonimator + myNonlinearCorrection) * (1 - (om1));
					}
				}
				else // If there is no ion, Gaussian
				{
					#pragma omp for schedule(auto)

					for (n = 0; n < iCrgedGridEven; n++)
					{
						ix = prgiCrgPose[n];

						delphi_real eps1 = gaussianChargeDielec[n][0];
						delphi_real eps2 = gaussianChargeDielec[n][1];
						delphi_real eps3 = gaussianChargeDielec[n][2];
						delphi_real eps4 = gaussianChargeDielec[n][3];
						delphi_real eps5 = gaussianChargeDielec[n][4];
						delphi_real eps6 = gaussianChargeDielec[n][5];

						delphi_real myCharge = prgfCrgValG[n];

						delphi_real numerator = myCharge * f4Pi * fScale;
						delphi_real demonimator = (eps1 + eps2 + eps3 + eps4 + eps5 + eps6) / fEPKT;

						phimap1[ix - 1] = phimap1[ix - 1] + (numerator / demonimator) * (1 - (om1));
					}
				}
			}
			else
			{
				#pragma omp for schedule(auto)

				for (n = 0; n < iCrgedGridEven; n++)
				{
					ix = prgiCrgPose[n];
					phimap1[ix - 1] += prgfCrgValA[n];
				}
			}
		}
	} // end of #pragma omp parallel

	/*
	 * if periodic boundary condition option, force periodicity using wrap around update of boundary values:
	 *    2nd slice-->last
	 *    last-1 slice-->first
	 */
	if (rgbPeriodicBndy[2]) //----- z periodicity
	{
		for (iz = 0; iz < (iGrid - 2) * (iGrid - 2); iz += 2)
		{
			temp1  = ibndz[iz];
			itemp1 = (delphi_integer) temp1;
			temp2  = temp1 + idif1z;
			itemp2 = (delphi_integer) temp2;
			temp3  = temp2 + inc1za;
			itemp3 = (delphi_integer) temp3;
			temp4  = temp1 + inc1zb;
			itemp4 = (delphi_integer) temp4;
			phimap1[itemp1 - 1] = phimap2[itemp2 - 1];
			phimap1[itemp3 - 1] = phimap2[itemp4 - 1];
		}
	}

	if (rgbPeriodicBndy[1]) //----- y periodicity
	{
		for (iy = 0; iy < (iGrid - 2) * (iGrid - 2); iy += 2)
		{
			temp1  = ibndy[iy];
			itemp1 = (delphi_integer) temp1;
			temp2  = temp1 + idif1y;
			itemp2 = (delphi_integer) temp2;
			temp3  = temp2 + inc1ya;
			itemp3 = (delphi_integer) temp3;
			temp4  = temp1 + inc1yb;
			itemp4 = (delphi_integer) temp4;
			phimap1[itemp1 - 1] = phimap2[itemp2 - 1];
			phimap1[itemp3 - 1] = phimap2[itemp4 - 1];
		}
	}

	if (rgbPeriodicBndy[0]) //----- x periodicity
	{
		for (ix = 0; ix < (iGrid - 2) * (iGrid - 2); ix += 2)
		{
			temp1  = ibndx[ix];
			itemp1 = (delphi_integer) temp1;
			temp2  = temp1 + idif1x;
			itemp2 = (delphi_integer) temp2;
			temp3  = temp2 + inc1xa;
			itemp3 = (delphi_integer) temp3;
			temp4  = temp1 + inc1xb;
			itemp4 = (delphi_integer) temp4;
			phimap1[itemp1 - 1] = phimap2[itemp2 - 1];
			phimap1[itemp3 - 1] = phimap2[itemp4 - 1];
		}
	}

	//    if (1 == flag)
	//    {
	//        string strTestFile = "rank1_solver_itrOddPoints.dat";
	//        ofstream ofTestStream(strTestFile.c_str());
	//        ofTestStream << boolalpha;
	//        ofTestStream << fixed << setprecision(7);
	//
	//        ofTestStream << "flag = " << flag << endl;
	//
	//        ix = 0;
	//        for (vector<delphi_real>::iterator it = phimap1.begin(); it != phimap1.end(); ++it)
	//        {
	//            ofTestStream << "mpi_phimap1[" << setw(6) << right << ix << "] = " << setw(11) << right << *it << endl;
	//            ix++;
	//        }
	//
	//        ix = 0;
	//        for (vector<delphi_real>::iterator it = phimap2.begin(); it != phimap2.end(); ++it)
	//        {
	//            ofTestStream << "mpi_phimap2[" << setw(6) << right << ix << "] = " << setw(11) << right << *it << endl;
	//            ix++;
	//        }
	//
	//        ofTestStream.close();
	//    }
}



