// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// This file provides Go implementations of elementary multi-precision
// arithmetic operations on word vectors. Needed for platforms without
// assembly implementations of these routines.

package big

import "unsafe"

type Word uintptr

const (
	_S	= uintptr(unsafe.Sizeof(Word(0)));	// TODO(gri) should Sizeof return a uintptr?
	_logW	= (0x650 >> _S) & 7;
	_W	= 1 << _logW;
	_B	= 1 << _W;
	_M	= _B - 1;
	_W2	= _W / 2;
	_B2	= 1 << _W2;
	_M2	= _B2 - 1;
)


// ----------------------------------------------------------------------------
// Elementary operations on words
//
// These operations are used by the vector operations below.

// z1<<_W + z0 = x+y+c, with c == 0 or 1
func addWW_g(x, y, c Word) (z1, z0 Word) {
	yc := y + c;
	z0 = x + yc;
	if z0 < x || yc < y {
		z1 = 1
	}
	return;
}


// z1<<_W + z0 = x-y-c, with c == 0 or 1
func subWW_g(x, y, c Word) (z1, z0 Word) {
	yc := y + c;
	z0 = x - yc;
	if z0 > x || yc < y {
		z1 = 1
	}
	return;
}


// z1<<_W + z0 = x*y
func mulWW_g(x, y Word) (z1, z0 Word) {
	// Split x and y into 2 halfWords each, multiply
	// the halfWords separately while avoiding overflow,
	// and return the product as 2 Words.

	if x < y {
		x, y = y, x
	}

	if x < _B2 {
		// y < _B2 because y <= x
		// sub-digits of x and y are (0, x) and (0, y)
		// z = z[0] = x*y
		z0 = x * y;
		return;
	}

	if y < _B2 {
		// sub-digits of x and y are (x1, x0) and (0, y)
		// x = (x1*_B2 + x0)
		// y = (y1*_B2 + y0)
		x1, x0 := x>>_W2, x&_M2;

		// x*y = t2*_B2*_B2 + t1*_B2 + t0
		t0 := x0 * y;
		t1 := x1 * y;

		// compute result digits but avoid overflow
		// z = z[1]*_B + z[0] = x*y
		z0 = t1<<_W2 + t0;
		z1 = (t1 + t0>>_W2) >> _W2;
		return;
	}

	// general case
	// sub-digits of x and y are (x1, x0) and (y1, y0)
	// x = (x1*_B2 + x0)
	// y = (y1*_B2 + y0)
	x1, x0 := x>>_W2, x&_M2;
	y1, y0 := y>>_W2, y&_M2;

	// x*y = t2*_B2*_B2 + t1*_B2 + t0
	t0 := x0 * y0;
	// t1 := x1*y0 + x0*y1;
	var c Word;
	t1 := x1 * y0;
	t1a := t1;
	t1 += x0 * y1;
	if t1 < t1a {
		c++
	}
	t2 := x1*y1 + c*_B2;

	// compute result digits but avoid overflow
	// z = z[1]*_B + z[0] = x*y
	// This may overflow, but that's ok because we also sum t1 and t0 above
	// and we take care of the overflow there.
	z0 = t1<<_W2 + t0;

	// z1 = t2 + (t1 + t0>>_W2)>>_W2;
	var c3 Word;
	z1 = t1 + t0>>_W2;
	if z1 < t1 {
		c3++
	}
	z1 >>= _W2;
	z1 += c3 * _B2;
	z1 += t2;
	return;
}


// z1<<_W + z0 = x*y + c
func mulAddWWW_g(x, y, c Word) (z1, z0 Word) {
	// Split x and y into 2 halfWords each, multiply
	// the halfWords separately while avoiding overflow,
	// and return the product as 2 Words.

	// TODO(gri) Should implement special cases for faster execution.

	// general case
	// sub-digits of x, y, and c are (x1, x0), (y1, y0), (c1, c0)
	// x = (x1*_B2 + x0)
	// y = (y1*_B2 + y0)
	x1, x0 := x>>_W2, x&_M2;
	y1, y0 := y>>_W2, y&_M2;
	c1, c0 := c>>_W2, c&_M2;

	// x*y + c = t2*_B2*_B2 + t1*_B2 + t0
	// (1<<32-1)^2 == 1<<64 - 1<<33 + 1, so there's space to add c0 in here.
	t0 := x0*y0 + c0;

	// t1 := x1*y0 + x0*y1 + c1;
	var c2 Word;	// extra carry
	t1 := x1*y0 + c1;
	t1a := t1;
	t1 += x0 * y1;
	if t1 < t1a {	// If the number got smaller then we overflowed.
		c2++
	}

	t2 := x1*y1 + c2*_B2;

	// compute result digits but avoid overflow
	// z = z[1]*_B + z[0] = x*y
	// z0 = t1<<_W2 + t0;
	// This may overflow, but that's ok because we also sum t1 and t0 below
	// and we take care of the overflow there.
	z0 = t1<<_W2 + t0;

	var c3 Word;
	z1 = t1 + t0>>_W2;
	if z1 < t1 {
		c3++
	}
	z1 >>= _W2;
	z1 += t2 + c3*_B2;

	return;
}


// q = (x1<<_W + x0 - r)/y
// The most significant bit of y must be 1.
func divStep(x1, x0, y Word) (q, r Word) {
	d1, d0 := y>>_W2, y&_M2;
	q1, r1 := x1/d1, x1%d1;
	m := q1 * d0;
	r1 = r1*_B2 | x0>>_W2;
	if r1 < m {
		q1--;
		r1 += y;
		if r1 >= y && r1 < m {
			q1--;
			r1 += y;
		}
	}
	r1 -= m;

	r0 := r1 % d1;
	q0 := r1 / d1;
	m = q0 * d0;
	r0 = r0*_B2 | x0&_M2;
	if r0 < m {
		q0--;
		r0 += y;
		if r0 >= y && r0 < m {
			q0--;
			r0 += y;
		}
	}
	r0 -= m;

	q = q1*_B2 | q0;
	r = r0;
	return;
}


// Number of leading zeros in x.
func leadingZeros(x Word) (n uint) {
	if x == 0 {
		return _W
	}
	for x&(1<<(_W-1)) == 0 {
		n++;
		x <<= 1;
	}
	return;
}


// q = (x1<<_W + x0 - r)/y
func divWW_g(x1, x0, y Word) (q, r Word) {
	if x1 == 0 {
		q, r = x0/y, x0%y;
		return;
	}

	var q0, q1 Word;
	z := leadingZeros(y);
	if y > x1 {
		if z != 0 {
			y <<= z;
			x1 = (x1 << z) | (x0 >> (_W - z));
			x0 <<= z;
		}
		q0, x0 = divStep(x1, x0, y);
		q1 = 0;
	} else {
		if z == 0 {
			x1 -= y;
			q1 = 1;
		} else {
			z1 := _W - z;
			y <<= z;
			x2 := x1 >> z1;
			x1 = (x1 << z) | (x0 >> z1);
			x0 <<= z;
			q1, x1 = divStep(x2, x1, y);
		}

		q0, x0 = divStep(x1, x0, y);
	}

	r = x0 >> z;

	if q1 != 0 {
		panic("div out of range")
	}

	return q0, r;
}


// ----------------------------------------------------------------------------
// Elementary operations on vectors

// All higher-level functions use these elementary vector operations.
// The function pointers f are initialized with default implementations
// f_g, written in Go for portability. The corresponding assembly routines
// f_s should be installed if they exist.
var (
	// addVV sets z and returns c such that z+c = x+y.
	addVV	func(z, x, y *Word, n int) (c Word)	= addVV_g;

	// subVV sets z and returns c such that z-c = x-y.
	subVV	func(z, x, y *Word, n int) (c Word)	= subVV_g;

	// addVW sets z and returns c such that z+c = x-y.
	addVW	func(z, x *Word, y Word, n int) (c Word)	= addVW_g;

	// subVW sets z and returns c such that z-c = x-y.
	subVW	func(z, x *Word, y Word, n int) (c Word)	= subVW_g;

	// mulAddVWW sets z and returns c such that z+c = x*y + r.
	mulAddVWW	func(z, x *Word, y, r Word, n int) (c Word)	= mulAddVWW_g;

	// addMulVVW sets z and returns c such that z+c = z + x*y.
	addMulVVW	func(z, x *Word, y Word, n int) (c Word)	= addMulVVW_g;

	// divWVW sets z and returns r such that z-r = (xn<<(n*_W) + x) / y.
	divWVW	func(z *Word, xn Word, x *Word, y Word, n int) (r Word)	= divWVW_g;
)


func init() {
	// Uncomment to use generic routines.
	//return;

	// Install assembly routines.
	addVV = addVV_s;
	subVV = subVV_s;
	addVW = addVW_s;
	subVW = subVW_s;
	mulAddVWW = mulAddVWW_s;
	addMulVVW = addMulVVW_s;
	divWVW = divWVW_s;
}


func (p *Word) at(i int) *Word {
	return (*Word)(unsafe.Pointer(uintptr(unsafe.Pointer(p)) + uintptr(i)*_S))
}


func addVV_s(z, x, y *Word, n int) (c Word)
func addVV_g(z, x, y *Word, n int) (c Word) {
	for i := 0; i < n; i++ {
		c, *z.at(i) = addWW_g(*x.at(i), *y.at(i), c)
	}
	return;
}


func subVV_s(z, x, y *Word, n int) (c Word)
func subVV_g(z, x, y *Word, n int) (c Word) {
	for i := 0; i < n; i++ {
		c, *z.at(i) = subWW_g(*x.at(i), *y.at(i), c)
	}
	return;
}


func addVW_s(z, x *Word, y Word, n int) (c Word)
func addVW_g(z, x *Word, y Word, n int) (c Word) {
	c = y;
	for i := 0; i < n; i++ {
		c, *z.at(i) = addWW_g(*x.at(i), c, 0)
	}
	return;
}


func subVW_s(z, x *Word, y Word, n int) (c Word)
func subVW_g(z, x *Word, y Word, n int) (c Word) {
	c = y;
	for i := 0; i < n; i++ {
		c, *z.at(i) = subWW_g(*x.at(i), c, 0)
	}
	return;
}


func mulAddVWW_s(z, x *Word, y, r Word, n int) (c Word)
func mulAddVWW_g(z, x *Word, y, r Word, n int) (c Word) {
	c = r;
	for i := 0; i < n; i++ {
		c, *z.at(i) = mulAddWWW_g(*x.at(i), y, c)
	}
	return;
}


func addMulVVW_s(z, x *Word, y Word, n int) (c Word)
func addMulVVW_g(z, x *Word, y Word, n int) (c Word) {
	for i := 0; i < n; i++ {
		z1, z0 := mulAddWWW_g(*x.at(i), y, *z.at(i));
		c, *z.at(i) = addWW_g(z0, c, 0);
		c += z1;
	}
	return;
}


func divWVW_s(z *Word, xn Word, x *Word, y Word, n int) (r Word)
func divWVW_g(z *Word, xn Word, x *Word, y Word, n int) (r Word) {
	r = xn;
	for i := n - 1; i >= 0; i-- {
		*z.at(i), r = divWW_g(r, *x.at(i), y)
	}
	return;
}