不能让价值通过carry传播
制作一个小的 C++ 大精度类,并且一切似乎都运行良好,但是添加,如果我将 0xffffffff 和 0x04 加在一起,当我应该得到 0x0100000003 时,我得到 0xffff0003.这是有问题的函数:
Making a little C++ large precision class, and everything seems to work decent, but the add, if I add 0xffffffff and 0x04 together I get 0xffff0003 when I should get 0x0100000003. Here is the function with the issue:
mpfl operator+(const mpfl &lhs, const mpfl &rhs)
{
unsigned long i;
mpfl ret(0);
mpfl trhs(rhs);
for (i = lhs.nbytes; i >= 0; i--)
{
if (
(unsigned short)lhs.data[i].data + (unsigned short)trhs.data[i].data
> (unsigned short)255
) {
if (i > 0)
{
ret.data[i].carry = 1;
ret.data[0].carry = 0;
}
else
{
ret.data[0].carry = 1;
}
}
else
ret.data[i].carry = 0;
ret.data[i].data = lhs.data[i].data + trhs.data[i].data;
if (i < lhs.nbytes)
{
if (ret.data[i].data == 255 && ret.data[i + 1].carry == 1)
increment(&trhs, i + 1);
ret.data[i].data += ret.data[i + 1].carry;
}
if (i == 0) break;
}
return ret;
}
这里是完整源代码的链接(github 使这更容易,因为有很多)
Here are links to the full source (github made this easier since there is a lot of it)
- 标题:https://github.com/phyrrus9/mpfl/blob/主/mpfl.h
- 类:https://github.com/phyrrus9/mpfl/blob/master/mpfl.cpp
- 驱动程序:https://github.com/phyrrus9/mpfl/blob/master/main.cpp
推荐答案
你的代码对我来说很乱.我之前做过很多次(长)num 类(浮动,固定,uint,模板化,...)所以这里有一些提示:
Your code is very messy to me. I did (long)num classes many times before (floating,fixed,uint,templated,...) so here are some hints:
尝试设置类似于实际硬件实现的 ALU 架构.
大多数算法都是为这种环境编写的.它将清理并加速您的代码.在某些情况下,我为此使用 asm,但如果您不想 CPU 依赖,您可以使用我的此类
Most algorithms are written for such environment. It will clean and speed up your code. In some cases I use asm for this but if you want to be not CPU dependent you can use this class of mine
C++ 中的 ALU 源代码:
//---------------------------------------------------------------------------
//--- ALU32 class 2.01 ------------------------------------------------------
//---------------------------------------------------------------------------
#ifndef _ALU32_h
#define _ALU32_h
//---------------------------------------------------------------------------
//#define _ALU32_no_asm
//---------------------------------------------------------------------------
class ALU32
{
public:
BYTE cy;
ALU32() { cy=0; }
void sar(DWORD &c); // msb -> [msb...lsb] -> cy shift arithmetic right
void shl(DWORD &c); // cy <- [msb...lsb] <- 0 shift left
void shr(DWORD &c); // 0 -> [msb...lsb] -> cy shift right
void rcl(DWORD &c); // cy <- [msb...lsb] <- cy shift through carry left
void rcr(DWORD &c); // cy -> [msb...lsb] -> cy shift through carry lright
void inc(DWORD &c);
void dec(DWORD &c);
void add(DWORD &c,DWORD a,DWORD b);
void sub(DWORD &c,DWORD a,DWORD b);
void adc(DWORD &c,DWORD a,DWORD b);
void sbc(DWORD &c,DWORD a,DWORD b);
void mul(DWORD &ch,DWORD &cl,DWORD a,DWORD b); // (ch,cl) = a*b
void div(DWORD &c,DWORD &d,DWORD ah,DWORD al,DWORD b); // c = a/b d =a%b
};
//---------------------------------------------------------------------------
void ALU32::inc(DWORD &c) { if (c==0xFFFFFFFF) cy=1; else cy=0; c++; }
void ALU32::dec(DWORD &c) { if (c==0x00000000) cy=1; else cy=0; c--; }
//---------------------------------------------------------------------------
void ALU32::sar(DWORD &c)
{
cy=c&1;
c=((c>>1)&0x7FFFFFFF)|(c&0x80000000);
}
//---------------------------------------------------------------------------
void ALU32::shl(DWORD &c)
{
cy=c>>31;
c=(c<<1)&0xFFFFFFFE;
}
//---------------------------------------------------------------------------
void ALU32::shr(DWORD &c)
{
cy=c&1;
c=(c>>1)&0x7FFFFFFF;
}
//---------------------------------------------------------------------------
void ALU32::rcl(DWORD &c)
{
DWORD cy0=cy;
cy=c>>31;
c=((c<<1)&0xFFFFFFFE)|cy0;
}
//---------------------------------------------------------------------------
void ALU32::rcr(DWORD &c)
{
DWORD cy0=cy;
cy=c&1;
c=((c>>1)&0x7FFFFFFF)|(cy0<<31);
}
//---------------------------------------------------------------------------
void ALU32::add(DWORD &c,DWORD a,DWORD b)
{
c=a+b;
cy=DWORD(((a &1)+(b &1) )>> 1);
cy=DWORD(((a>>1)+(b>>1)+cy)>>31);
}
//---------------------------------------------------------------------------
void ALU32::sub(DWORD &c,DWORD a,DWORD b)
{
c=a-b;
if (a<b) cy=1; else cy=0;
}
//---------------------------------------------------------------------------
void ALU32::adc(DWORD &c,DWORD a,DWORD b)
{
c=a+b+cy;
cy=DWORD(((a &1)+(b &1)+cy)>> 1);
cy=DWORD(((a>>1)+(b>>1)+cy)>>31);
}
//---------------------------------------------------------------------------
void ALU32::sbc(DWORD &c,DWORD a,DWORD b)
{
c=a-b-cy;
if (cy) { if (a<=b) cy=1; else cy=0; }
else { if (a< b) cy=1; else cy=0; }
}
//---------------------------------------------------------------------------
void ALU32::mul(DWORD &ch,DWORD &cl,DWORD a,DWORD b)
{
#ifdef _ALU32_no_asm
const int _h=1; // this is MSW,LSW order platform dependent So swap 0,1 if your platform is different
const int _l=0;
union _u
{
DWORD u32;
WORD u16[2];
} u;
DWORD al,ah,bl,bh;
DWORD c0,c1,c2;
// separate 2^16 base digits
u.u32=a; al=u.u16[_l]; ah=u.u16[_h];
u.u32=b; bl=u.u16[_l]; bh=u.u16[_h];
// multiplication (al+ah<<16)*(bl+bh<<16) = al*bl + al*bh<<16 + ah*bl<<16 + ah*bh<<32
c0=(al*bl);
add(c1,al*bh,ah*bl);
c2=(ah*bh)+(cy<<16);
// add subresults
add(c0,c0,(c1<<16)&0xFFFF0000); c1=((c1>>16)&0x0000FFFF)+cy;
add(c1,c1,c2);
// construct result from (c3,c2,c1,c0)
ch=c1;
cl=c0;
#else
DWORD _a,_b,_cl,_ch;
_a=a;
_b=b;
asm {
mov eax,_a
mov ebx,_b
mul ebx // H(edx),L(eax) = eax * ebx
mov _cl,eax
mov _ch,edx
}
cl=_cl;
ch=_ch;
#endif
}
//---------------------------------------------------------------------------
void ALU32::div(DWORD &c,DWORD &d,DWORD ah,DWORD al,DWORD b)
{
#ifdef _ALU32_no_asm
DWORD ch,cl,bh,bl,h,l,mh,ml;
int e;
// edge cases
if (!b ){ c=0xFFFFFFFF; d=0xFFFFFFFF; cy=1; return; }
if (!ah){ c=al/b; d=al%b; cy=0; return; }
// align a,b for binary long division m is the shifted mask of b lsb
for (bl=b,bh=0,mh=0,ml=1;bh<0x80000000;)
{
e=0; if (ah>bh) e=+1; // e = cmp a,b {-1,0,+1}
else if (ah<bh) e=-1;
else if (al>bl) e=+1;
else if (al<bl) e=-1;
if (e<=0) break; // a<=b ?
shl(bl); rcl(bh); // b<<=1
shl(ml); rcl(mh); // m<<=1
}
// binary long division
for (ch=0,cl=0;;)
{
sub(l,al,bl); // a-b
sbc(h,ah,bh);
if (cy) // a<b ?
{
if (ml==1) break;
shr(mh); rcr(ml); // m>>=1
shr(bh); rcr(bl); // b>>=1
continue;
}
al=l; ah=h; // a>=b ?
add(cl,cl,ml); // c+=m
adc(ch,ch,mh);
}
cy=0; c=cl; d=al;
if ((ch)||(ah)) cy=1; // overflow
#else
DWORD _al,_ah,_b,_c,_d;
_al=al;
_ah=ah;
_b=b;
asm {
mov eax,_al
mov edx,_ah
mov ebx,_b
div ebx
mov _c,eax // eax = H(edx),L(eax) / ebx
mov _d,edx // edx = H(edx),L(eax) % ebx
}
c=_c;
d=_d;
#endif
}
//---------------------------------------------------------------------------
#endif
//---------------------------------------------------------------------------
mul
和div
可通过# 在快速 CPU 汇编和较慢的 C++ 实现之间切换定义_ALU32_no_asm
DWORD
是 32 位unsigned int
并且可以像typedef unsigned __int32 DWORD;
一样定义
mul
anddiv
are switchable between fast CPU assembly and slower C++ implementation with the#define _ALU32_no_asm
DWORD
is 32 bitunsigned int
and can be defined liketypedef unsigned __int32 DWORD;
data[0]
是 LSB 还是 MSB(低/最高位/字节...)?data[0]
is the LSB or MSB (low/most significant bit/byte...)?- 所以要么你只是申请随身携带
- 或者您正在从高到低添加
- 在不使用浮点类型的情况下用 C 构建对数函数
那么现在如果你想添加两个数组(固定大小 N)
可以这样做:
ALU32 alu;
DWORD a[N],b[N],c[N]; // a[0] is LSB and a[N-1] is MSB
alu.add(c[0],a[0],b[0]);
for (int i=1;i<N;i++) alu.adc(c[i],a[i],b[i]);
// here c[] = a[] + b[]
最好使用最大的基数来提高速度.如果您仍然需要 8 位 ALU,由于直接访问进位,这也可以很容易地重写甚至简化.您可以使用 16 位或 32 位变量并直接从子结果中提取 9th
位作为进位(看起来您正在这样做).
it is a good idea to use the biggest base you can to improve speed. If you still need 8 bit ALU this can be also easily rewritten and even simplified due to direct access to carry. You can use 16 or 32 bit variables and extract 9th
bit as carry directly from sub-results (looks like you are doing it).
您的问题(从评论中复制)
我敢打赌,您的问题就在这里:
My bet is that your problem is here:
if (i<lhs.nbytes)
{
if (ret.data[i].data == 255 && ret.data[i + 1].carry == 1) increment(&trhs, i + 1);
ret.data[i].data += ret.data[i + 1].carry;
}
carry 应该总是在第一次使用(你总是在最后一次使用).这也揭示了另一种可能性,您的号码是如何存储的?
carry should be applied always but the first time (you do it always but the last time). This also reveals other possibility how is your number stored?
您必须从最低位开始添加
You have to start adding from lowest digits
但展位不正确.
PS. 以防您在纯 C 中需要 32
位 ALU 样式的乘法而无需 asm/C++ 看到这个链接(但上次更新后这里的代码已经包含这样的mul,div
):
PS. in case you need 32
bit ALU style multiplication without asm in pure C/C++ see this link (but after last update the code here already contains such mul,div
):
相关文章