=================================================================== RCS file: /home/cvs/OpenXM_contrib2/asir2018/engine/nd.c,v retrieving revision 1.6 retrieving revision 1.18 diff -u -p -r1.6 -r1.18 --- OpenXM_contrib2/asir2018/engine/nd.c 2018/09/28 08:20:28 1.6 +++ OpenXM_contrib2/asir2018/engine/nd.c 2019/09/04 01:12:02 1.18 @@ -1,8 +1,9 @@ -/* $OpenXM: OpenXM_contrib2/asir2018/engine/nd.c,v 1.5 2018/09/27 02:39:37 noro Exp $ */ +/* $OpenXM: OpenXM_contrib2/asir2018/engine/nd.c,v 1.17 2019/08/28 23:27:34 noro Exp $ */ #include "nd.h" -struct oEGT eg_search; +int Nnd_add,Nf4_red; +struct oEGT eg_search,f4_symb,f4_conv,f4_elim1,f4_elim2; int diag_period = 6; int weight_check = 1; @@ -77,6 +78,7 @@ NDV pltondv(VL vl,VL dvl,LIST p); void pltozpl(LIST l,Q *cont,LIST *pp); void ndl_max(UINT *d1,unsigned *d2,UINT *d); void nmtodp(int mod,NM m,DP *r); +void ndltodp(UINT *d,DP *r); NODE reverse_node(NODE n); P ndc_div(int mod,union oNDC a,union oNDC b); P ndctop(int mod,union oNDC c); @@ -86,6 +88,11 @@ void parse_nd_option(NODE opt); void dltondl(int n,DL dl,UINT *r); DP ndvtodp(int mod,NDV p); DP ndtodp(int mod,ND p); +DPM ndvtodpm(int mod,NDV p); +NDV dpmtondv(int mod,DPM p); +int dpm_getdeg(DPM p,int *rank); +void dpm_ptozp(DPM p,Z *cont,DPM *r); +int compdmm(int nv,DMM a,DMM b); void Pdp_set_weight(NODE,VECT *); void Pox_cmo_rpc(NODE,Obj *); @@ -1125,11 +1132,12 @@ int ndl_check_bound2(int index,UINT *d2) INLINE int ndl_hash_value(UINT *d) { int i; - int r; + UINT r; r = 0; for ( i = 0; i < nd_wpd; i++ ) - r = ((r<<16)+d[i])%REDTAB_LEN; + r = (r*1511+d[i]); + r %= REDTAB_LEN; return r; } @@ -1216,6 +1224,7 @@ ND nd_add(int mod,ND p1,ND p2) ND r; NM m1,m2,mr0,mr,s; + Nnd_add++; if ( !p1 ) return p2; else if ( !p2 ) return p1; else if ( mod == -1 ) return nd_add_sf(p1,p2); @@ -2081,6 +2090,7 @@ NODE nd_gb(int m,int ishomo,int checkonly,int gensyz,i P cont; LIST list; + Nnd_add = 0; g = 0; d = 0; for ( i = 0; i < nd_psn; i++ ) { d = update_pairs(d,g,i,gensyz); @@ -2170,7 +2180,7 @@ again: } } conv_ilist(nd_demand,0,g,indp); - if ( !checkonly && DP_Print ) { printf("nd_gb done.\n"); fflush(stdout); } + if ( !checkonly && DP_Print ) { printf("nd_gb done. Number of nd_add=%d\n",Nnd_add); fflush(stdout); } return g; } @@ -2601,7 +2611,7 @@ ND_pairs nd_newpairs( NODE g, int t ) dl = DL(nd_psh[t]); ts = SG(nd_psh[t]) - TD(dl); - if ( nd_module && nd_intersect && (MPOS(dl) > 1) ) return 0; + if ( nd_module && nd_intersect && (MPOS(dl) > nd_intersect) ) return 0; for ( r0 = 0, h = g; h; h = NEXT(h) ) { if ( nd_module && (MPOS(DL(nd_psh[(long)BDY(h)])) != MPOS(dl)) ) continue; @@ -3220,6 +3230,7 @@ void nd_gr(LIST f,LIST v,int m,int homo,int retdp,int int *perm; EPOS oepos; int obpe,oadv,ompos,cbpe; + VECT hvect; nd_module = 0; if ( !m && Demand ) nd_demand = 1; @@ -3263,12 +3274,18 @@ void nd_gr(LIST f,LIST v,int m,int homo,int retdp,int for ( t = BDY(f), max = 1; t; t = NEXT(t) ) for ( tv = vv; tv; tv = NEXT(tv) ) { if ( nd_module ) { - s = BDY((LIST)BDY(t)); - trank = length(s); - mrank = MAX(mrank,trank); - for ( ; s; s = NEXT(s) ) { - e = getdeg(tv->v,(P)BDY(s)); - max = MAX(e,max); + if ( OID(BDY(t)) == O_DPM ) { + e = dpm_getdeg((DPM)BDY(t),&trank); + max = MAX(e,max); + mrank = MAX(mrank,trank); + } else { + s = BDY((LIST)BDY(t)); + trank = length(s); + mrank = MAX(mrank,trank); + for ( ; s; s = NEXT(s) ) { + e = getdeg(tv->v,(P)BDY(s)); + max = MAX(e,max); + } } } else { e = getdeg(tv->v,(P)BDY(t)); @@ -3280,9 +3297,18 @@ void nd_gr(LIST f,LIST v,int m,int homo,int retdp,int ishomo = 1; for ( fd0 = 0, t = BDY(f); t; t = NEXT(t) ) { if ( nd_module ) { - if ( !m && !nd_gentrace ) pltozpl((LIST)BDY(t),&dmy,&zpl); - else zpl = (LIST)BDY(t); + if ( OID(BDY(t)) == O_DPM ) { + Z cont; + DPM zdpm; + + if ( !m && !nd_gentrace ) dpm_ptozp((DPM)BDY(t),&cont,&zdpm); + else zdpm = (DPM)BDY(t); + b = (pointer)dpmtondv(m,zdpm); + } else { + if ( !m && !nd_gentrace ) pltozpl((LIST)BDY(t),&dmy,&zpl); + else zpl = (LIST)BDY(t); b = (pointer)pltondv(CO,vv,zpl); + } } else { if ( !m && !nd_gentrace ) ptozp((P)BDY(t),1,&dmy,&zp); else zp = (P)BDY(t); @@ -3330,6 +3356,11 @@ void nd_gr(LIST f,LIST v,int m,int homo,int retdp,int if ( !x ) { *rp = 0; return; } + if ( nd_gentrace ) { + MKVECT(hvect,nd_psn); + for ( i = 0; i < nd_psn; i++ ) + ndltodp(nd_psh[i]->dl,(DP *)&BDY(hvect)[i]); + } if ( !ishomo && homo ) { /* dehomogenization */ for ( t = x; t; t = NEXT(t) ) ndv_dehomogenize((NDV)BDY(t),ord); @@ -3339,7 +3370,7 @@ void nd_gr(LIST f,LIST v,int m,int homo,int retdp,int nd_demand = 0; if ( nd_module && nd_intersect ) { for ( j = nd_psn-1, x = 0; j >= 0; j-- ) - if ( MPOS(DL(nd_psh[j])) > 1 ) { + if ( MPOS(DL(nd_psh[j])) > nd_intersect ) { MKNODE(xx,(pointer)((unsigned long)j),x); x = xx; } conv_ilist(nd_demand,0,x,0); @@ -3363,10 +3394,12 @@ void nd_gr(LIST f,LIST v,int m,int homo,int retdp,int nd_setup_parameters(nd_nvar,0); FINAL: for ( r0 = 0, t = x; t; t = NEXT(t) ) { - NEXTNODE(r0,r); - if ( nd_module ) BDY(r) = ndvtopl(m,CO,vv,BDY(t),mrank); - else if ( retdp ) BDY(r) = ndvtodp(m,BDY(t)); - else BDY(r) = ndvtop(m,CO,vv,BDY(t)); + NEXTNODE(r0,r); + if ( nd_module ) { + if ( retdp ) BDY(r) = ndvtodpm(m,BDY(t)); + else BDY(r) = ndvtopl(m,CO,vv,BDY(t),mrank); + } else if ( retdp ) BDY(r) = ndvtodp(m,BDY(t)); + else BDY(r) = ndvtop(m,CO,vv,BDY(t)); } if ( r0 ) NEXT(r) = 0; if ( !m && nd_nalg ) @@ -3376,8 +3409,7 @@ FINAL: if ( f4 ) { STOZ(16,bpe); STOZ(nd_last_nonzero,last_nonzero); - tr = mknode(5,*rp,(!ishomo&&homo)?ONE:0,BDY(nzlist),bpe,last_nonzero); MKLIST(*rp,tr); - + tr = mknode(6,*rp,(!ishomo&&homo)?ONE:0,BDY(nzlist),bpe,last_nonzero,hvect); MKLIST(*rp,tr); } else { tl1 = reverse_node(tl1); tl2 = reverse_node(tl2); tl3 = reverse_node(tl3); @@ -3397,7 +3429,7 @@ FINAL: MKLIST(l1,tl1); MKLIST(l2,tl2); MKLIST(l3,t); MKLIST(l4,tl3); MKLIST(l5,tl4); STOZ(nd_bpe,bpe); - tr = mknode(8,*rp,(!ishomo&&homo)?ONE:0,l1,l2,l3,l4,l5,bpe); MKLIST(*rp,tr); + tr = mknode(9,*rp,(!ishomo&&homo)?ONE:0,l1,l2,l3,l4,l5,bpe,hvect); MKLIST(*rp,tr); } } #if 0 @@ -3641,7 +3673,7 @@ void nd_gr_recompute_trace(LIST f,LIST v,int m,struct if ( DP_Print ) fprintf(asir_out,"\n"); } -void nd_gr_trace(LIST f,LIST v,int trace,int homo,int f4,struct order_spec *ord,LIST *rp) +void nd_gr_trace(LIST f,LIST v,int trace,int homo,int retdp,int f4,struct order_spec *ord,LIST *rp) { VL tv,fv,vv,vc,av; NODE fd,fd0,in0,in,r,r0,t,s,cand,alist; @@ -3664,6 +3696,7 @@ void nd_gr_trace(LIST f,LIST v,int trace,int homo,int int *perm; int j,ret; Z jq,bpe; + VECT hvect; nd_module = 0; nd_lf = 0; @@ -3718,6 +3751,11 @@ void nd_gr_trace(LIST f,LIST v,int trace,int homo,int for ( t = BDY(f), max = 1; t; t = NEXT(t) ) for ( tv = vv; tv; tv = NEXT(tv) ) { if ( nd_module ) { + if ( OID(BDY(t)) == O_DPM ) { + e = dpm_getdeg((DPM)BDY(t),&trank); + max = MAX(e,max); + mrank = MAX(mrank,trank); + } else { s = BDY((LIST)BDY(t)); trank = length(s); mrank = MAX(mrank,trank); @@ -3725,6 +3763,7 @@ void nd_gr_trace(LIST f,LIST v,int trace,int homo,int e = getdeg(tv->v,(P)BDY(s)); max = MAX(e,max); } + } } else { e = getdeg(tv->v,(P)BDY(t)); max = MAX(e,max); @@ -3735,13 +3774,22 @@ void nd_gr_trace(LIST f,LIST v,int trace,int homo,int ishomo = 1; for ( in0 = 0, fd0 = 0, t = BDY(f); t; t = NEXT(t) ) { if ( nd_module ) { - if ( !nd_gentrace ) pltozpl((LIST)BDY(t),&dmy,&zpl); - else zpl = (LIST)BDY(t); + if ( OID(BDY(t)) == O_DPM ) { + Z cont; + DPM zdpm; + + if ( !nd_gentrace ) dpm_ptozp((DPM)BDY(t),&cont,&zdpm); + else zdpm = (DPM)BDY(t); + c = (pointer)dpmtondv(m,zdpm); + } else { + if ( !nd_gentrace ) pltozpl((LIST)BDY(t),&dmy,&zpl); + else zpl = (LIST)BDY(t); c = (pointer)pltondv(CO,vv,zpl); + } } else { - if ( !nd_gentrace ) ptozp((P)BDY(t),1,&dmy,&zp); - else zp = (P)BDY(t); - c = (pointer)ptondv(CO,vv,zp); + if ( !nd_gentrace ) ptozp((P)BDY(t),1,&dmy,&zp); + else zp = (P)BDY(t); + c = (pointer)ptondv(CO,vv,zp); } if ( ishomo ) ishomo = ishomo && ndv_ishomo(c); @@ -3781,6 +3829,11 @@ void nd_gr_trace(LIST f,LIST v,int trace,int homo,int else m = get_lprime(++mindex); continue; } + if ( nd_gentrace ) { + MKVECT(hvect,nd_psn); + for ( i = 0; i < nd_psn; i++ ) + ndltodp(nd_psh[i]->dl,(DP *)&BDY(hvect)[i]); + } if ( !ishomo && homo ) { /* dehomogenization */ for ( t = cand; t; t = NEXT(t) ) ndv_dehomogenize((NDV)BDY(t),ord); @@ -3833,8 +3886,11 @@ void nd_gr_trace(LIST f,LIST v,int trace,int homo,int nd_bpe = cbpe; nd_setup_parameters(nd_nvar,0); for ( r = cand; r; r = NEXT(r) ) { - if ( nd_module ) BDY(r) = ndvtopl(0,CO,vv,BDY(r),mrank); - else BDY(r) = (pointer)ndvtop(0,CO,vv,BDY(r)); + if ( nd_module ) { + if ( retdp ) BDY(r) = ndvtodpm(0,BDY(r)); + else BDY(r) = ndvtopl(0,CO,vv,BDY(r),mrank); + } else if ( retdp ) BDY(r) = ndvtodp(0,BDY(r)); + else BDY(r) = (pointer)ndvtop(0,CO,vv,BDY(r)); } if ( nd_nalg ) cand = postprocess_algcoef(av,alist,cand); @@ -3858,7 +3914,7 @@ void nd_gr_trace(LIST f,LIST v,int trace,int homo,int MKLIST(l1,tl1); MKLIST(l2,tl2); MKLIST(l3,t); MKLIST(l4,tl3); MKLIST(l5,tl4); STOZ(nd_bpe,bpe); - tr = mknode(8,*rp,(!ishomo&&homo)?ONE:0,l1,l2,l3,l4,l5,bpe); MKLIST(*rp,tr); + tr = mknode(9,*rp,(!ishomo&&homo)?ONE:0,l1,l2,l3,l4,l5,bpe,hvect); MKLIST(*rp,tr); } } @@ -3922,6 +3978,18 @@ void nmtodp(int mod,NM m,DP *r) *r = dp; } +void ndltodp(UINT *d,DP *r) +{ + DP dp; + MP mr; + + NEWMP(mr); + mr->dl = ndltodl(nd_nvar,d); + mr->c = (Obj)ONE; + NEXT(mr) = 0; MKDP(nd_nvar,mr,dp); dp->sugar = mr->dl->td; + *r = dp; +} + void ndl_print(UINT *dl) { int n; @@ -4086,7 +4154,7 @@ void ndv_homogenize(NDV p,int obpe,int oadv,EPOS oepos NMV m,mr0,mr,t; len = p->len; - for ( m = BDY(p), i = 0, max = 1; i < len; NMV_OADV(m), i++ ) + for ( m = BDY(p), i = 0, max = 0; i < len; NMV_OADV(m), i++ ) max = MAX(max,TD(DL(m))); mr0 = nmv_adv>oadv?(NMV)REALLOC(BDY(p),len*nmv_adv):BDY(p); m = (NMV)((char *)mr0+(len-1)*oadv); @@ -4231,14 +4299,18 @@ void mpz_removecont_array(mpz_t *c,int n) { mpz_t d0,a,u,u1,gcd; int i,j; - mpz_t *q,*r; + static mpz_t *q,*r; + static int c_len = 0; for ( i = 0; i < n; i++ ) if ( mpz_sgn(c[i]) ) break; if ( i == n ) return; gcdv_mpz_estimate(d0,c,n); - q = (mpz_t *)MALLOC(n*sizeof(mpz_t)); - r = (mpz_t *)MALLOC(n*sizeof(mpz_t)); + if ( n > c_len ) { + q = (mpz_t *)MALLOC(n*sizeof(mpz_t)); + r = (mpz_t *)MALLOC(n*sizeof(mpz_t)); + c_len = n; + } for ( i = 0; i < n; i++ ) { mpz_init(q[i]); mpz_init(r[i]); mpz_fdiv_qr(q[i],r[i],c[i],d0); @@ -5188,31 +5260,32 @@ NDV ptondv(VL vl,VL dvl,P p) void pltozpl(LIST l,Q *cont,LIST *pp) { - NODE nd,nd1; - int n; - P *pl; - Q *cl; - int i; - P dmy; - Z dvr; - LIST r; + NODE nd,nd1; + int n; + P *pl; + Q *cl; + int i; + P dmy; + Z dvr,inv; + LIST r; - nd = BDY(l); n = length(nd); - pl = (P *)MALLOC(n*sizeof(P)); - cl = (Q *)MALLOC(n*sizeof(P)); - for ( i = 0; i < n; i++, nd = NEXT(nd) ) - ptozp((P)BDY(nd),1,&cl[i],&dmy); - qltozl(cl,n,&dvr); - nd = BDY(l); - for ( i = 0; i < n; i++, nd = NEXT(nd) ) { - divsp(CO,(P)BDY(nd),(P)dvr,&pl[i]); - } - nd = 0; - for ( i = n-1; i >= 0; i-- ) { - MKNODE(nd1,pl[i],nd); nd = nd1; - } - MKLIST(r,nd); - *pp = r; + nd = BDY(l); n = length(nd); + pl = (P *)MALLOC(n*sizeof(P)); + cl = (Q *)MALLOC(n*sizeof(Q)); + for ( i = 0; i < n; i++, nd = NEXT(nd) ) { + ptozp((P)BDY(nd),1,&cl[i],&dmy); + } + qltozl(cl,n,&dvr); + divz(ONE,dvr,&inv); + nd = BDY(l); + for ( i = 0; i < n; i++, nd = NEXT(nd) ) + divsp(CO,(P)BDY(nd),(P)dvr,&pl[i]); + nd = 0; + for ( i = n-1; i >= 0; i-- ) { + MKNODE(nd1,pl[i],nd); nd = nd1; + } + MKLIST(r,nd); + *pp = r; } /* (a1,a2,...,an) -> a1*e(1)+...+an*e(n) */ @@ -5401,6 +5474,99 @@ NDV ndtondv(int mod,ND p) return d; } +static int dmm_comp_nv; + +int dmm_comp(DMM *a,DMM *b) +{ + return -compdmm(dmm_comp_nv,*a,*b); +} + +void dmm_sort_by_ord(DMM *a,int len,int nv) +{ + dmm_comp_nv = nv; + qsort(a,len,sizeof(DMM),(int (*)(const void *,const void *))dmm_comp); +} + +void dpm_sort(DPM p,DPM *rp) +{ + DMM t,t1; + int len,i,n; + DMM *a; + DPM d; + + if ( !p ) *rp = 0; + for ( t = BDY(p), len = 0; t; t = NEXT(t), len++ ); + a = (DMM *)MALLOC(len*sizeof(DMM)); + for ( i = 0, t = BDY(p); i < len; i++, t = NEXT(t) ) a[i] = t; + n = p->nv; + dmm_sort_by_ord(a,len,n); + t = 0; + for ( i = len-1; i >= 0; i-- ) { + NEWDMM(t1); + t1->c = a[i]->c; + t1->dl = a[i]->dl; + t1->pos = a[i]->pos; + t1->next = t; + t = t1; + } + MKDPM(n,t,d); + SG(d) = SG(p); + *rp = d; +} + +int dpm_comp(DPM *a,DPM *b) +{ + return compdpm(CO,*a,*b); +} + +NODE dpm_sort_list(NODE l) +{ + int i,len; + NODE t,t1; + DPM *a; + + len = length(l); + a = (DPM *)MALLOC(len*sizeof(DPM)); + for ( t = l, i = 0; i < len; i++, t = NEXT(t) ) a[i] = (DPM)BDY(t); + qsort(a,len,sizeof(DPM),(int (*)(const void *,const void *))dpm_comp); + t = 0; + for ( i = len-1; i >= 0; i-- ) { + MKNODE(t1,(pointer)a[i],t); t = t1; + } + return t; +} + +NDV dpmtondv(int mod,DPM p) +{ + NDV d; + NMV m,m0; + DMM t; + DMM *a; + int i,len,n; + + if ( !p ) return 0; + for ( t = BDY(p), len = 0; t; t = NEXT(t), len++ ); + a = (DMM *)MALLOC(len*sizeof(DMM)); + for ( i = 0, t = BDY(p); i < len; i++, t = NEXT(t) ) a[i] = t; + n = p->nv; + dmm_sort_by_ord(a,len,n); + if ( mod > 0 || mod == -1 ) + m0 = m = (NMV)MALLOC_ATOMIC_IGNORE_OFF_PAGE(len*nmv_adv); + else + m0 = m = MALLOC(len*nmv_adv); +#if 0 + ndv_alloc += nmv_adv*len; +#endif + for ( i = 0; i < len; i++, NMV_ADV(m) ) { + dltondl(n,a[i]->dl,DL(m)); + MPOS(DL(m)) = a[i]->pos; + CZ(m) = (Z)a[i]->c; + } + MKNDV(NV(p),m0,len,d); + SG(d) = SG(p); + return d; +} + ND ndvtond(int mod,NDV p) { ND d; @@ -5443,6 +5609,29 @@ DP ndvtodp(int mod,NDV p) return d; } +DPM ndvtodpm(int mod,NDV p) +{ + DMM m,m0; + DPM d; + NMV t; + int i,len; + + if ( !p ) return 0; + m0 = 0; + len = p->len; + for ( t = BDY(p), i = 0; i < len; NMV_ADV(t), i++ ) { + NEXTDMM(m0,m); + m->dl = ndltodl(nd_nvar,DL(t)); + m->c = (Obj)ndctop(mod,t->c); + m->pos = MPOS(DL(t)); + } + NEXT(m) = 0; + MKDPM(nd_nvar,m0,d); + SG(d) = SG(p); + return d; +} + + DP ndtodp(int mod,ND p) { MP m,m0; @@ -5530,6 +5719,8 @@ NODE ndv_reducebase(NODE x,int *perm) /* XXX incomplete */ +extern int dpm_ordtype; + void nd_init_ord(struct order_spec *ord) { nd_module = (ord->id >= 256); @@ -5541,6 +5732,7 @@ void nd_init_ord(struct order_spec *ord) nd_poly_weight = ord->top_weight; nd_module_rank = ord->module_rank; nd_module_weight = ord->module_top_weight; + dpm_ordtype = ord->ispot; } nd_matrix = 0; nd_matrix_len = 0; @@ -5796,27 +5988,6 @@ int nd_to_vect(int mod,UINT *s0,int n,ND d,UINT *r) return i; } -#if defined(__GNUC__) && SIZEOF_LONG==8 - -#define MOD128(a,c,m) ((a)=(((c)!=0||((a)>=(m)))?(((((U128)(c))<<64)+(a))%(m)):(a))) - -int nd_to_vect64(int mod,UINT *s0,int n,ND d,U64 *r) -{ - NM m; - UINT *t,*s; - int i; - - for ( i = 0; i < n; i++ ) r[i] = 0; - for ( i = 0, s = s0, m = BDY(d); m; m = NEXT(m) ) { - t = DL(m); - for ( ; !ndl_equal(t,s); s += nd_wpd, i++ ); - r[i] = (U64)CM(m); - } - for ( i = 0; !r[i]; i++ ); - return i; -} -#endif - int nd_to_vect_q(UINT *s0,int n,ND d,Z *r) { NM m; @@ -5910,18 +6081,17 @@ Z *nm_ind_pair_to_vect(int mod,UINT *s0,int n,NM_ind_p return r; } -IndArray nm_ind_pair_to_vect_compress(int trace,UINT *s0,int n,int *s0hash,NM_ind_pair pair) +IndArray nm_ind_pair_to_vect_compress(int trace,UINT *s0,int n,NM_ind_pair pair,int start) { NM m; NMV mr; - UINT *d,*t,*s; + UINT *d,*t,*s,*u; NDV p; unsigned char *ivc; unsigned short *ivs; UINT *v,*ivi,*s0v; - int i,j,len,prev,diff,cdiff,h; + int i,j,len,prev,diff,cdiff,h,st,ed,md,c; IndArray r; -struct oEGT eg0,eg1; m = pair->mul; d = DL(m); @@ -5933,14 +6103,20 @@ struct oEGT eg0,eg1; len = LEN(p); t = (UINT *)MALLOC(nd_wpd*sizeof(UINT)); v = (unsigned int *)MALLOC(len*sizeof(unsigned int)); -get_eg(&eg0); - for ( i = j = 0, s = s0, mr = BDY(p); j < len; j++, NMV_ADV(mr) ) { - ndl_add(d,DL(mr),t); - h = ndl_hash_value(t); - for ( ; h != s0hash[i] || !ndl_equal(t,s); s += nd_wpd, i++ ); - v[j] = i; + for ( prev = start, mr = BDY(p), j = 0; j < len; j++, NMV_ADV(mr) ) { + ndl_add(d,DL(mr),t); + st = prev; + ed = n; + while ( ed > st ) { + md = (st+ed)/2; + u = s0+md*nd_wpd; + c = DL_COMPARE(u,t); + if ( c == 0 ) break; + else if ( c > 0 ) st = md; + else ed = md; + } + prev = v[j] = md; } -get_eg(&eg1); add_eg(&eg_search,&eg0,&eg1); r = (IndArray)MALLOC(sizeof(struct oIndArray)); r->head = v[0]; diff = 0; @@ -5983,7 +6159,7 @@ void expand_array(Z *svect,Z *cvect,int n) if ( svect[i] ) svect[i] = cvect[j++]; } -#if 1 +#if 0 int ndv_reduce_vect_q(Z *svect,int trace,int col,IndArray *imat,NM_ind_pair *rp0,int nred) { int i,j,k,len,pos,prev,nz; @@ -6063,11 +6239,11 @@ int ndv_reduce_vect_q(Z *svect,int trace,int col,IndAr return maxrs; } #else + /* direct mpz version */ int ndv_reduce_vect_q(Z *svect0,int trace,int col,IndArray *imat,NM_ind_pair *rp0,int nred) { int i,j,k,len,pos,prev; - mpz_t *svect; mpz_t cs,cr,gcd; IndArray ivect; unsigned char *ivc; @@ -6079,12 +6255,17 @@ int ndv_reduce_vect_q(Z *svect0,int trace,int col,IndA int maxrs; double hmag; int l; + static mpz_t *svect; + static int svect_len=0; maxrs = 0; for ( i = 0; i < col && !svect0[i]; i++ ); if ( i == col ) return maxrs; hmag = p_mag((P)svect0[i])*nd_scale; - svect = (mpz_t *)MALLOC(col*sizeof(mpz_t)); + if ( col > svect_len ) { + svect = (mpz_t *)MALLOC(col*sizeof(mpz_t)); + svect_len = col; + } for ( i = 0; i < col; i++ ) { mpz_init(svect[i]); if ( svect0[i] ) @@ -6105,8 +6286,12 @@ int ndv_reduce_vect_q(Z *svect0,int trace,int col,IndA mpz_div(cs,svect[k],gcd); mpz_div(cr,BDY(CZ(mr)),gcd); mpz_neg(cs,cs); - for ( j = 0; j < col; j++ ) - mpz_mul(svect[j],svect[j],cr); + if ( MUNIMPZ(cr) ) + for ( j = 0; j < col; j++ ) mpz_neg(svect[j],svect[j]); + else if ( !UNIMPZ(cr) ) + for ( j = 0; j < col; j++ ) { + if ( mpz_sgn(svect[j]) ) mpz_mul(svect[j],svect[j],cr); + } mpz_set_ui(svect[k],0); prev = k; switch ( ivect->width ) { @@ -6220,78 +6405,6 @@ int ndv_reduce_vect(int m,UINT *svect,int col,IndArray return maxrs; } -#if defined(__GNUC__) && SIZEOF_LONG==8 - -int ndv_reduce_vect64(int m,U64 *svect,U64 *cvect,int col,IndArray *imat,NM_ind_pair *rp0,int nred) -{ - int i,j,k,len,pos,prev; - U64 a,c,c1,c2; - IndArray ivect; - unsigned char *ivc; - unsigned short *ivs; - unsigned int *ivi; - NDV redv; - NMV mr; - NODE rp; - int maxrs; - - for ( i = 0; i < col; i++ ) cvect[i] = 0; - maxrs = 0; - for ( i = 0; i < nred; i++ ) { - ivect = imat[i]; - k = ivect->head; - a = svect[k]; c = cvect[k]; - MOD128(a,c,m); - svect[k] = a; cvect[k] = 0; - if ( (c = svect[k]) != 0 ) { - maxrs = MAX(maxrs,rp0[i]->sugar); - c = m-c; redv = nd_ps[rp0[i]->index]; - len = LEN(redv); mr = BDY(redv); - svect[k] = 0; prev = k; - switch ( ivect->width ) { - case 1: - ivc = ivect->index.c; - for ( j = 1, NMV_ADV(mr); j < len; j++, NMV_ADV(mr) ) { - pos = prev+ivc[j]; c1 = CM(mr); prev = pos; - if ( c1 ) { - c2 = svect[pos]+c1*c; - if ( c2 < svect[pos] ) cvect[pos]++; - svect[pos] = c2; - } - } - break; - case 2: - ivs = ivect->index.s; - for ( j = 1, NMV_ADV(mr); j < len; j++, NMV_ADV(mr) ) { - pos = prev+ivs[j]; c1 = CM(mr); prev = pos; - if ( c1 ) { - c2 = svect[pos]+c1*c; - if ( c2 < svect[pos] ) cvect[pos]++; - svect[pos] = c2; - } - } - break; - case 4: - ivi = ivect->index.i; - for ( j = 1, NMV_ADV(mr); j < len; j++, NMV_ADV(mr) ) { - pos = prev+ivi[j]; c1 = CM(mr); prev = pos; - if ( c1 ) { - c2 = svect[pos]+c1*c; - if ( c2 < svect[pos] ) cvect[pos]++; - svect[pos] = c2; - } - } - break; - } - } - } - for ( i = 0; i < col; i++ ) { - a = svect[i]; c = cvect[i]; MOD128(a,c,m); svect[i] = a; - } - return maxrs; -} -#endif - int ndv_reduce_vect_sf(int m,UINT *svect,int col,IndArray *imat,NM_ind_pair *rp0,int nred) { int i,j,k,len,pos,prev; @@ -6549,36 +6662,6 @@ NDV vect_to_ndv(UINT *vect,int spcol,int col,int *rhea } } -#if defined(__GNUC__) && SIZEOF_LONG==8 -NDV vect64_to_ndv(U64 *vect,int spcol,int col,int *rhead,UINT *s0vect) -{ - int j,k,len; - UINT *p; - UINT c; - NDV r; - NMV mr0,mr; - - for ( j = 0, len = 0; j < spcol; j++ ) if ( vect[j] ) len++; - if ( !len ) return 0; - else { - mr0 = (NMV)MALLOC_ATOMIC_IGNORE_OFF_PAGE(nmv_adv*len); -#if 0 - ndv_alloc += nmv_adv*len; -#endif - mr = mr0; - p = s0vect; - for ( j = k = 0; j < col; j++, p += nd_wpd ) - if ( !rhead[j] ) { - if ( (c = (UINT)vect[k++]) != 0 ) { - ndl_copy(p,DL(mr)); CM(mr) = c; NMV_ADV(mr); - } - } - MKNDV(nd_nvar,mr0,len,r); - return r; - } -} -#endif - NDV vect_to_ndv_2(unsigned long *vect,int col,UINT *s0vect) { int j,k,len; @@ -6788,9 +6871,12 @@ NODE nd_f4(int m,int checkonly,int **indp) PGeoBucket bucket; struct oEGT eg0,eg1,eg_f4; Z i1,i2,sugarq; + + init_eg(&f4_symb); init_eg(&f4_conv); init_eg(&f4_conv); init_eg(&f4_elim1); init_eg(&f4_elim2); #if 0 ndv_alloc = 0; #endif + Nf4_red=0; g = 0; d = 0; for ( i = 0; i < nd_psn; i++ ) { d = update_pairs(d,g,i,0); @@ -6832,7 +6918,7 @@ NODE nd_f4(int m,int checkonly,int **indp) d = nd_reconstruct(0,d); continue; } - get_eg(&eg1); init_eg(&eg_f4); add_eg(&eg_f4,&eg0,&eg1); + get_eg(&eg1); init_eg(&eg_f4); add_eg(&eg_f4,&eg0,&eg1); add_eg(&f4_symb,&eg0,&eg1); if ( DP_Print ) fprintf(asir_out,"sugar=%d,symb=%.3fsec,", sugar,eg_f4.exectime); @@ -6881,6 +6967,11 @@ NODE nd_f4(int m,int checkonly,int **indp) #if 0 fprintf(asir_out,"ndv_alloc=%d\n",ndv_alloc); #endif + if ( DP_Print ) { + fprintf(asir_out,"number of red=%d,",Nf4_red); + fprintf(asir_out,"symb=%.3fsec,conv=%.3fsec,elim1=%.3fsec,elim2=%.3fsec\n", + f4_symb.exectime,f4_conv.exectime,f4_elim1.exectime,f4_elim2.exectime); + } conv_ilist(nd_demand,0,g,indp); return g; } @@ -7104,7 +7195,6 @@ NODE nd_f4_red_2(ND_pairs sp0,UINT *s0vect,int col,NOD unsigned long *v; get_eg(&eg0); -init_eg(&eg_search); for ( sp = sp0, nsp = 0; sp; sp = NEXT(sp), nsp++ ); nred = length(rp0); mat = alloc_matrix(nsp,col); @@ -7159,18 +7249,18 @@ init_eg(&eg_search); NODE nd_f4_red(int m,ND_pairs sp0,int trace,UINT *s0vect,int col,NODE rp0,ND_pairs *nz) { IndArray *imat; - int nsp,nred,i; + int nsp,nred,i,start; int *rhead; NODE r0,rp; ND_pairs sp; NM_ind_pair *rvect; UINT *s; int *s0hash; + struct oEGT eg0,eg1,eg_conv; if ( m == 2 && nd_rref2 ) return nd_f4_red_2(sp0,s0vect,col,rp0,nz); -init_eg(&eg_search); for ( sp = sp0, nsp = 0; sp; sp = NEXT(sp), nsp++ ); nred = length(rp0); imat = (IndArray *)MALLOC(nred*sizeof(IndArray)); @@ -7178,20 +7268,25 @@ init_eg(&eg_search); for ( i = 0; i < col; i++ ) rhead[i] = 0; /* construction of index arrays */ + get_eg(&eg0); if ( DP_Print ) { - fprintf(asir_out,"%dx%d,",nsp+nred,col); + fprintf(asir_out,"%dx%d,",nsp+nred,col); + fflush(asir_out); } rvect = (NM_ind_pair *)MALLOC(nred*sizeof(NM_ind_pair)); - s0hash = (int *)MALLOC(col*sizeof(int)); - for ( i = 0, s = s0vect; i < col; i++, s += nd_wpd ) - s0hash[i] = ndl_hash_value(s); - for ( rp = rp0, i = 0; rp; i++, rp = NEXT(rp) ) { + for ( start = 0, rp = rp0, i = 0; rp; i++, rp = NEXT(rp) ) { rvect[i] = (NM_ind_pair)BDY(rp); - imat[i] = nm_ind_pair_to_vect_compress(trace,s0vect,col,s0hash,rvect[i]); + imat[i] = nm_ind_pair_to_vect_compress(trace,s0vect,col,rvect[i],start); rhead[imat[i]->head] = 1; + start = imat[i]->head; } + get_eg(&eg1); init_eg(&eg_conv); add_eg(&eg_conv,&eg0,&eg1); add_eg(&f4_conv,&eg0,&eg1); + if ( DP_Print ) { + fprintf(asir_out,"conv=%.3fsec,",eg_conv.exectime); + fflush(asir_out); + } if ( m > 0 ) -#if defined(__GNUC__) && SIZEOF_LONG==8 +#if SIZEOF_LONG==8 r0 = nd_f4_red_mod64_main(m,sp0,nsp,s0vect,col,rvect,rhead,imat,nred,nz); #else r0 = nd_f4_red_main(m,sp0,nsp,s0vect,col,rvect,rhead,imat,nred,nz); @@ -7202,9 +7297,6 @@ init_eg(&eg_search); r0 = nd_f4_red_lf_main(m,sp0,nsp,trace,s0vect,col,rvect,rhead,imat,nred); else r0 = nd_f4_red_q_main(sp0,nsp,trace,s0vect,col,rvect,rhead,imat,nred); -#if 0 - if ( DP_Print ) print_eg("search",&eg_search); -#endif return r0; } @@ -7296,92 +7388,7 @@ NODE nd_f4_red_main(int m,ND_pairs sp0,int nsp,UINT *s return r0; } -#if defined(__GNUC__) && SIZEOF_LONG==8 -/* for Fp, 2^15=<p<2^29 */ -NODE nd_f4_red_mod64_main(int m,ND_pairs sp0,int nsp,UINT *s0vect,int col, - NM_ind_pair *rvect,int *rhead,IndArray *imat,int nred,ND_pairs *nz) -{ - int spcol,sprow,a; - int i,j,k,l,rank; - NODE r0,r; - ND_pairs sp; - ND spol; - U64 **spmat; - U64 *svect,*cvect; - U64 *v; - int *colstat; - struct oEGT eg0,eg1,eg2,eg_f4,eg_f4_1,eg_f4_2; - int maxrs; - int *spsugar; - ND_pairs *spactive; - - spcol = col-nred; - get_eg(&eg0); - /* elimination (1st step) */ - spmat = (U64 **)MALLOC(nsp*sizeof(U64 *)); - svect = (U64 *)MALLOC(col*sizeof(U64)); - cvect = (U64 *)MALLOC(col*sizeof(U64)); - spsugar = (int *)MALLOC(nsp*sizeof(int)); - spactive = !nz?0:(ND_pairs *)MALLOC(nsp*sizeof(ND_pairs)); - for ( a = sprow = 0, sp = sp0; a < nsp; a++, sp = NEXT(sp) ) { - nd_sp(m,0,sp,&spol); - if ( !spol ) continue; - nd_to_vect64(m,s0vect,col,spol,svect); - maxrs = ndv_reduce_vect64(m,svect,cvect,col,imat,rvect,nred); - for ( i = 0; i < col; i++ ) if ( svect[i] ) break; - if ( i < col ) { - spmat[sprow] = v = (U64 *)MALLOC_ATOMIC(spcol*sizeof(U64)); - for ( j = k = 0; j < col; j++ ) - if ( !rhead[j] ) v[k++] = (UINT)svect[j]; - spsugar[sprow] = MAX(maxrs,SG(spol)); - if ( nz ) - spactive[sprow] = sp; - sprow++; - } - nd_free(spol); - } - get_eg(&eg1); init_eg(&eg_f4_1); add_eg(&eg_f4_1,&eg0,&eg1); - if ( DP_Print ) { - fprintf(asir_out,"elim1=%.3fsec,",eg_f4_1.exectime); - fflush(asir_out); - } - /* free index arrays */ - for ( i = 0; i < nred; i++ ) GCFREE(imat[i]->index.c); - - /* elimination (2nd step) */ - colstat = (int *)MALLOC(spcol*sizeof(int)); - rank = nd_gauss_elim_mod64(spmat,spsugar,spactive,sprow,spcol,m,colstat); - r0 = 0; - for ( i = 0; i < rank; i++ ) { - NEXTNODE(r0,r); BDY(r) = - (pointer)vect64_to_ndv(spmat[i],spcol,col,rhead,s0vect); - SG((NDV)BDY(r)) = spsugar[i]; - GCFREE(spmat[i]); - } - if ( r0 ) NEXT(r) = 0; - - for ( ; i < sprow; i++ ) GCFREE(spmat[i]); - get_eg(&eg2); init_eg(&eg_f4_2); add_eg(&eg_f4_2,&eg1,&eg2); - init_eg(&eg_f4); add_eg(&eg_f4,&eg0,&eg2); - if ( DP_Print ) { - fprintf(asir_out,"elim2=%.3fsec,",eg_f4_2.exectime); - fprintf(asir_out,"nsp=%d,nred=%d,spmat=(%d,%d),rank=%d ", - nsp,nred,sprow,spcol,rank); - fprintf(asir_out,"%.3fsec,",eg_f4.exectime); - } - if ( nz ) { - for ( i = 0; i < rank-1; i++ ) NEXT(spactive[i]) = spactive[i+1]; - if ( rank > 0 ) { - NEXT(spactive[rank-1]) = 0; - *nz = spactive[0]; - } else - *nz = 0; - } - return r0; -} -#endif - /* for small finite fields */ NODE nd_f4_red_sf_main(int m,ND_pairs sp0,int nsp,UINT *s0vect,int col, @@ -7793,85 +7800,7 @@ int nd_gauss_elim_mod(UINT **mat0,int *sugar,ND_pairs return rank; } -#if defined(__GNUC__) && SIZEOF_LONG==8 -int nd_gauss_elim_mod64(U64 **mat,int *sugar,ND_pairs *spactive,int row,int col,int md,int *colstat) -{ - int i,j,k,l,rank,s; - U64 inv; - U64 a; - UINT c; - U64 *t,*pivot,*pk; - UINT *ck; - UINT **cmat; - UINT *ct; - ND_pairs pair; - - cmat = (UINT **)MALLOC(row*sizeof(UINT *)); - for ( i = 0; i < row; i++ ) { - cmat[i] = MALLOC_ATOMIC(col*sizeof(UINT)); - bzero(cmat[i],col*sizeof(UINT)); - } - - for ( rank = 0, j = 0; j < col; j++ ) { - for ( i = rank; i < row; i++ ) { - a = mat[i][j]; c = cmat[i][j]; - MOD128(a,c,md); - mat[i][j] = a; cmat[i][j] = 0; - } - for ( i = rank; i < row; i++ ) - if ( mat[i][j] ) - break; - if ( i == row ) { - colstat[j] = 0; - continue; - } else - colstat[j] = 1; - if ( i != rank ) { - t = mat[i]; mat[i] = mat[rank]; mat[rank] = t; - ct = cmat[i]; cmat[i] = cmat[rank]; cmat[rank] = ct; - s = sugar[i]; sugar[i] = sugar[rank]; sugar[rank] = s; - if ( spactive ) { - pair = spactive[i]; spactive[i] = spactive[rank]; - spactive[rank] = pair; - } - } - /* column j is normalized */ - s = sugar[rank]; - inv = invm((UINT)mat[rank][j],md); - /* normalize pivot row */ - for ( k = j, pk = mat[rank]+j, ck = cmat[rank]+j; k < col; k++, pk++, ck++ ) { - a = *pk; c = *ck; MOD128(a,c,md); *pk = (a*inv)%md; *ck = 0; - } - for ( i = rank+1; i < row; i++ ) { - if ( (a = mat[i][j]) != 0 ) { - sugar[i] = MAX(sugar[i],s); - red_by_vect64(md,mat[i]+j,cmat[i]+j,mat[rank]+j,(int)(md-a),col-j); - } - } - rank++; - } - for ( j = col-1, l = rank-1; j >= 0; j-- ) - if ( colstat[j] ) { - for ( k = j, pk = mat[l]+j, ck = cmat[l]+j; k < col; k++, pk++, ck++ ) { - a = *pk; c = *ck; MOD128(a,c,md); *pk = a; *ck = 0; - } - s = sugar[l]; - for ( i = 0; i < l; i++ ) { - a = mat[i][j]; c = cmat[i][j]; MOD128(a,c,md); mat[i][j] = a; cmat[i][j] = 0; - if ( a ) { - sugar[i] = MAX(sugar[i],s); - red_by_vect64(md,mat[i]+j,cmat[i]+j,mat[l]+j,(int)(md-a),col-j); - } - } - l--; - } - for ( i = 0; i < row; i++ ) GCFREE(cmat[i]); - GCFREE(cmat); - return rank; -} -#endif - int nd_gauss_elim_sf(UINT **mat0,int *sugar,int row,int col,int md,int *colstat) { int i,j,k,l,inv,a,rank,s; @@ -8482,7 +8411,7 @@ P ndc_div(int mod,union oNDC a,union oNDC b) int inv,t; if ( mod == -1 ) c.m = _mulsf(a.m,_invsf(b.m)); - else if ( mod == -2 ) divlf(a.gz,b.gz,&c.gz); + else if ( mod == -2 ) divlf(a.z,b.z,&c.z); else if ( mod ) { inv = invm(b.m,mod); DMAR(a.m,inv,0,mod,t); c.m = t; @@ -8502,7 +8431,7 @@ P ndctop(int mod,union oNDC c) if ( mod == -1 ) { e = IFTOF(c.m); MKGFS(e,gfs); return (P)gfs; } else if ( mod == -2 ) { - q = c.gz; return (P)q; + q = c.z; return (P)q; } else if ( mod > 0 ) { STOZ(c.m,q); return (P)q; } else @@ -8584,6 +8513,8 @@ void parse_nd_option(NODE opt) nd_newelim = value?1:0; else if ( !strcmp(key,"intersect") ) nd_intersect = value?1:0; + else if ( !strcmp(key,"syzgen") ) + nd_intersect = ZTOS((Q)value); else if ( !strcmp(key,"lf") ) nd_lf = value?1:0; else if ( !strcmp(key,"trace") ) { @@ -9264,4 +9195,290 @@ NODE nd_f4_lf_trace_main(int m,int **indp) conv_ilist(nd_demand,1,g,indp); return g; } + +#if SIZEOF_LONG==8 + +NDV vect64_to_ndv(mp_limb_t *vect,int spcol,int col,int *rhead,UINT *s0vect) +{ + int j,k,len; + UINT *p; + UINT c; + NDV r; + NMV mr0,mr; + + for ( j = 0, len = 0; j < spcol; j++ ) if ( vect[j] ) len++; + if ( !len ) return 0; + else { + mr0 = (NMV)MALLOC_ATOMIC_IGNORE_OFF_PAGE(nmv_adv*len); +#if 0 + ndv_alloc += nmv_adv*len; +#endif + mr = mr0; + p = s0vect; + for ( j = k = 0; j < col; j++, p += nd_wpd ) + if ( !rhead[j] ) { + if ( (c = (UINT)vect[k++]) != 0 ) { + ndl_copy(p,DL(mr)); CM(mr) = c; NMV_ADV(mr); + } + } + MKNDV(nd_nvar,mr0,len,r); + return r; + } +} + +int nd_to_vect64(int mod,UINT *s0,int n,ND d,mp_limb_t *r) +{ + NM m; + UINT *t,*s,*u; + int i,st,ed,md,prev,c; + + for ( i = 0; i < n; i++ ) r[i] = 0; + prev = 0; + for ( i = 0, m = BDY(d); m; m = NEXT(m) ) { + t = DL(m); + st = prev; + ed = n; + while ( ed > st ) { + md = (st+ed)/2; + u = s0+md*nd_wpd; + c = DL_COMPARE(u,t); + if ( c == 0 ) break; + else if ( c > 0 ) st = md; + else ed = md; + } + r[md] = (mp_limb_t)CM(m); + prev = md; + } + for ( i = 0; !r[i]; i++ ); + return i; +} + +#define MOD128(a,c,m) ((a)=(((c)!=0||((a)>=(m)))?(((((U128)(c))<<64)+(a))%(m)):(a))) + +int ndv_reduce_vect64(int m,mp_limb_t *svect,mp_limb_t *cvect,int col,IndArray *imat,NM_ind_pair *rp0,int nred) +{ + int i,j,k,len,pos,prev; + mp_limb_t a,c,c1,c2; + IndArray ivect; + unsigned char *ivc; + unsigned short *ivs; + unsigned int *ivi; + NDV redv; + NMV mr; + NODE rp; + int maxrs; + + for ( i = 0; i < col; i++ ) cvect[i] = 0; + maxrs = 0; + for ( i = 0; i < nred; i++ ) { + ivect = imat[i]; + k = ivect->head; + a = svect[k]; c = cvect[k]; + MOD128(a,c,m); + svect[k] = a; cvect[k] = 0; + if ( (c = svect[k]) != 0 ) { + Nf4_red++; + maxrs = MAX(maxrs,rp0[i]->sugar); + c = m-c; redv = nd_ps[rp0[i]->index]; + len = LEN(redv); mr = BDY(redv); + svect[k] = 0; prev = k; + switch ( ivect->width ) { + case 1: + ivc = ivect->index.c; + for ( j = 1, NMV_ADV(mr); j < len; j++, NMV_ADV(mr) ) { + pos = prev+ivc[j]; c1 = CM(mr); prev = pos; + c2 = svect[pos]+c1*c; + if ( c2 < svect[pos] ) cvect[pos]++; + svect[pos] = c2; + } + break; + case 2: + ivs = ivect->index.s; + for ( j = 1, NMV_ADV(mr); j < len; j++, NMV_ADV(mr) ) { + pos = prev+ivs[j]; c1 = CM(mr); prev = pos; + c2 = svect[pos]+c1*c; + if ( c2 < svect[pos] ) cvect[pos]++; + svect[pos] = c2; + } + break; + case 4: + ivi = ivect->index.i; + for ( j = 1, NMV_ADV(mr); j < len; j++, NMV_ADV(mr) ) { + pos = prev+ivi[j]; c1 = CM(mr); prev = pos; + c2 = svect[pos]+c1*c; + if ( c2 < svect[pos] ) cvect[pos]++; + svect[pos] = c2; + } + break; + } + } + } + for ( i = 0; i < col; i++ ) { + a = svect[i]; c = cvect[i]; MOD128(a,c,m); svect[i] = a; + } + return maxrs; +} + +/* for Fp, 2^15=<p<2^29 */ + +NODE nd_f4_red_mod64_main(int m,ND_pairs sp0,int nsp,UINT *s0vect,int col, + NM_ind_pair *rvect,int *rhead,IndArray *imat,int nred,ND_pairs *nz) +{ + int spcol,sprow,a; + int i,j,k,l,rank; + NODE r0,r; + ND_pairs sp; + ND spol; + mp_limb_t **spmat; + mp_limb_t *svect,*cvect; + mp_limb_t *v; + int *colstat; + struct oEGT eg0,eg1,eg2,eg_f4,eg_f4_1,eg_f4_2; + int maxrs; + int *spsugar; + ND_pairs *spactive; + + spcol = col-nred; + get_eg(&eg0); + /* elimination (1st step) */ + spmat = (mp_limb_t **)MALLOC(nsp*sizeof(mp_limb_t *)); + svect = (mp_limb_t *)MALLOC(col*sizeof(mp_limb_t)); + cvect = (mp_limb_t *)MALLOC(col*sizeof(mp_limb_t)); + spsugar = (int *)MALLOC(nsp*sizeof(int)); + spactive = !nz?0:(ND_pairs *)MALLOC(nsp*sizeof(ND_pairs)); + for ( a = sprow = 0, sp = sp0; a < nsp; a++, sp = NEXT(sp) ) { + nd_sp(m,0,sp,&spol); + if ( !spol ) continue; + nd_to_vect64(m,s0vect,col,spol,svect); + maxrs = ndv_reduce_vect64(m,svect,cvect,col,imat,rvect,nred); + for ( i = 0; i < col; i++ ) if ( svect[i] ) break; + if ( i < col ) { + spmat[sprow] = v = (mp_limb_t *)MALLOC_ATOMIC(spcol*sizeof(mp_limb_t)); + for ( j = k = 0; j < col; j++ ) + if ( !rhead[j] ) v[k++] = (UINT)svect[j]; + spsugar[sprow] = MAX(maxrs,SG(spol)); + if ( nz ) + spactive[sprow] = sp; + sprow++; + } + nd_free(spol); + } + get_eg(&eg1); init_eg(&eg_f4_1); add_eg(&eg_f4_1,&eg0,&eg1); add_eg(&f4_elim1,&eg0,&eg1); + if ( DP_Print ) { + fprintf(asir_out,"elim1=%.3fsec,",eg_f4_1.exectime); + fflush(asir_out); + } + /* free index arrays */ + for ( i = 0; i < nred; i++ ) GCFREE(imat[i]->index.c); + + /* elimination (2nd step) */ + colstat = (int *)MALLOC(spcol*sizeof(int)); + rank = nd_gauss_elim_mod64(spmat,spsugar,spactive,sprow,spcol,m,colstat); + r0 = 0; + for ( i = 0; i < rank; i++ ) { + NEXTNODE(r0,r); BDY(r) = + (pointer)vect64_to_ndv(spmat[i],spcol,col,rhead,s0vect); + SG((NDV)BDY(r)) = spsugar[i]; + GCFREE(spmat[i]); + } + if ( r0 ) NEXT(r) = 0; + + for ( ; i < sprow; i++ ) GCFREE(spmat[i]); + get_eg(&eg2); init_eg(&eg_f4_2); add_eg(&eg_f4_2,&eg1,&eg2); add_eg(&f4_elim2,&eg1,&eg2); + init_eg(&eg_f4); add_eg(&eg_f4,&eg0,&eg2); + if ( DP_Print ) { + fprintf(asir_out,"elim2=%.3fsec,",eg_f4_2.exectime); + fprintf(asir_out,"nsp=%d,nred=%d,spmat=(%d,%d),rank=%d ", + nsp,nred,sprow,spcol,rank); + fprintf(asir_out,"%.3fsec,",eg_f4.exectime); + } + if ( nz ) { + for ( i = 0; i < rank-1; i++ ) NEXT(spactive[i]) = spactive[i+1]; + if ( rank > 0 ) { + NEXT(spactive[rank-1]) = 0; + *nz = spactive[0]; + } else + *nz = 0; + } + return r0; +} + +int nd_gauss_elim_mod64(mp_limb_t **mat,int *sugar,ND_pairs *spactive,int row,int col,int md,int *colstat) +{ + int i,j,k,l,rank,s; + mp_limb_t inv; + mp_limb_t a; + UINT c; + mp_limb_t *t,*pivot,*pk; + UINT *ck; + UINT **cmat; + UINT *ct; + ND_pairs pair; + + cmat = (UINT **)MALLOC(row*sizeof(UINT *)); + for ( i = 0; i < row; i++ ) { + cmat[i] = MALLOC_ATOMIC(col*sizeof(UINT)); + bzero(cmat[i],col*sizeof(UINT)); + } + + for ( rank = 0, j = 0; j < col; j++ ) { + for ( i = rank; i < row; i++ ) { + a = mat[i][j]; c = cmat[i][j]; + MOD128(a,c,md); + mat[i][j] = a; cmat[i][j] = 0; + } + for ( i = rank; i < row; i++ ) + if ( mat[i][j] ) + break; + if ( i == row ) { + colstat[j] = 0; + continue; + } else + colstat[j] = 1; + if ( i != rank ) { + t = mat[i]; mat[i] = mat[rank]; mat[rank] = t; + ct = cmat[i]; cmat[i] = cmat[rank]; cmat[rank] = ct; + s = sugar[i]; sugar[i] = sugar[rank]; sugar[rank] = s; + if ( spactive ) { + pair = spactive[i]; spactive[i] = spactive[rank]; + spactive[rank] = pair; + } + } + /* column j is normalized */ + s = sugar[rank]; + inv = invm((UINT)mat[rank][j],md); + /* normalize pivot row */ + for ( k = j, pk = mat[rank]+j, ck = cmat[rank]+j; k < col; k++, pk++, ck++ ) { + a = *pk; c = *ck; MOD128(a,c,md); *pk = (a*inv)%md; *ck = 0; + } + for ( i = rank+1; i < row; i++ ) { + if ( (a = mat[i][j]) != 0 ) { + sugar[i] = MAX(sugar[i],s); + red_by_vect64(md,mat[i]+j,cmat[i]+j,mat[rank]+j,(int)(md-a),col-j); + Nf4_red++; + } + } + rank++; + } + for ( j = col-1, l = rank-1; j >= 0; j-- ) + if ( colstat[j] ) { + for ( k = j, pk = mat[l]+j, ck = cmat[l]+j; k < col; k++, pk++, ck++ ) { + a = *pk; c = *ck; MOD128(a,c,md); *pk = a; *ck = 0; + } + s = sugar[l]; + for ( i = 0; i < l; i++ ) { + a = mat[i][j]; c = cmat[i][j]; MOD128(a,c,md); mat[i][j] = a; cmat[i][j] = 0; + if ( a ) { + sugar[i] = MAX(sugar[i],s); + red_by_vect64(md,mat[i]+j,cmat[i]+j,mat[l]+j,(int)(md-a),col-j); + Nf4_red++; + } + } + l--; + } + for ( i = 0; i < row; i++ ) GCFREE(cmat[i]); + GCFREE(cmat); + return rank; +} +#endif