Description: filter SSE3 intrinsics
 src/sse.c is compiled as soon as at least SSE is enabled.  However, this file
 includes some intrinsics which are specific to SSE3, so adjust the source code
 to make sure the -msse3 option has actually been specified.  Unfortunately,
 this also means that the code is unusable as is on x86 without proper SSE3
 support.
Author: Étienne Mollier <emollier@debian.org>
Forwarded: no
Last-Update: 2021-11-17
---
This patch header follows DEP-3: http://dep.debian.net/deps/dep3/
--- phyml.orig/src/sse.h
+++ phyml/src/sse.h
@@ -22,7 +22,7 @@
 #include "times.h"
 #include "mixt.h"
 
-#if (defined(__SSE__) || defined(__SSE2__) || defined(__SSE3__))
+#if defined(__SSE3__)
 
 void SSE_Update_Partial_Lk(t_tree *tree,t_edge *b_fcus,t_node *n);
 void SSE_Update_Eigen_Lr(t_edge *b, t_tree *tree);
--- phyml.orig/src/lk.c
+++ phyml/src/lk.c
@@ -779,7 +779,7 @@
             {
 #if (defined(__AVX__) || defined(__AVX2__))
               tree->site_lk_cat[catg] = AVX_Lk_Core_One_Class_No_Eigen_Lr(p_lk_left,p_lk_rght,Pij_rr,tPij_rr,pi,ns,ambiguity_check,state);
-#elif (defined(__SSE__) || defined(__SSE2__) || defined(__SSE3__))
+#elif defined(__SSE3__)
               tree->site_lk_cat[catg] = SSE_Lk_Core_One_Class_No_Eigen_Lr(p_lk_left,p_lk_rght,Pij_rr,tPij_rr,pi,ns,ambiguity_check,state);
 #else
               tree->site_lk_cat[catg] = Lk_Core_One_Class_No_Eigen_Lr(p_lk_left,p_lk_rght,Pij_rr,pi,ns,ambiguity_check,state);
@@ -870,7 +870,7 @@
             {
 #if (defined(__AVX__) || defined(__AVX2__))
               tree->site_lk_cat[catg] = AVX_Lk_Core_One_Class_Eigen_Lr(dot_prod,expl ? expl : NULL,ns);
-#elif (defined(__SSE__) || defined(__SSE2__) || defined(__SSE3__))
+#elif defined(__SSE3__)
               tree->site_lk_cat[catg] = SSE_Lk_Core_One_Class_Eigen_Lr(dot_prod,expl ? expl : NULL,ns);
 #else
               tree->site_lk_cat[catg] = Lk_Core_One_Class_Eigen_Lr(dot_prod,expl ? expl : NULL,ns);
@@ -963,7 +963,7 @@
               AVX_Lk_dLk_Core_One_Class_Eigen_Lr(dot_prod,
                                                  expl ? expl : NULL,
                                                  ns,&core_lk,&core_dlk);
-#elif (defined(__SSE__) || defined(__SSE2__) ||  defined(__SSE3__))
+#elif defined(__SSE3__)
               SSE_Lk_dLk_Core_One_Class_Eigen_Lr(dot_prod,
                                                  expl ? expl : NULL,
                                                  ns,&core_lk,&core_dlk);
@@ -1045,7 +1045,7 @@
 #if (defined(__AVX__) || defined(__AVX2__))
       AVX_Update_Eigen_Lr(b,tree);
       return;
-#elif (defined(__SSE__) || defined(__SSE2__) ||  defined(__SSE3__))
+#elif defined(__SSE3__)
       SSE_Update_Eigen_Lr(b,tree);
       return;
 #endif
@@ -1292,7 +1292,7 @@
         {
 #if (defined(__AVX__) || defined(__AVX2__))
           AVX_Update_Partial_Lk(tree,b,d);
-#elif (defined(__SSE__) || defined(__SSE2__) || defined(__SSE3__))
+#elif defined(__SSE3__)
           SSE_Update_Partial_Lk(tree,b,d);
 #else
           Default_Update_Partial_Lk(tree,b,d);
--- phyml.orig/src/sse.c
+++ phyml/src/sse.c
@@ -16,7 +16,7 @@
 //////////////////////////////////////////////////////////////
 //////////////////////////////////////////////////////////////
 
-#if ((defined(__SSE__) || defined(__SSE2__) || defined(__SSE3__)) && !((defined __AVX__ || defined __AVX2__)))
+#if (defined(__SSE3__) && !((defined __AVX__ || defined __AVX2__)))
 
 void SSE_Update_Eigen_Lr(t_edge *b, t_tree *tree)
 {
--- phyml.orig/src/utilities.h
+++ phyml/src/utilities.h
@@ -38,7 +38,7 @@
 #include <xmmintrin.h>
 #include <pmmintrin.h>
 #include <immintrin.h>
-#elif (defined(__SSE__) || defined(__SSE2__) || defined(__SSE3__))
+#elif defined(__SSE3__)
 #include <emmintrin.h>
 #include <pmmintrin.h>
 #endif
@@ -122,7 +122,7 @@
 
 #if (defined __AVX__ || defined __AVX2__)
 #define BYTE_ALIGN 32
-#elif (defined(__SSE__) || defined(__SSE2__) ||  defined __SSE3__)
+#elif defined __SSE3__
 #define BYTE_ALIGN 16
 #else
 #define BYTE_ALIGN 1
@@ -772,8 +772,10 @@
 
 #if (defined(__AVX__) || defined(__AVX2__))
   __m256d *_tPij1,*_tPij2,*_pmat1plk1,*_pmat2plk2,*_plk0,*_l_ev,*_r_ev,*_prod_left,*_prod_rght;
-#elif (defined(__SSE__) || defined(__SSE2__) || defined(__SSE3__))
+#elif defined(__SSE3__)
   __m128d *_tPij1,*_tPij2,*_pmat1plk1,*_pmat2plk2,*_plk0,*_l_ev,*_r_ev,*_prod_left,*_prod_rght;
+#else
+  long *_tPij1,*_tPij2,*_pmat1plk1,*_pmat2plk2,*_plk0,*_l_ev,*_r_ev,*_prod_left,*_prod_rght;
 #endif
 
   phydbl                  *p_lk_left_pi,*l_ev;
@@ -2559,7 +2561,7 @@
 
 #if (defined(__AVX__) || defined(__AVX2__))
 #include "avx.h"
-#elif (defined(__SSE__) || defined(__SSE2__) || defined(__SSE3__))
+#elif defined(__SSE3__)
 #include "sse.h"
 #endif
 
--- phyml.orig/src/free.c
+++ phyml/src/free.c
@@ -378,7 +378,7 @@
       Free(tree->p_lk_left_pi);
       Free(tree->l_ev);
       
-#if (defined(__AVX__) || defined(__AVX2__) || defined(__SSE__) || defined(__SSE2__) || defined(__SSE3__))
+#if (defined(__AVX__) || defined(__AVX2__) || defined(__SSE3__))
       Free(tree->_tPij1);
       Free(tree->_tPij2);
       Free(tree->_pmat1plk1);
--- phyml.orig/src/make.c
+++ phyml/src/make.c
@@ -29,7 +29,7 @@
   const unsigned int ns = tree->mod->ns;
   const unsigned int nsns =  ns * ns;
 
-#if (defined(__AVX__) || defined(__AVX2__) || defined(__SSE__) || defined(__SSE2__) || defined(__SSE3__))
+#if (defined(__AVX__) || defined(__AVX2__) || defined(__SSE3__))
   const unsigned int sz = (int)BYTE_ALIGN / 8;
   const unsigned int ncatg = tree->mod->ras->n_catg;
 #endif
@@ -76,7 +76,7 @@
   tree->_prod_rght   = _aligned_malloc(ns / sz * sizeof(__m256d),BYTE_ALIGN);
   tree->big_lk_array     = _aligned_malloc(((3*tree->n_otu-2)*tree->data->crunch_len*MAX(tree->mod->ras->n_catg,tree->mod->n_mixt_classes)*tree->mod->ns + 2*(2*tree->n_otu-1)*tree->mod->ras->n_catg*tree->mod->ns*tree->mod->ns) * sizeof(phydbl),BYTE_ALIGN);
 #endif
-#elif (defined(__SSE__) || defined(__SSE2__) || defined(__SSE3__))
+#elif (defined(__SSE3__))
 #ifndef WIN32
   if(posix_memalign((void **)&tree->dot_prod,BYTE_ALIGN,(size_t)tree->n_pattern*tree->mod->ns*MAX(tree->mod->ras->n_catg,tree->mod->n_mixt_classes)*sizeof(phydbl))) Generic_Exit(__FILE__,__LINE__,__FUNCTION__);
   if(posix_memalign((void **)&tree->expl,BYTE_ALIGN,(size_t)3*MAX(tree->mod->ras->n_catg,tree->mod->n_mixt_classes)*tree->mod->ns*sizeof(phydbl))) Generic_Exit(__FILE__,__LINE__,__FUNCTION__);
@@ -108,7 +108,7 @@
   tree->_prod_rght   = _aligned_malloc(ns / sz * sizeof(__m128d),BYTE_ALIGN);
   tree->big_lk_array     = _aligned_malloc(((3*tree->n_otu-2)*tree->data->crunch_len*MAX(tree->mod->ras->n_catg,tree->mod->n_mixt_classes)*tree->mod->ns + 2*(2*tree->n_otu-1)*tree->mod->ras->n_catg*tree->mod->ns*tree->mod->ns) * sizeof(phydbl),BYTE_ALIGN);
 #endif
-#elif (!(defined(__AVX__) || defined(__AVX2__) || defined(__SSE__) || defined(__SSE2__) || defined(__SSE3__)))
+#elif (!(defined(__AVX__) || defined(__AVX2__) || defined(__SSE3__)))
   tree->dot_prod = (phydbl *)mCalloc(tree->n_pattern*tree->mod->ns*MAX(tree->mod->ras->n_catg,tree->mod->n_mixt_classes),sizeof(phydbl));
   tree->expl = (phydbl *)mCalloc(3*MAX(tree->mod->ras->n_catg,tree->mod->n_mixt_classes)*tree->mod->ns,sizeof(phydbl));
   tree->p_lk_left_pi = (phydbl *)mCalloc(ns,sizeof(phydbl));
@@ -381,7 +381,7 @@
         {
           b->p_lk_left   = NULL;
           
-#if (defined(__AVX__) || defined(__AVX2__) || defined(__SSE__) || defined(__SSE2__) || defined(__SSE3__))
+#if (defined(__AVX__) || defined(__AVX2__) || defined(__SSE3__))
 #ifndef WIN32
           if(posix_memalign((void **)&b->p_lk_tip_l,BYTE_ALIGN,(size_t)tree->data->crunch_len*tree->mod->ns*sizeof(phydbl))) Generic_Exit(__FILE__,__LINE__,__FUNCTION__);
 #else
@@ -458,7 +458,7 @@
         }
       else if(b->rght->tax)
         {
-#if (defined(__AVX__) || defined(__AVX2__) || defined(__SSE__) || defined(__SSE2__) || defined(__SSE3__))
+#if (defined(__AVX__) || defined(__AVX2__) || defined(__SSE3__))
 #ifndef WIN32
           if(posix_memalign((void **)&b->p_lk_tip_r,BYTE_ALIGN,(size_t)tree->data->crunch_len*tree->mod->ns*sizeof(phydbl))) Generic_Exit(__FILE__,__LINE__,__FUNCTION__);
 #else
@@ -511,7 +511,7 @@
 
   tree->sum_scale_extra_0 = (int *)mCalloc(tree->data->crunch_len*MAX(tree->mod->ras->n_catg,tree->mod->n_mixt_classes),sizeof(int));
 
-#if (defined(__AVX__) || defined(__AVX2__) || defined(__SSE__) || defined(__SSE2__) || defined(__SSE3__))
+#if (defined(__AVX__) || defined(__AVX2__) || defined(__SSE3__))
 #ifndef WIN32
   if(posix_memalign((void **)&tree->p_lk_extra_0,BYTE_ALIGN,(size_t)tree->data->crunch_len*MAX(tree->mod->ras->n_catg,tree->mod->n_mixt_classes)*tree->mod->ns*sizeof(phydbl))) Generic_Exit(__FILE__,__LINE__,__FUNCTION__);
 #else
@@ -522,7 +522,7 @@
 #endif
 
   
-#if (defined(__AVX__) || defined(__AVX2__) || defined(__SSE__) || defined(__SSE2__) || defined(__SSE3__))
+#if (defined(__AVX__) || defined(__AVX2__) || defined(__SSE3__))
 #ifndef WIN32
   if(posix_memalign((void **)&tree->p_lk_tip_extra_0,BYTE_ALIGN,(size_t)tree->data->crunch_len*tree->mod->ns*sizeof(phydbl))) Generic_Exit(__FILE__,__LINE__,__FUNCTION__);
 #else
@@ -540,7 +540,7 @@
 
   tree->sum_scale_extra_1 = (int *)mCalloc(tree->data->crunch_len*MAX(tree->mod->ras->n_catg,tree->mod->n_mixt_classes),sizeof(int));
 
-#if (defined(__AVX__) || defined(__AVX2__) || defined(__SSE__) || defined(__SSE2__) || defined(__SSE3__))
+#if (defined(__AVX__) || defined(__AVX2__) || defined(__SSE3__))
 #ifndef WIN32
   if(posix_memalign((void **)&tree->p_lk_extra_1,BYTE_ALIGN,(size_t)tree->data->crunch_len*MAX(tree->mod->ras->n_catg,tree->mod->n_mixt_classes)*tree->mod->ns*sizeof(phydbl))) Generic_Exit(__FILE__,__LINE__,__FUNCTION__);
 #else
@@ -551,7 +551,7 @@
 #endif
 
   
-#if (defined(__AVX__) || defined(__AVX2__) || defined(__SSE__) || defined(__SSE2__) || defined(__SSE3__))
+#if (defined(__AVX__) || defined(__AVX2__) || defined(__SSE3__))
 #ifndef WIN32
   if(posix_memalign((void **)&tree->p_lk_tip_extra_1,BYTE_ALIGN,(size_t)tree->data->crunch_len*tree->mod->ns*sizeof(phydbl))) Generic_Exit(__FILE__,__LINE__,__FUNCTION__);
 #else
@@ -947,7 +947,7 @@
   mod->Pij_rr = (vect_dbl *)mCalloc(1,sizeof(vect_dbl));
   Init_Vect_Dbl(0,mod->Pij_rr);
 
-#if (defined(__AVX__) || defined(__AVX2__) || defined(__SSE__) || defined(__SSE2__) || defined(__SSE3__))
+#if (defined(__AVX__) || defined(__AVX2__) || defined(__SSE3__))
 #ifndef WIN32
   if(posix_memalign((void **)&mod->Pij_rr->v,BYTE_ALIGN,(size_t)mod->ras->n_catg*mod->ns*mod->ns*sizeof(phydbl))) Generic_Exit(__FILE__,__LINE__,__FUNCTION__);
 #else
@@ -1061,7 +1061,7 @@
   e_frq->pi               = (vect_dbl *)mCalloc(1,sizeof(vect_dbl));
   e_frq->pi->len          = ns;
 
-#if (defined(__AVX__) || defined(__AVX2__) || defined(__SSE__) || defined(__SSE2__) || defined(__SSE3__))
+#if (defined(__AVX__) || defined(__AVX2__) || defined(__SSE3__))
 #ifndef WIN32
   if(posix_memalign((void **)&e_frq->pi->v,BYTE_ALIGN,(size_t)ns*sizeof(phydbl))) Generic_Exit(__FILE__,__LINE__,__FUNCTION__);
 #else
@@ -1184,7 +1184,7 @@
   eig->r_e_vect_im = (phydbl *)mCalloc(ns*ns,sizeof(phydbl));
   eig->q           = (phydbl *)mCalloc(ns*ns,sizeof(phydbl));
 
-#if (defined(__AVX__) || defined(__AVX2__) || defined(__SSE__) || defined(__SSE2__) || defined(__SSE3__))
+#if (defined(__AVX__) || defined(__AVX2__) || defined(__SSE3__))
 #ifndef WIN32
   if(posix_memalign((void **)&eig->r_e_vect,BYTE_ALIGN,ns*ns*sizeof(phydbl))) Generic_Exit(__FILE__,__LINE__,__FUNCTION__);
   if(posix_memalign((void **)&eig->l_e_vect,BYTE_ALIGN,ns*ns*sizeof(phydbl))) Generic_Exit(__FILE__,__LINE__,__FUNCTION__);
