|
|
Patch Set 1 #
MessagesTotal messages: 3
Sri, probably need to remove the [google] prefix in the subject line to prevent this from being filtered. David On Thu, Mar 1, 2012 at 12:45 PM, Sriraman Tallam <tmsriram@google.com> wrote: > Patch to add builtins to detect CPU type: > ======================================== > > I have ported the patch from google/gcc-4_6 to google/main. I also want this > patch to be considered for trunk. Please see this discussion: > http://gcc.gnu.org/ml/gcc-patches/2011-08/msg01355.html > when this patch was reviewed the last time. > > One of the main concerns was about making CPU detection initialization a > constructor. The main point raised was about constructor ordering. I have now > added a priority value of 101 to the CPU detection constructor to make it very high > priority so that it is guaranteed to fire before every constructor without > an explicitly marked priority value. However, IFUNC initializers > will still fire before this constructor, so the cpu initialization routine > has to be explicitly called in such initializers for which I have added a > builtin: __builtin_cpu_init (). > > I would like to reopen discussions on this to make it suitable for trunk > this time around. 
> > This patch adds the following new builtins: > > __builtin_cpu_init > __builtin_cpu_supports_cmov > __builtin_cpu_supports_mmx > __builtin_cpu_supports_popcount > __builtin_cpu_supports_sse > __builtin_cpu_supports_sse2 > __builtin_cpu_supports_sse3 > __builtin_cpu_supports_ssse3 > __builtin_cpu_supports_sse4_1 > __builtin_cpu_supports_sse4_2 > __builtin_cpu_is_amd > __builtin_cpu_is_intel_atom > __builtin_cpu_is_intel_core2 > __builtin_cpu_is_intel > __builtin_cpu_is_intel_corei7 > __builtin_cpu_is_intel_corei7_nehalem > __builtin_cpu_is_intel_corei7_westmere > __builtin_cpu_is_intel_corei7_sandybridge > __builtin_cpu_is_amdfam10 > __builtin_cpu_is_amdfam10_barcelona > __builtin_cpu_is_amdfam10_shanghai > __builtin_cpu_is_amdfam10_istanbul > __builtin_cpu_is_amdfam15_bdver1 > __builtin_cpu_is_amdfam15_bdver2 > > > * config/i386/i386.c (build_struct_with_one_bit_fields): New function. > (make_var_decl): New function. > (get_field_from_struct): New function. > (fold_builtin_cpu): New function. > (ix86_fold_builtin): New function. > (ix86_expand_builtin): Expand new builtins by folding them. > (make_platform_builtin): New function. > (ix86_init_platform_type_builtins): Make the new builtins. > (ix86_init_builtins): Make new builtins to detect CPU type. > (TARGET_FOLD_BUILTIN): New macro. > (IX86_BUILTIN_CPU_SUPPORTS_CMOV): New enum value. > (IX86_BUILTIN_CPU_SUPPORTS_MMX): New enum value. > (IX86_BUILTIN_CPU_SUPPORTS_POPCOUNT): New enum value. > (IX86_BUILTIN_CPU_SUPPORTS_SSE): New enum value. > (IX86_BUILTIN_CPU_SUPPORTS_SSE2): New enum value. > (IX86_BUILTIN_CPU_SUPPORTS_SSE3): New enum value. > (IX86_BUILTIN_CPU_SUPPORTS_SSSE3): New enum value. > (IX86_BUILTIN_CPU_SUPPORTS_SSE4_1): New enum value. > (IX86_BUILTIN_CPU_SUPPORTS_SSE4_2): New enum value. > (IX86_BUILTIN_CPU_INIT): New enum value. > (IX86_BUILTIN_CPU_IS_AMD): New enum value. > (IX86_BUILTIN_CPU_IS_INTEL): New enum value. > (IX86_BUILTIN_CPU_IS_INTEL_ATOM): New enum value. 
> (IX86_BUILTIN_CPU_IS_INTEL_CORE2): New enum value. > (IX86_BUILTIN_CPU_IS_INTEL_COREI7_NEHALEM): New enum value. > (IX86_BUILTIN_CPU_IS_INTEL_COREI7_WESTMERE): New enum value. > (IX86_BUILTIN_CPU_IS_INTEL_COREI7_SANDYBRIDGE): New enum value. > (IX86_BUILTIN_CPU_IS_AMDFAM10_BARCELONA): New enum value. > (IX86_BUILTIN_CPU_IS_AMDFAM10_SHANGHAI): New enum value. > (IX86_BUILTIN_CPU_IS_AMDFAM10_ISTANBUL): New enum value. > (IX86_BUILTIN_CPU_IS_AMDFAM15H_BDVER1): New enum value. > (IX86_BUILTIN_CPU_IS_AMDFAM15H_BDVER2): New enum value. > * config/i386/i386-builtin-types.def: New function type. > * testsuite/gcc.target/builtin_target.c: New testcase. > > * libgcc/config/i386/i386-cpuinfo.c: New file. > * libgcc/config/i386/t-cpuinfo: New file. > * libgcc/config.host: Include t-cpuinfo. > * libgcc/config/i386/libgcc-glibc.ver: Version symbols __cpu_model > and __cpu_features. > > > > Index: libgcc/config.host > =================================================================== > --- libgcc/config.host (revision 184644) > +++ libgcc/config.host (working copy) > @@ -1128,7 +1128,7 @@ i[34567]86-*-linux* | x86_64-*-linux* | \ > i[34567]86-*-kfreebsd*-gnu | x86_64-*-kfreebsd*-gnu | \ > i[34567]86-*-knetbsd*-gnu | \ > i[34567]86-*-gnu*) > - tmake_file="${tmake_file} t-tls i386/t-linux" > + tmake_file="${tmake_file} t-tls i386/t-linux i386/t-cpuinfo" > if test "$libgcc_cv_cfi" = "yes"; then > tmake_file="${tmake_file} t-stack i386/t-stack-i386" > fi > Index: libgcc/config/i386/t-cpuinfo > =================================================================== > --- libgcc/config/i386/t-cpuinfo (revision 0) > +++ libgcc/config/i386/t-cpuinfo (revision 0) > @@ -0,0 +1 @@ > +LIB2ADD += $(srcdir)/config/i386/i386-cpuinfo.c > Index: libgcc/config/i386/i386-cpuinfo.c > =================================================================== > --- libgcc/config/i386/i386-cpuinfo.c (revision 0) > +++ libgcc/config/i386/i386-cpuinfo.c (revision 0) > @@ -0,0 +1,306 @@ > +/* Get CPU type and 
Features for x86 processors. > + Copyright (C) 2011 Free Software Foundation, Inc. > + Contributed by Sriraman Tallam (tmsriram@google.com) > + > +This file is part of GCC. > + > +GCC is free software; you can redistribute it and/or modify it under > +the terms of the GNU General Public License as published by the Free > +Software Foundation; either version 3, or (at your option) any later > +version. > + > +GCC is distributed in the hope that it will be useful, but WITHOUT ANY > +WARRANTY; without even the implied warranty of MERCHANTABILITY or > +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License > +for more details. > + > +You should have received a copy of the GNU General Public License > +along with GCC; see the file COPYING3. If not see > +<http://www.gnu.org/licenses/>. */ > + > +#include "cpuid.h" > +#include "tsystem.h" > + > +int __cpu_indicator_init (void) __attribute__ ((constructor (101))); > + > +enum vendor_signatures > +{ > + SIG_INTEL = 0x756e6547 /* Genu */, > + SIG_AMD = 0x68747541 /* Auth */ > +}; > + > +/* ISA Features supported. */ > + > +struct __processor_features > +{ > + unsigned int __cpu_cmov : 1; > + unsigned int __cpu_mmx : 1; > + unsigned int __cpu_popcnt : 1; > + unsigned int __cpu_sse : 1; > + unsigned int __cpu_sse2 : 1; > + unsigned int __cpu_sse3 : 1; > + unsigned int __cpu_ssse3 : 1; > + unsigned int __cpu_sse4_1 : 1; > + unsigned int __cpu_sse4_2 : 1; > +} __cpu_features; > + > +/* Processor Model. */ > + > +struct __processor_model > +{ > + /* Vendor. */ > + unsigned int __cpu_is_amd : 1; > + unsigned int __cpu_is_intel : 1; > + /* CPU type. 
*/ > + unsigned int __cpu_is_intel_atom : 1; > + unsigned int __cpu_is_intel_core2 : 1; > + unsigned int __cpu_is_intel_corei7 : 1; > + unsigned int __cpu_is_intel_corei7_nehalem : 1; > + unsigned int __cpu_is_intel_corei7_westmere : 1; > + unsigned int __cpu_is_intel_corei7_sandybridge : 1; > + unsigned int __cpu_is_amdfam10h : 1; > + unsigned int __cpu_is_amdfam10h_barcelona : 1; > + unsigned int __cpu_is_amdfam10h_shanghai : 1; > + unsigned int __cpu_is_amdfam10h_istanbul : 1; > + unsigned int __cpu_is_amdfam15h_bdver1 : 1; > + unsigned int __cpu_is_amdfam15h_bdver2 : 1; > +} __cpu_model; > + > +/* Get the specific type of AMD CPU. */ > + > +static void > +get_amd_cpu (unsigned int family, unsigned int model) > +{ > + switch (family) > + { > + /* AMD Family 10h. */ > + case 0x10: > + switch (model) > + { > + case 0x2: > + /* Barcelona. */ > + __cpu_model.__cpu_is_amdfam10h = 1; > + __cpu_model.__cpu_is_amdfam10h_barcelona = 1; > + break; > + case 0x4: > + /* Shanghai. */ > + __cpu_model.__cpu_is_amdfam10h = 1; > + __cpu_model.__cpu_is_amdfam10h_shanghai = 1; > + break; > + case 0x8: > + /* Istanbul. */ > + __cpu_model.__cpu_is_amdfam10h = 1; > + __cpu_model.__cpu_is_amdfam10h_istanbul = 1; > + break; > + default: > + break; > + } > + break; > + /* AMD Family 15h. */ > + case 0x15: > + /* Bulldozer version 1. */ > + if (model >= 0 && model <= 0xf) > + __cpu_model.__cpu_is_amdfam15h_bdver1 = 1; > + /* Bulldozer version 2. */ > + if (model >= 0x10 && model <= 0x1f) > + __cpu_model.__cpu_is_amdfam15h_bdver2 = 1; > + break; > + default: > + break; > + } > +} > + > +/* Get the specific type of Intel CPU. */ > + > +static void > +get_intel_cpu (unsigned int family, unsigned int model, unsigned int brand_id) > +{ > + /* Parse family and model only if brand ID is 0. */ > + if (brand_id == 0) > + { > + switch (family) > + { > + case 0x5: > + /* Pentium. */ > + break; > + case 0x6: > + switch (model) > + { > + case 0x1c: > + case 0x26: > + /* Atom. 
*/ > + __cpu_model.__cpu_is_intel_atom = 1; > + break; > + case 0x1a: > + case 0x1e: > + case 0x1f: > + case 0x2e: > + /* Nehalem. */ > + __cpu_model.__cpu_is_intel_corei7 = 1; > + __cpu_model.__cpu_is_intel_corei7_nehalem = 1; > + break; > + case 0x25: > + case 0x2c: > + case 0x2f: > + /* Westmere. */ > + __cpu_model.__cpu_is_intel_corei7 = 1; > + __cpu_model.__cpu_is_intel_corei7_westmere = 1; > + break; > + case 0x2a: > + /* Sandy Bridge. */ > + __cpu_model.__cpu_is_intel_corei7 = 1; > + __cpu_model.__cpu_is_intel_corei7_sandybridge = 1; > + break; > + case 0x17: > + case 0x1d: > + /* Penryn. */ > + case 0x0f: > + /* Merom. */ > + __cpu_model.__cpu_is_intel_core2 = 1; > + break; > + default: > + break; > + } > + break; > + default: > + /* We have no idea. */ > + break; > + } > + } > +} > + > +static void > +get_available_features (unsigned int ecx, unsigned int edx) > +{ > + __cpu_features.__cpu_cmov = (edx & bit_CMOV) ? 1 : 0; > + __cpu_features.__cpu_mmx = (edx & bit_MMX) ? 1 : 0; > + __cpu_features.__cpu_sse = (edx & bit_SSE) ? 1 : 0; > + __cpu_features.__cpu_sse2 = (edx & bit_SSE2) ? 1 : 0; > + __cpu_features.__cpu_popcnt = (ecx & bit_POPCNT) ? 1 : 0; > + __cpu_features.__cpu_sse3 = (ecx & bit_SSE3) ? 1 : 0; > + __cpu_features.__cpu_ssse3 = (ecx & bit_SSSE3) ? 1 : 0; > + __cpu_features.__cpu_sse4_1 = (ecx & bit_SSE4_1) ? 1 : 0; > + __cpu_features.__cpu_sse4_2 = (ecx & bit_SSE4_2) ? 1 : 0; > +} > + > + > +/* Sanity check for the vendor and cpu type flags. */ > + > +static int > +sanity_check (void) > +{ > + unsigned int one_type = 0; > + > + /* Vendor cannot be Intel and AMD. */ > + gcc_assert((__cpu_model.__cpu_is_intel == 0) > + || (__cpu_model.__cpu_is_amd == 0)); > + > + /* Only one CPU type can be set. 
*/ > + one_type = (__cpu_model.__cpu_is_intel_atom > + + __cpu_model.__cpu_is_intel_core2 > + + __cpu_model.__cpu_is_intel_corei7_nehalem > + + __cpu_model.__cpu_is_intel_corei7_westmere > + + __cpu_model.__cpu_is_intel_corei7_sandybridge > + + __cpu_model.__cpu_is_amdfam10h_barcelona > + + __cpu_model.__cpu_is_amdfam10h_shanghai > + + __cpu_model.__cpu_is_amdfam10h_istanbul > + + __cpu_model.__cpu_is_amdfam15h_bdver1 > + + __cpu_model.__cpu_is_amdfam15h_bdver2); > + > + gcc_assert (one_type <= 1); > + return 0; > +} > + > +/* A noinline function calling __get_cpuid. Having many calls to > + cpuid in one function in 32-bit mode causes GCC to complain: > + "can’t find a register in class ‘CLOBBERED_REGS’". This is > + related to PR rtl-optimization 44174. */ > + > +static int __attribute__ ((noinline)) > +__get_cpuid_output (unsigned int __level, > + unsigned int *__eax, unsigned int *__ebx, > + unsigned int *__ecx, unsigned int *__edx) > +{ > + return __get_cpuid (__level, __eax, __ebx, __ecx, __edx); > +} > + > + > +/* A constructor function that sets __cpu_model and __cpu_features with > + the right values. This needs to run only once. This constructor is > + given the highest priority and it will run before constructors without > + the priority set. However, it still runs after ifunc initializers and > + needs to be called explicitly there. */ > + > +int __attribute__ ((constructor (101))) > +__cpu_indicator_init (void) > +{ > + unsigned int eax, ebx, ecx, edx; > + > + int max_level = 5; > + unsigned int vendor; > + unsigned int model, family, brand_id; > + unsigned int extended_model, extended_family; > + static int called = 0; > + > + /* This function needs to run just once. */ > + if (called) > + return 0; > + else > + called = 1; > + > + /* Assume cpuid insn present. Run in level 0 to get vendor id. 
*/ > + if (!__get_cpuid_output (0, &eax, &ebx, &ecx, &edx)) > + return -1; > + > + vendor = ebx; > + max_level = eax; > + > + if (max_level < 1) > + return -1; > + > + if (!__get_cpuid_output (1, &eax, &ebx, &ecx, &edx)) > + return -1; > + > + model = (eax >> 4) & 0x0f; > + family = (eax >> 8) & 0x0f; > + brand_id = ebx & 0xff; > + extended_model = (eax >> 12) & 0xf0; > + extended_family = (eax >> 20) & 0xff; > + > + if (vendor == SIG_INTEL) > + { > + /* Adjust model and family for Intel CPUS. */ > + if (family == 0x0f) > + { > + family += extended_family; > + model += extended_model; > + } > + else if (family == 0x06) > + model += extended_model; > + > + /* Get CPU type. */ > + __cpu_model.__cpu_is_intel = 1; > + get_intel_cpu (family, model, brand_id); > + } > + > + if (vendor == SIG_AMD) > + { > + /* Adjust model and family for AMD CPUS. */ > + if (family == 0x0f) > + { > + family += extended_family; > + model += (extended_model << 4); > + } > + > + /* Get CPU type. */ > + __cpu_model.__cpu_is_amd = 1; > + get_amd_cpu (family, model); > + } > + > + /* Find available features. 
*/ > + get_available_features (ecx, edx); > + > + sanity_check (); > + > + return 0; > +} > Index: libgcc/config/i386/libgcc-glibc.ver > =================================================================== > --- libgcc/config/i386/libgcc-glibc.ver (revision 184644) > +++ libgcc/config/i386/libgcc-glibc.ver (working copy) > @@ -147,6 +147,11 @@ GCC_4.3.0 { > __trunctfxf2 > __unordtf2 > } > + > +GCC_4.7.0 { > + __cpu_model > + __cpu_features > +} > %else > GCC_4.4.0 { > __addtf3 > @@ -183,4 +188,8 @@ GCC_4.4.0 { > GCC_4.5.0 { > __extendxftf2 > } > +GCC_4.7.0 { > + __cpu_model > + __cpu_features > +} > %endif > Index: gcc/testsuite/gcc.target/i386/builtin_target.c > =================================================================== > --- gcc/testsuite/gcc.target/i386/builtin_target.c (revision 0) > +++ gcc/testsuite/gcc.target/i386/builtin_target.c (revision 0) > @@ -0,0 +1,61 @@ > +/* This test checks if the __builtin_cpu_* calls are recognized. */ > + > +/* { dg-do run } */ > + > +int > +fn1 () > +{ > + if (__builtin_cpu_supports_cmov () < 0) > + return -1; > + if (__builtin_cpu_supports_mmx () < 0) > + return -1; > + if (__builtin_cpu_supports_popcount () < 0) > + return -1; > + if (__builtin_cpu_supports_sse () < 0) > + return -1; > + if (__builtin_cpu_supports_sse2 () < 0) > + return -1; > + if (__builtin_cpu_supports_sse3 () < 0) > + return -1; > + if (__builtin_cpu_supports_ssse3 () < 0) > + return -1; > + if (__builtin_cpu_supports_sse4_1 () < 0) > + return -1; > + if (__builtin_cpu_supports_sse4_2 () < 0) > + return -1; > + if (__builtin_cpu_is_amd () < 0) > + return -1; > + if (__builtin_cpu_is_intel () < 0) > + return -1; > + if (__builtin_cpu_is_intel_atom () < 0) > + return -1; > + if (__builtin_cpu_is_intel_core2 () < 0) > + return -1; > + if (__builtin_cpu_is_intel_corei7 () < 0) > + return -1; > + if (__builtin_cpu_is_intel_corei7_nehalem () < 0) > + return -1; > + if (__builtin_cpu_is_intel_corei7_westmere () < 0) > + return -1; > + if 
(__builtin_cpu_is_intel_corei7_sandybridge () < 0) > + return -1; > + if (__builtin_cpu_is_amdfam10 () < 0) > + return -1; > + if (__builtin_cpu_is_amdfam10_barcelona () < 0) > + return -1; > + if (__builtin_cpu_is_amdfam10_shanghai () < 0) > + return -1; > + if (__builtin_cpu_is_amdfam10_istanbul () < 0) > + return -1; > + if (__builtin_cpu_is_amdfam15_bdver1 () < 0) > + return -1; > + if (__builtin_cpu_is_amdfam15_bdver2 () < 0) > + return -1; > + > + return 0; > +} > + > +int main () > +{ > + return fn1 (); > +} > Index: gcc/config/i386/i386-builtin-types.def > =================================================================== > --- gcc/config/i386/i386-builtin-types.def (revision 184644) > +++ gcc/config/i386/i386-builtin-types.def (working copy) > @@ -143,6 +143,7 @@ DEF_FUNCTION_TYPE (UINT64) > DEF_FUNCTION_TYPE (UNSIGNED) > DEF_FUNCTION_TYPE (VOID) > DEF_FUNCTION_TYPE (PVOID) > +DEF_FUNCTION_TYPE (INT) > > DEF_FUNCTION_TYPE (FLOAT, FLOAT) > DEF_FUNCTION_TYPE (FLOAT128, FLOAT128) > Index: gcc/config/i386/i386.c > =================================================================== > --- gcc/config/i386/i386.c (revision 184644) > +++ gcc/config/i386/i386.c (working copy) > @@ -25798,6 +25798,33 @@ enum ix86_builtins > /* CFString built-in for darwin */ > IX86_BUILTIN_CFSTRING, > > + /* Builtins to get CPU features. */ > + IX86_BUILTIN_CPU_SUPPORTS_CMOV, > + IX86_BUILTIN_CPU_SUPPORTS_MMX, > + IX86_BUILTIN_CPU_SUPPORTS_POPCOUNT, > + IX86_BUILTIN_CPU_SUPPORTS_SSE, > + IX86_BUILTIN_CPU_SUPPORTS_SSE2, > + IX86_BUILTIN_CPU_SUPPORTS_SSE3, > + IX86_BUILTIN_CPU_SUPPORTS_SSSE3, > + IX86_BUILTIN_CPU_SUPPORTS_SSE4_1, > + IX86_BUILTIN_CPU_SUPPORTS_SSE4_2, > + /* Builtins to get CPU type. 
*/ > + IX86_BUILTIN_CPU_INIT, > + IX86_BUILTIN_CPU_IS_AMD, > + IX86_BUILTIN_CPU_IS_INTEL, > + IX86_BUILTIN_CPU_IS_INTEL_ATOM, > + IX86_BUILTIN_CPU_IS_INTEL_CORE2, > + IX86_BUILTIN_CPU_IS_INTEL_COREI7, > + IX86_BUILTIN_CPU_IS_INTEL_COREI7_NEHALEM, > + IX86_BUILTIN_CPU_IS_INTEL_COREI7_WESTMERE, > + IX86_BUILTIN_CPU_IS_INTEL_COREI7_SANDYBRIDGE, > + IX86_BUILTIN_CPU_IS_AMDFAM10H, > + IX86_BUILTIN_CPU_IS_AMDFAM10H_BARCELONA, > + IX86_BUILTIN_CPU_IS_AMDFAM10H_SHANGHAI, > + IX86_BUILTIN_CPU_IS_AMDFAM10H_ISTANBUL, > + IX86_BUILTIN_CPU_IS_AMDFAM15H_BDVER1, > + IX86_BUILTIN_CPU_IS_AMDFAM15H_BDVER2, > + > IX86_BUILTIN_MAX > }; > > @@ -27607,6 +27634,350 @@ ix86_init_mmx_sse_builtins (void) > } > } > > +/* Returns a struct type with name NAME and number of fields equal to > + NUM_FIELDS. Each field is a unsigned int bit field of length 1 bit. */ > + > +static tree > +build_struct_with_one_bit_fields (int num_fields, const char *name) > +{ > + int i; > + char field_name [10]; > + tree field = NULL_TREE, field_chain = NULL_TREE; > + tree type = make_node (RECORD_TYPE); > + > + strcpy (field_name, "k_field"); > + > + for (i = 0; i < num_fields; i++) > + { > + /* Name the fields, 0_field, 1_field, ... */ > + field_name [0] = '0' + i; > + field = build_decl (UNKNOWN_LOCATION, FIELD_DECL, > + get_identifier (field_name), unsigned_type_node); > + DECL_BIT_FIELD (field) = 1; > + DECL_SIZE (field) = bitsize_one_node; > + if (field_chain != NULL_TREE) > + DECL_CHAIN (field) = field_chain; > + field_chain = field; > + } > + finish_builtin_struct (type, name, field_chain, NULL_TREE); > + return type; > +} > + > +/* Returns a extern, comdat VAR_DECL of type TYPE and name NAME. 
*/ > + > +static tree > +make_var_decl (tree type, const char *name) > +{ > + tree new_decl; > + struct varpool_node *vnode; > + > + new_decl = build_decl (UNKNOWN_LOCATION, > + VAR_DECL, > + get_identifier(name), > + type); > + > + DECL_EXTERNAL (new_decl) = 1; > + TREE_STATIC (new_decl) = 1; > + TREE_PUBLIC (new_decl) = 1; > + DECL_INITIAL (new_decl) = 0; > + DECL_ARTIFICIAL (new_decl) = 0; > + DECL_PRESERVE_P (new_decl) = 1; > + > + make_decl_one_only (new_decl, DECL_ASSEMBLER_NAME (new_decl)); > + assemble_variable (new_decl, 0, 0, 0); > + > + vnode = varpool_node (new_decl); > + gcc_assert (vnode != NULL); > + /* Set finalized to 1, otherwise it asserts in function "write_symbol" in > + lto-streamer-out.c. */ > + vnode->finalized = 1; > + > + return new_decl; > +} > + > +/* Traverses the chain of fields in STRUCT_TYPE and returns the FIELD_NUM > + numbered field. */ > + > +static tree > +get_field_from_struct (tree struct_type, int field_num) > +{ > + int i; > + tree field = TYPE_FIELDS (struct_type); > + > + for (i = 0; i < field_num; i++, field = DECL_CHAIN(field)) > + { > + gcc_assert (field != NULL_TREE); > + } > + > + return field; > +} > + > +/* FNDECL is a __builtin_cpu_* call that is folded into an integer defined > + in libgcc/config/i386/i386-cpuinfo.c */ > + > +static tree > +fold_builtin_cpu (enum ix86_builtins fn_code) > +{ > + /* This is the order of bit-fields in __processor_features in > + i386-cpuinfo.c */ > + enum processor_features > + { > + F_CMOV = 0, > + F_MMX, > + F_POPCNT, > + F_SSE, > + F_SSE2, > + F_SSE3, > + F_SSSE3, > + F_SSE4_1, > + F_SSE4_2, > + F_MAX > + }; > + > + /* This is the order of bit-fields in __processor_model in > + i386-cpuinfo.c */ > + enum processor_model > + { > + M_AMD = 0, > + M_INTEL, > + M_INTEL_ATOM, > + M_INTEL_CORE2, > + M_INTEL_COREI7, > + M_INTEL_COREI7_NEHALEM, > + M_INTEL_COREI7_WESTMERE, > + M_INTEL_COREI7_SANDYBRIDGE, > + M_AMDFAM10H, > + M_AMDFAM10H_BARCELONA, > + M_AMDFAM10H_SHANGHAI, > + 
M_AMDFAM10H_ISTANBUL, > + M_AMDFAM15H_BDVER1, > + M_AMDFAM15H_BDVER2, > + M_MAX > + }; > + > + static tree __processor_features_type = NULL_TREE; > + static tree __cpu_features_var = NULL_TREE; > + static tree __processor_model_type = NULL_TREE; > + static tree __cpu_model_var = NULL_TREE; > + static tree field; > + static tree which_struct; > + > + if (__processor_features_type == NULL_TREE) > + __processor_features_type = build_struct_with_one_bit_fields (F_MAX, > + "__processor_features"); > + > + if (__processor_model_type == NULL_TREE) > + __processor_model_type = build_struct_with_one_bit_fields (M_MAX, > + "__processor_model"); > + > + if (__cpu_features_var == NULL_TREE) > + __cpu_features_var = make_var_decl (__processor_features_type, > + "__cpu_features"); > + > + if (__cpu_model_var == NULL_TREE) > + __cpu_model_var = make_var_decl (__processor_model_type, > + "__cpu_model"); > + > + /* Look at the code to identify the field requested. */ > + switch (fn_code) > + { > + case IX86_BUILTIN_CPU_SUPPORTS_CMOV: > + field = get_field_from_struct (__processor_features_type, F_CMOV); > + which_struct = __cpu_features_var; > + break; > + case IX86_BUILTIN_CPU_SUPPORTS_MMX: > + field = get_field_from_struct (__processor_features_type, F_MMX); > + which_struct = __cpu_features_var; > + break; > + case IX86_BUILTIN_CPU_SUPPORTS_POPCOUNT: > + field = get_field_from_struct (__processor_features_type, F_POPCNT); > + which_struct = __cpu_features_var; > + break; > + case IX86_BUILTIN_CPU_SUPPORTS_SSE: > + field = get_field_from_struct (__processor_features_type, F_SSE); > + which_struct = __cpu_features_var; > + break; > + case IX86_BUILTIN_CPU_SUPPORTS_SSE2: > + field = get_field_from_struct (__processor_features_type, F_SSE2); > + which_struct = __cpu_features_var; > + break; > + case IX86_BUILTIN_CPU_SUPPORTS_SSE3: > + field = get_field_from_struct (__processor_features_type, F_SSE3); > + which_struct = __cpu_features_var; > + break; > + case 
IX86_BUILTIN_CPU_SUPPORTS_SSSE3: > + field = get_field_from_struct (__processor_features_type, F_SSSE3); > + which_struct = __cpu_features_var; > + break; > + case IX86_BUILTIN_CPU_SUPPORTS_SSE4_1: > + field = get_field_from_struct (__processor_features_type, F_SSE4_1); > + which_struct = __cpu_features_var; > + break; > + case IX86_BUILTIN_CPU_SUPPORTS_SSE4_2: > + field = get_field_from_struct (__processor_features_type, F_SSE4_2); > + which_struct = __cpu_features_var; > + break; > + case IX86_BUILTIN_CPU_IS_AMD: > + field = get_field_from_struct (__processor_model_type, M_AMD); > + which_struct = __cpu_model_var; > + break; > + case IX86_BUILTIN_CPU_IS_INTEL: > + field = get_field_from_struct (__processor_model_type, M_INTEL); > + which_struct = __cpu_model_var; > + break; > + case IX86_BUILTIN_CPU_IS_INTEL_ATOM: > + field = get_field_from_struct (__processor_model_type, M_INTEL_ATOM); > + which_struct = __cpu_model_var; > + break; > + case IX86_BUILTIN_CPU_IS_INTEL_CORE2: > + field = get_field_from_struct (__processor_model_type, M_INTEL_CORE2); > + which_struct = __cpu_model_var; > + break; > + case IX86_BUILTIN_CPU_IS_INTEL_COREI7: > + field = get_field_from_struct (__processor_model_type, > + M_INTEL_COREI7); > + which_struct = __cpu_model_var; > + break; > + case IX86_BUILTIN_CPU_IS_INTEL_COREI7_NEHALEM: > + field = get_field_from_struct (__processor_model_type, > + M_INTEL_COREI7_NEHALEM); > + which_struct = __cpu_model_var; > + break; > + case IX86_BUILTIN_CPU_IS_INTEL_COREI7_WESTMERE: > + field = get_field_from_struct (__processor_model_type, > + M_INTEL_COREI7_WESTMERE); > + which_struct = __cpu_model_var; > + break; > + case IX86_BUILTIN_CPU_IS_INTEL_COREI7_SANDYBRIDGE: > + field = get_field_from_struct (__processor_model_type, > + M_INTEL_COREI7_SANDYBRIDGE); > + which_struct = __cpu_model_var; > + break; > + case IX86_BUILTIN_CPU_IS_AMDFAM10H: > + field = get_field_from_struct (__processor_model_type, > + M_AMDFAM10H); > + which_struct = 
__cpu_model_var; > + break; > + case IX86_BUILTIN_CPU_IS_AMDFAM10H_BARCELONA: > + field = get_field_from_struct (__processor_model_type, > + M_AMDFAM10H_BARCELONA); > + which_struct = __cpu_model_var; > + break; > + case IX86_BUILTIN_CPU_IS_AMDFAM10H_SHANGHAI: > + field = get_field_from_struct (__processor_model_type, > + M_AMDFAM10H_SHANGHAI); > + which_struct = __cpu_model_var; > + break; > + case IX86_BUILTIN_CPU_IS_AMDFAM10H_ISTANBUL: > + field = get_field_from_struct (__processor_model_type, > + M_AMDFAM10H_ISTANBUL); > + which_struct = __cpu_model_var; > + break; > + case IX86_BUILTIN_CPU_IS_AMDFAM15H_BDVER1: > + field = get_field_from_struct (__processor_model_type, > + M_AMDFAM15H_BDVER1); > + which_struct = __cpu_model_var; > + break; > + case IX86_BUILTIN_CPU_IS_AMDFAM15H_BDVER2: > + field = get_field_from_struct (__processor_model_type, > + M_AMDFAM15H_BDVER2); > + which_struct = __cpu_model_var; > + break; > + default: > + return NULL_TREE; > + } > + > + return build3 (COMPONENT_REF, TREE_TYPE (field), which_struct, field, NULL_TREE); > +} > + > +static tree > +ix86_fold_builtin (tree fndecl, int n_args ATTRIBUTE_UNUSED, > + tree *args ATTRIBUTE_UNUSED, bool ignore ATTRIBUTE_UNUSED) > +{ > + const char* decl_name = IDENTIFIER_POINTER (DECL_NAME (fndecl)); > + if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD > + && strstr(decl_name, "__builtin_cpu") != NULL) > + { > + enum ix86_builtins code = (enum ix86_builtins) > + DECL_FUNCTION_CODE (fndecl); > + return fold_builtin_cpu (code); > + } > + return NULL_TREE; > +} > + > +/* A builtin to init/return the cpu type or feature. Returns an > + integer and the type is a const if IS_CONST is set. 
*/ > + > +static void > +make_platform_builtin (const char* name, int code, int is_const) > +{ > + tree decl; > + tree type; > + > + type = ix86_get_builtin_func_type (INT_FTYPE_VOID); > + decl = add_builtin_function (name, type, code, BUILT_IN_MD, > + NULL, NULL_TREE); > + gcc_assert (decl != NULL_TREE); > + ix86_builtins[(int) code] = decl; > + if (is_const) > + TREE_READONLY (decl) = 1; > +} > + > +/* Builtins to get CPU type and features supported. */ > + > +static void > +ix86_init_platform_type_builtins (void) > +{ > + make_platform_builtin ("__builtin_cpu_init", > + IX86_BUILTIN_CPU_INIT, 0); > + make_platform_builtin ("__builtin_cpu_supports_cmov", > + IX86_BUILTIN_CPU_SUPPORTS_CMOV, 1); > + make_platform_builtin ("__builtin_cpu_supports_mmx", > + IX86_BUILTIN_CPU_SUPPORTS_MMX, 1); > + make_platform_builtin ("__builtin_cpu_supports_popcount", > + IX86_BUILTIN_CPU_SUPPORTS_POPCOUNT, 1); > + make_platform_builtin ("__builtin_cpu_supports_sse", > + IX86_BUILTIN_CPU_SUPPORTS_SSE, 1); > + make_platform_builtin ("__builtin_cpu_supports_sse2", > + IX86_BUILTIN_CPU_SUPPORTS_SSE2, 1); > + make_platform_builtin ("__builtin_cpu_supports_sse3", > + IX86_BUILTIN_CPU_SUPPORTS_SSE3, 1); > + make_platform_builtin ("__builtin_cpu_supports_ssse3", > + IX86_BUILTIN_CPU_SUPPORTS_SSSE3, 1); > + make_platform_builtin ("__builtin_cpu_supports_sse4_1", > + IX86_BUILTIN_CPU_SUPPORTS_SSE4_1, 1); > + make_platform_builtin ("__builtin_cpu_supports_sse4_2", > + IX86_BUILTIN_CPU_SUPPORTS_SSE4_2, 1); > + make_platform_builtin ("__builtin_cpu_is_amd", > + IX86_BUILTIN_CPU_IS_AMD, 1); > + make_platform_builtin ("__builtin_cpu_is_intel_atom", > + IX86_BUILTIN_CPU_IS_INTEL_ATOM, 1); > + make_platform_builtin ("__builtin_cpu_is_intel_core2", > + IX86_BUILTIN_CPU_IS_INTEL_CORE2, 1); > + make_platform_builtin ("__builtin_cpu_is_intel", > + IX86_BUILTIN_CPU_IS_INTEL, 1); > + make_platform_builtin ("__builtin_cpu_is_intel_corei7", > + IX86_BUILTIN_CPU_IS_INTEL_COREI7, 1); > + 
make_platform_builtin ("__builtin_cpu_is_intel_corei7_nehalem", > + IX86_BUILTIN_CPU_IS_INTEL_COREI7_NEHALEM, 1); > + make_platform_builtin ("__builtin_cpu_is_intel_corei7_westmere", > + IX86_BUILTIN_CPU_IS_INTEL_COREI7_WESTMERE, 1); > + make_platform_builtin ("__builtin_cpu_is_intel_corei7_sandybridge", > + IX86_BUILTIN_CPU_IS_INTEL_COREI7_SANDYBRIDGE, 1); > + make_platform_builtin ("__builtin_cpu_is_amdfam10", > + IX86_BUILTIN_CPU_IS_AMDFAM10H, 1); > + make_platform_builtin ("__builtin_cpu_is_amdfam10_barcelona", > + IX86_BUILTIN_CPU_IS_AMDFAM10H_BARCELONA, 1); > + make_platform_builtin ("__builtin_cpu_is_amdfam10_shanghai", > + IX86_BUILTIN_CPU_IS_AMDFAM10H_SHANGHAI, 1); > + make_platform_builtin ("__builtin_cpu_is_amdfam10_istanbul", > + IX86_BUILTIN_CPU_IS_AMDFAM10H_ISTANBUL, 1); > + make_platform_builtin ("__builtin_cpu_is_amdfam15_bdver1", > + IX86_BUILTIN_CPU_IS_AMDFAM15H_BDVER1, 1); > + make_platform_builtin ("__builtin_cpu_is_amdfam15_bdver2", > + IX86_BUILTIN_CPU_IS_AMDFAM15H_BDVER2, 1); > +} > + > /* Detect if this unaligned vectorizable load/stores should be > considered slow. This is true for core2 where the movdqu insn > is slow, ~5x slower than the movdqa. */ > @@ -27705,6 +28076,9 @@ ix86_init_builtins (void) > > ix86_init_builtin_types (); > > + /* Builtins to get CPU type and features. */ > + ix86_init_platform_type_builtins (); > + > /* TFmode support builtins. */ > def_builtin_const (0, "__builtin_infq", > FLOAT128_FTYPE_VOID, IX86_BUILTIN_INFQ); > @@ -29321,6 +29695,48 @@ ix86_expand_builtin (tree exp, rtx target, rtx sub > enum machine_mode mode0, mode1, mode2, mode3, mode4; > unsigned int fcode = DECL_FUNCTION_CODE (fndecl); > > + /* For CPU builtins that can be folded, fold first and expand the fold. 
*/ > + switch (fcode) > + { > + case IX86_BUILTIN_CPU_SUPPORTS_CMOV: > + case IX86_BUILTIN_CPU_SUPPORTS_MMX: > + case IX86_BUILTIN_CPU_SUPPORTS_POPCOUNT: > + case IX86_BUILTIN_CPU_SUPPORTS_SSE: > + case IX86_BUILTIN_CPU_SUPPORTS_SSE2: > + case IX86_BUILTIN_CPU_SUPPORTS_SSE3: > + case IX86_BUILTIN_CPU_SUPPORTS_SSSE3: > + case IX86_BUILTIN_CPU_SUPPORTS_SSE4_1: > + case IX86_BUILTIN_CPU_SUPPORTS_SSE4_2: > + case IX86_BUILTIN_CPU_IS_AMD: > + case IX86_BUILTIN_CPU_IS_INTEL: > + case IX86_BUILTIN_CPU_IS_INTEL_ATOM: > + case IX86_BUILTIN_CPU_IS_INTEL_CORE2: > + case IX86_BUILTIN_CPU_IS_INTEL_COREI7: > + case IX86_BUILTIN_CPU_IS_INTEL_COREI7_NEHALEM: > + case IX86_BUILTIN_CPU_IS_INTEL_COREI7_WESTMERE: > + case IX86_BUILTIN_CPU_IS_INTEL_COREI7_SANDYBRIDGE: > + case IX86_BUILTIN_CPU_IS_AMDFAM10H: > + case IX86_BUILTIN_CPU_IS_AMDFAM10H_BARCELONA: > + case IX86_BUILTIN_CPU_IS_AMDFAM10H_SHANGHAI: > + case IX86_BUILTIN_CPU_IS_AMDFAM10H_ISTANBUL: > + case IX86_BUILTIN_CPU_IS_AMDFAM15H_BDVER1: > + case IX86_BUILTIN_CPU_IS_AMDFAM15H_BDVER2: > + { > + tree fold_expr = fold_builtin_cpu ((enum ix86_builtins) fcode); > + gcc_assert (fold_expr != NULL_TREE); > + return expand_expr (fold_expr, target, mode, EXPAND_NORMAL); > + } > + case IX86_BUILTIN_CPU_INIT: > + { > + /* Make it call __cpu_indicator_init in libgcc. */ > + tree call_expr, fndecl, type; > + type = build_function_type_list (integer_type_node, NULL_TREE); > + fndecl = build_fn_decl ("__cpu_indicator_init", type); > + call_expr = build_call_expr (fndecl, 0); > + return expand_expr (call_expr, target, mode, EXPAND_NORMAL); > + } > + } > + > /* Determine whether the builtin function is available under the current ISA. > Originally the builtin was not created if it wasn't applicable to the > current ISA based on the command line switches. 
With function specific > @@ -38867,6 +39283,9 @@ ix86_loop_unroll_adjust (unsigned nunroll, struct > #undef TARGET_BUILD_BUILTIN_VA_LIST > #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list > > +#undef TARGET_FOLD_BUILTIN > +#define TARGET_FOLD_BUILTIN ix86_fold_builtin > + > #undef TARGET_SLOW_UNALIGNED_VECTOR_MEMOP > #define TARGET_SLOW_UNALIGNED_VECTOR_MEMOP ix86_slow_unaligned_vector_memop > > > -- > This patch is available for review at http://codereview.appspot.com/5715051
Sign in to reply to this message.
Removing [google] prefix from the subject line. On Thu, Mar 1, 2012 at 12:54 PM, Xinliang David Li <davidxl@google.com> wrote: > Sri, probably need to remove the [google] prefix in the subject line > to prevent this from being filtered. > > David > > On Thu, Mar 1, 2012 at 12:45 PM, Sriraman Tallam <tmsriram@google.com> wrote: >> Patch to add builtins to detect CPU type: >> ======================================== >> >> I have ported the patch from google/gcc-4_6 to google/main. I also want this >> patch to be considered for trunk. Please see this discussion: >> http://gcc.gnu.org/ml/gcc-patches/2011-08/msg01355.html >> when this patch was reviewed the last time. >> >> One of the main concerns was about making CPU detection initialization a >> constructor. The main point raised was about constructor ordering. I have now >> added a priority value to the CPU detection constructor to make it very high >> priority so that it is guaranteed to fire before every constructor without >> an explicitly marked priority value of 101. However, IFUNC initializers >> will still fire before this constructor, so the cpu initialization routine >> has to be explicitly called in such initializers for which I have added a >> builtin: __builtin_cpu_init (). >> >> I would like to reopen discussions on this to make it suitable for trunk >> this time around. 
>> >> This patch adds the following new builtins: >> >> __builtin_cpu_init >> __builtin_cpu_supports_cmov >> __builtin_cpu_supports_mmx >> __builtin_cpu_supports_popcount >> __builtin_cpu_supports_sse >> __builtin_cpu_supports_sse2 >> __builtin_cpu_supports_sse3 >> __builtin_cpu_supports_ssse3 >> __builtin_cpu_supports_sse4_1 >> __builtin_cpu_supports_sse4_2 >> __builtin_cpu_is_amd >> __builtin_cpu_is_intel_atom >> __builtin_cpu_is_intel_core2 >> __builtin_cpu_is_intel >> __builtin_cpu_is_intel_corei7 >> __builtin_cpu_is_intel_corei7_nehalem >> __builtin_cpu_is_intel_corei7_westmere >> __builtin_cpu_is_intel_corei7_sandybridge >> __builtin_cpu_is_amdfam10 >> __builtin_cpu_is_amdfam10_barcelona >> __builtin_cpu_is_amdfam10_shanghai >> __builtin_cpu_is_amdfam10_istanbul >> __builtin_cpu_is_amdfam15_bdver1 >> __builtin_cpu_is_amdfam15_bdver2 >> >> >> * config/i386/i386.c (build_struct_with_one_bit_fields): New function. >> (make_var_decl): New function. >> (get_field_from_struct): New function. >> (fold_builtin_target): New function. >> (ix86_fold_builtin): New function. >> (ix86_expand_builtin): Expand new builtins by folding them. >> (make_platform_builtin): New functions. >> (ix86_init_platform_type_builtins): Make the new builtins. >> (ix86_init_builtins): Make new builtins to detect CPU type. >> (TARGET_FOLD_BUILTIN): New macro. >> (IX86_BUILTIN_CPU_SUPPORTS_CMOV): New enum value. >> (IX86_BUILTIN_CPU_SUPPORTS_MMX): New enum value. >> (IX86_BUILTIN_CPU_SUPPORTS_POPCOUNT): New enum value. >> (IX86_BUILTIN_CPU_SUPPORTS_SSE): New enum value. >> (IX86_BUILTIN_CPU_SUPPORTS_SSE2): New enum value. >> (IX86_BUILTIN_CPU_SUPPORTS_SSE3): New enum value. >> (IX86_BUILTIN_CPU_SUPPORTS_SSSE3): New enum value. >> (IX86_BUILTIN_CPU_SUPPORTS_SSE4_1): New enum value. >> (IX86_BUILTIN_CPU_SUPPORTS_SSE4_2): New enum value. >> (IX86_BUILTIN_CPU_INIT): New enum value. >> (IX86_BUILTIN_CPU_IS_AMD): New enum value. >> (IX86_BUILTIN_CPU_IS_INTEL): New enum value. 
>> (IX86_BUILTIN_CPU_IS_INTEL_ATOM): New enum value. >> (IX86_BUILTIN_CPU_IS_INTEL_CORE2): New enum value. >> (IX86_BUILTIN_CPU_IS_INTEL_COREI7_NEHALEM): New enum value. >> (IX86_BUILTIN_CPU_IS_INTEL_COREI7_WESTMERE): New enum value. >> (IX86_BUILTIN_CPU_IS_INTEL_COREI7_SANDYBRIDGE): New enum value. >> (IX86_BUILTIN_CPU_IS_AMDFAM10_BARCELONA): New enum value. >> (IX86_BUILTIN_CPU_IS_AMDFAM10_SHANGHAI): New enum value. >> (IX86_BUILTIN_CPU_IS_AMDFAM10_ISTANBUL): New enum value. >> (IX86_BUILTIN_CPU_IS_AMDFAM15H_BDVER1): New enum value. >> (IX86_BUILTIN_CPU_IS_AMDFAM15H_BDVER2): New enum value. >> * config/i386/i386-builtin-types.def: New function type. >> * testsuite/gcc.target/builtin_target.c: New testcase. >> >> * libgcc/config/i386/i386-cpuinfo.c: New file. >> * libgcc/config/i386/t-cpuinfo: New file. >> * libgcc/config.host: Include t-cpuinfo. >> * libgcc/config/i386/libgcc-glibc.ver: Version symbols __cpu_model >> and __cpu_features. >> >> >> >> Index: libgcc/config.host >> =================================================================== >> --- libgcc/config.host (revision 184644) >> +++ libgcc/config.host (working copy) >> @@ -1128,7 +1128,7 @@ i[34567]86-*-linux* | x86_64-*-linux* | \ >> i[34567]86-*-kfreebsd*-gnu | x86_64-*-kfreebsd*-gnu | \ >> i[34567]86-*-knetbsd*-gnu | \ >> i[34567]86-*-gnu*) >> - tmake_file="${tmake_file} t-tls i386/t-linux" >> + tmake_file="${tmake_file} t-tls i386/t-linux i386/t-cpuinfo" >> if test "$libgcc_cv_cfi" = "yes"; then >> tmake_file="${tmake_file} t-stack i386/t-stack-i386" >> fi >> Index: libgcc/config/i386/t-cpuinfo >> =================================================================== >> --- libgcc/config/i386/t-cpuinfo (revision 0) >> +++ libgcc/config/i386/t-cpuinfo (revision 0) >> @@ -0,0 +1 @@ >> +LIB2ADD += $(srcdir)/config/i386/i386-cpuinfo.c >> Index: libgcc/config/i386/i386-cpuinfo.c >> =================================================================== >> --- libgcc/config/i386/i386-cpuinfo.c (revision 0) >> 
+++ libgcc/config/i386/i386-cpuinfo.c (revision 0) >> @@ -0,0 +1,306 @@ >> +/* Get CPU type and Features for x86 processors. >> + Copyright (C) 2011 Free Software Foundation, Inc. >> + Contributed by Sriraman Tallam (tmsriram@google.com) >> + >> +This file is part of GCC. >> + >> +GCC is free software; you can redistribute it and/or modify it under >> +the terms of the GNU General Public License as published by the Free >> +Software Foundation; either version 3, or (at your option) any later >> +version. >> + >> +GCC is distributed in the hope that it will be useful, but WITHOUT ANY >> +WARRANTY; without even the implied warranty of MERCHANTABILITY or >> +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License >> +for more details. >> + >> +You should have received a copy of the GNU General Public License >> +along with GCC; see the file COPYING3. If not see >> +<http://www.gnu.org/licenses/>. */ >> + >> +#include "cpuid.h" >> +#include "tsystem.h" >> + >> +int __cpu_indicator_init (void) __attribute__ ((constructor (101))); >> + >> +enum vendor_signatures >> +{ >> + SIG_INTEL = 0x756e6547 /* Genu */, >> + SIG_AMD = 0x68747541 /* Auth */ >> +}; >> + >> +/* ISA Features supported. */ >> + >> +struct __processor_features >> +{ >> + unsigned int __cpu_cmov : 1; >> + unsigned int __cpu_mmx : 1; >> + unsigned int __cpu_popcnt : 1; >> + unsigned int __cpu_sse : 1; >> + unsigned int __cpu_sse2 : 1; >> + unsigned int __cpu_sse3 : 1; >> + unsigned int __cpu_ssse3 : 1; >> + unsigned int __cpu_sse4_1 : 1; >> + unsigned int __cpu_sse4_2 : 1; >> +} __cpu_features; >> + >> +/* Processor Model. */ >> + >> +struct __processor_model >> +{ >> + /* Vendor. */ >> + unsigned int __cpu_is_amd : 1; >> + unsigned int __cpu_is_intel : 1; >> + /* CPU type. 
*/ >> + unsigned int __cpu_is_intel_atom : 1; >> + unsigned int __cpu_is_intel_core2 : 1; >> + unsigned int __cpu_is_intel_corei7 : 1; >> + unsigned int __cpu_is_intel_corei7_nehalem : 1; >> + unsigned int __cpu_is_intel_corei7_westmere : 1; >> + unsigned int __cpu_is_intel_corei7_sandybridge : 1; >> + unsigned int __cpu_is_amdfam10h : 1; >> + unsigned int __cpu_is_amdfam10h_barcelona : 1; >> + unsigned int __cpu_is_amdfam10h_shanghai : 1; >> + unsigned int __cpu_is_amdfam10h_istanbul : 1; >> + unsigned int __cpu_is_amdfam15h_bdver1 : 1; >> + unsigned int __cpu_is_amdfam15h_bdver2 : 1; >> +} __cpu_model; >> + >> +/* Get the specific type of AMD CPU. */ >> + >> +static void >> +get_amd_cpu (unsigned int family, unsigned int model) >> +{ >> + switch (family) >> + { >> + /* AMD Family 10h. */ >> + case 0x10: >> + switch (model) >> + { >> + case 0x2: >> + /* Barcelona. */ >> + __cpu_model.__cpu_is_amdfam10h = 1; >> + __cpu_model.__cpu_is_amdfam10h_barcelona = 1; >> + break; >> + case 0x4: >> + /* Shanghai. */ >> + __cpu_model.__cpu_is_amdfam10h = 1; >> + __cpu_model.__cpu_is_amdfam10h_shanghai = 1; >> + break; >> + case 0x8: >> + /* Istanbul. */ >> + __cpu_model.__cpu_is_amdfam10h = 1; >> + __cpu_model.__cpu_is_amdfam10h_istanbul = 1; >> + break; >> + default: >> + break; >> + } >> + break; >> + /* AMD Family 15h. */ >> + case 0x15: >> + /* Bulldozer version 1. */ >> + if (model >= 0 && model <= 0xf) >> + __cpu_model.__cpu_is_amdfam15h_bdver1 = 1; >> + /* Bulldozer version 2. */ >> + if (model >= 0x10 && model <= 0x1f) >> + __cpu_model.__cpu_is_amdfam15h_bdver2 = 1; >> + break; >> + default: >> + break; >> + } >> +} >> + >> +/* Get the specific type of Intel CPU. */ >> + >> +static void >> +get_intel_cpu (unsigned int family, unsigned int model, unsigned int brand_id) >> +{ >> + /* Parse family and model only if brand ID is 0. */ >> + if (brand_id == 0) >> + { >> + switch (family) >> + { >> + case 0x5: >> + /* Pentium. 
*/ >> + break; >> + case 0x6: >> + switch (model) >> + { >> + case 0x1c: >> + case 0x26: >> + /* Atom. */ >> + __cpu_model.__cpu_is_intel_atom = 1; >> + break; >> + case 0x1a: >> + case 0x1e: >> + case 0x1f: >> + case 0x2e: >> + /* Nehalem. */ >> + __cpu_model.__cpu_is_intel_corei7 = 1; >> + __cpu_model.__cpu_is_intel_corei7_nehalem = 1; >> + break; >> + case 0x25: >> + case 0x2c: >> + case 0x2f: >> + /* Westmere. */ >> + __cpu_model.__cpu_is_intel_corei7 = 1; >> + __cpu_model.__cpu_is_intel_corei7_westmere = 1; >> + break; >> + case 0x2a: >> + /* Sandy Bridge. */ >> + __cpu_model.__cpu_is_intel_corei7 = 1; >> + __cpu_model.__cpu_is_intel_corei7_sandybridge = 1; >> + break; >> + case 0x17: >> + case 0x1d: >> + /* Penryn. */ >> + case 0x0f: >> + /* Merom. */ >> + __cpu_model.__cpu_is_intel_core2 = 1; >> + break; >> + default: >> + break; >> + } >> + break; >> + default: >> + /* We have no idea. */ >> + break; >> + } >> + } >> +} >> + >> +static void >> +get_available_features (unsigned int ecx, unsigned int edx) >> +{ >> + __cpu_features.__cpu_cmov = (edx & bit_CMOV) ? 1 : 0; >> + __cpu_features.__cpu_mmx = (edx & bit_MMX) ? 1 : 0; >> + __cpu_features.__cpu_sse = (edx & bit_SSE) ? 1 : 0; >> + __cpu_features.__cpu_sse2 = (edx & bit_SSE2) ? 1 : 0; >> + __cpu_features.__cpu_popcnt = (ecx & bit_POPCNT) ? 1 : 0; >> + __cpu_features.__cpu_sse3 = (ecx & bit_SSE3) ? 1 : 0; >> + __cpu_features.__cpu_ssse3 = (ecx & bit_SSSE3) ? 1 : 0; >> + __cpu_features.__cpu_sse4_1 = (ecx & bit_SSE4_1) ? 1 : 0; >> + __cpu_features.__cpu_sse4_2 = (ecx & bit_SSE4_2) ? 1 : 0; >> +} >> + >> + >> +/* Sanity check for the vendor and cpu type flags. */ >> + >> +static int >> +sanity_check (void) >> +{ >> + unsigned int one_type = 0; >> + >> + /* Vendor cannot be Intel and AMD. */ >> + gcc_assert((__cpu_model.__cpu_is_intel == 0) >> + || (__cpu_model.__cpu_is_amd == 0)); >> + >> + /* Only one CPU type can be set. 
*/ >> + one_type = (__cpu_model.__cpu_is_intel_atom >> + + __cpu_model.__cpu_is_intel_core2 >> + + __cpu_model.__cpu_is_intel_corei7_nehalem >> + + __cpu_model.__cpu_is_intel_corei7_westmere >> + + __cpu_model.__cpu_is_intel_corei7_sandybridge >> + + __cpu_model.__cpu_is_amdfam10h_barcelona >> + + __cpu_model.__cpu_is_amdfam10h_shanghai >> + + __cpu_model.__cpu_is_amdfam10h_istanbul >> + + __cpu_model.__cpu_is_amdfam15h_bdver1 >> + + __cpu_model.__cpu_is_amdfam15h_bdver2); >> + >> + gcc_assert (one_type <= 1); >> + return 0; >> +} >> + >> +/* A noinline function calling __get_cpuid. Having many calls to >> + cpuid in one function in 32-bit mode causes GCC to complain: >> + "can’t find a register in class ‘CLOBBERED_REGS’". This is >> + related to PR rtl-optimization 44174. */ >> + >> +static int __attribute__ ((noinline)) >> +__get_cpuid_output (unsigned int __level, >> + unsigned int *__eax, unsigned int *__ebx, >> + unsigned int *__ecx, unsigned int *__edx) >> +{ >> + return __get_cpuid (__level, __eax, __ebx, __ecx, __edx); >> +} >> + >> + >> +/* A constructor function that sets __cpu_model and __cpu_features with >> + the right values. This needs to run only once. This constructor is >> + given the highest priority and it will run before constructors without >> + the priority set. However, it still runs after ifunc initializers and >> + needs to be called explicitly there. */ >> + >> +int __attribute__ ((constructor (101))) >> +__cpu_indicator_init (void) >> +{ >> + unsigned int eax, ebx, ecx, edx; >> + >> + int max_level = 5; >> + unsigned int vendor; >> + unsigned int model, family, brand_id; >> + unsigned int extended_model, extended_family; >> + static int called = 0; >> + >> + /* This function needs to run just once. */ >> + if (called) >> + return 0; >> + else >> + called = 1; >> + >> + /* Assume cpuid insn present. Run in level 0 to get vendor id. 
*/ >> + if (!__get_cpuid_output (0, &eax, &ebx, &ecx, &edx)) >> + return -1; >> + >> + vendor = ebx; >> + max_level = eax; >> + >> + if (max_level < 1) >> + return -1; >> + >> + if (!__get_cpuid_output (1, &eax, &ebx, &ecx, &edx)) >> + return -1; >> + >> + model = (eax >> 4) & 0x0f; >> + family = (eax >> 8) & 0x0f; >> + brand_id = ebx & 0xff; >> + extended_model = (eax >> 12) & 0xf0; >> + extended_family = (eax >> 20) & 0xff; >> + >> + if (vendor == SIG_INTEL) >> + { >> + /* Adjust model and family for Intel CPUS. */ >> + if (family == 0x0f) >> + { >> + family += extended_family; >> + model += extended_model; >> + } >> + else if (family == 0x06) >> + model += extended_model; >> + >> + /* Get CPU type. */ >> + __cpu_model.__cpu_is_intel = 1; >> + get_intel_cpu (family, model, brand_id); >> + } >> + >> + if (vendor == SIG_AMD) >> + { >> + /* Adjust model and family for AMD CPUS. */ >> + if (family == 0x0f) >> + { >> + family += extended_family; >> + model += (extended_model << 4); >> + } >> + >> + /* Get CPU type. */ >> + __cpu_model.__cpu_is_amd = 1; >> + get_amd_cpu (family, model); >> + } >> + >> + /* Find available features. 
*/ >> + get_available_features (ecx, edx); >> + >> + sanity_check (); >> + >> + return 0; >> +} >> Index: libgcc/config/i386/libgcc-glibc.ver >> =================================================================== >> --- libgcc/config/i386/libgcc-glibc.ver (revision 184644) >> +++ libgcc/config/i386/libgcc-glibc.ver (working copy) >> @@ -147,6 +147,11 @@ GCC_4.3.0 { >> __trunctfxf2 >> __unordtf2 >> } >> + >> +GCC_4.7.0 { >> + __cpu_model >> + __cpu_features >> +} >> %else >> GCC_4.4.0 { >> __addtf3 >> @@ -183,4 +188,8 @@ GCC_4.4.0 { >> GCC_4.5.0 { >> __extendxftf2 >> } >> +GCC_4.7.0 { >> + __cpu_model >> + __cpu_features >> +} >> %endif >> Index: gcc/testsuite/gcc.target/i386/builtin_target.c >> =================================================================== >> --- gcc/testsuite/gcc.target/i386/builtin_target.c (revision 0) >> +++ gcc/testsuite/gcc.target/i386/builtin_target.c (revision 0) >> @@ -0,0 +1,61 @@ >> +/* This test checks if the __builtin_cpu_* calls are recognized. */ >> + >> +/* { dg-do run } */ >> + >> +int >> +fn1 () >> +{ >> + if (__builtin_cpu_supports_cmov () < 0) >> + return -1; >> + if (__builtin_cpu_supports_mmx () < 0) >> + return -1; >> + if (__builtin_cpu_supports_popcount () < 0) >> + return -1; >> + if (__builtin_cpu_supports_sse () < 0) >> + return -1; >> + if (__builtin_cpu_supports_sse2 () < 0) >> + return -1; >> + if (__builtin_cpu_supports_sse3 () < 0) >> + return -1; >> + if (__builtin_cpu_supports_ssse3 () < 0) >> + return -1; >> + if (__builtin_cpu_supports_sse4_1 () < 0) >> + return -1; >> + if (__builtin_cpu_supports_sse4_2 () < 0) >> + return -1; >> + if (__builtin_cpu_is_amd () < 0) >> + return -1; >> + if (__builtin_cpu_is_intel () < 0) >> + return -1; >> + if (__builtin_cpu_is_intel_atom () < 0) >> + return -1; >> + if (__builtin_cpu_is_intel_core2 () < 0) >> + return -1; >> + if (__builtin_cpu_is_intel_corei7 () < 0) >> + return -1; >> + if (__builtin_cpu_is_intel_corei7_nehalem () < 0) >> + return -1; >> + if 
(__builtin_cpu_is_intel_corei7_westmere () < 0) >> + return -1; >> + if (__builtin_cpu_is_intel_corei7_sandybridge () < 0) >> + return -1; >> + if (__builtin_cpu_is_amdfam10 () < 0) >> + return -1; >> + if (__builtin_cpu_is_amdfam10_barcelona () < 0) >> + return -1; >> + if (__builtin_cpu_is_amdfam10_shanghai () < 0) >> + return -1; >> + if (__builtin_cpu_is_amdfam10_istanbul () < 0) >> + return -1; >> + if (__builtin_cpu_is_amdfam15_bdver1 () < 0) >> + return -1; >> + if (__builtin_cpu_is_amdfam15_bdver2 () < 0) >> + return -1; >> + >> + return 0; >> +} >> + >> +int main () >> +{ >> + return fn1 (); >> +} >> Index: gcc/config/i386/i386-builtin-types.def >> =================================================================== >> --- gcc/config/i386/i386-builtin-types.def (revision 184644) >> +++ gcc/config/i386/i386-builtin-types.def (working copy) >> @@ -143,6 +143,7 @@ DEF_FUNCTION_TYPE (UINT64) >> DEF_FUNCTION_TYPE (UNSIGNED) >> DEF_FUNCTION_TYPE (VOID) >> DEF_FUNCTION_TYPE (PVOID) >> +DEF_FUNCTION_TYPE (INT) >> >> DEF_FUNCTION_TYPE (FLOAT, FLOAT) >> DEF_FUNCTION_TYPE (FLOAT128, FLOAT128) >> Index: gcc/config/i386/i386.c >> =================================================================== >> --- gcc/config/i386/i386.c (revision 184644) >> +++ gcc/config/i386/i386.c (working copy) >> @@ -25798,6 +25798,33 @@ enum ix86_builtins >> /* CFString built-in for darwin */ >> IX86_BUILTIN_CFSTRING, >> >> + /* Builtins to get CPU features. */ >> + IX86_BUILTIN_CPU_SUPPORTS_CMOV, >> + IX86_BUILTIN_CPU_SUPPORTS_MMX, >> + IX86_BUILTIN_CPU_SUPPORTS_POPCOUNT, >> + IX86_BUILTIN_CPU_SUPPORTS_SSE, >> + IX86_BUILTIN_CPU_SUPPORTS_SSE2, >> + IX86_BUILTIN_CPU_SUPPORTS_SSE3, >> + IX86_BUILTIN_CPU_SUPPORTS_SSSE3, >> + IX86_BUILTIN_CPU_SUPPORTS_SSE4_1, >> + IX86_BUILTIN_CPU_SUPPORTS_SSE4_2, >> + /* Builtins to get CPU type. 
*/ >> + IX86_BUILTIN_CPU_INIT, >> + IX86_BUILTIN_CPU_IS_AMD, >> + IX86_BUILTIN_CPU_IS_INTEL, >> + IX86_BUILTIN_CPU_IS_INTEL_ATOM, >> + IX86_BUILTIN_CPU_IS_INTEL_CORE2, >> + IX86_BUILTIN_CPU_IS_INTEL_COREI7, >> + IX86_BUILTIN_CPU_IS_INTEL_COREI7_NEHALEM, >> + IX86_BUILTIN_CPU_IS_INTEL_COREI7_WESTMERE, >> + IX86_BUILTIN_CPU_IS_INTEL_COREI7_SANDYBRIDGE, >> + IX86_BUILTIN_CPU_IS_AMDFAM10H, >> + IX86_BUILTIN_CPU_IS_AMDFAM10H_BARCELONA, >> + IX86_BUILTIN_CPU_IS_AMDFAM10H_SHANGHAI, >> + IX86_BUILTIN_CPU_IS_AMDFAM10H_ISTANBUL, >> + IX86_BUILTIN_CPU_IS_AMDFAM15H_BDVER1, >> + IX86_BUILTIN_CPU_IS_AMDFAM15H_BDVER2, >> + >> IX86_BUILTIN_MAX >> }; >> >> @@ -27607,6 +27634,350 @@ ix86_init_mmx_sse_builtins (void) >> } >> } >> >> +/* Returns a struct type with name NAME and number of fields equal to >> + NUM_FIELDS. Each field is a unsigned int bit field of length 1 bit. */ >> + >> +static tree >> +build_struct_with_one_bit_fields (int num_fields, const char *name) >> +{ >> + int i; >> + char field_name [10]; >> + tree field = NULL_TREE, field_chain = NULL_TREE; >> + tree type = make_node (RECORD_TYPE); >> + >> + strcpy (field_name, "k_field"); >> + >> + for (i = 0; i < num_fields; i++) >> + { >> + /* Name the fields, 0_field, 1_field, ... */ >> + field_name [0] = '0' + i; >> + field = build_decl (UNKNOWN_LOCATION, FIELD_DECL, >> + get_identifier (field_name), unsigned_type_node); >> + DECL_BIT_FIELD (field) = 1; >> + DECL_SIZE (field) = bitsize_one_node; >> + if (field_chain != NULL_TREE) >> + DECL_CHAIN (field) = field_chain; >> + field_chain = field; >> + } >> + finish_builtin_struct (type, name, field_chain, NULL_TREE); >> + return type; >> +} >> + >> +/* Returns a extern, comdat VAR_DECL of type TYPE and name NAME. 
*/ >> + >> +static tree >> +make_var_decl (tree type, const char *name) >> +{ >> + tree new_decl; >> + struct varpool_node *vnode; >> + >> + new_decl = build_decl (UNKNOWN_LOCATION, >> + VAR_DECL, >> + get_identifier(name), >> + type); >> + >> + DECL_EXTERNAL (new_decl) = 1; >> + TREE_STATIC (new_decl) = 1; >> + TREE_PUBLIC (new_decl) = 1; >> + DECL_INITIAL (new_decl) = 0; >> + DECL_ARTIFICIAL (new_decl) = 0; >> + DECL_PRESERVE_P (new_decl) = 1; >> + >> + make_decl_one_only (new_decl, DECL_ASSEMBLER_NAME (new_decl)); >> + assemble_variable (new_decl, 0, 0, 0); >> + >> + vnode = varpool_node (new_decl); >> + gcc_assert (vnode != NULL); >> + /* Set finalized to 1, otherwise it asserts in function "write_symbol" in >> + lto-streamer-out.c. */ >> + vnode->finalized = 1; >> + >> + return new_decl; >> +} >> + >> +/* Traverses the chain of fields in STRUCT_TYPE and returns the FIELD_NUM >> + numbered field. */ >> + >> +static tree >> +get_field_from_struct (tree struct_type, int field_num) >> +{ >> + int i; >> + tree field = TYPE_FIELDS (struct_type); >> + >> + for (i = 0; i < field_num; i++, field = DECL_CHAIN(field)) >> + { >> + gcc_assert (field != NULL_TREE); >> + } >> + >> + return field; >> +} >> + >> +/* FNDECL is a __builtin_cpu_* call that is folded into an integer defined >> + in libgcc/config/i386/i386-cpuinfo.c */ >> + >> +static tree >> +fold_builtin_cpu (enum ix86_builtins fn_code) >> +{ >> + /* This is the order of bit-fields in __processor_features in >> + i386-cpuinfo.c */ >> + enum processor_features >> + { >> + F_CMOV = 0, >> + F_MMX, >> + F_POPCNT, >> + F_SSE, >> + F_SSE2, >> + F_SSE3, >> + F_SSSE3, >> + F_SSE4_1, >> + F_SSE4_2, >> + F_MAX >> + }; >> + >> + /* This is the order of bit-fields in __processor_model in >> + i386-cpuinfo.c */ >> + enum processor_model >> + { >> + M_AMD = 0, >> + M_INTEL, >> + M_INTEL_ATOM, >> + M_INTEL_CORE2, >> + M_INTEL_COREI7, >> + M_INTEL_COREI7_NEHALEM, >> + M_INTEL_COREI7_WESTMERE, >> + M_INTEL_COREI7_SANDYBRIDGE, >> 
+ M_AMDFAM10H, >> + M_AMDFAM10H_BARCELONA, >> + M_AMDFAM10H_SHANGHAI, >> + M_AMDFAM10H_ISTANBUL, >> + M_AMDFAM15H_BDVER1, >> + M_AMDFAM15H_BDVER2, >> + M_MAX >> + }; >> + >> + static tree __processor_features_type = NULL_TREE; >> + static tree __cpu_features_var = NULL_TREE; >> + static tree __processor_model_type = NULL_TREE; >> + static tree __cpu_model_var = NULL_TREE; >> + static tree field; >> + static tree which_struct; >> + >> + if (__processor_features_type == NULL_TREE) >> + __processor_features_type = build_struct_with_one_bit_fields (F_MAX, >> + "__processor_features"); >> + >> + if (__processor_model_type == NULL_TREE) >> + __processor_model_type = build_struct_with_one_bit_fields (M_MAX, >> + "__processor_model"); >> + >> + if (__cpu_features_var == NULL_TREE) >> + __cpu_features_var = make_var_decl (__processor_features_type, >> + "__cpu_features"); >> + >> + if (__cpu_model_var == NULL_TREE) >> + __cpu_model_var = make_var_decl (__processor_model_type, >> + "__cpu_model"); >> + >> + /* Look at the code to identify the field requested. 
*/ >> + switch (fn_code) >> + { >> + case IX86_BUILTIN_CPU_SUPPORTS_CMOV: >> + field = get_field_from_struct (__processor_features_type, F_CMOV); >> + which_struct = __cpu_features_var; >> + break; >> + case IX86_BUILTIN_CPU_SUPPORTS_MMX: >> + field = get_field_from_struct (__processor_features_type, F_MMX); >> + which_struct = __cpu_features_var; >> + break; >> + case IX86_BUILTIN_CPU_SUPPORTS_POPCOUNT: >> + field = get_field_from_struct (__processor_features_type, F_POPCNT); >> + which_struct = __cpu_features_var; >> + break; >> + case IX86_BUILTIN_CPU_SUPPORTS_SSE: >> + field = get_field_from_struct (__processor_features_type, F_SSE); >> + which_struct = __cpu_features_var; >> + break; >> + case IX86_BUILTIN_CPU_SUPPORTS_SSE2: >> + field = get_field_from_struct (__processor_features_type, F_SSE2); >> + which_struct = __cpu_features_var; >> + break; >> + case IX86_BUILTIN_CPU_SUPPORTS_SSE3: >> + field = get_field_from_struct (__processor_features_type, F_SSE3); >> + which_struct = __cpu_features_var; >> + break; >> + case IX86_BUILTIN_CPU_SUPPORTS_SSSE3: >> + field = get_field_from_struct (__processor_features_type, F_SSSE3); >> + which_struct = __cpu_features_var; >> + break; >> + case IX86_BUILTIN_CPU_SUPPORTS_SSE4_1: >> + field = get_field_from_struct (__processor_features_type, F_SSE4_1); >> + which_struct = __cpu_features_var; >> + break; >> + case IX86_BUILTIN_CPU_SUPPORTS_SSE4_2: >> + field = get_field_from_struct (__processor_features_type, F_SSE4_2); >> + which_struct = __cpu_features_var; >> + break; >> + case IX86_BUILTIN_CPU_IS_AMD: >> + field = get_field_from_struct (__processor_model_type, M_AMD); >> + which_struct = __cpu_model_var; >> + break; >> + case IX86_BUILTIN_CPU_IS_INTEL: >> + field = get_field_from_struct (__processor_model_type, M_INTEL); >> + which_struct = __cpu_model_var; >> + break; >> + case IX86_BUILTIN_CPU_IS_INTEL_ATOM: >> + field = get_field_from_struct (__processor_model_type, M_INTEL_ATOM); >> + which_struct = __cpu_model_var; 
>> + break; >> + case IX86_BUILTIN_CPU_IS_INTEL_CORE2: >> + field = get_field_from_struct (__processor_model_type, M_INTEL_CORE2); >> + which_struct = __cpu_model_var; >> + break; >> + case IX86_BUILTIN_CPU_IS_INTEL_COREI7: >> + field = get_field_from_struct (__processor_model_type, >> + M_INTEL_COREI7); >> + which_struct = __cpu_model_var; >> + break; >> + case IX86_BUILTIN_CPU_IS_INTEL_COREI7_NEHALEM: >> + field = get_field_from_struct (__processor_model_type, >> + M_INTEL_COREI7_NEHALEM); >> + which_struct = __cpu_model_var; >> + break; >> + case IX86_BUILTIN_CPU_IS_INTEL_COREI7_WESTMERE: >> + field = get_field_from_struct (__processor_model_type, >> + M_INTEL_COREI7_WESTMERE); >> + which_struct = __cpu_model_var; >> + break; >> + case IX86_BUILTIN_CPU_IS_INTEL_COREI7_SANDYBRIDGE: >> + field = get_field_from_struct (__processor_model_type, >> + M_INTEL_COREI7_SANDYBRIDGE); >> + which_struct = __cpu_model_var; >> + break; >> + case IX86_BUILTIN_CPU_IS_AMDFAM10H: >> + field = get_field_from_struct (__processor_model_type, >> + M_AMDFAM10H); >> + which_struct = __cpu_model_var; >> + break; >> + case IX86_BUILTIN_CPU_IS_AMDFAM10H_BARCELONA: >> + field = get_field_from_struct (__processor_model_type, >> + M_AMDFAM10H_BARCELONA); >> + which_struct = __cpu_model_var; >> + break; >> + case IX86_BUILTIN_CPU_IS_AMDFAM10H_SHANGHAI: >> + field = get_field_from_struct (__processor_model_type, >> + M_AMDFAM10H_SHANGHAI); >> + which_struct = __cpu_model_var; >> + break; >> + case IX86_BUILTIN_CPU_IS_AMDFAM10H_ISTANBUL: >> + field = get_field_from_struct (__processor_model_type, >> + M_AMDFAM10H_ISTANBUL); >> + which_struct = __cpu_model_var; >> + break; >> + case IX86_BUILTIN_CPU_IS_AMDFAM15H_BDVER1: >> + field = get_field_from_struct (__processor_model_type, >> + M_AMDFAM15H_BDVER1); >> + which_struct = __cpu_model_var; >> + break; >> + case IX86_BUILTIN_CPU_IS_AMDFAM15H_BDVER2: >> + field = get_field_from_struct (__processor_model_type, >> + M_AMDFAM15H_BDVER2); >> + 
which_struct = __cpu_model_var; >> + break; >> + default: >> + return NULL_TREE; >> + } >> + >> + return build3 (COMPONENT_REF, TREE_TYPE (field), which_struct, field, NULL_TREE); >> +} >> + >> +static tree >> +ix86_fold_builtin (tree fndecl, int n_args ATTRIBUTE_UNUSED, >> + tree *args ATTRIBUTE_UNUSED, bool ignore ATTRIBUTE_UNUSED) >> +{ >> + const char* decl_name = IDENTIFIER_POINTER (DECL_NAME (fndecl)); >> + if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD >> + && strstr(decl_name, "__builtin_cpu") != NULL) >> + { >> + enum ix86_builtins code = (enum ix86_builtins) >> + DECL_FUNCTION_CODE (fndecl); >> + return fold_builtin_cpu (code); >> + } >> + return NULL_TREE; >> +} >> + >> +/* A builtin to init/return the cpu type or feature. Returns an >> + integer and the type is a const if IS_CONST is set. */ >> + >> +static void >> +make_platform_builtin (const char* name, int code, int is_const) >> +{ >> + tree decl; >> + tree type; >> + >> + type = ix86_get_builtin_func_type (INT_FTYPE_VOID); >> + decl = add_builtin_function (name, type, code, BUILT_IN_MD, >> + NULL, NULL_TREE); >> + gcc_assert (decl != NULL_TREE); >> + ix86_builtins[(int) code] = decl; >> + if (is_const) >> + TREE_READONLY (decl) = 1; >> +} >> + >> +/* Builtins to get CPU type and features supported. 
*/ >> + >> +static void >> +ix86_init_platform_type_builtins (void) >> +{ >> + make_platform_builtin ("__builtin_cpu_init", >> + IX86_BUILTIN_CPU_INIT, 0); >> + make_platform_builtin ("__builtin_cpu_supports_cmov", >> + IX86_BUILTIN_CPU_SUPPORTS_CMOV, 1); >> + make_platform_builtin ("__builtin_cpu_supports_mmx", >> + IX86_BUILTIN_CPU_SUPPORTS_MMX, 1); >> + make_platform_builtin ("__builtin_cpu_supports_popcount", >> + IX86_BUILTIN_CPU_SUPPORTS_POPCOUNT, 1); >> + make_platform_builtin ("__builtin_cpu_supports_sse", >> + IX86_BUILTIN_CPU_SUPPORTS_SSE, 1); >> + make_platform_builtin ("__builtin_cpu_supports_sse2", >> + IX86_BUILTIN_CPU_SUPPORTS_SSE2, 1); >> + make_platform_builtin ("__builtin_cpu_supports_sse3", >> + IX86_BUILTIN_CPU_SUPPORTS_SSE3, 1); >> + make_platform_builtin ("__builtin_cpu_supports_ssse3", >> + IX86_BUILTIN_CPU_SUPPORTS_SSSE3, 1); >> + make_platform_builtin ("__builtin_cpu_supports_sse4_1", >> + IX86_BUILTIN_CPU_SUPPORTS_SSE4_1, 1); >> + make_platform_builtin ("__builtin_cpu_supports_sse4_2", >> + IX86_BUILTIN_CPU_SUPPORTS_SSE4_2, 1); >> + make_platform_builtin ("__builtin_cpu_is_amd", >> + IX86_BUILTIN_CPU_IS_AMD, 1); >> + make_platform_builtin ("__builtin_cpu_is_intel_atom", >> + IX86_BUILTIN_CPU_IS_INTEL_ATOM, 1); >> + make_platform_builtin ("__builtin_cpu_is_intel_core2", >> + IX86_BUILTIN_CPU_IS_INTEL_CORE2, 1); >> + make_platform_builtin ("__builtin_cpu_is_intel", >> + IX86_BUILTIN_CPU_IS_INTEL, 1); >> + make_platform_builtin ("__builtin_cpu_is_intel_corei7", >> + IX86_BUILTIN_CPU_IS_INTEL_COREI7, 1); >> + make_platform_builtin ("__builtin_cpu_is_intel_corei7_nehalem", >> + IX86_BUILTIN_CPU_IS_INTEL_COREI7_NEHALEM, 1); >> + make_platform_builtin ("__builtin_cpu_is_intel_corei7_westmere", >> + IX86_BUILTIN_CPU_IS_INTEL_COREI7_WESTMERE, 1); >> + make_platform_builtin ("__builtin_cpu_is_intel_corei7_sandybridge", >> + IX86_BUILTIN_CPU_IS_INTEL_COREI7_SANDYBRIDGE, 1); >> + make_platform_builtin ("__builtin_cpu_is_amdfam10", >> + 
IX86_BUILTIN_CPU_IS_AMDFAM10H, 1); >> + make_platform_builtin ("__builtin_cpu_is_amdfam10_barcelona", >> + IX86_BUILTIN_CPU_IS_AMDFAM10H_BARCELONA, 1); >> + make_platform_builtin ("__builtin_cpu_is_amdfam10_shanghai", >> + IX86_BUILTIN_CPU_IS_AMDFAM10H_SHANGHAI, 1); >> + make_platform_builtin ("__builtin_cpu_is_amdfam10_istanbul", >> + IX86_BUILTIN_CPU_IS_AMDFAM10H_ISTANBUL, 1); >> + make_platform_builtin ("__builtin_cpu_is_amdfam15_bdver1", >> + IX86_BUILTIN_CPU_IS_AMDFAM15H_BDVER1, 1); >> + make_platform_builtin ("__builtin_cpu_is_amdfam15_bdver2", >> + IX86_BUILTIN_CPU_IS_AMDFAM15H_BDVER2, 1); >> +} >> + >> /* Detect if this unaligned vectorizable load/stores should be >> considered slow. This is true for core2 where the movdqu insn >> is slow, ~5x slower than the movdqa. */ >> @@ -27705,6 +28076,9 @@ ix86_init_builtins (void) >> >> ix86_init_builtin_types (); >> >> + /* Builtins to get CPU type and features. */ >> + ix86_init_platform_type_builtins (); >> + >> /* TFmode support builtins. */ >> def_builtin_const (0, "__builtin_infq", >> FLOAT128_FTYPE_VOID, IX86_BUILTIN_INFQ); >> @@ -29321,6 +29695,48 @@ ix86_expand_builtin (tree exp, rtx target, rtx sub >> enum machine_mode mode0, mode1, mode2, mode3, mode4; >> unsigned int fcode = DECL_FUNCTION_CODE (fndecl); >> >> + /* For CPU builtins that can be folded, fold first and expand the fold. 
*/ >> + switch (fcode) >> + { >> + case IX86_BUILTIN_CPU_SUPPORTS_CMOV: >> + case IX86_BUILTIN_CPU_SUPPORTS_MMX: >> + case IX86_BUILTIN_CPU_SUPPORTS_POPCOUNT: >> + case IX86_BUILTIN_CPU_SUPPORTS_SSE: >> + case IX86_BUILTIN_CPU_SUPPORTS_SSE2: >> + case IX86_BUILTIN_CPU_SUPPORTS_SSE3: >> + case IX86_BUILTIN_CPU_SUPPORTS_SSSE3: >> + case IX86_BUILTIN_CPU_SUPPORTS_SSE4_1: >> + case IX86_BUILTIN_CPU_SUPPORTS_SSE4_2: >> + case IX86_BUILTIN_CPU_IS_AMD: >> + case IX86_BUILTIN_CPU_IS_INTEL: >> + case IX86_BUILTIN_CPU_IS_INTEL_ATOM: >> + case IX86_BUILTIN_CPU_IS_INTEL_CORE2: >> + case IX86_BUILTIN_CPU_IS_INTEL_COREI7: >> + case IX86_BUILTIN_CPU_IS_INTEL_COREI7_NEHALEM: >> + case IX86_BUILTIN_CPU_IS_INTEL_COREI7_WESTMERE: >> + case IX86_BUILTIN_CPU_IS_INTEL_COREI7_SANDYBRIDGE: >> + case IX86_BUILTIN_CPU_IS_AMDFAM10H: >> + case IX86_BUILTIN_CPU_IS_AMDFAM10H_BARCELONA: >> + case IX86_BUILTIN_CPU_IS_AMDFAM10H_SHANGHAI: >> + case IX86_BUILTIN_CPU_IS_AMDFAM10H_ISTANBUL: >> + case IX86_BUILTIN_CPU_IS_AMDFAM15H_BDVER1: >> + case IX86_BUILTIN_CPU_IS_AMDFAM15H_BDVER2: >> + { >> + tree fold_expr = fold_builtin_cpu ((enum ix86_builtins) fcode); >> + gcc_assert (fold_expr != NULL_TREE); >> + return expand_expr (fold_expr, target, mode, EXPAND_NORMAL); >> + } >> + case IX86_BUILTIN_CPU_INIT: >> + { >> + /* Make it call __cpu_indicator_init in libgcc. */ >> + tree call_expr, fndecl, type; >> + type = build_function_type_list (integer_type_node, NULL_TREE); >> + fndecl = build_fn_decl ("__cpu_indicator_init", type); >> + call_expr = build_call_expr (fndecl, 0); >> + return expand_expr (call_expr, target, mode, EXPAND_NORMAL); >> + } >> + } >> + >> /* Determine whether the builtin function is available under the current ISA. >> Originally the builtin was not created if it wasn't applicable to the >> current ISA based on the command line switches. 
With function specific >> @@ -38867,6 +39283,9 @@ ix86_loop_unroll_adjust (unsigned nunroll, struct >> #undef TARGET_BUILD_BUILTIN_VA_LIST >> #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list >> >> +#undef TARGET_FOLD_BUILTIN >> +#define TARGET_FOLD_BUILTIN ix86_fold_builtin >> + >> #undef TARGET_SLOW_UNALIGNED_VECTOR_MEMOP >> #define TARGET_SLOW_UNALIGNED_VECTOR_MEMOP ix86_slow_unaligned_vector_memop >> >> >> -- >> This patch is available for review at http://codereview.appspot.com/5715051
Sign in to reply to this message.
I committed this patch to google/main. I have created a new patch for review for trunk here: http://gcc.gnu.org/ml/gcc-patches/2012-03/msg00458.html Thanks, -Sri. On Thu, Mar 1, 2012 at 2:08 PM, Sriraman Tallam <tmsriram@google.com> wrote: > Removing [google] prefix from the subject line. > > On Thu, Mar 1, 2012 at 12:54 PM, Xinliang David Li <davidxl@google.com> wrote: >> Sri, probably need to remove the [google] prefix in the subject line >> to prevent this from being filtered. >> >> David >> >> On Thu, Mar 1, 2012 at 12:45 PM, Sriraman Tallam <tmsriram@google.com> wrote: >>> Patch to add builtins to detect CPU type: >>> ======================================== >>> >>> I have ported the patch from google/gcc-4_6 to google/main. I also want this >>> patch to be considered for trunk. Please see this discussion: >>> http://gcc.gnu.org/ml/gcc-patches/2011-08/msg01355.html >>> when this patch was reviewed the last time. >>> >>> One of the main concerns was about making CPU detection initialization a >>> constructor. The main point raised was about constructor ordering. I have now >>> added a priority value of 101 to the CPU detection constructor to make it very high >>> priority so that it is guaranteed to fire before every constructor without >>> an explicitly marked priority value. However, IFUNC initializers >>> will still fire before this constructor, so the cpu initialization routine >>> has to be explicitly called in such initializers for which I have added a >>> builtin: __builtin_cpu_init (). >>> >>> I would like to reopen discussions on this to make it suitable for trunk >>> this time around. 
>>> >>> This patch adds the following new builtins: >>> >>> __builtin_cpu_init >>> __builtin_cpu_supports_cmov >>> __builtin_cpu_supports_mmx >>> __builtin_cpu_supports_popcount >>> __builtin_cpu_supports_sse >>> __builtin_cpu_supports_sse2 >>> __builtin_cpu_supports_sse3 >>> __builtin_cpu_supports_ssse3 >>> __builtin_cpu_supports_sse4_1 >>> __builtin_cpu_supports_sse4_2 >>> __builtin_cpu_is_amd >>> __builtin_cpu_is_intel_atom >>> __builtin_cpu_is_intel_core2 >>> __builtin_cpu_is_intel >>> __builtin_cpu_is_intel_corei7 >>> __builtin_cpu_is_intel_corei7_nehalem >>> __builtin_cpu_is_intel_corei7_westmere >>> __builtin_cpu_is_intel_corei7_sandybridge >>> __builtin_cpu_is_amdfam10 >>> __builtin_cpu_is_amdfam10_barcelona >>> __builtin_cpu_is_amdfam10_shanghai >>> __builtin_cpu_is_amdfam10_istanbul >>> __builtin_cpu_is_amdfam15_bdver1 >>> __builtin_cpu_is_amdfam15_bdver2 >>> >>> >>> * config/i386/i386.c (build_struct_with_one_bit_fields): New function. >>> (make_var_decl): New function. >>> (get_field_from_struct): New function. >>> (fold_builtin_target): New function. >>> (ix86_fold_builtin): New function. >>> (ix86_expand_builtin): Expand new builtins by folding them. >>> (make_platform_builtin): New functions. >>> (ix86_init_platform_type_builtins): Make the new builtins. >>> (ix86_init_builtins): Make new builtins to detect CPU type. >>> (TARGET_FOLD_BUILTIN): New macro. >>> (IX86_BUILTIN_CPU_SUPPORTS_CMOV): New enum value. >>> (IX86_BUILTIN_CPU_SUPPORTS_MMX): New enum value. >>> (IX86_BUILTIN_CPU_SUPPORTS_POPCOUNT): New enum value. >>> (IX86_BUILTIN_CPU_SUPPORTS_SSE): New enum value. >>> (IX86_BUILTIN_CPU_SUPPORTS_SSE2): New enum value. >>> (IX86_BUILTIN_CPU_SUPPORTS_SSE3): New enum value. >>> (IX86_BUILTIN_CPU_SUPPORTS_SSSE3): New enum value. >>> (IX86_BUILTIN_CPU_SUPPORTS_SSE4_1): New enum value. >>> (IX86_BUILTIN_CPU_SUPPORTS_SSE4_2): New enum value. >>> (IX86_BUILTIN_CPU_INIT): New enum value. >>> (IX86_BUILTIN_CPU_IS_AMD): New enum value. 
>>> (IX86_BUILTIN_CPU_IS_INTEL): New enum value. >>> (IX86_BUILTIN_CPU_IS_INTEL_ATOM): New enum value. >>> (IX86_BUILTIN_CPU_IS_INTEL_CORE2): New enum value. >>> (IX86_BUILTIN_CPU_IS_INTEL_COREI7_NEHALEM): New enum value. >>> (IX86_BUILTIN_CPU_IS_INTEL_COREI7_WESTMERE): New enum value. >>> (IX86_BUILTIN_CPU_IS_INTEL_COREI7_SANDYBRIDGE): New enum value. >>> (IX86_BUILTIN_CPU_IS_AMDFAM10H_BARCELONA): New enum value. >>> (IX86_BUILTIN_CPU_IS_AMDFAM10H_SHANGHAI): New enum value. >>> (IX86_BUILTIN_CPU_IS_AMDFAM10H_ISTANBUL): New enum value. >>> (IX86_BUILTIN_CPU_IS_AMDFAM15H_BDVER1): New enum value. >>> (IX86_BUILTIN_CPU_IS_AMDFAM15H_BDVER2): New enum value. >>> * config/i386/i386-builtin-types.def: New function type. >>> * testsuite/gcc.target/i386/builtin_target.c: New testcase. >>> >>> * libgcc/config/i386/i386-cpuinfo.c: New file. >>> * libgcc/config/i386/t-cpuinfo: New file. >>> * libgcc/config.host: Include t-cpuinfo. >>> * libgcc/config/i386/libgcc-glibc.ver: Version symbols __cpu_model >>> and __cpu_features. 
>>> >>> >>> >>> Index: libgcc/config.host >>> =================================================================== >>> --- libgcc/config.host (revision 184644) >>> +++ libgcc/config.host (working copy) >>> @@ -1128,7 +1128,7 @@ i[34567]86-*-linux* | x86_64-*-linux* | \ >>> i[34567]86-*-kfreebsd*-gnu | x86_64-*-kfreebsd*-gnu | \ >>> i[34567]86-*-knetbsd*-gnu | \ >>> i[34567]86-*-gnu*) >>> - tmake_file="${tmake_file} t-tls i386/t-linux" >>> + tmake_file="${tmake_file} t-tls i386/t-linux i386/t-cpuinfo" >>> if test "$libgcc_cv_cfi" = "yes"; then >>> tmake_file="${tmake_file} t-stack i386/t-stack-i386" >>> fi >>> Index: libgcc/config/i386/t-cpuinfo >>> =================================================================== >>> --- libgcc/config/i386/t-cpuinfo (revision 0) >>> +++ libgcc/config/i386/t-cpuinfo (revision 0) >>> @@ -0,0 +1 @@ >>> +LIB2ADD += $(srcdir)/config/i386/i386-cpuinfo.c >>> Index: libgcc/config/i386/i386-cpuinfo.c >>> =================================================================== >>> --- libgcc/config/i386/i386-cpuinfo.c (revision 0) >>> +++ libgcc/config/i386/i386-cpuinfo.c (revision 0) >>> @@ -0,0 +1,306 @@ >>> +/* Get CPU type and Features for x86 processors. >>> + Copyright (C) 2011 Free Software Foundation, Inc. >>> + Contributed by Sriraman Tallam (tmsriram@google.com) >>> + >>> +This file is part of GCC. >>> + >>> +GCC is free software; you can redistribute it and/or modify it under >>> +the terms of the GNU General Public License as published by the Free >>> +Software Foundation; either version 3, or (at your option) any later >>> +version. >>> + >>> +GCC is distributed in the hope that it will be useful, but WITHOUT ANY >>> +WARRANTY; without even the implied warranty of MERCHANTABILITY or >>> +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License >>> +for more details. >>> + >>> +You should have received a copy of the GNU General Public License >>> +along with GCC; see the file COPYING3. 
If not see >>> +<http://www.gnu.org/licenses/>. */ >>> + >>> +#include "cpuid.h" >>> +#include "tsystem.h" >>> + >>> +int __cpu_indicator_init (void) __attribute__ ((constructor (101))); >>> + >>> +enum vendor_signatures >>> +{ >>> + SIG_INTEL = 0x756e6547 /* Genu */, >>> + SIG_AMD = 0x68747541 /* Auth */ >>> +}; >>> + >>> +/* ISA Features supported. */ >>> + >>> +struct __processor_features >>> +{ >>> + unsigned int __cpu_cmov : 1; >>> + unsigned int __cpu_mmx : 1; >>> + unsigned int __cpu_popcnt : 1; >>> + unsigned int __cpu_sse : 1; >>> + unsigned int __cpu_sse2 : 1; >>> + unsigned int __cpu_sse3 : 1; >>> + unsigned int __cpu_ssse3 : 1; >>> + unsigned int __cpu_sse4_1 : 1; >>> + unsigned int __cpu_sse4_2 : 1; >>> +} __cpu_features; >>> + >>> +/* Processor Model. */ >>> + >>> +struct __processor_model >>> +{ >>> + /* Vendor. */ >>> + unsigned int __cpu_is_amd : 1; >>> + unsigned int __cpu_is_intel : 1; >>> + /* CPU type. */ >>> + unsigned int __cpu_is_intel_atom : 1; >>> + unsigned int __cpu_is_intel_core2 : 1; >>> + unsigned int __cpu_is_intel_corei7 : 1; >>> + unsigned int __cpu_is_intel_corei7_nehalem : 1; >>> + unsigned int __cpu_is_intel_corei7_westmere : 1; >>> + unsigned int __cpu_is_intel_corei7_sandybridge : 1; >>> + unsigned int __cpu_is_amdfam10h : 1; >>> + unsigned int __cpu_is_amdfam10h_barcelona : 1; >>> + unsigned int __cpu_is_amdfam10h_shanghai : 1; >>> + unsigned int __cpu_is_amdfam10h_istanbul : 1; >>> + unsigned int __cpu_is_amdfam15h_bdver1 : 1; >>> + unsigned int __cpu_is_amdfam15h_bdver2 : 1; >>> +} __cpu_model; >>> + >>> +/* Get the specific type of AMD CPU. */ >>> + >>> +static void >>> +get_amd_cpu (unsigned int family, unsigned int model) >>> +{ >>> + switch (family) >>> + { >>> + /* AMD Family 10h. */ >>> + case 0x10: >>> + switch (model) >>> + { >>> + case 0x2: >>> + /* Barcelona. */ >>> + __cpu_model.__cpu_is_amdfam10h = 1; >>> + __cpu_model.__cpu_is_amdfam10h_barcelona = 1; >>> + break; >>> + case 0x4: >>> + /* Shanghai. 
*/ >>> + __cpu_model.__cpu_is_amdfam10h = 1; >>> + __cpu_model.__cpu_is_amdfam10h_shanghai = 1; >>> + break; >>> + case 0x8: >>> + /* Istanbul. */ >>> + __cpu_model.__cpu_is_amdfam10h = 1; >>> + __cpu_model.__cpu_is_amdfam10h_istanbul = 1; >>> + break; >>> + default: >>> + break; >>> + } >>> + break; >>> + /* AMD Family 15h. */ >>> + case 0x15: >>> + /* Bulldozer version 1. */ >>> + if (model >= 0 && model <= 0xf) >>> + __cpu_model.__cpu_is_amdfam15h_bdver1 = 1; >>> + /* Bulldozer version 2. */ >>> + if (model >= 0x10 && model <= 0x1f) >>> + __cpu_model.__cpu_is_amdfam15h_bdver2 = 1; >>> + break; >>> + default: >>> + break; >>> + } >>> +} >>> + >>> +/* Get the specific type of Intel CPU. */ >>> + >>> +static void >>> +get_intel_cpu (unsigned int family, unsigned int model, unsigned int brand_id) >>> +{ >>> + /* Parse family and model only if brand ID is 0. */ >>> + if (brand_id == 0) >>> + { >>> + switch (family) >>> + { >>> + case 0x5: >>> + /* Pentium. */ >>> + break; >>> + case 0x6: >>> + switch (model) >>> + { >>> + case 0x1c: >>> + case 0x26: >>> + /* Atom. */ >>> + __cpu_model.__cpu_is_intel_atom = 1; >>> + break; >>> + case 0x1a: >>> + case 0x1e: >>> + case 0x1f: >>> + case 0x2e: >>> + /* Nehalem. */ >>> + __cpu_model.__cpu_is_intel_corei7 = 1; >>> + __cpu_model.__cpu_is_intel_corei7_nehalem = 1; >>> + break; >>> + case 0x25: >>> + case 0x2c: >>> + case 0x2f: >>> + /* Westmere. */ >>> + __cpu_model.__cpu_is_intel_corei7 = 1; >>> + __cpu_model.__cpu_is_intel_corei7_westmere = 1; >>> + break; >>> + case 0x2a: >>> + /* Sandy Bridge. */ >>> + __cpu_model.__cpu_is_intel_corei7 = 1; >>> + __cpu_model.__cpu_is_intel_corei7_sandybridge = 1; >>> + break; >>> + case 0x17: >>> + case 0x1d: >>> + /* Penryn. */ >>> + case 0x0f: >>> + /* Merom. */ >>> + __cpu_model.__cpu_is_intel_core2 = 1; >>> + break; >>> + default: >>> + break; >>> + } >>> + break; >>> + default: >>> + /* We have no idea. 
*/ >>> + break; >>> + } >>> + } >>> +} >>> + >>> +static void >>> +get_available_features (unsigned int ecx, unsigned int edx) >>> +{ >>> + __cpu_features.__cpu_cmov = (edx & bit_CMOV) ? 1 : 0; >>> + __cpu_features.__cpu_mmx = (edx & bit_MMX) ? 1 : 0; >>> + __cpu_features.__cpu_sse = (edx & bit_SSE) ? 1 : 0; >>> + __cpu_features.__cpu_sse2 = (edx & bit_SSE2) ? 1 : 0; >>> + __cpu_features.__cpu_popcnt = (ecx & bit_POPCNT) ? 1 : 0; >>> + __cpu_features.__cpu_sse3 = (ecx & bit_SSE3) ? 1 : 0; >>> + __cpu_features.__cpu_ssse3 = (ecx & bit_SSSE3) ? 1 : 0; >>> + __cpu_features.__cpu_sse4_1 = (ecx & bit_SSE4_1) ? 1 : 0; >>> + __cpu_features.__cpu_sse4_2 = (ecx & bit_SSE4_2) ? 1 : 0; >>> +} >>> + >>> + >>> +/* Sanity check for the vendor and cpu type flags. */ >>> + >>> +static int >>> +sanity_check (void) >>> +{ >>> + unsigned int one_type = 0; >>> + >>> + /* Vendor cannot be Intel and AMD. */ >>> + gcc_assert((__cpu_model.__cpu_is_intel == 0) >>> + || (__cpu_model.__cpu_is_amd == 0)); >>> + >>> + /* Only one CPU type can be set. */ >>> + one_type = (__cpu_model.__cpu_is_intel_atom >>> + + __cpu_model.__cpu_is_intel_core2 >>> + + __cpu_model.__cpu_is_intel_corei7_nehalem >>> + + __cpu_model.__cpu_is_intel_corei7_westmere >>> + + __cpu_model.__cpu_is_intel_corei7_sandybridge >>> + + __cpu_model.__cpu_is_amdfam10h_barcelona >>> + + __cpu_model.__cpu_is_amdfam10h_shanghai >>> + + __cpu_model.__cpu_is_amdfam10h_istanbul >>> + + __cpu_model.__cpu_is_amdfam15h_bdver1 >>> + + __cpu_model.__cpu_is_amdfam15h_bdver2); >>> + >>> + gcc_assert (one_type <= 1); >>> + return 0; >>> +} >>> + >>> +/* A noinline function calling __get_cpuid. Having many calls to >>> + cpuid in one function in 32-bit mode causes GCC to complain: >>> + "can’t find a register in class ‘CLOBBERED_REGS’". This is >>> + related to PR rtl-optimization 44174. 
*/ >>> + >>> +static int __attribute__ ((noinline)) >>> +__get_cpuid_output (unsigned int __level, >>> + unsigned int *__eax, unsigned int *__ebx, >>> + unsigned int *__ecx, unsigned int *__edx) >>> +{ >>> + return __get_cpuid (__level, __eax, __ebx, __ecx, __edx); >>> +} >>> + >>> + >>> +/* A constructor function that sets __cpu_model and __cpu_features with >>> + the right values. This needs to run only once. This constructor is >>> + given the highest priority and it will run before constructors without >>> + the priority set. However, it still runs after ifunc initializers and >>> + needs to be called explicitly there. */ >>> + >>> +int __attribute__ ((constructor (101))) >>> +__cpu_indicator_init (void) >>> +{ >>> + unsigned int eax, ebx, ecx, edx; >>> + >>> + int max_level = 5; >>> + unsigned int vendor; >>> + unsigned int model, family, brand_id; >>> + unsigned int extended_model, extended_family; >>> + static int called = 0; >>> + >>> + /* This function needs to run just once. */ >>> + if (called) >>> + return 0; >>> + else >>> + called = 1; >>> + >>> + /* Assume cpuid insn present. Run in level 0 to get vendor id. */ >>> + if (!__get_cpuid_output (0, &eax, &ebx, &ecx, &edx)) >>> + return -1; >>> + >>> + vendor = ebx; >>> + max_level = eax; >>> + >>> + if (max_level < 1) >>> + return -1; >>> + >>> + if (!__get_cpuid_output (1, &eax, &ebx, &ecx, &edx)) >>> + return -1; >>> + >>> + model = (eax >> 4) & 0x0f; >>> + family = (eax >> 8) & 0x0f; >>> + brand_id = ebx & 0xff; >>> + extended_model = (eax >> 12) & 0xf0; >>> + extended_family = (eax >> 20) & 0xff; >>> + >>> + if (vendor == SIG_INTEL) >>> + { >>> + /* Adjust model and family for Intel CPUS. */ >>> + if (family == 0x0f) >>> + { >>> + family += extended_family; >>> + model += extended_model; >>> + } >>> + else if (family == 0x06) >>> + model += extended_model; >>> + >>> + /* Get CPU type. 
*/ >>> + __cpu_model.__cpu_is_intel = 1; >>> + get_intel_cpu (family, model, brand_id); >>> + } >>> + >>> + if (vendor == SIG_AMD) >>> + { >>> + /* Adjust model and family for AMD CPUS. */ >>> + if (family == 0x0f) >>> + { >>> + family += extended_family; >>> + model += (extended_model << 4); >>> + } >>> + >>> + /* Get CPU type. */ >>> + __cpu_model.__cpu_is_amd = 1; >>> + get_amd_cpu (family, model); >>> + } >>> + >>> + /* Find available features. */ >>> + get_available_features (ecx, edx); >>> + >>> + sanity_check (); >>> + >>> + return 0; >>> +} >>> Index: libgcc/config/i386/libgcc-glibc.ver >>> =================================================================== >>> --- libgcc/config/i386/libgcc-glibc.ver (revision 184644) >>> +++ libgcc/config/i386/libgcc-glibc.ver (working copy) >>> @@ -147,6 +147,11 @@ GCC_4.3.0 { >>> __trunctfxf2 >>> __unordtf2 >>> } >>> + >>> +GCC_4.7.0 { >>> + __cpu_model >>> + __cpu_features >>> +} >>> %else >>> GCC_4.4.0 { >>> __addtf3 >>> @@ -183,4 +188,8 @@ GCC_4.4.0 { >>> GCC_4.5.0 { >>> __extendxftf2 >>> } >>> +GCC_4.7.0 { >>> + __cpu_model >>> + __cpu_features >>> +} >>> %endif >>> Index: gcc/testsuite/gcc.target/i386/builtin_target.c >>> =================================================================== >>> --- gcc/testsuite/gcc.target/i386/builtin_target.c (revision 0) >>> +++ gcc/testsuite/gcc.target/i386/builtin_target.c (revision 0) >>> @@ -0,0 +1,61 @@ >>> +/* This test checks if the __builtin_cpu_* calls are recognized. 
*/ >>> + >>> +/* { dg-do run } */ >>> + >>> +int >>> +fn1 () >>> +{ >>> + if (__builtin_cpu_supports_cmov () < 0) >>> + return -1; >>> + if (__builtin_cpu_supports_mmx () < 0) >>> + return -1; >>> + if (__builtin_cpu_supports_popcount () < 0) >>> + return -1; >>> + if (__builtin_cpu_supports_sse () < 0) >>> + return -1; >>> + if (__builtin_cpu_supports_sse2 () < 0) >>> + return -1; >>> + if (__builtin_cpu_supports_sse3 () < 0) >>> + return -1; >>> + if (__builtin_cpu_supports_ssse3 () < 0) >>> + return -1; >>> + if (__builtin_cpu_supports_sse4_1 () < 0) >>> + return -1; >>> + if (__builtin_cpu_supports_sse4_2 () < 0) >>> + return -1; >>> + if (__builtin_cpu_is_amd () < 0) >>> + return -1; >>> + if (__builtin_cpu_is_intel () < 0) >>> + return -1; >>> + if (__builtin_cpu_is_intel_atom () < 0) >>> + return -1; >>> + if (__builtin_cpu_is_intel_core2 () < 0) >>> + return -1; >>> + if (__builtin_cpu_is_intel_corei7 () < 0) >>> + return -1; >>> + if (__builtin_cpu_is_intel_corei7_nehalem () < 0) >>> + return -1; >>> + if (__builtin_cpu_is_intel_corei7_westmere () < 0) >>> + return -1; >>> + if (__builtin_cpu_is_intel_corei7_sandybridge () < 0) >>> + return -1; >>> + if (__builtin_cpu_is_amdfam10 () < 0) >>> + return -1; >>> + if (__builtin_cpu_is_amdfam10_barcelona () < 0) >>> + return -1; >>> + if (__builtin_cpu_is_amdfam10_shanghai () < 0) >>> + return -1; >>> + if (__builtin_cpu_is_amdfam10_istanbul () < 0) >>> + return -1; >>> + if (__builtin_cpu_is_amdfam15_bdver1 () < 0) >>> + return -1; >>> + if (__builtin_cpu_is_amdfam15_bdver2 () < 0) >>> + return -1; >>> + >>> + return 0; >>> +} >>> + >>> +int main () >>> +{ >>> + return fn1 (); >>> +} >>> Index: gcc/config/i386/i386-builtin-types.def >>> =================================================================== >>> --- gcc/config/i386/i386-builtin-types.def (revision 184644) >>> +++ gcc/config/i386/i386-builtin-types.def (working copy) >>> @@ -143,6 +143,7 @@ DEF_FUNCTION_TYPE (UINT64) >>> DEF_FUNCTION_TYPE (UNSIGNED) 
>>> DEF_FUNCTION_TYPE (VOID) >>> DEF_FUNCTION_TYPE (PVOID) >>> +DEF_FUNCTION_TYPE (INT) >>> >>> DEF_FUNCTION_TYPE (FLOAT, FLOAT) >>> DEF_FUNCTION_TYPE (FLOAT128, FLOAT128) >>> Index: gcc/config/i386/i386.c >>> =================================================================== >>> --- gcc/config/i386/i386.c (revision 184644) >>> +++ gcc/config/i386/i386.c (working copy) >>> @@ -25798,6 +25798,33 @@ enum ix86_builtins >>> /* CFString built-in for darwin */ >>> IX86_BUILTIN_CFSTRING, >>> >>> + /* Builtins to get CPU features. */ >>> + IX86_BUILTIN_CPU_SUPPORTS_CMOV, >>> + IX86_BUILTIN_CPU_SUPPORTS_MMX, >>> + IX86_BUILTIN_CPU_SUPPORTS_POPCOUNT, >>> + IX86_BUILTIN_CPU_SUPPORTS_SSE, >>> + IX86_BUILTIN_CPU_SUPPORTS_SSE2, >>> + IX86_BUILTIN_CPU_SUPPORTS_SSE3, >>> + IX86_BUILTIN_CPU_SUPPORTS_SSSE3, >>> + IX86_BUILTIN_CPU_SUPPORTS_SSE4_1, >>> + IX86_BUILTIN_CPU_SUPPORTS_SSE4_2, >>> + /* Builtins to get CPU type. */ >>> + IX86_BUILTIN_CPU_INIT, >>> + IX86_BUILTIN_CPU_IS_AMD, >>> + IX86_BUILTIN_CPU_IS_INTEL, >>> + IX86_BUILTIN_CPU_IS_INTEL_ATOM, >>> + IX86_BUILTIN_CPU_IS_INTEL_CORE2, >>> + IX86_BUILTIN_CPU_IS_INTEL_COREI7, >>> + IX86_BUILTIN_CPU_IS_INTEL_COREI7_NEHALEM, >>> + IX86_BUILTIN_CPU_IS_INTEL_COREI7_WESTMERE, >>> + IX86_BUILTIN_CPU_IS_INTEL_COREI7_SANDYBRIDGE, >>> + IX86_BUILTIN_CPU_IS_AMDFAM10H, >>> + IX86_BUILTIN_CPU_IS_AMDFAM10H_BARCELONA, >>> + IX86_BUILTIN_CPU_IS_AMDFAM10H_SHANGHAI, >>> + IX86_BUILTIN_CPU_IS_AMDFAM10H_ISTANBUL, >>> + IX86_BUILTIN_CPU_IS_AMDFAM15H_BDVER1, >>> + IX86_BUILTIN_CPU_IS_AMDFAM15H_BDVER2, >>> + >>> IX86_BUILTIN_MAX >>> }; >>> >>> @@ -27607,6 +27634,350 @@ ix86_init_mmx_sse_builtins (void) >>> } >>> } >>> >>> +/* Returns a struct type with name NAME and number of fields equal to >>> + NUM_FIELDS. Each field is a unsigned int bit field of length 1 bit. 
*/ >>> + >>> +static tree >>> +build_struct_with_one_bit_fields (int num_fields, const char *name) >>> +{ >>> + int i; >>> + char field_name [10]; >>> + tree field = NULL_TREE, field_chain = NULL_TREE; >>> + tree type = make_node (RECORD_TYPE); >>> + >>> + strcpy (field_name, "k_field"); >>> + >>> + for (i = 0; i < num_fields; i++) >>> + { >>> + /* Name the fields, 0_field, 1_field, ... */ >>> + field_name [0] = '0' + i; >>> + field = build_decl (UNKNOWN_LOCATION, FIELD_DECL, >>> + get_identifier (field_name), unsigned_type_node); >>> + DECL_BIT_FIELD (field) = 1; >>> + DECL_SIZE (field) = bitsize_one_node; >>> + if (field_chain != NULL_TREE) >>> + DECL_CHAIN (field) = field_chain; >>> + field_chain = field; >>> + } >>> + finish_builtin_struct (type, name, field_chain, NULL_TREE); >>> + return type; >>> +} >>> + >>> +/* Returns a extern, comdat VAR_DECL of type TYPE and name NAME. */ >>> + >>> +static tree >>> +make_var_decl (tree type, const char *name) >>> +{ >>> + tree new_decl; >>> + struct varpool_node *vnode; >>> + >>> + new_decl = build_decl (UNKNOWN_LOCATION, >>> + VAR_DECL, >>> + get_identifier(name), >>> + type); >>> + >>> + DECL_EXTERNAL (new_decl) = 1; >>> + TREE_STATIC (new_decl) = 1; >>> + TREE_PUBLIC (new_decl) = 1; >>> + DECL_INITIAL (new_decl) = 0; >>> + DECL_ARTIFICIAL (new_decl) = 0; >>> + DECL_PRESERVE_P (new_decl) = 1; >>> + >>> + make_decl_one_only (new_decl, DECL_ASSEMBLER_NAME (new_decl)); >>> + assemble_variable (new_decl, 0, 0, 0); >>> + >>> + vnode = varpool_node (new_decl); >>> + gcc_assert (vnode != NULL); >>> + /* Set finalized to 1, otherwise it asserts in function "write_symbol" in >>> + lto-streamer-out.c. */ >>> + vnode->finalized = 1; >>> + >>> + return new_decl; >>> +} >>> + >>> +/* Traverses the chain of fields in STRUCT_TYPE and returns the FIELD_NUM >>> + numbered field. 
*/ >>> + >>> +static tree >>> +get_field_from_struct (tree struct_type, int field_num) >>> +{ >>> + int i; >>> + tree field = TYPE_FIELDS (struct_type); >>> + >>> + for (i = 0; i < field_num; i++, field = DECL_CHAIN(field)) >>> + { >>> + gcc_assert (field != NULL_TREE); >>> + } >>> + >>> + return field; >>> +} >>> + >>> +/* FNDECL is a __builtin_cpu_* call that is folded into an integer defined >>> + in libgcc/config/i386/i386-cpuinfo.c */ >>> + >>> +static tree >>> +fold_builtin_cpu (enum ix86_builtins fn_code) >>> +{ >>> + /* This is the order of bit-fields in __processor_features in >>> + i386-cpuinfo.c */ >>> + enum processor_features >>> + { >>> + F_CMOV = 0, >>> + F_MMX, >>> + F_POPCNT, >>> + F_SSE, >>> + F_SSE2, >>> + F_SSE3, >>> + F_SSSE3, >>> + F_SSE4_1, >>> + F_SSE4_2, >>> + F_MAX >>> + }; >>> + >>> + /* This is the order of bit-fields in __processor_model in >>> + i386-cpuinfo.c */ >>> + enum processor_model >>> + { >>> + M_AMD = 0, >>> + M_INTEL, >>> + M_INTEL_ATOM, >>> + M_INTEL_CORE2, >>> + M_INTEL_COREI7, >>> + M_INTEL_COREI7_NEHALEM, >>> + M_INTEL_COREI7_WESTMERE, >>> + M_INTEL_COREI7_SANDYBRIDGE, >>> + M_AMDFAM10H, >>> + M_AMDFAM10H_BARCELONA, >>> + M_AMDFAM10H_SHANGHAI, >>> + M_AMDFAM10H_ISTANBUL, >>> + M_AMDFAM15H_BDVER1, >>> + M_AMDFAM15H_BDVER2, >>> + M_MAX >>> + }; >>> + >>> + static tree __processor_features_type = NULL_TREE; >>> + static tree __cpu_features_var = NULL_TREE; >>> + static tree __processor_model_type = NULL_TREE; >>> + static tree __cpu_model_var = NULL_TREE; >>> + static tree field; >>> + static tree which_struct; >>> + >>> + if (__processor_features_type == NULL_TREE) >>> + __processor_features_type = build_struct_with_one_bit_fields (F_MAX, >>> + "__processor_features"); >>> + >>> + if (__processor_model_type == NULL_TREE) >>> + __processor_model_type = build_struct_with_one_bit_fields (M_MAX, >>> + "__processor_model"); >>> + >>> + if (__cpu_features_var == NULL_TREE) >>> + __cpu_features_var = make_var_decl 
(__processor_features_type, >>> + "__cpu_features"); >>> + >>> + if (__cpu_model_var == NULL_TREE) >>> + __cpu_model_var = make_var_decl (__processor_model_type, >>> + "__cpu_model"); >>> + >>> + /* Look at the code to identify the field requested. */ >>> + switch (fn_code) >>> + { >>> + case IX86_BUILTIN_CPU_SUPPORTS_CMOV: >>> + field = get_field_from_struct (__processor_features_type, F_CMOV); >>> + which_struct = __cpu_features_var; >>> + break; >>> + case IX86_BUILTIN_CPU_SUPPORTS_MMX: >>> + field = get_field_from_struct (__processor_features_type, F_MMX); >>> + which_struct = __cpu_features_var; >>> + break; >>> + case IX86_BUILTIN_CPU_SUPPORTS_POPCOUNT: >>> + field = get_field_from_struct (__processor_features_type, F_POPCNT); >>> + which_struct = __cpu_features_var; >>> + break; >>> + case IX86_BUILTIN_CPU_SUPPORTS_SSE: >>> + field = get_field_from_struct (__processor_features_type, F_SSE); >>> + which_struct = __cpu_features_var; >>> + break; >>> + case IX86_BUILTIN_CPU_SUPPORTS_SSE2: >>> + field = get_field_from_struct (__processor_features_type, F_SSE2); >>> + which_struct = __cpu_features_var; >>> + break; >>> + case IX86_BUILTIN_CPU_SUPPORTS_SSE3: >>> + field = get_field_from_struct (__processor_features_type, F_SSE3); >>> + which_struct = __cpu_features_var; >>> + break; >>> + case IX86_BUILTIN_CPU_SUPPORTS_SSSE3: >>> + field = get_field_from_struct (__processor_features_type, F_SSSE3); >>> + which_struct = __cpu_features_var; >>> + break; >>> + case IX86_BUILTIN_CPU_SUPPORTS_SSE4_1: >>> + field = get_field_from_struct (__processor_features_type, F_SSE4_1); >>> + which_struct = __cpu_features_var; >>> + break; >>> + case IX86_BUILTIN_CPU_SUPPORTS_SSE4_2: >>> + field = get_field_from_struct (__processor_features_type, F_SSE4_2); >>> + which_struct = __cpu_features_var; >>> + break; >>> + case IX86_BUILTIN_CPU_IS_AMD: >>> + field = get_field_from_struct (__processor_model_type, M_AMD); >>> + which_struct = __cpu_model_var; >>> + break; >>> + case 
IX86_BUILTIN_CPU_IS_INTEL: >>> + field = get_field_from_struct (__processor_model_type, M_INTEL); >>> + which_struct = __cpu_model_var; >>> + break; >>> + case IX86_BUILTIN_CPU_IS_INTEL_ATOM: >>> + field = get_field_from_struct (__processor_model_type, M_INTEL_ATOM); >>> + which_struct = __cpu_model_var; >>> + break; >>> + case IX86_BUILTIN_CPU_IS_INTEL_CORE2: >>> + field = get_field_from_struct (__processor_model_type, M_INTEL_CORE2); >>> + which_struct = __cpu_model_var; >>> + break; >>> + case IX86_BUILTIN_CPU_IS_INTEL_COREI7: >>> + field = get_field_from_struct (__processor_model_type, >>> + M_INTEL_COREI7); >>> + which_struct = __cpu_model_var; >>> + break; >>> + case IX86_BUILTIN_CPU_IS_INTEL_COREI7_NEHALEM: >>> + field = get_field_from_struct (__processor_model_type, >>> + M_INTEL_COREI7_NEHALEM); >>> + which_struct = __cpu_model_var; >>> + break; >>> + case IX86_BUILTIN_CPU_IS_INTEL_COREI7_WESTMERE: >>> + field = get_field_from_struct (__processor_model_type, >>> + M_INTEL_COREI7_WESTMERE); >>> + which_struct = __cpu_model_var; >>> + break; >>> + case IX86_BUILTIN_CPU_IS_INTEL_COREI7_SANDYBRIDGE: >>> + field = get_field_from_struct (__processor_model_type, >>> + M_INTEL_COREI7_SANDYBRIDGE); >>> + which_struct = __cpu_model_var; >>> + break; >>> + case IX86_BUILTIN_CPU_IS_AMDFAM10H: >>> + field = get_field_from_struct (__processor_model_type, >>> + M_AMDFAM10H); >>> + which_struct = __cpu_model_var; >>> + break; >>> + case IX86_BUILTIN_CPU_IS_AMDFAM10H_BARCELONA: >>> + field = get_field_from_struct (__processor_model_type, >>> + M_AMDFAM10H_BARCELONA); >>> + which_struct = __cpu_model_var; >>> + break; >>> + case IX86_BUILTIN_CPU_IS_AMDFAM10H_SHANGHAI: >>> + field = get_field_from_struct (__processor_model_type, >>> + M_AMDFAM10H_SHANGHAI); >>> + which_struct = __cpu_model_var; >>> + break; >>> + case IX86_BUILTIN_CPU_IS_AMDFAM10H_ISTANBUL: >>> + field = get_field_from_struct (__processor_model_type, >>> + M_AMDFAM10H_ISTANBUL); >>> + which_struct = 
__cpu_model_var; >>> + break; >>> + case IX86_BUILTIN_CPU_IS_AMDFAM15H_BDVER1: >>> + field = get_field_from_struct (__processor_model_type, >>> + M_AMDFAM15H_BDVER1); >>> + which_struct = __cpu_model_var; >>> + break; >>> + case IX86_BUILTIN_CPU_IS_AMDFAM15H_BDVER2: >>> + field = get_field_from_struct (__processor_model_type, >>> + M_AMDFAM15H_BDVER2); >>> + which_struct = __cpu_model_var; >>> + break; >>> + default: >>> + return NULL_TREE; >>> + } >>> + >>> + return build3 (COMPONENT_REF, TREE_TYPE (field), which_struct, field, NULL_TREE); >>> +} >>> + >>> +static tree >>> +ix86_fold_builtin (tree fndecl, int n_args ATTRIBUTE_UNUSED, >>> + tree *args ATTRIBUTE_UNUSED, bool ignore ATTRIBUTE_UNUSED) >>> +{ >>> + const char* decl_name = IDENTIFIER_POINTER (DECL_NAME (fndecl)); >>> + if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD >>> + && strstr(decl_name, "__builtin_cpu") != NULL) >>> + { >>> + enum ix86_builtins code = (enum ix86_builtins) >>> + DECL_FUNCTION_CODE (fndecl); >>> + return fold_builtin_cpu (code); >>> + } >>> + return NULL_TREE; >>> +} >>> + >>> +/* A builtin to init/return the cpu type or feature. Returns an >>> + integer and the type is a const if IS_CONST is set. */ >>> + >>> +static void >>> +make_platform_builtin (const char* name, int code, int is_const) >>> +{ >>> + tree decl; >>> + tree type; >>> + >>> + type = ix86_get_builtin_func_type (INT_FTYPE_VOID); >>> + decl = add_builtin_function (name, type, code, BUILT_IN_MD, >>> + NULL, NULL_TREE); >>> + gcc_assert (decl != NULL_TREE); >>> + ix86_builtins[(int) code] = decl; >>> + if (is_const) >>> + TREE_READONLY (decl) = 1; >>> +} >>> + >>> +/* Builtins to get CPU type and features supported. 
*/ >>> + >>> +static void >>> +ix86_init_platform_type_builtins (void) >>> +{ >>> + make_platform_builtin ("__builtin_cpu_init", >>> + IX86_BUILTIN_CPU_INIT, 0); >>> + make_platform_builtin ("__builtin_cpu_supports_cmov", >>> + IX86_BUILTIN_CPU_SUPPORTS_CMOV, 1); >>> + make_platform_builtin ("__builtin_cpu_supports_mmx", >>> + IX86_BUILTIN_CPU_SUPPORTS_MMX, 1); >>> + make_platform_builtin ("__builtin_cpu_supports_popcount", >>> + IX86_BUILTIN_CPU_SUPPORTS_POPCOUNT, 1); >>> + make_platform_builtin ("__builtin_cpu_supports_sse", >>> + IX86_BUILTIN_CPU_SUPPORTS_SSE, 1); >>> + make_platform_builtin ("__builtin_cpu_supports_sse2", >>> + IX86_BUILTIN_CPU_SUPPORTS_SSE2, 1); >>> + make_platform_builtin ("__builtin_cpu_supports_sse3", >>> + IX86_BUILTIN_CPU_SUPPORTS_SSE3, 1); >>> + make_platform_builtin ("__builtin_cpu_supports_ssse3", >>> + IX86_BUILTIN_CPU_SUPPORTS_SSSE3, 1); >>> + make_platform_builtin ("__builtin_cpu_supports_sse4_1", >>> + IX86_BUILTIN_CPU_SUPPORTS_SSE4_1, 1); >>> + make_platform_builtin ("__builtin_cpu_supports_sse4_2", >>> + IX86_BUILTIN_CPU_SUPPORTS_SSE4_2, 1); >>> + make_platform_builtin ("__builtin_cpu_is_amd", >>> + IX86_BUILTIN_CPU_IS_AMD, 1); >>> + make_platform_builtin ("__builtin_cpu_is_intel_atom", >>> + IX86_BUILTIN_CPU_IS_INTEL_ATOM, 1); >>> + make_platform_builtin ("__builtin_cpu_is_intel_core2", >>> + IX86_BUILTIN_CPU_IS_INTEL_CORE2, 1); >>> + make_platform_builtin ("__builtin_cpu_is_intel", >>> + IX86_BUILTIN_CPU_IS_INTEL, 1); >>> + make_platform_builtin ("__builtin_cpu_is_intel_corei7", >>> + IX86_BUILTIN_CPU_IS_INTEL_COREI7, 1); >>> + make_platform_builtin ("__builtin_cpu_is_intel_corei7_nehalem", >>> + IX86_BUILTIN_CPU_IS_INTEL_COREI7_NEHALEM, 1); >>> + make_platform_builtin ("__builtin_cpu_is_intel_corei7_westmere", >>> + IX86_BUILTIN_CPU_IS_INTEL_COREI7_WESTMERE, 1); >>> + make_platform_builtin ("__builtin_cpu_is_intel_corei7_sandybridge", >>> + IX86_BUILTIN_CPU_IS_INTEL_COREI7_SANDYBRIDGE, 1); >>> + make_platform_builtin 
("__builtin_cpu_is_amdfam10", >>> + IX86_BUILTIN_CPU_IS_AMDFAM10H, 1); >>> + make_platform_builtin ("__builtin_cpu_is_amdfam10_barcelona", >>> + IX86_BUILTIN_CPU_IS_AMDFAM10H_BARCELONA, 1); >>> + make_platform_builtin ("__builtin_cpu_is_amdfam10_shanghai", >>> + IX86_BUILTIN_CPU_IS_AMDFAM10H_SHANGHAI, 1); >>> + make_platform_builtin ("__builtin_cpu_is_amdfam10_istanbul", >>> + IX86_BUILTIN_CPU_IS_AMDFAM10H_ISTANBUL, 1); >>> + make_platform_builtin ("__builtin_cpu_is_amdfam15_bdver1", >>> + IX86_BUILTIN_CPU_IS_AMDFAM15H_BDVER1, 1); >>> + make_platform_builtin ("__builtin_cpu_is_amdfam15_bdver2", >>> + IX86_BUILTIN_CPU_IS_AMDFAM15H_BDVER2, 1); >>> +} >>> + >>> /* Detect if this unaligned vectorizable load/stores should be >>> considered slow. This is true for core2 where the movdqu insn >>> is slow, ~5x slower than the movdqa. */ >>> @@ -27705,6 +28076,9 @@ ix86_init_builtins (void) >>> >>> ix86_init_builtin_types (); >>> >>> + /* Builtins to get CPU type and features. */ >>> + ix86_init_platform_type_builtins (); >>> + >>> /* TFmode support builtins. */ >>> def_builtin_const (0, "__builtin_infq", >>> FLOAT128_FTYPE_VOID, IX86_BUILTIN_INFQ); >>> @@ -29321,6 +29695,48 @@ ix86_expand_builtin (tree exp, rtx target, rtx sub >>> enum machine_mode mode0, mode1, mode2, mode3, mode4; >>> unsigned int fcode = DECL_FUNCTION_CODE (fndecl); >>> >>> + /* For CPU builtins that can be folded, fold first and expand the fold. 
*/ >>> + switch (fcode) >>> + { >>> + case IX86_BUILTIN_CPU_SUPPORTS_CMOV: >>> + case IX86_BUILTIN_CPU_SUPPORTS_MMX: >>> + case IX86_BUILTIN_CPU_SUPPORTS_POPCOUNT: >>> + case IX86_BUILTIN_CPU_SUPPORTS_SSE: >>> + case IX86_BUILTIN_CPU_SUPPORTS_SSE2: >>> + case IX86_BUILTIN_CPU_SUPPORTS_SSE3: >>> + case IX86_BUILTIN_CPU_SUPPORTS_SSSE3: >>> + case IX86_BUILTIN_CPU_SUPPORTS_SSE4_1: >>> + case IX86_BUILTIN_CPU_SUPPORTS_SSE4_2: >>> + case IX86_BUILTIN_CPU_IS_AMD: >>> + case IX86_BUILTIN_CPU_IS_INTEL: >>> + case IX86_BUILTIN_CPU_IS_INTEL_ATOM: >>> + case IX86_BUILTIN_CPU_IS_INTEL_CORE2: >>> + case IX86_BUILTIN_CPU_IS_INTEL_COREI7: >>> + case IX86_BUILTIN_CPU_IS_INTEL_COREI7_NEHALEM: >>> + case IX86_BUILTIN_CPU_IS_INTEL_COREI7_WESTMERE: >>> + case IX86_BUILTIN_CPU_IS_INTEL_COREI7_SANDYBRIDGE: >>> + case IX86_BUILTIN_CPU_IS_AMDFAM10H: >>> + case IX86_BUILTIN_CPU_IS_AMDFAM10H_BARCELONA: >>> + case IX86_BUILTIN_CPU_IS_AMDFAM10H_SHANGHAI: >>> + case IX86_BUILTIN_CPU_IS_AMDFAM10H_ISTANBUL: >>> + case IX86_BUILTIN_CPU_IS_AMDFAM15H_BDVER1: >>> + case IX86_BUILTIN_CPU_IS_AMDFAM15H_BDVER2: >>> + { >>> + tree fold_expr = fold_builtin_cpu ((enum ix86_builtins) fcode); >>> + gcc_assert (fold_expr != NULL_TREE); >>> + return expand_expr (fold_expr, target, mode, EXPAND_NORMAL); >>> + } >>> + case IX86_BUILTIN_CPU_INIT: >>> + { >>> + /* Make it call __cpu_indicator_init in libgcc. */ >>> + tree call_expr, fndecl, type; >>> + type = build_function_type_list (integer_type_node, NULL_TREE); >>> + fndecl = build_fn_decl ("__cpu_indicator_init", type); >>> + call_expr = build_call_expr (fndecl, 0); >>> + return expand_expr (call_expr, target, mode, EXPAND_NORMAL); >>> + } >>> + } >>> + >>> /* Determine whether the builtin function is available under the current ISA. >>> Originally the builtin was not created if it wasn't applicable to the >>> current ISA based on the command line switches. 
With function specific >>> @@ -38867,6 +39283,9 @@ ix86_loop_unroll_adjust (unsigned nunroll, struct >>> #undef TARGET_BUILD_BUILTIN_VA_LIST >>> #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list >>> >>> +#undef TARGET_FOLD_BUILTIN >>> +#define TARGET_FOLD_BUILTIN ix86_fold_builtin >>> + >>> #undef TARGET_SLOW_UNALIGNED_VECTOR_MEMOP >>> #define TARGET_SLOW_UNALIGNED_VECTOR_MEMOP ix86_slow_unaligned_vector_memop >>> >>> >>> -- >>> This patch is available for review at http://codereview.appspot.com/5715051
Sign in to reply to this message.
|