SPRUJG0 User guide

SPRUJG0B December 2024 – November 2025 F29H850TU , F29H859TU-Q1

2.4.6 Coding Style and Impact on Performance

The way the developer writes C code can have an impact on performance. This section illustrates specific example scenarios where this can occur.

With loops, performance can vary depending on whether the loop bound (the iteration count) is a fixed or a variable value. With a fixed value, the compiler has complete knowledge of the loop and can determine the approach that maximizes performance, whether that means unrolling the loop, software pipelining the loop, and so forth. For example, with matrix multiplication, the performance is significantly better when the matrix row and column sizes are specified as constants in the loops, versus passing them in as function arguments.
In some cases, merging independent loops into a single loop can improve performance. The first code block below generates sub-optimal code. The second code block is better optimized.

/*
 * Checks the bitwise OR, AND and XOR of every Operand_A/Operand_B pair in
 * Swc1_Bit_Manipulation against Result_Or, Result_And and Result_Xor.
 *
 * This variant walks the arrays once per operator (three separate loops).
 *
 * Returns TC_OK if every comparison matches, TC_NOK otherwise.
 */
uint8_T Bit_Manipulation_Test_Case(void)
{
    uint8_T status = TC_OK;

    /* Pass 1: OR */
    for (uint32_T idx = 0u; idx < BIT_MANIPULATION_ARRAY_SIZE; ++idx) {
        const uint32_T computed =
            (Swc1_Bit_Manipulation.Operand_A[idx] | Swc1_Bit_Manipulation.Operand_B[idx]);

        if (computed != Swc1_Bit_Manipulation.Result_Or[idx]) {
            status = TC_NOK;
        }
    }

    /* Pass 2: AND */
    for (uint32_T idx = 0u; idx < BIT_MANIPULATION_ARRAY_SIZE; ++idx) {
        const uint32_T computed =
            (Swc1_Bit_Manipulation.Operand_A[idx] & Swc1_Bit_Manipulation.Operand_B[idx]);

        if (computed != Swc1_Bit_Manipulation.Result_And[idx]) {
            status = TC_NOK;
        }
    }

    /* Pass 3: XOR */
    for (uint32_T idx = 0u; idx < BIT_MANIPULATION_ARRAY_SIZE; ++idx) {
        const uint32_T computed =
            (Swc1_Bit_Manipulation.Operand_A[idx] ^ Swc1_Bit_Manipulation.Operand_B[idx]);

        if (computed != Swc1_Bit_Manipulation.Result_Xor[idx]) {
            status = TC_NOK;
        }
    }

    return status;
}

/*
 * Checks the bitwise OR, AND and XOR of every Operand_A/Operand_B pair in
 * Swc1_Bit_Manipulation against Result_Or, Result_And and Result_Xor.
 *
 * This variant performs all three checks in a single pass over the arrays.
 *
 * Returns TC_OK if every comparison matches, TC_NOK otherwise.
 */
uint8_T Bit_Manipulation_Test_Case(void)
{
    uint8_T status = TC_OK;

    for (uint32_T idx = 0u; idx < BIT_MANIPULATION_ARRAY_SIZE; ++idx) {
        /* OR */
        const uint32_T or_value =
            (Swc1_Bit_Manipulation.Operand_A[idx] | Swc1_Bit_Manipulation.Operand_B[idx]);
        if (or_value != Swc1_Bit_Manipulation.Result_Or[idx]) {
            status = TC_NOK;
        }

        /* AND */
        const uint32_T and_value =
            (Swc1_Bit_Manipulation.Operand_A[idx] & Swc1_Bit_Manipulation.Operand_B[idx]);
        if (and_value != Swc1_Bit_Manipulation.Result_And[idx]) {
            status = TC_NOK;
        }

        /* XOR */
        const uint32_T xor_value =
            (Swc1_Bit_Manipulation.Operand_A[idx] ^ Swc1_Bit_Manipulation.Operand_B[idx]);
        if (xor_value != Swc1_Bit_Manipulation.Result_Xor[idx]) {
            status = TC_NOK;
        }
    }

    return status;
}

Also, if conditional statements involve loads from memory or accesses to global variables, it may be helpful to pre-load those values into local variables where possible. This allows for increased use of the wider register set on the C29 CPU. It also prevents the pipeline stalls that occur when a value is loaded from memory and immediately used in a conditional check. The first code block below generates sub-optimal code. The second code block is more optimized.

// Variables are globals: every operand below is read directly from its
// global at the point of use.
if (xx == FALSE)
{
    A = b * c + d;
    E = f * c + d;

    // Select D: dd above high is replaced by high; dd below low is replaced
    // by low only when kk == RUN; in every other case D takes dd unchanged.
    if (dd > high)
    {
        D = high;
    }
    else if (dd < low)
    {
        if (kk == RUN)
        {
            D = low;
        }
        else
        {
            D = dd;
        }
    }
    else
    {
        D = dd;
    }
}

// Local copies of globals: each global is read once, up front, so the
// conditionals below compare locals instead of freshly loaded values.
// NOTE(review): every copy is declared float, as in the original snippet;
// confirm this matches the globals' real types (kk in particular, since it
// is compared against RUN).
float b_temp = b;
float c_temp = c;
float d_temp = d;
float f_temp = f;
float high_temp = high;
float low_temp = low;
float dd_temp = dd;
float kk_temp = kk;
float D_temp = D;

if (xx == FALSE)
{
    A = b_temp * c_temp + d_temp;
    E = f_temp * c_temp + d_temp;

    // Select D_temp: dd above high is replaced by high; dd below low is
    // replaced by low only when kk == RUN; otherwise dd is taken unchanged.
    if (dd_temp > high_temp)
    {
        D_temp = high_temp;
    }
    else if (dd_temp < low_temp)
    {
        if (kk_temp == RUN)
        {
            D_temp = low_temp;
        }
        else
        {
            D_temp = dd_temp;
        }
    }
    else
    {
        D_temp = dd_temp;
    }

    // Store the selected value back to the global; without this the result
    // would stay in the local copy and D would never be updated.
    D = D_temp;
}