/*
* file test\src\main.c
* brief conformity and performance tests for FIR algorithms library
* author hugo descoubes
* date mixing 09/2012 and 09/2021
*/

#include <firtest.h>
#include <a_sp.h>


/*
 * Test vectors shared by every FIR implementation exercised in main().
 *
 * Alignment: 8 bytes, matching the 64-bit CPU to L1D data path.
 *
 * Each DATA_ALIGN pragma is written ahead of the definition it targets.
 * TI's compiler guide asks for symbol pragmas to appear before any
 * declaration, definition or reference of the symbol; otherwise the
 * compiler may warn and ignore the pragma.
 */
#pragma DATA_ALIGN(xk_sp, 8);
float32_t xk_sp[XK_LENGTH];			/* input vector */

#pragma DATA_ALIGN(yk_sp_ti, 8);
float32_t yk_sp_ti[YK_LENGTH];		/* output vector for TI reference algorithm */

#pragma DATA_ALIGN(yk_sp, 8);
float32_t yk_sp[YK_LENGTH];			/* output vector for algorithm to optimize */

/*
 * NOTE(review): a_sp is not defined in this file (presumably it comes from
 * a_sp.h, included above). If so, this pragma still follows its definition;
 * confirm the compiler honors it, or move the pragma next to the definition.
 */
#pragma DATA_ALIGN(a_sp, 8);


/*
 * FIRTEST_RUN - benchmark one candidate FIR routine and compare its output
 * with the reference output held in yk_sp_ti.
 *
 *   title     : string literal printed as the section banner; the
 *               "memory model" line is concatenated to it, so it must be
 *               a literal, not a pointer
 *   fir_fn    : name of the FIR routine whose address is given to
 *               firtest_perf()
 *   conf_ptr  : pointer to the TestSystem_obj given to firtest_sys()
 *   bench_ptr : pointer to the TestPerf_obj given to firtest_perf()
 *
 * Sequence: print the banner, clear yk_sp, call firtest_perf() with yk_sp
 * as output buffer, call firtest_sys() on yk_sp_ti / yk_sp, print the
 * conformity fields, print the performance fields only when firtest_sys()
 * returned a non-zero value, then flush stdout.
 */
#define FIRTEST_RUN(title, fir_fn, conf_ptr, bench_ptr)										\
	do {																					\
		Boolean is_valid;																	\
		int k;																				\
																							\
		printf(title "\nmemory model : full cacheability");									\
																							\
		for (k = 0; k < YK_LENGTH; k++) {													\
			yk_sp[k] = 0.0f;																\
		}																					\
																							\
		firtest_perf((bench_ptr), UMA_L2CACHE_L1DCACHE, yk_sp, &fir_fn);					\
																							\
		is_valid = firtest_sys((conf_ptr), yk_sp_ti, yk_sp, YK_LENGTH);						\
																							\
		printf("\nconformity : status %s, max %3.3f%%, %d samples\n",						\
				(conf_ptr)->error_status,													\
				(conf_ptr)->error_percent,													\
				(conf_ptr)->error_samples);													\
																							\
		if (is_valid) {																		\
			printf("performance : %d cycles, %3.3f ms, %1.2f floating MAC's per cycle\n",	\
					(bench_ptr)->perf_nbcycles,												\
					(bench_ptr)->perf_usertime_ms,											\
					(bench_ptr)->perf_macs);												\
		}																					\
																							\
		fflush(stdout);																		\
	} while (0)

/*
 * Program entry point.
 *
 * Initializes the test objects through firtest_init(), prints the testing
 * conditions, runs the TI DSPLIB routine DSPF_sp_fir_gen into yk_sp_ti,
 * then runs every FIR variant whose TEST_* compile-time switch is non-zero
 * through FIRTEST_RUN, in the order listed below.
 *
 * Never returns: the function ends in an endless loop.
 */
void main(void)
{
	TestSystem_obj conformity;
	TestPerf_obj benchmark;

	firtest_init(&conformity, &benchmark, xk_sp, XK_LENGTH);

	printf(	"\nFIR algorithms benchmarking on TI TMS320C6678 DSP architecture"
			"\ntesting conditions :"
			"\n--> input vector size : %d samples"
			"\n--> floating MAC's per cycle max : 8"
			"\n--> repetitions for average calculation : %d"
			"\n--> tolerated error : less than %3.1f%%\n",
			XK_LENGTH,
			benchmark.perf_rep,
			conformity.error_margin);

	/*
	 * Reference run: FIR algorithm from TI optimized DSPLIB for C6600 DSP.
	 * Its output lands in yk_sp_ti, which every later conformity check
	 * compares against; no conformity check is done on the reference itself.
	 */
	firtest_perf(&benchmark, UMA_L2CACHE_L1DCACHE, yk_sp_ti, &DSPF_sp_fir_gen);

	printf(	"\n--> FIR algorithm from TI optimized DSPLIB for C6600 DSP - DSPF_sp_fir_gen, dsplib_c66x_3_1_0_0"
			"\nmemory model : full cacheability"
			"\nperformance : %d cycles, %3.3f ms, %1.2f MAC's per cycle\n",
			benchmark.perf_nbcycles,
			benchmark.perf_usertime_ms,
			benchmark.perf_macs);

	fflush(stdout);

	/* FIR algorithm in canonical c */
	#if ( TEST_FIR_SP != 0 )
		FIRTEST_RUN("\n--> FIR algorithm in canonical c ",
					fir_sp, &conformity, &benchmark);
	#endif

	/* FIR algorithm in canonical C6600 asm */
	#if ( TEST_FIR_ASM != 0 )
		FIRTEST_RUN("\n--> FIR algorithm in canonical C6600 asm",
					fir_sp_asm, &conformity, &benchmark);
	#endif

	/* FIR algorithm in C6600 asm vliw */
	#if ( TEST_FIR_ASM_VLIW != 0 )
		FIRTEST_RUN("\n--> FIR algorithm in C6600 asm vliw",
					fir_sp_asm_vliw, &conformity, &benchmark);
	#endif

	/* FIR algorithm in C6600 asm pipelining software */
	#if ( TEST_FIR_ASM_PIPE != 0 )
		FIRTEST_RUN("\n--> FIR algorithm in C6600 asm pipelining software",
					fir_sp_asm_softPipeline, &conformity, &benchmark);
	#endif

	/*
	 * FIR algorithm in C6600 vectorial asm radix 4
	 *
	 * NOTE(review): the other radix-4 routines used here end in "_r4"
	 * (fir_sp_r4, fir_sp_opt_r4); "fir_sp_asm_r14" may be a typo for
	 * "fir_sp_asm_r4". Left unchanged - confirm against the declaration.
	 */
	#if ( TEST_FIR_ASM_R4 != 0 )
		FIRTEST_RUN("\n--> FIR algorithm in C6600 vectorial asm radix 4",
					fir_sp_asm_r14, &conformity, &benchmark);
	#endif

	/* FIR algorithm in c canonical unrolling radix 4 */
	#if ( TEST_FIR_SP_R4 != 0 )
		FIRTEST_RUN("\n--> FIR algorithm in c canonical unrolling radix 4",
					fir_sp_r4, &conformity, &benchmark);
	#endif

	/* FIR algorithm in c vectorized intrinsics radix 4 */
	#if ( TEST_FIR_SP_OPT_R4 != 0 )
		FIRTEST_RUN("\n--> FIR algorithm in c vectorized intrinsics radix 4",
					fir_sp_opt_r4, &conformity, &benchmark);
	#endif

	/* Park the core here once all enabled tests have run. */
	while(1);
}

#undef FIRTEST_RUN

