/*
* Eval file for DSP exam
* 2023, Dimitri Boudier
*/

#include <test.h>
#include <evallib.h>


/*
 * Array memory alignment - CPU to L1D data path length is 64 bits / 8 bytes.
 *
 * The DATA_ALIGN pragmas are deliberately placed BEFORE the array
 * definitions: the TI compiler expects a symbol pragma to precede any
 * declaration, definition or reference of that symbol, otherwise it may
 * warn and ignore the requested alignment.
 * NOTE(review): if one of the included headers already declares these
 * arrays, the pragmas would have to precede that declaration too - confirm.
 */
#pragma DATA_ALIGN(src1_sp, 8);
#pragma DATA_ALIGN(src2_sp, 8);
#pragma DATA_ALIGN(dest_sp, 8);
#pragma DATA_ALIGN(dest_ref_sp, 8);


/* input vectors handed to test_init() */
float32_t src1_sp[ARRAY_LENGTH];
float32_t src2_sp[ARRAY_LENGTH];
/* output of the implementation under test */
float32_t dest_sp[ARRAY_LENGTH];
/* output of the reference implementation, used as the comparison baseline */
float32_t dest_ref_sp[ARRAY_LENGTH];


/*
 * Check the candidate output (dest_sp) against the reference output
 * (dest_ref_sp) through test_sys() and print the outcome.
 *
 * conformity : test-system object handed to test_sys(); its error_status,
 *              error_percent and error_samples fields are printed afterwards.
 * benchmark  : performance object; its figures are printed only when
 *              test_sys() reports the candidate as valid.
 *
 * The output stream is flushed before returning so each section's report is
 * emitted as soon as it is produced.
 */
static void report_candidate(TestSystem_obj *conformity, const TestPerf_obj *benchmark)
{
	Boolean validity;

	validity = test_sys (conformity, dest_ref_sp, dest_sp, ARRAY_LENGTH);

	printf("\nconformity : status %s, max %3.3f%%, %d samples\n"	, conformity->error_status
																	, conformity->error_percent
																	, conformity->error_samples);

	/* performance figures are only reported for a conforming result */
	if ( validity ) {
		printf("performance : %d cycles, %3.3f ms, %1.2f floating MAC's per cycle\n"	, benchmark->perf_nbcycles
																						, benchmark->perf_usertime_ms
																						, benchmark->perf_macs);
	}

	fflush(stdout);
}

/*
 * Benchmark entry point.
 *
 * Initialises the test objects for both input vectors, prints the testing
 * conditions, runs the canonical C implementation as the reference, then
 * runs every implementation enabled through the TEST_EVAL_* compile-time
 * switches and reports its conformity against the reference.
 *
 * Never returns: the function ends in an endless loop.
 */
void main(void)
{
	TestSystem_obj conformity;
	TestPerf_obj benchmark;
	int i;

	test_init(&conformity, &benchmark, src1_sp, ARRAY_LENGTH);
	test_init(&conformity, &benchmark, src2_sp, ARRAY_LENGTH);

	printf(	"\nEVAL algorithms benchmarking on TI TMS320C6678 DSP architecture"
			"\ntesting conditions :"
			"\n--> input vector size : %d samples"
			"\n--> floating MAC's per cycle max : 8"
			"\n--> repetitions for average calculation : %d"
			"\n--> tolerated error : less than %3.1f%%\n",
			ARRAY_LENGTH,
			benchmark.perf_rep,
			conformity.error_margin);

	fflush(stdout);

	/* EVAL algorithm in canonical c - its output is the reference for all others */
	printf(	"\n--> algorithm in canonical c (reference) "
			"\nmemory model : full cacheability");

	for (i=0; i< ARRAY_LENGTH; i++) {
		dest_ref_sp[i] = 0.0f;
	}

	test_perf (&benchmark, UMA_L2CACHE_L1DCACHE, dest_ref_sp, &eval_sp);

	/* no conformity check here: there is nothing to compare the reference with */
	printf("\nperformance : %d cycles, %3.3f ms, %1.2f floating MAC's per cycle\n"	, benchmark.perf_nbcycles
																					, benchmark.perf_usertime_ms
																					, benchmark.perf_macs);
	fflush(stdout);



	/* EVAL algorithm in canonical C6600 asm */
	#if ( TEST_EVAL_ASM != 0 )

		printf(	"\n--> EVAL algorithm in canonical C6600 asm"
				"\nmemory model : full cacheability");

		/* clear the candidate buffer before handing it to the next implementation */
		for (i=0; i<ARRAY_LENGTH; i++) {
			dest_sp[i] = 0.0f;
		}

		test_perf (&benchmark, UMA_L2CACHE_L1DCACHE, dest_sp, &eval_sp_asm);
		report_candidate(&conformity, &benchmark);

	#endif

	/* EVAL algorithm in C6600 asm vliw */
	#if ( TEST_EVAL_ASM_VLIW != 0 )

		printf(	"\n--> EVAL algorithm in C6600 asm vliw"
				"\nmemory model : full cacheability");

		for (i=0; i<ARRAY_LENGTH; i++) {
			dest_sp[i] = 0.0f;
		}

		test_perf (&benchmark, UMA_L2CACHE_L1DCACHE, dest_sp, &eval_sp_asm_vliw);
		report_candidate(&conformity, &benchmark);

	#endif

	/* EVAL algorithm in C6600 asm pipelining software */
	#if ( TEST_EVAL_ASM_PIPE != 0 )

		printf(	"\n--> EVAL algorithm in C6600 asm pipelining software"
				"\nmemory model : full cacheability");

		for (i=0; i<ARRAY_LENGTH; i++) {
			dest_sp[i] = 0.0f;
		}

		test_perf (&benchmark, UMA_L2CACHE_L1DCACHE, dest_sp, &eval_sp_asm_softPipeline);
		report_candidate(&conformity, &benchmark);

	#endif

	/* EVAL algorithm in C6600 vectorial asm radix 4 */
	#if ( TEST_EVAL_ASM_R4 != 0 )

		printf(	"\n--> EVAL algorithm in C6600 vectorial asm radix 4"
				"\nmemory model : full cacheability");

		for (i=0; i<ARRAY_LENGTH; i++) {
			dest_sp[i] = 0.0f;
		}

		test_perf (&benchmark, UMA_L2CACHE_L1DCACHE, dest_sp, &eval_sp_asm_r4);
		report_candidate(&conformity, &benchmark);

	#endif

	/* EVAL algorithm in c canonical unrolling radix 4 */
	#if ( TEST_EVAL_SP_R4 != 0 )

		printf(	"\n--> EVAL algorithm in c canonical unrolling radix 4"
				"\nmemory model : full cacheability");

		for (i=0; i<ARRAY_LENGTH; i++) {
			dest_sp[i] = 0.0f;
		}

		test_perf (&benchmark, UMA_L2CACHE_L1DCACHE, dest_sp, &eval_sp_r4);
		report_candidate(&conformity, &benchmark);

	#endif

	/* EVAL algorithm in c vectorized intrinsics radix 4 */
	#if ( TEST_EVAL_SP_OPT_R4 != 0 )

		printf(	"\n--> EVAL algorithm in c vectorized intrinsics radix 4"
				"\nmemory model : full cacheability");

		for (i=0; i<ARRAY_LENGTH; i++) {
			dest_sp[i] = 0.0f;
		}

		test_perf (&benchmark, UMA_L2CACHE_L1DCACHE, dest_sp, &eval_sp_opt_r4);
		report_candidate(&conformity, &benchmark);

	#endif

	printf(	"\n-- END --\n");
	/* main never returns (it spins below), so push the closing message out
	 * explicitly, as is done after every section above */
	fflush(stdout);

	while(1);
}

