/**
 *   @file  test_qosSched.c
 *
 *   @brief   
 *      This is the QMSS unit test code for QoS scheduler.  This
 *      corresponds to the firmware in qos_sched_be[] and 
 *      qos_sched_le[] and the LLD in qmss_qosSched.c
 *
 *  \par
 *  ============================================================================
 *  @n   (C) Copyright 2012, Texas Instruments, Inc.
 * 
 *  Redistribution and use in source and binary forms, with or without 
 *  modification, are permitted provided that the following conditions 
 *  are met:
 *
 *    Redistributions of source code must retain the above copyright 
 *    notice, this list of conditions and the following disclaimer.
 *
 *    Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the 
 *    documentation and/or other materials provided with the   
 *    distribution.
 *
 *    Neither the name of Texas Instruments Incorporated nor the names of
 *    its contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 *  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 
 *  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 
 *  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 
 *  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 
 *  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 
 *  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 
 *  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 
 *  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 *  \par
*/

#include <xdc/std.h>
#include <xdc/runtime/System.h>
#include <string.h>

#ifndef SIMULATOR_SUPPORT
/* QMSS LLD include */
#include <ti/drv/qmss/qmss_drv.h>
#include <ti/drv/qmss/qmss_firmware.h>
#include <ti/drv/qmss/qmss_qosSched.h>

/* CPPI LLD includes */
#include <ti/drv/cppi/cppi_drv.h>
#include <ti/drv/cppi/cppi_desc.h>

/* CSL RL includes */
#include <ti/csl/csl_chip.h>

/* OSAL includes */
#include <qmss_osal.h>

/************************ USER DEFINES ********************/
/* Number of descriptors in the monolithicDesc pool */
#define NUM_MONOLITHIC_DESC         8192
/* Size of one descriptor; multiplied by NUM_MONOLITHIC_DESC to size the pool */
#define SIZE_MONOLITHIC_DESC        32
/* Descriptors budgeted per queue (used by the descriptor-count check below
 * and to size the per-port sequence buffers) */
#define PROFILE_DESCS               39
#define QOS_TX_QUEUES               (80+40) // This is the number of queues that are allocated
/* Number of ports exercised by the test */
#define TEST_PORTS                  12
/* presumably 2 full ports x 40 queues plus 4 queues for each remaining (lite)
 * port -- TODO confirm against the firmware's queue layout */
#define TEST_QUEUES                 (40*2 + 4*(TEST_PORTS - 2)) // This is the number of queues actually used

/* Compile-time check: the pool must hold PROFILE_DESCS descriptors for every test queue */
#if (TEST_QUEUES*PROFILE_DESCS > NUM_MONOLITHIC_DESC)
#error Not enough descriptors 
#endif

/* 100us when clock is 983 MHz (or QM's clock is 983/3 = 327.67 MHz) */
/* The test case will still operate if the clocks are different */
#define QOS_TIMER_CONFIG            16383

/* No data is actually transmitted, just used to allow QoS to calculate bandwidth */
#define QOS_DATA_PACKET_SIZE        60


/* Debug aid for the C model.  When QOS_MODEL_DEBUG_TRIGGER_ENABLE is defined,
 * qos_model_queue_scheduler() copies the model's port state into a snapshot
 * array when its trigger comparison against QOS_MODEL_DEBUG_TRIGGER_VAL
 * matches (it takes a snapshot; it does not halt execution). */
//#define QOS_MODEL_DEBUG_TRIGGER_ENABLE
#define QOS_MODEL_DEBUG_TRIGGER_VAL 0x04600000

/* Index (in 32 bit words) of the descriptor word that holds the test's
 * descriptor ID */
#define DESC_ID_OFFSET 1 /* uses tagInfo, which is not used by firmware, this is in 32 bit words */

/************************ GLOBAL VARIABLES ********************/
/* Descriptor pool [Size of descriptor * Number of descriptors] */
/* The pool is aligned to a 16 byte boundary */
#pragma DATA_ALIGN (monolithicDesc, 16)
UInt8                   monolithicDesc[SIZE_MONOLITHIC_DESC * NUM_MONOLITHIC_DESC];

/* Timestamp when model moved each descriptor */
/* One entry per descriptor in the pool */
struct 
{
    uint32_t *descPtr;   /* descriptor the entry refers to */
    uint32_t  timestamp; /* time recorded for that descriptor */
} timestamps[NUM_MONOLITHIC_DESC];

/* This allocates memory as if all ports are used */
/* Per-port capacity: every group and every queue of a port holding
 * PROFILE_DESCS descriptors, for full and lite ports respectively */
#define FULL_PORT_MAX_DESC (QMSS_QOS_SCHED_FULL_MAX_LOG_GROUPS * QMSS_QOS_SCHED_FULL_MAX_QUEUES_PER_GROUP * PROFILE_DESCS)
#define LITE_PORT_MAX_DESC (QMSS_QOS_SCHED_LITE_MAX_LOG_GROUPS * QMSS_QOS_SCHED_LITE_MAX_QUEUES_PER_GROUP * PROFILE_DESCS)
/* Sequence buffers, one row per full port and one row per lite port; they are
 * reached through expectedSeqn[] */
uint32_t fullPortSeqns[QMSS_QOS_SCHED_FULL_MAX_PHYS_PORTS][FULL_PORT_MAX_DESC];
uint32_t litePortSeqns[QMSS_QOS_SCHED_LITE_MAX_PHYS_PORTS][LITE_PORT_MAX_DESC];

/* Per-port pointer to that port's sequence buffer.  Test ports 0 and 1 map to
 * the fullPortSeqns rows; test ports 2..11 map to litePortSeqns rows 0..9.
 * Each entry is compiled in only when TEST_PORTS is large enough to need it,
 * so the initializer list never exceeds the array size for TEST_PORTS <= 12.
 * If TEST_PORTS were raised above 12 the extra entries would be left
 * zero-initialized (NULL) by the language rules. */
uint32_t *expectedSeqn[TEST_PORTS] =
{
   fullPortSeqns[0],
#if TEST_PORTS >= 2
   fullPortSeqns[1],
#endif
#if TEST_PORTS >= 3
   litePortSeqns[0],
#endif
#if TEST_PORTS >= 4
   litePortSeqns[1],
#endif
#if TEST_PORTS >= 5
   litePortSeqns[2],
#endif
#if TEST_PORTS >= 6
   litePortSeqns[3],
#endif
#if TEST_PORTS >= 7
   litePortSeqns[4],
#endif
#if TEST_PORTS >= 8
   litePortSeqns[5],
#endif
#if TEST_PORTS >= 9
   litePortSeqns[6],
#endif
#if TEST_PORTS >= 10
   litePortSeqns[7],
#endif
#if TEST_PORTS >= 11
   litePortSeqns[8],
#endif
#if TEST_PORTS >= 12
   litePortSeqns[9]
#endif
};

/* All the queue handles used by the test.  A pointer to this structure is
 * passed to the helpers (qos_model, distribute_packets) so they share one set
 * of queues. */
typedef struct
{
    /* Queue handle containing free/unused descriptors */
    Qmss_QueueHnd freeQ;
    /* Set of queues that feed the firmware (input).  qos_model() hands these
     * out in index order: full ports first, then lite ports. */
    Qmss_QueueHnd fwQHnds[QOS_TX_QUEUES];
    /* Output queue from firmware */
    Qmss_QueueHnd qosOutQHnd;
    /* Queue to recycle to when firmware drops descriptors */
    Qmss_QueueHnd dropQHnd;
} testState_t;

/* Global variables common to all test cases */

/* QMSS configuration */
Qmss_InitCfg            qmssInitConfig;
/* Memory region configuration information */
Qmss_MemRegInfo         memInfo;
/* QM descriptor configuration */
Qmss_DescCfg            descCfg;
/* Store the queue handle for destination queues on which allocated descriptors are stored */
/* (one slot per memory region) */
Qmss_QueueHnd           QueHnd[QMSS_MAX_MEM_REGIONS];

/************************ EXTERN VARIABLES ********************/
/* Error counter; defined elsewhere and incremented by the helpers in this
 * file whenever an LLD call or a check fails */
extern UInt32                   errorCount;
/* QMSS device specific configuration (defined elsewhere) */
extern Qmss_GlobalConfigParams  qmssGblCfgParams;

/**
 *  @b Description
 *  @n  
 *      Translates a core-local L2 address into the matching global address
 *      by adding a per-core offset derived from the DNUM register.
 *
 *  @param[in]  addr
 *      Local address to translate
 *
 *  @retval
 *      addr + 0x10000000 + (core number * 0x1000000)
 */
static UInt32 l2_global_address (UInt32 addr)
{
    /* Core number, as read from the DNUM control register */
    const UInt32 coreId     = CSL_chipReadReg (CSL_CHIP_DNUM);
    /* Each core's global alias is spaced 0x1000000 apart above 0x10000000 */
    const UInt32 coreOffset = 0x10000000 + (coreId * 0x1000000);

    return (addr + coreOffset);
}

/*****************************************************************************
 * Function concatenates port, group, queue, and descNum into a unique ID
 * that can be written into the descriptor
 *
 * Bit layout of the returned ID:
 *   bits 31..28  port
 *   bits 27..24  group
 *   bits 23..20  queue
 *   bits 19..0   descNum
 *
 * No masking is applied: the caller must keep each argument within its
 * field width, otherwise it spills into the neighbouring field.
 *****************************************************************************/
static inline uint32_t make_descID (uint32_t port, uint32_t group, uint32_t queue, uint32_t descNum)
{
    return (port  << 28) | (group << 24) | (queue << 20) | (descNum);
}
/*****************************************************************************
 * Function splits port, group, queue, and descNum from unique ID
 *****************************************************************************/
#ifdef DEBUG_FCN
/* Inverse of make_descID(): extracts each field of descID into the matching
 * output pointer.  port, group and queue are 4 bit fields at bits 28, 24 and
 * 20; descNum is the remaining low 20 bits.  All four output pointers must be
 * valid.  Returns nothing. */
static void break_descID (uint32_t *port, uint32_t *group, uint32_t *queue, uint32_t *descNum, uint32_t descID)
{
    *port    = (descID >> 28) & 0x0000f;
    *group   = (descID >> 24) & 0x0000f;
    *queue   = (descID >> 20) & 0x0000f;
    /* descNum owns every bit below the queue field (bits 19..0) */
    *descNum = (descID >>  0) & 0xfffff;
}
#endif
/*****************************************************************************
 * Extracts only the port field (the top four bits) from a descriptor ID
 *****************************************************************************/
static inline uint32_t break_descID_port (uint32_t descID)
{
    const uint32_t portShift = 28;
    const uint32_t portMask  = 0x000f;

    return ((descID >> portShift) & portMask);
}

/**
 *  @b Description
 *  @n  
 *      Utility function to allocate a contiguous block of aligned queues.
 *      Candidate base queue numbers start at the first multiple of align at
 *      or above QMSS_GENERAL_PURPOSE_QUEUE_BASE and advance by align.  For
 *      each candidate the function tries to open nQ consecutive general
 *      purpose queues; if any of them cannot be obtained, every queue opened
 *      for that candidate is closed again and the next candidate is tried.
 *
 *  @param[out] queueHnds 
 *      Queue handles for each successfully opened Q.  Should be at least nQ long.
 *      Contents are only meaningful when the function succeeds.
 *  @param[in]  nQ
 *      Number of queues to allocate as a block
 *  @param[in]  align
 *      Queue number to align to.  Must be positive.
 *
 *  @retval
 *      Queue number of first allocated queue, or -1 on failure
 *      (including a non-positive align).  errorCount is incremented each
 *      time Qmss_queueOpen reports a failure.
 */
int allocate_contig_queues (Qmss_QueueHnd *queueHnds, int nQ, int align)
{
    int queueNum, baseQueue;
    uint8_t isAllocated;
    int foundBase = -1;

    /* A zero alignment would divide by zero below, and a non-positive one
     * would never advance the search */
    if (align <= 0)
    {
        return -1;
    }

    /* Allocate a contiguous block of nQ queues aligned to align */
    for (baseQueue = ((QMSS_GENERAL_PURPOSE_QUEUE_BASE + align - 1) / align) * align;
         baseQueue < (QMSS_GENERAL_PURPOSE_QUEUE_BASE + QMSS_MAX_GENERAL_PURPOSE_QUEUE - nQ);
         baseQueue += align) 
    {
        for (queueNum = 0; queueNum < nQ; queueNum++) 
        {
            queueHnds[queueNum] = 
                Qmss_queueOpen (Qmss_QueueType_GENERAL_PURPOSE_QUEUE, 
                                baseQueue + queueNum, &isAllocated);
            if (queueHnds[queueNum] < 0) 
            {
                /* Nothing was opened at this index; the earlier queues of
                 * this range are released below */
                errorCount++;
                System_printf("Queue open failed: %d\n", queueHnds[queueNum]);
                break;
            }
            /* NOTE(review): this treats isAllocated == 0 as "owned by someone
             * else"; confirm against the Qmss_queueOpen documentation that
             * this is how a shared queue is reported. */
            if (! isAllocated) 
            {
                /* Somebody else got the queue.  The open itself succeeded, so
                 * release this handle as well before trying the next range */
                Qmss_queueClose (queueHnds[queueNum]);
                break;
            }
        }
        if (queueNum == nQ) 
        {
            foundBase = baseQueue;
            break;
        }

        /* Incomplete range: close the queues [0, queueNum) that were opened
         * before the failing index so they are not leaked */
        for (queueNum--; queueNum >= 0; queueNum--) 
        {
            Qmss_queueClose (queueHnds[queueNum]);
        }
    }

    return foundBase;
}

/**
 *  @b Description
 *  @n  
 *      Busy-waits until at least the requested number of TSCL ticks have
 *      elapsed.  The elapsed time is computed with unsigned 32 bit
 *      subtraction, so a rollover of TSCL during the wait is harmless.
 *
 *  @param[in]  cycles
 *      Number of cycles to wait
 *
 */
void delay (uint32_t cycles)
{
    const uint32_t startTick = TSCL;

    for (;;)
    {
        /* Re-read the counter on every pass */
        if ((TSCL - startTick) >= cycles)
        {
            break;
        }
    }
}

/**
 *  @b Description
 *  @n  
 *      Maps a Qmss_QosSchedAcctType value to a printable name.
 *
 *  @param[in]  type
 *      Value to convert
 *
 *  @retval
 *      type_packets or type_bytes for the two known values, otherwise
 *      type_invalid
 */

const char *type_packets = "Qmss_QosSchedAcctType_PACKETS";
const char *type_bytes   = "Qmss_QosSchedAcctType_BYTES";
const char *type_invalid = "Invalid Type";
const char *string_type (Qmss_QosSchedAcctType type)
{
    if (type == Qmss_QosSchedAcctType_PACKETS)
    {
        return type_packets;
    }
    if (type == Qmss_QosSchedAcctType_BYTES)
    {
        return type_bytes;
    }

    /* Anything else is not a known accounting type */
    return type_invalid;
}

/**
 *  @b Description
 *  @n  
 *      Dumps a port configuration through System_printf: the port level
 *      fields first, then every configured group (groupCount of them), and
 *      for each group every configured queue (totQueueCount of them).
 *
 *  @param[in]  cfg
 *      Configuration to print
 *
 */
void print_config (Qmss_QosSchedPortCfg *cfg)
{
    int grpIdx, qIdx;

    /* Port level settings */
    System_printf ("cfg->wrrType          = %s\n", string_type(cfg->wrrType));
    System_printf ("cfg->cirType          = %s\n", string_type(cfg->cirType));
    System_printf ("cfg->congestionType   = %s\n", string_type(cfg->congestionType));
    System_printf ("cfg->cirIteration     = %d\n", cfg->cirIteration);
    System_printf ("cfg->cirMax           = %d\n", cfg->cirMax);
    System_printf ("cfg->groupCount       = %d\n", cfg->groupCount);
    System_printf ("cfg->outputQueue.qMgr = %d\n", cfg->outputQueue.qMgr);
    System_printf ("cfg->outputQueue.qNum = %d\n", cfg->outputQueue.qNum);

    for (grpIdx = 0; grpIdx < cfg->groupCount; grpIdx++)
    {
        Qmss_QosSchedGroupCfg *grp = &cfg->group[grpIdx];

        /* Group level settings */
        System_printf ("cfg->group[%d].cirIteration     = %d\n", grpIdx, grp->cirIteration);
        System_printf ("cfg->group[%d].pirIteration     = %d\n", grpIdx, grp->pirIteration);
        System_printf ("cfg->group[%d].cirMax           = %d\n", grpIdx, grp->cirMax);
        System_printf ("cfg->group[%d].pirMax           = %d\n", grpIdx, grp->pirMax);
        System_printf ("cfg->group[%d].wrrInitialCredit = %d\n", grpIdx, grp->wrrInitialCredit);
        System_printf ("cfg->group[%d].totQueueCount    = %d\n", grpIdx, grp->totQueueCount);
        System_printf ("cfg->group[%d].spQueueCount     = %d\n", grpIdx, grp->spQueueCount);
        System_printf ("cfg->group[%d].wrrQueueCount    = %d\n", grpIdx, grp->wrrQueueCount);

        /* Queue level settings */
        for (qIdx = 0; qIdx < grp->totQueueCount; qIdx++)
        {
            System_printf ("cfg->group[%d].Queue[%d].wrrInitialCredit = %d\n",
                           grpIdx, qIdx, grp->Queue[qIdx].wrrInitialCredit);
            System_printf ("cfg->group[%d].Queue[%d].congestionThresh = %d\n",
                           grpIdx, qIdx, grp->Queue[qIdx].congestionThresh);
        }
    }
}


/*****************************************************************************
 * The following data structures are a C model of the firmware's foreground
 * scheduling.  It doesn't consider congestion dropping since that is a 
 * background task that uses all available fw cycles to drop.
 *
 * modelQueue is the per-queue state of the model: its WRR credit, the
 * statistics accumulated by qos_model_queue_scheduler(), and the handle of
 * the input queue it represents.
 * **************************************************************************/
typedef struct {
    uint32_t    WrrInitialCredit;       // Initial Queue WRR credit on a "new" schedule
    int32_t     WrrCurrentCredit;       // Current Queue WRR credit (signed: it is decremented and tested for <= 0)
    uint32_t    CongestionThresh;       // The max amount of congestion before drop (copied from config)
    uint32_t    PacketsForwarded;       // Number of packets forwarded
    uint64_t    BytesForwarded;         // Number of bytes forwarded
    Qmss_QueueHnd QueueHnd;             // Input queue this model queue pops from
} modelQueue;

/* Per logical group state of the model: CIR/PIR and WRR credit plus the
 * group's queues.  Queues are ordered as SPCount strict priority queues,
 * then RRCount round robin queues, then any remaining (best effort) queues
 * up to QueueCount; qos_model_group_scheduler() serves them in that order. */
typedef struct {
    int32_t     CirIteration;           // CIR credit per iteration
    int32_t     PirIteration;           // PIR credit per iteration
    int32_t     CirCurrent;             // Current CIR credit
    int32_t     PirCurrent;             // Current PIR credit
    int32_t     CirMax;                 // Max total CIR credit
    int32_t     PirMax;                 // Max total PIR credit
    int32_t     WrrInitialCredit;       // Initial Group WRR credit on a "new" schedule
    int32_t     WrrCurrentCredit;       // Current Group WRR credit
    uint8_t     QueueCount;             // Total number of active QOS queues (up to 8)
    uint8_t     SPCount;                // The number of SP queues (usually 2 or 3)
    uint8_t     RRCount;                // The number of RR queues (usually QueueCount-SPCount)
    uint8_t     NextQueue;              // The next RR queue to examine in the group
    uint8_t     WrrCreditMask;          // Flag mask of WRR queues that have WRR credit remaining
    // qos_model() indexes Queue[] up to QMSS_QOS_SCHED_FULL_MAX_QUEUES_PER_GROUP,
    // so that constant must not exceed the array size below
    modelQueue  Queue[8];               // Up to eight queues per logical group
} modelGroup;

/* Per physical port state of the model: accounting modes, the port level CIR
 * credit, round robin state across groups, the groups themselves and the
 * queue that scheduled packets are pushed to. */
typedef struct {
    int         fByteWrrCredits;        // When set, WRR credits are in bytes, else packets
    int         fByteCirCredits;        // When set, CIR/PIR credits are in bytes, else packets
    int         fByteCongest;           // When set, congestion is in bytes, else packets
    // NOTE(review): the three fields below say "always in bytes", but
    // qos_model_port_scheduler() deducts port CIR credit in packet units when
    // fByteCirCredits is clear -- confirm which is intended
    int32_t     CirIteration;           // CIR credit per iteration (always in bytes)
    int32_t     CirCurrent;             // Current CIR credit (always in bytes)
    int32_t     CirMax;                 // Max total CIR credit (always in bytes)
    uint8_t     GroupCount;             // The number of logical groups
    uint8_t     WrrCreditMask;          // Flag mask of WRR groups that have WRR credit remaining
    uint8_t     NextGroup;              // The next RR group to examine
    // qos_model() indexes Group[] up to QMSS_QOS_SCHED_FULL_MAX_LOG_GROUPS,
    // so that constant must not exceed the array size below
    modelGroup  Group[5];               // Up to 5 logical groups
    Qmss_QueueHnd OutputQueueHnd;       // Queue that scheduled descriptors are pushed to
} modelPort;

/* Model state, one entry per physical port; (re)initialized by qos_model() */
modelPort qos_model_ports[QMSS_QOS_SCHED_MAX_PHYS_PORTS];
#ifdef QOS_MODEL_DEBUG_TRIGGER_ENABLE
/* Copy of qos_model_ports taken by qos_model_queue_scheduler() when the debug
 * trigger matches */
modelPort qos_model_ports_dbg_snapshot[QMSS_QOS_SCHED_MAX_PHYS_PORTS];
#endif

/*****************************************************************************
 * model function to transfer a packet from input queue to output queue
 *
 * Pops one descriptor (and its byte size) from pQueue->QueueHnd, pushes it to
 * the tail of pPort->OutputQueueHnd and updates the queue's forwarded
 * packet/byte statistics.
 *
 * Returns the byte size reported by the pop, or 0 if the pop yielded no
 * descriptor (callers treat 0 as "no packet was transferred").
 * **************************************************************************/
int qos_model_queue_scheduler (modelPort *pPort, modelQueue *pQueue)
{
    uint32_t ByteSize = 0;
    void    *desc     = NULL;

    Qmss_queuePopDescSize (pQueue->QueueHnd, &desc, &ByteSize);

    /* Never forward (or dereference) a null descriptor */
    if (! (QMSS_DESC_PTR(desc)))
    {
        return 0;
    }

#ifdef QOS_MODEL_DEBUG_TRIGGER_ENABLE
    /* The descriptor ID lives in 32 bit word DESC_ID_OFFSET of the descriptor
     * (that is where distribute_packets() writes it), so index that word
     * rather than adding the offset to the value of word 0 */
    if (((uint32_t *)(QMSS_DESC_PTR(desc)))[DESC_ID_OFFSET] == QOS_MODEL_DEBUG_TRIGGER_VAL)
    {
        /* Make snapshot of state at this descriptor for debug purposes */
        memcpy (qos_model_ports_dbg_snapshot, qos_model_ports, sizeof(qos_model_ports));
    }
#endif
    Qmss_queuePush (pPort->OutputQueueHnd, desc, ByteSize, QMSS_DESC_SIZE(desc), Qmss_Location_TAIL);

    pQueue->PacketsForwarded += 1;
    pQueue->BytesForwarded   += ByteSize;

    return((int)ByteSize);
}

/*****************************************************************************
 * model function to schedule a group
 *
 * Moves at most one packet from the group's queues to the port's output
 * queue.  Queues are considered in this order:
 *   1. strict priority queues (indices 0 .. SPCount-1), lowest index first
 *   2. round robin queues (the next RRCount indices), weighted by WRR credit
 *   3. remaining queues up to QueueCount (best effort), lowest index first
 *
 * Returns the value returned by qos_model_queue_scheduler() for the packet
 * that was moved, or 0 if no packet was transferred.
 * **************************************************************************/
int qos_model_group_scheduler (modelPort *pPort, modelGroup *pGroup)
{
    int32_t BytesUsed;          // only read after it has been assigned (guarded by packetSent)
    int32_t packetSent = 0;
    uint8_t PacketPendingMask;  // bit i set when queue i is not empty
    int     i, j;

    // Sample which queues hold packets once, at entry
    PacketPendingMask = 0;
    for (i = 0; i < pGroup->QueueCount; i++)
    {
        if (Qmss_getQueueEntryCount (pGroup->Queue[i].QueueHnd)) 
        {
            PacketPendingMask |= 1 << i;
        }
    }

    //
    // Try to take a high priority queue first
    //
    for( i=0; i<pGroup->SPCount; i++ )
    {
        if( PacketPendingMask & (1<<i) )
            return( qos_model_queue_scheduler (pPort, &pGroup->Queue[i]) );
    }

    //
    // Next try to pick a round robin queue
    // (at most one full rotation over the RR queues)
    //
    for( i=0; i<pGroup->RRCount; i++ )
    {
        // If all queues with WRR credit remaining are empty, reset the credit
        if( !(pGroup->WrrCreditMask & PacketPendingMask) )
        {
            // Reset credits
            for(j=pGroup->SPCount; j<(pGroup->SPCount+pGroup->RRCount); j++)
            {
                pGroup->WrrCreditMask |= (1<<j);
                pGroup->Queue[j].WrrCurrentCredit = pGroup->Queue[j].WrrInitialCredit;
            }

            // If there are still no packets, we're done
            if( !(pGroup->WrrCreditMask & PacketPendingMask) )
                break;
        }

        // If the next queue has WRR credit and packets, then schedule a packet
        if( (pGroup->WrrCreditMask & PacketPendingMask) & (1<<pGroup->NextQueue) )
        {
            // Attempt to schedule a packet
            BytesUsed = qos_model_queue_scheduler( pPort, &pGroup->Queue[pGroup->NextQueue] );

            // Deduct the WRR credit (scaled bytes or one scaled packet, per port configuration)
            if( pPort->fByteWrrCredits )
                pGroup->Queue[pGroup->NextQueue].WrrCurrentCredit -= BytesUsed << QMSS_QOS_SCHED_BYTES_SCALE_SHIFT;
            else
                pGroup->Queue[pGroup->NextQueue].WrrCurrentCredit -= 1 << QMSS_QOS_SCHED_PACKETS_SCALE_SHIFT;

            // Clear the queue's WRR credit mask if we depleted the WRR credit
            if( pGroup->Queue[pGroup->NextQueue].WrrCurrentCredit <= 0 )
                pGroup->WrrCreditMask &= ~(1<<pGroup->NextQueue);

            packetSent = 1;
        }

        // Move on to the next queue, wrapping back to the first RR queue
        pGroup->NextQueue++;
        if( pGroup->NextQueue == pGroup->SPCount+pGroup->RRCount )
            pGroup->NextQueue = pGroup->SPCount;

        // Quit now if we moved a packet
        if(packetSent)
            return(BytesUsed);
    }

    //
    // Finally, try to get a packet from the OPTIONAL best effort queues
    //
    for( i=pGroup->SPCount+pGroup->RRCount; i<pGroup->QueueCount; i++ )
    {
        if( PacketPendingMask & (1<<i) )
            return( qos_model_queue_scheduler (pPort, &pGroup->Queue[i]) );
    }

    // No packet was transferred
    return(0);
}

/*****************************************************************************
 * model function to schedule a port
 *
 * Runs one scheduling iteration for a port:
 *   1. adds the per-iteration CIR credit to the port and the CIR/PIR credit
 *      to each group, capping each at its configured maximum
 *   2. CIR pass: for each group in index order, schedules packets while the
 *      group has CIR credit, stopping entirely once the port's own credit
 *      is used up
 *   3. PIR pass: while the port still has credit, serves groups that have
 *      PIR credit and pending packets in weighted round robin order
 *
 * Returns the number of packets scheduled during this iteration.
 * **************************************************************************/
int qos_model_port_scheduler (modelPort *pPort)
{
    int32_t BytesUsed;              // Bytes used is returned from the Logical Scheduler
    int32_t CirCreditUsed;          // Cir/Pir Credit used (in packets or bytes as configured)
    int32_t WrrCreditUsed;          // Wrr Credit used (in packets or bytes as configured)
    uint8_t PacketPendingMask;      // Flag mask of RR groups that are not empty
    uint8_t PirCreditMask = 0;      // Flag set when more PIR credit remains
    int     i;
    int     PacketScheduled = 0;    // Running count of packets moved; this is the return value

    //
    // Add credits for all time based credit counters
    //

    // Credit for the main port
    pPort->CirCurrent +=  pPort->CirIteration;
    if( pPort->CirCurrent > pPort->CirMax )
        pPort->CirCurrent = pPort->CirMax;

    // Credit for the port's logical groups
    for( i=0; i<pPort->GroupCount; i++ )
    {
        pPort->Group[i].CirCurrent +=  pPort->Group[i].CirIteration;
        // Cap CIR credit at its max level
        if( pPort->Group[i].CirCurrent > pPort->Group[i].CirMax )
            pPort->Group[i].CirCurrent = pPort->Group[i].CirMax;
        pPort->Group[i].PirCurrent +=  pPort->Group[i].PirIteration;
        if( pPort->Group[i].PirCurrent > 0 )
        {
            // Track every group with PIR credit for later
            PirCreditMask |= (1<<i);
            // Cap PIR credit at its max level
            if( pPort->Group[i].PirCurrent > pPort->Group[i].PirMax )
                pPort->Group[i].PirCurrent = pPort->Group[i].PirMax;
        }
    }

    // Assume all groups have packets pending until we find out otherwise
    // (0x1F covers the five possible groups)
    PacketPendingMask = 0x1F;

    //
    // Schedule each logic group's CIR, while also ensuring that the
    // physical port's CIR is not violated.
    //
    for( i=0; i<pPort->GroupCount; i++ )
    {
        // We will schedule each group for its full CIR
        while(pPort->Group[i].CirCurrent > 0)
        {
            // If the physical port has no credit quit out of the scheduler entirely
            if( pPort->CirCurrent <= 0 )
                return PacketScheduled;

            // Attempt to schedule a packet
            BytesUsed = qos_model_group_scheduler ( pPort, &pPort->Group[i] );

            // If no packet scheduled, move on to next logical group
            if( !BytesUsed )
            {
                // Clear the pending mask bit
                PacketPendingMask &= ~(1<<i);
                break;
            }
            PacketScheduled++;

            // Use packet or byte count, depending on configuration
            if( pPort->fByteCirCredits )
                CirCreditUsed = BytesUsed << QMSS_QOS_SCHED_BYTES_SCALE_SHIFT;
            else
                CirCreditUsed = 1 << QMSS_QOS_SCHED_PACKETS_SCALE_SHIFT;

            // Here we have a packet, so deduct the credit
            // (a CIR packet consumes port CIR, group CIR and group PIR credit)
            pPort->CirCurrent          -= CirCreditUsed;
            pPort->Group[i].CirCurrent -= CirCreditUsed;
            pPort->Group[i].PirCurrent -= CirCreditUsed;
        }
    }

    //
    // Schedule each logic group's PIR in a WRR fashion while the
    // physical port's CIR is not violated.
    //
    while(pPort->CirCurrent > 0)
    {
        // If there are no queues left with PIR credit and packets, then we're done
        if( !(PirCreditMask & PacketPendingMask) )
            return PacketScheduled;

        // If all groups with WRR credit remaining are empty, reset the WRR credit
        if( !(PirCreditMask & pPort->WrrCreditMask & PacketPendingMask) )
        {
            // Reset credits
            for(i=0; i<pPort->GroupCount; i++)
            {
                pPort->WrrCreditMask |= (1<<i);
                pPort->Group[i].WrrCurrentCredit = pPort->Group[i].WrrInitialCredit;
            }

            // If there are still no packets, we're done
            if( !(PirCreditMask & pPort->WrrCreditMask & PacketPendingMask) )
                return PacketScheduled;
        }

        // If this group has PIR credit, WRR credit, and packets pending, then schedule a packet
        if( (PirCreditMask & pPort->WrrCreditMask & PacketPendingMask) & (1<<pPort->NextGroup) )
        {
            // Attempt to schedule a packet
            BytesUsed = qos_model_group_scheduler ( pPort, &pPort->Group[pPort->NextGroup] );

            // If no packet scheduled, clear the pending mask
            if( !BytesUsed )
                PacketPendingMask &= ~(1<<pPort->NextGroup);
            else
            {
                // Use packet or byte count, depending on configuration
                if( pPort->fByteCirCredits )
                    CirCreditUsed = BytesUsed << QMSS_QOS_SCHED_BYTES_SCALE_SHIFT;
                else
                    CirCreditUsed = 1 << QMSS_QOS_SCHED_PACKETS_SCALE_SHIFT;

                // Use packet or byte count, depending on configuration
                if( pPort->fByteWrrCredits )
                    WrrCreditUsed = BytesUsed << QMSS_QOS_SCHED_BYTES_SCALE_SHIFT;
                else
                    WrrCreditUsed = 1 << QMSS_QOS_SCHED_PACKETS_SCALE_SHIFT;

                // Deduct the PIR/CIR credit
                pPort->CirCurrent -= CirCreditUsed;
                pPort->Group[pPort->NextGroup].PirCurrent -= CirCreditUsed;

                // We also deduct the WRR credit
                pPort->Group[pPort->NextGroup].WrrCurrentCredit -= WrrCreditUsed;

                // Clear the group's PIR credit mask if we depleted the PIR credit
                if( pPort->Group[pPort->NextGroup].PirCurrent <= 0 )
                    PirCreditMask &= ~(1<<pPort->NextGroup);

                // Clear the group's WRR credit mask if we depleted the WRR credit
                if( pPort->Group[pPort->NextGroup].WrrCurrentCredit <= 0 )
                    pPort->WrrCreditMask &= ~(1<<pPort->NextGroup);

                PacketScheduled++;
            }
        }

        // Move on to the next group, wrapping back to group 0
        pPort->NextGroup++;
        if( pPort->NextGroup == pPort->GroupCount )
            pPort->NextGroup = 0;
    }
    return PacketScheduled;
}

/*****************************************************************************
 * Model of the qos foreground scheduler which is used to check that the
 * firmware returns correct results.  This runs until packetsRemaining are
 * scheduled.
 *
 * Steps:
 *   1. assigns state->fwQHnds[] to the model's queues in index order, full
 *      ports first and lite ports after them (both limited to TEST_PORTS)
 *   2. resets qos_model_ports[] from cfg[] (credits and statistics start at 0)
 *   3. repeatedly runs one scheduler iteration on each of the TEST_PORTS
 *      ports until at least packetsRemaining packets have been scheduled
 *
 * cfg             - array of port configurations, indexed by port number.
 *                   NOTE(review): step 2 reads QMSS_QOS_SCHED_MAX_PHYS_PORTS
 *                   entries, so the array must be at least that long even
 *                   when TEST_PORTS is smaller -- confirm against callers.
 * state           - supplies the firmware input queue handles
 * packetsRemaining- number of packets to schedule.  If fewer packets than
 *                   this are actually queued the loop cannot finish.
 * **************************************************************************/
void qos_model (Qmss_QosSchedPortCfg *cfg, testState_t *state, int packetsRemaining)
{
    int i;
    int port, group, queue;
    int queueIdx = 0;
    int loopLimit;

    /* Assign queues: full ports take the first handles */
    loopLimit = QMSS_QOS_SCHED_FULL_MAX_PHYS_PORTS;
    if (loopLimit > TEST_PORTS)
    {
        loopLimit = TEST_PORTS;
    }
    for (port = 0; port < loopLimit; port++)
    {
        for (group = 0; group < QMSS_QOS_SCHED_FULL_MAX_LOG_GROUPS; group++)
        {
            for (queue = 0; queue < QMSS_QOS_SCHED_FULL_MAX_QUEUES_PER_GROUP; queue++)
            {
                qos_model_ports[port].Group[group].Queue[queue].QueueHnd = state->fwQHnds[queueIdx++];
            }
        }
    }

    /* Lite ports follow the full ports and take the next handles */
    loopLimit = QMSS_QOS_SCHED_MAX_PHYS_PORTS;
    if (loopLimit > TEST_PORTS)
    {
        loopLimit = TEST_PORTS;
    }

    for (port = QMSS_QOS_SCHED_FULL_MAX_PHYS_PORTS; port < loopLimit; port++)
    {
        for (group = 0; group < QMSS_QOS_SCHED_LITE_MAX_LOG_GROUPS; group++)
        {
            for (queue = 0; queue < QMSS_QOS_SCHED_LITE_MAX_QUEUES_PER_GROUP; queue++)
            {
                qos_model_ports[port].Group[group].Queue[queue].QueueHnd = state->fwQHnds[queueIdx++];
            }
        }
    }

    /* Copy config.  The full-port group/queue limits are used for every port,
     * so lite ports simply copy whatever their unused config slots contain. */
    for (port = 0; port < QMSS_QOS_SCHED_MAX_PHYS_PORTS; port++)
    {
        modelPort *pPort               = &qos_model_ports[port];
        Qmss_QosSchedPortCfg *pPortCfg = &cfg[port];
        pPort->fByteWrrCredits         = (pPortCfg->wrrType == Qmss_QosSchedAcctType_BYTES) ? 1 : 0;
        pPort->fByteCirCredits         = (pPortCfg->cirType == Qmss_QosSchedAcctType_BYTES) ? 1 : 0;
        pPort->fByteCongest            = (pPortCfg->congestionType == Qmss_QosSchedAcctType_BYTES) ? 1 : 0;
        pPort->CirIteration            = pPortCfg->cirIteration;
        pPort->CirCurrent              = 0;
        pPort->CirMax                  = pPortCfg->cirMax;
        pPort->GroupCount              = pPortCfg->groupCount;
        pPort->WrrCreditMask           = 0;
        pPort->NextGroup               = 0;
        pPort->OutputQueueHnd          = Qmss_getQueueHandle (pPortCfg->outputQueue);
        for (group = 0; group < QMSS_QOS_SCHED_FULL_MAX_LOG_GROUPS; group++)
        {
            modelGroup *pGroup               = &pPort->Group[group];
            Qmss_QosSchedGroupCfg *pGroupCfg = &pPortCfg->group[group];
            pGroup->CirIteration             = pGroupCfg->cirIteration;
            pGroup->PirIteration             = pGroupCfg->pirIteration;
            pGroup->CirCurrent               = 0;
            pGroup->PirCurrent               = 0;
            pGroup->CirMax                   = pGroupCfg->cirMax;
            pGroup->PirMax                   = pGroupCfg->pirMax;
            pGroup->WrrInitialCredit         = pGroupCfg->wrrInitialCredit;
            pGroup->WrrCurrentCredit         = 0;
            pGroup->QueueCount               = pGroupCfg->totQueueCount;
            pGroup->SPCount                  = pGroupCfg->spQueueCount;
            pGroup->RRCount                  = pGroupCfg->wrrQueueCount;
            /* Round robin starts at the first queue after the SP queues */
            pGroup->NextQueue                = pGroupCfg->spQueueCount;
            pGroup->WrrCreditMask            = 0;
            for (queue = 0; queue < QMSS_QOS_SCHED_FULL_MAX_QUEUES_PER_GROUP; queue++)
            {
                modelQueue *pQueue               = &pGroup->Queue[queue];
                Qmss_QosSchedQueueCfg *pQueueCfg = &pGroupCfg->Queue[queue];
                pQueue->WrrInitialCredit         = pQueueCfg->wrrInitialCredit;
                pQueue->WrrCurrentCredit         = 0;
                pQueue->CongestionThresh         = pQueueCfg->congestionThresh;
                pQueue->PacketsForwarded         = 0;
                pQueue->BytesForwarded           = 0;
            }
        }
    }

    /* Each pass can schedule several packets, so the signed counter may step
     * past zero; test for > 0 so an overshoot still ends the loop instead of
     * spinning on a negative value. */
    while (packetsRemaining > 0)
    {
        // Schedule packets from all active physical ports
        for(i=0; i<TEST_PORTS; i++)
        {
            packetsRemaining -= qos_model_port_scheduler (&qos_model_ports[i]);
        }
    }
}


/*****************************************************************************
 * This function reads back configuration and compares it to portCfg
 *
 * port    - port number to query
 * portCfg - configuration the port is expected to hold
 *
 * errorCount is incremented (and a message printed) if the query fails or if
 * the configuration read back differs from portCfg.  The comparison is a
 * byte-wise memcmp over the whole structure.
 * NOTE(review): a byte-wise compare also covers any padding bytes; this
 * presumably relies on callers zero-filling portCfg before populating it --
 * confirm.
 *****************************************************************************/
void port_readback (int port, Qmss_QosSchedPortCfg *portCfg)
{
    UInt32               corenum = CSL_chipReadReg(CSL_CHIP_DNUM); 
    Qmss_QosSchedPortCfg portCfgRB;
    Qmss_Result          result;

    /* Load back the configuration it should read back the same */
    memset (&portCfgRB, 0, sizeof(portCfgRB));
    if ((result = Qmss_getCfgQosSchedPort (port, &portCfgRB)) != QMSS_QOS_SCHED_RETCODE_SUCCESS)
    {
        errorCount++;
        System_printf("Core %d : Failed to query port %d's config: %d\n", corenum, port, result);
        /* Nothing was read back, so a comparison would be meaningless */
        return;
    } 

    /* The "!= 0" belongs outside the call: inside it, the length argument
     * evaluates to 1 and only the first byte is compared */
    if (memcmp (portCfg, &portCfgRB, sizeof(portCfgRB)) != 0)
    {
        errorCount++;
        System_printf("Core %d : Didn't read back port %d's configuration correctly\n", corenum, port);
    }
}

/*****************************************************************************
 * Writes portCfg to the given port, reporting (and counting in errorCount)
 * a failure of the write, and then always calls port_readback() to check
 * that the port returns the same configuration.
 *****************************************************************************/
void port_config (int port, Qmss_QosSchedPortCfg *portCfg)
{
    const UInt32      coreId = CSL_chipReadReg(CSL_CHIP_DNUM);
    const Qmss_Result rc     = Qmss_putCfgQosSchedPort (port, portCfg);

    /* Report a failed write, but still perform the read-back check */
    if (rc != QMSS_QOS_SCHED_RETCODE_SUCCESS)
    {
        errorCount++;
        System_printf("Core %d : Failed to set port %d's config: %d\n", coreId, port, rc);
    }

    port_readback (port, portCfg);
}

/*****************************************************************************
 * This function puts TEST_DESCS on each of TEST_QUEUES pointed to by 
 * state->fwQHnds.
 *****************************************************************************/
int distribute_packets (testState_t *state, Qmss_QosSchedPortCfg *portCfg, int numDescsPerQueue)
{
    UInt32     corenum = CSL_chipReadReg(CSL_CHIP_DNUM); 
    Qmss_Queue dropQNum = Qmss_getQueueNumber (state->dropQHnd);
    uint32_t   port, group, queue, descNum;
    uint32_t   groupCount, queueCount;
    int        absQueueNum;
    int        totalDescs = 0;

    for (descNum = 0; descNum < numDescsPerQueue; descNum++)
    {
        absQueueNum = 0;
        for (port = 0; port < TEST_PORTS; port++)
        {
            if (port >= QMSS_QOS_SCHED_FULL_MAX_PHYS_PORTS) 
            {
                groupCount = QMSS_QOS_SCHED_LITE_MAX_LOG_GROUPS;
            }
            else
            {
                groupCount = QMSS_QOS_SCHED_FULL_MAX_LOG_GROUPS;
            }
            for (group = 0; group < groupCount; group++)
            {
                queueCount = portCfg[port].group[group].totQueueCount;
                for (queue = 0; queue < queueCount; queue++)
                {
                    uint32_t *desc;
                    desc = (uint32_t *)QMSS_DESC_PTR(Qmss_queuePop (state->freeQ));
                    if (! desc) 
                    {
                        System_printf ("Core %d : failed to pop a free desc\n", corenum);
                        errorCount++;
                        break;
                    }
                    /* Write unique ID onto the descriptor for tracking it */
                    /* fw doesn't use the first word of the descriptor */
                    desc[DESC_ID_OFFSET] = make_descID (port, group, queue, descNum);
                    /* Only set the return queue as if the descriptor is CPPI and
                     * set the descriptor type.  Otherwise
                     * QoS doesn't use any of the fields, so don't set them.
                     * If packets are sent to a CPPI destination such as ethernet
                     * then set the CPPI as if it were going directly to ethernet.
                     */
                    Cppi_setReturnQueue (Cppi_DescType_MONOLITHIC, (Cppi_Desc *)desc, dropQNum);
                    Cppi_setDescType ((Cppi_Desc *)desc, (Cppi_DescType_MONOLITHIC));
                    Qmss_queuePush (state->fwQHnds[absQueueNum], desc, QOS_DATA_PACKET_SIZE, SIZE_MONOLITHIC_DESC, Qmss_Location_TAIL);
                    absQueueNum++;
                    totalDescs++;
                }
                if (port >= QMSS_QOS_SCHED_FULL_MAX_PHYS_PORTS)
                {
                    absQueueNum += QMSS_QOS_SCHED_LITE_MAX_QUEUES_PER_GROUP - queueCount;
                }
                else
                {
                    absQueueNum += QMSS_QOS_SCHED_FULL_MAX_QUEUES_PER_GROUP - queueCount;
                }
            }
        }
    }
    return totalDescs;
}

/*****************************************************************************
 * This function sets up the QoS such that no credits are given, such that
 * no packets will drain out the output queue.  Instead, only the congestion
 * thresholds are set, such that correct congestion drop behavior can be
 * tested.
 *
 * state - test state; supplies the free, firmware input, output and drop
 *         queue handles
 * type  - accounting units applied to the wrr, cir and congestion settings
 *         of every port
 *
 * Sequence: disable and configure every port with a congestion threshold of
 * (port + 1) packets per queue, load PROFILE_DESCS descriptors on every
 * queue, enable the ports, collect the expected number of discards from the
 * drop queue, then check per-queue residue and statistics.  Port 1 is left
 * loaded and is disabled/re-enabled to check that its queued descriptors
 * arrive on the drop queue.  Finally the configuration is read back.
 *
 * Every mismatch increments errorCount and prints a diagnostic.
 *****************************************************************************/
void congestion_test (testState_t *state, Qmss_QosSchedAcctType type)
{
    Qmss_Result          result;
    int                  port, group, queue;
    int                  descNum;
    UInt32               corenum = CSL_chipReadReg(CSL_CHIP_DNUM); 
    int                  absQueue;
    uint32_t             startTime, endTime;
    int                  expectedDisableDrops, actualDisableDrops;
    int                  expectedDiscards;
    Qmss_QosSchedPortCfg portCfg[TEST_PORTS];

    /* Start from zeroed memory: the readback check compares the whole
     * structure against a zero filled buffer, so bytes this function never
     * assigns must hold a known value rather than stack residue. */
    memset (portCfg, 0, sizeof(portCfg));

    System_printf("Core %d: starting congestion (dropping) test using %s units\n", 
                  corenum, string_type (type));
    expectedDiscards = 0;
    for (port = 0; port < TEST_PORTS; port++)
    {
        if ((result = Qmss_disableQosSchedPort (port)) != QMSS_QOS_SCHED_RETCODE_SUCCESS)
        {
            System_printf ("Core %d : failed to disable QoS port %d: %d\n", corenum, port, result);
            errorCount++;
        }
        portCfg[port].wrrType = type;
        portCfg[port].cirType = type;
        portCfg[port].congestionType = type;
        portCfg[port].cirIteration = 0; // so we can test drop feature
        portCfg[port].cirMax = 10 << QMSS_QOS_SCHED_PACKETS_SCALE_SHIFT;
        if (port >= QMSS_QOS_SCHED_FULL_MAX_PHYS_PORTS) 
        {
            portCfg[port].groupCount = QMSS_QOS_SCHED_LITE_MAX_LOG_GROUPS;
        }
        else
        {
            portCfg[port].groupCount = QMSS_QOS_SCHED_FULL_MAX_LOG_GROUPS;
        }
        portCfg[port].outputQueue = Qmss_getQueueNumber (state->qosOutQHnd);
        for (group = 0; group < portCfg[port].groupCount; group++)
        {
            portCfg[port].group[group].cirIteration = 0; // so we can test drop feature
            portCfg[port].group[group].pirIteration = 0; // so we can test drop feature
            portCfg[port].group[group].cirMax = 10 << QMSS_QOS_SCHED_PACKETS_SCALE_SHIFT;
            portCfg[port].group[group].pirMax = 10 << QMSS_QOS_SCHED_PACKETS_SCALE_SHIFT;
            portCfg[port].group[group].wrrInitialCredit = 0; // so we can test drop feature
            if (port >= QMSS_QOS_SCHED_FULL_MAX_PHYS_PORTS) 
            {
                portCfg[port].group[group].totQueueCount = 4;
                portCfg[port].group[group].spQueueCount = 4;
                portCfg[port].group[group].wrrQueueCount = 0;
            } 
            else 
            {
                portCfg[port].group[group].totQueueCount = 8;
                portCfg[port].group[group].spQueueCount = 2;
                portCfg[port].group[group].wrrQueueCount = 5;
            }
            for (queue = 0; queue < portCfg[port].group[group].totQueueCount; queue++)
            {
                portCfg[port].group[group].Queue[queue].wrrInitialCredit = 0; // so we can test drop feature
                /* Threshold is (port + 1) packets, scaled to bytes when
                 * byte accounting is selected */
                portCfg[port].group[group].Queue[queue].congestionThresh = port + 1;
                if (type == Qmss_QosSchedAcctType_BYTES)
                {
                    portCfg[port].group[group].Queue[queue].congestionThresh *= QOS_DATA_PACKET_SIZE;
                }
                /* Everything loaded above the threshold should be dropped */
                expectedDiscards += PROFILE_DESCS - (port + 1);
            }
        }

        /* set and check the configuration */
        port_config (port, &portCfg[port]);
    }

    /* Put some packets in each input queue */
    distribute_packets (state, portCfg, PROFILE_DESCS);

    /* Enable the port */
    for (port = 0; port < TEST_PORTS; port++)
    {
        if ((result = Qmss_enableQosSchedPort (port)) != QMSS_QOS_SCHED_RETCODE_SUCCESS)
        {
            System_printf ("Core %d : failed to enable QoS port %d: %d\n", corenum, port, result);
            errorCount++;
        }
    }

    /* Start the clock */
    startTime = TSCL;

    /* Timestamp arrival of each descriptor */
    /* Polls the drop queue until the expected number has arrived; each
     * descriptor is recycled to the free queue once it is recorded */
    for (descNum = 0; descNum < expectedDiscards; )
    {
        uint32_t *desc;
        desc = (uint32_t *)QMSS_DESC_PTR(Qmss_queuePop (state->dropQHnd));
        if (desc) 
        {
            timestamps[descNum].timestamp = TSCL;
            timestamps[descNum].descPtr   = desc;
            descNum ++;
            Qmss_queuePushDesc (state->freeQ, desc);
        }
    }

    endTime = TSCL;

    System_printf ("Core %d: discarded %d descriptors across %d queues "
                   "in %d cycles (%d cycles per descriptor)\n",
                   corenum, 
                   expectedDiscards, 
                   TEST_QUEUES,
                   endTime - startTime, 
                   (endTime - startTime) / (expectedDiscards));

    /* Check that the correct number of descriptors were pulled from each queue */
    absQueue = 0;
    expectedDisableDrops = 0;
    for (port = 0; port < TEST_PORTS; port++)
    {
        for (group = 0; group < portCfg[port].groupCount; group++)
        {
            for (queue = 0; queue < portCfg[port].group[group].totQueueCount; queue++)
            {

               uint32_t count = Qmss_getQueueEntryCount (state->fwQHnds[absQueue]);
               uint32_t expectCount = portCfg[port].group[group].Queue[queue].congestionThresh;
               uint32_t expectDrop;
               Qmss_QosSchedStats      stats;
               /* Convert a byte threshold back to a packet count */
               if (type == Qmss_QosSchedAcctType_BYTES)
               {
                   expectCount /= QOS_DATA_PACKET_SIZE;
               }
               expectDrop = PROFILE_DESCS - expectCount;
               if (count != expectCount) 
               {
                   /* The format previously had one more %d than arguments;
                    * report the queue index as the final value */
                   System_printf ("Core %d : expect %d descs but found %d on queue %d\n", 
                                  corenum, expectCount, count, absQueue);
                   errorCount++;
               }
               /* Move remaining descriptors back to free queue */
               if (port != 1) 
               {
                   /* Port 1 is used to test discard on disable */
                   Qmss_queueDivert (state->fwQHnds[absQueue], state->freeQ, Qmss_Location_TAIL);
               }
               else 
               {
                   expectedDisableDrops += expectCount;
               }
               absQueue++;
               /* Check the stats */
               Qmss_getQosSchedStats (&stats, port, group, queue, 
                                      QMSS_QOS_SCHED_STATS_DISCARDED_BYTES |
                                      QMSS_QOS_SCHED_STATS_DISCARDED_PACKETS);
               if (stats.bytesForwarded || stats.packetsForwarded)
               {
                   System_printf ("Core %d: found forwarded stats when not expected: %d %d\n", 
                                  corenum, (uint32_t)stats.bytesForwarded, stats.packetsForwarded);
                   errorCount++;
               }
               if (stats.packetsDiscarded != expectDrop)
               {
                   System_printf ("Core %d: found wrong pkt discard stats: %d %d\n", 
                                  corenum, stats.packetsDiscarded, expectDrop);
                   errorCount++;
               }
               if (stats.bytesDiscarded != (expectDrop * QOS_DATA_PACKET_SIZE))
               {
                   /* This is the byte counter, so say so in the message */
                   System_printf ("Core %d: found wrong byte discard stats: %d %d\n", 
                                  corenum, (uint32_t)stats.bytesDiscarded, 
                                  expectDrop * QOS_DATA_PACKET_SIZE);
                   errorCount++;
               }
            }
        }
    }    
    
    /* Disable port 1 */
    if (TEST_PORTS > 1)
    {
        /* The loop variable "port" equals TEST_PORTS at this point, so the
         * diagnostics below name port 1 explicitly */
        if ((result = Qmss_disableQosSchedPort (1)) != QMSS_QOS_SCHED_RETCODE_SUCCESS)
        {
            System_printf ("Core %d : failed to disable QoS port %d: %d\n", corenum, 1, result);
            errorCount++;
        }
        /* Re-enable port 1 */
        if ((result = Qmss_enableQosSchedPort (1)) != QMSS_QOS_SCHED_RETCODE_SUCCESS)
        {
            System_printf ("Core %d : failed to enable QoS port %d: %d\n", corenum, 1, result);
            errorCount++;
        }
        /* Now the drop Q should have the port 1 packets */
        actualDisableDrops = Qmss_getQueueEntryCount (state->dropQHnd);
        Qmss_queueDivert (state->dropQHnd, state->freeQ, Qmss_Location_TAIL);
        if (actualDisableDrops != expectedDisableDrops)
        {
            System_printf ("Core %d : On port disable, found %d drops expected %d\n", 
                           corenum, actualDisableDrops, expectedDisableDrops);
            errorCount++;
        }
    }
    
    for (port = 0; port < TEST_PORTS; port++)
    {
        /* Configuration shouldn't have changed during test (check memory corruption) */
        port_readback (port, &portCfg[port]);
    }
}

/*****************************************************************************
 * This function sends portCfg to each of TEST_PORTS, then puts descriptors
 * on each queue, and records the results and determines the data rate on
 * each port.  If model is set, the resulting sequence is compared to the
 * C model.
 *
 * The ports are only disabled and re-enabled inside this function when
 * model is set.  With model clear the ports are never touched, so this
 * function can be used to test reconfiguring running ports.
 *
 * description      - text printed in the "starting transfer test" banner
 * state            - test state; supplies the free and output queue
 *                    handles (and is passed to the model and distributor)
 * portCfg          - array of TEST_PORTS port configurations to apply
 * numDescsPerQueue - descriptors to load on each configured queue; also the
 *                    forwarded packet count expected in every queue's stats
 * model            - nonzero: disable ports, run qos_model to capture the
 *                    expected per-port order, then enable ports and compare
 * checkRate        - nonzero: compare each port's first-to-last descriptor
 *                    time against the time implied by its cirIteration
 *
 * Every mismatch increments errorCount and prints a diagnostic.
 *****************************************************************************/
void transfer_test (const char *description, testState_t *state, Qmss_QosSchedPortCfg *portCfg, 
                    int numDescsPerQueue, int model, int checkRate)
{
    Qmss_Result          result;
    int                  port, group, queue;
    int                  descNum;
    UInt32               corenum = CSL_chipReadReg(CSL_CHIP_DNUM); 
    uint32_t             startTime, endTime;
    uint32_t             portDescNum[TEST_PORTS];
    uint32_t             portStartTimes[TEST_PORTS];
    uint32_t             portStopTimes[TEST_PORTS];
    int                  totalDescs;

    memset (portDescNum, 0, sizeof(portDescNum));
    System_printf("Core %d: starting transfer test: %s\n", corenum, description);

    if (model) 
    {
        /* Comparing to model requires port to be disabled to get good timing */
        for (port = 0; port < TEST_PORTS; port++)
        {
            if ((result = Qmss_disableQosSchedPort (port)) != QMSS_QOS_SCHED_RETCODE_SUCCESS)
            {
                System_printf ("Core %d : failed to disable QoS port %d: %d\n", corenum, port, result);
                errorCount++;
            }
        }
        /* Distribute the packets for the model */
        totalDescs = distribute_packets (state, portCfg, numDescsPerQueue);
        /* Run the model */
        qos_model (portCfg, state, totalDescs);
        /* Drain the output queue and record the sequence */
        /* The sequence numbers are sorted by port, since the fw isn't guaranteed
         * to start all ports at the same time */
        for (descNum = 0; descNum < totalDescs; descNum++)
        {
            uint32_t *desc;
            desc = (uint32_t *)QMSS_DESC_PTR(Qmss_queuePop (state->qosOutQHnd));
            if (desc) 
            {
                /* The port is recovered from the ID written at distribution
                 * time; this "port" shadows the function-level loop index */
                uint32_t descID = desc[DESC_ID_OFFSET];
                uint32_t port = break_descID_port (descID);
                expectedSeqn[port][portDescNum[port]++] = descID;
                Qmss_queuePushDesc (state->freeQ, desc);
            }
            else
            {
                System_printf ("Core %d : Model failed (didn't move enough descs %d out of %d)\n", 
                               corenum, descNum, totalDescs);
                errorCount++;
                break;
            }
        }
    }

    /* Send the configuration */
    for (port = 0; port < TEST_PORTS; port++)
    {
        /* set and check the configuration */
        port_config (port, &portCfg[port]);
    }

    /* Put some packets in each input queue */
    /* startTime is taken on exactly one of the two paths below, depending
     * on whether the ports are already running during distribution */
    if (! model)
    {
        /* The port is running now, so start timing while distributing */
        startTime = TSCL;
    }
    totalDescs = distribute_packets (state, portCfg, numDescsPerQueue);

    if (model)
    {
        /* The port wasn't running during distribution, so start timer just
         * before the port is enabled */
        startTime = TSCL;
    }
    /* Enable the port */
    if (model) 
    {
        /* Comparing to model requires port to be enabled to get good timing */
        for (port = 0; port < TEST_PORTS; port++)
        {
            if ((result = Qmss_enableQosSchedPort (port)) != QMSS_QOS_SCHED_RETCODE_SUCCESS)
            {
                System_printf ("Core %d : failed to enable QoS port %d: %d\n", corenum, port, result);
                errorCount++;
            }
        }
    }

    /* Timestamp arrival of each descriptor */
    /* Polls the output queue until totalDescs descriptors have arrived;
     * there is no timeout, so this spins if fewer are ever delivered */
    for (descNum = 0; descNum < totalDescs; )
    {
        uint32_t *desc;
        desc = (uint32_t *)QMSS_DESC_PTR(Qmss_queuePop (state->qosOutQHnd));
        if (desc) 
        {
            timestamps[descNum].timestamp = TSCL;
            timestamps[descNum].descPtr   = desc;
            descNum ++;
            Qmss_queuePushDesc (state->freeQ, desc);
        }
    }

    endTime = TSCL;

    System_printf ("Core %d: moved %d descriptors across %d queues "
                   "in %d cycles (%d cycles per descriptor)\n",
                   corenum, 
                   totalDescs, 
                   TEST_QUEUES,
                   endTime - startTime, 
                   (endTime - startTime) / (totalDescs));

    if (model)
    {
        int fail = 0;
        /* Restart the per-port indices used to walk expectedSeqn */
        memset (portDescNum, 0, sizeof(portDescNum));
        /* Check that the descriptors came in the right order */
        /* Order is checked per port: the n-th descriptor received for a
         * port must match the n-th one the model produced for that port */
        for (descNum = 0; descNum < totalDescs; descNum++)
        {
            uint32_t descID = timestamps[descNum].descPtr[DESC_ID_OFFSET];
            uint32_t port = break_descID_port (descID);
            uint32_t expectedDescID = expectedSeqn[port][portDescNum[port]++];
            if (expectedDescID != descID)
            {
                System_printf ("Core %d: port %d idx %d, found ID 0x%08x expected ID 0x%08x\n", 
                               corenum, port, descNum, descID, expectedDescID);

                fail = 1;
            }
        }
        /* Any number of ordering mismatches counts as a single error */
        if (fail)
        {
            System_printf("Core %d: model order doesn't match firmware\n", corenum);
            errorCount++;
        }
    }

    /* Find the first and last timestamp for each port and number of descs */
    memset (portDescNum, 0, sizeof(portDescNum));
    memset (portStartTimes, 0, sizeof(portStartTimes));
    memset (portStopTimes, 0, sizeof(portStopTimes));
    for (descNum = 0; descNum < totalDescs; descNum++)
    {
        uint32_t descID = timestamps[descNum].descPtr[DESC_ID_OFFSET];
        uint32_t port = break_descID_port (descID);
        /* First descriptor seen for a port sets its start time; every later
         * one overwrites its stop time, leaving the last arrival.  A port
         * that delivered a single descriptor keeps a stop time of 0. */
        if (portDescNum[port]) 
        {
            portStopTimes[port] = timestamps[descNum].timestamp;
        } 
        else 
        {
            portStartTimes[port] = timestamps[descNum].timestamp;
        }
        portDescNum[port]++;
    }

    /* Now we know how long it took between first and last desc */
    if (checkRate)
    {
    for (port = 0; port < TEST_PORTS; port++)
    {
        /* NOTE(review): the 2 * 3 factor presumably converts the timer
         * configuration value into CPU cycles per scheduler tick - confirm
         * against the timer/clock setup */
        uint32_t cyclesPerTick = QOS_TIMER_CONFIG * 2 * 3;
        uint32_t expectedTicks, cirMaxTicks, expectedCycles, actualCycles;
        uint32_t allowedError, actualError;
        /* Ticks needed = credit consumed by this port's descriptors divided
         * by the credit granted per tick (cirIteration); the caller must
         * supply a nonzero cirIteration when checkRate is set */
        if (portCfg[port].cirType == Qmss_QosSchedAcctType_BYTES) 
        {
           expectedTicks = portDescNum[port] * 
                           (QOS_DATA_PACKET_SIZE << QMSS_QOS_SCHED_BYTES_SCALE_SHIFT) /
                           portCfg[port].cirIteration;
        }
        else
        {
           expectedTicks = portDescNum[port] *
                           (1 << QMSS_QOS_SCHED_PACKETS_SCALE_SHIFT) /
                           portCfg[port].cirIteration;
        }
        /* Number of ticks worth of credit that can be banked up to cirMax */
        cirMaxTicks = (portCfg[port].cirMax - portCfg[port].cirIteration) / portCfg[port].cirIteration;
        actualCycles = portStopTimes[port] - portStartTimes[port];
        expectedCycles = expectedTicks * cyclesPerTick;
        allowedError = (portStartTimes[port] - startTime); /* from start of actual test until first packet really received by sw */
        /* One tick of uncertainty */
        allowedError += cyclesPerTick;
        if (! model)
        {
            /* Assume cirMax accumulated before starting */
            allowedError += cirMaxTicks * cyclesPerTick;
	    /* Allow 1 more ticks of error at startup when reconfig on fly */
	    allowedError += cyclesPerTick;
        } 
        allowedError += expectedCycles / 500; /* 0.2% */
        /* Unsigned subtraction: wraps when the port was slower than
         * expected; abs() below is applied after conversion to int */
        actualError = expectedCycles - actualCycles;
        System_printf ("Core %d: port %d took %d cycles to move %d descs ",
                       corenum, port, actualCycles, 
                       portDescNum[port]);
        System_printf ("expected %d, error=%d, allowed=%d) ", 
                       expectedCycles, actualError, allowedError);
        if (abs(actualError) > allowedError) 
        {
            System_printf("(*** FAIL ***)");
            errorCount++;
        }
        else
        {
            System_printf("(*** PASS ***)");
        }
        System_printf("\n");
    }
    }

    /* Check the stats */
    /* Nothing should have been dropped, and every configured queue should
     * report exactly numDescsPerQueue forwarded packets of
     * QOS_DATA_PACKET_SIZE bytes each */
    for (port = 0; port < TEST_PORTS; port++)
    {
        for (group = 0; group < portCfg[port].groupCount; group++)
        {
            for (queue = 0; queue < portCfg[port].group[group].totQueueCount; queue++)
            {
               Qmss_QosSchedStats      stats;
               /* Check the stats */
               Qmss_getQosSchedStats (&stats, port, group, queue, 
                                      QMSS_QOS_SCHED_STATS_FORWARDED_BYTES |
                                      QMSS_QOS_SCHED_STATS_FORWARDED_PACKETS);
               if (stats.bytesDiscarded || stats.packetsDiscarded)
               {
                   System_printf ("Core %d: found dropped stats when not expected: %d %d\n", 
                                  corenum, (uint32_t)stats.bytesDiscarded, stats.packetsDiscarded);
                   errorCount++;
               }
               if (stats.packetsForwarded != numDescsPerQueue)
               {
                   System_printf ("Core %d: port %d, group %d, queue %d, found wrong pkt forward stats: %d %d\n", 
                                  corenum, port, group, queue, stats.packetsForwarded, numDescsPerQueue);
                   errorCount++;
               }
               if (stats.bytesForwarded != (numDescsPerQueue * QOS_DATA_PACKET_SIZE))
               {
                   System_printf ("Core %d: port %d, group %d, queue %d, found wrong byte forward stats: %d %d\n", 
                                  corenum, port, group, queue, (uint32_t)stats.bytesForwarded, 
                                  numDescsPerQueue * QOS_DATA_PACKET_SIZE);
                   errorCount++;
               }
            }
        }
    }
    for (port = 0; port < TEST_PORTS; port++)
    {
        /* Configuration shouldn't have changed during test (check memory corruption) */
        port_readback (port, &portCfg[port]);
    }
}

Void testQosSched (Void)
{
    Qmss_Result          result;
    testState_t          state;
    UInt32               numAllocated, corenum;
    int                  baseQueue;
    int                  queueNum;
    uint8_t              isAllocated;
    Qmss_QosSchedPortCfg portCfg[TEST_PORTS];
    int                  port, group, queue;
    int                  beQueues, rrQueues, spQueues;

    /* Reset timer */
    TSCL = 0;

    /* Get the core number. */
    corenum = CSL_chipReadReg(CSL_CHIP_DNUM); 
    System_printf ("**********Core %d TESTING QoS scheduler ************\n", corenum);

    memset ((Void *) &qmssInitConfig, 0, sizeof (Qmss_InitCfg));

    /* Set up the linking RAM. Use internal Linking RAM.  */
    qmssInitConfig.linkingRAM0Base = 0;
    qmssInitConfig.linkingRAM0Size = 0;
    qmssInitConfig.linkingRAM1Base = 0x0;
    qmssInitConfig.maxDescNum      = NUM_MONOLITHIC_DESC;

#ifdef xdc_target__bigEndian
    qmssInitConfig.pdspFirmware[0].pdspId = Qmss_PdspId_PDSP2;
    qmssInitConfig.pdspFirmware[0].firmware = &qos_sched_be;
    qmssInitConfig.pdspFirmware[0].size = sizeof (qos_be);
#else
    qmssInitConfig.pdspFirmware[0].pdspId = Qmss_PdspId_PDSP2;
    qmssInitConfig.pdspFirmware[0].firmware = &qos_sched_le;
    qmssInitConfig.pdspFirmware[0].size = sizeof (qos_le);
#endif

    /* Initialize Queue Manager SubSystem */
    result = Qmss_init (&qmssInitConfig, &qmssGblCfgParams);
    if (result != QMSS_SOK)
    {
        System_printf ("Error Core %d : Initializing Queue Manager SubSystem error code : %d\n", corenum, result);
        errorCount++;
        return;
    }

    /* Start Queue Manager SubSystem */
    result = Qmss_start ();
    if (result != QMSS_SOK)
    {
        System_printf ("Core %d : Error starting Queue Manager error code : %d\n", corenum, result);
    }

    /* Now that the FW is downloaded, can query its version */
    System_printf ("Core %d : QoS Sched Firmware Rev 0x%08x\n", corenum, Qmss_getQosSchedFwVersion());

    /* Setup memory region for monolithic descriptors */
    memset ((Void *) &monolithicDesc, 0, SIZE_MONOLITHIC_DESC * NUM_MONOLITHIC_DESC);
    memInfo.descBase = (UInt32 *) l2_global_address ((UInt32) monolithicDesc);
    memInfo.descSize = SIZE_MONOLITHIC_DESC;
    memInfo.descNum = NUM_MONOLITHIC_DESC;
    memInfo.manageDescFlag = Qmss_ManageDesc_MANAGE_DESCRIPTOR;
    memInfo.memRegion = Qmss_MemRegion_MEMORY_REGION_NOT_SPECIFIED;
    memInfo.startIndex = 0;

    result = Qmss_insertMemoryRegion (&memInfo);
    if (result < QMSS_SOK)
    {
        System_printf ("Error Core %d : Inserting memory region %d error code : %d\n", corenum, memInfo.memRegion, result);
        errorCount++;
    }

    descCfg.memRegion = Qmss_MemRegion_MEMORY_REGION0;
    descCfg.descNum = NUM_MONOLITHIC_DESC;
    descCfg.destQueueNum = QMSS_PARAM_NOT_SPECIFIED;
    descCfg.queueType = Qmss_QueueType_STARVATION_COUNTER_QUEUE;
    
    /* Initialize the descriptors and push to free Queue */
    if ((state.freeQ = Qmss_initDescriptor (&descCfg, &numAllocated)) < 0)
    {
        System_printf ("Error Core %d : Initializing descriptor error code: %d \n", corenum, state.freeQ);
        errorCount++;
    }
    else
    {
        if (descCfg.descNum != numAllocated)
        {
            errorCount++;
        }
            
        System_printf ("Core %d : Number of descriptors requested : %d. Number of descriptors allocated : %d \n",
            corenum, descCfg.descNum, numAllocated);
    }

    /* Allocate block of queues to be used by firmware */
    baseQueue = allocate_contig_queues (state.fwQHnds, QOS_TX_QUEUES, 32);
    if (baseQueue < 0) 
    {
        System_printf ("Core %d : Failed to open 80 contiguous queues\n");
        errorCount++;
    }

    /* Set the FW's base queue */
    if ((result = Qmss_setQosSchedQueueBase (baseQueue)) != QMSS_QOS_SCHED_RETCODE_SUCCESS)
    {
        System_printf ("Error Core %d : Setting QoS queue base address error code: %d \n", corenum, result);
        errorCount++;
    }

    /* Configure the queue thresholds as required by the FW */
    for (queueNum = 0; queueNum < QOS_TX_QUEUES; queueNum++) 
    {
        Qmss_setQueueThreshold (state.fwQHnds[queueNum], 1, 1);
    }

    /* Open output queue */
    state.qosOutQHnd = Qmss_queueOpen (Qmss_QueueType_GENERAL_PURPOSE_QUEUE, 
                                       QMSS_PARAM_NOT_SPECIFIED, &isAllocated);
    if (state.qosOutQHnd < 0) 
    {
        errorCount++;
        System_printf("out Queue open failed: %d\n", state.qosOutQHnd);
    }

    /* Open drop/congestion queue */
    state.dropQHnd = Qmss_queueOpen (Qmss_QueueType_GENERAL_PURPOSE_QUEUE, 
                                     QMSS_PARAM_NOT_SPECIFIED, &isAllocated);
    if (state.dropQHnd < 0) 
    {
        errorCount++;
        System_printf("drop Queue open failed: %d\n", state.dropQHnd);
    }

    /* Set up QoS's timer */
    if ((result = Qmss_configureQosSchedTimer (QOS_TIMER_CONFIG) != QMSS_QOS_SCHED_RETCODE_SUCCESS))
    {
        errorCount++;
        System_printf("Core %d : Failed to configure QoS timer: %d\n", corenum, result);
    }

    /* Run congestion test */
    congestion_test(&state, Qmss_QosSchedAcctType_PACKETS);
    congestion_test(&state, Qmss_QosSchedAcctType_BYTES);

    /* Set up for scheduling using packet count units */
    for (port = 0; port < TEST_PORTS; port++)
    {
        portCfg[port].wrrType = Qmss_QosSchedAcctType_PACKETS;
        portCfg[port].cirType = Qmss_QosSchedAcctType_PACKETS;
        portCfg[port].congestionType = Qmss_QosSchedAcctType_PACKETS;
        portCfg[port].cirIteration = 1 << QMSS_QOS_SCHED_PACKETS_SCALE_SHIFT;
        portCfg[port].cirMax = 10 << QMSS_QOS_SCHED_PACKETS_SCALE_SHIFT;
        if (port >= QMSS_QOS_SCHED_FULL_MAX_PHYS_PORTS) 
        {
            portCfg[port].groupCount = QMSS_QOS_SCHED_LITE_MAX_LOG_GROUPS;
        }
        else
        {
            portCfg[port].groupCount = QMSS_QOS_SCHED_FULL_MAX_LOG_GROUPS;
        }
        portCfg[port].outputQueue = Qmss_getQueueNumber (state.qosOutQHnd);
        for (group = 0; group < portCfg[port].groupCount; group++)
        {
            portCfg[port].group[group].cirIteration = 1 << QMSS_QOS_SCHED_PACKETS_SCALE_SHIFT;
            portCfg[port].group[group].pirIteration = 1 << QMSS_QOS_SCHED_PACKETS_SCALE_SHIFT;
            portCfg[port].group[group].wrrInitialCredit = 1 << QMSS_QOS_SCHED_PACKETS_SCALE_SHIFT;
            portCfg[port].group[group].cirMax = 10 << QMSS_QOS_SCHED_PACKETS_SCALE_SHIFT;
            portCfg[port].group[group].pirMax = 10 << QMSS_QOS_SCHED_PACKETS_SCALE_SHIFT;
            if (port >= QMSS_QOS_SCHED_FULL_MAX_PHYS_PORTS) 
            {
                portCfg[port].group[group].totQueueCount = 4;
                portCfg[port].group[group].spQueueCount = 4;
                portCfg[port].group[group].wrrQueueCount = 0;
            } 
            else 
            {
                portCfg[port].group[group].totQueueCount = 8;
                portCfg[port].group[group].spQueueCount = 2;
                portCfg[port].group[group].wrrQueueCount = 5;
            }
            for (queue = 0; queue < portCfg[port].group[group].totQueueCount; queue++)
            {
                portCfg[port].group[group].Queue[queue].wrrInitialCredit = 1 << QMSS_QOS_SCHED_PACKETS_SCALE_SHIFT;
                /* Disable congestion dropping so we can test by pre-loading queues without dropping
                 * the data */
                portCfg[port].group[group].Queue[queue].congestionThresh = 0;
            }
        }

        /* set and check the configuration */
        port_config (port, &portCfg[port]);
    }

    /* This runs the transfer test while reconfiguring the ports on fly after the drop test */
    transfer_test ("packet units changed on the fly", &state, portCfg, PROFILE_DESCS, 0, 1);

    /* This re runs the test against the model, which resets the ports */
    transfer_test ("packet units after reset compared to model", &state, portCfg, PROFILE_DESCS, 1, 1);

    /* Re run but configure for 1 mbit from each port */
#define TEST_RATE 1000000 /* bits per second */
#define TEST_CLOCK 10000 /* ticks per second */
    for (port = 0; port < TEST_PORTS; port++)
    {
        portCfg[port].wrrType = Qmss_QosSchedAcctType_BYTES;
        portCfg[port].cirType = Qmss_QosSchedAcctType_BYTES;
        portCfg[port].congestionType = Qmss_QosSchedAcctType_BYTES;
        portCfg[port].cirIteration = (((TEST_RATE / TEST_CLOCK) << QMSS_QOS_SCHED_BYTES_SCALE_SHIFT) / 8);
        if (portCfg[port].cirIteration == 0)
        {
            portCfg[port].cirIteration = 1;
        }
        portCfg[port].cirMax = 10 * portCfg[port].cirIteration;
        for (group = 0; group < portCfg[port].groupCount; group++)
        {
            portCfg[port].group[group].cirIteration = portCfg[port].cirIteration / portCfg[port].groupCount;
            if (portCfg[port].group[group].cirIteration == 0)
            {
                portCfg[port].group[group].cirIteration = 1;
            }
            portCfg[port].group[group].pirIteration = portCfg[port].group[group].cirIteration;
            portCfg[port].group[group].wrrInitialCredit = portCfg[port].group[group].cirIteration;
            portCfg[port].group[group].cirMax = 10 * portCfg[port].group[group].cirIteration;
            portCfg[port].group[group].pirMax = 10 * portCfg[port].group[group].pirIteration;
            for (queue = 0; queue < portCfg[port].group[group].totQueueCount; queue++)
            {
                portCfg[port].group[group].Queue[queue].wrrInitialCredit = portCfg[port].group[group].cirIteration / portCfg[port].groupCount;
                if (portCfg[port].group[group].Queue[queue].wrrInitialCredit == 0) 
                {
                    portCfg[port].group[group].Queue[queue].wrrInitialCredit = 1;
                }
            }
        }
    }

    /* This runs the transfer test while reconfiguring the ports on the fly after the packet test */
    transfer_test ("byte units reconfiguring on fly", &state, portCfg, PROFILE_DESCS, 0, 1);

    /* This re runs the test against the model, which resets the ports */
    transfer_test ("byte units with reset and comparing to model", &state, portCfg, PROFILE_DESCS, 1, 1);

    /* Reconfigure for a more complex case where all the rates are different and
     * PIR is available.  This does NOT match any realistic use case.
     * It only makes sense to compare this result to the model.
     */
    for (port = 0; port < TEST_PORTS; port++)
    {
        portCfg[port].cirMax = 10 * portCfg[port].cirIteration;
        for (group = 0; group < portCfg[port].groupCount; group++)
        {
            portCfg[port].group[group].cirIteration = 
                portCfg[port].cirIteration / portCfg[port].groupCount;

            /* Make the rates different */
            portCfg[port].group[group].cirIteration += 
                group * portCfg[port].group[group].cirIteration / portCfg[port].groupCount;

            if (portCfg[port].group[group].cirIteration == 0)
            {
                portCfg[port].group[group].cirIteration = 1;
            }

            portCfg[port].group[group].pirIteration = portCfg[port].group[group].cirIteration * 11 / 10;
            portCfg[port].group[group].wrrInitialCredit = portCfg[port].cirIteration / portCfg[port].groupCount;
            portCfg[port].group[group].wrrInitialCredit +=
                group * portCfg[port].group[group].wrrInitialCredit /
                portCfg[port].groupCount;
            portCfg[port].group[group].cirMax = 10 * portCfg[port].group[group].cirIteration;
            portCfg[port].group[group].pirMax = 10 * portCfg[port].group[group].pirIteration;
            for (queue = 0; queue < portCfg[port].group[group].totQueueCount; queue++)
            {
                portCfg[port].group[group].Queue[queue].wrrInitialCredit = 
                    portCfg[port].group[group].cirIteration / portCfg[port].groupCount;

                portCfg[port].group[group].Queue[queue].wrrInitialCredit += 
                    queue * portCfg[port].group[group].Queue[queue].wrrInitialCredit /  
                    portCfg[port].group[group].totQueueCount;
                if (portCfg[port].group[group].Queue[queue].wrrInitialCredit == 0) 
                {
                    portCfg[port].group[group].Queue[queue].wrrInitialCredit = 1;
                }
            }
        }
    }

    /* This re runs the test against the model, which resets the ports */
    transfer_test ("complex configuration with reset vs model\n", &state, portCfg, PROFILE_DESCS, 1, 0);

    /* Now try combinations of 0, 1, 2 of each type of queue */
    for (spQueues = 0; spQueues <= 2; spQueues++)
    {
        for (rrQueues = 0; rrQueues <= 2; rrQueues++)
        {
            for (beQueues = 0; beQueues <= 2; beQueues++)
            {
                int portLimit = TEST_PORTS;
                if (!spQueues && !rrQueues & !beQueues)
                {
                    continue;
                }
                if (portLimit > QMSS_QOS_SCHED_FULL_MAX_PHYS_PORTS)
                {
                    portLimit = QMSS_QOS_SCHED_FULL_MAX_PHYS_PORTS;
                }
                for (port = 0; port < portLimit ; port++)
                {
                    for (group = 0; group < portCfg[port].groupCount; group++)
                    {
                        if (port < QMSS_QOS_SCHED_FULL_MAX_PHYS_PORTS)
                        {
                            portCfg[port].group[group].spQueueCount = spQueues;
                            portCfg[port].group[group].wrrQueueCount = rrQueues;
                            portCfg[port].group[group].totQueueCount = spQueues + rrQueues + beQueues;
                        }
                        for (queue = 0; queue < portCfg[port].group[group].totQueueCount; queue++)
                        {
                            portCfg[port].group[group].Queue[queue].wrrInitialCredit = 
                                portCfg[port].group[group].cirIteration / portCfg[port].groupCount;

                            portCfg[port].group[group].Queue[queue].wrrInitialCredit += 
                                queue * portCfg[port].group[group].Queue[queue].wrrInitialCredit /  
                                portCfg[port].group[group].totQueueCount;
                            if (portCfg[port].group[group].Queue[queue].wrrInitialCredit == 0) 
                            {
                                portCfg[port].group[group].Queue[queue].wrrInitialCredit = 1;
                            }
                        }
                    }
                }
                System_printf ("Core %d: trying %d sp queues, %d rr queues, and %d be queues\n",
                               corenum, spQueues, rrQueues, beQueues);
                /* This re runs the test against the model, which resets the ports */
                transfer_test ("queue loop corner case test", &state, portCfg, PROFILE_DESCS > 5 ? 5 : PROFILE_DESCS, 1, 0);
            }
        }
    }

    if (errorCount == 0)
        System_printf ("\nCore %d : QoS scheduler tests Passed\n", corenum);
    else 
        System_printf ("\nCore %d : ***********FAIL***********\n", corenum);
}



/**
 *  Test entry point: runs the QoS scheduler test by calling testQosSched().
 *  Takes no arguments and returns nothing.
 */
Void run_test (Void)
{
    /* All of the work is delegated to testQosSched() */
    testQosSched ();
    return;
}
#else
/**
 *  Alternate test entry point used in the #else preprocessor branch.
 *  Runs no test; it only prints a notice that the simulator does not
 *  fully support QoS functionality.
 */
Void run_test (Void)
{
    /* End the notice with a newline, as the file's other status messages do,
     * so the output line is properly terminated */
    System_printf ("Simulator doesn't fully support QoS functionality.\n");
}
#endif


