SPRUGR9H November 2010 – April 2015  66AK2E05, 66AK2H06, 66AK2H12, 66AK2H14, 66AK2L06, AM5K2E02, AM5K2E04, SM320C6678-HIREL, TMS320C6652, TMS320C6654, TMS320C6655, TMS320C6657, TMS320C6670, TMS320C6671, TMS320C6672, TMS320C6674, TMS320C6678

 

  1.   Preface
    1.     About This Manual
    2.     Trademarks
    3.     Notational Conventions
    4.     Related Documentation from Texas Instruments
  2. 1 Introduction
    1. 1.1  Terminology Used in This Document
    2. 1.2  KeyStone I Features
    3. 1.3  KeyStone I Functional Block Diagram
    4. 1.4  KeyStone II Changes to QMSS
    5. 1.5  KeyStone II QMSS Modes of Use
      1. 1.5.1 Shared Mode
      2. 1.5.2 Split Mode
    6. 1.6  Overview
    7. 1.7  Queue Manager
    8. 1.8  Packet DMA (PKTDMA)
    9. 1.9  Navigator Cloud
    10. 1.10 Virtualization
    11. 1.11 ARM-DSP Shared Use
    12. 1.12 PDSP Firmware
  3. 2 Operational Concepts
    1. 2.1 Packets
    2. 2.2 Queues
      1. 2.2.1 Packet Queuing
      2. 2.2.2 Packet De-queuing
      3. 2.2.3 Queue Proxy
    3. 2.3 Queue Types
      1. 2.3.1 Transmit Queues
      2. 2.3.2 Transmit Completion Queues
      3. 2.3.3 Receive Queues
      4. 2.3.4 Free Descriptor Queues (FDQ)
        1. 2.3.4.1 Host Packet Free Descriptors
        2. 2.3.4.2 Monolithic Free Descriptors
      5. 2.3.5 Queue Pend Queues
    4. 2.4 Descriptors
      1. 2.4.1 Host Packet
      2. 2.4.2 Host Buffer
      3. 2.4.3 Monolithic Packet
    5. 2.5 Packet DMA
      1. 2.5.1 Channels
      2. 2.5.2 RX Flows
    6. 2.6 Packet Transmission Overview
    7. 2.7 Packet Reception Overview
    8. 2.8 ARM Endianess
  4. 3 Descriptor Layouts
    1. 3.1 Host Packet Descriptor
    2. 3.2 Host Buffer Descriptor
    3. 3.3 Monolithic Descriptor
  5. 4 Registers
    1. 4.1 Queue Manager
      1. 4.1.1 Queue Configuration Region
        1. 4.1.1.1 Revision Register (0x00000000)
        2. 4.1.1.2 Queue Diversion Register (0x00000008)
        3. 4.1.1.3 Linking RAM Region 0 Base Address Register (0x0000000C)
        4. 4.1.1.4 Linking RAM Region 0 Size Register (0x00000010)
        5. 4.1.1.5 Linking RAM Region 1 Base Address Register (0x00000014)
        6. 4.1.1.6 Free Descriptor/Buffer Starvation Count Register N (0x00000020 + N×4)
      2. 4.1.2 Queue Status RAM
      3. 4.1.3 Descriptor Memory Setup Region
        1. 4.1.3.1 Memory Region R Base Address Register (0x00000000 + 16×R)
        2. 4.1.3.2 Memory Region R Start Index Register (0x00000004 + 16×R)
        3. 4.1.3.3 Memory Region R Descriptor Setup Register (0x00000008 + 16×R)
      4. 4.1.4 Queue Management/Queue Proxy Regions
        1. 4.1.4.1 Queue N Register A (0x00000000 + 16×N)
        2. 4.1.4.2 Queue N Register B (0x00000004 + 16×N)
        3. 4.1.4.3 Queue N Register C (0x00000008 + 16×N)
        4. 4.1.4.4 Queue N Register D (0x0000000C + 16×N)
      5. 4.1.5 Queue Peek Region
        1. 4.1.5.1 Queue N Status and Configuration Register A (0x00000000 + 16×N)
        2. 4.1.5.2 Queue N Status and Configuration Register B (0x00000004 + 16×N)
        3. 4.1.5.3 Queue N Status and Configuration Register C (0x00000008 + 16×N)
        4. 4.1.5.4 Queue N Status and Configuration Register D (0x0000000C + 16×N)
    2. 4.2 Packet DMA
      1. 4.2.1 Global Control Registers Region
        1. 4.2.1.1 Revision Register (0x00)
        2. 4.2.1.2 Performance Control Register (0x04)
        3. 4.2.1.3 Emulation Control Register (0x08)
        4. 4.2.1.4 Priority Control Register (0x0C)
        5. 4.2.1.5 QMn Base Address Register (0x10, 0x14, 0x18, 0x1c)
      2. 4.2.2 TX DMA Channel Configuration Region
        1. 4.2.2.1 TX Channel N Global Configuration Register A (0x000 + 32×N)
        2. 4.2.2.2 TX Channel N Global Configuration Register B (0x004 + 32×N)
      3. 4.2.3 RX DMA Channel Configuration Region
        1. 4.2.3.1 RX Channel N Global Configuration Register A (0x000 + 32×N)
      4. 4.2.4 RX DMA Flow Configuration Region
        1. 4.2.4.1 RX Flow N Configuration Register A (0x000 + 32×N)
        2. 4.2.4.2 RX Flow N Configuration Register B (0x004 + 32×N)
        3. 4.2.4.3 RX Flow N Configuration Register C (0x008 + 32×N)
        4. 4.2.4.4 RX Flow N Configuration Register D (0x00C + 32×N)
        5. 4.2.4.5 RX Flow N Configuration Register E (0x010 + 32×N)
        6. 4.2.4.6 RX Flow N Configuration Register F (0x014 + 32×N)
        7. 4.2.4.7 RX Flow N Configuration Register G (0x018 + 32×N)
        8. 4.2.4.8 RX Flow N Configuration Register H (0x01C + 32×N)
      5. 4.2.5 TX Scheduler Configuration Region
        1. 4.2.5.1 TX Channel N Scheduler Configuration Register (0x000 + 4×N)
    3. 4.3 QMSS PDSPs
      1. 4.3.1 Descriptor Accumulation Firmware
        1. 4.3.1.1 Command Buffer Interface
        2. 4.3.1.2 Global Timer Command Interface
        3. 4.3.1.3 Reclamation Queue Command Interface
        4. 4.3.1.4 Queue Diversion Command Interface
      2. 4.3.2 Quality of Service Firmware
        1. 4.3.2.1 QoS Algorithms
          1. 4.3.2.1.1 Modified Token Bucket Algorithm
        2. 4.3.2.2 Command Buffer Interface
        3. 4.3.2.3 QoS Firmware Commands
        4. 4.3.2.4 QoS Queue Record
        5. 4.3.2.5 QoS Cluster Record
        6. 4.3.2.6 RR-Mode QoS Cluster Record
        7. 4.3.2.7 SRIO Queue Monitoring
          1. 4.3.2.7.1 QoS SRIO Queue Monitoring Record
      3. 4.3.3 Open Event Machine Firmware
      4. 4.3.4 Interrupt Operation
        1. 4.3.4.1 Interrupt Handshaking
        2. 4.3.4.2 Interrupt Processing
        3. 4.3.4.3 Interrupt Generation
        4. 4.3.4.4 Stall Avoidance
      5. 4.3.5 QMSS PDSP Registers
        1. 4.3.5.1 Control Register (0x00000000)
        2. 4.3.5.2 Status Register (0x00000004)
        3. 4.3.5.3 Cycle Count Register (0x0000000C)
        4. 4.3.5.4 Stall Count Register (0x00000010)
    4. 4.4 QMSS Interrupt Distributor
      1. 4.4.1 INTD Register Region
        1. 4.4.1.1  Revision Register (0x00000000)
        2. 4.4.1.2  End Of Interrupt (EOI) Register (0x00000010)
        3. 4.4.1.3  Status Register 0 (0x00000200)
        4. 4.4.1.4  Status Register 1 (0x00000204)
        5. 4.4.1.5  Status Register 2 (0x00000208)
        6. 4.4.1.6  Status Register 3 (0x0000020c)
        7. 4.4.1.7  Status Register 4 (0x00000210)
        8. 4.4.1.8  Status Clear Register 0 (0x00000280)
        9. 4.4.1.9  Status Clear Register 1 (0x00000284)
        10. 4.4.1.10 Status Clear Register 4 (0x00000290)
        11. 4.4.1.11 Interrupt N Count Register (0x00000300 + 4×N)
  6. 5 Mapping Information
    1. 5.1 Queue Maps
    2. 5.2 Interrupt Maps
      1. 5.2.1 KeyStone I TCI661x, C6670, C665x devices
      2. 5.2.2 KeyStone I TCI660x, C667x devices
      3. 5.2.3 KeyStone II devices
    3. 5.3 Memory Maps
      1. 5.3.1 QMSS Register Memory Map
      2. 5.3.2 KeyStone I PKTDMA Register Memory Map
      3. 5.3.3 KeyStone II PKTDMA Register Memory Map
    4. 5.4 Packet DMA Channel Map
  7. 6 Programming Information
    1. 6.1 Programming Considerations
      1. 6.1.1 System Planning
      2. 6.1.2 Notification of Completed Work
    2. 6.2 Example Code
      1. 6.2.1 QMSS Initialization
      2. 6.2.2 PKTDMA Initialization
      3. 6.2.3 Normal Infrastructure DMA with Accumulation
      4. 6.2.4 Bypass Infrastructure notification with Accumulation
      5. 6.2.5 Channel Teardown
    3. 6.3 Programming Overrides
    4. 6.4 Programming Errors
    5. 6.5 Questions and Answers
  8. A Example Code Utility Functions
  9. B Example Code Types
  10. C Example Code Addresses
    1. C.1 KeyStone I Addresses:
    2. C.2 KeyStone II Addresses:
  11.   Revision History

QMSS Initialization

Multicore Navigator is designed to be initialized at startup with enough resources to keep it running successfully during normal operation.

First, define the memory areas to be used and align them to 16-byte boundaries (only the QM memories require alignment, but it is a good idea to align the others as well):

#pragma DATA_ALIGN (host_region, 16)
Uint8  host_region[64 * 64];
#pragma DATA_ALIGN (mono_region, 16)
Uint8  mono_region[32 * 160];
#pragma DATA_ALIGN (buffers, 16)
Uint32 buffers[64 * 256];    // these buffers are for Host Packets
#pragma DATA_ALIGN (hostList, 16)
Uint32 hostList[34];         // ping/pong of (16 + 1 word for list count)
#pragma DATA_ALIGN (monoList, 16)
Uint32 monoList[34];         // ping/pong of (16 + 1 word for list count)

Some declarations for clarity in the following code segments (see appendices for type definitions):

MNAV_HostPacketDescriptor       *host_pkt;
MNAV_MonolithicPacketDescriptor *mono_pkt;
Qmss_AccCmd                      cmd;

Next, set up the QM memory regions to be used. This example sets up two: one for host descriptors and another for monolithic descriptors. The part that requires the most attention is specifying the size. The last parameter is the value written to the Memory Region R Descriptor Setup Register, and it encodes both the descriptor size and the number of descriptors (see the definition of this register in the previous sections).

/* Setup Memory Region 0 for 40 56 byte Host descriptors. Our
 * Host descriptors will be 32 bytes plus up to 6 words of PS data,
 * but the next best size is 64 bytes times 64 descriptors. */
set_memory_region(0, (Uint32) host_region, 0, 0x00030001);

/* Setup Memory Region 1 for 8 148B Monolithic descriptors. Our
 * Mono descriptors will be 12 bytes plus 16 bytes of EPIB Info, plus
 * 128 bytes of payload, but the next best size is 160 bytes times
 * 32 descriptors. (dead space is possible) */
set_memory_region(1, (Uint32) mono_region, 64, 0x00090000);
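The setup value does not have to be hard-coded. The sketch below shows one way to derive it, assuming the field encoding of the Memory Region R Descriptor Setup Register (Section 4.1.3.3): a descriptor-size field equal to (size in bytes / 16) - 1 starting at bit 16, and a region-size field equal to log2(descriptor count) - 5 in the low bits. The helper name make_region_setup is illustrative and is not one of the appendix utility functions.

/* Illustrative helper (not in the appendices): build a Memory Region R
 * Descriptor Setup Register value from a descriptor size (a multiple of
 * 16 bytes) and a descriptor count (a power of 2, minimum 32). Assumes
 * the field encoding described in Section 4.1.3.3. */
Uint32 make_region_setup(Uint32 desc_size_bytes, Uint32 desc_count)
{
    Uint32 size_field  = (desc_size_bytes / 16) - 1;   /* 64 bytes -> 3 */
    Uint32 count_field = 0;                            /* 64 descs -> 1 */

    while ((32u << count_field) < desc_count)
        count_field++;

    return (size_field << 16) | count_field;
}

/* make_region_setup(64, 64)  == 0x00030001  (Region 0 above)
 * make_region_setup(160, 32) == 0x00090000  (Region 1 above) */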

An external Linking RAM needs to be configured with one 64-bit word for each descriptor in the memory regions that use the external Linking RAM. The internal Linking RAM does not require a buffer to be allocated for it.

/*****************************************************************
 * Configure Linking RAM 0 to use the 16k entry internal link ram.
 */
set_link_ram(0, 0x00080000, 0x3FFF);

Note that Linking RAM 0 may be configured to use internal QMSS memory as shown here. Linking RAM 1 may use L2 or DDR. For efficiency reasons, it is best to use the internal QMSS Link RAM memory whenever possible.
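If a memory region indexes descriptors beyond the range covered by the internal Linking RAM, Linking RAM 1 must be backed by a host-supplied buffer in L2 or DDR holding one 64-bit word per descriptor. The following is a minimal sketch under that assumption; it reuses set_link_ram() from the appendices, the buffer name linkRam1 and the count of 96 descriptors are illustrative, and because only region 0 has a Linking RAM Size Register the size argument is assumed to be ignored for region 1.

/* Illustrative only: back Linking RAM 1 with an L2 or DDR buffer sized
 * at one 64-bit word per descriptor linked through it (96 is an assumed
 * example count, not a value taken from this example's regions). */
#pragma DATA_ALIGN (linkRam1, 16)
Uint32 linkRam1[96 * 2];                /* 96 descriptors x 64 bits each */

set_link_ram(1, (Uint32) linkRam1, 0);  /* only region 0 has a size register */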

Once the memory regions and Link RAMs have been configured, two types of queues should be filled with empty descriptors: TX completion queues (otherwise known as TX FDQs), and RX FDQs.

/* Initialize descriptor regions to zero */
memset(host_region, 0, 64 * 64);
memset(mono_region, 0, 32 * 160);

/* Push Host Descriptors to Tx Completion Queue (FDQ) 5000 */
for (idx = 0; idx < 20; idx++)
{
    host_pkt = (MNAV_HostPacketDescriptor *)(host_region + (idx * 64));
    host_pkt->pkt_return_qmgr = 1;
    host_pkt->pkt_return_qnum = 0;
    host_pkt->orig_buff0_len  = 64 * 4;
    host_pkt->orig_buff0_ptr  = (Uint32)(buffers + (idx * 128));
    host_pkt->next_desc_ptr   = NULL;

    push_queue(5000, 1, 0, (Uint32)(host_pkt));
}

/* Push Monolithic packets to Tx Completion Queue (FDQ) 5001 */
for (idx = 0; idx < 16; idx++)
{
    mono_pkt = (MNAV_MonolithicPacketDescriptor *)(mono_region + (idx * 160));
    mono_pkt->pkt_return_qmgr = 1;
    mono_pkt->pkt_return_qnum = 1;

    push_queue(5001, 1, 0, (Uint32)(mono_pkt));
}

/* Push Host Descriptors to Rx FDQ 7000 */
for (idx = 20; idx < 64; idx++)
{
    host_pkt = (MNAV_HostPacketDescriptor *)(host_region + (idx * 64));

    /* Set non-Rx overwrite fields */
    host_pkt->orig_buff0_len = 64 * 4;
    host_pkt->orig_buff0_ptr = (Uint32)(buffers + (idx * 128));
    host_pkt->next_desc_ptr  = NULL;   // don't link Host buffers in Rx FDQ

    push_queue(7000, 1, 0, (Uint32)(host_pkt));
}

/* Push Monolithic packets to Rx FDQ 7001 */
for (idx = 16; idx < 32; idx++)
{
    mono_pkt = (MNAV_MonolithicPacketDescriptor *)(mono_region + (idx * 160));

    push_queue(7001, 1, 0, (Uint32)(mono_pkt));
}

Last, program the accumulator channels that are needed. Both channels are programmed to return only the QM Register D value for each descriptor. The high-priority program uses the list-count method and the low-priority program uses NULL termination. The second time the interrupt triggers, the accumulators write to the pong side of the lists (in both cases starting with word 17). It is up to the host to process and recycle the descriptors before that ping or pong side is needed again by the accumulator, which does not check for consumption.

/*****************************************************************
 * Program a hi-pri accumulation channel for queue 712.
 */
cmd.command       = 0x81;                 // enable
cmd.channel       = 8;                    // will trigger qmss_intr1_8 to core 0
cmd.queue_mask    = 0;                    // not used in single mode
cmd.list_address  = (uint32_t)hostList;   // address of ping buffer
cmd.max_entries   = 17;                   // list can hold up to 16 (max-1)
cmd.qm_index      = 712;                  // queue to monitor for channel 8
cmd.cfg_multi_q   = 0;                    // 0 = single queue mode
cmd.cfg_list_mode = 1;                    // 1 = list count in first entry
cmd.cfg_list_size = 0;                    // 0 = "D" Reg
cmd.cfg_int_delay = 1;                    // 1 = delay since last interrupt (pacing mode)
cmd.timer_count   = 1;                    // number of timer ticks to delay interrupt

program_accumulator(1, &cmd);

/*****************************************************************
 * Program a lo-pri accumulation channel for queue 32.
 */
cmd.command       = 0x81;                 // enable
cmd.channel       = 1;                    // will trigger qmss_intr0_1 to all cores
cmd.queue_mask    = 0x00000001;           // look only at queue 32 for this example
cmd.list_address  = (uint32_t)monoList;   // address of ping buffer
cmd.max_entries   = 17;                   // list can hold up to 16 (max-1)
cmd.qm_index      = 32;                   // first queue to monitor for this channel
cmd.cfg_multi_q   = 1;                    // 1 = multi queue mode
cmd.cfg_list_mode = 0;                    // 0 = NULL terminated list
cmd.cfg_list_size = 0;                    // 0 = "D" Reg
cmd.cfg_int_delay = 1;                    // 1 = delay since last interrupt (pacing mode)
cmd.timer_count   = 1;                    // number of timer ticks to delay interrupt

program_accumulator(0, &cmd);

/* Clear the Accumulator lists. */
memset(hostList, 0, 34 * 4);
memset(monoList, 0, 34 * 4);
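For completeness, the sketch below shows one way the host-side interrupt handler might drain the hi-pri channel's list programmed above. It assumes the list-count layout selected by cfg_list_mode = 1 (word 0 of the active page holds the entry count, with the "D" register values following), a page-tracking flag usePong maintained by the application, and recycling to Rx FDQ 7000; the function and variable names other than hostList and push_queue are illustrative, and the INTD interrupt handshaking of Section 4.3.4 is omitted.

/* Illustrative drain of the hi-pri accumulator list (assumptions noted in
 * the text; usePong tracks which half of hostList was just filled). */
static int usePong = 0;

void drain_hi_pri_list(void)
{
    Uint32 *list  = usePong ? &hostList[17] : &hostList[0];  /* pong page starts at word 17 */
    Uint32  count = list[0];    /* cfg_list_mode = 1: word 0 holds the entry count */
    Uint32  i;
    MNAV_HostPacketDescriptor *pkt;

    for (i = 1; i <= count; i++)
    {
        /* Each entry is a Queue Register D value; mask any descriptor size
         * hint in the low bits to recover the descriptor address. */
        pkt = (MNAV_HostPacketDescriptor *)(list[i] & ~0xF);

        /* ... process the received packet here ... */

        /* Recycle the descriptor (Rx FDQ 7000 is assumed for this setup). */
        push_queue(7000, 1, 0, (Uint32)pkt);
    }

    list[0] = 0;                /* clear the page before it is reused */
    usePong = !usePong;         /* the next interrupt fills the other page */

    /* Interrupt handshaking (EOI write, Section 4.3.4) must also be done. */
}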