From 5445c2727193647baefb5018eb79c031bfa3ae41 Mon Sep 17 00:00:00 2001
From: kuwoyuki
Date: Sun, 9 Nov 2025 12:15:13 +0600
Subject: [PATCH] fix: add tx queue

---
Review notes (this section is ignored by git-am and is not part of the commit):
 - This copy of the patch was restored from a whitespace-collapsed, HTML-entity
   decoded paste. Every "&eth..." that had been turned into U+00F0 was put back
   ("&eth_state", "&ethernetif->tx_q"). Indentation and the placement of blank
   context lines were reconstructed to satisfy the hunk line counts and may
   not match the target tree byte-for-byte -- TODO confirm against the
   original commit before applying.
 - NOTE(review): low_level_output() copies the whole pbuf chain into one
   ETH_TX_BUF_SIZE slot without comparing the accumulated length against the
   slot size -- confirm callers can never hand in a larger frame.
 - NOTE(review): the TX descriptor Status word is used to carry the frame
   length from low_level_output() to tx_start_if_possible(), where it is read
   back into a uint16_t.

 .vscode/settings.json |   4 +-
 README.md             |  14 +++
 main.c                |  20 ++--
 port/ethernetif.c     | 226 ++++++++++++++++++++++++++++--------------
 port/ethernetif.h     |   5 +-
 port/lwipopts.h       |   6 +-
 port/sys_arch.c       |   2 +-
 7 files changed, 184 insertions(+), 93 deletions(-)

diff --git a/.vscode/settings.json b/.vscode/settings.json
index 3254d1a..89a936c 100644
--- a/.vscode/settings.json
+++ b/.vscode/settings.json
@@ -38,6 +38,8 @@
         "vector": "c",
         "memory_resource": "c",
         "__config": "c",
-        "string": "c"
+        "string": "c",
+        "atomic": "c",
+        "__bit_reference": "c"
     }
 }
\ No newline at end of file
diff --git a/README.md b/README.md
index b96fdcd..8025023 100644
--- a/README.md
+++ b/README.md
@@ -28,6 +28,20 @@ while (1) {
 }
 ```
 
+seems okayish
+
+```sh
+$ wrk -t12 -c500 -d10s http://192.168.102.119
+Running 10s test @ http://192.168.102.119
+  12 threads and 500 connections
+  Thread Stats   Avg      Stdev     Max   +/- Stdev
+    Latency     1.87ms    6.98ms 613.62ms   99.63%
+    Req/Sec   334.20    201.29     0.88k    74.58%
+  8197 requests in 10.10s, 5.30MB read
+Requests/sec:    811.63
+Transfer/sec:    537.39KB
+```
+
 ## Impl note
 
 This driver is kinda functional but not optimized
diff --git a/main.c b/main.c
index 5f84abd..9b8ef17 100644
--- a/main.c
+++ b/main.c
@@ -192,15 +192,15 @@ int main() {
     }
 #endif
 
-    uint32_t now = millis();
-    if (now - last_led_toggle_time > LED_TOGGLE_INTERVAL_MS) {
-      if (led_state) {
-        GPIOA->BSHR = (1 << LED1_PIN);
-      } else {
-        GPIOA->BSHR = (1 << (LED1_PIN + 16));
-      }
-      led_state = !led_state;
-      last_led_toggle_time = now;
-    }
+    // uint32_t now = millis();
+    // if (now - last_led_toggle_time > LED_TOGGLE_INTERVAL_MS) {
+    //   if (led_state) {
+    //     GPIOA->BSHR = (1 << LED1_PIN);
+    //   } else {
+    //     GPIOA->BSHR = (1 << (LED1_PIN + 16));
+    //   }
+    //   led_state = !led_state;
+    //   last_led_toggle_time = now;
+    // }
   }
 }
diff --git a/port/ethernetif.c b/port/ethernetif.c
index 6604108..f5aed1b 100644
--- a/port/ethernetif.c
+++ b/port/ethernetif.c
@@ -13,36 +13,66 @@
 #define IFNAME0 'e'
 #define IFNAME1 'n'
 
-#define ETH_RXBUFNB 4
-#define ETH_TXBUFNB 1
-#define ETH_RX_BUF_SZE ETH_MAX_PACKET_SIZE
-#define ETH_TX_BUF_SZE ETH_MAX_PACKET_SIZE
+#define ETH_RX_BUF_COUNT 4
+#define ETH_TX_BUF_COUNT 2
+/* buf size should be at least ETH_MAX_PACKET_SIZE */
+#define ETH_RX_BUF_SIZE ETH_MAX_PACKET_SIZE
+#define ETH_TX_BUF_SIZE ETH_MAX_PACKET_SIZE
+
+typedef struct {
+  volatile uint32_t head;  // producer idx: next free slot to write to
+  volatile uint32_t tail;  // consumer idx: next slot to be txed
+  volatile bool is_full;   // for N=1 size
+} tx_queue_t;
 
 struct ethernetif {
-  ETH_DMADESCTypeDef* DMARxDescToGet;
-  ETH_DMADESCTypeDef* DMARxDescToRead;
-  ETH_DMADESCTypeDef* DMATxDescToSet;
+  ETH_DMADESCTypeDef* rx_desc_head;  // next desc to be filled by DMA
+  ETH_DMADESCTypeDef* rx_desc_tail;  // next desc to be read by CPU
+  tx_queue_t tx_q;
 };
 
-__attribute__((aligned(4))) ETH_DMADESCTypeDef DMARxDscrTab[ETH_RXBUFNB];
-__attribute__((aligned(4))) ETH_DMADESCTypeDef DMATxDscrTab[ETH_TXBUFNB];
-__attribute__((aligned(4))) uint8_t MACRxBuf[ETH_RXBUFNB * ETH_RX_BUF_SZE];
-__attribute__((aligned(4))) uint8_t MACTxBuf[ETH_TXBUFNB * ETH_TX_BUF_SZE];
+__attribute__((aligned(4))) ETH_DMADESCTypeDef g_dma_rx_descs[ETH_RX_BUF_COUNT];
+__attribute__((aligned(4))) ETH_DMADESCTypeDef g_dma_tx_descs[ETH_TX_BUF_COUNT];
+__attribute__((
+    aligned(4))) uint8_t g_mac_rx_bufs[ETH_RX_BUF_COUNT * ETH_RX_BUF_SIZE];
+__attribute__((
+    aligned(4))) uint8_t g_mac_tx_bufs[ETH_TX_BUF_COUNT * ETH_TX_BUF_SIZE];
 
-static volatile bool g_link_interrupt_flag = false;
-static struct ethernetif eth_state;
+static struct ethernetif g_eth_state;
+static volatile bool g_link_irq_flag = false;
+
+static inline void tx_queue_init(tx_queue_t* q) {
+  q->head = 0;
+  q->tail = 0;
+  q->is_full = false;
+}
+
+static inline bool tx_queue_is_empty(const tx_queue_t* q) {
+  return !q->is_full && (q->head == q->tail);
+}
+static inline bool tx_queue_is_full(const tx_queue_t* q) { return q->is_full; }
+static inline void tx_queue_produce(tx_queue_t* q) {
+  q->head = (q->head + 1) % ETH_TX_BUF_COUNT;
+  if (q->head == q->tail) {
+    q->is_full = true;
+  }
+}
+static inline void tx_queue_consume(tx_queue_t* q) {
+  q->is_full = false;
+  q->tail = (q->tail + 1) % ETH_TX_BUF_COUNT;
+}
 
 static void low_level_init(struct netif* netif);
 static err_t low_level_output(struct netif* netif, struct pbuf* p);
 static struct pbuf* low_level_input(struct netif* netif);
-void WritePHYReg(uint8_t reg_add, uint16_t reg_val);
-uint16_t ReadPHYReg(uint8_t reg_add);
+void phy_write_reg(uint8_t reg_add, uint16_t reg_val);
+uint16_t phy_read_reg(uint8_t reg_add);
 
-static void eth_get_mac_in_uc(uint8_t* mac) {
+static void eth_get_mac_addr(uint8_t* mac) {
   // Mac is backwards.
-  const uint8_t* macaddr = (const uint8_t*)(ROM_CFG_USERADR_ID + 5);
+  const uint8_t* macaddr_src = (const uint8_t*)(ROM_CFG_USERADR_ID + 5);
   for (int i = 0; i < 6; i++) {
-    mac[i] = *(macaddr--);
+    mac[i] = *(macaddr_src--);
   }
 }
 
@@ -51,7 +81,7 @@ err_t ethernetif_init(struct netif* netif) {
   netif->hostname = "lwip-ch32";
 #endif
 
-  netif->state = &eth_state;
+  netif->state = &g_eth_state;
   netif->name[0] = IFNAME0;
   netif->name[1] = IFNAME1;
 
@@ -61,7 +91,7 @@ err_t ethernetif_init(struct netif* netif) {
   MIB2_INIT_NETIF(netif, snmp_ifType_ethernet_csmacd, 10000000);  // 10Mbps
 
   netif->hwaddr_len = ETH_HWADDR_LEN;
-  eth_get_mac_in_uc(netif->hwaddr);
+  eth_get_mac_addr(netif->hwaddr);
 
   printf("MAC Address: %02X:%02X:%02X:%02X:%02X:%02X\n", netif->hwaddr[0],
          netif->hwaddr[1], netif->hwaddr[2], netif->hwaddr[3], netif->hwaddr[4],
@@ -104,32 +134,34 @@ static void low_level_init(struct netif* netif) {
   ETH10M->ECON2 = RB_ETH_ECON2_DEFAULT;
 
   // init TX descriptors
-  ethernetif->DMATxDescToSet = DMATxDscrTab;
-  for (int i = 0; i < ETH_TXBUFNB; i++) {
-    DMATxDscrTab[i].Status = 0;
-    DMATxDscrTab[i].Buffer1Addr = (uint32_t)&MACTxBuf[i * ETH_TX_BUF_SZE];
-    DMATxDscrTab[i].Buffer2NextDescAddr =
-        (uint32_t)&DMATxDscrTab[(i + 1) % ETH_TXBUFNB];
+  tx_queue_init(&ethernetif->tx_q);
+  for (int i = 0; i < ETH_TX_BUF_COUNT; i++) {
+    g_dma_tx_descs[i].Status = 0;
+    g_dma_tx_descs[i].Buffer1Addr =
+        (uint32_t)&g_mac_tx_bufs[i * ETH_TX_BUF_SIZE];
+    g_dma_tx_descs[i].Buffer2NextDescAddr =
+        (uint32_t)&g_dma_tx_descs[(i + 1) % ETH_TX_BUF_COUNT];
   }
 
   // init RX descriptors
-  ethernetif->DMARxDescToGet = DMARxDscrTab;
-  ethernetif->DMARxDescToRead = DMARxDscrTab;
-  for (int i = 0; i < ETH_RXBUFNB; i++) {
-    DMARxDscrTab[i].Status = ETH_DMARxDesc_OWN;
-    DMARxDscrTab[i].Buffer1Addr = (uint32_t)&MACRxBuf[i * ETH_RX_BUF_SZE];
-    DMARxDscrTab[i].Buffer2NextDescAddr =
-        (uint32_t)&DMARxDscrTab[(i + 1) % ETH_RXBUFNB];
+  ethernetif->rx_desc_head = g_dma_rx_descs;
+  ethernetif->rx_desc_tail = g_dma_rx_descs;
+  for (int i = 0; i < ETH_RX_BUF_COUNT; i++) {
+    g_dma_rx_descs[i].Status = ETH_DMARxDesc_OWN;
+    g_dma_rx_descs[i].Buffer1Addr =
+        (uint32_t)&g_mac_rx_bufs[i * ETH_RX_BUF_SIZE];
+    g_dma_rx_descs[i].Buffer2NextDescAddr =
+        (uint32_t)&g_dma_rx_descs[(i + 1) % ETH_RX_BUF_COUNT];
   }
 
   // set RX buffer start and enable receiver
-  ETH10M->ERXST = ethernetif->DMARxDescToGet->Buffer1Addr;
+  ETH10M->ERXST = ethernetif->rx_desc_head->Buffer1Addr;
   ETH10M->ECON1 = RB_ETH_ECON1_RXEN;
 
-  WritePHYReg(PHY_BMCR, PHY_BMCR_RESET);
+  phy_write_reg(PHY_BMCR, PHY_BMCR_RESET);
   Delay_Ms(200);
 
-  WritePHYReg(PHY_BMCR, PHY_BMCR_FULL_DUPLEX);
+  phy_write_reg(PHY_BMCR, PHY_BMCR_FULL_DUPLEX);
 
   ETH10M->EIR = 0xFF;  // clear all interrupt flags
   ETH10M->EIE = RB_ETH_EIE_INTIE | RB_ETH_EIE_RXIE | RB_ETH_EIE_TXIE |
@@ -139,31 +171,67 @@ static void low_level_init(struct netif* netif) {
   NVIC_EnableIRQ(ETH_IRQn);
 }
 
-static err_t low_level_output(struct netif* netif, struct pbuf* p) {
-  (void)netif;
-
-  if (DMATxDscrTab[0].Status & ETH_DMATxDesc_OWN) {
-    LINK_STATS_INC(link.drop);
-    return ERR_BUF;
+static void tx_start_if_possible(void) {
+  // if TXRTS bit is set, MAC is busy sending a packet
+  if (ETH10M->ECON1 & RB_ETH_ECON1_TXRTS) {
+    return;
   }
 
-  uint32_t len = 0;
-  uint8_t* tx_buf_ptr = (uint8_t*)DMATxDscrTab[0].Buffer1Addr;
+  struct ethernetif* ethernetif = &g_eth_state;
 
-  for (struct pbuf* q = p; q != NULL; q = q->next) {
-    memcpy(&tx_buf_ptr[len], q->payload, q->len);
-    len += q->len;
+  if (tx_queue_is_empty(&ethernetif->tx_q)) {
+    return;
   }
 
+  // get descriptor for the next packet to send
+  uint32_t idx = ethernetif->tx_q.tail;
+  ETH_DMADESCTypeDef* dma_desc = &g_dma_tx_descs[idx];
+
+  uint16_t len = dma_desc->Status;
+
+  // tell MAC which buffer to send
   ETH10M->ETXLN = len;
-  ETH10M->ETXST = (uint32_t)tx_buf_ptr;
-  DMATxDscrTab[0].Status |= ETH_DMATxDesc_OWN;
+  ETH10M->ETXST = dma_desc->Buffer1Addr;
+  // start tx
   ETH10M->ECON1 |= RB_ETH_ECON1_TXRTS;
+}
 
-  LINK_STATS_INC(link.xmit);
-  MIB2_STATS_NETIF_ADD(netif, ifoutoctets, len);
+static err_t low_level_output(struct netif* netif, struct pbuf* p) {
+  struct ethernetif* ethernetif = netif->state;
+  err_t errval = ERR_OK;
 
-  return ERR_OK;
+  NVIC_DisableIRQ(ETH_IRQn);
+
+  if (tx_queue_is_full(&ethernetif->tx_q)) {
+    // queue full, drop pkt
+    errval = ERR_BUF;
+  } else {
+    uint32_t current_idx = ethernetif->tx_q.head;
+    uint8_t* tx_buf_ptr = (uint8_t*)g_dma_tx_descs[current_idx].Buffer1Addr;
+    uint32_t len = 0;
+
+    for (struct pbuf* q = p; q != NULL; q = q->next) {
+      memcpy(&tx_buf_ptr[len], q->payload, q->len);
+      len += q->len;
+    }
+
+    g_dma_tx_descs[current_idx].Status = len;
+
+    tx_queue_produce(&ethernetif->tx_q);
+
+    LINK_STATS_INC(link.xmit);
+    MIB2_STATS_NETIF_ADD(netif, ifoutoctets, len);
+  }
+
+  tx_start_if_possible();
+
+  NVIC_EnableIRQ(ETH_IRQn);
+
+  if (errval == ERR_BUF) {
+    LINK_STATS_INC(link.drop);
+  }
+
+  return errval;
 }
 
 static struct pbuf* low_level_input(struct netif* netif) {
@@ -171,16 +239,16 @@ static struct pbuf* low_level_input(struct netif* netif) {
   struct pbuf* p = NULL;
 
   // if OWN bit is set, it's still owned by DMA and no packet rdy
-  if (ethernetif->DMARxDescToRead->Status & ETH_DMARxDesc_OWN) {
+  if (ethernetif->rx_desc_tail->Status & ETH_DMARxDesc_OWN) {
     return NULL;
   }
 
   // packet ready
-  uint32_t len = (ethernetif->DMARxDescToRead->Status & ETH_DMARxDesc_FL) >> 16;
+  uint32_t len = (ethernetif->rx_desc_tail->Status & ETH_DMARxDesc_FL) >> 16;
 
   p = pbuf_alloc(PBUF_RAW, len, PBUF_POOL);
   if (p != NULL) {
-    uint8_t* buffer = (uint8_t*)ethernetif->DMARxDescToRead->Buffer1Addr;
+    uint8_t* buffer = (uint8_t*)ethernetif->rx_desc_tail->Buffer1Addr;
     uint32_t offset = 0;
     for (struct pbuf* q = p; q != NULL; q = q->next) {
       memcpy(q->payload, buffer + offset, q->len);
@@ -193,10 +261,10 @@ static struct pbuf* low_level_input(struct netif* netif) {
   }
 
   // give buffer back to DMA
-  ethernetif->DMARxDescToRead->Status = ETH_DMARxDesc_OWN;
+  ethernetif->rx_desc_tail->Status = ETH_DMARxDesc_OWN;
   // advance read pointer to the next descriptor in the ring
-  ethernetif->DMARxDescToRead =
-      (ETH_DMADESCTypeDef*)ethernetif->DMARxDescToRead->Buffer2NextDescAddr;
+  ethernetif->rx_desc_tail =
+      (ETH_DMADESCTypeDef*)ethernetif->rx_desc_tail->Buffer2NextDescAddr;
 
   return p;
 }
@@ -211,12 +279,12 @@ void ethernetif_input(struct netif* netif) {
 }
 
 void ethernetif_link_poll(struct netif* netif) {
-  if (!g_link_interrupt_flag) return;
-  g_link_interrupt_flag = false;
+  if (!g_link_irq_flag) return;
+  g_link_irq_flag = false;
 
   // supposedly, first read latches link status 2nd get cur val
-  (void)ReadPHYReg(PHY_BMSR);
-  uint16_t bmsr = ReadPHYReg(PHY_BMSR);
+  (void)phy_read_reg(PHY_BMSR);
+  uint16_t bmsr = phy_read_reg(PHY_BMSR);
 
   if (bmsr & PHY_BMSR_LINK_STATUS) {
     if (!netif_is_link_up(netif)) {
@@ -233,62 +301,70 @@ void ethernetif_link_poll(struct netif* netif) {
 void ETH_IRQHandler(void) __attribute__((interrupt)) __attribute__((used));
 void ETH_IRQHandler(void) {
   uint32_t flags = ETH10M->EIR;
-  struct ethernetif* ethernetif = &eth_state;
+  struct ethernetif* ethernetif = &g_eth_state;
 
   if (flags & RB_ETH_EIR_RXIF) {
     ETH10M->EIR = RB_ETH_EIR_RXIF;
 
     // descriptor should be owned by DMA
-    if (ethernetif->DMARxDescToGet->Status & ETH_DMARxDesc_OWN) {
+    if (ethernetif->rx_desc_head->Status & ETH_DMARxDesc_OWN) {
       ETH_DMADESCTypeDef* next_desc =
-          (ETH_DMADESCTypeDef*)ethernetif->DMARxDescToGet->Buffer2NextDescAddr;
+          (ETH_DMADESCTypeDef*)ethernetif->rx_desc_head->Buffer2NextDescAddr;
 
       // if next descriptor OWN bit is 0, ring is full and we must drop
       if (!(next_desc->Status & ETH_DMARxDesc_OWN)) {
         LINK_STATS_INC(link.drop);
       } else {
         // process and re-arm
-        ethernetif->DMARxDescToGet->Status &= ~ETH_DMARxDesc_OWN;
+        ethernetif->rx_desc_head->Status &= ~ETH_DMARxDesc_OWN;
         // write packet len into status field for CPU
-        ethernetif->DMARxDescToGet->Status |=
+        ethernetif->rx_desc_head->Status |=
             (ETH_DMARxDesc_FS | ETH_DMARxDesc_LS | (ETH10M->ERXLN << 16));
         // advance descripotor ptr
-        ethernetif->DMARxDescToGet = next_desc;
+        ethernetif->rx_desc_head = next_desc;
         // re-arm receiver with new emtpy buf
-        ETH10M->ERXST = (uint32_t)ethernetif->DMARxDescToGet->Buffer1Addr;
+        ETH10M->ERXST = (uint32_t)ethernetif->rx_desc_head->Buffer1Addr;
       }
     }
   }
 
   if (flags & RB_ETH_EIR_TXIF) {
-    DMATxDscrTab[0].Status &= ~ETH_DMATxDesc_OWN;
     ETH10M->EIR = RB_ETH_EIR_TXIF;
+
+    if (!tx_queue_is_empty(&ethernetif->tx_q)) {
+      tx_queue_consume(&ethernetif->tx_q);
+    }
+    tx_start_if_possible();
   }
 
   if (flags & RB_ETH_EIR_TXERIF) {
-    DMATxDscrTab[0].Status &= ~ETH_DMATxDesc_OWN;
     ETH10M->EIR = RB_ETH_EIR_TXERIF;
     LINK_STATS_INC(link.err);
+
+    if (!tx_queue_is_empty(&ethernetif->tx_q)) {
+      tx_queue_consume(&ethernetif->tx_q);
+    }
+    tx_start_if_possible();
   }
 
   if (flags & RB_ETH_EIR_RXERIF) {
     ETH10M->EIR = RB_ETH_EIR_RXERIF;
-    ETH10M->ECON1 |= RB_ETH_ECON1_RXEN;
+    ETH10M->ECON1 |= RB_ETH_ECON1_RXEN;  // re-enable receiver
     LINK_STATS_INC(link.err);
   }
 
   if (flags & RB_ETH_EIR_LINKIF) {
-    g_link_interrupt_flag = true;
+    g_link_irq_flag = true;
     ETH10M->EIR = RB_ETH_EIR_LINKIF;
   }
 }
 
-void WritePHYReg(uint8_t reg_add, uint16_t reg_val) {
+void phy_write_reg(uint8_t reg_add, uint16_t reg_val) {
   R32_ETH_MIWR = (reg_add & RB_ETH_MIREGADR_MASK) | RB_ETH_MIWR_MIIWR |
                  (reg_val << RB_ETH_MIWR_DATA_SHIFT);
 }
 
-uint16_t ReadPHYReg(uint8_t reg_add) {
+uint16_t phy_read_reg(uint8_t reg_add) {
   ETH10M->MIERGADR = reg_add;
   return ETH10M->MIRD;
 }
diff --git a/port/ethernetif.h b/port/ethernetif.h
index d3d00e7..00c0fee 100644
--- a/port/ethernetif.h
+++ b/port/ethernetif.h
@@ -4,9 +4,8 @@
 #include "lwip/err.h"
 #include "lwip/netif.h"
 
-void run_tx_test(void);
-void WritePHYReg(uint8_t reg_add, uint16_t reg_val);
-uint16_t ReadPHYReg(uint8_t reg_add);
+void phy_write_reg(uint8_t reg_add, uint16_t reg_val);
+uint16_t phy_read_reg(uint8_t reg_add);
 
 #define ROM_CFG_USERADR_ID 0x1FFFF7E8
 
diff --git a/port/lwipopts.h b/port/lwipopts.h
index a3f6df3..2b74a58 100644
--- a/port/lwipopts.h
+++ b/port/lwipopts.h
@@ -50,9 +50,9 @@
 #define LWIP_SOCKET 0
 
 // Statistics
-#define LWIP_STATS 0
-#define LINK_STATS 0
-#define MIB2_STATS 0
+#define LWIP_STATS 1
+#define LINK_STATS 1
+// #define MIB2_STATS 1
 
 #define LWIP_HTTPD 1
 // Use a read-only filesystem populated by makefsdata
diff --git a/port/sys_arch.c b/port/sys_arch.c
index d3c2b3f..090b440 100644
--- a/port/sys_arch.c
+++ b/port/sys_arch.c
@@ -12,7 +12,7 @@ int rand(void) {
 
 void srand(unsigned int seed) { next = seed; }
 
-uint32_t sys_now(void) { return systick_millis; }
+uint32_t sys_now(void) { return millis(); }
 
 sys_prot_t sys_arch_protect(void) {
   unsigned int old_mstatus;