Index: sys/sys/callout.h
===================================================================
RCS file: /usr/users/he/nbcvs/netbsd/src/sys/sys/callout.h,v
retrieving revision 1.17
diff -u -u -2 -0 -r1.17 callout.h
--- sys/sys/callout.h	4 Feb 2003 01:21:06 -0000	1.17
+++ sys/sys/callout.h	13 Mar 2003 21:05:56 -0000
@@ -66,40 +66,43 @@
  */
 
 #ifndef _SYS_CALLOUT_H_
 #define _SYS_CALLOUT_H_
 
 struct callout_circq {
 	struct callout_circq *cq_next;	/* next element */
 	struct callout_circq *cq_prev;	/* previous element */
 };
 
 struct callout {
 	struct callout_circq c_list;		/* linkage on queue */
 	void	(*c_func)(void *);		/* function to call */
 	void	*c_arg;				/* function argument */
 	int	c_time;				/* when callout fires */
 	int	c_flags;			/* state of this entry */
 };
 
 #define	CALLOUT_PENDING		0x0002	/* callout is on the queue */
 #define	CALLOUT_FIRED		0x0004	/* callout has fired */
+#define	CALLOUT_RUNNING		0x0008	/* callout function is being invoked */
 
 #define	CALLOUT_INITIALIZER_SETFUNC(func, arg)				\
 				{ { NULL, NULL }, func, arg, 0, 0 }
 
 #define	CALLOUT_INITIALIZER	CALLOUT_INITIALIZER_SETFUNC(NULL, NULL)
 
 #ifdef _KERNEL
 void	callout_startup(void);
 void	callout_init(struct callout *);
 void	callout_setfunc(struct callout *, void (*)(void *), void *);
 void	callout_reset(struct callout *, int, void (*)(void *), void *);
 void	callout_schedule(struct callout *, int);
 void	callout_stop(struct callout *);
 int	callout_hardclock(void);
 
 #define	callout_pending(c)	((c)->c_flags & CALLOUT_PENDING)
 #define	callout_expired(c)	((c)->c_flags & CALLOUT_FIRED)
+#define	callout_is_running(c)	((c)->c_flags & CALLOUT_RUNNING)
+#define	callout_ack(c)		((c)->c_flags &= ~CALLOUT_RUNNING)
 #endif /* _KERNEL */
 
 #endif /* !_SYS_CALLOUT_H_ */
Index: sys/kern/kern_timeout.c
===================================================================
RCS file: /usr/users/he/nbcvs/netbsd/src/sys/kern/kern_timeout.c,v
retrieving revision 1.5
diff -u -u -2 -0 -r1.5 kern_timeout.c
--- sys/kern/kern_timeout.c	26 Feb 2003 23:13:19 -0000	1.5
+++ sys/kern/kern_timeout.c	13 Mar 2003 20:57:45 -0000
@@ -238,117 +238,117 @@
 }
 
 /*
  * callout_reset:
  *
  *	Reset a callout structure with a new function and argument, and
  *	schedule it to run.
  */
 void
 callout_reset(struct callout *c, int to_ticks, void (*func)(void *), void *arg)
 {
 	int s, old_time;
 
 	KASSERT(to_ticks >= 0);
 
 	CALLOUT_LOCK(s);
 
 	/* Initialize the time here, it won't change. */
 	old_time = c->c_time;
 	c->c_time = to_ticks + hardclock_ticks;
-	c->c_flags &= ~CALLOUT_FIRED;
+	c->c_flags &= ~(CALLOUT_FIRED|CALLOUT_RUNNING);
 
 	c->c_func = func;
 	c->c_arg = arg;
 
 	/*
 	 * If this timeout is already scheduled and now is moved
 	 * earlier, reschedule it now. Otherwise leave it in place
 	 * and let it be rescheduled later.
 	 */
 	if (callout_pending(c)) {
 		if (c->c_time - old_time < 0) {
 			CIRCQ_REMOVE(&c->c_list);
 			CIRCQ_INSERT(&c->c_list, &timeout_todo);
 		}
 	} else {
 		c->c_flags |= CALLOUT_PENDING;
 		CIRCQ_INSERT(&c->c_list, &timeout_todo);
 	}
 
 	CALLOUT_UNLOCK(s);
 }
 
 /*
  * callout_schedule:
  *
  *	Schedule a callout to run.  The function and argument must
  *	already be set in the callout structure.
  */
 void
 callout_schedule(struct callout *c, int to_ticks)
 {
 	int s, old_time;
 
 	KASSERT(to_ticks >= 0);
 
 	CALLOUT_LOCK(s);
 
 	/* Initialize the time here, it won't change. */
 	old_time = c->c_time;
 	c->c_time = to_ticks + hardclock_ticks;
-	c->c_flags &= ~CALLOUT_FIRED;
+	c->c_flags &= ~(CALLOUT_FIRED|CALLOUT_RUNNING);
 
 	/*
 	 * If this timeout is already scheduled and now is moved
 	 * earlier, reschedule it now. Otherwise leave it in place
 	 * and let it be rescheduled later.
 	 */
 	if (callout_pending(c)) {
 		if (c->c_time - old_time < 0) {
 			CIRCQ_REMOVE(&c->c_list);
 			CIRCQ_INSERT(&c->c_list, &timeout_todo);
 		}
 	} else {
 		c->c_flags |= CALLOUT_PENDING;
 		CIRCQ_INSERT(&c->c_list, &timeout_todo);
 	}
 
 	CALLOUT_UNLOCK(s);
 }
 
 /*
  * callout_stop:
  *
  *	Cancel a pending callout.
  */
 void
 callout_stop(struct callout *c)
 {
 	int s;
 
 	CALLOUT_LOCK(s);
 
 	if (callout_pending(c))
 		CIRCQ_REMOVE(&c->c_list);
 
-	c->c_flags &= ~(CALLOUT_PENDING|CALLOUT_FIRED);
+	c->c_flags &= ~(CALLOUT_PENDING|CALLOUT_FIRED|CALLOUT_RUNNING);
 
 	CALLOUT_UNLOCK(s);
 }
 
 /*
  * This is called from hardclock() once every tick.
  * We return !0 if we need to schedule a softclock.
  */
 int
 callout_hardclock(void)
 {
 	int s;
 	int needsoftclock;
 
 	CALLOUT_LOCK(s);
 
 	MOVEBUCKET(0, hardclock_ticks);
 	if (MASKWHEEL(0, hardclock_ticks) == 0) {
 		MOVEBUCKET(1, hardclock_ticks);
 		if (MASKWHEEL(1, hardclock_ticks) == 0) {
@@ -373,41 +373,41 @@
 	void *arg;
 	int s;
 
 	CALLOUT_LOCK(s);
 
 	while (!CIRCQ_EMPTY(&timeout_todo)) {
 
 		c = (struct callout *)CIRCQ_FIRST(&timeout_todo); /* XXX */
 		CIRCQ_REMOVE(&c->c_list);
 
 		/* If due run it, otherwise insert it into the right bucket. */
 		if (c->c_time - hardclock_ticks > 0) {
 			CIRCQ_INSERT(&c->c_list,
 			    BUCKET((c->c_time - hardclock_ticks), c->c_time));
 		} else {
 #ifdef CALLOUT_EVENT_COUNTERS
 			if (c->c_time - hardclock_ticks < 0)
 				callout_ev_late.ev_count++;
 #endif
 			c->c_flags = (c->c_flags  & ~CALLOUT_PENDING) |
-			    CALLOUT_FIRED;
+			    (CALLOUT_FIRED|CALLOUT_RUNNING);
 
 			func = c->c_func;
 			arg = c->c_arg;
 
 			CALLOUT_UNLOCK(s);
 			(*func)(arg);
 			CALLOUT_LOCK(s);
 		}
 	}
 
 	CALLOUT_UNLOCK(s);
 }
 
 #ifdef DDB
 static void
 db_show_callout_bucket(struct callout_circq *bucket)
 {
 	struct callout *c;
 	struct callout_circq *p;
 	db_expr_t offset;
Index: sys/netinet/tcp_input.c
===================================================================
RCS file: /usr/users/he/nbcvs/netbsd/src/sys/netinet/tcp_input.c,v
retrieving revision 1.163
diff -u -u -2 -0 -r1.163 tcp_input.c
--- sys/netinet/tcp_input.c	1 Mar 2003 04:40:27 -0000	1.163
+++ sys/netinet/tcp_input.c	13 Mar 2003 20:59:15 -0000
@@ -2865,41 +2865,44 @@
 } while (/*CONSTCOND*/0)
 #endif /* INET6 */
 
 #define	SYN_CACHE_RM(sc)						\
 do {									\
 	TAILQ_REMOVE(&tcp_syn_cache[(sc)->sc_bucketidx].sch_bucket,	\
 	    (sc), sc_bucketq);						\
 	(sc)->sc_tp = NULL;						\
 	LIST_REMOVE((sc), sc_tpq);					\
 	tcp_syn_cache[(sc)->sc_bucketidx].sch_length--;			\
 	callout_stop(&(sc)->sc_timer);					\
 	syn_cache_count--;						\
 } while (/*CONSTCOND*/0)
 
 #define	SYN_CACHE_PUT(sc)						\
 do {									\
 	if ((sc)->sc_ipopts)						\
 		(void) m_free((sc)->sc_ipopts);				\
 	if ((sc)->sc_route4.ro_rt != NULL)				\
 		RTFREE((sc)->sc_route4.ro_rt);				\
-	pool_put(&syn_cache_pool, (sc));				\
+	if (callout_is_running(&(sc)->sc_timer))			\
+		(sc)->sc_flags |= SCF_DEAD;				\
+	else								\
+		pool_put(&syn_cache_pool, (sc));			\
 } while (/*CONSTCOND*/0)
 
 struct pool syn_cache_pool;
 
 /*
  * We don't estimate RTT with SYNs, so each packet starts with the default
  * RTT and each timer step has a fixed timeout value.
  */
 #define	SYN_CACHE_TIMER_ARM(sc)						\
 do {									\
 	TCPT_RANGESET((sc)->sc_rxtcur,					\
 	    TCPTV_SRTTDFLT * tcp_backoff[(sc)->sc_rxtshift], TCPTV_MIN,	\
 	    TCPTV_REXMTMAX);						\
 	callout_reset(&(sc)->sc_timer,					\
 	    (sc)->sc_rxtcur * (hz / PR_SLOWHZ), syn_cache_timer, (sc));	\
 } while (/*CONSTCOND*/0)
 
 #define	SYN_CACHE_TIMESTAMP(sc)	(tcp_now - (sc)->sc_timebase)
 
 void
@@ -3012,40 +3015,48 @@
 	TAILQ_INSERT_TAIL(&scp->sch_bucket, sc, sc_bucketq);
 	scp->sch_length++;
 	syn_cache_count++;
 
 	tcpstat.tcps_sc_added++;
 	splx(s);
 }
 
 /*
  * Walk the timer queues, looking for SYN,ACKs that need to be retransmitted.
  * If we have retransmitted an entry the maximum number of times, expire
  * that entry.
  */
 void
 syn_cache_timer(void *arg)
 {
 	struct syn_cache *sc = arg;
 	int s;
 
 	s = splsoftnet();
+
+	callout_ack(&sc->sc_timer);
+	if (__predict_false(sc->sc_flags & SCF_DEAD)) {
+		tcpstat.tcps_sc_delayed_free++;
+		pool_put(&syn_cache_pool, sc);
+		splx(s);
+		return;
+	}
 
 	if (__predict_false(sc->sc_rxtshift == TCP_MAXRXTSHIFT)) {
 		/* Drop it -- too many retransmissions. */
 		goto dropit;
 	}
 
 	/*
 	 * Compute the total amount of time this entry has
 	 * been on a queue.  If this entry has been on longer
 	 * than the keep alive timer would allow, expire it.
 	 */
 	sc->sc_rxttot += sc->sc_rxtcur;
 	if (sc->sc_rxttot >= TCPTV_KEEP_INIT)
 		goto dropit;
 
 	tcpstat.tcps_sc_retransmitted++;
 	(void) syn_cache_respond(sc, NULL);
 
 	/* Advance the timer back-off. */
 	sc->sc_rxtshift++;
Index: sys/netinet/tcp_subr.c
===================================================================
RCS file: /usr/users/he/nbcvs/netbsd/src/sys/netinet/tcp_subr.c,v
retrieving revision 1.139
diff -u -u -2 -0 -r1.139 tcp_subr.c
--- sys/netinet/tcp_subr.c	1 Mar 2003 04:40:28 -0000	1.139
+++ sys/netinet/tcp_subr.c	17 Mar 2003 20:48:25 -0000
@@ -1004,40 +1004,62 @@
 #ifdef INET6
 	if (tp->t_in6pcb)
 		so = tp->t_in6pcb->in6p_socket;
 #endif
 	if (!so)
 		return NULL;
 
 	if (TCPS_HAVERCVDSYN(tp->t_state)) {
 		tp->t_state = TCPS_CLOSED;
 		(void) tcp_output(tp);
 		tcpstat.tcps_drops++;
 	} else
 		tcpstat.tcps_conndrops++;
 	if (errno == ETIMEDOUT && tp->t_softerror)
 		errno = tp->t_softerror;
 	so->so_error = errno;
 	return (tcp_close(tp));
 }
 
 /*
+ * Return whether this tcpcb is marked as dead, indicating
+ * to the calling timer function that no further action should
+ * be taken, as we are about to release this tcpcb.  The release
+ * of the storage will be done here if no other timer functions
+ * are about to be invoked.
+ */
+int
+tcp_isdead(tp)
+	struct tcpcb *tp;
+{
+	int dead = (tp->t_flags & TF_DEAD);
+
+	if (dead) {
+		if (tcp_timers_running(tp))
+			return dead; /* a timer is still running; defer the pool_put() */
+		tcpstat.tcps_delayed_free++;
+		pool_put(&tcpcb_pool, tp);
+	}
+	return dead;
+}
+
+/*
  * Close a TCP control block:
  *	discard all space held by the tcp
  *	discard internet protocol block
  *	wake up any sleepers
  */
 struct tcpcb *
 tcp_close(tp)
 	struct tcpcb *tp;
 {
 	struct inpcb *inp;
 #ifdef INET6
 	struct in6pcb *in6p;
 #endif
 	struct socket *so;
 #ifdef RTV_RTT
 	struct rtentry *rt;
 #endif
 	struct route *ro;
 
 	inp = tp->t_inpcb;
@@ -1121,41 +1143,45 @@
 				rt->rt_rmx.rmx_ssthresh =
 				    (rt->rt_rmx.rmx_ssthresh + i) / 2;
 			else
 				rt->rt_rmx.rmx_ssthresh = i;
 		}
 	}
 #endif /* RTV_RTT */
 	/* free the reassembly queue, if any */
 	TCP_REASS_LOCK(tp);
 	(void) tcp_freeq(tp);
 	TCP_REASS_UNLOCK(tp);
 
 	tcp_canceltimers(tp);
 	TCP_CLEAR_DELACK(tp);
 	syn_cache_cleanup(tp);
 
 	if (tp->t_template) {
 		m_free(tp->t_template);
 		tp->t_template = NULL;
 	}
-	pool_put(&tcpcb_pool, tp);
+	if (tcp_timers_running(tp))
+		tp->t_flags |= TF_DEAD;
+	else
+		pool_put(&tcpcb_pool, tp);
+
 	if (inp) {
 		inp->inp_ppcb = 0;
 		soisdisconnected(so);
 		in_pcbdetach(inp);
 	}
 #ifdef INET6
 	else if (in6p) {
 		in6p->in6p_ppcb = 0;
 		soisdisconnected(so);
 		in6_pcbdetach(in6p);
 	}
 #endif
 	tcpstat.tcps_closed++;
 	return ((struct tcpcb *)0);
 }
 
 int
 tcp_freeq(tp)
 	struct tcpcb *tp;
 {
Index: sys/netinet/tcp_timer.c
===================================================================
RCS file: /usr/users/he/nbcvs/netbsd/src/sys/netinet/tcp_timer.c,v
retrieving revision 1.62
diff -u -u -2 -0 -r1.62 tcp_timer.c
--- sys/netinet/tcp_timer.c	3 Feb 2003 23:51:04 -0000	1.62
+++ sys/netinet/tcp_timer.c	13 Mar 2003 21:03:36 -0000
@@ -179,55 +179,80 @@
 tcp_timer_init(void)
 {
 
 	if (tcp_keepidle == 0)
 		tcp_keepidle = TCPTV_KEEP_IDLE;
 
 	if (tcp_keepintvl == 0)
 		tcp_keepintvl = TCPTV_KEEPINTVL;
 
 	if (tcp_keepcnt == 0)
 		tcp_keepcnt = TCPTV_KEEPCNT;
 
 	if (tcp_maxpersistidle == 0)
 		tcp_maxpersistidle = TCPTV_KEEP_IDLE;
 
 	if (tcp_delack_ticks == 0)
 		tcp_delack_ticks = TCP_DELACK_TICKS;
 }
 
 /*
+ * Return how many timers are currently (about to be) running,
+ * i.e. how many callout functions are about to be invoked.
+ */
+int
+tcp_timers_running(struct tcpcb *tp)
+{
+	int i;
+	int count = 0;
+
+	for (i = 0; i < TCPT_NTIMERS; i++)
+		if (callout_is_running(&tp->t_timer[i]))
+			count++;
+	if (callout_is_running(&tp->t_delack_ch))
+		count++;
+
+	return count;
+}
+
+/*
  * Callout to process delayed ACKs for a TCPCB.
  */
 void
 tcp_delack(void *arg)
 {
 	struct tcpcb *tp = arg;
 	int s;
 
 	/*
 	 * If tcp_output() wasn't able to transmit the ACK
 	 * for whatever reason, it will restart the delayed
 	 * ACK callout.
 	 */
 
 	s = splsoftnet();
+	callout_ack(&tp->t_delack_ch);
+	if (tcp_isdead(tp)) {
+		splx(s);
+		return;
+	}
+
 	tp->t_flags |= TF_ACKNOW;
 	(void) tcp_output(tp);
 	splx(s);
 }
 
 /*
  * Tcp protocol timeout routine called every 500 ms.
  * Updates the timers in all active tcb's and
  * causes finite state machine actions if timers expire.
  */
 void
 tcp_slowtimo()
 {
 	int s;
 
 	s = splsoftnet();
 	tcp_maxidle = tcp_keepcnt * tcp_keepintvl;
 	tcp_iss_seq += TCP_ISSINCR;			/* increment iss */
 	tcp_now++;					/* for timestamps */
 	splx(s);
@@ -251,40 +276,46 @@
 
 const int	tcp_totbackoff = 511;	/* sum of tcp_backoff[] */
 
 /*
  * TCP timer processing.
  */
 
 void
 tcp_timer_rexmt(void *arg)
 {
 	struct tcpcb *tp = arg;
 	uint32_t rto;
 	int s;
 #ifdef TCP_DEBUG
 	struct socket *so;
 	short ostate;
 #endif
 
 	s = splsoftnet();
 
+	callout_ack(&tp->t_timer[TCPT_REXMT]);
+	if (tcp_isdead(tp)) {
+		splx(s);
+		return;
+	}
+
 #ifdef TCP_DEBUG
 #ifdef INET
 	if (tp->t_inpcb)
 		so = tp->t_inpcb->inp_socket;
 #endif
 #ifdef INET6
 	if (tp->t_in6pcb)
 		so = tp->t_in6pcb->in6p_socket;
 #endif
 	ostate = tp->t_state;
 #endif /* TCP_DEBUG */
 
 	/*
 	 * Retransmission timer went off.  Message has not
 	 * been acked within retransmit interval.  Back off
 	 * to a longer retransmit interval and retransmit one segment.
 	 */
 
 	if (++tp->t_rxtshift > TCP_MAXRXTSHIFT) {
 		tp->t_rxtshift = TCP_MAXRXTSHIFT;
@@ -398,40 +429,46 @@
 	if (tp && so->so_options & SO_DEBUG)
 		tcp_trace(TA_USER, ostate, tp, NULL,
 		    PRU_SLOWTIMO | (TCPT_REXMT << 8));
 #endif
 	splx(s);
 }
 
 void
 tcp_timer_persist(void *arg)
 {
 	struct tcpcb *tp = arg;
 	uint32_t rto;
 	int s;
 #ifdef TCP_DEBUG
 	struct socket *so;
 	short ostate;
 #endif
 
 	s = splsoftnet();
 
+	callout_ack(&tp->t_timer[TCPT_PERSIST]);
+	if (tcp_isdead(tp)) {
+		splx(s);
+		return;
+	}
+
 #ifdef TCP_DEBUG
 #ifdef INET
 	if (tp->t_inpcb)
 		so = tp->t_inpcb->inp_socket;
 #endif
 #ifdef INET6
 	if (tp->t_in6pcb)
 		so = tp->t_in6pcb->in6p_socket;
 #endif
 
 	ostate = tp->t_state;
 #endif /* TCP_DEBUG */
 
 	/*
 	 * Persistance timer into zero window.
 	 * Force a byte to be output, if possible.
 	 */
 
 	/*
 	 * Hack: if the peer is dead/unreachable, we do not
@@ -460,40 +497,46 @@
 #ifdef TCP_DEBUG
 	if (tp && so->so_options & SO_DEBUG)
 		tcp_trace(TA_USER, ostate, tp, NULL,
 		    PRU_SLOWTIMO | (TCPT_PERSIST << 8));
 #endif
 	splx(s);
 }
 
 void
 tcp_timer_keep(void *arg)
 {
 	struct tcpcb *tp = arg;
 	struct socket *so = NULL;	/* Quell compiler warning */
 	int s;
 #ifdef TCP_DEBUG
 	short ostate;
 #endif
 
 	s = splsoftnet();
 
+	callout_ack(&tp->t_timer[TCPT_KEEP]);
+	if (tcp_isdead(tp)) {
+		splx(s);
+		return;
+	}
+
 #ifdef TCP_DEBUG
 	ostate = tp->t_state;
 #endif /* TCP_DEBUG */
 
 	/*
 	 * Keep-alive timer went off; send something
 	 * or drop connection if idle for too long.
 	 */
 
 	tcpstat.tcps_keeptimeo++;
 	if (TCPS_HAVEESTABLISHED(tp->t_state) == 0)
 		goto dropit;
 #ifdef INET
 	if (tp->t_inpcb)
 		so = tp->t_inpcb->inp_socket;
 #endif
 #ifdef INET6
 	if (tp->t_in6pcb)
 		so = tp->t_in6pcb->in6p_socket;
 #endif
@@ -541,40 +584,46 @@
 	splx(s);
 	return;
 
  dropit:
 	tcpstat.tcps_keepdrops++;
 	(void) tcp_drop(tp, ETIMEDOUT);
 	splx(s);
 }
 
 void
 tcp_timer_2msl(void *arg)
 {
 	struct tcpcb *tp = arg;
 	int s;
 #ifdef TCP_DEBUG
 	struct socket *so;
 	short ostate;
 #endif
 
 	s = splsoftnet();
+
+	callout_ack(&tp->t_timer[TCPT_2MSL]);
+	if (tcp_isdead(tp)) {
+		splx(s);
+		return;
+	}
 
 #ifdef TCP_DEBUG
 #ifdef INET
 	if (tp->t_inpcb)
 		so = tp->t_inpcb->inp_socket;
 #endif
 #ifdef INET6
 	if (tp->t_in6pcb)
 		so = tp->t_in6pcb->in6p_socket;
 #endif
 
 	ostate = tp->t_state;
 #endif /* TCP_DEBUG */
 
 	/*
 	 * 2 MSL timeout in shutdown went off.  If we're closed but
 	 * still waiting for peer to close and connection has been idle
 	 * too long, or if 2MSL time is up from TIME_WAIT, delete connection
 	 * control block.  Otherwise, check again in a bit.
 	 */
Index: sys/netinet/tcp_var.h
===================================================================
RCS file: /usr/users/he/nbcvs/netbsd/src/sys/netinet/tcp_var.h,v
retrieving revision 1.96
diff -u -u -2 -0 -r1.96 tcp_var.h
--- sys/netinet/tcp_var.h	1 Mar 2003 04:40:28 -0000	1.96
+++ sys/netinet/tcp_var.h	7 Mar 2003 18:05:11 -0000
@@ -167,40 +167,41 @@
 	short	t_dupacks;		/* consecutive dup acks recd */
 	u_short	t_peermss;		/* peer's maximum segment size */
 	u_short	t_ourmss;		/* our's maximum segment size */
 	u_short t_segsz;		/* current segment size in use */
 	char	t_force;		/* 1 if forcing out a byte */
 	u_int	t_flags;
 #define	TF_ACKNOW	0x0001		/* ack peer immediately */
 #define	TF_DELACK	0x0002		/* ack, but try to delay it */
 #define	TF_NODELAY	0x0004		/* don't delay packets to coalesce */
 #define	TF_NOOPT	0x0008		/* don't use tcp options */
 #define	TF_REQ_SCALE	0x0020		/* have/will request window scaling */
 #define	TF_RCVD_SCALE	0x0040		/* other side has requested scaling */
 #define	TF_REQ_TSTMP	0x0080		/* have/will request timestamps */
 #define	TF_RCVD_TSTMP	0x0100		/* a timestamp was received in SYN */
 #define	TF_SACK_PERMIT	0x0200		/* other side said I could SACK */
 #define	TF_SYN_REXMT	0x0400		/* rexmit timer fired on SYN */
 #define	TF_WILL_SACK	0x0800		/* try to use SACK */
 #define	TF_CANT_TXSACK	0x1000		/* other side said I could not SACK */
 #define	TF_IGNR_RXSACK	0x2000		/* ignore received SACK blocks */
 #define	TF_REASSEMBLING	0x4000		/* we're busy reassembling */
+#define	TF_DEAD		0x8000		/* dead and to-be-released */
 
 
 	struct	mbuf *t_template;	/* skeletal packet for transmit */
 	struct	inpcb *t_inpcb;		/* back pointer to internet pcb */
 	struct	in6pcb *t_in6pcb;	/* back pointer to internet pcb */
 	struct	callout t_delack_ch;	/* delayed ACK callout */
 /*
  * The following fields are used as in the protocol specification.
  * See RFC783, Dec. 1981, page 21.
  */
 /* send sequence variables */
 	tcp_seq	snd_una;		/* send unacknowledged */
 	tcp_seq	snd_nxt;		/* send next */
 	tcp_seq	snd_up;			/* send urgent pointer */
 	tcp_seq	snd_wl1;		/* window update seg seq number */
 	tcp_seq	snd_wl2;		/* window update seg ack number */
 	tcp_seq	iss;			/* initial send sequence number */
 	u_long	snd_wnd;		/* send window */
 	tcp_seq snd_recover;		/* for use in fast recovery */
 /* receive sequence variables */
@@ -393,40 +394,41 @@
 #define sc_route4	sc_route_u.route4
 #ifdef INET6
 #define sc_route6	sc_route_u.route6
 #endif
 	long sc_win;				/* advertised window */
 	int sc_bucketidx;			/* our bucket index */
 	u_int32_t sc_hash;
 	u_int32_t sc_timestamp;			/* timestamp from SYN */
 	u_int32_t sc_timebase;			/* our local timebase */
 	union syn_cache_sa sc_src;
 	union syn_cache_sa sc_dst;
 	tcp_seq sc_irs;
 	tcp_seq sc_iss;
 	u_int sc_rxtcur;			/* current rxt timeout */
 	u_int sc_rxttot;			/* total time spend on queues */
 	u_short sc_rxtshift;			/* for computing backoff */
 	u_short sc_flags;
 
 #define	SCF_UNREACH		0x0001		/* we've had an unreach error */
 #define	SCF_TIMESTAMP		0x0002		/* peer will do timestamps */
+#define	SCF_DEAD		0x0004		/* this entry to be released */
 
 	struct mbuf *sc_ipopts;			/* IP options */
 	u_int16_t sc_peermaxseg;
 	u_int16_t sc_ourmaxseg;
 	u_int8_t sc_request_r_scale	: 4,
 		 sc_requested_s_scale	: 4;
 
 	struct tcpcb *sc_tp;			/* tcb for listening socket */
 	LIST_ENTRY(syn_cache) sc_tpq;		/* list of entries by same tp */
 };
 
 struct syn_cache_head {
 	TAILQ_HEAD(, syn_cache) sch_bucket;	/* bucket entries */
 	u_short sch_length;			/* # entries in bucket */
 };
 
 #define	intotcpcb(ip)	((struct tcpcb *)(ip)->inp_ppcb)
 #ifdef INET6
 #define	in6totcpcb(ip)	((struct tcpcb *)(ip)->in6p_ppcb)
 #endif
@@ -523,54 +525,56 @@
 	u_quad_t tcps_rcvpartdupbyte;	/* dup. bytes in part-dup. packets */
 	u_quad_t tcps_rcvoopack;	/* out-of-order packets received */
 	u_quad_t tcps_rcvoobyte;	/* out-of-order bytes received */
 	u_quad_t tcps_rcvpackafterwin;	/* packets with data after window */
 	u_quad_t tcps_rcvbyteafterwin;	/* bytes rcvd after window */
 	u_quad_t tcps_rcvafterclose;	/* packets rcvd after "close" */
 	u_quad_t tcps_rcvwinprobe;	/* rcvd window probe packets */
 	u_quad_t tcps_rcvdupack;	/* rcvd duplicate acks */
 	u_quad_t tcps_rcvacktoomuch;	/* rcvd acks for unsent data */
 	u_quad_t tcps_rcvackpack;	/* rcvd ack packets */
 	u_quad_t tcps_rcvackbyte;	/* bytes acked by rcvd acks */
 	u_quad_t tcps_rcvwinupd;	/* rcvd window update packets */
 	u_quad_t tcps_pawsdrop;		/* segments dropped due to PAWS */
 	u_quad_t tcps_predack;		/* times hdr predict ok for acks */
 	u_quad_t tcps_preddat;		/* times hdr predict ok for data pkts */
 
 	u_quad_t tcps_pcbhashmiss;	/* input packets missing pcb hash */
 	u_quad_t tcps_noport;		/* no socket on port */
 	u_quad_t tcps_badsyn;		/* received ack for which we have
 					   no SYN in compressed state */
+	u_quad_t tcps_delayed_free;	/* delayed pool_put() of tcpcb */
 
 	/* These statistics deal with the SYN cache. */
 	u_quad_t tcps_sc_added;		/* # of entries added */
 	u_quad_t tcps_sc_completed;	/* # of connections completed */
 	u_quad_t tcps_sc_timed_out;	/* # of entries timed out */
 	u_quad_t tcps_sc_overflowed;	/* # dropped due to overflow */
 	u_quad_t tcps_sc_reset;		/* # dropped due to RST */
 	u_quad_t tcps_sc_unreach;	/* # dropped due to ICMP unreach */
 	u_quad_t tcps_sc_bucketoverflow;/* # dropped due to bucket overflow */
 	u_quad_t tcps_sc_aborted;	/* # of entries aborted (no mem) */
 	u_quad_t tcps_sc_dupesyn;	/* # of duplicate SYNs received */
 	u_quad_t tcps_sc_dropped;	/* # of SYNs dropped (no route/mem) */
 	u_quad_t tcps_sc_collisions;	/* # of hash collisions */
 	u_quad_t tcps_sc_retransmitted;	/* # of retransmissions */
+	u_quad_t tcps_sc_delayed_free;	/* # of delayed pool_put()s */
 
 	u_quad_t tcps_selfquench;	/* # of ENOBUFS we get on output */
 };
 
 /*
  * Names for TCP sysctl objects.
  */
 #define	TCPCTL_RFC1323		1	/* RFC1323 timestamps/scaling */
 #define	TCPCTL_SENDSPACE	2	/* default send buffer */
 #define	TCPCTL_RECVSPACE	3	/* default recv buffer */
 #define	TCPCTL_MSSDFLT		4	/* default seg size */
 #define	TCPCTL_SYN_CACHE_LIMIT	5	/* max size of comp. state engine */
 #define	TCPCTL_SYN_BUCKET_LIMIT	6	/* max size of hash bucket */
 #if 0	/*obsoleted*/
 #define	TCPCTL_SYN_CACHE_INTER	7	/* interval of comp. state timer */
 #endif
 #define	TCPCTL_INIT_WIN		8	/* initial window */
 #define	TCPCTL_MSS_IFMTU	9	/* mss from interface, not in_maxmtu */
 #define	TCPCTL_SACK		10	/* RFC2018 selective acknowledgement */
 #define	TCPCTL_WSCALE		11	/* RFC1323 window scaling */
@@ -680,42 +684,44 @@
 	{ 1, 0, &tcp_keepidle },		\
 	{ 1, 0, &tcp_keepintvl },		\
 	{ 1, 0, &tcp_keepcnt },			\
 	{ 1, 1, 0, PR_SLOWHZ },			\
 	{ 1, 0, &tcp_do_newreno },		\
 	{ 1, 0, &tcp_log_refused },		\
 	{ 0 },					\
 	{ 1, 0, &tcp_rst_ppslim },		\
 	{ 1, 0, &tcp_delack_ticks },		\
 	{ 1, 0, &tcp_init_win_local },		\
 }
 
 #ifdef __NO_STRICT_ALIGNMENT
 #define	TCP_HDR_ALIGNED_P(th)	1
 #else
 #define	TCP_HDR_ALIGNED_P(th)	((((vaddr_t) (th)) & 3) == 0)
 #endif
 
 int	 tcp_attach __P((struct socket *));
 void	 tcp_canceltimers __P((struct tcpcb *));
+int	 tcp_timers_running __P((struct tcpcb *));
 struct tcpcb *
 	 tcp_close __P((struct tcpcb *));
+int	 tcp_isdead __P((struct tcpcb *));
 #ifdef INET6
 void	 tcp6_ctlinput __P((int, struct sockaddr *, void *));
 #endif
 void	 *tcp_ctlinput __P((int, struct sockaddr *, void *));
 int	 tcp_ctloutput __P((int, struct socket *, int, int, struct mbuf **));
 struct tcpcb *
 	 tcp_disconnect __P((struct tcpcb *));
 struct tcpcb *
 	 tcp_drop __P((struct tcpcb *, int));
 void	 tcp_dooptions __P((struct tcpcb *,
 	    u_char *, int, struct tcphdr *, struct tcp_opt_info *));
 void	 tcp_drain __P((void));
 #ifdef INET6
 void	 tcp6_drain __P((void));
 #endif
 void	 tcp_established __P((struct tcpcb *));
 void	 tcp_init __P((void));
 #ifdef INET6
 int	 tcp6_input __P((struct mbuf **, int *, int));
 #endif
Index: usr.bin/netstat/inet.c
===================================================================
RCS file: /usr/users/he/nbcvs/netbsd/src/usr.bin/netstat/inet.c,v
retrieving revision 1.54
diff -u -u -2 -0 -r1.54 inet.c
--- usr.bin/netstat/inet.c	4 Feb 2003 01:22:08 -0000	1.54
+++ usr.bin/netstat/inet.c	13 Mar 2003 21:05:08 -0000
@@ -246,69 +246,72 @@
 	p(tcps_pawsdrop, "\t\t%llu old duplicate packet%s\n");
 	p2(tcps_rcvpartduppack, tcps_rcvpartdupbyte,
 		"\t\t%llu packet%s with some dup. data (%llu byte%s duped)\n");
 	p2(tcps_rcvoopack, tcps_rcvoobyte,
 		"\t\t%llu out-of-order packet%s (%llu byte%s)\n");
 	p2(tcps_rcvpackafterwin, tcps_rcvbyteafterwin,
 		"\t\t%llu packet%s (%llu byte%s) of data after window\n");
 	p(tcps_rcvwinprobe, "\t\t%llu window probe%s\n");
 	p(tcps_rcvwinupd, "\t\t%llu window update packet%s\n");
 	p(tcps_rcvafterclose, "\t\t%llu packet%s received after close\n");
 	p(tcps_rcvbadsum, "\t\t%llu discarded for bad checksum%s\n");
 	p(tcps_rcvbadoff, "\t\t%llu discarded for bad header offset field%s\n");
 	ps(tcps_rcvshort, "\t\t%llu discarded because packet too short\n");
 	p(tcps_connattempt, "\t%llu connection request%s\n");
 	p(tcps_accepts, "\t%llu connection accept%s\n");
 	p(tcps_connects,
 		"\t%llu connection%s established (including accepts)\n");
 	p2(tcps_closed, tcps_drops,
 		"\t%llu connection%s closed (including %llu drop%s)\n");
 	p(tcps_conndrops, "\t%llu embryonic connection%s dropped\n");
+	p(tcps_delayed_free, "\t%llu delayed free%s of tcpcb\n");
 	p2(tcps_rttupdated, tcps_segstimed,
 		"\t%llu segment%s updated rtt (of %llu attempt%s)\n");
 	p(tcps_rexmttimeo, "\t%llu retransmit timeout%s\n");
 	p(tcps_timeoutdrop,
 		"\t\t%llu connection%s dropped by rexmit timeout\n");
 	p2(tcps_persisttimeo, tcps_persistdrops,
 	   "\t%llu persist timeout%s (resulting in %llu dropped "
 		"connection%s)\n");
 	p(tcps_keeptimeo, "\t%llu keepalive timeout%s\n");
 	p(tcps_keepprobe, "\t\t%llu keepalive probe%s sent\n");
 	p(tcps_keepdrops, "\t\t%llu connection%s dropped by keepalive\n");
 	p(tcps_predack, "\t%llu correct ACK header prediction%s\n");
 	p(tcps_preddat, "\t%llu correct data packet header prediction%s\n");
 	p3(tcps_pcbhashmiss, "\t%llu PCB hash miss%s\n");
 	ps(tcps_noport, "\t%llu dropped due to no socket\n");
 	p(tcps_connsdrained, "\t%llu connection%s drained due to memory "
 		"shortage\n");
 	p(tcps_pmtublackhole, "\t%llu PMTUD blackhole%s detected\n");
 
 	p(tcps_badsyn, "\t%llu bad connection attempt%s\n");
 	ps(tcps_sc_added, "\t%llu SYN cache entries added\n");
 	p(tcps_sc_collisions, "\t\t%llu hash collision%s\n");
 	ps(tcps_sc_completed, "\t\t%llu completed\n");
 	ps(tcps_sc_aborted, "\t\t%llu aborted (no space to build PCB)\n");
 	ps(tcps_sc_timed_out, "\t\t%llu timed out\n");
 	ps(tcps_sc_overflowed, "\t\t%llu dropped due to overflow\n");
 	ps(tcps_sc_bucketoverflow, "\t\t%llu dropped due to bucket overflow\n");
 	ps(tcps_sc_reset, "\t\t%llu dropped due to RST\n");
 	ps(tcps_sc_unreach, "\t\t%llu dropped due to ICMP unreachable\n");
+	ps(tcps_sc_delayed_free, "\t\t%llu delayed free of SYN cache "
+		"entries\n");
 	p(tcps_sc_retransmitted, "\t%llu SYN,ACK%s retransmitted\n");
 	p(tcps_sc_dupesyn, "\t%llu duplicate SYN%s received for entries "
 		"already in the cache\n");
 	p(tcps_sc_dropped, "\t%llu SYN%s dropped (no route or no space)\n");
 
 #undef p
 #undef ps
 #undef p2
 #undef p2s
 #undef p3
 }
 
 /*
  * Dump UDP statistics structure.
  */
 void
 udp_stats(off, name)
 	u_long off;
 	char *name;
 {
Index: share/man/man9/callout.9
===================================================================
RCS file: /usr/users/he/nbcvs/netbsd/src/share/man/man9/callout.9,v
retrieving revision 1.8
diff -u -u -2 -0 -r1.8 callout.9
--- share/man/man9/callout.9	4 Feb 2003 01:22:36 -0000	1.8
+++ share/man/man9/callout.9	17 Mar 2003 20:54:23 -0000
@@ -26,60 +26,67 @@
 .\" ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 .\" TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 .\" PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 .\" BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 .\" CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 .\" SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 .\" INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 .\" CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 .\" ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 .\" POSSIBILITY OF SUCH DAMAGE.
 .\"
 .Dd February 3, 2003
 .Dt CALLOUT 9
 .Os
 .Sh NAME
 .Nm callout_init ,
 .Nm callout_reset ,
 .Nm callout_schedule ,
 .Nm callout_setfunc ,
 .Nm callout_stop ,
+.Nm callout_expired ,
+.Nm callout_is_running ,
+.Nm callout_ack ,
 .Nm CALLOUT_INITIALIZER ,
 .Nm CALLOUT_INITIALIZER_SETFUNC
 .Nd execute a function after a specified length of time
 .Sh SYNOPSIS
 .Fd #include \*[Lt]sys/callout.h\*[Gt]
 .Ft void
 .Fn "callout_init" "struct callout *c"
 .Ft void
 .Fn "callout_reset" "struct callout *c" "int ticks" \
     "void (*func)(void *)" "void *arg"
 .Ft void
 .Fn "callout_schedule" "struct callout *c" "int ticks"
 .Ft void
 .Fn "callout_setfunc" "struct callout *c" "void (*func)(void *)" "void *arg"
 .Ft void
 .Fn "callout_stop" "struct callout *c"
 .Ft int
 .Fn "callout_pending" "struct callout *c"
 .Ft int
 .Fn "callout_expired" "struct callout *c"
+.Ft int
+.Fn "callout_is_running" "struct callout *c"
+.Ft void
+.Fn "callout_ack" "struct callout *c"
 .Fd CALLOUT_INITIALIZER
 .Pp
 .Fd CALLOUT_INITIALIZER_SETFUNC(func, arg)
 .Sh DESCRIPTION
 The
 .Nm callout
 facility provides a mechanism to execute a function at a given time.
 The timer is based on the hardclock timer which ticks
 .Dv hz
 times per second.
 The function is called at softclock interrupt level.
 .Pp
 Clients of the
 .Nm callout
 facility are responsible for providing pre-allocated
 callout structures, or
 .Dq handles .
 The
 .Nm callout
 facility replaces the historic
@@ -100,100 +107,133 @@
 the value
 .Dv CALLOUT_INITIALIZER
 to them.
 .Pp
 The
 .Fn callout_reset
 function resets and starts the timer associated with the callout handle
 .Fa c .
 When the timer expires after
 .Fa ticks Ns No /hz
 seconds, the function specified by
 .Fa func
 will be called with the argument
 .Fa arg .
 If the timer associated with the callout handle is already running,
 the callout will simply be rescheduled to execute at the newly specified
 time.
 Once the timer is started, the callout handle is marked as
 .Em PENDING .
 Once the timer expires,
-the handle is marked at
+the handle is marked as
 .Em EXPIRED
+and
+.Em RUNNING
 and the
 .Em PENDING
 status is cleared.
 .Pp
 The
 .Fn callout_setfunc
 function initializes the callout handle
 .Fa c
 for use and sets the function and argument to
 .Fa func
 and
 .Fa arg
 respectively.
 If a callout will always be used with the same function and argument,
 then
 .Fn callout_setfunc
 used in conjunction with
 .Fn callout_schedule
 is slightly more efficient than using
 .Fn callout_init
 and
 .Fn callout_reset .
 If it is inconvenient to call
 .Fn callout_setfunc ,
 statically-allocated callout handles may be initialized by assigning
 the value
 .Dv CALLOUT_INITIALIZER_SETFUNC
 to them, passing the function and argument to the initializer.
 .Pp
 The
 .Fn callout_stop
 function stops the timer associated the callout handle
 .Fa c .
 The
-.Em PENDING
+.Em PENDING ,
+.Em EXPIRED ,
+and
+.Em RUNNING
 status for the callout handle is cleared.
-The
-.Em EXPIRED
-status is not affected.
 It is safe to call
 .Fn callout_stop
 on a callout handle that is not pending, so long as it is initialized.
 .Pp
 The
 .Fn callout_pending
 function tests the
 .Em PENDING
 status of the callout handle
 .Fa c .
 A
 .Em PENDING
 callout is one that has been started and whose function has not yet
 been called.
 Note that it is possible for a callout's timer to have expired without
 its function being called if interrupt level has not dropped low enough
 to let softclock interrupts through.
 Note that it is only safe to test
 .Em PENDING
 status when at softclock interrupt level or higher.
 .Pp
 The
 .Fn callout_expired
 function tests to see if the callout's timer has expired and its
 function called.
+.Pp
+The
+.Fn callout_is_running
+function tests to see if the callout's function is being called.
+For this to work, the callout function will have to use the
+.Fn callout_ack
+function to clear this flag after raising the priority level as
+appropriate.
+Since the priority is lowered prior to invocation of the callout
+function, other pending higher-priority code may run before the
+callout function is actually invoked.
+This may create a race condition if this higher-priority code
+deallocates storage containing one or more callout structures whose
+callout functions are about to be invoked.
+In such cases one technique to prevent references to deallocated
+storage would be to mark the data structure and defer deallocation
+until the callout function runs.
+.Pp
+The
+.Fn callout_ack
+function clears the
+.Em RUNNING
+state in the callout handle
+.Fa c .
+This is used in situations where it is necessary to protect against
+the race condition described under
+.Fn callout_is_running .
+The
+.Fn callout_ack
+function would typically be called in the callout function after
+raising the priority level as appropriate.
 .Sh SEE ALSO
 .Xr hz 9
 .Sh HISTORY
 The
 .Nm callout
 facility was implemented by Artur Grabowski and Thomas Nordin, based
 on the work of G. Varghese and A. Lauck, described in the paper
 Hashed and Hierarchical Timing Wheels: Data Structures for the
 Efficient Implementation of a Timer Facility
 in the Proceedings of the 11th ACM Annual Symposium on Operating System
 Principles, Austin, Texas, November 1987.
 It was adapted to the
 .Nx
 kernel by Jason R. Thorpe.