Skip to content

Commit 34a1ef6

Browse files
authored
Fix the reprefill of evicted seqs with invalid draft tokens (#4564)
1 parent df9b428 commit 34a1ef6

3 files changed

Lines changed: 13 additions & 0 deletions

File tree

lmdeploy/pytorch/paging/seq_states/states.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -91,6 +91,8 @@ def activate(self):
9191
self.to_state(RunningState)
9292

9393
def evict(self):
94+
# clean up meta before eviction
95+
self.seq.cleanup()
9496
self.to_state(WaitingState)
9597

9698

lmdeploy/pytorch/strategies/ar/sequence.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -87,6 +87,10 @@ def set_step(self, step: int):
8787
if len(self.all_routed_experts) > step:
8888
self.all_routed_experts.resize(step)
8989

90+
def cleanup(self):
91+
"""Set up history meta after the sequence is stopped or cancelled."""
92+
pass
93+
9094

9195
class ARSequenceStrategy(SequenceStrategy):
9296

lmdeploy/pytorch/strategies/ar_spec/sequence.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -166,6 +166,13 @@ def set_step(self, step: int):
166166
if len(self.all_routed_experts) > step:
167167
self.all_routed_experts.resize(step)
168168

169+
def cleanup(self):
170+
"""Set up history meta after the sequence is stopped or cancelled."""
171+
# truncate history cache to valid ids
172+
self.history_cache.resize(self.num_valid_ids)
173+
self._num_history_ids = self.num_valid_ids - 1
174+
self._num_token_ids = 1
175+
169176

170177
class ARSpecSequenceStrategy(ARSequenceStrategy):
171178

0 commit comments

Comments
 (0)