@@ -54,6 +54,7 @@ type CompleteCommand = {
5454 meter : boolean ,
5555 timing : boolean ,
5656 noMmap : boolean ,
57+ noDirectIo : boolean ,
5758 printTimings : boolean
5859} ;
5960
@@ -249,6 +250,11 @@ export const CompleteCommand: CommandModule<object, CompleteCommand> = {
249250 default : false ,
250251 description : "Disable mmap (memory-mapped file) usage"
251252 } )
253+ . option ( "noDirectIo" , {
254+ type : "boolean" ,
255+ default : false ,
256+ description : "Disable Direct I/O usage when available"
257+ } )
252258 . option ( "printTimings" , {
253259 alias : "pt" ,
254260 type : "boolean" ,
@@ -261,14 +267,14 @@ export const CompleteCommand: CommandModule<object, CompleteCommand> = {
261267 flashAttention, swaFullCache, threads, temperature, minP, topK,
262268 topP, seed, gpuLayers, repeatPenalty, lastTokensRepeatPenalty, penalizeRepeatingNewLine,
263269 repeatFrequencyPenalty, repeatPresencePenalty, maxTokens, tokenPredictionDraftModel, tokenPredictionModelContextSize,
264- debug, numa, meter, timing, noMmap, printTimings
270+ debug, numa, meter, timing, noMmap, noDirectIo , printTimings
265271 } ) {
266272 try {
267273 await RunCompletion ( {
268274 modelPath, header, gpu, systemInfo, text, textFile, contextSize, batchSize, flashAttention, swaFullCache,
269275 threads, temperature, minP, topK, topP, seed, gpuLayers, lastTokensRepeatPenalty,
270276 repeatPenalty, penalizeRepeatingNewLine, repeatFrequencyPenalty, repeatPresencePenalty, maxTokens,
271- tokenPredictionDraftModel, tokenPredictionModelContextSize, debug, numa, meter, timing, noMmap, printTimings
277+ tokenPredictionDraftModel, tokenPredictionModelContextSize, debug, numa, meter, timing, noMmap, noDirectIo , printTimings
272278 } ) ;
273279 } catch ( err ) {
274280 await new Promise ( ( accept ) => setTimeout ( accept , 0 ) ) ; // wait for logs to finish printing
@@ -283,7 +289,7 @@ async function RunCompletion({
283289 modelPath : modelArg , header : headerArg , gpu, systemInfo, text, textFile, contextSize, batchSize, flashAttention, swaFullCache,
284290 threads, temperature, minP, topK, topP, seed, gpuLayers,
285291 lastTokensRepeatPenalty, repeatPenalty, penalizeRepeatingNewLine, repeatFrequencyPenalty, repeatPresencePenalty,
286- tokenPredictionDraftModel, tokenPredictionModelContextSize, maxTokens, debug, numa, meter, timing, noMmap, printTimings
292+ tokenPredictionDraftModel, tokenPredictionModelContextSize, maxTokens, debug, numa, meter, timing, noMmap, noDirectIo , printTimings
287293} : CompleteCommand ) {
288294 if ( contextSize === - 1 ) contextSize = undefined ;
289295 if ( gpuLayers === - 1 ) gpuLayers = undefined ;
@@ -308,6 +314,7 @@ async function RunCompletion({
308314 } ) ;
309315 const logBatchSize = batchSize != null ;
310316 const useMmap = ! noMmap && llama . supportsMmap ;
317+ const useDirectIo = ! noDirectIo ;
311318
312319 const resolvedModelPath = await resolveCommandGgufPath ( modelArg , llama , headers , {
313320 flashAttention,
@@ -358,6 +365,7 @@ async function RunCompletion({
358365 defaultContextFlashAttention : flashAttention ,
359366 defaultContextSwaFullCache : swaFullCache ,
360367 useMmap,
368+ useDirectIo,
361369 ignoreMemorySafetyChecks : gpuLayers != null ,
362370 onLoadProgress ( loadProgress : number ) {
363371 progressUpdater . setProgress ( loadProgress ) ;
@@ -392,6 +400,7 @@ async function RunCompletion({
392400 defaultContextFlashAttention : flashAttention ,
393401 defaultContextSwaFullCache : swaFullCache ,
394402 useMmap,
403+ useDirectIo,
395404 onLoadProgress ( loadProgress : number ) {
396405 progressUpdater . setProgress ( loadProgress ) ;
397406 } ,
@@ -470,6 +479,7 @@ async function RunCompletion({
470479 context,
471480 draftContext,
472481 useMmap,
482+ useDirectIo,
473483 minTitleLength : "Complete" . length + 1 ,
474484 logBatchSize,
475485 tokenMeterEnabled : meter
0 commit comments