From 0ba0415a5075ff53891910e0da97544585980095 Mon Sep 17 00:00:00 2001 From: Johan Walles Date: Sat, 18 May 2024 07:15:31 +0200 Subject: [PATCH 01/11] Set the stage for parallel search --- m/search.go | 28 ++++++++++++++++++++++++++-- 1 file changed, 26 insertions(+), 2 deletions(-) diff --git a/m/search.go b/m/search.go index d61e51b..767165b 100644 --- a/m/search.go +++ b/m/search.go @@ -30,12 +30,36 @@ func (p *Pager) scrollToSearchHits() { p.scrollPosition = *firstHitPosition } -// NOTE: When we search, we do that by looping over the *input lines*, not -// the screen lines. That's why we're using a line number rather than a +// NOTE: When we search, we do that by looping over the *input lines*, not the +// screen lines. That's why we're using a line number rather than a // scrollPosition for searching. // +// The `beforePosition` parameter is exclusive, meaning that line will not be +// searched. +// // FIXME: We should take startPosition.deltaScreenLines into account as well! func (p *Pager) findFirstHit(startPosition linenumbers.LineNumber, beforePosition *linenumbers.LineNumber, backwards bool) *scrollPosition { + // FIXME: Check the number of CPU cores + + // FIXME: If the number of lines to search match the number of cores (or + // more), divide the search into chunks. Otherwise use one chunk. + + // FIXME: Make a results array, with one result per chunk + + // FIXME: Search all chunks in parallel + + // FIXME: Return the first non-nil result +} + +// NOTE: When we search, we do that by looping over the *input lines*, not the +// screen lines. That's why we're using a line number rather than a +// scrollPosition for searching. +// +// The `beforePosition` parameter is exclusive, meaning that line will not be +// searched. +// +// FIXME: We should take startPosition.deltaScreenLines into account as well! 
+func (p *Pager) _findFirstHit(startPosition linenumbers.LineNumber, beforePosition *linenumbers.LineNumber, backwards bool) *scrollPosition { searchPosition := startPosition for { line := p.reader.GetLine(searchPosition) From 27d03630a59c34574a2497c36001e57c08510207 Mon Sep 17 00:00:00 2001 From: Johan Walles Date: Sat, 18 May 2024 07:23:40 +0200 Subject: [PATCH 02/11] Half way implement the parallel search --- m/search.go | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/m/search.go b/m/search.go index 767165b..d215522 100644 --- a/m/search.go +++ b/m/search.go @@ -2,6 +2,7 @@ package m import ( "fmt" + "runtime" "github.com/walles/moar/m/linenumbers" ) @@ -39,10 +40,20 @@ func (p *Pager) scrollToSearchHits() { // // FIXME: We should take startPosition.deltaScreenLines into account as well! func (p *Pager) findFirstHit(startPosition linenumbers.LineNumber, beforePosition *linenumbers.LineNumber, backwards bool) *scrollPosition { - // FIXME: Check the number of CPU cores + // If the number of lines to search matches the number of cores (or more), + // divide the search into chunks. Otherwise use one chunk. + chunkCount := runtime.NumCPU() + linesCount := p.reader.GetLineCount() - startPosition.AsZeroBased() + if linesCount < chunkCount { + chunkCount = 0 + } + chunkSize := linesCount / chunkCount - // FIXME: If the number of lines to search match the number of cores (or - // more), divide the search into chunks. Otherwise use one chunk. 
+ // Each parallel search will start at one of these positions + searchStarts := make([]linenumbers.LineNumber, chunkCount) + for i := 0; i < chunkCount; i++ { + searchStarts[i] = startPosition.NonWrappingAdd(i * chunkSize) + } // FIXME: Make a results array, with one result per chunk From aa342d43256d24fad2f1b4965b7b2d0c1a44f335 Mon Sep 17 00:00:00 2001 From: Johan Walles Date: Sat, 18 May 2024 07:28:17 +0200 Subject: [PATCH 03/11] Implement more of the parallel search --- m/search.go | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/m/search.go b/m/search.go index d215522..7355a25 100644 --- a/m/search.go +++ b/m/search.go @@ -55,9 +55,23 @@ func (p *Pager) findFirstHit(startPosition linenumbers.LineNumber, beforePositio searchStarts[i] = startPosition.NonWrappingAdd(i * chunkSize) } - // FIXME: Make a results array, with one result per chunk + // Make a results array, with one result per chunk + findings := make([]chan *scrollPosition, chunkCount) - // FIXME: Search all chunks in parallel + // Search all chunks in parallel + for i, searchStart := range searchStarts { + findings[i] = make(chan *scrollPosition) + + searchEndIndex := i + 1 + var beforePosition *linenumbers.LineNumber + if searchEndIndex < len(searchStarts) { + beforePosition = &searchStarts[searchEndIndex] + } + + go func() { + findings[i] <- p._findFirstHit(searchStart, beforePosition, backwards) + }() + } // FIXME: Return the first non-nil result } From 84cf4a9cd993d7edad5c46183d00963f3a3ce742 Mon Sep 17 00:00:00 2001 From: Johan Walles Date: Sat, 18 May 2024 07:31:28 +0200 Subject: [PATCH 04/11] Return the search results And fix backwards searches. 
--- m/search.go | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/m/search.go b/m/search.go index 7355a25..ebe08de 100644 --- a/m/search.go +++ b/m/search.go @@ -51,8 +51,12 @@ func (p *Pager) findFirstHit(startPosition linenumbers.LineNumber, beforePositio // Each parallel search will start at one of these positions searchStarts := make([]linenumbers.LineNumber, chunkCount) + direction := 1 + if backwards { + direction = -1 + } for i := 0; i < chunkCount; i++ { - searchStarts[i] = startPosition.NonWrappingAdd(i * chunkSize) + searchStarts[i] = startPosition.NonWrappingAdd(i * direction * chunkSize) } // Make a results array, with one result per chunk @@ -73,7 +77,15 @@ func (p *Pager) findFirstHit(startPosition linenumbers.LineNumber, beforePositio }() } - // FIXME: Return the first non-nil result + // Return the first non-nil result + for _, finding := range findings { + result := <-finding + if result != nil { + return result + } + } + + return nil } // NOTE: When we search, we do that by looping over the *input lines*, not the From 387fb348c7edda2e77380453eac8a84e2986d9c6 Mon Sep 17 00:00:00 2001 From: Johan Walles Date: Sat, 18 May 2024 07:33:53 +0200 Subject: [PATCH 05/11] Fix loop variables warning --- m/search.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/m/search.go b/m/search.go index ebe08de..30a0695 100644 --- a/m/search.go +++ b/m/search.go @@ -72,9 +72,9 @@ func (p *Pager) findFirstHit(startPosition linenumbers.LineNumber, beforePositio beforePosition = &searchStarts[searchEndIndex] } - go func() { + go func(i int, searchStart linenumbers.LineNumber) { findings[i] <- p._findFirstHit(searchStart, beforePosition, backwards) - }() + }(i, searchStart) } // Return the first non-nil result From 625193933de42e05742a8034472ada7263719001 Mon Sep 17 00:00:00 2001 From: Johan Walles Date: Sat, 18 May 2024 07:44:01 +0200 Subject: [PATCH 06/11] Fix the line counts --- m/search.go | 17 ++++++++++++++++- 1 
file changed, 16 insertions(+), 1 deletion(-) diff --git a/m/search.go b/m/search.go index 30a0695..33ad5e5 100644 --- a/m/search.go +++ b/m/search.go @@ -43,7 +43,22 @@ func (p *Pager) findFirstHit(startPosition linenumbers.LineNumber, beforePositio // If the number of lines to search matches the number of cores (or more), // divide the search into chunks. Otherwise use one chunk. chunkCount := runtime.NumCPU() - linesCount := p.reader.GetLineCount() - startPosition.AsZeroBased() + var linesCount int + if backwards { + // If the startPosition is zero, that should make the count one + linesCount = startPosition.AsZeroBased() + 1 + if beforePosition != nil { + // Searching from 1 with before set to 0 should make the count 1 + linesCount = startPosition.AsZeroBased() - beforePosition.AsZeroBased() + } + } else { + linesCount = p.reader.GetLineCount() - startPosition.AsZeroBased() + if beforePosition != nil { + // Searching from 1 with before set to 2 should make the count 1 + linesCount = beforePosition.AsZeroBased() - startPosition.AsZeroBased() + } + } + if linesCount < chunkCount { chunkCount = 0 } From ee35c6b1664f42aec3e463a8c0e20dfa8a368899 Mon Sep 17 00:00:00 2001 From: Johan Walles Date: Sat, 18 May 2024 07:47:40 +0200 Subject: [PATCH 07/11] Fix the chunk before positions --- m/search.go | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/m/search.go b/m/search.go index 33ad5e5..ee0522f 100644 --- a/m/search.go +++ b/m/search.go @@ -82,14 +82,16 @@ func (p *Pager) findFirstHit(startPosition linenumbers.LineNumber, beforePositio findings[i] = make(chan *scrollPosition) searchEndIndex := i + 1 - var beforePosition *linenumbers.LineNumber + var chunkBefore *linenumbers.LineNumber if searchEndIndex < len(searchStarts) { - beforePosition = &searchStarts[searchEndIndex] + chunkBefore = &searchStarts[searchEndIndex] + } else if beforePosition != nil { + chunkBefore = beforePosition } - go func(i int, searchStart linenumbers.LineNumber) { - 
findings[i] <- p._findFirstHit(searchStart, beforePosition, backwards) - }(i, searchStart) + go func(i int, searchStart linenumbers.LineNumber, chunkBefore *linenumbers.LineNumber) { + findings[i] <- p._findFirstHit(searchStart, chunkBefore, backwards) + }(i, searchStart, chunkBefore) } // Return the first non-nil result From 856574c28905b3029d3a8a7aac68a1adce36cee6 Mon Sep 17 00:00:00 2001 From: Johan Walles Date: Sat, 18 May 2024 07:52:20 +0200 Subject: [PATCH 08/11] Don't divide by zero --- m/search.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/m/search.go b/m/search.go index ee0522f..1742167 100644 --- a/m/search.go +++ b/m/search.go @@ -60,7 +60,7 @@ func (p *Pager) findFirstHit(startPosition linenumbers.LineNumber, beforePositio } if linesCount < chunkCount { - chunkCount = 0 + chunkCount = 1 } chunkSize := linesCount / chunkCount From 64ae443e1f83080da07bf1911b1fcb9685eb38fe Mon Sep 17 00:00:00 2001 From: Johan Walles Date: Sat, 18 May 2024 07:55:46 +0200 Subject: [PATCH 09/11] Log searches --- m/search.go | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/m/search.go b/m/search.go index 1742167..273e556 100644 --- a/m/search.go +++ b/m/search.go @@ -4,6 +4,8 @@ import ( "fmt" "runtime" + log "github.com/sirupsen/logrus" + "github.com/walles/moar/m/linenumbers" ) @@ -64,6 +66,8 @@ func (p *Pager) findFirstHit(startPosition linenumbers.LineNumber, beforePositio } chunkSize := linesCount / chunkCount + log.Debugf("Searching %d lines across %d cores with %d lines per core", linesCount, chunkCount, chunkSize) + // Each parallel search will start at one of these positions searchStarts := make([]linenumbers.LineNumber, chunkCount) direction := 1 From 60d3577b596eb70f3b1b19f0ee7e484fbb45d04e Mon Sep 17 00:00:00 2001 From: Johan Walles Date: Sat, 18 May 2024 07:59:44 +0200 Subject: [PATCH 10/11] Don't crash searching in empty buffer --- m/search.go | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/m/search.go 
b/m/search.go index 273e556..42cea77 100644 --- a/m/search.go +++ b/m/search.go @@ -15,10 +15,16 @@ func (p *Pager) scrollToSearchHits() { return } - firstHitPosition := p.findFirstHit(*p.scrollPosition.lineNumber(p), nil, false) + lineNumber := p.scrollPosition.lineNumber(p) + if lineNumber == nil { + // No lines to search + return + } + + firstHitPosition := p.findFirstHit(*lineNumber, nil, false) if firstHitPosition == nil { // Try again from the top - firstHitPosition = p.findFirstHit(linenumbers.LineNumber{}, p.scrollPosition.lineNumber(p), false) + firstHitPosition = p.findFirstHit(linenumbers.LineNumber{}, lineNumber, false) } if firstHitPosition == nil { // No match, give up From 686882ffcd2e80770cc0172080321bc0c944318f Mon Sep 17 00:00:00 2001 From: Johan Walles Date: Sat, 18 May 2024 08:13:08 +0200 Subject: [PATCH 11/11] Improve docs --- m/search.go | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/m/search.go b/m/search.go index 42cea77..81ffc23 100644 --- a/m/search.go +++ b/m/search.go @@ -40,12 +40,15 @@ func (p *Pager) scrollToSearchHits() { } // NOTE: When we search, we do that by looping over the *input lines*, not the -// screen lines. That's why we're using a line number rather than a -// scrollPosition for searching. +// screen lines. That's why startPosition is a LineNumber rather than a +// scrollPosition. // // The `beforePosition` parameter is exclusive, meaning that line will not be // searched. // +// For the actual searching, this method will call _findFirstHit() in parallel +// on multiple cores, to help large file search performance. +// // FIXME: We should take startPosition.deltaScreenLines into account as well! 
func (p *Pager) findFirstHit(startPosition linenumbers.LineNumber, beforePosition *linenumbers.LineNumber, backwards bool) *scrollPosition { // If the number of lines to search matches the number of cores (or more), @@ -116,12 +119,15 @@ func (p *Pager) findFirstHit(startPosition linenumbers.LineNumber, beforePositio } // NOTE: When we search, we do that by looping over the *input lines*, not the -// screen lines. That's why we're using a line number rather than a -// scrollPosition for searching. +// screen lines. That's why startPosition is a LineNumber rather than a +// scrollPosition. // // The `beforePosition` parameter is exclusive, meaning that line will not be // searched. // +// This method searches one chunk of lines; findFirstHit() calls it on +// multiple chunks in parallel to help large file search performance. +// // FIXME: We should take startPosition.deltaScreenLines into account as well! func (p *Pager) _findFirstHit(startPosition linenumbers.LineNumber, beforePosition *linenumbers.LineNumber, backwards bool) *scrollPosition { searchPosition := startPosition