59 lines
1.2 KiB
Go
59 lines
1.2 KiB
Go
|
|
package placement
|
||
|
|
|
||
|
|
import (
|
||
|
|
"context"
|
||
|
|
"math/rand"
|
||
|
|
"sync"
|
||
|
|
"time"
|
||
|
|
)
|
||
|
|
|
||
|
|
// Pacing parameters applied between consecutive crawler searches.
const (
	// CrawlerMinQueryInterval is the base delay required between the end of
	// one paced search and the start of the next.
	CrawlerMinQueryInterval = 8 * time.Second

	// CrawlerMaxQueryJitter is the exclusive upper bound of the random extra
	// delay added on top of CrawlerMinQueryInterval.
	CrawlerMaxQueryJitter = 4 * time.Second
)
|
||
|
|
|
||
|
|
// WrapPoliteCrawler spaces out Playwright keyword searches to reduce Threads rate limits.
|
||
|
|
func WrapPoliteCrawler(inner CrawlerSearchFn) CrawlerSearchFn {
|
||
|
|
if inner == nil {
|
||
|
|
return nil
|
||
|
|
}
|
||
|
|
guard := &crawlerPacing{inner: inner}
|
||
|
|
return guard.search
|
||
|
|
}
|
||
|
|
|
||
|
|
type crawlerPacing struct {
|
||
|
|
inner CrawlerSearchFn
|
||
|
|
mu sync.Mutex
|
||
|
|
last time.Time
|
||
|
|
}
|
||
|
|
|
||
|
|
func (p *crawlerPacing) search(ctx context.Context, member MemberContext, keyword string, limit int) ([]DiscoverPost, error) {
|
||
|
|
p.mu.Lock()
|
||
|
|
defer p.mu.Unlock()
|
||
|
|
|
||
|
|
if !p.last.IsZero() {
|
||
|
|
elapsed := time.Since(p.last)
|
||
|
|
wait := CrawlerMinQueryInterval + jitterDuration(CrawlerMaxQueryJitter) - elapsed
|
||
|
|
if wait > 0 {
|
||
|
|
timer := time.NewTimer(wait)
|
||
|
|
defer timer.Stop()
|
||
|
|
select {
|
||
|
|
case <-ctx.Done():
|
||
|
|
return nil, ctx.Err()
|
||
|
|
case <-timer.C:
|
||
|
|
}
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
posts, err := p.inner(ctx, member, keyword, limit)
|
||
|
|
p.last = time.Now()
|
||
|
|
return posts, err
|
||
|
|
}
|
||
|
|
|
||
|
|
// jitterDuration returns a pseudo-random duration in the half-open range
// [0, max). A zero or negative max yields 0, which also avoids the panic
// rand.Int63n raises for non-positive arguments.
func jitterDuration(max time.Duration) time.Duration {
	bound := int64(max)
	if bound < 1 {
		return 0
	}
	return time.Duration(rand.Int63n(bound))
}
|