thread-master/internal/library/placement/crawler_polite.go

59 lines
1.2 KiB
Go

package placement
import (
"context"
"math/rand"
"sync"
"time"
)
// Pacing parameters used to space out consecutive crawler searches.
const (
	// CrawlerMinQueryInterval is the base delay required after one paced
	// search finishes before the next one may start.
	CrawlerMinQueryInterval time.Duration = 8 * time.Second

	// CrawlerMaxQueryJitter is the exclusive upper bound of the random extra
	// delay added on top of CrawlerMinQueryInterval.
	CrawlerMaxQueryJitter time.Duration = 4 * time.Second
)
// WrapPoliteCrawler returns a search function that delegates to inner while
// spacing out Playwright keyword searches to reduce Threads rate limits.
//
// Every call made through the returned function shares one pacing state, so
// calls are serialized and delayed relative to the previous one. A nil inner
// yields a nil result, leaving "no crawler configured" detectable by callers.
func WrapPoliteCrawler(inner CrawlerSearchFn) CrawlerSearchFn {
	if inner != nil {
		return (&crawlerPacing{inner: inner}).search
	}
	return nil
}
// crawlerPacing holds the state shared by every call made through a single
// wrapped crawler search function.
type crawlerPacing struct {
	// inner is the wrapped search function that performs the actual query.
	inner CrawlerSearchFn
	// mu is held for the full duration of search, so calls run one at a time
	// and last is never accessed concurrently.
	mu sync.Mutex
	// last is the time the most recent inner call returned; it stays zero
	// until the first search has completed.
	last time.Time
}
// search runs the wrapped crawler search, first waiting until at least
// CrawlerMinQueryInterval plus a random jitter (below CrawlerMaxQueryJitter)
// has passed since the previous search returned.
//
// The mutex is held for the whole call, including the wait and the inner
// search, so concurrent callers are processed strictly one at a time. If ctx
// is done while waiting, search returns (nil, ctx.Err()) without calling the
// inner function. Otherwise it returns the inner function's results and error
// unchanged and records the completion time, whether or not the inner call
// succeeded.
func (p *crawlerPacing) search(ctx context.Context, member MemberContext, keyword string, limit int) ([]DiscoverPost, error) {
	p.mu.Lock()
	defer p.mu.Unlock()

	// The very first search has no predecessor and runs immediately.
	if prev := p.last; !prev.IsZero() {
		sinceLast := time.Since(prev)
		requiredGap := CrawlerMinQueryInterval + jitterDuration(CrawlerMaxQueryJitter)
		if remaining := requiredGap - sinceLast; remaining > 0 {
			pause := time.NewTimer(remaining)
			defer pause.Stop()
			select {
			case <-pause.C:
				// Gap satisfied; fall through to the query.
			case <-ctx.Done():
				return nil, ctx.Err()
			}
		}
	}

	results, searchErr := p.inner(ctx, member, keyword, limit)
	// Stamp after the call so the next gap is measured from completion.
	p.last = time.Now()
	return results, searchErr
}
// jitterDuration returns a pseudo-random duration in the half-open range
// [0, max). A zero or negative max always yields 0, which also keeps
// rand.Int63n from being called with a non-positive argument (it panics).
func jitterDuration(max time.Duration) time.Duration {
	var extra time.Duration
	if max > 0 {
		extra = time.Duration(rand.Int63n(int64(max)))
	}
	return extra
}