Documentation
¶
Index ¶
- Constants
- Variables
- type DomProcesser
- type Downloader
- type FileDownloader
- type Filter
- type LimitFilter
- type OptionDomProcesser
- type OptionFilter
- type OptionSpider
- func OptionSpiderConcu(concu uint32) OptionSpider
- func OptionSpiderDownloader(downloader Downloader) OptionSpider
- func OptionSpiderFilter(filter Filter) OptionSpider
- func OptionSpiderProcesser(processer Processer) OptionSpider
- func OptionSpiderRequestCheckRedirect(checkRedirect func(req *http.Request, via []*http.Request) error) OptionSpider
- func OptionSpiderRequestHeader(key, value string) OptionSpider
- func OptionSpiderRequestTimeout(timeout time.Duration) OptionSpider
- func OptionSpiderResponseChunkedAllowed(allowed bool) OptionSpider
- func OptionSpiderSchduler(resourceMgr ResourceManager) OptionSpider
- func OptionSpiderScheduler(scheduler Scheduler) OptionSpider
- func OptionSpiderSleep(tp, min, max uint) OptionSpider
- type Processer
- type ResourceChan
- type ResourceManager
- type Result
- type Scheduler
- type SchedulerChan
- type Spider
Constants ¶
View Source
// File suffixes (including the leading dot) used as the values of the
// ContentTypes table.
//
// All suffixes are lower-case. ContentTypeACC carries the AAC audio suffix
// ".aac"; the identifier keeps its historical spelling so existing callers
// continue to compile.
const (
	ContentTypeACC    = ".aac"
	ContentTypeABW    = ".abw"
	ContentTypeARC    = ".arc"
	ContentTypeAVI    = ".avi"
	ContentTypeAZW    = ".azw"
	ContentTypeBIN    = ".bin"
	ContentTypeBMP    = ".bmp"
	ContentTypeBZ     = ".bz"
	ContentTypeBZ2    = ".bz2"
	ContentTypeCSH    = ".csh"
	ContentTypeCSS    = ".css"
	ContentTypeCSV    = ".csv"
	ContentTypeDOC    = ".doc"
	ContentTypeDOCX   = ".docx"
	ContentTypeEOT    = ".eot"
	ContentTypeEPUB   = ".epub"
	ContentTypeGIF    = ".gif"
	ContentTypeHTM    = ".htm"
	ContentTypeHTML   = ".html"
	ContentTypeICO    = ".ico"
	ContentTypeICS    = ".ics"
	ContentTypeJAR    = ".jar"
	ContentTypeJPEG   = ".jpeg"
	ContentTypeJPG    = ".jpg"
	ContentTypeJS     = ".js"
	ContentTypeJSON   = ".json"
	ContentTypeJSONLD = ".jsonld"
	ContentTypeMID    = ".mid"
	ContentTypeMIDI   = ".midi"
	ContentTypeMJS    = ".mjs"
	ContentTypeMP3    = ".mp3"
	ContentTypeMPEG   = ".mpeg"
	ContentTypeMPKG   = ".mpkg"
	ContentTypeODP    = ".odp"
	ContentTypeODS    = ".ods"
	ContentTypeODT    = ".odt"
	ContentTypeOGA    = ".oga"
	ContentTypeOGV    = ".ogv"
	ContentTypeOGX    = ".ogx"
	ContentTypeOTF    = ".otf"
	ContentTypePNG    = ".png"
	ContentTypePDF    = ".pdf"
	ContentTypePPT    = ".ppt"
	ContentTypePPTX   = ".pptx"
	ContentTypeRAR    = ".rar"
	ContentTypeRTF    = ".rtf"
	ContentTypeSH     = ".sh"
	ContentTypeSVG    = ".svg"
	ContentTypeSWF    = ".swf"
	ContentTypeTAR    = ".tar"
	ContentTypeTIF    = ".tif"
	ContentTypeTIFF   = ".tiff"
	ContentTypeTTF    = ".ttf"
	ContentTypeTXT    = ".txt"
	ContentTypeVSD    = ".vsd"
	ContentTypeWAV    = ".wav"
	ContentTypeWEBA   = ".weba"
	ContentTypeWEBM   = ".webm"
	ContentTypeWEBP   = ".webp"
	ContentTypeWOFF   = ".woff"
	ContentTypeWOFF2  = ".woff2"
	ContentTypeXHTML  = ".xhtml"
	ContentTypeXLS    = ".xls"
	ContentTypeXLSX   = ".xlsx"
	ContentTypeXML    = ".xml"
	ContentTypeXUL    = ".xul"
	ContentTypeZIP    = ".zip"
	ContentType3GP    = ".3gp"
	ContentType3G2    = ".3g2"
	ContentType7Z     = ".7z"
)
View Source
// Sleep type selectors, numbered consecutively from zero via iota.
//
// NOTE(review): SleepTypeNode reads like a misspelling of "None" (no sleep);
// the exported name is left untouched. These values are presumably what the
// tp argument of OptionSpiderSleep expects; confirm against the implementation.
const (
	SleepTypeNode   = iota // 0
	SleepTypeFixed         // 1
	SleepTypeRandom        // 2
)
View Source
const ( SpiderConcuDefault = 100 SleepTypeDefault = SleepTypeRandom SleepMinDefault = 0 SleepMaxDefault = 1000 DownloadPathDefault = "/tmp/tamper" )
View Source
// SelectorDefault is a comma-separated list of HTML element names.
// NOTE(review): presumably the selector DomProcesser uses when
// OptionDomProcesserSelectors is not supplied; confirm in the implementation.
const SelectorDefault = "script, link, a, img, frame, iframe, area, base, blockquote, body, del, head, ins, object, q"
Variables ¶
View Source
// ContentTypes maps a MIME media type to the file suffix constant used for
// content of that type. Keys are bare media types without parameters such as
// "; charset=utf-8".
var ContentTypes = map[string]string{
	"audio/aac": ContentTypeACC,
	"application/x-abiword": ContentTypeABW,
	"application/x-freearc": ContentTypeARC,
	"video/x-msvideo": ContentTypeAVI,
	"application/vnd.amazon.ebook": ContentTypeAZW,
	"application/octet-stream": ContentTypeBIN,
	"image/bmp": ContentTypeBMP,
	"application/x-bzip": ContentTypeBZ,
	"application/x-bzip2": ContentTypeBZ2,
	"application/x-csh": ContentTypeCSH,
	"text/css": ContentTypeCSS,
	"text/csv": ContentTypeCSV,
	"application/msword": ContentTypeDOC,
	"application/vnd.openxmlformats-officedocument.wordprocessingml.document": ContentTypeDOCX,
	"application/vnd.ms-fontobject": ContentTypeEOT,
	"application/epub+zip": ContentTypeEPUB,
	"image/gif": ContentTypeGIF,
	"text/html": ContentTypeHTML,
	// The registered media type ends in "icon", not "ico".
	"image/vnd.microsoft.icon": ContentTypeICO,
	"text/calendar": ContentTypeICS,
	"application/java-archive": ContentTypeJAR,
	"image/jpeg": ContentTypeJPEG,
	"text/javascript": ContentTypeJS,
	"application/javascript": ContentTypeJS,
	"application/json": ContentTypeJSON,
	"application/ld+json": ContentTypeJSONLD,
	// Two distinct media types; each needs its own key to ever match.
	"audio/midi": ContentTypeMIDI,
	"audio/x-midi": ContentTypeMIDI,
	"audio/mpeg": ContentTypeMP3,
	"video/mpeg": ContentTypeMPEG,
	"application/vnd.apple.installer+xml": ContentTypeMPKG,
	"application/vnd.oasis.opendocument.presentation": ContentTypeODP,
	"application/vnd.oasis.opendocument.spreadsheet": ContentTypeODS,
	"application/vnd.oasis.opendocument.text": ContentTypeODT,
	"audio/ogg": ContentTypeOGA,
	"video/ogg": ContentTypeOGV,
	"application/ogg": ContentTypeOGX,
	"font/otf": ContentTypeOTF,
	"image/png": ContentTypePNG,
	"application/pdf": ContentTypePDF,
	"application/vnd.ms-powerpoint": ContentTypePPT,
	"application/vnd.openxmlformats-officedocument.presentationml.presentation": ContentTypePPTX,
	"application/x-rar-compressed": ContentTypeRAR,
	"application/rtf": ContentTypeRTF,
	"application/x-sh": ContentTypeSH,
	"image/svg+xml": ContentTypeSVG,
	"application/x-shockwave-flash": ContentTypeSWF,
	"application/x-tar": ContentTypeTAR,
	"image/tiff": ContentTypeTIFF,
	"font/ttf": ContentTypeTTF,
	"text/plain": ContentTypeTXT,
	"application/vnd.visio": ContentTypeVSD,
	"audio/wav": ContentTypeWAV,
	"audio/webm": ContentTypeWEBA,
	"video/webm": ContentTypeWEBM,
	"image/webp": ContentTypeWEBP,
	"font/woff": ContentTypeWOFF,
	"font/woff2": ContentTypeWOFF2,
	"application/xhtml+xml": ContentTypeXHTML,
	"application/vnd.ms-excel": ContentTypeXLS,
	"application/vnd.openxmlformats-officedocument.spreadsheetml.sheet": ContentTypeXLSX,
	"application/xml": ContentTypeXML,
	"application/vnd.mozilla.xul+xml": ContentTypeXUL,
	"application/zip": ContentTypeZIP,
	"video/3gpp": ContentType3GP,
	"audio/3gpp": ContentType3GP,
	"video/3gpp2": ContentType3G2,
	"audio/3gpp2": ContentType3G2,
	"application/x-7z-compressed": ContentType7Z,
}
Functions ¶
This section is empty.
Types ¶
type DomProcesser ¶
type DomProcesser struct {
// contains filtered or unexported fields
}
func NewDomProcesser ¶
func NewDomProcesser(options ...OptionDomProcesser) *DomProcesser
func (*DomProcesser) Finish ¶
func (dp *DomProcesser) Finish()
type Downloader ¶
type FileDownloader ¶
type FileDownloader struct {
// contains filtered or unexported fields
}
func NewFileDownloader ¶
func NewFileDownloader(path string) *FileDownloader
type Filter ¶
// Filter groups three yes/no checks: one on a suffix, one on a size, and one
// on whether HTTPS is allowed.
type Filter interface {
	// SuffixAllow reports whether the given suffix is accepted.
	SuffixAllow(suffix string) bool

	// SizeAllow reports whether the given size is accepted.
	// NOTE(review): the unit is presumably bytes; confirm with the caller.
	SizeAllow(size int64) bool

	// HttpsAllow reports whether HTTPS is accepted.
	HttpsAllow() bool
}
By default, responses with Transfer-Encoding: chunked are not cached. By default, responses with Content-Type: application/octet-stream are not cached.
type LimitFilter ¶
type LimitFilter struct {
// contains filtered or unexported fields
}
func NewLimitFilter ¶
func NewLimitFilter(options ...OptionFilter) (*LimitFilter, error)
func (*LimitFilter) HttpsAllow ¶
func (lm *LimitFilter) HttpsAllow() bool
func (*LimitFilter) SizeAllow ¶
func (lm *LimitFilter) SizeAllow(size int64) bool
func (*LimitFilter) SuffixAllow ¶
func (lm *LimitFilter) SuffixAllow(suffix string) bool
type OptionDomProcesser ¶
// OptionDomProcesser is a function that receives the *DomProcesser it applies
// to; NewDomProcesser accepts any number of them.
type OptionDomProcesser func(dp *DomProcesser)
func OptionDomProcesserSelectors ¶
func OptionDomProcesserSelectors(selectors []string) OptionDomProcesser
type OptionFilter ¶
// OptionFilter is a function that receives a *LimitFilter and returns an
// error; NewLimitFilter accepts any number of them.
type OptionFilter func(lm *LimitFilter) error
func OptionFilterSize ¶
func OptionFilterSize(size int64) OptionFilter
func OptionFilterSuffixs ¶
func OptionFilterSuffixs(suffixs []string) OptionFilter
type OptionSpider ¶
// OptionSpider is a function that receives the *Spider it applies to. The
// OptionSpider* functions of this package return values of this type.
type OptionSpider func(s *Spider)
func OptionSpiderConcu ¶
func OptionSpiderConcu(concu uint32) OptionSpider
func OptionSpiderDownloader ¶
func OptionSpiderDownloader(downloader Downloader) OptionSpider
func OptionSpiderFilter ¶
func OptionSpiderFilter(filter Filter) OptionSpider
func OptionSpiderProcesser ¶
func OptionSpiderProcesser(processer Processer) OptionSpider
func OptionSpiderRequestHeader ¶
func OptionSpiderRequestHeader(key, value string) OptionSpider
func OptionSpiderRequestTimeout ¶
func OptionSpiderRequestTimeout(timeout time.Duration) OptionSpider
func OptionSpiderResponseChunkedAllowed ¶
func OptionSpiderResponseChunkedAllowed(allowed bool) OptionSpider
func OptionSpiderSchduler ¶
func OptionSpiderSchduler(resourceMgr ResourceManager) OptionSpider
func OptionSpiderScheduler ¶
func OptionSpiderScheduler(scheduler Scheduler) OptionSpider
func OptionSpiderSleep ¶
func OptionSpiderSleep(tp, min, max uint) OptionSpider
type ResourceChan ¶
type ResourceChan struct {
// contains filtered or unexported fields
}
func NewResourceChan ¶
func NewResourceChan(all uint32) *ResourceChan
func (*ResourceChan) Acquire ¶
func (rc *ResourceChan) Acquire()
func (*ResourceChan) Free ¶
func (rc *ResourceChan) Free() uint32
func (*ResourceChan) Release ¶
func (rc *ResourceChan) Release()
func (*ResourceChan) Used ¶
func (rc *ResourceChan) Used() uint32
type ResourceManager ¶
type Result ¶
// Result collects what is recorded for a single request. Req and Rsp are
// excluded from JSON; every other field except Depth is omitted from JSON
// when it holds its zero value.
type Result struct {
	// Error is an error message; empty values are omitted from JSON.
	Error string `json:"error,omitempty"`

	// Request result.
	Req *http.Request  `json:"-"`
	Rsp *http.Response `json:"-"`

	// Inner parser result.
	Size    int64  `json:"size,omitempty"`
	Suffix  string `json:"suffix,omitempty"`
	CharSet string `json:"charset,omitempty"`

	// Download result.
	UrlPath  *string `json:"url_path,omitempty"`
	HdrPath  *string `json:"hdr_path,omitempty"`
	BodyPath *string `json:"body_path,omitempty"`

	// Processer result. Depth is always emitted in JSON, even when zero.
	Depth uint     `json:"depth"`
	Subs  []string `json:"subs,omitempty"`
}
type SchedulerChan ¶
type SchedulerChan struct {
// contains filtered or unexported fields
}
func NewSchedulerChan ¶
func NewSchedulerChan() *SchedulerChan
func (*SchedulerChan) Poll ¶
func (sc *SchedulerChan) Poll() *http.Request
func (*SchedulerChan) Push ¶
func (sc *SchedulerChan) Push(req *http.Request)
func (*SchedulerChan) Rest ¶
func (sc *SchedulerChan) Rest() int
Click to show internal directories.
Click to hide internal directories.