Web 爬虫:scrape
jopen
9年前
scrape 是一个使用 Go 语言开发的 Web 抓取库,提供简单的高层接口。
示例代码:
package main import ( "fmt" "net/http" "github.com/yhat/scrape" "golang.org/x/net/html" "golang.org/x/net/html/atom" ) func main() { // request and parse the front page resp, err := http.Get("https://news.ycombinator.com/") if err != nil { panic(err) } root, err := html.Parse(resp.Body) if err != nil { panic(err) } // define a matcher matcher := func(n *html.Node) bool { // must check for nil values if n.DataAtom == atom.A && n.Parent != nil && n.Parent.Parent != nil { return scrape.Attr(n.Parent.Parent, "class") == "athing" } return false } // grab all articles and print them articles := scrape.FindAll(root, matcher) for i, article := range articles { fmt.Printf("%2d %s (%s)\n", i, scrape.Text(article), scrape.Attr(article, "href")) } }