
form parsing and various concurrency fixes
yukinying committed Oct 8, 2015
1 parent c6de829 commit 44e7059
Showing 11 changed files with 450 additions and 142 deletions.
13 changes: 7 additions & 6 deletions cmd/gryffin-distributed/main.go
@@ -19,7 +19,6 @@ import (
"github.com/bitly/go-nsq"

"github.com/yahoo/gryffin"
"github.com/yahoo/gryffin/data"
"github.com/yahoo/gryffin/fuzzer/arachni"
"github.com/yahoo/gryffin/fuzzer/sqlmap"
"github.com/yahoo/gryffin/renderer"
@@ -107,7 +106,7 @@ func crawl() {
var consumer *nsq.Consumer

handler := nsq.HandlerFunc(func(m *nsq.Message) error {
scan := gryffin.NewScanFromJson(m.Body, t)
scan := gryffin.NewScanFromJson(m.Body)

if delay := scan.RateLimit(); delay != 0 {
go func() {
@@ -139,7 +138,7 @@ func crawl() {
// Therefore we don't need to test whether the link is coming
// from a duplicated page or not
for s := range r.GetLinks() {
if ok := s.ApplyLinkRules(); ok {
if ok := s.ShouldCrawl(); ok {
err := producer.Publish("seed", s.Json())
if err != nil {
fmt.Println("Could not publish", "seed", err)
@@ -166,7 +165,7 @@ func fuzzWithSqlmap() {
var consumer *nsq.Consumer
handler := nsq.HandlerFunc(func(m *nsq.Message) error {
wq <- true
scan := gryffin.NewScanFromJson(m.Body, t)
scan := gryffin.NewScanFromJson(m.Body)
f := &sqlmap.Fuzzer{}
f.Fuzz(scan)
<-wq
@@ -181,7 +180,7 @@ func fuzzWithArachni() {
var consumer *nsq.Consumer
handler := nsq.HandlerFunc(func(m *nsq.Message) error {
wq <- true
scan := gryffin.NewScanFromJson(m.Body, t)
scan := gryffin.NewScanFromJson(m.Body)
f := &arachni.Fuzzer{}
f.Fuzz(scan)
<-wq
@@ -224,11 +223,13 @@ func main() {
logWriter = io.MultiWriter(os.Stdout, tcpout)
}

gryffin.SetLogWriter(logWriter)

// we use a buffered channel to block when max concurrency is reached.
maxconcurrency := 5
wq = make(chan bool, maxconcurrency)

t = gryffin.NewScan("GET", url, "", data.NewMemoryStore(), logWriter)
t = gryffin.NewScan("GET", url, "")

// seed is a unique case where we exit the program immediately
if service == "seed" {
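
Taken together, the changes to the distributed driver replace the per-scan data.Store and io.Writer plumbing with package-level setup: the log writer is registered once via gryffin.SetLogWriter, and both NewScan and NewScanFromJson lose their extra arguments. A minimal sketch of the new call pattern (the URL is a placeholder; flag parsing and NSQ wiring are omitted):

    package main

    import (
        "os"

        "github.com/yahoo/gryffin"
    )

    func main() {
        // Register the process-wide log writer once, instead of passing an
        // io.Writer into every Scan.
        gryffin.SetLogWriter(os.Stdout)

        // NewScan no longer takes a data.Store or io.Writer.
        t := gryffin.NewScan("GET", "http://example.com/", "")

        // Queue workers rebuild a Scan from its JSON form; the second
        // (base *Scan) argument is gone.
        scan := gryffin.NewScanFromJson(t.Json())
        scan.Logm("Example", "round-tripped scan")
    }
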
22 changes: 13 additions & 9 deletions cmd/gryffin-standalone/main.go
@@ -15,7 +15,6 @@ import (
"time"

"github.com/yahoo/gryffin"
"github.com/yahoo/gryffin/data"
"github.com/yahoo/gryffin/fuzzer/arachni"
"github.com/yahoo/gryffin/fuzzer/sqlmap"
"github.com/yahoo/gryffin/renderer"
@@ -53,16 +52,14 @@ func linkChannels(s *gryffin.Scan) {
go func() {

for scan := range chanCrawl {
// scan := <-chanCrawl
r := &renderer.PhantomJSRenderer{Timeout: 10}
// r := &renderer.NoScriptRenderer{}
scan.CrawlAsync(r)

go func() {
if s := <-r.GetRequestBody(); s != nil {
// add two workers (two fuzzers)
wg.Add(2)
chanFuzz <- s
} else {
wg.Done()
}

}()
@@ -74,12 +71,14 @@ func linkChannels(s *gryffin.Scan) {
// Therefore we don't need to test whether the link is coming
// from a duplicated page or not
for newScan := range r.GetLinks() {
if ok := newScan.ApplyLinkRules(); ok {
if ok := newScan.ShouldCrawl(); ok {
// add one worker (a new crawl)
wg.Add(1)
chanRateLimit <- newScan
}
}

// remove one worker (finish crawl)
wg.Done()
scan.Logm("Get Links", "Finished")

}()
@@ -91,15 +90,16 @@ func linkChannels(s *gryffin.Scan) {
go func() {
for scan := range chanFuzz {

wg.Add(1) // we got two fuzzers, so add one more worker to the worker group.
go func() {
f := &arachni.Fuzzer{}
f.Fuzz(scan)
// remove a fuzzer worker.
wg.Done()
}()
go func() {
f := &sqlmap.Fuzzer{}
f.Fuzz(scan)
// remove a fuzzer worker.
wg.Done()
}()
}
@@ -140,6 +140,8 @@ func linkChannels(s *gryffin.Scan) {

chanStart <- s
close(chanStart)

// add one worker (start crawl)
wg.Add(1)
wg.Wait()
}
@@ -170,7 +172,9 @@ func main() {
w = io.MultiWriter(os.Stdout, tcpout)
}

scan := gryffin.NewScan(*method, url, *body, data.NewMemoryStore(), w)
gryffin.SetLogWriter(w)

scan := gryffin.NewScan(*method, url, *body)
scan.Logm("Main", "Started")

linkChannels(scan)
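
The worker-accounting changes above follow the standard sync.WaitGroup discipline: wg.Add must happen before the work item is handed off (here, before the channel send), and each item gets exactly one matching wg.Done. A stripped-down sketch of the pattern, independent of gryffin:

    package main

    import (
        "fmt"
        "sync"
    )

    func main() {
        var wg sync.WaitGroup
        work := make(chan int)

        // Consumer: one Done per item received.
        go func() {
            for n := range work {
                fmt.Println("processed", n)
                wg.Done()
            }
        }()

        // Producer: Add before the send, so Wait can never observe a zero
        // counter while an item is still in flight.
        for i := 0; i < 3; i++ {
            wg.Add(1)
            work <- i
        }

        wg.Wait()
        close(work)
    }
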
4 changes: 4 additions & 0 deletions fuzzer/arachni/arachni.go
@@ -53,6 +53,10 @@ func (s *Fuzzer) Fuzz(g *gryffin.Scan) (count int, err error) {

count = s.extract(g, string(output))

if err != nil {
return
}

g.Logm("Arachni.Scan", fmt.Sprintf("Arachni return %t", cmd.ProcessState.Success()))
return

4 changes: 4 additions & 0 deletions fuzzer/sqlmap/sqlmap.go
@@ -79,6 +79,10 @@ func (s *Fuzzer) Fuzz(g *gryffin.Scan) (count int, err error) {

output, err := cmd.Output()

if err != nil {
return
}

count = s.extract(g, string(output))

g.Logm("SQLMap.Scan", fmt.Sprintf("SQLMap return %t", cmd.ProcessState.Success()))
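
Both fuzzer wrappers (arachni above, sqlmap here) gain the same guard: if exec.Cmd's Output fails (binary missing from PATH, non-zero exit, and so on), the method returns before logging via cmd.ProcessState. A minimal reproduction of the shape of that guard, using a generic command rather than sqlmap or arachni:

    package main

    import (
        "fmt"
        "os/exec"
    )

    func run() (count int, err error) {
        output, err := exec.Command("echo", "hello").Output()
        if err != nil {
            // Bail out early; for some failures (e.g. binary not found)
            // there is no finished ProcessState to inspect.
            return
        }
        count = len(output)
        fmt.Println("output bytes:", count)
        return
    }

    func main() {
        if _, err := run(); err != nil {
            fmt.Println("fuzzer failed:", err)
        }
    }
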
21 changes: 21 additions & 0 deletions global.go
@@ -0,0 +1,21 @@
// Copyright 2015, Yahoo Inc. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package gryffin

import (
"io"
// "io/ioutil"
)

var memoryStore = NewGryffinStore(nil)
var logWriter io.Writer

func SetMemoryStore(m *GryffinStore) {
memoryStore = m
}

func SetLogWriter(w io.Writer) {
logWriter = w
}
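
global.go centralizes the two pieces of state that used to travel inside every Scan. A short usage sketch; the argument to NewGryffinStore is assumed here to be optional shared-store wiring, with nil meaning purely local, as the default above implies:

    package main

    import (
        "os"

        "github.com/yahoo/gryffin"
    )

    func main() {
        // Optionally swap the default in-memory store for one built with
        // custom wiring (nil keeps it local-only).
        gryffin.SetMemoryStore(gryffin.NewGryffinStore(nil))

        // With no writer set, logging is skipped; route it to stderr here.
        gryffin.SetLogWriter(os.Stderr)
    }
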
70 changes: 21 additions & 49 deletions gryffin.go
@@ -12,16 +12,13 @@ import (
"encoding/json"
"fmt"
"hash/fnv"
"io"
"io/ioutil"
"net/http"
"net/http/cookiejar"
"net/url"
"strconv"
"strings"
"time"

"github.com/yahoo/gryffin/data"
"github.com/yahoo/gryffin/html-distance"
)

@@ -38,8 +35,6 @@ type Scan struct {
Cookies []*http.Cookie
Fingerprint Fingerprint
HitCount int
LogWriter io.Writer
Session data.Store
}

// Job stores the job id and config (if any).
@@ -84,10 +79,8 @@ type LogMessage struct {
Url string
}

var memoryStore = NewGryffinStore(nil, nil)

// NewScan creates a scan.
func NewScan(method, url, post string, session data.Store, writer io.Writer) *Scan {
func NewScan(method, url, post string) *Scan {

id := GenRandomID()

@@ -110,25 +103,12 @@ func NewScan(method, url, post string, session data.Store, writer io.Writer) *Scan {
Job: job,
Request: req,
RequestBody: post,
Session: session,
LogWriter: writer,
}
}

func NewScanFromJson(b []byte, base *Scan) *Scan {
var scan Scan
json.Unmarshal(b, &scan)
if base != nil {
scan.LogWriter = base.LogWriter
scan.Session = base.Session
}
// fmt.Printf("Cookie: %v\n", scan.Cookies)
return &scan
}

// getOrigin returns the Origin of the URL (scheme, hostname, port)
func getOrigin(u *url.URL) string {
return u.Scheme + u.Host
return u.Scheme + "://" + u.Host
}
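
The getOrigin fix is small but real: without the "://" separator, scheme and host concatenate into a string that is not an origin at all, so any origin comparison built on it would misbehave. For example:

    package main

    import (
        "fmt"
        "net/url"
    )

    func main() {
        u, _ := url.Parse("https://example.com/login")
        fmt.Println(u.Scheme + u.Host)         // "httpsexample.com" (old, wrong)
        fmt.Println(u.Scheme + "://" + u.Host) // "https://example.com" (fixed)
    }
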

// MergeRequest merges the request field in scan with the existing one.
@@ -143,8 +123,11 @@ func (s *Scan) MergeRequest(req *http.Request) {
// read the request body, and then reset the reader
var post []byte
if req.Body != nil {
if post, err := ioutil.ReadAll(req.Body); err != nil {
if post, err := ioutil.ReadAll(req.Body); err == nil {
req.Body = ioutil.NopCloser(bytes.NewReader(post))
} else {
// the only possible error is bytes.ErrTooLarge from the ioutil package.
s.Error("MergeRequest", err)
}
}
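
The corrected condition (err == nil, not err != nil) restores the standard read-and-restore idiom for request bodies: ReadAll drains the one-shot reader, and NopCloser wraps the captured bytes back into a ReadCloser so the body can be read again downstream. In isolation:

    package main

    import (
        "bytes"
        "fmt"
        "io/ioutil"
        "net/http"
        "strings"
    )

    func main() {
        req, _ := http.NewRequest("POST", "http://example.com/", strings.NewReader("a=1&b=2"))

        // Drain the body once...
        post, err := ioutil.ReadAll(req.Body)
        if err == nil {
            // ...then put an equivalent reader back so the request stays usable.
            req.Body = ioutil.NopCloser(bytes.NewReader(post))
        }

        again, _ := ioutil.ReadAll(req.Body)
        fmt.Println(bytes.Equal(post, again)) // true
    }
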

@@ -210,8 +193,6 @@ func (s *Scan) Spawn() *Scan {
Request: &req,
RequestBody: post,
Cookies: cookies,
Session: s.Session,
LogWriter: s.LogWriter,
}
}

@@ -337,7 +318,7 @@ func (s *Scan) IsScanAllowed() bool {
func (s *Scan) CrawlAsync(r Renderer) {
s.Logm("CrawlAsync", "Started")
if s.IsScanAllowed() {
go r.Do(s)
r.Do(s)
} else {
s.Logm("CrawlAsync", "Scan Not Allowed")
}
@@ -347,8 +328,8 @@ func (s *Scan) IsDuplicatedPage() bool {
func (s *Scan) IsDuplicatedPage() bool {
s.UpdateFingerprint()
f := s.Fingerprint.ResponseSimilarity
if !memoryStore.Seen(s, f, 2) {
memoryStore.See(s, f)
if !memoryStore.Seen(s.Job.ID, "oracle", f, 2) {
memoryStore.See(s.Job.ID, "oracle", f)
s.Logm("IsDuplicatedPage", "Unique Page")
return false
} else {
@@ -369,28 +350,19 @@ func (s *Scan) Fuzz(fuzzer Fuzzer) (int, error) {
// return
// }

// ApplyLinkRules checks if the links should be queued for next crawl.
func (s *Scan) ApplyLinkRules() bool {
// ShouldCrawl checks if the links should be queued for next crawl.
func (s *Scan) ShouldCrawl() bool {

s.UpdateFingerprint()
store := s.Session

// k := "hash/" + hex.EncodeToString(s.Fingerprint.URL)
k := "hash/url/" + strconv.FormatUint(s.Fingerprint.URL, 16)

// link seen before.
if _, ok := store.Get(k); ok {
// s.Logm("LinkRules", "Duplicated")
return false
f := s.Fingerprint.URL
if !memoryStore.Seen(s.Job.ID, "hash", f, 0) {
memoryStore.See(s.Job.ID, "hash", f)
s.Logm("ShouldCrawl", "Unique Link")
return true
} else {
s.Logm("ShouldCrawl", "Duplicate Link")
}
store.Set(k, true)
return true
return false
}

func (s *Scan) Json() []byte {
b, _ := json.Marshal(s)
return b
}

// TODO - LogFmt (fmt string)
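
Both dedup sites now go through the shared store with a (job ID, namespace, fingerprint, distance) shape: IsDuplicatedPage uses the "oracle" namespace with a fuzzy threshold of 2, while ShouldCrawl uses "hash" with 0 (exact match). A toy stand-in that illustrates the assumed semantics, where "seen" means some recorded fingerprint lies within the given number of differing bits:

    package main

    import (
        "fmt"
        "math/bits"
    )

    // store is a toy stand-in for GryffinStore, keyed by job ID and namespace.
    type store struct{ seen map[string][]uint64 }

    func (s *store) See(job, prefix string, f uint64) {
        k := job + "/" + prefix
        s.seen[k] = append(s.seen[k], f)
    }

    // Seen reports whether any recorded fingerprint is within radius bits of f:
    // radius 2 for the page "oracle", radius 0 (exact) for link hashes.
    func (s *store) Seen(job, prefix string, f uint64, radius int) bool {
        for _, g := range s.seen[job+"/"+prefix] {
            if bits.OnesCount64(f^g) <= radius {
                return true
            }
        }
        return false
    }

    func main() {
        st := &store{seen: make(map[string][]uint64)}
        st.See("job1", "oracle", 0x0A)
        fmt.Println(st.Seen("job1", "oracle", 0x08, 2)) // true: one bit apart
        fmt.Println(st.Seen("job1", "hash", 0x0A, 0))   // false: different namespace
    }
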
@@ -423,9 +395,9 @@ func (s *Scan) Logf(format string, a ...interface{}) {
}

func (s *Scan) Log(v interface{}) {
if s.LogWriter == nil {
if logWriter == nil {
return
}
encoder := json.NewEncoder(s.LogWriter)
encoder := json.NewEncoder(logWriter)
encoder.Encode(v)
}
