-
Notifications
You must be signed in to change notification settings - Fork 6
/
dump.go
116 lines (110 loc) · 2.72 KB
/
dump.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
package solrdump
import (
"encoding/json"
"fmt"
"io"
"io/ioutil"
"log"
"net/url"
"strings"
"github.com/sethgrid/pester"
)
// Response is the part of a SOLR JSON select response that this package
// reads. Keys that are not listed here are ignored during decoding.
type Response struct {
	// Header is decoded from the "responseHeader" object, which is the key
	// SOLR uses in its JSON output. Params holds the request parameters as
	// echoed by the server, which is why Rows is a string.
	Header struct {
		Status int `json:"status"`
		QTime  int `json:"QTime"`
		Params struct {
			Query      string `json:"q"`
			CursorMark string `json:"cursorMark"`
			Sort       string `json:"sort"`
			Rows       string `json:"rows"`
		} `json:"params"`
	} `json:"responseHeader"`
	// Response is the result page itself.
	Response struct {
		NumFound int `json:"numFound"`
		Start    int `json:"start"`
		// Docs are kept as raw JSON, since their shape depends on the SOLR
		// schema.
		Docs []json.RawMessage `json:"docs"`
	} `json:"response"`
	// NextCursorMark is the cursor to send with the next request when paging
	// with cursorMark.
	NextCursorMark string `json:"nextCursorMark"`
}
// PrependSchema returns s with "http://" prepended, unless s already starts
// with an "http://" or "https://" scheme, in which case s is returned
// unchanged. The scheme is matched case-insensitively, and a host name that
// merely begins with the letters "http" (e.g. "httpbin.org") still gets the
// scheme prepended.
func PrependSchema(s string) string {
	lower := strings.ToLower(s)
	if strings.HasPrefix(lower, "http://") || strings.HasPrefix(lower, "https://") {
		return s
	}
	return "http://" + s
}
// Dumper holds the settings for extracting documents from a SOLR server; the
// extraction itself is performed by Run.
type Dumper struct {
	// Writer is an output destination.
	// NOTE(review): presumably where dumped documents should go; confirm that Run honors it.
	Writer io.Writer
	// Server is the URL prefix to which Run appends "/select" and the query string.
	Server string
	// Fields is sent as the "fl" request parameter.
	Fields string
	// Sort is sent as the "sort" request parameter.
	Sort string
	// Query is sent as the "q" request parameter.
	Query string
	// NumRows is sent as the "rows" request parameter of every request.
	NumRows int
	// Wt names the response format; Run only implements "json".
	Wt string
	// NOTE(review): presumably disables TLS certificate checks; it is not
	// referenced by Run in this file, so confirm where it is consumed.
	SkipCertificateVerification bool
	// Verbose makes Run log each request URL and the running document count.
	Verbose bool
}
// Run fetches all documents matching d.Query from d.Server and writes them
// out, one raw JSON document per line. Documents go to d.Writer, or to
// standard output when d.Writer is nil.
//
// The result set is paged with the "cursorMark" parameter: the first request
// uses "*", each following request uses the nextCursorMark of the previous
// response, and the loop ends when the server hands back the cursor that was
// just sent.
//
// Only the "json" response format is implemented; an empty d.Wt is treated as
// "json". Run returns an error for any other format, for a failed request, for
// an HTTP status of 400 or above (the response body is logged for debugging),
// for an undecodable response, for a failed write, and for a response that
// carries no nextCursorMark.
func (d *Dumper) Run() error {
	// The request below always asks for JSON, so an unset Wt means "json".
	// Other formats are rejected here rather than after a wasted round trip.
	if d.Wt != "" && d.Wt != "json" {
		return fmt.Errorf("wt=%s not implemented", d.Wt)
	}
	var (
		total int
		v     = url.Values{}
	)
	v.Set("q", d.Query)
	v.Set("sort", d.Sort)
	v.Set("rows", fmt.Sprintf("%d", d.NumRows))
	v.Set("fl", d.Fields)
	v.Set("wt", "json")
	v.Set("cursorMark", "*")

	for {
		link := fmt.Sprintf("%s/select?%s", d.Server, v.Encode())
		if d.Verbose {
			log.Println(link)
		}
		resp, err := pester.Get(link)
		if err != nil {
			return fmt.Errorf("http: %s", err)
		}
		// The body is closed explicitly on every path below. A defer inside
		// this loop would only run when Run returns and would pile up one
		// pending call per page.
		if resp.StatusCode >= 400 {
			b, err := ioutil.ReadAll(resp.Body)
			resp.Body.Close() // best effort; the status error below is what gets reported
			if err != nil {
				log.Println("failed to fetch response body for debugging")
			}
			log.Printf("response body (%d): %s", len(b), string(b))
			return fmt.Errorf("status: %v", resp.Status)
		}

		var response Response
		// The original author recorded the decoder error
		// "invalid character '\r' in string literal" at this point;
		// presumably some responses contain raw carriage returns inside
		// strings (unverified).
		decodeErr := json.NewDecoder(resp.Body).Decode(&response)
		closeErr := resp.Body.Close()
		if decodeErr != nil {
			return fmt.Errorf("decode: %s", decodeErr)
		}
		if closeErr != nil {
			return closeErr
		}

		for _, doc := range response.Response.Docs {
			var werr error
			if d.Writer != nil {
				_, werr = fmt.Fprintln(d.Writer, string(doc))
			} else {
				_, werr = fmt.Println(string(doc))
			}
			if werr != nil {
				// Stop paging once the output can no longer be written.
				return fmt.Errorf("write: %s", werr)
			}
		}
		total += len(response.Response.Docs)
		if d.Verbose {
			log.Printf("fetched %d docs", total)
		}

		if response.NextCursorMark == v.Get("cursorMark") {
			break
		}
		if response.NextCursorMark == "" {
			// Without a cursor the next request could not advance; fail
			// instead of sending an empty cursorMark.
			return fmt.Errorf("response carries no nextCursorMark")
		}
		v.Set("cursorMark", response.NextCursorMark)
	}
	if d.Verbose {
		log.Printf("fetched %d docs", total)
	}
	return nil
}