File README.md changed (mode: 100644) (index 9f22177..d2ae41c) |
... |
... |
If you don't have a Go toolchain, grab an executable from the Releases tab |
10 |
10 |
|
|
11 |
11 |
##### Project structure |
##### Project structure |
12 |
12 |
|
|
13 |
|
- _/common_: commonly used HTTP code |
|
|
13 |
|
- _/controller_: Manages workers (sends tasks, gets results, …) |
|
14 |
|
- _/common_: Commonly used HTTP code |
14 |
15 |
- _/data_: Data structures |
- _/data_: Data structures |
|
16 |
|
- _/db_: MongoDB connection |
15 |
17 |
- _/classic_: Extractor calling the HTML `/watch` API |
- _/classic_: Extractor calling the HTML `/watch` API |
16 |
18 |
- _/watchapi_: Extractor calling the JSON `/watch` API |
- _/watchapi_: Extractor calling the JSON `/watch` API |
17 |
19 |
|
|
File browseajax/grab.go added (mode: 100644) (index 0000000..7427999) |
|
1 |
|
package browseajax |
|
2 |
|
|
|
3 |
|
import ( |
|
4 |
|
"net/http" |
|
5 |
|
"github.com/terorie/yt-mango/common" |
|
6 |
|
"errors" |
|
7 |
|
"io/ioutil" |
|
8 |
|
"github.com/valyala/fastjson" |
|
9 |
|
) |
|
10 |
|
|
|
11 |
|
const mainURL = "https://www.youtube.com/browse_ajax?ctoken=" |
|
12 |
|
|
|
13 |
|
func GrabPage(channelID string, page uint) (*fastjson.Value, error) { |
|
14 |
|
// Generate page URL |
|
15 |
|
token := GenerateToken(channelID, uint64(page)) |
|
16 |
|
url := mainURL + token |
|
17 |
|
|
|
18 |
|
// Prepare request |
|
19 |
|
req, err := http.NewRequest("GET", url, nil) |
|
20 |
|
if err != nil { return nil, err } |
|
21 |
|
req.Header.Add("X-YouTube-Client-Name", "1") |
|
22 |
|
req.Header.Add("X-YouTube-Client-Version", "2.20180726") |
|
23 |
|
|
|
24 |
|
// Send request |
|
25 |
|
res, err := common.Client.Do(req) |
|
26 |
|
if err != nil { return nil, err } |
|
27 |
|
if res.StatusCode == 500 { |
|
28 |
|
defer res.Body.Close() |
|
29 |
|
buf, _ := ioutil.ReadAll(res.Body) |
|
30 |
|
println(string(buf)) |
|
31 |
|
} |
|
32 |
|
if res.StatusCode != 200 { return nil, errors.New("HTTP failure") } |
|
33 |
|
|
|
34 |
|
// Download response |
|
35 |
|
defer res.Body.Close() |
|
36 |
|
buf, err := ioutil.ReadAll(res.Body) |
|
37 |
|
if err != nil { return nil, err } |
|
38 |
|
|
|
39 |
|
// Parse JSON |
|
40 |
|
var p fastjson.Parser |
|
41 |
|
root, err := p.ParseBytes(buf) |
|
42 |
|
if err != nil { return nil, err } |
|
43 |
|
|
|
44 |
|
return root, nil |
|
45 |
|
} |
File browseajax/parse.go added (mode: 100644) (index 0000000..60abbe2) |
|
1 |
|
package browseajax |
|
2 |
|
|
|
3 |
|
import ( |
|
4 |
|
"github.com/valyala/fastjson" |
|
5 |
|
"errors" |
|
6 |
|
) |
|
7 |
|
|
|
8 |
|
var missingData = errors.New("missing data") |
|
9 |
|
|
|
10 |
|
func ParsePage(rootObj *fastjson.Value) error { |
|
11 |
|
// Root as array |
|
12 |
|
root, err := rootObj.Array() |
|
13 |
|
if err != nil { return err } |
|
14 |
|
|
|
15 |
|
// Find response container |
|
16 |
|
var container *fastjson.Value |
|
17 |
|
for _, item := range root { |
|
18 |
|
if item.Exists("response") { |
|
19 |
|
container = item |
|
20 |
|
break |
|
21 |
|
} |
|
22 |
|
} |
|
23 |
|
if container == nil { return missingData } |
|
24 |
|
|
|
25 |
|
// Get error obj |
|
26 |
|
|
|
27 |
|
// Get items from grid |
|
28 |
|
itemsObj := container.Get( |
|
29 |
|
"response", |
|
30 |
|
"continuationContents", |
|
31 |
|
"gridContinuation", |
|
32 |
|
"items", |
|
33 |
|
) |
|
34 |
|
if itemsObj == nil { return missingData } |
|
35 |
|
|
|
36 |
|
// Items as array |
|
37 |
|
items, err := itemsObj.Array() |
|
38 |
|
if err != nil { return err } |
|
39 |
|
|
|
40 |
|
// Enumerate |
|
41 |
|
for _, item := range items { |
|
42 |
|
// Find URL |
|
43 |
|
urlObj := item.Get( |
|
44 |
|
"gridVideoRenderer", |
|
45 |
|
"navigationEndpoint", |
|
46 |
|
"commandMetadata", |
|
47 |
|
"webCommandMetadata", |
|
48 |
|
"url", |
|
49 |
|
) |
|
50 |
|
if urlObj == nil { return missingData } |
|
51 |
|
|
|
52 |
|
// URL as string |
|
53 |
|
urlBytes, err := urlObj.StringBytes() |
|
54 |
|
if err != nil { return err } |
|
55 |
|
url := string(urlBytes) |
|
56 |
|
|
|
57 |
|
println(url) |
|
58 |
|
} |
|
59 |
|
return nil |
|
60 |
|
} |
File browseajax/token.go added (mode: 100644) (index 0000000..fbfdecf) |
|
1 |
|
package browseajax |
|
2 |
|
|
|
3 |
|
import (
	"bytes"
	"encoding/base64"
	"encoding/binary"
	"strconv"
)
|
8 |
|
|
|
9 |
|
func GenerateToken(channelId string, page uint64) string { |
|
10 |
|
// Generate the inner token |
|
11 |
|
token := genInnerToken(page) |
|
12 |
|
|
|
13 |
|
// Build the inner object |
|
14 |
|
var inner bytes.Buffer |
|
15 |
|
|
|
16 |
|
// channelId |
|
17 |
|
inner.WriteByte(0x12) // type |
|
18 |
|
writeVarint(&inner, uint64(len(channelId))) // len |
|
19 |
|
inner.WriteString(channelId) // data |
|
20 |
|
|
|
21 |
|
// token |
|
22 |
|
inner.WriteByte(0x1a) // type |
|
23 |
|
writeVarint(&inner, uint64(len(token))) // len |
|
24 |
|
inner.WriteString(token) // data |
|
25 |
|
|
|
26 |
|
innerBytes := inner.Bytes() |
|
27 |
|
|
|
28 |
|
var root bytes.Buffer |
|
29 |
|
|
|
30 |
|
// innerBytes |
|
31 |
|
root.Write([]byte{0xe2, 0xa9, 0x85, 0xb2, 0x02}) // probably types |
|
32 |
|
writeVarint(&root, uint64(len(innerBytes))) |
|
33 |
|
root.Write(innerBytes) |
|
34 |
|
|
|
35 |
|
rootBytes := root.Bytes() |
|
36 |
|
|
|
37 |
|
return base64.URLEncoding.EncodeToString(rootBytes) |
|
38 |
|
} |
|
39 |
|
|
|
40 |
|
func genInnerToken(page uint64) string { |
|
41 |
|
var buf bytes.Buffer |
|
42 |
|
|
|
43 |
|
pageStr := strconv.FormatUint(page, 10) |
|
44 |
|
|
|
45 |
|
// Probably protobuf |
|
46 |
|
buf.Write([]byte{0x12, 0x06}) |
|
47 |
|
buf.WriteString("videos") |
|
48 |
|
buf.Write([]byte{ |
|
49 |
|
0x20, 0x00, 0x30, 0x01, 0x38, 0x01, 0x60, 0x01, |
|
50 |
|
0x6a, 0x00, 0x7a, |
|
51 |
|
}) |
|
52 |
|
// Write size-prefixed page string |
|
53 |
|
writeVarint(&buf, uint64(len(pageStr))) |
|
54 |
|
buf.WriteString(pageStr) |
|
55 |
|
buf.Write([]byte{0xb8, 0x01, 0x00}) |
|
56 |
|
|
|
57 |
|
return base64.URLEncoding.EncodeToString(buf.Bytes()) |
|
58 |
|
} |
|
59 |
|
|
|
60 |
|
// writeVarint appends n to buf as an unsigned base-128 varint: seven payload
// bits per byte, least-significant group first, with the high bit set on
// every byte except the last.
//
// The encoding is delegated to encoding/binary, which emits exactly this
// format, instead of a hand-rolled loop.
func writeVarint(buf *bytes.Buffer, n uint64) {
	var enc [binary.MaxVarintLen64]byte
	size := binary.PutUvarint(enc[:], n)
	buf.Write(enc[:size])
}
File classic/grab.go changed (mode: 100644) (index d347c83..48b28ad) |
... |
... |
import ( |
4 |
4 |
"net/http" |
"net/http" |
5 |
5 |
"errors" |
"errors" |
6 |
6 |
"encoding/xml" |
"encoding/xml" |
7 |
|
"time" |
|
8 |
7 |
"github.com/PuerkitoBio/goquery" |
"github.com/PuerkitoBio/goquery" |
9 |
8 |
"github.com/terorie/yt-mango/data" |
"github.com/terorie/yt-mango/data" |
|
9 |
|
"github.com/terorie/yt-mango/common" |
10 |
10 |
) |
) |
11 |
11 |
|
|
12 |
|
var transport = http.Transport{ |
|
13 |
|
MaxIdleConns: 10, |
|
14 |
|
IdleConnTimeout: 30 * time.Second, |
|
15 |
|
} |
|
16 |
|
var client = http.Client{Transport: &transport} |
|
17 |
|
|
|
18 |
12 |
const mainURL = "https://www.youtube.com/watch?has_verified=1&bpctr=6969696969&v=" |
const mainURL = "https://www.youtube.com/watch?has_verified=1&bpctr=6969696969&v=" |
19 |
13 |
const subtitleURL = "https://video.google.com/timedtext?type=list&v=" |
const subtitleURL = "https://video.google.com/timedtext?type=list&v=" |
20 |
14 |
|
|
|
... |
... |
func grab(v *data.Video) (doc *goquery.Document, err error) { |
24 |
18 |
if err != nil { return } |
if err != nil { return } |
25 |
19 |
requestHeader(&req.Header) |
requestHeader(&req.Header) |
26 |
20 |
|
|
27 |
|
res, err := client.Do(req) |
|
|
21 |
|
res, err := common.Client.Do(req) |
28 |
22 |
if err != nil { return } |
if err != nil { return } |
29 |
23 |
if res.StatusCode != 200 { return nil, errors.New("HTTP failure") } |
if res.StatusCode != 200 { return nil, errors.New("HTTP failure") } |
30 |
24 |
|
|
|
... |
... |
func grab(v *data.Video) (doc *goquery.Document, err error) { |
38 |
32 |
// Grabs and parses a subtitle list |
// Grabs and parses a subtitle list |
39 |
33 |
func grabSubtitleList(v *data.Video) (err error) { |
func grabSubtitleList(v *data.Video) (err error) { |
40 |
34 |
req, err := http.NewRequest("GET", subtitleURL + v.ID, nil) |
req, err := http.NewRequest("GET", subtitleURL + v.ID, nil) |
41 |
|
|
|
42 |
35 |
if err != nil { return err } |
if err != nil { return err } |
43 |
|
requestHeader(&req.Header) |
|
44 |
36 |
|
|
45 |
37 |
res, err := client.Do(req) |
res, err := client.Do(req) |
46 |
|
|
|
47 |
38 |
if err != nil { return err } |
if err != nil { return err } |
48 |
39 |
if res.StatusCode != 200 { return errors.New("HTTP failure") } |
if res.StatusCode != 200 { return errors.New("HTTP failure") } |
49 |
40 |
|
|
|
... |
... |
func grabSubtitleList(v *data.Video) (err error) { |
60 |
51 |
|
|
61 |
52 |
return |
return |
62 |
53 |
} |
} |
63 |
|
|
|
64 |
|
// Important: |
|
65 |
|
// - Set header "Accept-Language: en-US" or else parser might break |
|
66 |
|
// - Set header "User-Agent: youtube-mango/1.0" |
|
67 |
|
func requestHeader(h *http.Header) { |
|
68 |
|
h.Add("Accept-Language", "en-US") |
|
69 |
|
h.Add("User-Agent", "youtube-mango/0.1") |
|
70 |
|
} |
|
File common/http.go changed (mode: 100644) (index ed5ba4c..66bb07f) |
... |
... |
package common |
2 |
2 |
|
|
3 |
3 |
import "net/http" |
import "net/http" |
4 |
4 |
|
|
5 |
|
var Client = http.Client{Transport: http.DefaultTransport} |
|
|
5 |
|
// transport is an http.RoundTripper that adds the project's default headers
// to every outgoing request and then delegates to http.DefaultTransport.
type transport struct{}

// RoundTrip sends r through http.DefaultTransport with these headers added:
//   - "Accept-Language: en-US", or else the parser might break
//   - "User-Agent: youtube-mango/0.1"
//
// The headers are added to a copy of the request. The http.RoundTripper
// contract forbids modifying the caller's request, and a request that is
// retried or reused would otherwise accumulate duplicate header values.
func (t transport) RoundTrip(r *http.Request) (*http.Response, error) {
	// Shallow-copy the request and give the copy its own header map.
	req := new(http.Request)
	*req = *r
	req.Header = make(http.Header, len(r.Header)+2)
	for key, values := range r.Header {
		req.Header[key] = append([]string(nil), values...)
	}

	req.Header.Add("Accept-Language", "en-US")
	req.Header.Add("User-Agent", "youtube-mango/0.1")
	return http.DefaultTransport.RoundTrip(req)
}
|
16 |
|
|
|
17 |
|
var Client = http.Client{Transport: transport{}} |
File main.go changed (mode: 100644) (index 586ff5e..3f7005f) |
5 |
5 |
package main |
package main |
6 |
6 |
|
|
7 |
7 |
import ( |
import ( |
8 |
|
"encoding/json" |
|
9 |
|
"github.com/terorie/yt-mango/data" |
|
10 |
|
"github.com/terorie/yt-mango/classic" |
|
|
8 |
|
"github.com/spf13/cobra" |
|
9 |
|
"fmt" |
|
10 |
|
"os" |
11 |
11 |
) |
) |
12 |
12 |
|
|
|
13 |
|
const Version = "v0.1 -- dev" |
|
14 |
|
|
|
15 |
|
func printVersion(_ *cobra.Command, _ []string) { |
|
16 |
|
fmt.Println("YT-Mango archiver", Version) |
|
17 |
|
} |
|
18 |
|
|
13 |
19 |
func main() { |
func main() { |
14 |
|
v := data.Video{ID: "kj9mFK62c6E"} |
|
|
20 |
|
rootCmd := cobra.Command{ |
|
21 |
|
Use: "yt-mango", |
|
22 |
|
Short: "YT-Mango is a scalable video metadata archiver", |
|
23 |
|
Long: "YT-Mango is a scalable video metadata archiving utility\n" + |
|
24 |
|
"written by terorie with help from the-eye.eu", |
|
25 |
|
} |
15 |
26 |
|
|
16 |
|
err := classic.Get(&v) |
|
17 |
|
if err != nil { panic(err) } |
|
|
27 |
|
versionCmd := cobra.Command{ |
|
28 |
|
Use: "version", |
|
29 |
|
Short: "Get the version number of yt-mango", |
|
30 |
|
Run: printVersion, |
|
31 |
|
} |
18 |
32 |
|
|
19 |
|
jsn, err := json.MarshalIndent(v, "", "\t") |
|
20 |
|
if err != nil { panic(err) } |
|
|
33 |
|
rootCmd.AddCommand(&versionCmd) |
21 |
34 |
|
|
22 |
|
println(string(jsn)) |
|
|
35 |
|
if err := rootCmd.Execute(); err != nil { |
|
36 |
|
fmt.Fprintln(os.Stderr, err) |
|
37 |
|
os.Exit(1) |
|
38 |
|
} |
23 |
39 |
} |
} |
File pretty/ansi.go added (mode: 100644) (index 0000000..91e06bd) |
|
1 |
|
package pretty |
|
2 |
|
|
|
3 |
|
import ( |
|
4 |
|
"bytes" |
|
5 |
|
) |
|
6 |
|
|
|
7 |
|
type (
	// Code is a single escape-sequence parameter, stored as its decimal text.
	Code string

	// Codes is a list of Code values that are applied together.
	Codes []Code

	// Effect transforms a string, typically by decorating it for terminal
	// output.
	Effect interface {
		E(string) string
	}

	// nilEffect is the empty effect: it returns its input unchanged.
	nilEffect struct{}

	// customEffect adapts a plain function to the Effect interface.
	customEffect func(string) string
)

// E returns x unchanged.
func (nilEffect) E(x string) string {
	return x
}

// E applies the wrapped function to x and returns its result.
func (e customEffect) E(x string) string {
	return e(x)
}
|
22 |
|
|
|
23 |
|
const ( |
|
24 |
|
RESET = Code("0") |
|
25 |
|
BOLD = Code("1") |
|
26 |
|
DIM = Code("2") |
|
27 |
|
ITALIC = Code("3") |
|
28 |
|
UNDERL = Code("4") |
|
29 |
|
INV = Code("7") |
|
30 |
|
HIDDEN = Code("8") |
|
31 |
|
STRIKE = Code("9") |
|
32 |
|
BLACK = Code("30") |
|
33 |
|
RED = Code("31") |
|
34 |
|
GREEN = Code("32") |
|
35 |
|
YELLOW = Code("33") |
|
36 |
|
BLUE = Code("34") |
|
37 |
|
MGNTA = Code("35") |
|
38 |
|
CYAN = Code("36") |
|
39 |
|
WHITE = Code("37") |
|
40 |
|
HBLACK = Code("90") |
|
41 |
|
HRED = Code("91") |
|
42 |
|
HGREEN = Code("92") |
|
43 |
|
HYELLOW = Code("93") |
|
44 |
|
HBLUE = Code("94") |
|
45 |
|
HMGNTA = Code("95") |
|
46 |
|
HCYAN = Code("96") |
|
47 |
|
HWHITE = Code("97") |
|
48 |
|
) |
|
49 |
|
|
|
50 |
|
func Add(x... Code) Codes { |
|
51 |
|
return Codes(x) |
|
52 |
|
} |
|
53 |
|
|
|
54 |
|
func (c Code) E(x string) string { |
|
55 |
|
if !isTTY { return x } |
|
56 |
|
return "\x1b[" + string(c) + "m" + x + "\x1b[0m" |
|
57 |
|
} |
|
58 |
|
|
|
59 |
|
func (cs Codes) E(x string) string { |
|
60 |
|
if !isTTY { return x } |
|
61 |
|
var b bytes.Buffer |
|
62 |
|
b.WriteString("\x1b[") |
|
63 |
|
for _, c := range cs { |
|
64 |
|
b.WriteRune(';') |
|
65 |
|
b.WriteString(string(c)) |
|
66 |
|
} |
|
67 |
|
b.WriteRune('m') |
|
68 |
|
b.WriteString(x) |
|
69 |
|
b.WriteString("\x1b[0m") |
|
70 |
|
return b.String() |
|
71 |
|
} |
|
72 |
|
|
|
73 |
|
func Wrap(e Effect, wrapper string) Effect { |
|
74 |
|
if !isTTY { return nilEffect{} } |
|
75 |
|
return customEffect(func(s string) string { |
|
76 |
|
return e.E(wrapper[0:1]) + s + e.E(wrapper[1:2]) |
|
77 |
|
}) |
|
78 |
|
} |