List of commits:
Subject Hash Author Date (UTC)
Cobra & channel video dumper b6a0e418403b24df1162b749878d24a9918f9b8c terorie 2018-07-28 23:49:38
Rename project to yt-mango 0f19dcfd3f54c3902a0aa97b8c0f4aa932538d73 terorie 2018-07-28 15:10:49
Remove debug output 9f5b9dd87324b30044723539875ee8a419b7eee0 terorie 2018-07-28 14:45:13
Decode video formats bafc6c4ad3587e2de7a88cf532ad1df5ee5a7964 terorie 2018-07-28 14:34:57
Add formats from youtube-dl e612768d79ed1743b415073d9dba713e785e2ea6 terorie 2018-07-28 13:33:48
Initial README 0e9d46ecadcc5b019b9fae4e356bc3360f521b5e terorie 2018-07-28 03:56:26
Initial commit 3b5744c589494163e05a9d54fba10c71e5965d6a terorie 2018-07-28 03:55:53
Commit b6a0e418403b24df1162b749878d24a9918f9b8c - Cobra & channel video dumper
Author: terorie
Author date (UTC): 2018-07-28 23:49
Committer name: terorie
Committer date (UTC): 2018-07-28 23:50
Parent(s): 0f19dcfd3f54c3902a0aa97b8c0f4aa932538d73
Signing key:
Tree: 0b2c023cc3867c8fd95743ae2fe6c08c3003530c
File Lines added Lines deleted
README.md 3 1
browseajax/get.go 9 0
browseajax/grab.go 45 0
browseajax/parse.go 60 0
browseajax/token.go 75 0
classic/grab.go 2 19
common/http.go 13 1
controller/control.go 13 0
main.go 25 9
pretty/ansi.go 78 0
pretty/istty.go 16 0
version/get.go 5 0
work.go 10 0
File README.md changed (mode: 100644) (index 9f22177..d2ae41c)
... ... If you don't have a Go toolchain, grab an executable from the Releases tab
10 10
11 11 ##### Project structure ##### Project structure
12 12
13 - _/common_: commonly used HTTP code
13 - _/controller_: Manages workers (sends tasks, gets results, …)
14 - _/common_: Commonly used HTTP code
14 15 - _/data_: Data structures - _/data_: Data structures
16 - _/db_: MongoDB connection
15 17 - _/classic_: Extractor calling the HTML `/watch` API - _/classic_: Extractor calling the HTML `/watch` API
16 18 - _/watchapi_: Extractor calling the JSON `/watch` API - _/watchapi_: Extractor calling the JSON `/watch` API
17 19
File browseajax/get.go added (mode: 100644) (index 0000000..0b7e94e)
1 package browseajax
2
3 func GetPage(channelID string, page uint) error {
4 root, err := GrabPage(channelID, page)
5 if err != nil { return err }
6 err = ParsePage(root)
7 if err != nil { return err }
8 return nil
9 }
File browseajax/grab.go added (mode: 100644) (index 0000000..7427999)
1 package browseajax
2
3 import (
4 "net/http"
5 "github.com/terorie/yt-mango/common"
6 "errors"
7 "io/ioutil"
8 "github.com/valyala/fastjson"
9 )
10
11 const mainURL = "https://www.youtube.com/browse_ajax?ctoken="
12
13 func GrabPage(channelID string, page uint) (*fastjson.Value, error) {
14 // Generate page URL
15 token := GenerateToken(channelID, uint64(page))
16 url := mainURL + token
17
18 // Prepare request
19 req, err := http.NewRequest("GET", url, nil)
20 if err != nil { return nil, err }
21 req.Header.Add("X-YouTube-Client-Name", "1")
22 req.Header.Add("X-YouTube-Client-Version", "2.20180726")
23
24 // Send request
25 res, err := common.Client.Do(req)
26 if err != nil { return nil, err }
27 if res.StatusCode == 500 {
28 defer res.Body.Close()
29 buf, _ := ioutil.ReadAll(res.Body)
30 println(string(buf))
31 }
32 if res.StatusCode != 200 { return nil, errors.New("HTTP failure") }
33
34 // Download response
35 defer res.Body.Close()
36 buf, err := ioutil.ReadAll(res.Body)
37 if err != nil { return nil, err }
38
39 // Parse JSON
40 var p fastjson.Parser
41 root, err := p.ParseBytes(buf)
42 if err != nil { return nil, err }
43
44 return root, nil
45 }
File browseajax/parse.go added (mode: 100644) (index 0000000..60abbe2)
1 package browseajax
2
3 import (
4 "github.com/valyala/fastjson"
5 "errors"
6 )
7
8 var missingData = errors.New("missing data")
9
10 func ParsePage(rootObj *fastjson.Value) error {
11 // Root as array
12 root, err := rootObj.Array()
13 if err != nil { return err }
14
15 // Find response container
16 var container *fastjson.Value
17 for _, item := range root {
18 if item.Exists("response") {
19 container = item
20 break
21 }
22 }
23 if container == nil { return missingData }
24
25 // Get error obj
26
27 // Get items from grid
28 itemsObj := container.Get(
29 "response",
30 "continuationContents",
31 "gridContinuation",
32 "items",
33 )
34 if itemsObj == nil { return missingData }
35
36 // Items as array
37 items, err := itemsObj.Array()
38 if err != nil { return err }
39
40 // Enumerate
41 for _, item := range items {
42 // Find URL
43 urlObj := item.Get(
44 "gridVideoRenderer",
45 "navigationEndpoint",
46 "commandMetadata",
47 "webCommandMetadata",
48 "url",
49 )
50 if urlObj == nil { return missingData }
51
52 // URL as string
53 urlBytes, err := urlObj.StringBytes()
54 if err != nil { return err }
55 url := string(urlBytes)
56
57 println(url)
58 }
59 return nil
60 }
File browseajax/token.go added (mode: 100644) (index 0000000..fbfdecf)
1 package browseajax
2
3 import (
4 "bytes"
5 "strconv"
6 "encoding/base64"
7 )
8
9 func GenerateToken(channelId string, page uint64) string {
10 // Generate the inner token
11 token := genInnerToken(page)
12
13 // Build the inner object
14 var inner bytes.Buffer
15
16 // channelId
17 inner.WriteByte(0x12) // type
18 writeVarint(&inner, uint64(len(channelId))) // len
19 inner.WriteString(channelId) // data
20
21 // token
22 inner.WriteByte(0x1a) // type
23 writeVarint(&inner, uint64(len(token))) // len
24 inner.WriteString(token) // data
25
26 innerBytes := inner.Bytes()
27
28 var root bytes.Buffer
29
30 // innerBytes
31 root.Write([]byte{0xe2, 0xa9, 0x85, 0xb2, 0x02}) // probably types
32 writeVarint(&root, uint64(len(innerBytes)))
33 root.Write(innerBytes)
34
35 rootBytes := root.Bytes()
36
37 return base64.URLEncoding.EncodeToString(rootBytes)
38 }
39
40 func genInnerToken(page uint64) string {
41 var buf bytes.Buffer
42
43 pageStr := strconv.FormatUint(page, 10)
44
45 // Probably protobuf
46 buf.Write([]byte{0x12, 0x06})
47 buf.WriteString("videos")
48 buf.Write([]byte{
49 0x20, 0x00, 0x30, 0x01, 0x38, 0x01, 0x60, 0x01,
50 0x6a, 0x00, 0x7a,
51 })
52 // Write size-prefixed page string
53 writeVarint(&buf, uint64(len(pageStr)))
54 buf.WriteString(pageStr)
55 buf.Write([]byte{0xb8, 0x01, 0x00})
56
57 return base64.URLEncoding.EncodeToString(buf.Bytes())
58 }
59
// writeVarint appends n to buf as a base-128 varint: seven payload bits per
// byte, least-significant group first, with the high bit set on every byte
// except the last.
func writeVarint(buf *bytes.Buffer, n uint64) {
	// A uint64 needs at most ten 7-bit groups.
	var scratch [10]byte
	size := 0

	// Emit continuation bytes while more than seven bits remain.
	for n >= 0x80 {
		scratch[size] = byte(n) | 0x80
		n >>= 7
		size++
	}

	// Final byte: high bit clear.
	scratch[size] = byte(n)
	size++

	buf.Write(scratch[:size])
}
File classic/grab.go changed (mode: 100644) (index d347c83..48b28ad)
... ... import (
4 4 "net/http" "net/http"
5 5 "errors" "errors"
6 6 "encoding/xml" "encoding/xml"
7 "time"
8 7 "github.com/PuerkitoBio/goquery" "github.com/PuerkitoBio/goquery"
9 8 "github.com/terorie/yt-mango/data" "github.com/terorie/yt-mango/data"
9 "github.com/terorie/yt-mango/common"
10 10 ) )
11 11
12 var transport = http.Transport{
13 MaxIdleConns: 10,
14 IdleConnTimeout: 30 * time.Second,
15 }
16 var client = http.Client{Transport: &transport}
17
18 12 const mainURL = "https://www.youtube.com/watch?has_verified=1&bpctr=6969696969&v=" const mainURL = "https://www.youtube.com/watch?has_verified=1&bpctr=6969696969&v="
19 13 const subtitleURL = "https://video.google.com/timedtext?type=list&v=" const subtitleURL = "https://video.google.com/timedtext?type=list&v="
20 14
 
... ... func grab(v *data.Video) (doc *goquery.Document, err error) {
24 18 if err != nil { return } if err != nil { return }
25 19 requestHeader(&req.Header) requestHeader(&req.Header)
26 20
27 res, err := client.Do(req)
21 res, err := common.Client.Do(req)
28 22 if err != nil { return } if err != nil { return }
29 23 if res.StatusCode != 200 { return nil, errors.New("HTTP failure") } if res.StatusCode != 200 { return nil, errors.New("HTTP failure") }
30 24
 
... ... func grab(v *data.Video) (doc *goquery.Document, err error) {
38 32 // Grabs and parses a subtitle list // Grabs and parses a subtitle list
39 33 func grabSubtitleList(v *data.Video) (err error) { func grabSubtitleList(v *data.Video) (err error) {
40 34 req, err := http.NewRequest("GET", subtitleURL + v.ID, nil) req, err := http.NewRequest("GET", subtitleURL + v.ID, nil)
41
42 35 if err != nil { return err } if err != nil { return err }
43 requestHeader(&req.Header)
44 36
45 37 res, err := client.Do(req) res, err := client.Do(req)
46
47 38 if err != nil { return err } if err != nil { return err }
48 39 if res.StatusCode != 200 { return errors.New("HTTP failure") } if res.StatusCode != 200 { return errors.New("HTTP failure") }
49 40
 
... ... func grabSubtitleList(v *data.Video) (err error) {
60 51
61 52 return return
62 53 } }
63
64 // Important:
65 // - Set header "Accept-Language: en-US" or else parser might break
66 // - Set header "User-Agent: youtube-mango/1.0"
67 func requestHeader(h *http.Header) {
68 h.Add("Accept-Language", "en-US")
69 h.Add("User-Agent", "youtube-mango/0.1")
70 }
File common/http.go changed (mode: 100644) (index ed5ba4c..66bb07f)
... ... package common
2 2
3 3 import "net/http" import "net/http"
4 4
5 var Client = http.Client{Transport: http.DefaultTransport}
// transport is an http.RoundTripper that stamps custom headers onto every
// outgoing request before delegating to http.DefaultTransport.
type transport struct{}

// RoundTrip sends r through http.DefaultTransport with two headers forced:
//   - "Accept-Language: en-US", or else the parser might break
//   - "User-Agent: youtube-mango/0.1"
//
// The http.RoundTripper contract forbids modifying the caller's request, so
// the headers are set on a copy. Set is used instead of Add so that a
// request passing through here more than once (for example on a redirect,
// where the client copies the previous headers) never accumulates duplicate
// values. A request with a nil Header map is handled as well.
func (t transport) RoundTrip(r *http.Request) (*http.Response, error) {
	// Shallow-copy the request, then give the copy its own header map.
	req := new(http.Request)
	*req = *r
	req.Header = make(http.Header, len(r.Header)+2)
	for name, values := range r.Header {
		req.Header[name] = append([]string(nil), values...)
	}

	req.Header.Set("Accept-Language", "en-US")
	req.Header.Set("User-Agent", "youtube-mango/0.1")
	return http.DefaultTransport.RoundTrip(req)
}

// Client is the shared HTTP client; all of its requests go through
// transport and therefore carry the custom headers.
var Client = http.Client{Transport: transport{}}
File controller/control.go added (mode: 100644) (index 0000000..c953390)
1 package controller
2
// Controller is an empty placeholder type; it has no fields yet.
type Controller struct{}

// NewController currently does nothing.
func (c *Controller) NewController() {}

// Schedule currently does nothing.
func (c *Controller) Schedule() {}
File main.go changed (mode: 100644) (index 586ff5e..3f7005f)
5 5 package main package main
6 6
7 7 import ( import (
8 "encoding/json"
9 "github.com/terorie/yt-mango/data"
10 "github.com/terorie/yt-mango/classic"
8 "github.com/spf13/cobra"
9 "fmt"
10 "os"
11 11 ) )
12 12
13 const Version = "v0.1 -- dev"
14
15 func printVersion(_ *cobra.Command, _ []string) {
16 fmt.Println("YT-Mango archiver", Version)
17 }
18
13 19 func main() { func main() {
14 v := data.Video{ID: "kj9mFK62c6E"}
20 rootCmd := cobra.Command{
21 Use: "yt-mango",
22 Short: "YT-Mango is a scalable video metadata archiver",
23 Long: "YT-Mango is a scalable video metadata archiving utility\n" +
24 "written by terorie with help from the-eye.eu",
25 }
15 26
16 err := classic.Get(&v)
17 if err != nil { panic(err) }
27 versionCmd := cobra.Command{
28 Use: "version",
29 Short: "Get the version number of yt-mango",
30 Run: printVersion,
31 }
18 32
19 jsn, err := json.MarshalIndent(v, "", "\t")
20 if err != nil { panic(err) }
33 rootCmd.AddCommand(&versionCmd)
21 34
22 println(string(jsn))
35 if err := rootCmd.Execute(); err != nil {
36 fmt.Fprintln(os.Stderr, err)
37 os.Exit(1)
38 }
23 39 } }
File pretty/ansi.go added (mode: 100644) (index 0000000..91e06bd)
1 package pretty
2
3 import (
4 "bytes"
5 )
6
type (
	// Code is a single code, stored as its textual form.
	Code string

	// Codes is a list of Code values.
	Codes []Code

	// Effect transforms a string into its decorated form.
	Effect interface {
		E(string) string
	}

	// nilEffect is the empty effect.
	nilEffect struct{}

	// customEffect adapts a plain function to the Effect interface.
	customEffect func(string) string
)

// E returns s unchanged.
func (nilEffect) E(s string) string { return s }

// E applies the wrapped function to s.
func (fn customEffect) E(s string) string { return fn(s) }
22
23 const (
24 RESET = Code("0")
25 BOLD = Code("1")
26 DIM = Code("2")
27 ITALIC = Code("3")
28 UNDERL = Code("4")
29 INV = Code("7")
30 HIDDEN = Code("8")
31 STRIKE = Code("9")
32 BLACK = Code("30")
33 RED = Code("31")
34 GREEN = Code("32")
35 YELLOW = Code("33")
36 BLUE = Code("34")
37 MGNTA = Code("35")
38 CYAN = Code("36")
39 WHITE = Code("37")
40 HBLACK = Code("90")
41 HRED = Code("91")
42 HGREEN = Code("92")
43 HYELLOW = Code("93")
44 HBLUE = Code("94")
45 HMGNTA = Code("95")
46 HCYAN = Code("96")
47 HWHITE = Code("97")
48 )
49
50 func Add(x... Code) Codes {
51 return Codes(x)
52 }
53
54 func (c Code) E(x string) string {
55 if !isTTY { return x }
56 return "\x1b[" + string(c) + "m" + x + "\x1b[0m"
57 }
58
59 func (cs Codes) E(x string) string {
60 if !isTTY { return x }
61 var b bytes.Buffer
62 b.WriteString("\x1b[")
63 for _, c := range cs {
64 b.WriteRune(';')
65 b.WriteString(string(c))
66 }
67 b.WriteRune('m')
68 b.WriteString(x)
69 b.WriteString("\x1b[0m")
70 return b.String()
71 }
72
73 func Wrap(e Effect, wrapper string) Effect {
74 if !isTTY { return nilEffect{} }
75 return customEffect(func(s string) string {
76 return e.E(wrapper[0:1]) + s + e.E(wrapper[1:2])
77 })
78 }
File pretty/istty.go added (mode: 100644) (index 0000000..fb6bfaa)
1 package pretty
2
3 import (
4 "os"
5 "strings"
6 "github.com/mattn/go-isatty"
7 )
8
9 var isTTY bool
10
11 func init() {
12 term := os.Getenv("TERM")
13
14 isTTY = strings.HasPrefix(term, "xterm") ||
15 isatty.IsTerminal(os.Stdout.Fd())
16 }
File version/get.go added (mode: 100644) (index 0000000..7fd35c5)
1 package version
2
// current is the version string reported by Get.
const current = "v0.1 -- dev"

// Get returns the version string.
func Get() string {
	return current
}
File work.go added (mode: 100644) (index 0000000..22a94cd)
1 package main
2
3 import "github.com/spf13/cobra"
4
5 var workCmd = cobra.Command{
6 Use: "work",
7 Short: "Connect to a queue and start archiving",
8 Long: "Get work from a Redis queue, start extracting metadata\n" +
9 "and upload it to a Mongo database.",
10 }
Hints:
Before your first commit, do not forget to set up your git environment:
git config --global user.name "your_name_here"
git config --global user.email "your@email_here"

Clone this repository using HTTP(S):
git clone https://rocketgit.com/user/anomie/yt-user

Clone this repository using ssh (do not forget to upload a key first):
git clone ssh://rocketgit@ssh.rocketgit.com/user/anomie/yt-user

Clone this repository using git:
git clone git://git.rocketgit.com/user/anomie/yt-user

You are allowed to anonymously push to this repository.
This means that your pushed commits will automatically be transformed into a merge request:
... clone the repository ...
... make some changes and some commits ...
git push origin main