File api/api.go changed (mode: 100644) (index c9a0ad1..06ccb2b) |
... |
... |
package api |
2 |
2 |
|
|
3 |
3 |
import ( |
import ( |
4 |
4 |
"github.com/terorie/yt-mango/data" |
"github.com/terorie/yt-mango/data" |
5 |
|
"github.com/terorie/yt-mango/classic" |
|
6 |
5 |
"github.com/terorie/yt-mango/apiclassic" |
"github.com/terorie/yt-mango/apiclassic" |
|
6 |
|
"github.com/terorie/yt-mango/apijson" |
7 |
7 |
) |
) |
8 |
8 |
|
|
9 |
9 |
type API struct { |
type API struct { |
10 |
10 |
GetVideo func(*data.Video) error |
GetVideo func(*data.Video) error |
|
11 |
|
GetVideoSubtitleList func(*data.Video) error |
11 |
12 |
GetChannel func(*data.Channel) error |
GetChannel func(*data.Channel) error |
12 |
13 |
GetChannelVideoURLs func(channelID string, page uint) ([]string, error) |
GetChannelVideoURLs func(channelID string, page uint) ([]string, error) |
13 |
14 |
} |
} |
14 |
15 |
|
|
|
16 |
|
// TODO Fallback option |
|
17 |
|
var DefaultAPI *API = nil |
|
18 |
|
|
15 |
19 |
var ClassicAPI = API{ |
var ClassicAPI = API{ |
16 |
20 |
GetVideo: apiclassic.GetVideo, |
GetVideo: apiclassic.GetVideo, |
|
21 |
|
GetVideoSubtitleList: apiclassic.GetVideoSubtitleList, |
17 |
22 |
GetChannel: apiclassic.GetChannel, |
GetChannel: apiclassic.GetChannel, |
18 |
23 |
GetChannelVideoURLs: apiclassic.GetChannelVideoURLs, |
GetChannelVideoURLs: apiclassic.GetChannelVideoURLs, |
19 |
24 |
} |
} |
20 |
25 |
|
|
21 |
|
var JsonAPI struct { |
|
22 |
|
|
|
|
26 |
|
var JsonAPI = API{ |
|
27 |
|
GetVideo: apijson.GetVideo, |
|
28 |
|
GetVideoSubtitleList: apiclassic.GetVideoSubtitleList, |
|
29 |
|
GetChannel: apijson.GetChannel, |
|
30 |
|
GetChannelVideoURLs: apijson.GetChannelVideoURLs, |
23 |
31 |
} |
} |
File apiclassic/get.go changed (mode: 100644) (index d42e196..3baa5b5) |
... |
... |
func GetVideo(v *data.Video) error { |
9 |
9 |
if len(v.ID) == 0 { return errors.New("no video ID") } |
if len(v.ID) == 0 { return errors.New("no video ID") } |
10 |
10 |
|
|
11 |
11 |
// Download the doc tree |
// Download the doc tree |
12 |
|
doc, err := grab(v) |
|
|
12 |
|
doc, err := GrabVideo(v.ID) |
13 |
13 |
if err != nil { return err } |
if err != nil { return err } |
14 |
14 |
|
|
15 |
15 |
// Parse it |
// Parse it |
|
... |
... |
func GetVideo(v *data.Video) error { |
20 |
20 |
return nil |
return nil |
21 |
21 |
} |
} |
22 |
22 |
|
|
|
23 |
|
func GetVideoSubtitleList(v *data.Video) (err error) { |
|
24 |
|
tracks, err := GrabSubtitleList(v.ID) |
|
25 |
|
if err != nil { return } |
|
26 |
|
for _, track := range tracks.Tracks { |
|
27 |
|
v.Subtitles = append(v.Subtitles, track.LangCode) |
|
28 |
|
} |
|
29 |
|
return |
|
30 |
|
} |
|
31 |
|
|
23 |
32 |
func GetChannel(c *data.Channel) error { |
func GetChannel(c *data.Channel) error { |
24 |
33 |
return errors.New("not implemented") |
return errors.New("not implemented") |
25 |
34 |
} |
} |
File apiclassic/grab.go changed (mode: 100644) (index 672188e..24fe469) |
... |
... |
import ( |
5 |
5 |
"errors" |
"errors" |
6 |
6 |
"encoding/xml" |
"encoding/xml" |
7 |
7 |
"github.com/PuerkitoBio/goquery" |
"github.com/PuerkitoBio/goquery" |
8 |
|
"github.com/terorie/yt-mango/data" |
|
9 |
8 |
"github.com/terorie/yt-mango/common" |
"github.com/terorie/yt-mango/common" |
10 |
9 |
) |
) |
11 |
10 |
|
|
|
... |
... |
const mainURL = "https://www.youtube.com/watch?has_verified=1&bpctr=6969696969&v |
13 |
12 |
const subtitleURL = "https://video.google.com/timedtext?type=list&v=" |
const subtitleURL = "https://video.google.com/timedtext?type=list&v=" |
14 |
13 |
|
|
15 |
14 |
// Grabs a HTML video page and returns the document tree |
// Grabs a HTML video page and returns the document tree |
16 |
|
func grab(v *data.Video) (doc *goquery.Document, err error) { |
|
17 |
|
req, err := http.NewRequest("GET", mainURL + v.ID, nil) |
|
|
15 |
|
func GrabVideo(videoID string) (doc *goquery.Document, err error) { |
|
16 |
|
req, err := http.NewRequest("GET", mainURL + videoID, nil) |
18 |
17 |
if err != nil { return } |
if err != nil { return } |
19 |
|
requestHeader(&req.Header) |
|
|
18 |
|
setHeaders(&req.Header) |
20 |
19 |
|
|
21 |
20 |
res, err := common.Client.Do(req) |
res, err := common.Client.Do(req) |
22 |
21 |
if err != nil { return } |
if err != nil { return } |
|
... |
... |
func grab(v *data.Video) (doc *goquery.Document, err error) { |
30 |
29 |
} |
} |
31 |
30 |
|
|
32 |
31 |
// Grabs and parses a subtitle list |
// Grabs and parses a subtitle list |
33 |
|
func grabSubtitleList(v *data.Video) (err error) { |
|
34 |
|
req, err := http.NewRequest("GET", subtitleURL + v.ID, nil) |
|
35 |
|
if err != nil { return err } |
|
|
32 |
|
func GrabSubtitleList(videoID string) (tracks *XMLSubTrackList, err error) { |
|
33 |
|
req, err := http.NewRequest("GET", subtitleURL + videoID, nil) |
|
34 |
|
if err != nil { return } |
|
35 |
|
setHeaders(&req.Header) |
36 |
36 |
|
|
37 |
|
res, err := client.Do(req) |
|
38 |
|
if err != nil { return err } |
|
39 |
|
if res.StatusCode != 200 { return errors.New("HTTP failure") } |
|
|
37 |
|
res, err := common.Client.Do(req) |
|
38 |
|
if err != nil { return } |
|
39 |
|
if res.StatusCode != 200 { return nil, errors.New("HTTP failure") } |
40 |
40 |
|
|
41 |
41 |
defer res.Body.Close() |
defer res.Body.Close() |
42 |
42 |
decoder := xml.NewDecoder(res.Body) |
decoder := xml.NewDecoder(res.Body) |
43 |
43 |
|
|
44 |
|
var tracks XMLSubTrackList |
|
45 |
|
err = decoder.Decode(&tracks) |
|
46 |
|
if err != nil { return err } |
|
47 |
|
|
|
48 |
|
for _, track := range tracks.Tracks { |
|
49 |
|
v.Subtitles = append(v.Subtitles, track.LangCode) |
|
50 |
|
} |
|
51 |
|
|
|
|
44 |
|
tracks = new(XMLSubTrackList) |
|
45 |
|
err = decoder.Decode(tracks) |
52 |
46 |
return |
return |
53 |
47 |
} |
} |
|
48 |
|
|
|
49 |
|
// setHeaders applies the request headers shared by every request of this
// package to h.
//
// Set is used instead of Add so that applying the headers more than once
// leaves exactly one value per header.
//
// NOTE(review): net/http takes the outgoing Host from Request.Host or the
// request URL, not from this header entry, so the "Host" value here likely
// has no effect on the wire. Confirm before relying on it.
func setHeaders(h *http.Header) {
	h.Set("Host", "www.youtube.com")
	h.Set("User-Agent", "yt-mango/0.1")
}
File apijson/grab.go changed (mode: 100644) (index 9e90bef..38ab0aa) |
... |
... |
func GrabChannelPage(channelID string, page uint) (root *fastjson.Value, err err |
67 |
67 |
|
|
68 |
68 |
// setHeaders applies the request headers shared by every request of this
// package to h, including the X-YouTube-Client-* pair.
//
// Set is used instead of Add so that applying the headers more than once
// leaves exactly one value per header.
//
// NOTE(review): net/http takes the outgoing Host from Request.Host or the
// request URL, not from this header entry, so the "Host" value here likely
// has no effect on the wire. Confirm before relying on it.
func setHeaders(h *http.Header) {
	h.Set("Host", "www.youtube.com")
	h.Set("User-Agent", "yt-mango/0.1")
	h.Set("X-YouTube-Client-Name", "1")
	h.Set("X-YouTube-Client-Version", "2.20170707")
}
File cmd/channeldump.go changed (mode: 100644) (index b9188de..b1b843c) |
... |
... |
import ( |
9 |
9 |
"time" |
"time" |
10 |
10 |
"bufio" |
"bufio" |
11 |
11 |
"log" |
"log" |
12 |
|
"github.com/terorie/yt-mango/apijson" |
|
|
12 |
|
"github.com/terorie/yt-mango/api" |
13 |
13 |
) |
) |
14 |
14 |
|
|
15 |
15 |
var channelDumpCmd = cobra.Command{ |
var channelDumpCmd = cobra.Command{ |
|
... |
... |
var channelDumpCmd = cobra.Command{ |
74 |
74 |
|
|
75 |
75 |
totalURLs := 0 |
totalURLs := 0 |
76 |
76 |
for i := offset; true; i++ { |
for i := offset; true; i++ { |
77 |
|
channelURLs, err := apijson.GetChannelVideoURLs(channelID, uint(i)) |
|
|
77 |
|
channelURLs, err := api.DefaultAPI.GetChannelVideoURLs(channelID, uint(i)) |
78 |
78 |
if err != nil { |
if err != nil { |
79 |
79 |
log.Printf("Aborting on error %v.", err) |
log.Printf("Aborting on error %v.", err) |
80 |
80 |
break |
break |
File main.go changed (mode: 100644) (index 863de42..18b1170) |
... |
... |
import ( |
9 |
9 |
"os" |
"os" |
10 |
10 |
"github.com/terorie/yt-mango/cmd" |
"github.com/terorie/yt-mango/cmd" |
11 |
11 |
"log" |
"log" |
|
12 |
|
"github.com/terorie/yt-mango/api" |
12 |
13 |
) |
) |
13 |
14 |
|
|
14 |
15 |
const Version = "v0.1 -- dev" |
const Version = "v0.1 -- dev" |
|
... |
... |
func main() { |
22 |
23 |
log.SetOutput(os.Stderr) |
log.SetOutput(os.Stderr) |
23 |
24 |
|
|
24 |
25 |
var printVersion bool |
var printVersion bool |
|
26 |
|
var forceAPI string |
|
27 |
|
|
25 |
28 |
rootCmd := cobra.Command{ |
rootCmd := cobra.Command{ |
26 |
29 |
Use: "yt-mango", |
Use: "yt-mango", |
27 |
30 |
Short: "YT-Mango is a scalable video metadata archiver", |
Short: "YT-Mango is a scalable video metadata archiver", |
|
... |
... |
func main() { |
32 |
35 |
fmt.Println(Version) |
fmt.Println(Version) |
33 |
36 |
os.Exit(0) |
os.Exit(0) |
34 |
37 |
} |
} |
|
38 |
|
switch forceAPI { |
|
39 |
|
case "": break |
|
40 |
|
case "classic": api.DefaultAPI = &api.ClassicAPI |
|
41 |
|
case "json": api.DefaultAPI = &api.JsonAPI |
|
42 |
|
default: |
|
43 |
|
fmt.Fprintln(os.Stderr, "Invalid API specified.\n" + |
|
44 |
|
"Valid options are: \"classic\" and \"json\"") |
|
45 |
|
os.Exit(1) |
|
46 |
|
} |
35 |
47 |
}, |
}, |
36 |
48 |
} |
} |
|
49 |
|
|
37 |
50 |
rootCmd.Flags().BoolVar(&printVersion, "version", false, |
rootCmd.Flags().BoolVar(&printVersion, "version", false, |
38 |
|
fmt.Sprintf("Print the version (" + Version +") and exit"), ) |
|
|
51 |
|
fmt.Sprintf("Print the version (" + Version +") and exit")) |
|
52 |
|
rootCmd.Flags().StringVarP(&forceAPI, "api", "a", "", |
|
53 |
|
"Use the specified API for all calls.\n" + |
|
54 |
|
"Possible options: \"classic\" and \"json\"") |
39 |
55 |
|
|
40 |
56 |
rootCmd.AddCommand(&cmd.Channel) |
rootCmd.AddCommand(&cmd.Channel) |
41 |
57 |
rootCmd.AddCommand(&cmd.Video) |
rootCmd.AddCommand(&cmd.Video) |