List of commits:
Subject Hash Author Date (UTC)
Decode video formats bafc6c4ad3587e2de7a88cf532ad1df5ee5a7964 terorie 2018-07-28 14:34:57
Add formats from youtube-dl e612768d79ed1743b415073d9dba713e785e2ea6 terorie 2018-07-28 13:33:48
Initial README 0e9d46ecadcc5b019b9fae4e356bc3360f521b5e terorie 2018-07-28 03:56:26
Initial commit 3b5744c589494163e05a9d54fba10c71e5965d6a terorie 2018-07-28 03:55:53
Commit bafc6c4ad3587e2de7a88cf532ad1df5ee5a7964 - Decode video formats
Author: terorie
Author date (UTC): 2018-07-28 14:34
Committer name: terorie
Committer date (UTC): 2018-07-28 14:34
Parent(s): e612768d79ed1743b415073d9dba713e785e2ea6
Signing key:
Tree: 0587e79c6c3a26987984a108c9948f79a478e137
File Lines added Lines deleted
classic/parse.go 70 9
classic/util.go 7 13
data/format.go 61 20
data/video.go 2 2
File classic/parse.go changed (mode: 100644) (index 7a2d8f4..c0e4677)
... ... import (
6 6 "strconv" "strconv"
7 7 "time" "time"
8 8 "github.com/terorie/youtube-mango/data" "github.com/terorie/youtube-mango/data"
9 "regexp"
10 "github.com/valyala/fastjson"
11 "strings"
9 12 ) )
10 13
11 14 const likeBtnSelector = ".like-button-renderer-like-button-unclicked" const likeBtnSelector = ".like-button-renderer-like-button-unclicked"
 
... ... const viewCountSelector = "div .watch-view-count"
14 17 const userInfoSelector = "div .yt-user-info" const userInfoSelector = "div .yt-user-info"
15 18 const channelNameSelector = ".yt-uix-sessionlink" const channelNameSelector = ".yt-uix-sessionlink"
16 19
20 var playerConfigErr = errors.New("failed to parse player config")
21
17 22 type parseInfo struct { type parseInfo struct {
18 23 v *data.Video v *data.Video
19 24 doc *goquery.Document doc *goquery.Document
 
... ... func (p *parseInfo) parse() error {
28 33 err != nil { return err } err != nil { return err }
29 34 if err := p.parseDescription(); if err := p.parseDescription();
30 35 err != nil { return err } err != nil { return err }
31
32 p.parseMetas()
33
36 if err := p.parsePlayerConfig();
37 err != nil { return err }
38 if err := p.parseMetas();
39 err != nil { return err }
34 40 return nil return nil
35 41 } }
36 42
 
... ... func (p *parseInfo) parseUploader() error {
75 81 return nil return nil
76 82 } }
77 83
78 func (p *parseInfo) parseMetas() {
84 func (p *parseInfo) parseMetas() error {
79 85 metas := p.doc.Find("meta") metas := p.doc.Find("meta")
80 86 // For each <meta> // For each <meta>
81 87 for _, node := range metas.Nodes { for _, node := range metas.Nodes {
 
... ... func (p *parseInfo) parseMetas() {
94 100 } }
95 101
96 102 // Content not set // Content not set
97 if len(content) == 0 {
98 continue
99 }
103 if len(content) == 0 { continue }
100 104
101 105 // <meta property … // <meta property …
102 106 if len(prop) != 0 { if len(prop) != 0 {
 
... ... func (p *parseInfo) parseMetas() {
123 127 case "channelId": case "channelId":
124 128 p.v.UploaderID = content p.v.UploaderID = content
125 129 case "duration": case "duration":
126 if val, err := parseDuration(content);
127 err == nil { p.v.Duration = val }
130 if val, err := parseDuration(content); err == nil {
131 p.v.Duration = val
132 } else {
133 return err
134 }
128 135 case "isFamilyFriendly": case "isFamilyFriendly":
129 136 if val, err := strconv.ParseBool(content); if val, err := strconv.ParseBool(content);
130 137 err == nil { p.v.FamilyFriendly = val } err == nil { p.v.FamilyFriendly = val }
 
... ... func (p *parseInfo) parseMetas() {
132 139 continue continue
133 140 } }
134 141 } }
142 return nil
143 }
144
145 func (p *parseInfo) parsePlayerConfig() error {
146 var json string
147
148 p.doc.Find("script").EachWithBreak(func(_ int, s *goquery.Selection) bool {
149 script := s.Text()
150 startMatch := regexp.MustCompile("var ytplayer = ytplayer \\|\\| {};\\s*ytplayer\\.config = {")
151 endMatch := regexp.MustCompile("};\\s*ytplayer.load = function\\(")
152
153 startIndices := startMatch.FindStringIndex(script)
154 if startIndices == nil { return true }
155 endIndices := endMatch.FindStringIndex(script)
156 if endIndices == nil { return true }
157
158 // minus one to preserve braces
159 startIndex, endIndex := startIndices[1] - 1, endIndices[0] + 1
160 if startIndex > endIndex { return true }
161
162 json = script[startIndex:endIndex]
163
164 // Stop searching, json found
165 return false
166 })
167 // No json found
168 if json == "" { return playerConfigErr }
169
170 // Try decoding json
171 var parser fastjson.Parser
172 config, err := parser.Parse(json)
173 if err != nil { return err }
174
175 // Extract data
176 args := config.Get("args")
177 if args == nil { return playerConfigErr }
178
179 // Get fmt_list string
180 fmtList := args.GetStringBytes("fmt_list")
181 if fmtList == nil { return playerConfigErr }
182
183 // Split and decode it
184 fmts := strings.Split(string(fmtList), ",")
185 for _, fmt := range fmts {
186 parts := strings.Split(fmt, "/")
187 if len(parts) != 2 { return playerConfigErr }
188 formatID := parts[0]
189 // Look up the format ID
190 format := data.FormatsById[formatID]
191 if format == nil { return playerConfigErr }
192 p.v.Formats = append(p.v.Formats, *format)
193 }
194
195 return nil
135 196 } }
File classic/util.go changed (mode: 100644) (index dcad28c..9648285)
1 1 package classic package classic
2 2
3 3 import ( import (
4 "time"
5 4 "errors" "errors"
6 5 "strings" "strings"
7 6 "strconv" "strconv"
8 7 ) )
9 8
10 // "PT6M57S" => 6 min 57 s
11 func parseDuration(d string) (time.Duration, error) {
12 var err error
13 goto start
14
15 error:
16 return 0, errors.New("unknown duration code")
9 var durationErr = errors.New("unknown duration code")
17 10
18 start:
19 if d[0:2] != "PT" { goto error }
11 // "PT6M57S" => 6 min 57 s
12 func parseDuration(d string) (uint64, error) {
13 if d[0:2] != "PT" { return 0, durationErr }
20 14 mIndex := strings.IndexByte(d, 'M') mIndex := strings.IndexByte(d, 'M')
21 if mIndex == -1 { goto error }
15 if mIndex == -1 { return 0, durationErr }
22 16
23 17 minutes, err := strconv.ParseUint(d[2:mIndex], 10, 32) minutes, err := strconv.ParseUint(d[2:mIndex], 10, 32)
24 18 if err != nil { return 0, err } if err != nil { return 0, err }
25 seconds, err := strconv.ParseUint(d[mIndex:len(d)-1], 10, 32)
19 seconds, err := strconv.ParseUint(d[mIndex+1:len(d)-1], 10, 32)
26 20 if err != nil { return 0, err } if err != nil { return 0, err }
27 21
28 dur := time.Duration(minutes) * time.Minute + time.Duration(seconds) * time.Second
22 dur := minutes * 60 + seconds
29 23 return dur, nil return dur, nil
30 24 } }
31 25
File data/format.go changed (mode: 100644) (index bd57174..154fa28)
1 1 package data package data
2 2
3 import "encoding/json"
4
3 5 type FormatType uint8 type FormatType uint8
4 6
5 7 const ( const (
 
... ... const (
13 15 ) )
14 16
15 17 type Format struct { type Format struct {
16 FormatID string
17 Extension string
18 Width uint32
19 Height uint32
20 VideoCodec string
21 AudioCodec string
22 AudioBitrate uint32
23 Flags FormatType
18 ID string `json:"id"`
19 Extension string `json:"ext"`
20 Width uint32 `json:"width"`
21 Height uint32 `json:"height"`
22 VideoCodec string `json:"vcodec"`
23 AudioCodec string `json:"acodec"`
24 AudioBitrate uint32 `json:"abr"`
25 Flags FormatType `json:"flags"`
26 }
27
28 var FormatsById map[string]*Format
29
30 func init() {
31 ids := []string {
32 "5", "6", "13", "17", "18", "22", "34", "35",
33 "36", "37", "38", "43", "44", "45", "46", "59",
34 "78", "82", "83", "84", "85", "100", "101", "102",
35 "91", "92", "93", "94", "95", "96", "132", "151",
36 "133", "134", "135", "136", "137", "138", "160", "212",
37 "264", "298", "299", "266", "139", "140", "141", "256",
38 "258", "325", "328", "167", "168", "169", "170", "218",
39 "219", "278", "242", "243", "244", "245", "246", "247",
40 "248", "271", "272", "302", "303", "308", "313", "315",
41 "171", "172", "249", "250", "251",
42 }
43 FormatsById = make(map[string]*Format)
44 for i, id := range ids {
45 format := &Formats[i]
46 if format.ID != id { panic("misaligned IDs: " + id + "/" + format.ID) }
47 FormatsById[id] = format
48 }
24 49 } }
25 50
26 51 // Taken from github.com/rg3/youtube-dl // Taken from github.com/rg3/youtube-dl
27 52 // As in youtube_dl/extractor/youtube.py // As in youtube_dl/extractor/youtube.py
28 53 var Formats = []Format{ var Formats = []Format{
29 54 // Standard formats // Standard formats
30 { "5", "flv", 400, 240, "h263", "mp3", 64, FormatStd },
31 { "6", "flv", 450, 270, "h263", "mp3", 64, FormatStd },
55 { "5", "flv", 400, 240, "h263", "mp3", 64, FormatStd },
56 { "6", "flv", 450, 270, "h263", "mp3", 64, FormatStd },
32 57 { "13", "3gp", 0, 0, "mp4v", "aac", 0, FormatStd }, { "13", "3gp", 0, 0, "mp4v", "aac", 0, FormatStd },
33 58 { "17", "3gp", 176, 144, "mp4v", "aac", 24, FormatStd }, { "17", "3gp", 176, 144, "mp4v", "aac", 24, FormatStd },
34 59 { "18", "mp4", 640, 360, "h264", "aac", 96, FormatStd }, { "18", "mp4", 640, 360, "h264", "aac", 96, FormatStd },
 
... ... var Formats = []Format{
47 72 { "78", "mp4", 854, 480, "h264", "aac", 128, FormatStd }, { "78", "mp4", 854, 480, "h264", "aac", 128, FormatStd },
48 73
49 74 // 3D videos // 3D videos
50 { "82", "mp4", 0, 360, "h264", "aac", 128, Format3D },
51 { "83", "mp4", 0, 480, "h264", "aac", 128, Format3D },
52 { "84", "mp4", 0, 720, "h264", "aac", 192, Format3D },
53 { "85", "mp4", 0, 1080, "h264", "aac", 192, Format3D },
75 { "82", "mp4", 0, 360, "h264", "aac", 128, Format3D },
76 { "83", "mp4", 0, 480, "h264", "aac", 128, Format3D },
77 { "84", "mp4", 0, 720, "h264", "aac", 192, Format3D },
78 { "85", "mp4", 0, 1080, "h264", "aac", 192, Format3D },
54 79 { "100", "webm", 0, 360, "vp8", "vorbis", 128, Format3D }, { "100", "webm", 0, 360, "vp8", "vorbis", 128, Format3D },
55 80 { "101", "webm", 0, 480, "vp8", "vorbis", 192, Format3D }, { "101", "webm", 0, 480, "vp8", "vorbis", 192, Format3D },
56 81 { "102", "webm", 0, 720, "vp8", "vorbis", 192, Format3D }, { "102", "webm", 0, 720, "vp8", "vorbis", 192, Format3D },
57 82
58 83 // Apple HTTP Live Streaming // Apple HTTP Live Streaming
59 { "91", "mp4", 0, 144, "h264", "aac", 48, FormatHLS },
60 { "92", "mp4", 0, 240, "h264", "aac", 48, FormatHLS },
61 { "93", "mp4", 0, 360, "h264", "aac", 128, FormatHLS },
62 { "94", "mp4", 0, 480, "h264", "aac", 128, FormatHLS },
63 { "95", "mp4", 0, 720, "h264", "aac", 256, FormatHLS },
64 { "96", "mp4", 0, 1080, "h264", "aac", 256, FormatHLS },
84 { "91", "mp4", 0, 144, "h264", "aac", 48, FormatHLS },
85 { "92", "mp4", 0, 240, "h264", "aac", 48, FormatHLS },
86 { "93", "mp4", 0, 360, "h264", "aac", 128, FormatHLS },
87 { "94", "mp4", 0, 480, "h264", "aac", 128, FormatHLS },
88 { "95", "mp4", 0, 720, "h264", "aac", 256, FormatHLS },
89 { "96", "mp4", 0, 1080, "h264", "aac", 256, FormatHLS },
65 90 { "132", "mp4", 0, 240, "h264", "aac", 48, FormatHLS }, { "132", "mp4", 0, 240, "h264", "aac", 48, FormatHLS },
66 91 { "151", "mp4", 0, 72, "h264", "aac", 24, FormatHLS }, { "151", "mp4", 0, 72, "h264", "aac", 24, FormatHLS },
67 92
 
... ... var Formats = []Format{
121 146 { "250", "webm", 0, 0, "", "opus", 70, FormatDASH | FormatAudioOnly }, { "250", "webm", 0, 0, "", "opus", 70, FormatDASH | FormatAudioOnly },
122 147 { "251", "webm", 0, 0, "", "opus", 160, FormatDASH | FormatAudioOnly }, { "251", "webm", 0, 0, "", "opus", 160, FormatDASH | FormatAudioOnly },
123 148 } }
149
150 func (f FormatType) MarshalJSON() ([]byte, error) {
151 flags := make([]string, 0)
152 setFlag := func(mask FormatType, name string) {
153 if f&mask != 0 {
154 flags = append(flags, name)
155 }
156 }
157 setFlag(FormatVideoOnly, "videoOnly")
158 setFlag(FormatAudioOnly, "audioOnly")
159 setFlag(Format3D, "3d")
160 setFlag(FormatHLS, "hls")
161 setFlag(FormatDASH, "dash")
162 setFlag(FormatHighFps, "hiFps")
163 return json.Marshal(flags)
164 }
File data/video.go changed (mode: 100644) (index c1138c1..dadc5fd)
... ... type Video struct {
14 14 URL string `json:"url"` URL string `json:"url"`
15 15 License string `json:"license,omitempty"` License string `json:"license,omitempty"`
16 16 Genre string `json:"genre"` Genre string `json:"genre"`
17 Tags []string `json:"tags"`
17 Tags []string `json:"tags,omitempty"`
18 18 Subtitles []string `json:"subtitles,omitempty"` Subtitles []string `json:"subtitles,omitempty"`
19 Duration time.Duration `json:"duration"`
19 Duration uint64 `json:"duration"`
20 20 FamilyFriendly bool `json:"family_friendly"` FamilyFriendly bool `json:"family_friendly"`
21 21 Views uint64 `json:"views"` Views uint64 `json:"views"`
22 22 Likes uint64 `json:"likes"` Likes uint64 `json:"likes"`
Hints:
Before your first commit, do not forget to set up your git environment:
git config --global user.name "your_name_here"
git config --global user.email "your@email_here"

Clone this repository using HTTP(S):
git clone https://rocketgit.com/user/anomie/yt-user

Clone this repository using ssh (do not forget to upload a key first):
git clone ssh://rocketgit@ssh.rocketgit.com/user/anomie/yt-user

Clone this repository using git:
git clone git://git.rocketgit.com/user/anomie/yt-user

You are allowed to anonymously push to this repository.
This means that your pushed commits will automatically be transformed into a merge request:
... clone the repository ...
... make some changes and some commits ...
git push origin main