File api/ids.go added (mode: 100644) (index 0000000..a480764) |
|
1 |
|
package api |
|
2 |
|
|
|
3 |
|
import (
	"fmt"
	"log"
	"net/url"
	"os"
	"regexp"
	"strings"
)
|
10 |
|
|
|
11 |
|
// matchChannelID matches a bare channel ID: one or more word characters,
// hyphens, or percent-encoded "=" signs ("%3D" / "%3d").
var matchChannelID = regexp.MustCompile(`^([\w\-]|(%3[dD]))+$`)

// GetChannelID extracts a channel ID from chanURL.
//
// chanURL is either a bare channel ID, which is returned unchanged, or a URL
// on host "youtube.com" / "www.youtube.com" whose path starts with
// "/channel/". In the URL case the path segment directly following
// "/channel/" is returned and anything after the next "/" is discarded.
//
// A non-nil error (and an empty ID) is returned when chanURL cannot be
// parsed as a URL, points at another host, is an old-style "/user/" link,
// has no "/channel/" prefix, or carries an empty ID. The function never
// terminates the process; reporting the error is left to the caller.
func GetChannelID(chanURL string) (string, error) {
	// Fast path: the input already looks like a channel ID.
	if matchChannelID.MatchString(chanURL) {
		return chanURL, nil
	}

	u, err := url.Parse(chanURL)
	if err != nil {
		return "", fmt.Errorf("not a channel ID: %w", err)
	}
	if u.Host != "www.youtube.com" && u.Host != "youtube.com" {
		return "", fmt.Errorf("not a channel ID: %q", chanURL)
	}

	// Old /user/ links do not contain the channel ID.
	// TODO Implement extraction of channel ID
	if strings.HasPrefix(u.Path, "/user/") {
		return "", fmt.Errorf("new /channel/ link is required, old /user/ links do not work: %q", chanURL)
	}

	const channelPrefix = "/channel/"
	if !strings.HasPrefix(u.Path, channelPrefix) {
		return "", fmt.Errorf("not a channel ID: %q", chanURL)
	}
	channelID := u.Path[len(channelPrefix):]

	// Drop whatever follows the ID, e.g. "/videos".
	if slash := strings.IndexByte(channelID, '/'); slash != -1 {
		channelID = channelID[:slash]
	}

	// "/channel/" with nothing after it is not a usable ID.
	if channelID == "" {
		return "", fmt.Errorf("not a channel ID: %q", chanURL)
	}

	return channelID, nil
}
File cmd/channel.go changed (mode: 100644) (index 89ace23..cf2ea8b) |
... |
... |
package cmd |
2 |
2 |
|
|
3 |
3 |
import ( |
import ( |
4 |
4 |
"github.com/spf13/cobra" |
"github.com/spf13/cobra" |
5 |
|
"regexp" |
|
6 |
5 |
) |
) |
7 |
6 |
|
|
8 |
7 |
var force bool |
var force bool |
|
... |
... |
var Channel = cobra.Command{ |
13 |
12 |
Short: "Get information about a channel", |
Short: "Get information about a channel", |
14 |
13 |
} |
} |
15 |
14 |
|
|
16 |
|
var matchChannelID = regexp.MustCompile("^([\\w\\-]|(%3[dD]))+$") |
|
17 |
|
|
|
18 |
15 |
func init() { |
func init() { |
19 |
16 |
channelDumpCmd.Flags().BoolVarP(&force, "force", "f", false, "Overwrite the output file if it already exists") |
channelDumpCmd.Flags().BoolVarP(&force, "force", "f", false, "Overwrite the output file if it already exists") |
20 |
17 |
channelDumpCmd.Flags().UintVar(&offset, "page-offset", 1, "Start getting videos at this page. (A page is usually 30 videos)") |
channelDumpCmd.Flags().UintVar(&offset, "page-offset", 1, "Start getting videos at this page. (A page is usually 30 videos)") |
File cmd/channeldump.go changed (mode: 100644) (index 1fb88ce..1bff985) |
... |
... |
package cmd |
2 |
2 |
|
|
3 |
3 |
import ( |
import ( |
4 |
4 |
"github.com/spf13/cobra" |
"github.com/spf13/cobra" |
5 |
|
"net/url" |
|
6 |
5 |
"os" |
"os" |
7 |
|
"strings" |
|
8 |
6 |
"time" |
"time" |
9 |
7 |
"bufio" |
"bufio" |
10 |
8 |
"log" |
"log" |
|
... |
... |
var channelDumpCmd = cobra.Command{ |
39 |
37 |
} |
} |
40 |
38 |
channelDumpContext.printResults = printResults |
channelDumpContext.printResults = printResults |
41 |
39 |
|
|
42 |
|
if !matchChannelID.MatchString(channelID) { |
|
43 |
|
// Check if youtube.com domain |
|
44 |
|
_url, err := url.Parse(channelID) |
|
45 |
|
if err != nil || (_url.Host != "www.youtube.com" && _url.Host != "youtube.com") { |
|
46 |
|
log.Fatal("Not a channel ID:", channelID) |
|
47 |
|
os.Exit(1) |
|
48 |
|
} |
|
49 |
|
|
|
50 |
|
// Check if old /user/ URL |
|
51 |
|
if strings.HasPrefix(_url.Path, "/user/") { |
|
52 |
|
// TODO Implement extraction of channel ID |
|
53 |
|
log.Fatal("New /channel/ link is required!\n" + |
|
54 |
|
"The old /user/ links do not work.") |
|
55 |
|
os.Exit(1) |
|
56 |
|
} |
|
57 |
|
|
|
58 |
|
// Remove /channel/ path |
|
59 |
|
channelID = strings.TrimPrefix(_url.Path, "/channel/") |
|
60 |
|
if len(channelID) == len(_url.Path) { |
|
61 |
|
// No such prefix to be removed |
|
62 |
|
log.Fatal("Not a channel ID:", channelID) |
|
63 |
|
os.Exit(1) |
|
64 |
|
} |
|
65 |
|
|
|
66 |
|
// Remove rest of path from channel ID |
|
67 |
|
slashIndex := strings.IndexRune(channelID, '/') |
|
68 |
|
if slashIndex != -1 { |
|
69 |
|
channelID = channelID[:slashIndex] |
|
70 |
|
} |
|
|
40 |
|
channelID, err := api.GetChannelID(channelID) |
|
41 |
|
if err != nil { |
|
42 |
|
log.Print(err) |
|
43 |
|
os.Exit(1) |
71 |
44 |
} |
} |
72 |
45 |
|
|
73 |
46 |
log.Printf("Starting work on channel ID \"%s\".", channelID) |
log.Printf("Starting work on channel ID \"%s\".", channelID) |
|
... |
... |
var channelDumpCmd = cobra.Command{ |
115 |
88 |
page++ |
page++ |
116 |
89 |
} |
} |
117 |
90 |
terminate: |
terminate: |
118 |
|
log.Printf("&") |
|
119 |
91 |
|
|
120 |
92 |
// Requests sent, wait for remaining requests to finish |
// Requests sent, wait for remaining requests to finish |
121 |
93 |
for { |
for { |
122 |
|
done := atomic.LoadUint64(&channelDumpContext.pagesDone) |
|
123 |
|
// Page starts at 1 |
|
124 |
|
target := uint64(page) - 1 |
|
|
94 |
|
done := uint64(offset) + atomic.LoadUint64(&channelDumpContext.pagesDone) |
|
95 |
|
target := uint64(page) |
125 |
96 |
if done >= target { break } |
if done >= target { break } |
126 |
97 |
|
|
127 |
98 |
// TODO use semaphore |
// TODO use semaphore |