File lurk.hws changed (mode: 100755) (index c90b6de..f39265c) |
... |
... |
Function lurk:Explore(url$) |
40 |
40 |
Local err_code, html$, count = ?DownloadFile(url$, |
Local err_code, html$, count = ?DownloadFile(url$, |
41 |
41 |
{Adapter="hurl", Fail404=True, Encoded=FindStr(url$, "%", True) <> -1}, |
{Adapter="hurl", Fail404=True, Encoded=FindStr(url$, "%", True) <> -1}, |
42 |
42 |
lurk_callback, url$) |
lurk_callback, url$) |
|
43 |
|
|
|
44 |
|
self:set_status(count .. " bytes from " .. url$ .. " transmitted") |
|
45 |
|
|
43 |
46 |
If self.PleaseStop |
If self.PleaseStop |
44 |
|
self:set_status("interrupted") |
|
|
47 |
|
self:set_status("Interrupted") |
45 |
48 |
Return() |
Return() |
46 |
49 |
EndIf |
EndIf |
47 |
50 |
|
|
48 |
51 |
;č zde nepředpokládám, že by někdo jiný mohl spustit parser znovu. |
;č zde nepředpokládám, že by někdo jiný mohl spustit parser znovu. |
49 |
52 |
;č O to je to jednodušší. |
;č O to je to jednodušší. |
50 |
|
If err_code = #ERR_NONE |
|
51 |
|
|
|
52 |
|
Local ok, n = ValidateStr(html$) |
|
53 |
|
Local encoding = IIf(ok, #ENCODING_UTF8, #ENCODING_ISO8859_1) |
|
54 |
|
Local headpos = FindStr(html$, "<head", False, 0, encoding) |
|
55 |
|
If headpos < 0 |
|
56 |
|
self:set_status("\27bNothing found") ;č hlavně ta hlavička.. |
|
57 |
|
Return() |
|
58 |
|
Else |
|
59 |
|
self:set_status(count .. " bytes from " .. url$ .. " transmitted") |
|
60 |
|
|
|
61 |
|
Local p = XMLParser.New({StartElement = lurkStartElement}) |
|
62 |
|
p:setbase(url$) |
|
63 |
|
|
|
64 |
|
p:setencoding(IIf(ok, "UTF-8", "ISO-8859-1")) |
|
65 |
|
;č ten parser trefí šlak, uvidí-li CO má zpracovat |
|
66 |
|
p:Parse(UnrightStr(html$, headpos, encoding)) |
|
67 |
|
|
|
68 |
|
;č html-ko nemůže nezpůsobit chybu. |
|
69 |
|
;č nemá cenu je hlídat |
|
70 |
|
Local err_code = ?p:Close() |
|
71 |
|
;Local err_code, status, msg, line, col, pos = ?p:Close() |
|
72 |
|
;self:Replay(err_code, "Done.") |
|
73 |
|
EndIf |
|
74 |
|
Else |
|
|
53 |
|
If err_code <> #ERR_NONE |
75 |
54 |
self:set_status("\27b" .. GetErrorName(err_code)) |
self:set_status("\27b" .. GetErrorName(err_code)) |
|
55 |
|
Return() |
|
56 |
|
EndIf |
|
57 |
|
|
|
58 |
|
Local ok, n = ValidateStr(html$, #ENCODING_UTF8) |
|
59 |
|
Local encoding = IIf(ok, #ENCODING_UTF8, #ENCODING_ISO8859_1) |
|
60 |
|
Local headpos = FindStr(html$, "<head", False, 0, encoding) |
|
61 |
|
If headpos < 0 |
|
62 |
|
self:set_status("Nothing found") ;č hlavně ta hlavička.. |
|
63 |
|
Return() |
76 |
64 |
EndIf |
EndIf |
|
65 |
|
|
|
66 |
|
Local headend = ReverseFindStr(html$, "</head>", False, |
|
67 |
|
StrLen(html$, encoding)-1, encoding) |
|
68 |
|
Local len = headend-headpos |
|
69 |
|
If len < 0 |
|
70 |
|
self:set_status("Something wrong on page") |
|
71 |
|
Return() |
|
72 |
|
EndIf |
|
73 |
|
|
|
74 |
|
html$ = MidStr(html$, headpos, len, encoding) |
|
75 |
|
Local p = XMLParser.New({StartElement = lurkStartElement}) |
|
76 |
|
p:setbase(url$) |
|
77 |
|
p:setencoding(IIf(ok, "UTF-8", "ISO-8859-1")) |
|
78 |
|
;č ten parser trefí šlak, až uvidí CO má zpracovat |
|
79 |
|
p:Parse(html$) |
|
80 |
|
Local lin, col, pos = p:pos() |
|
81 |
|
|
|
82 |
|
;č html-ko nemůže nezpůsobit chybu. |
|
83 |
|
;č nemá cenu je hlídat |
|
84 |
|
Local err_code = ?p:Close() |
|
85 |
|
If len > pos |
|
86 |
|
self:set_status("("..pos .."/"..len .. ") It's too hard. Parser lost.") |
|
87 |
|
Else |
|
88 |
|
self:set_status("Parser done.") |
|
89 |
|
EndIf |
|
90 |
|
|
77 |
91 |
EndFunction |
EndFunction |
78 |
92 |
|
|
79 |
93 |
|
|
|
... |
... |
Function lurkStartElement(p, name$, attrs) |
156 |
170 |
Switch LowerStr(attrs.type) |
Switch LowerStr(attrs.type) |
157 |
171 |
Case "application/atom+xml": |
Case "application/atom+xml": |
158 |
172 |
FallThrough |
FallThrough |
159 |
|
Case "application/rss+xml": |
|
|
173 |
|
Case "application/rss+xml": |
160 |
174 |
Local item = {href="", title="", rel=""} |
Local item = {href="", title="", rel=""} |
161 |
175 |
For i,v In Pairs(item) |
For i,v In Pairs(item) |
162 |
176 |
If HaveItem(attrs, i) Then item[i] = attrs[i] |
If HaveItem(attrs, i) Then item[i] = attrs[i] |
|
... |
... |
Function lurkStartElement(p, name$, attrs) |
180 |
194 |
EndSwitch |
EndSwitch |
181 |
195 |
EndIf |
EndIf |
182 |
196 |
EndSwitch |
EndSwitch |
183 |
|
|
|
184 |
197 |
EndFunction |
EndFunction |
185 |
198 |
|
|
186 |
199 |
Function lurk:CanHas(url$) |
Function lurk:CanHas(url$) |