Subject | Hash | Author | Date (UTC) |
---|---|---|---|
lurk: rework links parsing | 27d2aa01be8796427acc9753918a25ca336a523f | Alex | 2022-07-23 01:50:51 |
lurk: accept more possible MIME types | 851689d4324eea12e0c2ecca65557a130006d253 | Alex | 2022-07-22 23:58:37 |
pridat mezeru | 9e6e85bc191e313bbe842052bd92671bfece74c1 | Alex | 2022-07-21 02:18:44 |
add Readme file | 0a25299e82fc60cd3d4f2b1fdc0c7047b299bf78 | Alex | 2022-07-20 01:54:53 |
add LICENSE file | f073c68dbdfd517bdf39f2700369ecfb37f74a2d | Alex | 2022-07-19 10:12:47 |
IvoRSS: fix version number | 8488a8ae206d0ed23ed9a434e25db373168e36ed | Alex | 2022-07-19 10:09:34 |
clean up feeds | f53c9a65d7439041b12d204d1127026935baedf9 | Alex | 2022-07-19 09:59:11 |
IvoRSS: Ready to publish | abccdae2fe21522a92f2c92fbffb4cafec089f23 | Alex | 2022-07-19 09:41:44 |
track changes of Atom feeds by updated tag | 1c115173dd62c39d1fc50dc2e7b36da338a306ef | Alex | 2022-07-19 07:46:35 |
implement images opt-out | 6852d7d1c12f23487c2771f77fc8d17bc7f7a761 | Alex | 2022-07-19 06:57:56 |
implement image loading | b408a53d97f23784bbfc668eac663619a3afebe7 | Alex | 2022-07-19 06:27:44 |
implement channel link support | f2378ce83c04b95304c5ff21b14806edf90a2216 | Alex | 2022-07-19 01:26:03 |
bold unread articles (It wasn't planned before) | 35a66771b2285811a5c5f8adefafdda4d2bf4e38 | Alex | 2022-07-19 00:11:29 |
dodat i změny z IvoRSS | 78d71c13bd806cf849eb16d1a672aea5551b6b2c | Alex | 2022-07-18 02:14:12 |
totálně hustej HTML parsing je implementován | eeba45d9811c83e9199d3a5929102c192e54b216 | Alex | 2022-07-18 02:11:49 |
add show source feature | 8c963667a609e9c91f5a94963786ca2e4a7f88a4 | Alex | 2022-07-17 03:46:17 |
IvoR: if parser falls, try to convert to utf-8 from system encoding | ea17d81bc65b6a5e79d202d5a6b36fcae854cc1b | Alex | 2022-07-17 02:33:02 |
feedtree: add Copy URL option to context menu | a9170b64e6b1bca22cb807a7afa0fe35b6611cbe | Alex | 2022-07-16 23:00:07 |
feedtree: keep track of the latest guid | fa1cf2508ade717fc3aa3bf8224a51b685e1636b | Alex | 2022-07-16 22:42:30 |
lurk: track parsed position, show meaningful status to user | 80ce4ada68e657b196b4e173747a873a54f33c36 | Alex | 2022-07-16 13:34:54 |
File | Lines added | Lines deleted |
---|---|---|
lurk.hws | 28 | 9 |
File lurk.hws changed (mode: 100755) (index ac49524..27cae87) | |||
... | ... | Function lurk:Explore(url$) | |
67 | 67 | ||
68 | 68 | Local ok, n = ValidateStr(html$, #ENCODING_UTF8) | Local ok, n = ValidateStr(html$, #ENCODING_UTF8) |
69 | 69 | Local encoding = IIf(ok, #ENCODING_UTF8, #ENCODING_ISO8859_1) | Local encoding = IIf(ok, #ENCODING_UTF8, #ENCODING_ISO8859_1) |
70 | Local headpos = FindStr(html$, "<head", False, 0, encoding) | ||
70 | Local headpos = FindStr(html$, "<link", False, 0, encoding) | ||
71 | 71 | If headpos < 0 | If headpos < 0 |
72 | 72 | self:set_status("Nothing found") ;č hlavně ta hlavička.. | self:set_status("Nothing found") ;č hlavně ta hlavička.. |
73 | 73 | Return() | Return() |
... | ... | Function lurk:Explore(url$) | |
86 | 86 | p:setbase(url$) | p:setbase(url$) |
87 | 87 | p:setencoding(IIf(ok, "UTF-8", "ISO-8859-1")) | p:setencoding(IIf(ok, "UTF-8", "ISO-8859-1")) |
88 | 88 | ;č ten parser trefí šlak, až uvidí CO má zpracovat | ;č ten parser trefí šlak, až uvidí CO má zpracovat |
89 | p:Parse(html$) | ||
90 | Local lin, col, pos = p:pos() | ||
91 | 89 | ||
90 | ;Local lin, col, pos = p:pos() | ||
91 | |||
92 | Local taxometr = 0 | ||
93 | Local parsers_tired = 0 | ||
94 | Local total_links = 0 | ||
95 | For w$ In PatternFindStr(html$, "<link .->") | ||
96 | debugprint(w$) | ||
97 | total_links = total_links + 1 | ||
98 | taxometr = taxometr + StrLen(w$) | ||
99 | p:Parse(w$) | ||
100 | Local lin, col, pos = p:pos() | ||
101 | If pos < taxometr | ||
102 | debugprint("Parser tired") | ||
103 | Local err_code = ?p:Close() | ||
104 | parsers_tired = parsers_tired + 1 | ||
105 | p = XMLParser.New({StartElement = lurkStartElement}) | ||
106 | p:setbase(url$) | ||
107 | p:setencoding(IIf(ok, "UTF-8", "ISO-8859-1")) | ||
108 | taxometr = 0 | ||
109 | EndIf | ||
110 | Next | ||
92 | 111 | ;č html-ko nemůže nezpůsobit chybu. | ;č html-ko nemůže nezpůsobit chybu. |
93 | 112 | ;č nemá cenu je hlídat | ;č nemá cenu je hlídat |
94 | 113 | Local err_code = ?p:Close() | Local err_code = ?p:Close() |
95 | If len > pos | ||
96 | self:set_status("("..pos .."/"..len .. ") It's too hard. Parser lost.") | ||
97 | Else | ||
98 | self:set_status("Parser done.") | ||
99 | EndIf | ||
100 | |||
114 | If parsers_tired | ||
115 | self:set_status(parsers_tired .." of total ".. | ||
116 | total_links .. " links unparsed") | ||
117 | Else | ||
118 | self:set_status(total_links .. " links parsed") | ||
119 | EndIf | ||
101 | 120 | EndFunction | EndFunction |
102 | 121 | ||
103 | 122 |