List of commits:
Subject Hash Author Date (UTC)
Improved bots list and clean old entries 9da0a014040753734beabfecb298b8956fc25fe3 Catalin(ux) M. BOIE 2020-10-05 06:34:43
builder: use secrets only if available 4fe47afc60435508ad90f62c51d489ac3c20b4d0 Catalin(ux) M. BOIE 2020-08-28 07:29:14
worker: use secrets only if available 5371e918e294a46cc38c765371a1caf7a3083eef Catalin(ux) M. BOIE 2020-08-28 07:28:12
builder: do not break meta variable 1d8207b1a782ab039406dbd087cdc9bb5ea0b898 Catalin(ux) M. BOIE 2020-08-28 07:27:48
Whitespace 86086337c5323b073f517bc59c18aba0808fb4f3 Catalin(ux) M. BOIE 2020-08-24 07:22:13
Bump version to 0.73 7cac4d7a31116ac9ad86e0babeb0c1aace5dd852 Catalin(ux) M. BOIE 2020-08-23 15:32:03
duilder update f691d84aa0aa70ef60a7487f247ed1a9ab1e3ab1 Catalin(ux) M. BOIE 2020-08-23 15:31:38
Bot prevention should not be active for edit operation 371c60a486ea989fafb66266cab71ea9f7db0269 Catalin(ux) M. BOIE 2020-08-23 15:02:13
TODO update 32591f3b1cf054bd70411592e5a82f5898d7da2f Catalin(ux) M. BOIE 2020-08-23 15:01:51
Show also by which protocol a user is connecting d35b5b12103f9a72a3e8788e7c48532b9c1e5b89 Catalin(ux) M. BOIE 2020-08-23 15:01:30
Fixed a small locking issue (unlock without lock) 0d1e7915545de251eea7f3bce468647fc2df1bf9 Catalin(ux) M. BOIE 2020-08-23 15:00:58
Give up the cache if we cannot connect in 150ms dad6cf20f9f4bdbfe2026809a404e852e7cd8e53 Catalin(ux) M. BOIE 2020-08-23 15:00:20
Limit the number of repositories in the discover page a0de6d683b59934168ef8415c9ec716aef68b195 Catalin(ux) M. BOIE 2020-08-23 14:58:37
Added support for other branch then master (main prefered) 1108f43c3639ffcc06f1314d901640564fae7483 Catalin(ux) M. BOIE 2020-08-23 14:55:51
Added possibility for admin to mail users 05068314021bbdf6f26bc92bee47177b170b2a1c Catalin(ux) M. BOIE 2020-08-23 14:43:10
CSS: small fixes e4a39879513e000b5d9be588201916950fa629fc Catalin(ux) M. BOIE 2020-08-23 14:03:23
Cosmetic cbe42130692ed9b876746e6c97f4fb32439c6190 Catalin(ux) M. BOIE 2020-08-23 13:59:00
Default statistics are now per month c312ad532190fe566f2243881a883568b4f77c23 Catalin(ux) M. BOIE 2020-08-23 12:35:36
events: when splitting an event we need a transaction e685c800814365661b348c0d04aa8c0b3605ed7e Catalin(ux) M. BOIE 2020-08-21 04:35:17
Fix for a bug preventing partition table to be created in advance 51049495934efa8c7bca710c6f2538f34bc4de9a Catalin(ux) M. BOIE 2020-08-20 04:03:49
Commit 9da0a014040753734beabfecb298b8956fc25fe3 - Improved bots list and clean old entries
Author: Catalin(ux) M. BOIE
Author date (UTC): 2020-10-05 06:34
Committer name: Catalin(ux) M. BOIE
Committer date (UTC): 2020-10-05 06:34
Parent(s): 4fe47afc60435508ad90f62c51d489ac3c20b4d0
Signer:
Signing key:
Signing status: N
Tree: 22c108a3d0b5c249bd79a85e654d7f41db16d775
File Lines added Lines deleted
inc/util.inc.php 65 65
scripts/cron.php 31 0
File inc/util.inc.php changed (mode: 100644) (index 936cef3..a78df36)
... ... function rg_save($file, $a)
2827 2827 */ */
2828 2828 function rg_is_bot($ua) function rg_is_bot($ua)
2829 2829 { {
2830 if (empty($ua))
2831 return FALSE;
2832
2833 if (strstr($ua, ' SemrushBot'))
2834 return TRUE;
2835
2836 if (strstr($ua, ' AhrefsBot'))
2837 return TRUE;
2838
2839 if (strstr($ua, ' DotBot'))
2840 return TRUE;
2841
2842 if (strstr($ua, ' MJ12bot'))
2843 return TRUE;
2844
2845 if (strstr($ua, ' PetalBot'))
2846 return TRUE;
2847
2848 if (strstr($ua, ' Googlebot'))
2849 return TRUE;
2850
2851 if (strstr($ua, ' bingbot'))
2852 return TRUE;
2853
2854 if (strstr($ua, ' SeznamBot'))
2855 return TRUE;
2856
2857 if (strncmp($ua, 'CCBot', 5) == 0)
2858 return TRUE;
2859
2860 if (strncmp($ua, 'yacybot ', 8) == 0)
2861 return TRUE;
2862
2863 if (strcmp($ua, 'Internet-structure-research-project-bot') == 0)
2864 return TRUE;
2865
2866 if (strstr($ua, ' Mail.RU_Bot'))
2867 return TRUE;
2868
2869 if (strstr($ua, ' coccocbot-web'))
2870 return TRUE;
2871
2872 if (strstr($ua, ' MojeekBot'))
2873 return TRUE;
2874
2875 if (strstr($ua, ' DuckDuckGo'))
2876 return TRUE;
2877
2878 if (strstr($ua, ' YandexBot'))
2879 return TRUE;
2880
2881 if (strstr($ua, 'Applebot/'))
2882 return TRUE;
2883
2884 if (strstr($ua, ' DNSResearchBot'))
2885 return TRUE;
2886
2887 if (strstr($ua, ' Cliqzbot/'))
2888 return TRUE;
2889
2890 if (strstr($ua, ' webtechbot;'))
2891 return TRUE;
2892
2893 if (strstr($ua, ' BLEXBot/'))
2894 return TRUE;
2830 if (empty($ua)) return FALSE;
2831 if (strstr($ua, ' AhrefsBot')) return TRUE;
2832 if (strstr($ua, 'Applebot/')) return TRUE;
2833 if (strcmp($ua, 'AWeb') == 0) return TRUE;
2834 if (strstr($ua, 'BananaBot/')) return TRUE;
2835 if (strstr($ua, ' bingbot')) return TRUE;
2836 if (strstr($ua, ' BLEXBot/')) return TRUE;
2837 if (strstr($ua, 'CATExplorador')) return TRUE;
2838 if (strncmp($ua, 'CCBot/', 6) == 0) return TRUE;
2839 if (strstr($ua, 'CISPA Webcrawler ')) return TRUE;
2840 if (strstr($ua, ' Cliqzbot/')) return TRUE;
2841 if (strstr($ua, ' coccocbot-web')) return TRUE;
2842 if (strstr($ua, 'dcrawl/')) return TRUE;
2843 if (strstr($ua, ' DNSResearchBot')) return TRUE;
2844 if (strstr($ua, 'DomainStatsBot/')) return TRUE;
2845 if (strstr($ua, ' DotBot')) return TRUE;
2846 if (strstr($ua, ' DuckDuckGo')) return TRUE;
2847 if (strstr($ua, 'e.ventures Investment Crawler ')) return TRUE;
2848 if (strstr($ua, 'facebookexternalhit/')) return TRUE;
2849 if (strstr($ua, 'Facebot')) return TRUE;
2850 if (strstr($ua, 'GarlikCrawler/')) return TRUE;
2851 if (strstr($ua, 'Gigabot ')) return TRUE;
2852 if (strstr($ua, ' Googlebot')) return TRUE;
2853 if (strcmp($ua, 'googlebot') == 0) return TRUE;
2854 if (strstr($ua, 'Googlebot-Image/')) return TRUE;
2855 if (strcmp($ua, 'Googlebot-News') == 0) return TRUE;
2856 if (strstr($ua, 'Googlebot-Video/')) return TRUE;
2857 if (strstr($ua, 'GoScraper')) return TRUE;
2858 if (strstr($ua, 'ichiro/')) return TRUE;
2859 if (strcmp($ua, 'Internet-structure-research-project-bot') == 0) return TRUE;
2860 if (strstr($ua, 'Lawinsiderbot/')) return TRUE;
2861 if (strstr($ua, 'LightspeedSystemsCrawler')) return TRUE;
2862 if (strstr($ua, 'ltx71 ')) return TRUE;
2863 if (strstr($ua, ' Mail.RU_Bot')) return TRUE;
2864 if (strstr($ua, ' MJ12bot')) return TRUE;
2865 if (strstr($ua, ' MojeekBot')) return TRUE;
2866 if (strstr($ua, 'msnbot-media/')) return TRUE;
2867 if (strstr($ua, 'NewsGator FetchLinks extension/')) return TRUE;
2868 if (strstr($ua, 'PageThing.com')) return TRUE;
2869 if (strstr($ua, 'Pandalytics')) return TRUE;
2870 if (strstr($ua, 'panscient.com')) return TRUE;
2871 if (strstr($ua, ' PetalBot')) return TRUE;
2872 if (strstr($ua, 'pimeyes.com crawler')) return TRUE;
2873 if (strstr($ua, 'Robot Terminator ')) return TRUE;
2874 if (strstr($ua, 'rpmlint/')) return TRUE;
2875 if (strstr($ua, 'SaaSHub')) return TRUE;
2876 if (strstr($ua, 'Screaming Frog SEO Spider')) return TRUE;
2877 if (strcmp($ua, 'SemrushBot') == 0) return TRUE;
2878 if (strstr($ua, ' SeznamBot')) return TRUE;
2879 if (strcmp($ua, 'Sidetrade indexer bot') == 0) return TRUE;
2880 if (strstr($ua, 'Slackbot-LinkExpanding ')) return TRUE;
2881 if (strstr($ua, 'Sogou web spider')) return TRUE;
2882 if (strstr($ua, 'TelegramBot ')) return TRUE;
2883 if (strstr($ua, 'The Knowledge AI')) return TRUE;
2884 if (strstr($ua, 'TprAdsTxtCrawler')) return TRUE;
2885 if (strstr($ua, 'TurnitinBot ')) return TRUE;
2886 if (strstr($ua, 'Twitterbot/')) return TRUE;
2887 if (strstr($ua, 'Wappalyzer')) return TRUE;
2888 if (strstr($ua, ' webtechbot;')) return TRUE;
2889 if (strstr($ua, 'Xenu Link Sleuth/')) return TRUE;
2890 if (strcmp($ua, 'XYZ Spider') == 0) return TRUE;
2891 if (strncmp($ua, 'yacybot ', 8) == 0) return TRUE;
2892 if (strstr($ua, ' YandexBot')) return TRUE;
2893 if (strcmp($ua, ' YisouSpider') == 0) return TRUE;
2894 if (strstr($ua, 'ZoomBot ')) return TRUE;
2895 2895
2896 2896 return FALSE; return FALSE;
2897 2897 } }
File scripts/cron.php changed (mode: 100644) (index 11e10b2..bc48b03)
... ... rg_mr_queue_process($db);
274 274 if (gmdate('Hi') == '0305') if (gmdate('Hi') == '0305')
275 275 rg_clean_logs('/var/log/rocketgit'); rg_clean_logs('/var/log/rocketgit');
276 276
277 $r = rg_state_get_uint($db, 'clean_conn');
278 while (($r < $now - 24 * 3600) && (rg_load() < 20)) {
279 rg_state_set($db, 'clean_conn', $now); // we do not want to run too quickly
280
281 rg_log_enter('Cleaning conns (by ua)...');
282 $sql = 'SELECT DISTINCT(ua) AS ua FROM conns';
283 $res = rg_sql_query($db, $sql);
284 if ($res === FALSE) {
285 rg_internal_error('Cannot select from conns'
286 . ' (' . rg_sql_error() . ')!');
287 break;
288 }
289 while (($row = rg_sql_fetch_array($res))) {
290 if (!rg_is_bot($row['ua']))
291 continue;
292
293 $sql = 'DELETE FROM conns WHERE ua = @@ua@@';
294 $params = array('ua' => $row['ua']);
295 $res2 = rg_sql_query_params($db, $sql, $params);
296 if ($res2 === FALSE) {
297 rg_internal_error('Cannot delete ua (' . rg_sql_error() . ')!');
298 break;
299 }
300 rg_sql_free_result($res2);
301 }
302 rg_sql_free_result($res);
303
304 rg_log_exit();
305 break;
306 }
307
277 308 rg_log_cron(); rg_log_cron();
278 309
279 310 rg_log('Done!'); rg_log('Done!');
Hints:
Before your first commit, do not forget to set up your git environment:
git config --global user.name "your_name_here"
git config --global user.email "your@email_here"

Clone this repository using HTTP(S):
git clone https://rocketgit.com/user/catalinux/rocketgit

Clone this repository using SSH (do not forget to upload a key first):
git clone ssh://rocketgit@ssh.rocketgit.com/user/catalinux/rocketgit

Clone this repository using git:
git clone git://git.rocketgit.com/user/catalinux/rocketgit

You are allowed to anonymously push to this repository.
This means that your pushed commits will automatically be transformed into a merge request:
... clone the repository ...
... make some changes and some commits ...
git push origin main