1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206
|
;;; url-queue.el --- Fetching web pages in parallel -*- lexical-binding: t -*-
;; Copyright (C) 2011-2025 Free Software Foundation, Inc.
;; Author: Lars Magne Ingebrigtsen <larsi@gnus.org>
;; Keywords: comm
;; This file is part of GNU Emacs.
;; GNU Emacs is free software: you can redistribute it and/or modify
;; it under the terms of the GNU General Public License as published by
;; the Free Software Foundation, either version 3 of the License, or
;; (at your option) any later version.
;; GNU Emacs is distributed in the hope that it will be useful,
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
;; GNU General Public License for more details.
;; You should have received a copy of the GNU General Public License
;; along with GNU Emacs. If not, see <https://www.gnu.org/licenses/>.
;;; Commentary:
;; The point of this package is to allow fetching web pages in
;; parallel -- but control the level of parallelism to avoid DoS-ing
;; web servers and Emacs.
;;; Code:
(eval-when-compile (require 'cl-lib))
(require 'browse-url)
(require 'url-parse)
(require 'url-file)
(defcustom url-queue-parallel-processes 6
"The number of concurrent processes."
:version "24.1"
:type 'natnum
:group 'url)
(defcustom url-queue-timeout 5
"How long to let a job live once it's started (in seconds)."
:version "24.1"
:type 'natnum
:group 'url)
;;; Internal variables.
(defvar url-queue nil)
(defvar url-queue-progress-timer nil)
(cl-defstruct url-queue
url callback cbargs silentp
buffer start-time pre-triggered
inhibit-cookiesp context-buffer)
;;;###autoload
(defun url-queue-retrieve (url callback &optional cbargs silent inhibit-cookies)
"Retrieve URL asynchronously and call CALLBACK with CBARGS when finished.
This is like `url-retrieve' (which see for details of the arguments),
but with limits on the degree of parallelism. The variable
`url-queue-parallel-processes' sets the number of concurrent processes.
The variable `url-queue-timeout' sets a timeout."
(setq url-queue
(append url-queue
(list (make-url-queue :url url
:callback callback
:cbargs cbargs
:silentp silent
:inhibit-cookiesp inhibit-cookies
:context-buffer (current-buffer)))))
(url-queue-setup-runners))
;; To ensure asynch behavior, we start the required number of queue
;; runners from `run-with-idle-timer'. So we're basically going
;; through the queue in two ways: 1) synchronously when a program
;; calls `url-queue-retrieve' (which will then start the required
;; number of queue runners), and 2) at the exit of each job, which
;; will then not start any further threads, but just reuse the
;; previous "slot".
(defun url-queue-setup-runners ()
(let ((running 0)
waiting)
(dolist (entry url-queue)
(cond
((or (url-queue-start-time entry)
(url-queue-pre-triggered entry))
(cl-incf running))
((not waiting)
(setq waiting entry))))
(when (and waiting
(< running url-queue-parallel-processes))
(setf (url-queue-pre-triggered waiting) t)
;; We start fetching from this idle timer...
(run-with-idle-timer 0.01 nil #'url-queue-run-queue)
;; And then we set up a separate timer to ensure progress when a
;; web server is unresponsive.
(unless url-queue-progress-timer
(setq url-queue-progress-timer
(run-with-idle-timer 1 1 #'url-queue-check-progress))))))
(defun url-queue-run-queue ()
(url-queue-prune-old-entries)
(let ((running 0)
waiting)
(dolist (entry url-queue)
(cond
((url-queue-start-time entry)
(cl-incf running))
((not waiting)
(setq waiting entry))))
(when (and waiting
(< running url-queue-parallel-processes))
(setf (url-queue-start-time waiting) (float-time))
(url-queue-start-retrieve waiting))))
(defun url-queue-check-progress ()
(when url-queue-progress-timer
(if url-queue
(url-queue-run-queue)
(cancel-timer url-queue-progress-timer)
(setq url-queue-progress-timer nil))))
(defun url-queue-callback-function (status job)
(let ((buffer (current-buffer)))
(setq url-queue (delq job url-queue))
(when (and (eq (car status) :error)
(eq (cadr (cadr status)) 'connection-failed))
;; If we get a connection error, then flush all other jobs from
;; the host from the queue. This particularly makes sense if the
;; error really is a DNS resolver issue, which happens
;; synchronously and totally halts Emacs.
(url-queue-remove-jobs-from-host
(plist-get (nthcdr 3 (cadr status)) :host)))
(url-queue-run-queue)
;; Somehow something deep in the bowels in the URL library may
;; have killed off the current buffer. So check that it's still
;; alive before doing anything, and if not, just create a dummy
;; buffer and do the callback anyway.
(unless (buffer-live-p buffer)
(set-buffer (generate-new-buffer " *temp*")))
(apply (url-queue-callback job) (cons status (url-queue-cbargs job)))))
(defun url-queue-remove-jobs-from-host (host)
(let ((jobs nil))
(dolist (job url-queue)
(when (equal (url-host (url-generic-parse-url (url-queue-url job)))
host)
(push job jobs)))
(dolist (job jobs)
(url-queue-kill-job job)
(setq url-queue (delq job url-queue)))))
(defun url-queue-start-retrieve (job)
(setf (url-queue-buffer job)
(ignore-errors
(with-current-buffer (if (buffer-live-p
(url-queue-context-buffer job))
(url-queue-context-buffer job)
(current-buffer))
(let ((url-request-noninteractive t)
(url-allow-non-local-files t))
(url-retrieve (url-queue-url job)
#'url-queue-callback-function (list job)
(url-queue-silentp job)
(url-queue-inhibit-cookiesp job)))))))
(defun url-queue-prune-old-entries ()
(let (dead-jobs)
(dolist (job url-queue)
;; Kill jobs that have lasted longer than the timeout.
(when (and (url-queue-start-time job)
(time-less-p url-queue-timeout
(time-since (url-queue-start-time job))))
(push job dead-jobs)))
(dolist (job dead-jobs)
(url-queue-kill-job job)
(setq url-queue (delq job url-queue)))))
(defun url-queue-kill-job (job)
(when (bufferp (url-queue-buffer job))
(let (process)
(while (setq process (get-buffer-process (url-queue-buffer job)))
(set-process-sentinel process 'ignore)
(ignore-errors
(delete-process process)))))
;; Call the callback with an error message to ensure that the caller
;; is notified that the job has failed.
(with-current-buffer
(if (and (bufferp (url-queue-buffer job))
(buffer-live-p (url-queue-buffer job)))
;; Use the (partially filled) process buffer if it exists.
(url-queue-buffer job)
;; If not, just create a new buffer, which will probably be
;; killed again by the caller.
(generate-new-buffer " *temp*"))
(apply (url-queue-callback job)
(cons (list :error (list 'error 'url-queue-timeout
"Queue timeout exceeded"))
(url-queue-cbargs job)))))
(provide 'url-queue)
;;; url-queue.el ends here
|