验证中...
Languages: Clojure
Latest update 2018-12-09 09:55
agentdownload.clj
Raw Copy
(ns dowloadimg.agentdownload
  (:require [net.cgrand.enlive-html :as enlive])
  (:use [clojure.string :only (lower-case)]
        [clojure.java.io :only (as-url)])
  (:import [java.net URL MalformedURLException]
           [java.util.concurrent BlockingQueue LinkedBlockingQueue]
           [java.io StringReader]))
;; Forward declarations. `agents` is listed because `run` refers to it, while
;; its value is only installed later by the `def` inside `main`; without the
;; declaration `run` cannot be compiled when the file is loaded top to bottom.
(declare start-url run process handle-results agents)
(defn get-start-url
  "Builds the comment-page URLs to crawl, highest page number first.

  With no arguments, covers pages 2274 down to 3 (the original fixed range).
  With two arguments, covers `first-page` down to, but not including,
  `stop-page`. Returns a lazy seq of URL strings."
  ([] (get-start-url 2274 2))
  ([first-page stop-page]
   (map #(str "http://jandan.net/ooxx/page-" % "#comments")
        (range first-page stop-page -1))))
; Shared, thread-safe queue of page URLs still to be fetched. `main` loads it
; with the start URLs and `start-url` takes one URL at a time from it.
(def url-queue (LinkedBlockingQueue.))
(defn get-img-url
  "Returns a lazy seq with the :href attribute of every `a.view_img_link`
  element selected from the parsed enlive document `h`. An element without
  an :href contributes nil."
  [h]
  (->> (enlive/select h [:a.view_img_link])
       (map (comp :href :attrs))))
;; Download a single file.
(defn ^::blocking dowloadFile
  "Copies the resource at `url` (a string) into the D://dowloadimg//
  directory, naming the file after the text that follows the last \"/\" in
  `url`. Creates the target directory first if it does not exist. Both
  streams are closed when the copy finishes or fails. Exceptions raised
  while opening or copying the streams propagate to the caller."
  [^String url]
  (let [file-name (subs url (inc (.lastIndexOf url "/")))
        target    (str "D://dowloadimg//" file-name)]
    ;; output-stream does not create missing directories, so make sure the
    ;; download directory is there before opening the file.
    (clojure.java.io/make-parents target)
    (with-open [rd (clojure.java.io/input-stream url)
                wd (clojure.java.io/output-stream target)]
      (clojure.java.io/copy rd wd))))
(defn ^::blocking start-url
  "Agent action: takes the next URL from the state's :queue (blocking until
  one is available), fetches it, and returns a state map holding the :url,
  the page :content, and `process` as the next transition.

  If taking, parsing or fetching the URL throws, the incoming `state` is
  returned unchanged so the agent simply moves on to the next queued URL.
  `(run *agent*)` is always called so the agent keeps cycling."
  [{:keys [^BlockingQueue queue] :as state}]
  (try
    ;; The take/as-url step lives inside the try so that a malformed URL is
    ;; handled like a failed fetch instead of escaping past the catch and
    ;; the finally below.
    (let [url (as-url (.take queue))]
      {:url url
       :content (slurp url)
       ::t #'process})
    (catch Exception e
      state)
    (finally
      (run *agent*))))
(defn process
  "Agent action: parses the fetched page `content` with enlive and returns a
  state map carrying the page :url, the image :links found by `get-img-url`,
  and `handle-results` as the next transition. `(run *agent*)` is called
  afterwards whether or not parsing succeeded."
  [{:keys [url content]}]
  (try
    (let [page      (-> content
                        java.io.StringReader.
                        enlive/html-resource)
          img-links (get-img-url page)]
      {:url   url
       :links img-links
       ::t    #'handle-results})
    (finally
      (run *agent*))))
(defn ^::blocking handle-results
  "Agent action: downloads every link in :links with `dowloadFile`, then
  returns a fresh state that sends the agent back to `start-url` on the
  shared `url-queue`.

  Downloads are best-effort: a link that fails is reported on *err* and
  skipped, so one bad link neither aborts the remaining downloads nor makes
  the action throw. `(run *agent*)` is always called afterwards."
  [{:keys [links]}]
  (try
    (doseq [link links]
      (try
        (dowloadFile link)
        (catch Exception e
          ;; Report and continue; letting this escape would lose the rest of
          ;; the links and the next-state map returned below.
          (binding [*out* *err*]
            (println "Download failed for" link "-" (.getMessage e))))))
    {::t #'start-url :queue url-queue}
    (finally (run *agent*))))
(defn run
  "With no arguments, kicks every agent in `agents`. With one agent `a`,
  does nothing unless `a` is a member of `agents`; otherwise sends `a` an
  action that schedules the state's ::t transition on the same agent and
  leaves the state itself unchanged. Transitions whose var metadata has
  ::blocking set are scheduled with `send-off`, all others with `send`."
  ([]
   (dorun (map run agents)))
  ([a]
   (letfn [(schedule-next [{transition ::t :as state}]
             (if (::blocking (meta transition))
               (send-off *agent* transition)
               (send *agent* transition))
             state)]
     (when (agents a)
       (send a schedule-next)))))
(defn main
  "Entry point. Defines `agents` as a set of `agent-count` new agents, each
  starting in the `start-url` state on the shared `url-queue`, loads the
  queue with the URLs from `get-start-url`, and starts all agents via `run`."
  [agent-count]
  (let [new-worker (fn [] (agent {::t #'start-url :queue url-queue}))]
    (def agents (set (repeatedly agent-count new-worker))))
  (.addAll url-queue (get-start-url))
  (run))

Comment list( 0 )

You need to sign in to post a comment

Help Search