yokogiri 1.5.8 — Barebones Nokogiri for Clojure
dependencies
| (this space intentionally left almost blank) | |||||||||
(ns yokogiri.core
(:require [clojure.java.io :as io])
(:import [com.gargoylesoftware.htmlunit StringWebResponse WebClient BrowserVersion WebClientOptions]
[com.gargoylesoftware.htmlunit.html HtmlPage DomNode DomAttr HTMLParser]
[org.w3c.dom NamedNodeMap Node]
[se.fishtank.css.selectors.dom DOMNodeSelector])) | ||||||||||
;; Ask the compiler to emit a warning for every Java interop call in this
;; namespace that it cannot resolve statically; the type hints used throughout
;; the file (^WebClient, ^WebClientOptions, ^DomNode, ...) exist to avoid them.
(set! *warn-on-reflection* true) | ||||||||||
(defn- web-client-options
  "Returns the client options object for a WebClient.

  Thin private wrapper around `WebClient.getOptions`; the returned object is
  what the setters in `set-client-options-map` operate on."
  [^WebClient client]
  (. client getOptions))
(def set-client-options-map
  "Dispatch table used by `set-client-options!`.

  Maps an option keyword to a two-argument function `(fn [options value])`
  that calls the corresponding setter on a `WebClientOptions` instance.

  `:print-content-on-failing-status-code` is an alias of
  `:print-content-on-failing-status`: `get-client-options` reports the setting
  under the longer name, so accepting both lets a map returned by
  `get-client-options` be passed straight back to `set-client-options!`."
  {:activex-native #(.setActiveXNative ^WebClientOptions %1 %2)
   :applet #(.setAppletEnabled ^WebClientOptions %1 %2)
   :block-popups #(.setPopupBlockerEnabled ^WebClientOptions %1 %2)
   :css #(.setCssEnabled ^WebClientOptions %1 %2)
   :geolocation #(.setGeolocationEnabled ^WebClientOptions %1 %2)
   :homepage #(.setHomePage ^WebClientOptions %1 %2)
   :insecure-ssl #(.setUseInsecureSSL ^WebClientOptions %1 %2)
   :print-content-on-failing-status #(.setPrintContentOnFailingStatusCode ^WebClientOptions %1 %2)
   ;; Alias matching the key emitted by `get-client-options`.
   :print-content-on-failing-status-code #(.setPrintContentOnFailingStatusCode ^WebClientOptions %1 %2)
   :redirects #(.setRedirectEnabled ^WebClientOptions %1 %2)
   :throw-on-failing-status #(.setThrowExceptionOnFailingStatusCode ^WebClientOptions %1 %2)
   :throw-on-script-error #(.setThrowExceptionOnScriptError ^WebClientOptions %1 %2)
   :timeout #(.setTimeout ^WebClientOptions %1 %2)
   :tracking #(.setDoNotTrackEnabled ^WebClientOptions %1 %2)
   :javascript #(.setJavaScriptEnabled ^WebClientOptions %1 %2)})
;; Forward declaration: `set-client-options!` below refers to `*client*` in its
;; one-argument arity, but the var only receives its value further down in the
;; file (via `defonce`, after `make-client` is defined).
(declare ^:dynamic *client*) | ||||||||||
Sets options on the client. Usage:
Available Options:
(defn set-client-options!
  "Sets options on the client and returns the client.

  Arities:
    [opts]        - applies `opts` to the currently bound `*client*`.
    [client opts] - applies `opts` to the given WebClient.

  `opts` is a map (or nil) of option keyword -> value. Each entry is applied
  by looking the keyword up in `set-client-options-map` and calling the setter
  found there on the client's options object.

  Available options are the keys of `set-client-options-map`, e.g.
  :activex-native, :applet, :block-popups, :css, :geolocation, :homepage,
  :insecure-ssl, :javascript, :print-content-on-failing-status, :redirects,
  :throw-on-failing-status, :throw-on-script-error, :timeout, :tracking.

  Throws IllegalArgumentException when `opts` contains a key that is not in
  `set-client-options-map`. All keys are checked before any setter runs, so a
  bad key never leaves the client half-configured."
  ([opts] (set-client-options! *client* opts))
  ([^WebClient client opts]
   ;; Validate up front: a missing setter would otherwise surface as a bare
   ;; NullPointerException from calling nil as a function.
   (when-let [unknown (seq (remove set-client-options-map (keys opts)))]
     (throw (IllegalArgumentException.
             (str "Unknown client option(s): " (pr-str (vec unknown))
                  ". Expected one of: "
                  (pr-str (vec (sort (keys set-client-options-map))))))))
   (let [^WebClientOptions client-opts (web-client-options client)]
     (doseq [[k v] opts]
       ((get set-client-options-map k) client-opts v))
     client)))
Returns a map of all options currently set on a client. Usage:
(defn get-client-options
  "Returns a map of all options currently set on a client.

  The map is built by reading each setting from the client's
  `WebClientOptions` object. Keys mirror those of `set-client-options-map`,
  with one exception: the print-content setting is reported under
  `:print-content-on-failing-status-code`, whereas the setter table names it
  `:print-content-on-failing-status`."
  [^WebClient client]
  (let [^WebClientOptions o (.getOptions client)]
    {:activex-native                       (.isActiveXNative o)
     :applet                               (.isAppletEnabled o)
     :block-popups                         (.isPopupBlockerEnabled o)
     :css                                  (.isCssEnabled o)
     :geolocation                          (.isGeolocationEnabled o)
     :homepage                             (.getHomePage o)
     :insecure-ssl                         (.isUseInsecureSSL o)
     :javascript                           (.isJavaScriptEnabled o)
     :print-content-on-failing-status-code (.getPrintContentOnFailingStatusCode o)
     :redirects                            (.isRedirectEnabled o)
     :throw-on-failing-status              (.isThrowExceptionOnFailingStatusCode o)
     :throw-on-script-error                (.isThrowExceptionOnScriptError o)
     :timeout                              (.getTimeout o)
     :tracking                             (.isDoNotTrackEnabled o)}))
Constructs a new WebClient. Usage:
With Options:
Available Options:
(defn make-client
  "Constructs a new WebClient.

  Takes optional keyword arguments, e.g. `(make-client :javascript false)`.
  When any are given they are applied to the new client with
  `set-client-options!`, so the accepted keys are the keys of
  `set-client-options-map`.

  Returns the (possibly configured) WebClient.

  See also: yokogiri.core/set-client-options!"
  [& {:as opts}]
  (let [client (WebClient.)]
    ;; Configure the client we just built. Previously a second WebClient was
    ;; constructed here and the first one was dropped without being closed.
    (when (seq opts)
      (set-client-options! client opts))
    client))
;; Default client used by the single-argument arities of `get-page`, `as-page`
;; and `set-client-options!`. `defonce` keeps an already-created client when the
;; namespace is reloaded; it is dynamic so `with-client` can rebind it.
(defonce ^:dynamic *client* (make-client)) | ||||||||||
Takes a client which will be bound to *client* within the scope of the form. Usage:
(defmacro with-client
  "Evaluates `body` with `*client*` bound to the client `c`.

  Expands to a `binding` form, so the rebinding is visible only within the
  scope of `body`; the single-argument arities of functions such as `get-page`
  pick up `c` while inside it."
  [c & body]
  (let [bindings [`*client* c]]
    `(binding ~bindings ~@body)))
Takes a string, returns an HtmlPage. Usage:
(defn create-page
  "Parses a string of markup into an HtmlPage.

  The string is wrapped in a StringWebResponse attributed to the placeholder
  URL file://fake-response-url and parsed against the current window of a
  freshly constructed WebClient (not `*client*`).

  **Usage:**

      user> (create-page \"<html><body><a href=\\\"http://example.com\\\">Link</a></body></html>\")
      ;=> #<HtmlPage HtmlPage(file://fake-response-url)@478170219>"
  [xml]
  (let [fake-url (io/as-url "file://fake-response-url")
        canned   (StringWebResponse. xml fake-url)
        window   (.getCurrentWindow (WebClient.))]
    (HTMLParser/parseHtml canned window)))
Takes a client and a url, returns an HtmlPage. Usage:
(defn get-page
  "Fetches `url` and returns the resulting page.

  Arities:
    [url]        - uses the currently bound `*client*`.
    [client url] - uses the given WebClient.

  `url` is passed as a String to `WebClient.getPage`."
  ([^WebClient client, ^String url]
   (. client getPage url))
  ([url]
   (get-page *client* url)))
Takes a path as a string and creates a Page you can access with #'yokogiri.core/xpath, #'yokogiri.core/css, etc. Usage:
(defn as-page
  "Loads a local file as a page.

  `path` is turned into a file, then into a URL, then into that URL's string
  form, which is handed to `get-page`.

  Arities:
    [path]        - uses the currently bound `*client*`.
    [client path] - uses the given client."
  ([path]
   (as-page *client* path))
  ([client path]
   (let [file-url (-> path io/file io/as-url str)]
     (get-page client file-url))))
Takes an HtmlPage and an xpath string. Returns a vector of nodes which match the provided xpath string. Usage:
(defn xpath
  "Takes an HtmlPage and an xpath string.

  Returns a vector of every node that `HtmlPage.getByXPath` yields for the
  expression."
  [^HtmlPage page, ^String xpath]
  (let [matches (.getByXPath page xpath)]
    (into [] matches)))
Takes an HtmlPage and an xpath string. Returns the first matching node which matches the provided xpath string. Usage:
(defn first-by-xpath
  "Takes an HtmlPage and an xpath string.

  Returns whatever `HtmlPage.getFirstByXPath` yields for the expression,
  i.e. the first matching node."
  [^HtmlPage page, ^String xpath]
  (. page getFirstByXPath xpath))
Returns matches for a given CSS selector. Usage:
;; Background: http://www.goodercode.com/wp/use-css-selectors-with-htmlunit/
;; TODO: Bumping the version of css-selectors to 1.0.4 breaks querying by CSS.
(defn css
  "Returns matches for a given CSS selector.

  Wraps the page's document element in a DOMNodeSelector, runs
  `querySelectorAll` with `selector`, and returns the result passed through
  `seq` (so an empty result is nil)."
  [^HtmlPage page, ^String selector]
  (-> (.getDocumentElement page)
      (DOMNodeSelector.)
      (.querySelectorAll selector)
      seq))
Returns a node's XML representation. Usage:
(defn node-xml
  "Serializes a node to its XML representation via `DomNode.asXml`.

  **Usage:**

      user> (node-xml
              (first-by-xpath
                (get-page (make-client) \"http://www.example.com/\")
                \"//a\"))
      ;=> \"<a href=\\\"http://www.iana.org/domains/example\\\">\\n  More information...\\n</a>\\n\""
  [^DomNode node]
  (. node asXml))
Returns a node's text value. Usage:
(defn node-text
  "Returns a node's text value, as produced by `DomNode.asText`."
  [^DomNode node]
  (. node asText))
Returns a Clojure map of attributes for a given node. Usage:
(defn attr-map
  "Returns a Clojure map of attributes for a given node.

  Each attribute becomes an entry keyed by the keywordized attribute name with
  the attribute value as its value. A `:text` entry holding `(node-text node)`
  is added last, so it replaces any attribute that is itself named \"text\".

  See also: yokogiri.core/attrs"
  [^DomNode node]
  (let [^NamedNodeMap attributes (.getAttributes node)
        add-attr (fn [m idx]
                   (let [^DomAttr a (.item attributes idx)]
                     (assoc m (keyword (.getName a)) (.getValue a))))
        from-attributes (reduce add-attr {} (range (.getLength attributes)))]
    (assoc from-attributes :text (node-text node))))
(def attrs
  "Alias for `attr-map`. Holds the var itself rather than the function value,
  so calls through `attrs` follow any redefinition of `attr-map`.

  See also: yokogiri.core/attr-map"
  (var yokogiri.core/attr-map))
;; TODO: http://htmlunit.sourceforge.net/apidocs/com/gargoylesoftware/htmlunit/html/DomAttr.html
(defn- dom-attr
  "Returns the HtmlUnit DomAttr objects for a given node.

  Produces a lazy sequence of the items of the node's attribute map, in index
  order from 0 up to its length.

  See also: yokogiri.core/attr-map"
  [^DomNode node]
  (let [^NamedNodeMap attributes (.getAttributes node)
        n (.getLength attributes)]
    (for [idx (range n)]
      (.item attributes idx))))
;; REPL scratchpad, never evaluated at load time: builds a client, fetches
;; example.com, then tries an XPath query and a CSS query mapped through `attrs`.
(comment (def c (make-client)) (def p (get-page c "http://www.example.com/")) (xpath p "//a") (map attrs (css p "p"))) | ||||||||||