OXIESEC PANEL
- Current Dir: /usr/share/nmap/nselib
Server IP: 139.59.38.164
Name  Size  Modified  Perms
📁 ..  -  08/07/2020 12:36:00 PM  rwxr-xr-x
📄 afp.lua  71.92 KB  04/16/2018 01:11:39 AM  rw-r--r--
📄 ajp.lua  16.69 KB  04/16/2018 01:11:39 AM  rw-r--r--
📄 amqp.lua  10.5 KB  04/16/2018 01:11:39 AM  rw-r--r--
📄 anyconnect.lua  4.45 KB  04/16/2018 01:11:39 AM  rw-r--r--
📄 asn1.lua  14.57 KB  04/16/2018 01:11:39 AM  rw-r--r--
📄 base32.lua  7.33 KB  04/16/2018 01:11:39 AM  rw-r--r--
📄 base64.lua  5.67 KB  04/16/2018 01:11:39 AM  rw-r--r--
📄 bin.lua  12.89 KB  04/16/2018 01:11:39 AM  rw-r--r--
📄 bit.lua  2.43 KB  04/16/2018 01:11:39 AM  rw-r--r--
📄 bitcoin.lua  16.99 KB  04/16/2018 01:11:39 AM  rw-r--r--
📄 bits.lua  1.82 KB  04/16/2018 01:11:39 AM  rw-r--r--
📄 bittorrent.lua  40.77 KB  04/16/2018 01:11:39 AM  rw-r--r--
📄 bjnp.lua  9.45 KB  04/16/2018 01:11:39 AM  rw-r--r--
📄 brute.lua  50.04 KB  04/16/2018 01:11:39 AM  rw-r--r--
📄 cassandra.lua  5.78 KB  04/16/2018 01:11:39 AM  rw-r--r--
📄 citrixxml.lua  16 KB  04/16/2018 01:11:39 AM  rw-r--r--
📄 coap.lua  76.24 KB  04/16/2018 01:11:39 AM  rw-r--r--
📄 comm.lua  10.75 KB  04/16/2018 01:11:39 AM  rw-r--r--
📄 creds.lua  18.22 KB  04/16/2018 01:11:39 AM  rw-r--r--
📄 cvs.lua  3.13 KB  04/16/2018 01:11:39 AM  rw-r--r--
📁 data  -  08/07/2020 12:36:00 PM  rwxr-xr-x
📄 datafiles.lua  11.05 KB  04/16/2018 01:11:39 AM  rw-r--r--
📄 datetime.lua  1.16 KB  04/16/2018 01:11:39 AM  rw-r--r--
📄 dhcp.lua  29.17 KB  04/16/2018 01:11:39 AM  rw-r--r--
📄 dhcp6.lua  19.87 KB  04/16/2018 01:11:39 AM  rw-r--r--
📄 dns.lua  51.44 KB  04/16/2018 01:11:39 AM  rw-r--r--
📄 dnsbl.lua  19.02 KB  04/16/2018 01:11:39 AM  rw-r--r--
📄 dnssd.lua  12.57 KB  04/16/2018 01:11:39 AM  rw-r--r--
📄 drda.lua  24.2 KB  04/16/2018 01:11:39 AM  rw-r--r--
📄 eap.lua  7.64 KB  04/16/2018 01:11:39 AM  rw-r--r--
📄 eigrp.lua  14.47 KB  04/16/2018 01:11:39 AM  rw-r--r--
📄 formulas.lua  5.35 KB  04/16/2018 01:11:39 AM  rw-r--r--
📄 ftp.lua  9.03 KB  04/16/2018 01:11:39 AM  rw-r--r--
📄 geoip.lua  1.71 KB  04/16/2018 01:11:39 AM  rw-r--r--
📄 giop.lua  18.44 KB  04/16/2018 01:11:39 AM  rw-r--r--
📄 gps.lua  3.05 KB  04/16/2018 01:11:39 AM  rw-r--r--
📄 http.lua  105.81 KB  04/16/2018 01:11:39 AM  rw-r--r--
📄 httpspider.lua  36.15 KB  04/16/2018 01:11:39 AM  rw-r--r--
📄 iax2.lua  9.6 KB  04/16/2018 01:11:39 AM  rw-r--r--
📄 ike.lua  15.02 KB  04/16/2018 01:11:39 AM  rw-r--r--
📄 imap.lua  9.59 KB  04/16/2018 01:11:39 AM  rw-r--r--
📄 informix.lua  39.76 KB  04/16/2018 01:11:39 AM  rw-r--r--
📄 ipOps.lua  26.92 KB  04/16/2018 01:11:39 AM  rw-r--r--
📄 ipmi.lua  10.02 KB  04/16/2018 01:11:39 AM  rw-r--r--
📄 ipp.lua  12.54 KB  04/16/2018 01:11:39 AM  rw-r--r--
📄 irc.lua  757 bytes  04/16/2018 01:11:39 AM  rw-r--r--
📄 iscsi.lua  21.45 KB  04/16/2018 01:11:39 AM  rw-r--r--
📄 isns.lua  15.34 KB  04/16/2018 01:11:39 AM  rw-r--r--
📄 jdwp.lua  43.57 KB  04/16/2018 01:11:39 AM  rw-r--r--
📄 json.lua  11.65 KB  04/16/2018 01:11:39 AM  rw-r--r--
📄 ldap.lua  31.86 KB  04/16/2018 01:11:39 AM  rw-r--r--
📄 lfs.luadoc  1.68 KB  04/16/2018 01:11:39 AM  rw-r--r--
📄 libssh2-utility.lua  4.39 KB  04/16/2018 01:11:39 AM  rw-r--r--
📄 libssh2.luadoc  4.75 KB  04/16/2018 01:11:39 AM  rw-r--r--
📄 listop.lua  4.66 KB  04/16/2018 01:11:39 AM  rw-r--r--
📄 lpeg-utility.lua  5.64 KB  04/16/2018 01:11:39 AM  rw-r--r--
📄 lpeg.luadoc  351 bytes  04/16/2018 01:11:39 AM  rw-r--r--
📄 ls.lua  10.96 KB  04/16/2018 01:11:39 AM  rw-r--r--
📄 match.lua  2.05 KB  04/16/2018 01:11:39 AM  rw-r--r--
📄 membase.lua  9.88 KB  04/16/2018 01:11:39 AM  rw-r--r--
📄 mobileme.lua  8.46 KB  04/16/2018 01:11:39 AM  rw-r--r--
📄 mongodb.lua  21.29 KB  04/16/2018 01:11:39 AM  rw-r--r--
📄 mqtt.lua  28.95 KB  04/16/2018 01:11:39 AM  rw-r--r--
📄 msrpc.lua  179.93 KB  04/16/2018 01:11:39 AM  rw-r--r--
📄 msrpcperformance.lua  29.72 KB  04/16/2018 01:11:39 AM  rw-r--r--
📄 msrpctypes.lua  167.61 KB  04/16/2018 01:11:39 AM  rw-r--r--
📄 mssql.lua  110.87 KB  04/16/2018 01:11:39 AM  rw-r--r--
📄 multicast.lua  6.1 KB  04/16/2018 01:11:39 AM  rw-r--r--
📄 mysql.lua  17.09 KB  04/16/2018 01:11:39 AM  rw-r--r--
📄 natpmp.lua  5.04 KB  04/16/2018 01:11:39 AM  rw-r--r--
📄 ncp.lua  36 KB  04/16/2018 01:11:39 AM  rw-r--r--
📄 ndmp.lua  11.58 KB  04/16/2018 01:11:39 AM  rw-r--r--
📄 netbios.lua  13.9 KB  04/16/2018 01:11:39 AM  rw-r--r--
📄 nmap.luadoc  40.34 KB  04/16/2018 01:11:39 AM  rw-r--r--
📄 nrpc.lua  4.42 KB  04/16/2018 01:11:39 AM  rw-r--r--
📄 nsedebug.lua  3.49 KB  04/16/2018 01:11:39 AM  rw-r--r--
📄 omp2.lua  4.77 KB  04/16/2018 01:11:39 AM  rw-r--r--
📄 openssl.luadoc  7.08 KB  04/16/2018 01:11:39 AM  rw-r--r--
📄 ospf.lua  15.29 KB  04/16/2018 01:11:39 AM  rw-r--r--
📄 packet.lua  36.65 KB  04/16/2018 01:11:39 AM  rw-r--r--
📄 pcre.luadoc  6.79 KB  04/16/2018 01:11:39 AM  rw-r--r--
📄 pgsql.lua  20.61 KB  04/16/2018 01:11:39 AM  rw-r--r--
📄 pop3.lua  5.7 KB  04/16/2018 01:11:39 AM  rw-r--r--
📄 pppoe.lua  29.95 KB  04/16/2018 01:11:39 AM  rw-r--r--
📄 proxy.lua  12.04 KB  04/16/2018 01:11:39 AM  rw-r--r--
📄 rdp.lua  11.05 KB  04/16/2018 01:11:39 AM  rw-r--r--
📄 re.lua  8.22 KB  04/16/2018 01:11:39 AM  rw-r--r--
📄 redis.lua  3.59 KB  04/16/2018 01:11:39 AM  rw-r--r--
📄 rmi.lua  47.89 KB  04/16/2018 01:11:39 AM  rw-r--r--
📄 rpc.lua  106.22 KB  04/16/2018 01:11:39 AM  rw-r--r--
📄 rpcap.lua  11.19 KB  04/16/2018 01:11:39 AM  rw-r--r--
📄 rsync.lua  5.19 KB  04/16/2018 01:11:39 AM  rw-r--r--
📄 rtsp.lua  8.67 KB  04/16/2018 01:11:39 AM  rw-r--r--
📄 sasl.lua  16.38 KB  04/16/2018 01:11:39 AM  rw-r--r--
📄 shortport.lua  8.01 KB  04/16/2018 01:11:39 AM  rw-r--r--
📄 sip.lua  30.56 KB  04/16/2018 01:11:39 AM  rw-r--r--
📄 slaxml.lua  17.9 KB  04/16/2018 01:11:39 AM  rw-r--r--
📄 smb.lua  175.85 KB  04/16/2018 01:11:39 AM  rw-r--r--
📄 smb2.lua  16.32 KB  04/16/2018 01:11:39 AM  rw-r--r--
📄 smbauth.lua  37.53 KB  04/16/2018 01:11:39 AM  rw-r--r--
📄 smtp.lua  19.81 KB  04/16/2018 01:11:39 AM  rw-r--r--
📄 snmp.lua  15.99 KB  04/16/2018 01:11:39 AM  rw-r--r--
📄 socks.lua  8.26 KB  04/16/2018 01:11:39 AM  rw-r--r--
📄 srvloc.lua  12.25 KB  04/16/2018 01:11:39 AM  rw-r--r--
📄 ssh1.lua  8.88 KB  04/16/2018 01:11:39 AM  rw-r--r--
📄 ssh2.lua  11.88 KB  04/16/2018 01:11:39 AM  rw-r--r--
📄 sslcert.lua  33.34 KB  04/16/2018 01:11:39 AM  rw-r--r--
📄 sslv2.lua  9.63 KB  04/16/2018 01:11:39 AM  rw-r--r--
📄 stdnse.lua  45.93 KB  04/16/2018 01:11:39 AM  rw-r--r--
📄 strbuf.lua  4.52 KB  04/16/2018 01:11:39 AM  rw-r--r--
📄 strict.lua  2.53 KB  04/16/2018 01:11:39 AM  rw-r--r--
📄 stun.lua  11.51 KB  04/16/2018 01:11:39 AM  rw-r--r--
📄 tab.lua  3.35 KB  04/16/2018 01:11:39 AM  rw-r--r--
📄 target.lua  3.93 KB  04/16/2018 01:11:39 AM  rw-r--r--
📄 tftp.lua  9.38 KB  04/16/2018 01:11:39 AM  rw-r--r--
📄 tls.lua  56.16 KB  04/16/2018 01:11:39 AM  rw-r--r--
📄 tn3270.lua  43.75 KB  04/16/2018 01:11:39 AM  rw-r--r--
📄 tns.lua  64.17 KB  04/16/2018 01:11:39 AM  rw-r--r--
📄 unicode.lua  14.32 KB  04/16/2018 01:11:39 AM  rw-r--r--
📄 unittest.lua  12.33 KB  04/16/2018 01:11:39 AM  rw-r--r--
📄 unpwdb.lua  10.08 KB  04/16/2018 01:11:39 AM  rw-r--r--
📄 upnp.lua  11.18 KB  04/16/2018 01:11:39 AM  rw-r--r--
📄 url.lua  12.09 KB  04/16/2018 01:11:39 AM  rw-r--r--
📄 versant.lua  8.6 KB  04/16/2018 01:11:39 AM  rw-r--r--
📄 vnc.lua  23.3 KB  04/16/2018 01:11:39 AM  rw-r--r--
📄 vulns.lua  76.29 KB  04/16/2018 01:11:39 AM  rw-r--r--
📄 vuzedht.lua  16.62 KB  04/16/2018 01:11:39 AM  rw-r--r--
📄 wsdd.lua  12.03 KB  04/16/2018 01:11:39 AM  rw-r--r--
📄 xdmcp.lua  11.9 KB  04/16/2018 01:11:39 AM  rw-r--r--
📄 xmpp.lua  15.88 KB  04/16/2018 01:11:39 AM  rw-r--r--
Editing: httpspider.lua
---
-- A smallish httpspider library providing basic spidering capabilities
-- It consists of the following classes:
--
-- * <code>Options</code>
-- ** This class is responsible for handling library options.
--
-- * <code>LinkExtractor</code>
-- ** This class contains code responsible for extracting urls from web pages.
--
-- * <code>URL</code>
-- ** This class contains code to parse and process URLs.
--
-- * <code>UrlQueue</code>
-- ** This class contains a queue of the next links to process.
--
-- * <code>Crawler</code>
-- ** This class is responsible for the actual crawling.
--
-- The following sample code shows how the spider could be used:
-- <code>
--   local crawler = httpspider.Crawler:new( host, port, '/', { scriptname = SCRIPT_NAME } )
--   crawler:set_timeout(10000)
--
--   local result
--   while(true) do
--     local status, r = crawler:crawl()
--     if ( not(status) ) then
--       break
--     end
--     if ( r.response.body:match(str_match) ) then
--       crawler:stop()
--       result = r.url
--       break
--     end
--   end
--
--   return result
-- </code>
--
-- For advanced use, the library currently supports a number of closures (withinhost,
-- withindomain, doscraping). Please note, that withinhost and withindomain options also
-- support boolean values. You will want to override them only for advanced use. You can
-- define them using the following utilities:
--
-- * <code>iswithinhost</code>
-- ** You can use this utility to check if the resource exists within the host.
--
-- * <code>iswithindomain</code>
-- ** You can use this utility to check if the resource exists within the domain.
--
-- * <code>isresource</code>
-- ** You can use this utility to check the type of the resource (for example "js").
-- ** A third option may hold a number of signs that may exist after the extension
--    of the resource. By default, these are [#, ?]. For example, if we want to return
--    only php resources, the function will also return example.php?query=foo or
--    example.php#foo.
--
-- The following sample code shows an example usage. We override the default
-- withinhost method and we allow spidering only on resources within the host
-- that they are not "js" or "css".
-- <code>
--   crawler.options.withinhost = function(url)
--     if crawler:iswithinhost(url)
--       and not crawler:isresource(url, "js")
--       and not crawler:isresource(url, "css") then
--       return true
--     end
--   end
-- </code>
--
-- @author Patrik Karlsson <patrik@cqure.net>
--
-- @args httpspider.maxdepth the maximum amount of directories beneath
--       the initial url to spider. A negative value disables the limit.
--       (default: 3)
-- @args httpspider.maxpagecount the maximum amount of pages to visit.
--       A negative value disables the limit (default: 20)
-- @args httpspider.url the url to start spidering. This is a URL
--       relative to the scanned host eg. /default.html (default: /)
-- @args httpspider.withinhost Closure that overrides the default withinhost
--       function that only spiders URLs within the same host. If this is
--       set to false the crawler will spider URLs both inside and outside
--       the host. See the closure section above to override the default
--       behaviour. (default: true)
-- @args httpspider.withindomain Closure that overrides the default
--       withindomain function that only spiders URLs within the same
--       domain. This widens the scope from <code>withinhost</code> and can
--       not be used in combination. See the closure section above to
--       override the default behaviour.
(default: false) -- @args httpspider.noblacklist if set, doesn't load the default blacklist -- @args httpspider.useheadfornonwebfiles if set, the crawler would use -- HEAD instead of GET for files that do not have extensions indicating -- that they are webpages (the list of webpage extensions is located in -- nselib/data/http-web-files-extensions.lst) -- @args httpspider.doscraping Closure that overrides the default doscraping -- function used to check if the resource should be scraped (in terms -- of extracting any links within it). See the closure section above to -- override the default behaviour. --- local coroutine = require "coroutine" local http = require "http" local io = require "io" local nmap = require "nmap" local stdnse = require "stdnse" local string = require "string" local table = require "table" local url = require "url" _ENV = stdnse.module("httpspider", stdnse.seeall) local LIBRARY_NAME = "httpspider" local PREFETCH_SIZE = 5 -- The Options class, handling all spidering options Options = { new = function(self, options) local o = { } -- copy all options as class members for k, v in pairs(options) do o[k] = v end -- set a few default values o.timeout = options.timeout or 10000 o.whitelist = o.whitelist or {} o.blacklist = o.blacklist or {} local removewww = function(url) return string.gsub(url, "^www%.", "") end -- set up the appropriate matching functions if ( o.withinhost ) then o.withinhost = function(u) local parsed_u = url.parse(tostring(u)) if ( o.base_url:getPort() ~= 80 and o.base_url:getPort() ~= 443 ) then if ( parsed_u.port ~= tonumber(o.base_url:getPort()) ) then return false end elseif ( parsed_u.scheme ~= o.base_url:getProto() ) then return false -- if urls don't match only on the "www" prefix, then they are probably the same elseif ( parsed_u.host == nil or removewww(parsed_u.host:lower()) ~= removewww(o.base_url:getHost():lower()) ) then return false end return true end end if ( o.withindomain ) then o.withindomain = function(u) local parsed_u = url.parse(tostring(u)) if ( o.base_url:getPort() ~= 80 and o.base_url:getPort() ~= 443 ) then if ( parsed_u.port ~= tonumber(o.base_url:getPort()) ) then return false end elseif ( parsed_u.scheme ~= o.base_url:getProto() ) then return false elseif ( parsed_u.host == nil or parsed_u.host:sub(-#o.base_url:getDomain()):lower() ~= o.base_url:getDomain():lower() ) then return false end return true end end if (not o.doscraping) then o.doscraping = function(u) return true end end setmetatable(o, self) self.__index = self return o end, addWhitelist = function(self, func) table.insert(self.whitelist, func) end, addBlacklist = function(self, func) table.insert(self.blacklist, func) end, } -- Placeholder for form extraction code FormExtractor = { } LinkExtractor = { -- Creates a new instance of LinkExtractor -- @return o instance of LinkExtractor new = function(self, url, html, options) local o = { url = url, html = html, links = {}, options = options, } setmetatable(o, self) self.__index = self o:parse() return o end, -- is the link absolute or not? isAbsolute = function(url) -- at this point we don't care about the protocol -- also, we don't add // to cover stuff like: -- feed:http://example.com/rss.xml return ( url:match('^%w*:') ~= nil ) end, -- Creates an absolute link from a relative one based on the base_url -- The functionality is very simple and does not take any ../../ in -- consideration. 
-- -- @param base_url URL containing the page url from which the links were -- extracted -- @param rel_url string containing the relative portion of the URL -- @return link string containing the absolute link createAbsolute = function(base_url, rel_url, base_href) -- is protocol-relative? if rel_url:match("^//") then return ("%s%s%s"):format(base_url:getProto(), ":", rel_url) end -- is relative with leading slash? ie /dir1/foo.html local leading_slash = rel_url:match("^/") rel_url = rel_url:match("^/?(.*)") or '/' -- check for tailing slash if ( base_href and not(base_href:match("/$") ) ) then base_href = base_href .. '/' end if base_url:getPort() == url.get_default_port(base_url:getProto()) then if ( leading_slash ) then return ("%s://%s/%s"):format(base_url:getProto(), base_url:getHost(), rel_url) else if ( base_href ) then return ("%s%s"):format(base_href, rel_url) else return ("%s://%s%s%s"):format(base_url:getProto(), base_url:getHost(), base_url:getDir(), rel_url) end end else if ( leading_slash ) then return ("%s://%s:%d/%s"):format(base_url:getProto(), base_url:getHost(), base_url:getPort(), rel_url) else if ( base_href ) then return ("%s%s"):format(base_href, rel_url) else return ("%s://%s:%d%s%s"):format(base_url:getProto(), base_url:getHost(), base_href or base_url:getPort(), base_url:getDir(), rel_url) end end end end, -- Gets the depth of the link, relative to our base url eg. -- base_url = http://www.cqure.net/wp/ -- url = http://www.cqure.net/wp/ - depth: 0 -- url = http://www.cqure.net/wp/index.php - depth: 0 -- url = http://www.cqure.net/wp/2011/index.php - depth: 1 -- url = http://www.cqure.net/index.html - depth: -1 -- -- @param url instance of URL -- @return depth number containing the depth relative to the base_url getDepth = function(self, url) local base_dir, url_dir = self.options.base_url:getDir(), url:getDir() if ( url_dir and base_dir ) then local m = url_dir:match(base_dir.."(.*)") if ( not(m) ) then return -1 else local _, depth = m:gsub("/", "/") return depth end end end, validate_link = function(self, url) local valid = true -- if our url is nil, abort, this could be due to a number of -- reasons such as unsupported protocols: javascript, mail ... 
or -- that the URL failed to parse for some reason if ( url == nil or tostring(url) == nil ) then return false end -- linkdepth trumps whitelisting if ( self.options.maxdepth and self.options.maxdepth >= 0 ) then local depth = self:getDepth( url ) if ( -1 == depth or depth > self.options.maxdepth ) then stdnse.debug3("%s: Skipping link depth: %d; b_url=%s; url=%s", LIBRARY_NAME, depth, tostring(self.options.base_url), tostring(url)) return false end end -- withindomain trumps any whitelisting if ( self.options.withindomain ) then if ( not(self.options.withindomain(url)) ) then stdnse.debug2("%s: Link is not within domain: %s", LIBRARY_NAME, tostring(url)) return false end end -- withinhost trumps any whitelisting if ( self.options.withinhost ) then if ( not(self.options.withinhost(url)) ) then stdnse.debug2("%s: Link is not within host: %s", LIBRARY_NAME, tostring(url)) return false end end -- run through all blacklists if ( #self.options.blacklist > 0 ) then for _, func in ipairs(self.options.blacklist) do if ( func(url) ) then stdnse.debug2("%s: Blacklist match: %s", LIBRARY_NAME, tostring(url)) valid = false break end end end -- check the url against our whitelist if ( #self.options.whitelist > 0 ) then valid = false for _, func in ipairs(self.options.whitelist) do if ( func(url) ) then stdnse.debug2("%s: Whitelist match: %s", LIBRARY_NAME, tostring(url)) valid = true break end end end return valid end, -- Parses a HTML response and extracts all links it can find -- The function currently supports href, src and action links -- Also all behaviour options, such as depth, white- and black-list are -- processed in here. parse = function(self) local links = {} local patterns = { '[hH][rR][eE][fF]%s*=%s*[\'"]%s*([^"^\']-)%s*[\'"]', '[hH][rR][eE][fF]%s*=%s*([^\'\"][^%s>]+)', '[sS][rR][cC]%s*=%s*[\'"]%s*([^"^\']-)%s*[\'"]', '[sS][rR][cC]%s*=%s*([^\'\"][^%s>]+)', '[aA][cC][tT][iI][oO][nN]%s*=%s*[\'"]%s*([^"^\']+%s*)[\'"]', } local base_hrefs = { '[Bb][Aa][Ss][Ee]%s*[Hh][Rr][Ee][Ff]%s*=%s*[\'"](%s*[^"^\']+%s*)[\'"]', '[Bb][Aa][Ss][Ee]%s*[Hh][Rr][Ee][Ff]%s*=%s*([^\'\"][^%s>]+)' } local base_href for _, pattern in ipairs(base_hrefs) do base_href = self.html:match(pattern) if ( base_href ) then break end end for _, pattern in ipairs(patterns) do for l in self.html:gmatch(pattern) do local link = l if ( not(LinkExtractor.isAbsolute(l)) ) then link = LinkExtractor.createAbsolute(self.url, l, base_href) end local url = URL:new(link) local valid = self:validate_link(url) if ( valid ) then stdnse.debug3("%s: Adding link: %s", LIBRARY_NAME, tostring(url)) links[tostring(url)] = true elseif ( tostring(url) ) then stdnse.debug3("%s: Skipping url: %s", LIBRARY_NAME, link) end end end for link in pairs(links) do table.insert(self.links, link) end end, -- Gets a table containing all of the retrieved URLs, after filtering -- has been applied. 
getLinks = function(self) return self.links end, } -- The URL class, containing code to process URLS -- This class is heavily inspired by the Java URL class URL = { -- Creates a new instance of URL -- @param url string containing the text representation of a URL -- @return o instance of URL, in case of parsing being successful -- nil in case parsing fails new = function(self, url) local o = { raw = url, } setmetatable(o, self) self.__index = self if ( o:parse() ) then return o end end, -- Parses the string representation of the URL and splits it into different -- URL components -- @return status true on success, false on failure parse = function(self) self.proto, self.host, self.port, self.file = self.raw:match("^(http[s]?)://([^:/]*)[:]?(%d*)") if ( self.proto and self.host ) then self.file = self.raw:match("^http[s]?://[^:/]*[:]?%d*(/[^#]*)") or '/' self.port = tonumber(self.port) or url.get_default_port(self.proto) self.path = self.file:match("^([^?]*)[%?]?") self.dir = self.path:match("^(.+%/)") or "/" self.domain= self.host:match("^[^%.]-%.(.*)") return true elseif( self.raw:match("^javascript:") ) then stdnse.debug2("%s: Skipping javascript url: %s", LIBRARY_NAME, self.raw) elseif( self.raw:match("^mailto:") ) then stdnse.debug2("%s: Skipping mailto link: %s", LIBRARY_NAME, self.raw) else stdnse.debug2("%s: WARNING: Failed to parse url: %s", LIBRARY_NAME, self.raw) end return false end, -- Gets the host portion of the URL -- @return host string containing the hostname getHost = function(self) return self.host end, -- Gets the protocol representation of the URL -- @return proto string containing the protocol (ie. http, https) getProto = function(self) return self.proto end, -- Returns the filename component of the URL. -- @return file string containing the path and query components of the url getFile = function(self) return self.file end, -- Gets the port component of the URL -- @return port number containing the port of the URL getPort = function(self) return self.port end, -- Gets the path component of the URL -- @return the full path and filename of the URL getPath = function(self) return self.path end, -- Gets the directory component of the URL -- @return directory string containing the directory part of the URL getDir = function(self) return self.dir end, -- Gets the domain component of the URL -- @return domain string containing the hosts domain getDomain = function(self) if ( self.domain ) then return self.domain -- fallback to the host, if we can't find a domain else return self.host end end, -- Converts the URL to a string -- @return url string containing the string representation of the url __tostring = function(self) return self.raw end, } -- An UrlQueue UrlQueue = { -- creates a new instance of UrlQueue -- @param options table containing options -- @return o new instance of UrlQueue new = function(self, options) local o = { urls = {}, options = options } setmetatable(o, self) self.__index = self return o end, -- gets the next available url in the queue getNext = function(self) return table.remove(self.urls,1) end, -- adds a new url to the queue -- @param url can be either a string or a URL or a table of URLs add = function(self, url) assert( type(url) == 'string' or type(url) == 'table', "url was neither a string or table") local urls = ( 'string' == type(url) ) and URL:new(url) or url -- if it's a table, it can be either a single URL or an array of URLs if ( 'table' == type(url) and url.raw ) then urls = { url } end for _, u in ipairs(urls) do u = ( 'string' == type(u) ) 
and URL:new(u) or u if ( u ) then table.insert(self.urls, u) else stdnse.debug1("ERROR: Invalid URL: %s", url) end end end, -- dumps the contents of the UrlQueue dump = function(self) for _, url in ipairs(self.urls) do print("url:", url) end end, } -- The Crawler class Crawler = { options = {}, removewww = function(url) return string.gsub(url, "^www%.", "") end, -- An utility when defining closures. Checks if the resource exists within host. -- @param u URL that points to the resource we want to check. iswithinhost = function(self, u) local parsed_u = url.parse(tostring(u)) if ( self.options.base_url:getPort() ~= 80 and self.options.base_url:getPort() ~= 443 ) then if ( parsed_u.port ~= tonumber(self.options.base_url:getPort()) ) then return false end elseif ( parsed_u.scheme ~= self.options.base_url:getProto() ) then return false -- if urls don't match only on the "www" prefix, then they are probably the same elseif ( parsed_u.host == nil or self.removewww(parsed_u.host:lower()) ~= self.removewww(self.options.base_url:getHost():lower()) ) then return false end return true end, -- An utility when defining closures. Checks if the resource exists within domain. -- @param u URL that points to the resource we want to check. iswithindomain = function(self, u) local parsed_u = url.parse(tostring(u)) if ( self.options.base_url:getPort() ~= 80 and self.options.base_url:getPort() ~= 443 ) then if ( parsed_u.port ~= tonumber(self.options.base_url:getPort()) ) then return false end elseif ( parsed_u.scheme ~= self.options.base_url:getProto() ) then return false elseif ( parsed_u.host == nil or parsed_u.host:sub(-#self.options.base_url:getDomain()):lower() ~= self.options.base_url:getDomain():lower() ) then return false end return true end, -- An utility when defining closures. Checks the type of the resource. -- @param u URL that points to the resource we want to check. -- @param ext the extension of the resource. -- @param signs table of signs that may exist after the extension of the resource. isresource = function(self, u, ext, signs) u = tostring(u) if string.match(u, "." .. ext .. "$") then return true end local signstring = "" if signs then for _, s in signs do signstring = signstring .. s end signstring:gsub('?', '%?') else signstring = "#%?" end return string.match(u, "." .. ext .. "[" .. signstring .. "]" .. "[^.]*$") end, -- creates a new instance of the Crawler instance -- @param host table as received by the action method -- @param port table as received by the action method -- @param url string containing the relative URL -- @param options table of options: -- <code>noblacklist</code> - do not load default blacklist -- <code>base_url</code> - start url to crawl -- <code>timeout</code> - timeout for the http request -- <code>maxdepth</code> - the maximum directory depth to crawl -- <code>maxpagecount</code> - the maximum amount of pages to retrieve -- <code>withinhost</code> - stay within the host of the base_url -- <code>withindomain</code> - stay within the base_url domain -- <code>doscraping</code> - Permit scraping -- <code>scriptname</code> - should be set to SCRIPT_NAME to enable -- script specific arguments. 
-- <code>redirect_ok</code> - redirect_ok closure to pass to http.get function -- <code>no_cache</code> - no_cache option to pass to http.get function -- @return o new instance of Crawler or nil on failure new = function(self, host, port, url, options) local o = { host = host, port = port, url = url, options = options or {}, basethread = stdnse.base(), } setmetatable(o, self) self.__index = self self.options = o o:loadScriptArguments() o:loadLibraryArguments() o:loadDefaultArguments() local response = http.get(o.host, o.port, '/', { timeout = o.options.timeout, redirect_ok = o.options.redirect_ok, no_cache = o.options.no_cache } ) if ( not(response) or 'table' ~= type(response) ) then return end o.url = o.url:match("/?(.*)") local u_host = o.host.targetname or o.host.name if ( not(u_host) or 0 == #u_host ) then u_host = o.host.ip end local u = ("%s://%s:%d/%s"):format(response.ssl and "https" or "http", u_host, o.port.number, o.url) o.options.base_url = URL:new(u) o.options = Options:new(o.options) o.urlqueue = UrlQueue:new(o.options) o.urlqueue:add(o.options.base_url) o.options.timeout = o.options.timeout or 10000 o.processed = {} -- script arguments have precedence if ( not(o.options.maxdepth) ) then o.options.maxdepth = tonumber(stdnse.get_script_args("httpspider.maxdepth")) end -- script arguments have precedence if ( not(o.options.maxpagecount) ) then o.options.maxpagecount = tonumber(stdnse.get_script_args("httpspider.maxpagecount")) end if ( not(o.options.noblacklist) ) then o:addDefaultBlacklist() end if ( o.options.useheadfornonwebfiles ) then -- Load web files extensions from a file in nselib/data folder. -- For more information on individual file formats, see -- http://en.wikipedia.org/wiki/List_of_file_formats. o.web_files_extensions = {} local f = nmap.fetchfile("nselib/data/http-web-files-extensions.lst") if f then for l in io.lines(f) do table.insert(o.web_files_extensions, l) end end end stdnse.debug2("%s: %s", LIBRARY_NAME, o:getLimitations()) return o end, -- Sets the timeout used by the http library -- @param timeout number containing the timeout in ms. 
set_timeout = function(self, timeout) self.options.timeout = timeout end, -- Gets the amount of pages that has been retrieved -- @return count number of pages retrieved by the instance getPageCount = function(self) local count = 1 for url in pairs(self.processed) do count = count + 1 end return count end, -- Adds a default blacklist blocking binary files such as images, -- compressed archives and executable files addDefaultBlacklist = function(self) -- References: --[[ Image file formats: https://en.wikipedia.org/wiki/Image_file_formats Video file formats: https://en.wikipedia.org/wiki/Video_file_format Audio file formats: https://en.wikipedia.org/wiki/Audio_file_format Doc file extension: https://en.wikipedia.org/wiki/List_of_Microsoft_Office_filename_extensions Archive formats: https://en.wikipedia.org/wiki/List_of_archive_formats , https://en.wikipedia.org/wiki/Category:Archive_formats ]] local extensions = { image_extensions = {"png", "jpg", "jpeg", "gif", "bmp", "jfif", "exif", "tiff", "bmp", "ppm", "pgm", "pbm", "pnm", "webp", "heif", "bpg", "cgm", "svg"}, video_extensions = {"avi", "flv", "ogg", "mp4", "m4p", "m4v", "wmv", "vob", "ogv", "mng", "mov", "rmvb", "asf", "nsv", "f4v", "f4p", "amv", "webm", "mkv", "mpg", "mp2", "mpeg", "mpv", "svi", "3gp", "3g2", "mxf", "roq"}, audio_extensions = {"aac", "m4a", "mp3", "wav", "aa", "aax", "act", "aiff", "amr", "ape", "au", "awb", "dct", "dss", "dvf", "flac", "gsm", "iklax", "ivs", "m4a", "m4b", "m4p", "mmf", "mpc", "msc", "ogg", "oga", "mogg", "oups", "ra", "raw", "sln", "tta", "vox", "wma", "wv", "webm"}, doc_extensions = {"pdf", "doc", "docx", "docm", "xla", "xls", "xlsx", "xlsm", "ppt", "pptx", "pptm", "odf", "ods", "odp", "ps", "xps", "dot", "wbk", "dotx", "dotm", "docb", "xlt", "xlm", "xltx", "xltm", "xlsb", "xlam", "xll", "xlw", "pot", "pps", "potx", "potm", "ppam", "ppsx", "ppsm", "pub"}, archive_extensions = {"zip", "tar.gz", "gz", "rar", "7z", "sit", "sitx", "tgz", "tar.bz", "tar", "iso", "a", "ar", "cpio", "shar", "lbr", "iso", "mar", "sbx", "bz2", "lz", "lzma", "lzo", "rz", "sz", "s7z", "ace", "afa", "alz", "apk", "tar.bz2", "tar.Z", "tar.lzma", "tlz", "tbz2", "xp3", "zz", "bzip", "lzip", "lzop", "rzip"}, exe_extensions = {"exe", "com", "msi", "bin","dmg"} } local blacklist = {} for _, cat in pairs(extensions) do for _, ext in ipairs(cat) do table.insert(blacklist, string.format(".%s$", ext)) end end self.options:addBlacklist( function(url) local p = url:getPath():lower() for _, pat in ipairs(blacklist) do if ( p:match(pat) ) then return true end end end ) end, -- does the heavy crawling -- -- The crawler may exit due to a number of different reasons, including -- invalid options, reaching max count or simply running out of links -- We return a false status for all of these and in case the error was -- unexpected or requires attention we set the error property accordingly. 
-- This way the script can alert the user of the details by calling -- getError() crawl_thread = function(self, response_queue) local condvar = nmap.condvar(response_queue) if ( false ~= self.options.withinhost and false ~= self.options.withindomain ) then table.insert(response_queue, { false, { err = true, reason = "Invalid options: withinhost and withindomain can't both be true" } }) condvar "signal" return end while(true) do if ( self.quit or coroutine.status(self.basethread) == 'dead' ) then table.insert(response_queue, {false, { err = false, msg = "Quit signalled by crawler" } }) break end -- in case the user set a max page count to retrieve check how many -- pages we have retrieved so far local count = self:getPageCount() if ( self.options.maxpagecount and ( self.options.maxpagecount > 0 ) and ( count > self.options.maxpagecount ) ) then table.insert(response_queue, { false, { err = false, msg = "Reached max page count" } }) condvar "signal" return end -- pull links from the queue until we get a valid one local url repeat url = self.urlqueue:getNext() until( not(url) or not(self.processed[tostring(url)]) ) -- if no url could be retrieved from the queue, abort ... if ( not(url) ) then table.insert(response_queue, { false, { err = false, msg = "No more urls" } }) condvar "signal" return end if ( self.options.maxpagecount ) then stdnse.debug2("%s: Fetching url [%d of %d]: %s", LIBRARY_NAME, count, self.options.maxpagecount, tostring(url)) else stdnse.debug2("%s: Fetching url: %s", LIBRARY_NAME, tostring(url)) end local scrape = true if not (self.options.doscraping(url)) then stdnse.debug2("%s: Scraping is not allowed for url: %s", LIBRARY_NAME, tostring(url)) scrape = false end local response -- in case we want to use HEAD rather than GET for files with certain extensions if ( self.options.useheadfornonwebfiles ) then local is_web_file = false local file = url:getPath():lower() -- check if we are at a URL with 'no extension', for example: nmap.org/6 if string.match(file,".*(/[^/%.]*)$") or string.match(file, "/$") then is_web_file = true end if not is_web_file then for _,v in pairs(self.web_files_extensions) do if string.match(file, "%."..v.."$") then is_web_file = true break end end end if is_web_file then stdnse.debug2("%s: Using GET: %s", LIBRARY_NAME, file) response = http.get(url:getHost(), url:getPort(), url:getFile(), { timeout = self.options.timeout, redirect_ok = self.options.redirect_ok, no_cache = self.options.no_cache } ) else stdnse.debug2("%s: Using HEAD: %s", LIBRARY_NAME, file) response = http.head(url:getHost(), url:getPort(), url:getFile()) end else -- fetch the url, and then push it to the processed table response = http.get(url:getHost(), url:getPort(), url:getFile(), { timeout = self.options.timeout, redirect_ok = self.options.redirect_ok, no_cache = self.options.no_cache } ) end self.processed[tostring(url)] = true if ( response ) then -- were we redirected? if ( response.location ) then -- was the link absolute? 
local link = response.location[#response.location] if ( link:match("^http") ) then url = URL:new(link) -- guess not else url.path = link end end -- if we have a response, proceed scraping it if ( response.body ) and scrape then local links = LinkExtractor:new(url, response.body, self.options):getLinks() self.urlqueue:add(links) end else response = { body = "", headers = {} } end table.insert(response_queue, { true, { url = url, response = response } } ) while ( PREFETCH_SIZE < #response_queue ) do stdnse.debug2("%s: Response queue full, waiting ...", LIBRARY_NAME) condvar "wait" end condvar "signal" end condvar "signal" end, -- Loads the argument set on a script level loadScriptArguments = function(self) local sn = self.options.scriptname if ( not(sn) ) then stdnse.debug1("%s: WARNING: Script argument could not be loaded as scriptname was not set", LIBRARY_NAME) return end if ( nil == self.options.maxdepth ) then self.options.maxdepth = tonumber(stdnse.get_script_args(sn .. ".maxdepth")) end if ( nil == self.options.maxpagecount ) then self.options.maxpagecount = tonumber(stdnse.get_script_args(sn .. ".maxpagecount")) end if ( nil == self.url ) then self.url = stdnse.get_script_args(sn .. ".url") end if ( nil == self.options.withinhost ) then self.options.withinhost = stdnse.get_script_args(sn .. ".withinhost") end if ( nil == self.options.withindomain ) then self.options.withindomain = stdnse.get_script_args(sn .. ".withindomain") end if ( nil == self.options.noblacklist ) then self.options.noblacklist = stdnse.get_script_args(sn .. ".noblacklist") end if ( nil == self.options.useheadfornonwebfiles ) then self.options.useheadfornonwebfiles = stdnse.get_script_args(sn .. ".useheadfornonwebfiles") end if ( nil == self.options.doscraping ) then self.options.doscraping = stdnse.get_script_args(sn .. ".doscraping") end end, -- Loads the argument on a library level loadLibraryArguments = function(self) local ln = LIBRARY_NAME if ( nil == self.options.maxdepth ) then self.options.maxdepth = tonumber(stdnse.get_script_args(ln .. ".maxdepth")) end if ( nil == self.options.maxpagecount ) then self.options.maxpagecount = tonumber(stdnse.get_script_args(ln .. ".maxpagecount")) end if ( nil == self.url ) then self.url = stdnse.get_script_args(ln .. ".url") end if ( nil == self.options.withinhost ) then self.options.withinhost = stdnse.get_script_args(ln .. ".withinhost") end if ( nil == self.options.withindomain ) then self.options.withindomain = stdnse.get_script_args(ln .. ".withindomain") end if ( nil == self.options.noblacklist ) then self.options.noblacklist = stdnse.get_script_args(ln .. ".noblacklist") end if ( nil == self.options.useheadfornonwebfiles ) then self.options.useheadfornonwebfiles = stdnse.get_script_args(ln .. ".useheadfornonwebfiles") end if ( nil == self.options.doscraping ) then self.options.doscraping = stdnse.get_script_args(ln .. 
".doscraping") end end, -- Loads any defaults for arguments that were not set loadDefaultArguments = function(self) local function tobool(b) if ( nil == b ) then return end assert("string" == type(b) or "boolean" == type(b) or "number" == type(b), "httpspider: tobool failed, unsupported type") if ( "string" == type(b) ) then if ( "true" == b ) then return true else return false end elseif ( "number" == type(b) ) then if ( 1 == b ) then return true else return false end end return b end if self.options.withinhost == 0 then self.options.withinhost = false end if self.options.withindomain == 0 then self.options.withindomain = false end -- fixup some booleans to make sure they're actually booleans self.options.noblacklist = tobool(self.options.noblacklist) self.options.useheadfornonwebfiles = tobool(self.options.useheadfornonwebfiles) if ( self.options.withinhost == nil ) then if ( self.options.withindomain ~= true ) then self.options.withinhost = true else self.options.withinhost = false end end if ( self.options.withindomain == nil ) then self.options.withindomain = false end if ( not ( type(self.options.doscraping) == "function" ) ) then self.options.doscraping = false end self.options.maxdepth = tonumber(self.options.maxdepth) or 3 self.options.maxpagecount = tonumber(self.options.maxpagecount) or 20 self.url = self.url or '/' end, -- gets a string of limitations imposed on the crawl getLimitations = function(self) local o = self.options local limits = {} if ( o.maxdepth > 0 or o.maxpagecount > 0 or o.withinhost or o.withindomain ) then if ( o.maxdepth > 0 ) then table.insert(limits, ("maxdepth=%d"):format(o.maxdepth)) end if ( o.maxpagecount > 0 ) then table.insert(limits, ("maxpagecount=%d"):format(o.maxpagecount)) end if ( o.withindomain ) then table.insert(limits, ("withindomain=%s"):format(o.base_url:getDomain() or o.base_url:getHost())) end if ( o.withinhost ) then table.insert(limits, ("withinhost=%s"):format(o.base_url:getHost())) end end if ( #limits > 0 ) then return ("Spidering limited to: %s"):format(stdnse.strjoin("; ", limits)) end end, -- does the crawling crawl = function(self) self.response_queue = self.response_queue or {} local condvar = nmap.condvar(self.response_queue) if ( not(self.thread) ) then self.thread = stdnse.new_thread(self.crawl_thread, self, self.response_queue) end if ( #self.response_queue == 0 and coroutine.status(self.thread) ~= 'dead') then condvar "wait" end condvar "signal" if ( #self.response_queue == 0 ) then return false, { err = false, msg = "No more urls" } else return table.unpack(table.remove(self.response_queue, 1)) end end, -- signals the crawler to stop stop = function(self) local condvar = nmap.condvar(self.response_queue) self.quit = true condvar "signal" if ( coroutine.status(self.thread) == "dead" ) then return end condvar "wait" end } return _ENV;