OXIESEC PANEL
- Current Dir:
/
/
usr
/
lib
/
python2.7
Server IP: 139.59.38.164
Upload:
Create Dir:
Name
Size
Modified
Perms
📁
..
-
10/28/2024 06:50:42 AM
rwxr-xr-x
📄
BaseHTTPServer.py
22.21 KB
03/08/2023 06:40:28 PM
rw-r--r--
📄
Bastion.py
5.61 KB
03/08/2023 06:40:28 PM
rw-r--r--
📄
CGIHTTPServer.py
12.78 KB
03/08/2023 06:40:28 PM
rw-r--r--
📄
ConfigParser.py
27.1 KB
03/08/2023 06:40:28 PM
rw-r--r--
📄
Cookie.py
25.92 KB
03/08/2023 06:40:28 PM
rw-r--r--
📄
DocXMLRPCServer.py
10.52 KB
03/08/2023 06:40:28 PM
rw-r--r--
📄
HTMLParser.py
16.77 KB
03/08/2023 06:40:28 PM
rw-r--r--
📄
LICENSE.txt
12.47 KB
03/08/2023 06:40:28 PM
rw-r--r--
📄
MimeWriter.py
6.33 KB
03/08/2023 06:40:28 PM
rw-r--r--
📄
Queue.py
8.38 KB
03/08/2023 06:40:28 PM
rw-r--r--
📄
SimpleHTTPServer.py
7.81 KB
03/08/2023 06:40:28 PM
rw-r--r--
📄
SimpleXMLRPCServer.py
25.21 KB
03/08/2023 06:40:28 PM
rw-r--r--
📄
SocketServer.py
23.39 KB
03/08/2023 06:40:28 PM
rw-r--r--
📄
StringIO.py
10.41 KB
03/08/2023 06:40:28 PM
rw-r--r--
📄
UserDict.py
6.89 KB
03/08/2023 06:40:28 PM
rw-r--r--
📄
UserList.py
3.56 KB
03/08/2023 06:40:28 PM
rw-r--r--
📄
UserString.py
9.46 KB
03/08/2023 06:40:28 PM
rwxr-xr-x
📄
_LWPCookieJar.py
6.4 KB
03/08/2023 06:40:28 PM
rw-r--r--
📄
_MozillaCookieJar.py
5.66 KB
03/08/2023 06:40:28 PM
rw-r--r--
📄
__future__.py
4.28 KB
03/08/2023 06:40:28 PM
rw-r--r--
📄
__phello__.foo.py
64 bytes
03/08/2023 06:40:28 PM
rw-r--r--
📄
_abcoll.py
18.18 KB
03/08/2023 06:40:28 PM
rw-r--r--
📄
_osx_support.py
18.65 KB
03/08/2023 06:40:28 PM
rw-r--r--
📄
_pyio.py
68 KB
03/08/2023 06:40:28 PM
rw-r--r--
📄
_strptime.py
20.24 KB
03/08/2023 06:40:28 PM
rw-r--r--
📄
_sysconfigdata.py
126 bytes
03/08/2023 06:40:28 PM
rw-r--r--
📄
_threading_local.py
7.09 KB
03/08/2023 06:40:28 PM
rw-r--r--
📄
_weakrefset.py
5.77 KB
03/08/2023 06:40:28 PM
rw-r--r--
📄
abc.py
6.98 KB
03/08/2023 06:40:28 PM
rw-r--r--
📄
aifc.py
33.77 KB
03/08/2023 06:40:28 PM
rw-r--r--
📄
antigravity.py
60 bytes
03/08/2023 06:40:28 PM
rw-r--r--
📄
anydbm.py
2.6 KB
03/08/2023 06:40:28 PM
rw-r--r--
📄
argparse.egg-info
217 bytes
03/08/2023 06:40:28 PM
rw-r--r--
📄
argparse.py
87.14 KB
03/08/2023 06:40:28 PM
rw-r--r--
📄
ast.py
11.53 KB
03/08/2023 06:40:28 PM
rw-r--r--
📄
asynchat.py
11.31 KB
03/08/2023 06:40:28 PM
rw-r--r--
📄
asyncore.py
20.45 KB
03/08/2023 06:40:28 PM
rw-r--r--
📄
atexit.py
1.67 KB
03/08/2023 06:40:28 PM
rw-r--r--
📄
audiodev.py
7.42 KB
03/08/2023 06:40:28 PM
rw-r--r--
📄
base64.py
11.53 KB
03/08/2023 06:40:28 PM
rwxr-xr-x
📄
bdb.py
21.21 KB
03/08/2023 06:40:28 PM
rw-r--r--
📄
binhex.py
14.35 KB
03/08/2023 06:40:28 PM
rw-r--r--
📄
bisect.py
2.53 KB
03/08/2023 06:40:28 PM
rw-r--r--
📁
bsddb
-
05/09/2024 07:14:10 AM
rwxr-xr-x
📄
cProfile.py
6.42 KB
03/08/2023 06:40:28 PM
rwxr-xr-x
📄
calendar.py
22.84 KB
03/08/2023 06:40:28 PM
rw-r--r--
📄
cgi.py
34.96 KB
03/08/2023 06:40:28 PM
rwxr-xr-x
📄
cgitb.py
11.89 KB
03/08/2023 06:40:28 PM
rw-r--r--
📄
chunk.py
5.29 KB
03/08/2023 06:40:28 PM
rw-r--r--
📄
cmd.py
14.67 KB
03/08/2023 06:40:28 PM
rw-r--r--
📄
code.py
9.95 KB
03/08/2023 06:40:28 PM
rw-r--r--
📄
codecs.py
35.3 KB
03/08/2023 06:40:28 PM
rw-r--r--
📄
codeop.py
5.86 KB
03/08/2023 06:40:28 PM
rw-r--r--
📄
collections.py
27.15 KB
03/08/2023 06:40:28 PM
rw-r--r--
📄
colorsys.py
3.6 KB
03/08/2023 06:40:28 PM
rw-r--r--
📄
commands.py
2.49 KB
03/08/2023 06:40:28 PM
rw-r--r--
📄
compileall.py
7.58 KB
03/08/2023 06:40:28 PM
rw-r--r--
📁
compiler
-
05/09/2024 07:14:10 AM
rwxr-xr-x
📁
config-x86_64-linux-gnu
-
05/09/2024 07:14:10 AM
rwxr-xr-x
📄
contextlib.py
4.32 KB
03/08/2023 06:40:28 PM
rw-r--r--
📄
cookielib.py
63.9 KB
03/08/2023 06:40:28 PM
rw-r--r--
📄
copy.py
11.26 KB
03/08/2023 06:40:28 PM
rw-r--r--
📄
copy_reg.py
6.81 KB
03/08/2023 06:40:28 PM
rw-r--r--
📄
csv.py
16.32 KB
03/08/2023 06:40:28 PM
rw-r--r--
📁
ctypes
-
05/09/2024 07:14:10 AM
rwxr-xr-x
📁
curses
-
05/09/2024 07:14:10 AM
rwxr-xr-x
📄
dbhash.py
498 bytes
03/08/2023 06:40:28 PM
rw-r--r--
📄
decimal.py
216.73 KB
03/08/2023 06:40:28 PM
rw-r--r--
📄
difflib.py
80.4 KB
03/08/2023 06:40:28 PM
rw-r--r--
📄
dircache.py
1.1 KB
03/08/2023 06:40:28 PM
rw-r--r--
📄
dis.py
6.35 KB
03/08/2023 06:40:28 PM
rw-r--r--
📁
dist-packages
-
05/09/2024 07:14:10 AM
rwxr-xr-x
📁
distutils
-
05/09/2024 07:14:10 AM
rwxr-xr-x
📄
doctest.py
102.63 KB
03/08/2023 06:40:28 PM
rw-r--r--
📄
dumbdbm.py
8.93 KB
03/08/2023 06:40:28 PM
rw-r--r--
📄
dummy_thread.py
4.31 KB
03/08/2023 06:40:28 PM
rw-r--r--
📄
dummy_threading.py
2.74 KB
03/08/2023 06:40:28 PM
rw-r--r--
📁
email
-
05/09/2024 07:14:10 AM
rwxr-xr-x
📁
encodings
-
05/09/2024 07:14:10 AM
rwxr-xr-x
📁
ensurepip
-
05/09/2024 07:14:10 AM
rwxr-xr-x
📄
filecmp.py
9.36 KB
03/08/2023 06:40:28 PM
rw-r--r--
📄
fileinput.py
13.42 KB
03/08/2023 06:40:28 PM
rw-r--r--
📄
fnmatch.py
3.24 KB
03/08/2023 06:40:28 PM
rw-r--r--
📄
formatter.py
14.56 KB
03/08/2023 06:40:28 PM
rw-r--r--
📄
fpformat.py
4.62 KB
03/08/2023 06:40:28 PM
rw-r--r--
📄
fractions.py
21.87 KB
03/08/2023 06:40:28 PM
rw-r--r--
📄
ftplib.py
37.65 KB
03/08/2023 06:40:28 PM
rw-r--r--
📄
functools.py
4.69 KB
03/08/2023 06:40:28 PM
rw-r--r--
📄
genericpath.py
3.13 KB
03/08/2023 06:40:28 PM
rw-r--r--
📄
getopt.py
7.15 KB
03/08/2023 06:40:28 PM
rw-r--r--
📄
getpass.py
5.43 KB
03/08/2023 06:40:28 PM
rw-r--r--
📄
gettext.py
22.48 KB
03/08/2023 06:40:28 PM
rw-r--r--
📄
glob.py
3.04 KB
03/08/2023 06:40:28 PM
rw-r--r--
📄
gzip.py
18.58 KB
03/08/2023 06:40:28 PM
rw-r--r--
📄
hashlib.py
7.66 KB
03/08/2023 06:40:28 PM
rw-r--r--
📄
heapq.py
17.87 KB
03/08/2023 06:40:28 PM
rw-r--r--
📄
hmac.py
4.48 KB
03/08/2023 06:40:28 PM
rw-r--r--
📁
hotshot
-
05/09/2024 07:14:10 AM
rwxr-xr-x
📄
htmlentitydefs.py
17.63 KB
03/08/2023 06:40:28 PM
rw-r--r--
📄
htmllib.py
12.57 KB
03/08/2023 06:40:28 PM
rw-r--r--
📄
httplib.py
51.72 KB
03/08/2023 06:40:28 PM
rw-r--r--
📄
ihooks.py
18.54 KB
03/08/2023 06:40:28 PM
rw-r--r--
📄
imaplib.py
47.23 KB
03/08/2023 06:40:28 PM
rw-r--r--
📄
imghdr.py
3.46 KB
03/08/2023 06:40:28 PM
rw-r--r--
📁
importlib
-
05/09/2024 07:14:10 AM
rwxr-xr-x
📄
imputil.py
25.16 KB
03/08/2023 06:40:28 PM
rw-r--r--
📄
inspect.py
42 KB
03/08/2023 06:40:28 PM
rw-r--r--
📄
io.py
3.24 KB
03/08/2023 06:40:28 PM
rw-r--r--
📁
json
-
05/09/2024 07:14:10 AM
rwxr-xr-x
📄
keyword.py
1.95 KB
03/08/2023 06:40:28 PM
rwxr-xr-x
📁
lib-dynload
-
05/09/2024 07:14:10 AM
rwxr-xr-x
📁
lib-tk
-
05/09/2024 07:14:10 AM
rwxr-xr-x
📁
lib2to3
-
05/09/2024 07:14:10 AM
rwxr-xr-x
📄
linecache.py
3.93 KB
03/08/2023 06:40:28 PM
rw-r--r--
📄
locale.py
100.43 KB
03/08/2023 06:40:28 PM
rw-r--r--
📁
logging
-
05/09/2024 07:14:10 AM
rwxr-xr-x
📄
macpath.py
6.14 KB
03/08/2023 06:40:28 PM
rw-r--r--
📄
macurl2path.py
2.67 KB
03/08/2023 06:40:28 PM
rw-r--r--
📄
mailbox.py
79.34 KB
03/08/2023 06:40:28 PM
rw-r--r--
📄
mailcap.py
8.21 KB
03/08/2023 06:40:28 PM
rw-r--r--
📄
markupbase.py
14.3 KB
03/08/2023 06:40:28 PM
rw-r--r--
📄
md5.py
358 bytes
03/08/2023 06:40:28 PM
rw-r--r--
📄
mhlib.py
32.65 KB
03/08/2023 06:40:28 PM
rw-r--r--
📄
mimetools.py
7 KB
03/08/2023 06:40:28 PM
rw-r--r--
📄
mimetypes.py
20.54 KB
03/08/2023 06:40:28 PM
rw-r--r--
📄
mimify.py
14.67 KB
03/08/2023 06:40:28 PM
rwxr-xr-x
📄
modulefinder.py
23.89 KB
03/08/2023 06:40:28 PM
rw-r--r--
📄
multifile.py
4.71 KB
03/08/2023 06:40:28 PM
rw-r--r--
📁
multiprocessing
-
05/09/2024 07:14:10 AM
rwxr-xr-x
📄
mutex.py
1.83 KB
03/08/2023 06:40:28 PM
rw-r--r--
📄
netrc.py
5.75 KB
03/08/2023 06:40:28 PM
rw-r--r--
📄
new.py
610 bytes
03/08/2023 06:40:28 PM
rw-r--r--
📄
nntplib.py
20.97 KB
03/08/2023 06:40:28 PM
rw-r--r--
📄
ntpath.py
18.97 KB
03/08/2023 06:40:28 PM
rw-r--r--
📄
nturl2path.py
2.36 KB
03/08/2023 06:40:28 PM
rw-r--r--
📄
numbers.py
10.08 KB
03/08/2023 06:40:28 PM
rw-r--r--
📄
opcode.py
5.35 KB
03/08/2023 06:40:28 PM
rw-r--r--
📄
optparse.py
59.77 KB
03/08/2023 06:40:28 PM
rw-r--r--
📄
os.py
25.3 KB
03/08/2023 06:40:28 PM
rw-r--r--
📄
os2emxpath.py
4.53 KB
03/08/2023 06:40:28 PM
rw-r--r--
📄
pdb.doc
7.73 KB
03/08/2023 06:40:28 PM
rw-r--r--
📄
pdb.py
45.02 KB
03/08/2023 06:40:28 PM
rwxr-xr-x
📄
pickle.py
44.42 KB
03/08/2023 06:40:28 PM
rw-r--r--
📄
pickletools.py
72.78 KB
03/08/2023 06:40:28 PM
rw-r--r--
📄
pipes.py
9.36 KB
03/08/2023 06:40:28 PM
rw-r--r--
📄
pkgutil.py
19.77 KB
03/08/2023 06:40:28 PM
rw-r--r--
📁
plat-x86_64-linux-gnu
-
05/09/2024 07:14:10 AM
rwxr-xr-x
📄
platform.py
52.52 KB
03/08/2023 06:40:28 PM
rwxr-xr-x
📄
plistlib.py
14.83 KB
03/08/2023 06:40:28 PM
rw-r--r--
📄
popen2.py
8.22 KB
03/08/2023 06:40:28 PM
rw-r--r--
📄
poplib.py
12.52 KB
03/08/2023 06:40:28 PM
rw-r--r--
📄
posixfile.py
7.82 KB
03/08/2023 06:40:28 PM
rw-r--r--
📄
posixpath.py
13.96 KB
03/08/2023 06:40:28 PM
rw-r--r--
📄
pprint.py
11.5 KB
03/08/2023 06:40:28 PM
rw-r--r--
📄
profile.py
22.25 KB
03/08/2023 06:40:28 PM
rwxr-xr-x
📄
pstats.py
26.09 KB
03/08/2023 06:40:28 PM
rw-r--r--
📄
pty.py
4.94 KB
03/08/2023 06:40:28 PM
rw-r--r--
📄
py_compile.py
6.14 KB
03/08/2023 06:40:28 PM
rw-r--r--
📄
pyclbr.py
13.07 KB
03/08/2023 06:40:28 PM
rw-r--r--
📄
pydoc.py
93.9 KB
03/08/2023 06:40:28 PM
rwxr-xr-x
📁
pydoc_data
-
05/09/2024 07:14:10 AM
rwxr-xr-x
📄
quopri.py
6.8 KB
03/08/2023 06:40:28 PM
rwxr-xr-x
📄
random.py
31.7 KB
03/08/2023 06:40:28 PM
rw-r--r--
📄
re.py
13.11 KB
03/08/2023 06:40:28 PM
rw-r--r--
📄
repr.py
4.2 KB
03/08/2023 06:40:28 PM
rw-r--r--
📄
rexec.py
19.68 KB
03/08/2023 06:40:28 PM
rw-r--r--
📄
rfc822.py
32.76 KB
03/08/2023 06:40:28 PM
rw-r--r--
📄
rlcompleter.py
5.85 KB
03/08/2023 06:40:28 PM
rw-r--r--
📄
robotparser.py
7.51 KB
03/08/2023 06:40:28 PM
rw-r--r--
📄
runpy.py
10.82 KB
03/08/2023 06:40:28 PM
rw-r--r--
📄
sched.py
4.97 KB
03/08/2023 06:40:28 PM
rw-r--r--
📄
sets.py
18.6 KB
03/08/2023 06:40:28 PM
rw-r--r--
📄
sgmllib.py
17.46 KB
03/08/2023 06:40:28 PM
rw-r--r--
📄
sha.py
393 bytes
03/08/2023 06:40:28 PM
rw-r--r--
📄
shelve.py
7.99 KB
03/08/2023 06:40:28 PM
rw-r--r--
📄
shlex.py
10.9 KB
03/08/2023 06:40:28 PM
rw-r--r--
📄
shutil.py
19.41 KB
03/08/2023 06:40:28 PM
rw-r--r--
📄
site.py
19.48 KB
03/08/2023 06:40:28 PM
rw-r--r--
📄
sitecustomize.py
155 bytes
11/07/2019 10:07:09 AM
rw-r--r--
📄
smtpd.py
18.11 KB
03/08/2023 06:40:28 PM
rwxr-xr-x
📄
smtplib.py
31.38 KB
03/08/2023 06:40:28 PM
rwxr-xr-x
📄
sndhdr.py
5.83 KB
03/08/2023 06:40:28 PM
rw-r--r--
📄
socket.py
20.13 KB
03/08/2023 06:40:28 PM
rw-r--r--
📁
sqlite3
-
05/09/2024 07:14:10 AM
rwxr-xr-x
📄
sre.py
384 bytes
03/08/2023 06:40:28 PM
rw-r--r--
📄
sre_compile.py
19.36 KB
03/08/2023 06:40:28 PM
rw-r--r--
📄
sre_constants.py
7.03 KB
03/08/2023 06:40:28 PM
rw-r--r--
📄
sre_parse.py
29.98 KB
03/08/2023 06:40:28 PM
rw-r--r--
📄
ssl.py
36.58 KB
03/08/2023 06:40:28 PM
rw-r--r--
📄
stat.py
1.8 KB
03/08/2023 06:40:28 PM
rw-r--r--
📄
statvfs.py
898 bytes
03/08/2023 06:40:28 PM
rw-r--r--
📄
string.py
21.04 KB
03/08/2023 06:40:28 PM
rw-r--r--
📄
stringold.py
12.16 KB
03/08/2023 06:40:28 PM
rw-r--r--
📄
stringprep.py
13.21 KB
03/08/2023 06:40:28 PM
rw-r--r--
📄
struct.py
82 bytes
03/08/2023 06:40:28 PM
rw-r--r--
📄
subprocess.py
49.34 KB
03/08/2023 06:40:28 PM
rw-r--r--
📄
sunau.py
16.82 KB
03/08/2023 06:40:28 PM
rw-r--r--
📄
sunaudio.py
1.37 KB
03/08/2023 06:40:28 PM
rw-r--r--
📄
symbol.py
2.01 KB
03/08/2023 06:40:28 PM
rwxr-xr-x
📄
symtable.py
7.26 KB
03/08/2023 06:40:28 PM
rw-r--r--
📄
sysconfig.py
24.9 KB
03/08/2023 06:40:28 PM
rw-r--r--
📄
tabnanny.py
11.07 KB
03/08/2023 06:40:28 PM
rwxr-xr-x
📄
tarfile.py
88.53 KB
03/08/2023 06:40:28 PM
rw-r--r--
📄
telnetlib.py
26.4 KB
03/08/2023 06:40:28 PM
rw-r--r--
📄
tempfile.py
19.09 KB
03/08/2023 06:40:28 PM
rw-r--r--
📁
test
-
05/09/2024 07:14:10 AM
rwxr-xr-x
📄
textwrap.py
16.88 KB
03/08/2023 06:40:28 PM
rw-r--r--
📄
this.py
1002 bytes
03/08/2023 06:40:28 PM
rw-r--r--
📄
threading.py
46.01 KB
03/08/2023 06:40:28 PM
rw-r--r--
📄
timeit.py
12.49 KB
03/08/2023 06:40:28 PM
rwxr-xr-x
📄
toaiff.py
3.07 KB
03/08/2023 06:40:28 PM
rw-r--r--
📄
token.py
2.85 KB
03/08/2023 06:40:28 PM
rw-r--r--
📄
tokenize.py
17.07 KB
03/08/2023 06:40:28 PM
rw-r--r--
📄
trace.py
29.19 KB
03/08/2023 06:40:28 PM
rwxr-xr-x
📄
traceback.py
11.02 KB
03/08/2023 06:40:28 PM
rw-r--r--
📄
tty.py
879 bytes
03/08/2023 06:40:28 PM
rw-r--r--
📄
types.py
2.04 KB
03/08/2023 06:40:28 PM
rw-r--r--
📁
unittest
-
05/09/2024 07:14:10 AM
rwxr-xr-x
📄
urllib.py
58.68 KB
03/08/2023 06:40:28 PM
rw-r--r--
📄
urllib2.py
51.57 KB
03/08/2023 06:40:28 PM
rw-r--r--
📄
urlparse.py
16.78 KB
03/08/2023 06:40:28 PM
rw-r--r--
📄
user.py
1.59 KB
03/08/2023 06:40:28 PM
rw-r--r--
📄
uu.py
6.4 KB
03/08/2023 06:40:28 PM
rwxr-xr-x
📄
uuid.py
22.63 KB
03/08/2023 06:40:28 PM
rw-r--r--
📄
warnings.py
14.48 KB
03/08/2023 06:40:28 PM
rw-r--r--
📄
wave.py
18.15 KB
03/08/2023 06:40:28 PM
rw-r--r--
📄
weakref.py
14.48 KB
03/08/2023 06:40:28 PM
rw-r--r--
📄
webbrowser.py
22.19 KB
03/08/2023 06:40:28 PM
rwxr-xr-x
📄
whichdb.py
3.3 KB
03/08/2023 06:40:28 PM
rw-r--r--
📁
wsgiref
-
05/09/2024 07:14:10 AM
rwxr-xr-x
📄
wsgiref.egg-info
187 bytes
03/08/2023 06:40:28 PM
rw-r--r--
📄
xdrlib.py
5.93 KB
03/08/2023 06:40:28 PM
rw-r--r--
📁
xml
-
05/09/2024 07:14:10 AM
rwxr-xr-x
📄
xmllib.py
34.05 KB
03/08/2023 06:40:28 PM
rw-r--r--
📄
xmlrpclib.py
50.91 KB
03/08/2023 06:40:28 PM
rw-r--r--
📄
zipfile.py
58.08 KB
03/08/2023 06:40:28 PM
rw-r--r--
Editing: robotparser.py
Close
""" robotparser.py

    Copyright (C) 2000  Bastian Kleineidam

    You can choose between two licenses when using this package:
    1) GNU GPLv2
    2) PSF license for Python 2.2

    The robots.txt Exclusion Protocol is implemented as specified in
    http://www.robotstxt.org/norobots-rfc.txt

"""
import urlparse
import urllib

__all__ = ["RobotFileParser"]


class RobotFileParser:
    """ This class provides a set of methods to read, parse and answer
    questions about a single robots.txt file.

    """

    def __init__(self, url=''):
        """Create a parser for the robots.txt file located at *url*.

        Nothing is fetched here; call read() (or parse()) before
        can_fetch().
        """
        self.entries = []
        self.default_entry = None
        self.disallow_all = False
        self.allow_all = False
        self.set_url(url)
        self.last_checked = 0

    def mtime(self):
        """Returns the time the robots.txt file was last fetched.

        This is useful for long-running web spiders that need to
        check for new robots.txt files periodically.

        """
        return self.last_checked

    def modified(self):
        """Sets the time the robots.txt file was last fetched to the
        current time.

        """
        import time
        self.last_checked = time.time()

    def set_url(self, url):
        """Sets the URL referring to a robots.txt file."""
        self.url = url
        # Elements 1 and 2 of the urlparse() result are netloc and path.
        self.host, self.path = urlparse.urlparse(url)[1:3]

    def read(self):
        """Reads the robots.txt URL and feeds it to the parser.

        The HTTP status recorded by the opener decides the outcome:
        401/403 set disallow_all, any other 4xx sets allow_all, and 200
        hands the stripped lines to parse().  Any other status leaves the
        parser untouched, so can_fetch() keeps returning False while
        last_checked is still 0.
        """
        opener = URLopener()
        f = opener.open(self.url)
        try:
            lines = [line.strip() for line in f]
        finally:
            # Release the connection even if reading the body fails.
            f.close()
        self.errcode = opener.errcode
        if self.errcode in (401, 403):
            self.disallow_all = True
        elif 400 <= self.errcode < 500:
            self.allow_all = True
        elif self.errcode == 200:
            # Parse even when the body is empty: parse() is what records
            # the fetch time, and without it can_fetch() would refuse
            # every URL for a site whose robots.txt has no content.
            self.parse(lines)

    def _add_entry(self, entry):
        """Store *entry*, keeping a catch-all ("*") entry separately."""
        if "*" in entry.useragents:
            # the default entry is considered last
            if self.default_entry is None:
                # the first default entry wins
                self.default_entry = entry
        else:
            self.entries.append(entry)

    def parse(self, lines):
        """parse the input lines from a robots.txt file.
           We allow that a user-agent: line is not preceded by
           one or more blank lines."""
        # states:
        #   0: start state
        #   1: saw user-agent line
        #   2: saw an allow or disallow line
        state = 0
        entry = Entry()

        self.modified()
        for line in lines:
            if not line:
                # A blank line terminates the current record.
                if state == 1:
                    entry = Entry()
                    state = 0
                elif state == 2:
                    self._add_entry(entry)
                    entry = Entry()
                    state = 0
            # remove optional comment and strip line
            i = line.find('#')
            if i >= 0:
                line = line[:i]
            line = line.strip()
            if not line:
                continue
            parts = line.split(':', 1)
            if len(parts) != 2:
                # Not a "field: value" line; ignore it.
                continue
            field = parts[0].strip().lower()
            value = urllib.unquote(parts[1].strip())
            if field == "user-agent":
                if state == 2:
                    # A new user-agent after rule lines starts a new record.
                    self._add_entry(entry)
                    entry = Entry()
                entry.useragents.append(value)
                state = 1
            elif field == "disallow":
                # Rule lines before any user-agent line are ignored.
                if state != 0:
                    entry.rulelines.append(RuleLine(value, False))
                    state = 2
            elif field == "allow":
                if state != 0:
                    entry.rulelines.append(RuleLine(value, True))
                    state = 2
        if state == 2:
            self._add_entry(entry)

    def can_fetch(self, useragent, url):
        """using the parsed robots.txt decide if useragent can fetch url"""
        if self.disallow_all:
            return False
        if self.allow_all:
            return True

        # Until the robots.txt file has been read or found not
        # to exist, we must assume that no url is allowable.
        # This prevents false positives when a user erroneously
        # calls can_fetch() before calling read().
        if not self.last_checked:
            return False

        # search for given user agent matches
        # the first match counts
        parsed_url = urlparse.urlparse(urllib.unquote(url))
        # Drop scheme and host so only the path-side components are matched.
        url = urlparse.urlunparse(('', '', parsed_url.path,
            parsed_url.params, parsed_url.query, parsed_url.fragment))
        url = urllib.quote(url)
        if not url:
            url = "/"
        for entry in self.entries:
            if entry.applies_to(useragent):
                return entry.allowance(url)
        # try the default entry last
        if self.default_entry:
            return self.default_entry.allowance(url)
        # agent not found ==> access granted
        return True

    def __str__(self):
        """Return the stored entries as text, the default entry last."""
        entries = self.entries
        if self.default_entry is not None:
            entries = entries + [self.default_entry]
        return '\n'.join(map(str, entries)) + '\n'


class RuleLine:
    """A rule line is a single "Allow:" (allowance==True) or "Disallow:"
       (allowance==False) followed by a path."""

    def __init__(self, path, allowance):
        """Store the normalized, quoted *path* and its *allowance* flag."""
        if path == '' and not allowance:
            # an empty value means allow all
            allowance = True
        path = urlparse.urlunparse(urlparse.urlparse(path))
        self.path = urllib.quote(path)
        self.allowance = allowance

    def applies_to(self, filename):
        """Return True if this rule's path is "*" or prefixes *filename*."""
        return self.path == "*" or filename.startswith(self.path)

    def __str__(self):
        """Return the rule in robots.txt syntax."""
        return ("Allow" if self.allowance else "Disallow") + ": " + self.path


class Entry:
    """An entry has one or more user-agents and zero or more rulelines"""

    def __init__(self):
        self.useragents = []
        self.rulelines = []

    def __str__(self):
        """Return the entry in robots.txt syntax, one line per item."""
        ret = []
        for agent in self.useragents:
            ret.extend(["User-agent: ", agent, "\n"])
        for line in self.rulelines:
            ret.extend([str(line), "\n"])
        return ''.join(ret)

    def applies_to(self, useragent):
        """check if this entry applies to the specified agent"""
        # split the name token and make it lower case
        useragent = useragent.split("/")[0].lower()
        for agent in self.useragents:
            if agent == '*':
                # we have the catch-all agent
                return True
            agent = agent.lower()
            if agent in useragent:
                return True
        return False

    def allowance(self, filename):
        """Preconditions:
        - our agent applies to this entry
        - filename is URL decoded"""
        # The first rule line that matches decides; no match means allowed.
        for line in self.rulelines:
            if line.applies_to(filename):
                return line.allowance
        return True


class URLopener(urllib.FancyURLopener):
    """FancyURLopener that records the HTTP error code it encounters."""

    def __init__(self, *args):
        urllib.FancyURLopener.__init__(self, *args)
        # Stays 200 unless http_error_default() records an error code.
        self.errcode = 200

    def prompt_user_passwd(self, host, realm):
        ## If robots.txt file is accessible only with a password,
        ## we act as if the file wasn't there.
        return None, None

    def http_error_default(self, url, fp, errcode, errmsg, headers):
        """Remember *errcode*, then defer to the base class handler."""
        self.errcode = errcode
        return urllib.FancyURLopener.http_error_default(self, url, fp, errcode,
                                                        errmsg, headers)