OXIESEC PANEL

Name	Size	Modified	Perms
📁 ..	-	10/28/2024 06:50:42 AM	rwxr-xr-x
📄 BaseHTTPServer.py	22.21 KB	03/08/2023 06:40:28 PM	rw-r--r--
📄 Bastion.py	5.61 KB	03/08/2023 06:40:28 PM	rw-r--r--
📄 CGIHTTPServer.py	12.78 KB	03/08/2023 06:40:28 PM	rw-r--r--
📄 ConfigParser.py	27.1 KB	03/08/2023 06:40:28 PM	rw-r--r--
📄 Cookie.py	25.92 KB	03/08/2023 06:40:28 PM	rw-r--r--
📄 DocXMLRPCServer.py	10.52 KB	03/08/2023 06:40:28 PM	rw-r--r--
📄 HTMLParser.py	16.77 KB	03/08/2023 06:40:28 PM	rw-r--r--
📄 LICENSE.txt	12.47 KB	03/08/2023 06:40:28 PM	rw-r--r--
📄 MimeWriter.py	6.33 KB	03/08/2023 06:40:28 PM	rw-r--r--
📄 Queue.py	8.38 KB	03/08/2023 06:40:28 PM	rw-r--r--
📄 SimpleHTTPServer.py	7.81 KB	03/08/2023 06:40:28 PM	rw-r--r--
📄 SimpleXMLRPCServer.py	25.21 KB	03/08/2023 06:40:28 PM	rw-r--r--
📄 SocketServer.py	23.39 KB	03/08/2023 06:40:28 PM	rw-r--r--
📄 StringIO.py	10.41 KB	03/08/2023 06:40:28 PM	rw-r--r--
📄 UserDict.py	6.89 KB	03/08/2023 06:40:28 PM	rw-r--r--
📄 UserList.py	3.56 KB	03/08/2023 06:40:28 PM	rw-r--r--
📄 UserString.py	9.46 KB	03/08/2023 06:40:28 PM	rwxr-xr-x
📄 _LWPCookieJar.py	6.4 KB	03/08/2023 06:40:28 PM	rw-r--r--
📄 _MozillaCookieJar.py	5.66 KB	03/08/2023 06:40:28 PM	rw-r--r--
📄 __future__.py	4.28 KB	03/08/2023 06:40:28 PM	rw-r--r--
📄 __phello__.foo.py	64 bytes	03/08/2023 06:40:28 PM	rw-r--r--
📄 _abcoll.py	18.18 KB	03/08/2023 06:40:28 PM	rw-r--r--
📄 _osx_support.py	18.65 KB	03/08/2023 06:40:28 PM	rw-r--r--
📄 _pyio.py	68 KB	03/08/2023 06:40:28 PM	rw-r--r--
📄 _strptime.py	20.24 KB	03/08/2023 06:40:28 PM	rw-r--r--
📄 _sysconfigdata.py	126 bytes	03/08/2023 06:40:28 PM	rw-r--r--
📄 _threading_local.py	7.09 KB	03/08/2023 06:40:28 PM	rw-r--r--
📄 _weakrefset.py	5.77 KB	03/08/2023 06:40:28 PM	rw-r--r--
📄 abc.py	6.98 KB	03/08/2023 06:40:28 PM	rw-r--r--
📄 aifc.py	33.77 KB	03/08/2023 06:40:28 PM	rw-r--r--
📄 antigravity.py	60 bytes	03/08/2023 06:40:28 PM	rw-r--r--
📄 anydbm.py	2.6 KB	03/08/2023 06:40:28 PM	rw-r--r--
📄 argparse.egg-info	217 bytes	03/08/2023 06:40:28 PM	rw-r--r--
📄 argparse.py	87.14 KB	03/08/2023 06:40:28 PM	rw-r--r--
📄 ast.py	11.53 KB	03/08/2023 06:40:28 PM	rw-r--r--
📄 asynchat.py	11.31 KB	03/08/2023 06:40:28 PM	rw-r--r--
📄 asyncore.py	20.45 KB	03/08/2023 06:40:28 PM	rw-r--r--
📄 atexit.py	1.67 KB	03/08/2023 06:40:28 PM	rw-r--r--
📄 audiodev.py	7.42 KB	03/08/2023 06:40:28 PM	rw-r--r--
📄 base64.py	11.53 KB	03/08/2023 06:40:28 PM	rwxr-xr-x
📄 bdb.py	21.21 KB	03/08/2023 06:40:28 PM	rw-r--r--
📄 binhex.py	14.35 KB	03/08/2023 06:40:28 PM	rw-r--r--
📄 bisect.py	2.53 KB	03/08/2023 06:40:28 PM	rw-r--r--
📁 bsddb	-	05/09/2024 07:14:10 AM	rwxr-xr-x
📄 cProfile.py	6.42 KB	03/08/2023 06:40:28 PM	rwxr-xr-x
📄 calendar.py	22.84 KB	03/08/2023 06:40:28 PM	rw-r--r--
📄 cgi.py	34.96 KB	03/08/2023 06:40:28 PM	rwxr-xr-x
📄 cgitb.py	11.89 KB	03/08/2023 06:40:28 PM	rw-r--r--
📄 chunk.py	5.29 KB	03/08/2023 06:40:28 PM	rw-r--r--
📄 cmd.py	14.67 KB	03/08/2023 06:40:28 PM	rw-r--r--
📄 code.py	9.95 KB	03/08/2023 06:40:28 PM	rw-r--r--
📄 codecs.py	35.3 KB	03/08/2023 06:40:28 PM	rw-r--r--
📄 codeop.py	5.86 KB	03/08/2023 06:40:28 PM	rw-r--r--
📄 collections.py	27.15 KB	03/08/2023 06:40:28 PM	rw-r--r--
📄 colorsys.py	3.6 KB	03/08/2023 06:40:28 PM	rw-r--r--
📄 commands.py	2.49 KB	03/08/2023 06:40:28 PM	rw-r--r--
📄 compileall.py	7.58 KB	03/08/2023 06:40:28 PM	rw-r--r--
📁 compiler	-	05/09/2024 07:14:10 AM	rwxr-xr-x
📁 config-x86_64-linux-gnu	-	05/09/2024 07:14:10 AM	rwxr-xr-x
📄 contextlib.py	4.32 KB	03/08/2023 06:40:28 PM	rw-r--r--
📄 cookielib.py	63.9 KB	03/08/2023 06:40:28 PM	rw-r--r--
📄 copy.py	11.26 KB	03/08/2023 06:40:28 PM	rw-r--r--
📄 copy_reg.py	6.81 KB	03/08/2023 06:40:28 PM	rw-r--r--
📄 csv.py	16.32 KB	03/08/2023 06:40:28 PM	rw-r--r--
📁 ctypes	-	05/09/2024 07:14:10 AM	rwxr-xr-x
📁 curses	-	05/09/2024 07:14:10 AM	rwxr-xr-x
📄 dbhash.py	498 bytes	03/08/2023 06:40:28 PM	rw-r--r--
📄 decimal.py	216.73 KB	03/08/2023 06:40:28 PM	rw-r--r--
📄 difflib.py	80.4 KB	03/08/2023 06:40:28 PM	rw-r--r--
📄 dircache.py	1.1 KB	03/08/2023 06:40:28 PM	rw-r--r--
📄 dis.py	6.35 KB	03/08/2023 06:40:28 PM	rw-r--r--
📁 dist-packages	-	05/09/2024 07:14:10 AM	rwxr-xr-x
📁 distutils	-	05/09/2024 07:14:10 AM	rwxr-xr-x
📄 doctest.py	102.63 KB	03/08/2023 06:40:28 PM	rw-r--r--
📄 dumbdbm.py	8.93 KB	03/08/2023 06:40:28 PM	rw-r--r--
📄 dummy_thread.py	4.31 KB	03/08/2023 06:40:28 PM	rw-r--r--
📄 dummy_threading.py	2.74 KB	03/08/2023 06:40:28 PM	rw-r--r--
📁 email	-	05/09/2024 07:14:10 AM	rwxr-xr-x
📁 encodings	-	05/09/2024 07:14:10 AM	rwxr-xr-x
📁 ensurepip	-	05/09/2024 07:14:10 AM	rwxr-xr-x
📄 filecmp.py	9.36 KB	03/08/2023 06:40:28 PM	rw-r--r--
📄 fileinput.py	13.42 KB	03/08/2023 06:40:28 PM	rw-r--r--
📄 fnmatch.py	3.24 KB	03/08/2023 06:40:28 PM	rw-r--r--
📄 formatter.py	14.56 KB	03/08/2023 06:40:28 PM	rw-r--r--
📄 fpformat.py	4.62 KB	03/08/2023 06:40:28 PM	rw-r--r--
📄 fractions.py	21.87 KB	03/08/2023 06:40:28 PM	rw-r--r--
📄 ftplib.py	37.65 KB	03/08/2023 06:40:28 PM	rw-r--r--
📄 functools.py	4.69 KB	03/08/2023 06:40:28 PM	rw-r--r--
📄 genericpath.py	3.13 KB	03/08/2023 06:40:28 PM	rw-r--r--
📄 getopt.py	7.15 KB	03/08/2023 06:40:28 PM	rw-r--r--
📄 getpass.py	5.43 KB	03/08/2023 06:40:28 PM	rw-r--r--
📄 gettext.py	22.48 KB	03/08/2023 06:40:28 PM	rw-r--r--
📄 glob.py	3.04 KB	03/08/2023 06:40:28 PM	rw-r--r--
📄 gzip.py	18.58 KB	03/08/2023 06:40:28 PM	rw-r--r--
📄 hashlib.py	7.66 KB	03/08/2023 06:40:28 PM	rw-r--r--
📄 heapq.py	17.87 KB	03/08/2023 06:40:28 PM	rw-r--r--
📄 hmac.py	4.48 KB	03/08/2023 06:40:28 PM	rw-r--r--
📁 hotshot	-	05/09/2024 07:14:10 AM	rwxr-xr-x
📄 htmlentitydefs.py	17.63 KB	03/08/2023 06:40:28 PM	rw-r--r--
📄 htmllib.py	12.57 KB	03/08/2023 06:40:28 PM	rw-r--r--
📄 httplib.py	51.72 KB	03/08/2023 06:40:28 PM	rw-r--r--
📄 ihooks.py	18.54 KB	03/08/2023 06:40:28 PM	rw-r--r--
📄 imaplib.py	47.23 KB	03/08/2023 06:40:28 PM	rw-r--r--
📄 imghdr.py	3.46 KB	03/08/2023 06:40:28 PM	rw-r--r--
📁 importlib	-	05/09/2024 07:14:10 AM	rwxr-xr-x
📄 imputil.py	25.16 KB	03/08/2023 06:40:28 PM	rw-r--r--
📄 inspect.py	42 KB	03/08/2023 06:40:28 PM	rw-r--r--
📄 io.py	3.24 KB	03/08/2023 06:40:28 PM	rw-r--r--
📁 json	-	05/09/2024 07:14:10 AM	rwxr-xr-x
📄 keyword.py	1.95 KB	03/08/2023 06:40:28 PM	rwxr-xr-x
📁 lib-dynload	-	05/09/2024 07:14:10 AM	rwxr-xr-x
📁 lib-tk	-	05/09/2024 07:14:10 AM	rwxr-xr-x
📁 lib2to3	-	05/09/2024 07:14:10 AM	rwxr-xr-x
📄 linecache.py	3.93 KB	03/08/2023 06:40:28 PM	rw-r--r--
📄 locale.py	100.43 KB	03/08/2023 06:40:28 PM	rw-r--r--
📁 logging	-	05/09/2024 07:14:10 AM	rwxr-xr-x
📄 macpath.py	6.14 KB	03/08/2023 06:40:28 PM	rw-r--r--
📄 macurl2path.py	2.67 KB	03/08/2023 06:40:28 PM	rw-r--r--
📄 mailbox.py	79.34 KB	03/08/2023 06:40:28 PM	rw-r--r--
📄 mailcap.py	8.21 KB	03/08/2023 06:40:28 PM	rw-r--r--
📄 markupbase.py	14.3 KB	03/08/2023 06:40:28 PM	rw-r--r--
📄 md5.py	358 bytes	03/08/2023 06:40:28 PM	rw-r--r--
📄 mhlib.py	32.65 KB	03/08/2023 06:40:28 PM	rw-r--r--
📄 mimetools.py	7 KB	03/08/2023 06:40:28 PM	rw-r--r--
📄 mimetypes.py	20.54 KB	03/08/2023 06:40:28 PM	rw-r--r--
📄 mimify.py	14.67 KB	03/08/2023 06:40:28 PM	rwxr-xr-x
📄 modulefinder.py	23.89 KB	03/08/2023 06:40:28 PM	rw-r--r--
📄 multifile.py	4.71 KB	03/08/2023 06:40:28 PM	rw-r--r--
📁 multiprocessing	-	05/09/2024 07:14:10 AM	rwxr-xr-x
📄 mutex.py	1.83 KB	03/08/2023 06:40:28 PM	rw-r--r--
📄 netrc.py	5.75 KB	03/08/2023 06:40:28 PM	rw-r--r--
📄 new.py	610 bytes	03/08/2023 06:40:28 PM	rw-r--r--
📄 nntplib.py	20.97 KB	03/08/2023 06:40:28 PM	rw-r--r--
📄 ntpath.py	18.97 KB	03/08/2023 06:40:28 PM	rw-r--r--
📄 nturl2path.py	2.36 KB	03/08/2023 06:40:28 PM	rw-r--r--
📄 numbers.py	10.08 KB	03/08/2023 06:40:28 PM	rw-r--r--
📄 opcode.py	5.35 KB	03/08/2023 06:40:28 PM	rw-r--r--
📄 optparse.py	59.77 KB	03/08/2023 06:40:28 PM	rw-r--r--
📄 os.py	25.3 KB	03/08/2023 06:40:28 PM	rw-r--r--
📄 os2emxpath.py	4.53 KB	03/08/2023 06:40:28 PM	rw-r--r--
📄 pdb.doc	7.73 KB	03/08/2023 06:40:28 PM	rw-r--r--
📄 pdb.py	45.02 KB	03/08/2023 06:40:28 PM	rwxr-xr-x
📄 pickle.py	44.42 KB	03/08/2023 06:40:28 PM	rw-r--r--
📄 pickletools.py	72.78 KB	03/08/2023 06:40:28 PM	rw-r--r--
📄 pipes.py	9.36 KB	03/08/2023 06:40:28 PM	rw-r--r--
📄 pkgutil.py	19.77 KB	03/08/2023 06:40:28 PM	rw-r--r--
📁 plat-x86_64-linux-gnu	-	05/09/2024 07:14:10 AM	rwxr-xr-x
📄 platform.py	52.52 KB	03/08/2023 06:40:28 PM	rwxr-xr-x
📄 plistlib.py	14.83 KB	03/08/2023 06:40:28 PM	rw-r--r--
📄 popen2.py	8.22 KB	03/08/2023 06:40:28 PM	rw-r--r--
📄 poplib.py	12.52 KB	03/08/2023 06:40:28 PM	rw-r--r--
📄 posixfile.py	7.82 KB	03/08/2023 06:40:28 PM	rw-r--r--
📄 posixpath.py	13.96 KB	03/08/2023 06:40:28 PM	rw-r--r--
📄 pprint.py	11.5 KB	03/08/2023 06:40:28 PM	rw-r--r--
📄 profile.py	22.25 KB	03/08/2023 06:40:28 PM	rwxr-xr-x
📄 pstats.py	26.09 KB	03/08/2023 06:40:28 PM	rw-r--r--
📄 pty.py	4.94 KB	03/08/2023 06:40:28 PM	rw-r--r--
📄 py_compile.py	6.14 KB	03/08/2023 06:40:28 PM	rw-r--r--
📄 pyclbr.py	13.07 KB	03/08/2023 06:40:28 PM	rw-r--r--
📄 pydoc.py	93.9 KB	03/08/2023 06:40:28 PM	rwxr-xr-x
📁 pydoc_data	-	05/09/2024 07:14:10 AM	rwxr-xr-x
📄 quopri.py	6.8 KB	03/08/2023 06:40:28 PM	rwxr-xr-x
📄 random.py	31.7 KB	03/08/2023 06:40:28 PM	rw-r--r--
📄 re.py	13.11 KB	03/08/2023 06:40:28 PM	rw-r--r--
📄 repr.py	4.2 KB	03/08/2023 06:40:28 PM	rw-r--r--
📄 rexec.py	19.68 KB	03/08/2023 06:40:28 PM	rw-r--r--
📄 rfc822.py	32.76 KB	03/08/2023 06:40:28 PM	rw-r--r--
📄 rlcompleter.py	5.85 KB	03/08/2023 06:40:28 PM	rw-r--r--
📄 robotparser.py	7.51 KB	03/08/2023 06:40:28 PM	rw-r--r--
📄 runpy.py	10.82 KB	03/08/2023 06:40:28 PM	rw-r--r--
📄 sched.py	4.97 KB	03/08/2023 06:40:28 PM	rw-r--r--
📄 sets.py	18.6 KB	03/08/2023 06:40:28 PM	rw-r--r--
📄 sgmllib.py	17.46 KB	03/08/2023 06:40:28 PM	rw-r--r--
📄 sha.py	393 bytes	03/08/2023 06:40:28 PM	rw-r--r--
📄 shelve.py	7.99 KB	03/08/2023 06:40:28 PM	rw-r--r--
📄 shlex.py	10.9 KB	03/08/2023 06:40:28 PM	rw-r--r--
📄 shutil.py	19.41 KB	03/08/2023 06:40:28 PM	rw-r--r--
📄 site.py	19.48 KB	03/08/2023 06:40:28 PM	rw-r--r--
📄 sitecustomize.py	155 bytes	11/07/2019 10:07:09 AM	rw-r--r--
📄 smtpd.py	18.11 KB	03/08/2023 06:40:28 PM	rwxr-xr-x
📄 smtplib.py	31.38 KB	03/08/2023 06:40:28 PM	rwxr-xr-x
📄 sndhdr.py	5.83 KB	03/08/2023 06:40:28 PM	rw-r--r--
📄 socket.py	20.13 KB	03/08/2023 06:40:28 PM	rw-r--r--
📁 sqlite3	-	05/09/2024 07:14:10 AM	rwxr-xr-x
📄 sre.py	384 bytes	03/08/2023 06:40:28 PM	rw-r--r--
📄 sre_compile.py	19.36 KB	03/08/2023 06:40:28 PM	rw-r--r--
📄 sre_constants.py	7.03 KB	03/08/2023 06:40:28 PM	rw-r--r--
📄 sre_parse.py	29.98 KB	03/08/2023 06:40:28 PM	rw-r--r--
📄 ssl.py	36.58 KB	03/08/2023 06:40:28 PM	rw-r--r--
📄 stat.py	1.8 KB	03/08/2023 06:40:28 PM	rw-r--r--
📄 statvfs.py	898 bytes	03/08/2023 06:40:28 PM	rw-r--r--
📄 string.py	21.04 KB	03/08/2023 06:40:28 PM	rw-r--r--
📄 stringold.py	12.16 KB	03/08/2023 06:40:28 PM	rw-r--r--
📄 stringprep.py	13.21 KB	03/08/2023 06:40:28 PM	rw-r--r--
📄 struct.py	82 bytes	03/08/2023 06:40:28 PM	rw-r--r--
📄 subprocess.py	49.34 KB	03/08/2023 06:40:28 PM	rw-r--r--
📄 sunau.py	16.82 KB	03/08/2023 06:40:28 PM	rw-r--r--
📄 sunaudio.py	1.37 KB	03/08/2023 06:40:28 PM	rw-r--r--
📄 symbol.py	2.01 KB	03/08/2023 06:40:28 PM	rwxr-xr-x
📄 symtable.py	7.26 KB	03/08/2023 06:40:28 PM	rw-r--r--
📄 sysconfig.py	24.9 KB	03/08/2023 06:40:28 PM	rw-r--r--
📄 tabnanny.py	11.07 KB	03/08/2023 06:40:28 PM	rwxr-xr-x
📄 tarfile.py	88.53 KB	03/08/2023 06:40:28 PM	rw-r--r--
📄 telnetlib.py	26.4 KB	03/08/2023 06:40:28 PM	rw-r--r--
📄 tempfile.py	19.09 KB	03/08/2023 06:40:28 PM	rw-r--r--
📁 test	-	05/09/2024 07:14:10 AM	rwxr-xr-x
📄 textwrap.py	16.88 KB	03/08/2023 06:40:28 PM	rw-r--r--
📄 this.py	1002 bytes	03/08/2023 06:40:28 PM	rw-r--r--
📄 threading.py	46.01 KB	03/08/2023 06:40:28 PM	rw-r--r--
📄 timeit.py	12.49 KB	03/08/2023 06:40:28 PM	rwxr-xr-x
📄 toaiff.py	3.07 KB	03/08/2023 06:40:28 PM	rw-r--r--
📄 token.py	2.85 KB	03/08/2023 06:40:28 PM	rw-r--r--
📄 tokenize.py	17.07 KB	03/08/2023 06:40:28 PM	rw-r--r--
📄 trace.py	29.19 KB	03/08/2023 06:40:28 PM	rwxr-xr-x
📄 traceback.py	11.02 KB	03/08/2023 06:40:28 PM	rw-r--r--
📄 tty.py	879 bytes	03/08/2023 06:40:28 PM	rw-r--r--
📄 types.py	2.04 KB	03/08/2023 06:40:28 PM	rw-r--r--
📁 unittest	-	05/09/2024 07:14:10 AM	rwxr-xr-x
📄 urllib.py	58.68 KB	03/08/2023 06:40:28 PM	rw-r--r--
📄 urllib2.py	51.57 KB	03/08/2023 06:40:28 PM	rw-r--r--
📄 urlparse.py	16.78 KB	03/08/2023 06:40:28 PM	rw-r--r--
📄 user.py	1.59 KB	03/08/2023 06:40:28 PM	rw-r--r--
📄 uu.py	6.4 KB	03/08/2023 06:40:28 PM	rwxr-xr-x
📄 uuid.py	22.63 KB	03/08/2023 06:40:28 PM	rw-r--r--
📄 warnings.py	14.48 KB	03/08/2023 06:40:28 PM	rw-r--r--
📄 wave.py	18.15 KB	03/08/2023 06:40:28 PM	rw-r--r--
📄 weakref.py	14.48 KB	03/08/2023 06:40:28 PM	rw-r--r--
📄 webbrowser.py	22.19 KB	03/08/2023 06:40:28 PM	rwxr-xr-x
📄 whichdb.py	3.3 KB	03/08/2023 06:40:28 PM	rw-r--r--
📁 wsgiref	-	05/09/2024 07:14:10 AM	rwxr-xr-x
📄 wsgiref.egg-info	187 bytes	03/08/2023 06:40:28 PM	rw-r--r--
📄 xdrlib.py	5.93 KB	03/08/2023 06:40:28 PM	rw-r--r--
📁 xml	-	05/09/2024 07:14:10 AM	rwxr-xr-x
📄 xmllib.py	34.05 KB	03/08/2023 06:40:28 PM	rw-r--r--
📄 xmlrpclib.py	50.91 KB	03/08/2023 06:40:28 PM	rw-r--r--
📄 zipfile.py	58.08 KB	03/08/2023 06:40:28 PM	rw-r--r--

Editing: HTMLParser.py

"""A parser for HTML and XHTML."""

# This file is based on sgmllib.py, but the API is slightly different.

# XXX There should be a way to distinguish between PCDATA (parsed
# character data -- the normal case), RCDATA (replaceable character
# data -- only char and entity references and end tags are special)
# and CDATA (character data -- only end tags are special).

import markupbase
import re

# Regular expressions used for parsing

# All patterns below are written as raw strings so that regex escapes such
# as \s reach the regex engine untouched instead of relying on Python
# passing unknown string escapes through unchanged.

# In normal (non-CDATA) mode the parser only has to stop at the start of a
# reference ('&') or of markup ('<'); everything else is plain data.
interesting_normal = re.compile(r'[&<]')
# An '&' followed by a character that could begin an entity or character
# reference -- used to detect a reference that is not yet complete.
incomplete = re.compile(r'&[a-zA-Z#]')

# Both reference patterns also match the single character that follows the
# reference (a ';' or whatever else terminated it); callers look at that
# character to decide whether it belongs to the reference.
entityref = re.compile(r'&([a-zA-Z][-.a-zA-Z0-9]*)[^a-zA-Z0-9]')
charref = re.compile(r'&#(?:[0-9]+|[xX][0-9a-fA-F]+)[^0-9a-fA-F]')

# '<' immediately followed by a letter opens a start tag.
starttagopen = re.compile(r'<[a-zA-Z]')
# Terminator of a processing instruction.
piclose = re.compile(r'>')
# Terminator of a comment; whitespace is tolerated between '--' and '>'.
commentclose = re.compile(r'--\s*>')

# see http://www.w3.org/TR/html5/tokenization.html#tag-open-state
# and http://www.w3.org/TR/html5/tokenization.html#tag-name-state
# note: if you change tagfind/attrfind remember to update locatestarttagend too
# Group 1 is the tag name; the tail swallows whitespace and any '/' that is
# not the start of a closing '/>'.
tagfind = re.compile(r'([a-zA-Z][^\t\n\r\f />\x00]*)(?:\s|/(?!>))*')
# this regex is currently unused, but left for backward compatibility
tagfind_tolerant = re.compile(r'[a-zA-Z][^\t\n\r\f />\x00]*')

# Groups: 1 = attribute name, 2 = the '= value' part (absent for a bare
# attribute), 3 = the value including any surrounding quotes.
attrfind = re.compile(
    r'((?<=[\'"\s/])[^\s/>][^\s/=>]*)(\s*=+\s*'
    r'(\'[^\']*\'|"[^"]*"|(?![\'"])[^>\s]*))?(?:\s|/(?!>))*')

# Matches a start tag up to, but not including, its closing '>' or '/>'.
locatestarttagend = re.compile(r"""
  <[a-zA-Z][^\t\n\r\f />\x00]*       # tag name
  (?:[\s/]*                          # optional whitespace before attribute name
    (?:(?<=['"\s/])[^\s/>][^\s/=>]*  # attribute name
      (?:\s*=+\s*                    # value indicator
        (?:'[^']*'                   # LITA-enclosed value
          |"[^"]*"                   # LIT-enclosed value
          |(?!['"])[^>\s]*           # bare value
         )
       )?(?:\s|/(?!>))*
     )*
   )?
  \s*                                # trailing whitespace
""", re.VERBOSE)
# The '>' that ends an end tag.
endendtag = re.compile(r'>')
# the HTML 5 spec, section 8.1.2.2, doesn't allow spaces between
# </ and the tag name, so maybe this should be fixed
endtagfind = re.compile(r'</\s*([a-zA-Z][-.a-zA-Z0-9:_]*)\s*>')

class HTMLParseError(Exception):
    """Exception raised for all parse errors.

    Attributes:
        msg    -- the error message (must be non-empty)
        lineno -- line number of the error, or None if unknown
        offset -- zero-based column of the error, or None if unknown
    """

    def __init__(self, msg, position=(None, None)):
        """Store the message and the (lineno, offset) position pair."""
        assert msg
        self.msg = msg
        self.lineno = position[0]
        self.offset = position[1]

    def __str__(self):
        """Return the message followed by whatever position is known."""
        result = self.msg
        if self.lineno is not None:
            result = result + ", at line %d" % self.lineno
        if self.offset is not None:
            # The stored offset is zero-based; report a one-based column.
            result = result + ", column %d" % (self.offset + 1)
        return result

class HTMLParser(markupbase.ParserBase):
    """Find tags and other markup and call handler functions.

    Usage:
        p = HTMLParser()
        p.feed(data)
        ...
        p.close()

    Start tags are handled by calling self.handle_starttag() or
    self.handle_startendtag(); end tags by self.handle_endtag().  The
    data between tags is passed from the parser to the derived class
    by calling self.handle_data() with the data as argument (the data
    may be split up in arbitrary chunks).  Entity references are
    passed by calling self.handle_entityref() with the entity
    reference as the argument.  Numeric character references are
    passed to self.handle_charref() with the string containing the
    reference as the argument.
    """

    # Elements whose content is treated as raw character data: after their
    # start tag only the matching end tag is recognised as markup.
    CDATA_CONTENT_ELEMENTS = ("script", "style")

    def __init__(self):
        """Initialize and reset this instance."""
        self.reset()

    def reset(self):
        """Reset this instance.  Loses all unprocessed data."""
        self.rawdata = ''
        self.lasttag = '???'
        self.interesting = interesting_normal
        self.cdata_elem = None
        markupbase.ParserBase.reset(self)

    def feed(self, data):
        r"""Feed data to the parser.

        Call this as often as you want, with as little or as much text
        as you want (may include '\n').
        """
        self.rawdata = self.rawdata + data
        self.goahead(0)

    def close(self):
        """Handle any buffered data."""
        self.goahead(1)

    def error(self, message):
        """Raise HTMLParseError for *message* at the current position."""
        raise HTMLParseError(message, self.getpos())

    # Source text of the most recently parsed start tag (None if there is
    # none, or while a start tag is still incomplete).
    __starttag_text = None

    def get_starttag_text(self):
        """Return full source of start tag: '<...>'."""
        return self.__starttag_text

    def set_cdata_mode(self, elem):
        """Enter CDATA mode: only the end tag of *elem* is 'interesting'."""
        self.cdata_elem = elem.lower()
        self.interesting = re.compile(r'</\s*%s\s*>' % self.cdata_elem, re.I)

    def clear_cdata_mode(self):
        """Leave CDATA mode and look for '&' and '<' again."""
        self.interesting = interesting_normal
        self.cdata_elem = None

    # Internal -- handle data as far as reasonable.  May leave state
    # and data to be processed by a subsequent call.  If 'end' is
    # true, force handling all data as if followed by EOF marker.
    def goahead(self, end):
        rawdata = self.rawdata
        i = 0
        n = len(rawdata)
        while i < n:
            match = self.interesting.search(rawdata, i) # < or &
            if match:
                j = match.start()
            else:
                if self.cdata_elem:
                    # In CDATA mode keep buffering until the end tag of
                    # the element shows up.
                    break
                j = n
            if i < j:
                self.handle_data(rawdata[i:j])
            i = self.updatepos(i, j)
            if i == n:
                break
            startswith = rawdata.startswith
            if startswith('<', i):
                if starttagopen.match(rawdata, i): # < + letter
                    k = self.parse_starttag(i)
                elif startswith("</", i):
                    k = self.parse_endtag(i)
                elif startswith("<!--", i):
                    k = self.parse_comment(i)
                elif startswith("<?", i):
                    k = self.parse_pi(i)
                elif startswith("<!", i):
                    k = self.parse_html_declaration(i)
                elif (i + 1) < n:
                    # A '<' that opens no known construct is plain data.
                    self.handle_data("<")
                    k = i + 1
                else:
                    # Lone '<' at the end of the buffer: wait for more.
                    break
                if k < 0:
                    # The construct is not terminated in the buffer.
                    if not end:
                        break
                    # At EOF, flush the unterminated construct as data: up
                    # to and including the next '>', else up to the next
                    # '<', else just the single '<'.
                    k = rawdata.find('>', i + 1)
                    if k < 0:
                        k = rawdata.find('<', i + 1)
                        if k < 0:
                            k = i + 1
                    else:
                        k += 1
                    self.handle_data(rawdata[i:k])
                i = self.updatepos(i, k)
            elif startswith("&#", i):
                match = charref.match(rawdata, i)
                if match:
                    # Strip the leading '&#' and the trailing terminator.
                    name = match.group()[2:-1]
                    self.handle_charref(name)
                    k = match.end()
                    # The terminator is consumed only when it is the ';'.
                    if not startswith(';', k-1):
                        k = k - 1
                    i = self.updatepos(i, k)
                    continue
                else:
                    if rawdata.find(";", i) >= 0:  # bail by consuming '&#'
                        self.handle_data(rawdata[i:i+2])
                        i = self.updatepos(i, i+2)
                    break
            elif startswith('&', i):
                match = entityref.match(rawdata, i)
                if match:
                    name = match.group(1)
                    self.handle_entityref(name)
                    k = match.end()
                    # The terminator is consumed only when it is the ';'.
                    if not startswith(';', k-1):
                        k = k - 1
                    i = self.updatepos(i, k)
                    continue
                match = incomplete.match(rawdata, i)
                if match:
                    # match.group() will contain at least 2 chars
                    if end and match.group() == rawdata[i:]:
                        self.error("EOF in middle of entity or char ref")
                    # incomplete
                    break
                elif (i + 1) < n:
                    # not the end of the buffer, and can't be confused
                    # with some other construct
                    self.handle_data("&")
                    i = self.updatepos(i, i + 1)
                else:
                    break
            else:
                assert 0, "interesting.search() lied"
        # end while
        if end and i < n and not self.cdata_elem:
            self.handle_data(rawdata[i:n])
            i = self.updatepos(i, n)
        # Whatever was not consumed stays buffered for the next call.
        self.rawdata = rawdata[i:]

    # Internal -- parse html declarations, return length or -1 if not terminated
    # See w3.org/TR/html5/tokenization.html#markup-declaration-open-state
    # See also parse_declaration in _markupbase
    def parse_html_declaration(self, i):
        rawdata = self.rawdata
        if rawdata[i:i+2] != '<!':
            self.error('unexpected call to parse_html_declaration()')
        if rawdata[i:i+4] == '<!--':
            # this case is actually already handled in goahead()
            return self.parse_comment(i)
        elif rawdata[i:i+3] == '<![':
            return self.parse_marked_section(i)
        elif rawdata[i:i+9].lower() == '<!doctype':
            # find the closing >
            gtpos = rawdata.find('>', i+9)
            if gtpos == -1:
                return -1
            # Report the declaration without the '<!' and the '>'.
            self.handle_decl(rawdata[i+2:gtpos])
            return gtpos+1
        else:
            return self.parse_bogus_comment(i)

    # Internal -- parse bogus comment, return length or -1 if not terminated
    # see http://www.w3.org/TR/html5/tokenization.html#bogus-comment-state
    def parse_bogus_comment(self, i, report=1):
        rawdata = self.rawdata
        if rawdata[i:i+2] not in ('<!', '</'):
            self.error('unexpected call to parse_comment()')
        pos = rawdata.find('>', i+2)
        if pos == -1:
            return -1
        if report:
            self.handle_comment(rawdata[i+2:pos])
        return pos + 1

    # Internal -- parse processing instr, return end or -1 if not terminated
    def parse_pi(self, i):
        rawdata = self.rawdata
        assert rawdata[i:i+2] == '<?', 'unexpected call to parse_pi()'
        match = piclose.search(rawdata, i+2) # >
        if not match:
            return -1
        # Report everything between '<?' and the closing '>'.
        self.handle_pi(rawdata[i+2:match.start()])
        return match.end()

    # Internal -- handle starttag, return end or -1 if not terminated
    def parse_starttag(self, i):
        self.__starttag_text = None
        endpos = self.check_for_whole_start_tag(i)
        if endpos < 0:
            return endpos
        rawdata = self.rawdata
        self.__starttag_text = rawdata[i:endpos]

        # Now parse the data between i+1 and endpos into a tag and attrs
        attrs = []
        match = tagfind.match(rawdata, i+1)
        assert match, 'unexpected call to parse_starttag()'
        k = match.end()
        self.lasttag = tag = match.group(1).lower()

        while k < endpos:
            m = attrfind.match(rawdata, k)
            if not m:
                break
            attrname, rest, attrvalue = m.group(1, 2, 3)
            if not rest:
                # Bare attribute without '= value'.
                attrvalue = None
            elif attrvalue[:1] == '\'' == attrvalue[-1:] or \
                 attrvalue[:1] == '"' == attrvalue[-1:]:
                # Drop the matching pair of surrounding quotes.
                attrvalue = attrvalue[1:-1]
            if attrvalue:
                attrvalue = self.unescape(attrvalue)
            attrs.append((attrname.lower(), attrvalue))
            k = m.end()

        closer = rawdata[k:endpos].strip()
        if closer not in (">", "/>"):
            # Something other than the closing bracket follows the
            # attributes: hand the whole tag text over as plain data.
            self.handle_data(rawdata[i:endpos])
            return endpos
        if closer.endswith('/>'):
            # XHTML-style empty tag: <span attr="value" />
            self.handle_startendtag(tag, attrs)
        else:
            self.handle_starttag(tag, attrs)
            if tag in self.CDATA_CONTENT_ELEMENTS:
                self.set_cdata_mode(tag)
        return endpos

    # Internal -- check to see if we have a complete starttag; return end
    # or -1 if incomplete.
    def check_for_whole_start_tag(self, i):
        rawdata = self.rawdata
        m = locatestarttagend.match(rawdata, i)
        if m:
            j = m.end()
            nextchar = rawdata[j:j+1]
            if nextchar == ">":
                return j + 1
            if nextchar == "/":
                if rawdata.startswith("/>", j):
                    return j + 2
                # A '/' not (yet) followed by '>': buffer boundary.
                return -1
            if nextchar == "":
                # end of input
                return -1
            if nextchar in ("abcdefghijklmnopqrstuvwxyz=/"
                            "ABCDEFGHIJKLMNOPQRSTUVWXYZ"):
                # end of input in or before attribute value, or we have the
                # '/' from a '/>' ending
                return -1
            # Anything else: stop at the end of the matched text, always
            # advancing by at least one character.
            if j > i:
                return j
            else:
                return i + 1
        raise AssertionError("we should not get here!")

    # Internal -- parse endtag, return end or -1 if incomplete
    def parse_endtag(self, i):
        rawdata = self.rawdata
        assert rawdata[i:i+2] == "</", "unexpected call to parse_endtag"
        match = endendtag.search(rawdata, i+1) # >
        if not match:
            return -1
        gtpos = match.end()
        match = endtagfind.match(rawdata, i) # </ + tag + >
        if not match:
            if self.cdata_elem is not None:
                # Inside script/style a malformed end tag is just data.
                self.handle_data(rawdata[i:gtpos])
                return gtpos
            # find the name: w3.org/TR/html5/tokenization.html#tag-name-state
            namematch = tagfind.match(rawdata, i+2)
            if not namematch:
                # w3.org/TR/html5/tokenization.html#end-tag-open-state
                if rawdata[i:i+3] == '</>':
                    return i+3
                else:
                    return self.parse_bogus_comment(i)
            tagname = namematch.group(1).lower()
            # consume and ignore other stuff between the name and the >
            # Note: this is not 100% correct, since we might have things like
            # </tag attr=">">, but looking for > after the name should cover
            # most of the cases and is much simpler
            gtpos = rawdata.find('>', namematch.end())
            self.handle_endtag(tagname)
            return gtpos+1

        elem = match.group(1).lower() # script or style
        if self.cdata_elem is not None:
            if elem != self.cdata_elem:
                # An end tag for some other element does not terminate
                # CDATA mode; it is part of the raw content.
                self.handle_data(rawdata[i:gtpos])
                return gtpos

        self.handle_endtag(elem)
        self.clear_cdata_mode()
        return gtpos

    # Overridable -- finish processing of start+end tag: <tag.../>
    def handle_startendtag(self, tag, attrs):
        self.handle_starttag(tag, attrs)
        self.handle_endtag(tag)

    # Overridable -- handle start tag
    def handle_starttag(self, tag, attrs):
        pass

    # Overridable -- handle end tag
    def handle_endtag(self, tag):
        pass

    # Overridable -- handle character reference
    def handle_charref(self, name):
        pass

    # Overridable -- handle entity reference
    def handle_entityref(self, name):
        pass

    # Overridable -- handle data
    def handle_data(self, data):
        pass

    # Overridable -- handle comment
    def handle_comment(self, data):
        pass

    # Overridable -- handle declaration
    def handle_decl(self, decl):
        pass

    # Overridable -- handle processing instruction
    def handle_pi(self, data):
        pass

    def unknown_decl(self, data):
        pass

    # Internal -- helper to remove special character quoting
    # Name -> character table shared by all instances; built on first use.
    entitydefs = None
    def unescape(self, s):
        if '&' not in s:
            return s
        def replaceEntities(s):
            # 's' arrives as a match object; group 1 is the reference text
            # between '&' and ';'.
            s = s.groups()[0]
            try:
                if s[0] == "#":
                    s = s[1:]
                    if s[0] in ['x','X']:
                        c = int(s[1:], 16)
                    else:
                        c = int(s)
                    return unichr(c)
            except ValueError:
                # Not a valid number or code point: leave the text as is.
                return '&#'+s+';'
            else:
                # Cannot use name2codepoint directly, because HTMLParser supports apos,
                # which is not part of HTML 4
                if HTMLParser.entitydefs is None:
                    import htmlentitydefs
                    entitydefs = {'apos':u"'"}
                    for k, v in htmlentitydefs.name2codepoint.iteritems():
                        entitydefs[k] = unichr(v)
                    HTMLParser.entitydefs = entitydefs
                try:
                    return self.entitydefs[s]
                except KeyError:
                    # Unknown entity name: leave the reference untouched.
                    return '&'+s+';'

        return re.sub(r"&(#?[xX]?(?:[0-9a-fA-F]+|\w{1,8}));", replaceEntities, s)