I've installed readability-lxml with pip on Ubuntu 12.10 (64-bit, AMD), and all my packages are up to date.
When I try accessing URLs from the NY Times website using the command-line syntax, the requests fail, e.g., with the following output:
ERROR:root:error getting summary:
Traceback (most recent call last):
File "/usr/local/lib/python2.7/dist-packages/readability/readability.py", line 136, in summary
self._html(True)
File "/usr/local/lib/python2.7/dist-packages/readability/readability.py", line 104, in _html
self.html = self._parse(self.input)
File "/usr/local/lib/python2.7/dist-packages/readability/readability.py", line 108, in _parse
doc = build_doc(input)
File "/usr/local/lib/python2.7/dist-packages/readability/htmls.py", line 18, in build_doc
doc = lxml.html.document_fromstring(page_unicode.encode('utf-8', 'replace'), parser=utf8_parser)
File "/usr/lib/python2.7/dist-packages/lxml/html/__init__.py", line 532, in document_fromstring
value = etree.fromstring(html, parser, **kw)
File "lxml.etree.pyx", line 2756, in lxml.etree.fromstring (src/lxml/lxml.etree.c:54726)
File "parser.pxi", line 1578, in lxml.etree._parseMemoryDocument (src/lxml/lxml.etree.c:82843)
File "parser.pxi", line 1457, in lxml.etree._parseDoc (src/lxml/lxml.etree.c:81641)
File "parser.pxi", line 965, in lxml.etree._BaseParser._parseDoc (src/lxml/lxml.etree.c:78311)
File "parser.pxi", line 569, in lxml.etree._ParserContext._handleParseResultDoc (src/lxml/lxml.etree.c:74567)
File "parser.pxi", line 650, in lxml.etree._handleParseResult (src/lxml/lxml.etree.c:75458)
File "parser.pxi", line 601, in lxml.etree._raiseParseError (src/lxml/lxml.etree.c:74958)
XMLSyntaxError: None
Traceback (most recent call last):
File "/usr/lib/python2.7/runpy.py", line 162, in _run_module_as_main
"__main__", fname, loader, pkg_name)
File "/usr/lib/python2.7/runpy.py", line 72, in _run_code
exec code in run_globals
File "/usr/local/lib/python2.7/dist-packages/readability/readability.py", line 589, in <module>
main()
File "/usr/local/lib/python2.7/dist-packages/readability/readability.py", line 584, in main
url=options.url).summary().encode(enc, 'replace')
File "/usr/local/lib/python2.7/dist-packages/readability/readability.py", line 136, in summary
self._html(True)
File "/usr/local/lib/python2.7/dist-packages/readability/readability.py", line 104, in _html
self.html = self._parse(self.input)
File "/usr/local/lib/python2.7/dist-packages/readability/readability.py", line 108, in _parse
doc = build_doc(input)
File "/usr/local/lib/python2.7/dist-packages/readability/htmls.py", line 18, in build_doc
doc = lxml.html.document_fromstring(page_unicode.encode('utf-8', 'replace'), parser=utf8_parser)
File "/usr/lib/python2.7/dist-packages/lxml/html/__init__.py", line 532, in document_fromstring
value = etree.fromstring(html, parser, **kw)
File "lxml.etree.pyx", line 2756, in lxml.etree.fromstring (src/lxml/lxml.etree.c:54726)
File "parser.pxi", line 1578, in lxml.etree._parseMemoryDocument (src/lxml/lxml.etree.c:82843)
File "parser.pxi", line 1457, in lxml.etree._parseDoc (src/lxml/lxml.etree.c:81641)
File "parser.pxi", line 965, in lxml.etree._BaseParser._parseDoc (src/lxml/lxml.etree.c:78311)
File "parser.pxi", line 569, in lxml.etree._ParserContext._handleParseResultDoc (src/lxml/lxml.etree.c:74567)
File "parser.pxi", line 650, in lxml.etree._handleParseResult (src/lxml/lxml.etree.c:75458)
File "parser.pxi", line 601, in lxml.etree._raiseParseError (src/lxml/lxml.etree.c:74958)
__main__.Unparseable: None