Comments (5)
Please create a minimal reproducer that I can use to reproduce the crash
from pycparser.
This is the PoC file
import json
import sys
import re
# This is not required if you've installed pycparser into
# your site-packages/ with setup.py
#
sys.path.extend(['.', '..'])
from pycparser import parse_file, c_ast,c_parser
from pycparser.plyparser import Coord
# Matches array-style child names such as 'block_items[1]': group 1 captures
# the array name and group 2 the text between the brackets (the index).
RE_CHILD_ARRAY = re.compile(r'(.*)\[(.*)\]')
# Matches dunder-style names (e.g. '__weakref__'); used to filter such entries
# out of a Node class's __slots__.
RE_INTERNAL_ATTR = re.compile('__.*__')
class CJsonError(Exception):
    """Error raised by the AST <-> dict/JSON conversion helpers.

    Raised, for example, when array-style children of a node are reported
    out of order while building the dict representation.
    """
def memodict(fn):
    """Fast memoization decorator for a function taking a single argument.

    The cache is a dict subclass whose ``__missing__`` hook calls ``fn`` on
    the first lookup of a key and stores the result; the decorated callable
    is that dict's bound ``__getitem__``.  The argument must therefore be
    hashable, and it must be passed positionally.
    """
    # Named differently from the enclosing function so the decorator's own
    # name is not shadowed inside its body.
    class _MemoDict(dict):
        def __missing__(self, key):
            # Compute once, remember, and hand the value back.
            ret = self[key] = fn(key)
            return ret

    return _MemoDict().__getitem__
@memodict
def child_attrs_of(klass):
    """
    Given a Node class, get a set of child attrs.

    Child attrs are the entries of ``klass.__slots__`` that are neither
    dunder-style internal names nor listed in ``klass.attr_names``.
    Memoized to avoid highly repetitive string manipulation.
    """
    non_child_attrs = set(klass.attr_names)
    all_attrs = {
        name for name in klass.__slots__
        if not RE_INTERNAL_ATTR.match(name)
    }
    return all_attrs - non_child_attrs
def to_dict(node):
    """Recursively convert an ast into dict representation.

    The resulting dict holds:
      * ``'_nodetype'``: the node's class name,
      * one entry per name in the class's ``attr_names``,
      * ``'coord'``: ``str(node.coord)`` or None when the coord is falsy,
      * one entry per child: a nested dict for simple children, a list of
        nested dicts for array-style children (``name[index]``),
      * None for every child attribute the node did not report.

    Raises:
        CJsonError: if array-style children arrive out of index order.
    """
    klass = node.__class__

    result = {}

    # Metadata
    result['_nodetype'] = klass.__name__

    # Local node attributes
    for attr in klass.attr_names:
        result[attr] = getattr(node, attr)

    # Coord object
    if node.coord:
        result['coord'] = str(node.coord)
    else:
        result['coord'] = None

    # Child attributes
    for child_name, child in node.children():
        # Child strings are either simple (e.g. 'value') or arrays
        # (e.g. 'block_items[1]')
        match = RE_CHILD_ARRAY.match(child_name)
        if match:
            array_name, array_index = match.groups()
            array_index = int(array_index)
            # arrays come in order, so we verify and append.
            items = result.setdefault(array_name, [])
            if array_index != len(items):
                raise CJsonError('Internal ast error. Array {} out of order. '
                                 'Expected index {}, got {}'.format(
                                     array_name, len(items), array_index))
            items.append(to_dict(child))
        else:
            result[child_name] = to_dict(child)

    # Any child attributes that were missing need "None" values in the json.
    for child_attr in child_attrs_of(klass):
        if child_attr not in result:
            result[child_attr] = None

    return result
def to_json(node, **kwargs):
    """Serialize an ast node to a JSON string.

    Extra keyword arguments are forwarded unchanged to ``json.dumps``.
    """
    as_dict = to_dict(node)
    return json.dumps(as_dict, **kwargs)
def file_to_dict(filename):
    """Parse a C file (run through the preprocessor) into the ast dict form."""
    return to_dict(parse_file(filename, use_cpp=True))
def file_to_json(filename, **kwargs):
    """Parse a C file (run through the preprocessor) into an ast JSON string.

    Extra keyword arguments are forwarded to ``to_json``.
    """
    return to_json(parse_file(filename, use_cpp=True), **kwargs)
def _parse_coord(coord_str):
    """Build a Coord from a ``file:line[:column]`` string.

    None is passed through as None.  The string is split on ':' and the
    first three fields are used; fields that are absent become None.  The
    line and column fields are passed to Coord as strings, unconverted.
    """
    if coord_str is None:
        return None

    # Pad with Nones so a missing line/column still unpacks cleanly.
    padded = coord_str.split(':') + [None] * 3
    filename, line, column = padded[:3]
    return Coord(filename, line, column)
def _convert_to_obj(value):
"""
Convert an object in the dict representation into an object.
Note: Mutually recursive with from_dict.
"""
value_type = type(value)
if value_type == dict:
return from_dict(value)
elif value_type == list:
return [_convert_to_obj(item) for item in value]
else:
# String
return value
def from_dict(node_dict):
    """Recursively build an ast from dict representation.

    ``node_dict`` must contain a ``'_nodetype'`` key naming a class in
    ``c_ast``; every other key is passed to that class's constructor as a
    keyword argument after conversion (``'coord'`` through ``_parse_coord``,
    everything else through ``_convert_to_obj``).

    The input dict is left unmodified, so the same dict can be converted
    more than once.

    Raises:
        KeyError: if ``'_nodetype'`` is missing.
        AttributeError: if ``c_ast`` has no class of that name.
    """
    # Read (do not pop) the type tag so the caller's dict is not mutated.
    class_name = node_dict['_nodetype']

    klass = getattr(c_ast, class_name)

    # Create a new dict containing the key-value pairs which we can pass
    # to node constructors.
    objs = {}
    for key, value in node_dict.items():
        if key == '_nodetype':
            # Metadata only; not a constructor argument.
            continue
        if key == 'coord':
            objs[key] = _parse_coord(value)
        else:
            objs[key] = _convert_to_obj(value)

    # Use keyword parameters, which works thanks to beautifully consistent
    # ast Node initializers.
    return klass(**objs)
def from_json(ast_json):
    """Rebuild an ast from its JSON string representation."""
    node_dict = json.loads(ast_json)
    return from_dict(node_dict)
#------------------------------------------------------------------------------
if __name__ == "__main__":
    # Reproducer: parse a deliberately malformed C snippet, then round-trip
    # the resulting ast through the dict and JSON converters.
    _c_parser = c_parser.CParser(
        lex_optimize=True,
        yacc_debug=False,
        yacc_optimize=False)

    # The input below is the crashing sample from the report and must stay
    # byte-for-byte as is; the string's continuation lines start at column 0
    # because they are part of the literal. The second argument is the
    # (empty) file name.
    ast = _c_parser.parse(
        """i
{i[
#66666666
#66666666666666666L66666666
#6666666666666666666666666666666�*�i""",
        ''
    )

    ast_dict = to_dict(ast)
    ast = from_dict(ast_dict)
    print(to_json(ast, sort_keys=True, indent=4))
from pycparser.
I get this thrown for the given C code sample:
ValueError: invalid literal for int() with base 10: '66666666666666666L'
Looks pretty reasonable to me. Perhaps the TypeError
comes from some wrapping code that's not inside pycparser?
from pycparser.
test.py
from pycparser import c_parser
if __name__ == "__main__":
    # Minimal reproducer: only the parse call is needed to trigger the error.
    _c_parser = c_parser.CParser(
        lex_optimize=True,
        yacc_debug=False,
        yacc_optimize=False)

    # The malformed input must stay byte-for-byte as is; the string's
    # continuation lines start at column 0 because they are part of the
    # literal. The second argument is the (empty) file name.
    ast = _c_parser.parse(
        """i
{i[
#66666666
#66666666666666666L66666666
#6666666666666666666666666666666�*�i""",
        ''
    )
from pycparser.
@eliben I have removed all unrelated statements to get the simplest possible file. It shows that the root cause of the crash is that the parser
doesn't check the validity of its input.
from pycparser.
Related Issues (20)
- Missing ; when generating code for extern functions
- Can
- Can't parse incomplete types and other syntactically valid but non-compilable code HOT 1
- Curly braces inside braced-group throws ParseError HOT 2
- AssertionError
- Two-dimensional array binding type problem HOT 1
- Is there a release plan for the next version of pycparser?
- parser error with typedef HOT 10
- assertion error on gcc-9 stddef.h
- Is there a way to find the function declaration matching a function call? HOT 1
- CParser doesn't work with comments HOT 1
- Add end of token coord
- Unable to parse labels at end of block HOT 1
- BUG: pycparser treats empty declarations as a binary operation HOT 1
- "sorry, but this version only supports 100 named groups" HOT 1
- Parsing `va_arg(foo, type)` HOT 3
- Unable to parse through comments and can't support directives HOT 2
- Parser can't handle unknown data types for variables HOT 1
- Parser unable to handle custom data types for custom functions HOT 1
Recommend Projects
-
React
A declarative, efficient, and flexible JavaScript library for building user interfaces.
-
Vue.js
🖖 Vue.js is a progressive, incrementally-adoptable JavaScript framework for building UI on the web.
-
Typescript
TypeScript is a superset of JavaScript that compiles to clean JavaScript output.
-
TensorFlow
An Open Source Machine Learning Framework for Everyone
-
Django
The Web framework for perfectionists with deadlines.
-
Laravel
A PHP framework for web artisans
-
D3
Bring data to life with SVG, Canvas and HTML. 📊📈🎉
-
Recommend Topics
-
javascript
JavaScript (JS) is a lightweight interpreted programming language with first-class functions.
-
web
Something interesting about the web. A new door to the world.
-
server
A server is a program made to process requests and deliver data to clients.
-
Machine learning
Machine learning is a way of modeling and interpreting data that allows a piece of software to respond intelligently.
-
Visualization
Something interesting about visualization; use data art.
-
Game
Something interesting about games; make everyone happy.
Recommend Org
-
Facebook
We are working to build community through open source technology. NB: members must have two-factor auth.
-
Microsoft
Open source projects and samples from Microsoft.
-
Google
Google ❤️ Open Source for everyone.
-
Alibaba
Alibaba Open Source for everyone
-
D3
Data-Driven Documents codes.
-
Tencent
China tencent open source team.
from pycparser.