angr / cle Goto Github PK
View Code? Open in Web Editor NEWCLE Loads Everything (at least, many binary formats!)
License: BSD 2-Clause "Simplified" License
CLE Loads Everything (at least, many binary formats!)
License: BSD 2-Clause "Simplified" License
Seems this is elftools related.
starting with android7.0 with libs in /lib/system/ from a nexus device.
>>> p = angr.Project("libmedia.so")
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "/usr/local/lib/python2.7/dist-packages/angr/project.py", line 156, in __init__
self.loader = cle.Loader(self.filename, **load_options)
File "/usr/local/lib/python2.7/dist-packages/cle/loader.py", line 124, in __init__
self.initial_load_objects = self._internal_load(main_binary, *force_load_libs)
File "/usr/local/lib/python2.7/dist-packages/cle/loader.py", line 471, in _internal_load
if self.find_object(main_spec, extra_objects=objects) is not None:
File "/usr/local/lib/python2.7/dist-packages/cle/loader.py", line 284, in find_object
for ident in self._possible_idents(spec):
File "/usr/local/lib/python2.7/dist-packages/cle/loader.py", line 805, in _possible_idents
soname = backend_cls.extract_soname(spec)
File "/usr/local/lib/python2.7/dist-packages/cle/backends/elf/metaelf.py", line 272, in extract_soname
soname = [ x.soname for x in list(dyn.iter_tags()) if x.entry.d_tag == 'DT_SONAME']
File "/usr/local/lib/python2.7/dist-packages/elftools/elf/dynamic.py", line 135, in iter_tags
yield DynamicTag(tag, self._get_stringtable())
File "/usr/local/lib/python2.7/dist-packages/elftools/elf/dynamic.py", line 51, in __init__
stringtable.get_string(self.entry.d_val))
File "/usr/local/lib/python2.7/dist-packages/elftools/elf/sections.py", line 70, in get_string
return s.decode('ascii')
UnicodeDecodeError: 'ascii' codec can't decode byte 0xfa in position 1: ordinal not in range(128)
>>>
Hi. I have a binary that cle fails to load.
It's from a crackme. Here is the sample.
At first glance, the sections don't seem to be well formed, and angr relies on them being well formed.
Ida on the other hand loads the binary correctly. (After some complaining)
Loading it in angr throws an exception: (Also if i don't specify a backend)
In [1]: p = angr.Project(main_opts={'backend':'blob'}, thing="oxfoo1m3")
---------------------------------------------------------------------------
AttributeError Traceback (most recent call last)
<ipython-input-13-b332a9fedb2b> in <module>()
----> 1 p = angr.Project(main_opts={'backend':'blob'}, thing="oxfoo1m3")
/usr/local/Cellar/pypy/5.10.0_1/libexec/site-packages/angr/project.pyc in __init__(self, thing, default_analysis_mode, ignore_functions, use_sim_procedures, exclude_sim_procedures_func, exclude_sim_procedures_list, arch, simos, load_options, translation_cache, support_selfmodifying_code, store_function, load_function, **kwargs)
162 l.info("Loading binary %s", thing)
163 self.filename = thing
--> 164 self.loader = cle.Loader(self.filename, **load_options)
165
166 # Step 2: determine its CPU architecture, ideally falling back to CLE's guess
/usr/local/Cellar/pypy/5.10.0_1/libexec/site-packages/cle/loader.pyc in __init__(self, main_binary, auto_load_libs, force_load_libs, skip_libs, main_opts, lib_opts, custom_ld_path, use_system_libs, ignore_import_version_numbers, case_insensitive, rebase_granularity, except_missing_libs, aslr, page_size, extern_size)
124 self.requested_names = set()
125
--> 126 self.initial_load_objects = self._internal_load(main_binary, *force_load_libs)
127
128 # Basic functions and properties
/usr/local/Cellar/pypy/5.10.0_1/libexec/site-packages/cle/loader.pyc in _internal_load(self, *args)
487
488 for main_spec in args:
--> 489 if self.find_object(main_spec, extra_objects=objects) is not None:
490 l.info("Skipping load request %s - already loaded", main_spec)
491 continue
/usr/local/Cellar/pypy/5.10.0_1/libexec/site-packages/cle/loader.pyc in find_object(self, spec, extra_objects)
284 extra_idents[ident] = obj
285
--> 286 for ident in self._possible_idents(spec):
287 if ident in self._satisfied_deps:
288 return self._satisfied_deps[ident]
/usr/local/Cellar/pypy/5.10.0_1/libexec/site-packages/cle/loader.pyc in _possible_idents(self, spec, lowercase)
822 backend_cls = self._static_backend(spec)
823 if backend_cls is not None:
--> 824 soname = backend_cls.extract_soname(spec)
825 if soname is not None:
826 yield soname
/usr/local/Cellar/pypy/5.10.0_1/libexec/site-packages/cle/backends/elf/metaelf.pyc in extract_soname(path)
306 e = elftools.elf.elffile.ELFFile(f)
307 # TODO: make this not depend on sections...
--> 308 dyn = e.get_section_by_name('.dynamic')
309 if dyn is None:
310 return None
/usr/local/Cellar/pypy/5.10.0_1/libexec/site-packages/elftools/elf/elffile.pyc in get_section_by_name(self, name)
92 if self._section_name_map is None:
93 self._section_name_map = {}
---> 94 for i, sec in enumerate(self.iter_sections()):
95 self._section_name_map[sec.name] = i
96 secnum = self._section_name_map.get(name, None)
/usr/local/Cellar/pypy/5.10.0_1/libexec/site-packages/elftools/elf/elffile.pyc in iter_sections(self)
101 """
102 for i in range(self.num_sections()):
--> 103 yield self.get_section(i)
104
105 def num_segments(self):
/usr/local/Cellar/pypy/5.10.0_1/libexec/site-packages/elftools/elf/elffile.pyc in get_section(self, n)
81 """
82 section_header = self._get_section_header(n)
---> 83 return self._make_section(section_header)
84
85 def get_section_by_name(self, name):
/usr/local/Cellar/pypy/5.10.0_1/libexec/site-packages/elftools/elf/elffile.pyc in _make_section(self, section_header)
286 """ Create a section object of the appropriate type
287 """
--> 288 name = self._get_section_name(section_header)
289 sectype = section_header['sh_type']
290
/usr/local/Cellar/pypy/5.10.0_1/libexec/site-packages/elftools/elf/elffile.pyc in _get_section_name(self, section_header)
281 """
282 name_offset = section_header['sh_name']
--> 283 return self._file_stringtable_section.get_string(name_offset)
284
285 def _make_section(self, section_header):
/usr/local/Cellar/pypy/5.10.0_1/libexec/site-packages/elftools/elf/sections.pyc in get_string(self, offset)
68 table_offset = self['sh_offset']
69 s = parse_cstring_from_stream(self.stream, table_offset + offset)
---> 70 return s.decode('ascii')
71
72
AttributeError: 'NoneType' object has no attribute 'decode'
read_addr_at does not perform any check on the length of the pointer read from memory. If `where` is near an unallocated memory region, fewer bytes than expected are read, and the unpack instruction throws an error.
Hi guys,
I am using loader.main_bin.plt to find references to external symbols. With x86_64 libraries everything works well. When loading an ARM library, loader.main_bin.plt is empty. The only thing I can get is the GOT address with loader.main_bin.imports. Is it possible to correlate GOT and PLT addresses?
Thanks
It would be nice if http://angr.io/api-doc/cle.html were set as the URL for the project.
The .deps
of an object should be able to contain a much richer declaration of what other object loads it would like to make. My proposal is that this "rich declaration" should be an entire loaded but unmapped backend instance.
This would allow us to, for example, load a .a
static archive, which is just a collection of object files, by loading each of the files from its stream using an archive reader library, passing them directly to the ELF
backend class, and returning all the elf objects in .deps
.
It reports this error when I use idabin to load a binary. Could you tell me what the problem is?
Same as angr/archinfo#31 I'm guessing a MANIFEST.in
file is needed to include LICENSE
for the next release?
This is the continuation of this issue: #44
After @rhelmot's fix, the loading time of my example reduced to 17 sec.
But this commit angr/angr@b614e65, increases the loading time to 70 sec.
In cle/backends/symbol.py
1: The demangled_name property isn't really scalable, since a new subprocess is opened each time.
I want to analyze a library and get a list of all demangled function names. 25k names is enough to crash my computer, despite having ample RAM and CPU power left.
2: The build of c++filt on macOS needs the -n flag to work the way it does on linux.
3: Maybe the demangled_name property should return the name itself, if it isn't mangled. Atm it returns None, which feels unintuitive.
Trying to get the offset of an address in the extern object fails with:
import angr
proj = angr.Project("/bin/ls")
e = proj.loader.extern_object
In [30]: e.addr_to_offset(e.min_addr)
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-30-53d93d581b7d> in <module>()
----> 1 e.addr_to_offset(e.min_addr)
/home/user/Projects/angr-dev/cle/cle/backends/__init__.pyc in addr_to_offset(self, addr)
216 loadable = self.find_loadable_containing(addr)
217 if loadable is not None:
--> 218 return loadable.addr_to_offset(addr)
219 else:
220 return None
/home/user/Projects/angr-dev/cle/cle/backends/region.pyc in addr_to_offset(self, addr)
45 Convert a virtual memory address into a file offset
46 """
---> 47 offset = addr - self.vaddr + self.offset
48 if not self.contains_offset(offset):
49 return None
TypeError: unsupported operand type(s) for +: 'int' and 'str'
While trying to debug this I found that:
In [32]: e.find_loadable_containing(e.min_addr)
Out[32]: ---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
/home/user/.virtualenvs/angr-dev/lib/python2.7/site-packages/IPython/core/formatters.pyc in __call__(self, obj)
697 type_pprinters=self.type_printers,
698 deferred_pprinters=self.deferred_printers)
--> 699 printer.pretty(obj)
700 printer.flush()
701 return stream.getvalue()
/home/user/.virtualenvs/angr-dev/lib/python2.7/site-packages/IPython/lib/pretty.pyc in pretty(self, obj)
401 if cls is not object \
402 and callable(cls.__dict__.get('__repr__')):
--> 403 return _repr_pprint(obj, self, cycle)
404
405 return _default_pprint(obj, self, cycle)
/home/user/.virtualenvs/angr-dev/lib/python2.7/site-packages/IPython/lib/pretty.pyc in _repr_pprint(obj, p, cycle)
701 """A pprint that just redirects to the normal repr function."""
702 # Find newlines and replace them with p.break_()
--> 703 output = repr(obj)
704 for idx,output_line in enumerate(output.splitlines()):
705 if idx:
/home/user/Projects/angr-dev/cle/cle/backends/region.pyc in __repr__(self)
60
61 def __repr__(self):
---> 62 return '<{} {}>'.format(self.__class__.__name__, ', '.join(['{}=0x{:x}'.format(k, v) for k, v in self.__dict__.iteritems()]))
63
64 @property
ValueError: Unknown format code 'x' for object of type 'str'
Maybe I am doing this wrong, I am trying to get the name of the SimProc that is currently being executed by looking up the offset in the extern object with the address and then passing it to extern._symbols_by_addr
CLE fails when attempting to load a dependency that has been compiled without the DT_SONAME flag. I'd pull request a fix but I'm unsure how you would like to resolve the issue. See: https://gist.github.com/jogarock/3cf0aa1b5775da5c8135c59f6120eac3
I'm currently playing with the crackme files found here https://github.com/Maijin/Workshop2015/tree/master/IOLI-crackme which are available for both Linux and Windows. I've been comparing the behavior of angr using matching binaries but different file formats.
I'd like to record a few of the differences here to:
a) see if I'm crazy
b) see if more specific tickets should be created
c) see if some pointers / todo's could be provided as to what specific things need to be implemented to have better pe/exe support.
The main function for the crackme0x00 file is (from Radare2):
[0x08048414]> pdf
/ (fcn) sym.main 127
| ; var int local_6 @ ebp-0x18
| ; DATA XREF from 0x08048377 (sym.main)
| ;-- main:
| 0x08048414 55 push ebp
| 0x08048415 89e5 mov ebp, esp
| 0x08048417 83ec28 sub esp, 0x28
| 0x0804841a 83e4f0 and esp, 0xfffffff0
| 0x0804841d b800000000 mov eax, 0
| 0x08048422 83c00f add eax, 0xf
| 0x08048425 83c00f add eax, 0xf
| 0x08048428 c1e804 shr eax, 4
| 0x0804842b c1e004 shl eax, 4
| 0x0804842e 29c4 sub esp, eax
| 0x08048430 c70424688504. mov dword [esp], str.IOLI_Crackme_Level_0x00_n ; [0x8048568:4]=0x494c4f49 LEA str.IOLI_Crackme_Level_0x00_n ; "IOLI Crackme Level 0x00." @ 0x8048568
| 0x08048437 e804ffffff call sym.imp.printf
| 0x0804843c c70424818504. mov dword [esp], str.Password: ; [0x8048581:4]=0x73736150 LEA str.Password: ; "Password: " @ 0x8048581
| 0x08048443 e8f8feffff call sym.imp.printf
| 0x08048448 8d45e8 lea eax, [ebp-local_6]
| 0x0804844b 89442404 mov dword [esp + 4], eax
| 0x0804844f c704248c8504. mov dword [esp], 0x804858c ; [0x804858c:4]=0x32007325 ; "%s" @ 0x804858c
| 0x08048456 e8d5feffff call sym.imp.scanf
| 0x0804845b 8d45e8 lea eax, [ebp-local_6]
| 0x0804845e c74424048f85. mov dword [esp + 4], str.250382 ; [0x804858f:4]=0x33303532 LEA str.250382 ; "250382" @ 0x804858f
| 0x08048466 890424 mov dword [esp], eax
| 0x08048469 e8e2feffff call sym.imp.strcmp
| 0x0804846e 85c0 test eax, eax
| ,=< 0x08048470 740e je 0x8048480
| | 0x08048472 c70424968504. mov dword [esp], str.Invalid_Password__n ; [0x8048596:4]=0x61766e49 LEA str.Invalid_Password__n ; "Invalid Password!." @ 0x8048596
| | 0x08048479 e8c2feffff call sym.imp.printf
| ,==< 0x0804847e eb0c jmp 0x804848c
| |`-> 0x08048480 c70424a98504. mov dword [esp], str.Password_OK_:__n ; [0x80485a9:4]=0x73736150 LEA str.Password_OK_:__n ; "Password OK :)." @ 0x80485a9
| | 0x08048487 e8b4feffff call sym.imp.printf
| | ; JMP XREF from 0x0804847e (sym.main)
| `--> 0x0804848c b800000000 mov eax, 0
| 0x08048491 c9 leave
\ 0x08048492 c3 ret
[0x08048414]>
Similarly for the EXE:
(fcn) sym._main 141
| ; var int local_0_1 @ ebp-0x1
| ; var int local_6 @ ebp-0x18
| ; var int local_7 @ ebp-0x1c
| ; CALL XREF from 0x00401222 (sym._main)
| 0x00401310 55 push ebp
| 0x00401311 89e5 mov ebp, esp
| 0x00401313 83ec38 sub esp, 0x38
| 0x00401316 83e4f0 and esp, 0xfffffff0
| 0x00401319 b800000000 mov eax, 0
| 0x0040131e 83c00f add eax, 0xf
| 0x00401321 83c00f add eax, 0xf
| 0x00401324 c1e804 shr eax, 4
| 0x00401327 c1e004 shl eax, 4
| 0x0040132a 8945e4 mov dword [ebp-local_7], eax
| 0x0040132d 8b45e4 mov eax, dword [ebp-local_7]
| 0x00401330 e83b190000 call 0x402c70 ; sym.___w32_sharedptr_initialize+0x220
| 0x00401335 e836010000 call sym.___main
| 0x0040133a c70424004040. mov dword [esp], str.IOLI_Crackme_Level_0x00_n ; [0x404000:4]=0x494c4f49 LEA section..rdata ; "IOLI Crackme Level 0x00." @ 0x404000
| 0x00401341 e8ea190000 call sym._printf
| 0x00401346 c70424194040. mov dword [esp], str.Password: ; [0x404019:4]=0x73736150 LEA str.Password: ; "Password: " @ 0x404019
| 0x0040134d e8de190000 call sym._printf
| 0x00401352 8d45e8 lea eax, [ebp-local_6]
| 0x00401355 89442404 mov dword [esp + 4], eax
| 0x00401359 c70424244040. mov dword [esp], 0x404024 ; [0x404024:4]=0x32007325 ; "%s" 0x00404024 ; "%s" @ 0x404024
| 0x00401360 e8bb190000 call sym._scanf
| 0x00401365 8d45e8 lea eax, [ebp-local_6]
| 0x00401368 c74424042740. mov dword [esp + 4], str.250382 ; [0x404027:4]=0x33303532 LEA str.250382 ; "250382" @ 0x404027
| 0x00401370 890424 mov dword [esp], eax
| 0x00401373 e898190000 call sym._strcmp
| 0x00401378 85c0 test eax, eax
| ,=< 0x0040137a 740e je 0x40138a
| | 0x0040137c c704242e4040. mov dword [esp], str.Invalid_Password__n ; [0x40402e:4]=0x61766e49 LEA str.Invalid_Password__n ; "Invalid Password!." @ 0x40402e
| | 0x00401383 e8a8190000 call sym._printf
| ,==< 0x00401388 eb0c jmp 0x401396
| |`-> 0x0040138a c70424414040. mov dword [esp], str.Password_OK_:__n ; [0x404041:4]=0x73736150 LEA str.Password_OK_:__n ; "Password OK :)." @ 0x404041
| | 0x00401391 e89a190000 call sym._printf
| | ; JMP XREF from 0x00401388 (sym._main)
| `--> 0x00401396 b800000000 mov eax, 0
| 0x0040139b c9 leave
\ 0x0040139c c3 ret
First of all, angr doesn't seem to recognize all the symbols in the exe. For example, the following command runs fine on the Linux version, but not the exe: main = proj.loader.main_bin.get_symbol('main')
I'm not sure if it's related or not, but creating cfg from main (using the address of main as a start because the symbol is not found as previously shown) produces two very different CFGs even though it's apparent from the disassembly above that they should be the same. On the ELF side, the CFG is as expected, with 9 basic blocks.
(Pictures made using https://github.com/axt/angr-utils)
On the PE side, the CFG has several hundred blocks, seeming to be spanning into other functions.
. Note: Main is actually way down in the bottom right corner in this huge graph. Also note, the function name for each node in this graph is "None" instead of the actual name as seen in the Linux graph (probably the same problem as above).
Finally, on the ELF side, angr automatically hooks functions like scanf and printf, but on the PE side that does not appear to work. I'm able to symbolically solve both the PE and ELF versions, but on the PE file, I have to manually setup hooking first.
Maybe all these problems come down to the function symbols not being properly found, I'm not sure.
So... Brain dump I know, but please feel free to split these into as many tickets as you feel is appropriate. I would love to see some pointers to jumping-off points where someone new to the project could start with helping resolve some of these discrepancies. Thanks!
Hello,
I'm doing some library detection analysis on ARM32 binaries. I haven't had any problems so far when trying to load normally compiled/created relocatable object files. When i tried to create signatures for libuv-v1.7.0.a which is created with libtool / ranlib instead of ar, cle refused to load the object files contained (example libuv_la-core.o, from libuv.a).
Stacktrace is generated by cle.Loader(..) but is the same for angr.Project(..).
Stacktrace:
----> 1 cle.Loader("/home/user/Code/ARM_FILES/arm_libraries/libuv/tmp/libuv_la-core.o")
/home/user/angr/angr-dev/cle/cle/loader.pyc in __init__(self, main_binary, auto_load_libs, force_load_libs, skip_libs, main_opts, lib_opts, custom_ld_path, ignore_import_version_numbers, rebase_granularity, except_missing_libs, gdb_map, gdb_fix, aslr)
101 self._load_dependencies()
102 self._load_tls()
--> 103 self._perform_reloc(self.main_bin)
104 self._finalize_tls()
105
/home/user/angr/angr-dev/cle/cle/loader.pyc in _perform_reloc(self, obj)
389 for reloc in obj.relocs:
390 if not reloc.resolved:
--> 391 reloc.relocate(([self.main_bin] if self.main_bin is not obj else []) + dep_objs + [obj])
392
393 def provide_symbol(self, owner, name, offset, size=0, binding='STB_GLOBAL', st_type='STT_FUNC', st_info='CLE'):
/home/user/angr/angr-dev/cle/cle/relocations/__init__.pyc in relocate(self, solist)
142 return False
143
--> 144 self.owner_obj.memory.write_addr_at(self.dest_addr, self.value)
145
146 load_relocations()
/home/user/angr/angr-dev/cle/cle/relocations/generic.pyc in value(self)
13 @property
14 def value(self):
---> 15 return self.resolvedby.rebased_addr + self.addend
16
17 class GenericJumpslotReloc(Relocation):
/home/user/angr/angr-dev/cle/cle/relocations/__init__.pyc in addend(self)
84 return self._addend
85 else:
---> 86 return self.owner_obj.memory.read_addr_at(self.addr, orig=True)
87
88 def resolve_symbol(self, solist):
/home/user/angr/angr-dev/cle/cle/memory.pyc in read_addr_at(self, where, orig)
184 Read addr stored in memory as a series of bytes starting at `where`.
185 """
--> 186 return struct.unpack(self._arch.struct_fmt(), ''.join(self.read_bytes(where, self._arch.bytes, orig=orig)))[0]
187
188 def write_addr_at(self, where, addr):
/home/user/angr/angr-dev/cle/cle/memory.pyc in read_bytes(self, addr, n, orig)
114 b = []
115 for i in range(addr, addr+n):
--> 116 b.append(self.get_byte(i, orig=orig))
117 return b
118
/home/user/angr/angr-dev/cle/cle/memory.pyc in get_byte(self, k, orig)
85 except KeyError:
86 pass
---> 87 raise KeyError(k)
88
89 def __setitem__(self, k, v):
KeyError: 3798
For now i am using the .so file generated by libtool instead of the multiple .o files from the .a archive. But still weird, first time it happened to me that CLE can't load something which is not completely broken.
Greetings
Looks like CLE will attempt to find the CaSe SenSiTive match for library dependencies of Windows PE files. This is leading to it not loading deps that otherwise are available to it.
There are lots of missing module, class, and function docstrings in the angr module. Here is the list. We desperately need help with this from the community, if someone wants to contribute!
Here is the list:
************* Module cle.errors
- errors.py:1 -
************* Module cle.memory
- memory.py:1 -
- memory.py:42 - Clemory.update_backer
- memory.py:53 - Clemory.remove_backer
- memory.py:193 - Clemory._stride_repr
************* Module cle.backends.idabin
- backends/idabin.py:1 -
- backends/idabin.py:324 - IDABin.plt
- backends/idabin.py:329 - IDABin.reverse_plt
- backends/idabin.py:332 - IDABin.get_call_stub_addr
- backends/idabin.py:336 - IDABin.is_ppc64_abiv1
************* Module cle.backends.cgc
- backends/cgc.py:1 -
- backends/cgc.py:33 - CGC.make_elf_copy
************* Module cle.backends.metaelf
- backends/metaelf.py:1 -
- backends/metaelf.py:21 - MetaELF._block
- backends/metaelf.py:29 - MetaELF._add_plt_stub
- backends/metaelf.py:39 - MetaELF._load_plt
- backends/metaelf.py:93 - MetaELF._load_plt.tick
- backends/metaelf.py:98 - MetaELF._load_plt.scan_forward
************* Module cle.backends.backedcgc
- backends/backedcgc.py:1 -
- backends/backedcgc.py:5 - FakeSegment
- backends/backedcgc.py:66 - BackedCGC.initial_register_values
************* Module cle.backends.blob
- backends/blob.py:1 -
************* Module cle.backends.elfcore
- backends/elfcore.py:1 -
- backends/elfcore.py:78 - ELFCore.initial_register_values
************* Module cle.backends.elf
- backends/elf.py:1 -
- backends/elf.py:47 - ELFSegment.is_readable
- backends/elf.py:51 - ELFSegment.is_writable
- backends/elf.py:55 - ELFSegment.is_executable
- backends/elf.py:59 - ELFSection
- backends/elf.py:89 - ELFSection.occupies_memory
- backends/elf.py:97 - ELFSection.is_strings
- backends/elf.py:241 - ELF._extract_init_fini
- backends/elf.py:287 - ELF.__register_segments
- backends/elf.py:300 - ELF._rebase_addr
- backends/elf.py:416 - ELF.__register_relocs
- backends/elf.py:466 - ELF._make_reloc
- backends/elf.py:474 - ELF.__register_tls
- backends/elf.py:480 - ELF.__register_sections
- backends/elf.py:497 - ELF.__register_section_symbols
- backends/elf.py:503 - ELF.__relocate_mips
- backends/elf.py:580 - ELFHashTable.elf_hash
- backends/elf.py:616 - GNUHashTable._matches_bloom
- backends/elf.py:649 - GNUHashTable.gnu_hash
************* Module cle.backends.pe
- backends/pe.py:1 -
- backends/pe.py:153 - PE._get_jmprel
- backends/pe.py:156 - PE._handle_imports
- backends/pe.py:168 - PE._handle_exports
************* Module cle.backends
- backends/__init__.py:1 -
- backends/__init__.py:187 - Symbol.resolve
- backends/__init__.py:363 - Backend.set_arch
- backends/__init__.py:370 - Backend.entry
- backends/__init__.py:404 - Backend.addr_to_offset
- backends/__init__.py:410 - Backend.offset_to_addr
************* Module cle.relocations.ppc
- relocations/ppc.py:1 -
************* Module cle.relocations.ppc64
- relocations/ppc64.py:1 -
- relocations/ppc64.py:7 - R_PPC64_JMP_SLOT
************* Module cle.relocations.arm64
- relocations/arm64.py:1 -
************* Module cle.relocations.generic
- relocations/generic.py:1 -
- relocations/generic.py:7 - GenericAbsoluteReloc
- relocations/generic.py:12 - GenericAbsoluteAddendReloc
- relocations/generic.py:17 - GenericJumpslotReloc
- relocations/generic.py:25 - GenericRelativeReloc
- relocations/generic.py:34 - GenericCopyReloc
- relocations/generic.py:39 - GenericTLSModIdReloc
- relocations/generic.py:50 - GenericTLSDoffsetReloc
- relocations/generic.py:59 - GenericTLSOffsetReloc
- relocations/generic.py:70 - GenericIRelativeReloc
- relocations/generic.py:82 - MipsGlobalReloc
- relocations/generic.py:85 - MipsLocalReloc
************* Module cle.relocations.armel
- relocations/armel.py:1 -
************* Module cle.relocations.arm
- relocations/arm.py:1 -
************* Module cle.relocations.armhf
- relocations/armhf.py:1 -
************* Module cle.relocations.i386
- relocations/i386.py:1 -
- relocations/i386.py:16 - R_386_PC32
************* Module cle.relocations.amd64
- relocations/amd64.py:1 -
************* Module cle.relocations.mips
- relocations/mips.py:1 -
************* Module cle.relocations
- relocations/__init__.py:1 -
- relocations/__init__.py:15 - load_relocations
- relocations/__init__.py:43 - get_relocation
- relocations/__init__.py:82 - Relocation.addend
- relocations/__init__.py:88 - Relocation.resolve_symbol
- relocations/__init__.py:111 - Relocation.resolve
- relocations/__init__.py:118 - Relocation.rebased_addr
- relocations/__init__.py:122 - Relocation.dest_addr
- relocations/__init__.py:126 - Relocation.value
************* Module cle.relocations.mips64
- relocations/mips64.py:1 -
************* Module cle.tls
- tls.py:1 -
- tls.py:29 - roundup
- tls.py:62 - TLSObj.finalize
- tls.py:65 - TLSObj.finalize.drop
- tls.py:68 - TLSObj.finalize.drop_int
- tls.py:96 - TLSObj.thread_pointer
************* Module cle.loader
- loader.py:1 -
- loader.py:127 - Loader.linux_loader_object
- loader.py:143 - Loader._load_main_binary
- loader.py:163 - Loader._load_dependencies
- loader.py:270 - Loader.get_loader_symbolic_constraints
- loader.py:344 - Loader._possible_paths
- loader.py:371 - Loader._perform_reloc
- loader.py:385 - Loader.provide_symbol
- loader.py:429 - Loader.addr_belongs_to_object
- loader.py:793 - Loader.all_elf_objects
- loader.py:796 - Loader.perform_irelative_relocs
************* Module cle
- __init__.py:1 -
When running:
loader = cle.Loader(exe_path)
for procedure in loader.main_bin.symbols_by_addr.values():
bytes = loader.memory.read_bytes(procedure.addr, 10)
wget-1.18-1.fc24.i686.wget-1.18-1.fc24.i686.usr.bin.wget.zip
I'm not sure what to call this. Some possible titles are-
ValueError: invalid literal for int() with base 16: ''
self.memory.add_backer()
?I frequently get the attached crash when creating custom loaders/backends:
crash.txt
An example loader is- https://github.com/haxmeadroom/cle/tree/master/tests/minidump
Running
tests/minidump/test_minidump.py fauxware.dmp
gives the error
I've tried different loaders based on PE, IDAlink, and ELF, all pretty much causing the same issue. I assume there is something I'm doing wrong. For most of my cases, I don't (think) I need much more than add_backer() with the correct VA's and memory (which I've checked as correct). Sometimes I'm able to do a state.memory.load(va,...)
and it works correctly. Sometimes I get the same exception doing project.analyses.CFG()
.
I've tried add_backer( 0, str )
similar to how PE does, and add_backer( va, str )
how every other loader seems to. I've noticed several special cases for MetaELF, PE, IDA with isinstance()
calls. Do I need to work with/around those? Any ideas? Thanks!
cle depends on claripy, pyvex, etc.
just running pip install cle
doesn't satisfy the dependencies.
Hi,
using CLE is it possible to understand if the project was compiled using gcc
, clang
or any other compiler?
Thanks
Hi all,
I tried to load .ko from a Linux machine but it does not seem to be possible. If useful I can paste the error later. Can you let me know if it will be possible and what would be the fix?
Thanks
I tried to load a PE binary with 27,000 symbols, and it took 90 seconds.
Did some profiling:
It turned out that the code spends a lot of time in the provide_symbol
function in cle/loader.py
. It's because the lookup for the symbols is done by a loop (O(n)), and since we look up n symbols we end up with O(n^2) runtime.
for reloc in obj.relocs:
if reloc.symbol and reloc.symbol.name == name:
reloc.relocate(solist)
I did a quick patch to build a map from the symbols, and with this the time to call provide_symbol
reduces greatly:
I didn't send a pull request because the patch is just a proof of concept and not done correctly.
Could you fix this?
Hey guys,
The bug that was fixed in PR #13 seems to have regressed. I believe this is because of changes made in angr
and the introduction of angr.extern_obj.AngrExternObj
, which causes provides
to return the string 'angr externs' rather than None
. This means that the list comprehension in WinReloc.resolve_symbol
is incorrect. You can run the procedure I describe in PR #13 to reproduce.
I could change the list comprehension to explicitly check for the string 'angr externs' rather than None
, however is this list comprehension (and hence the entire WinReloc.resolve_symbol
method) even necessary? Is there any particular reason to do this initial list comprehension over solist
, rather than just letting the super Relocation.resolve_symbol
take care of everything? I can't think of any reason, but I could have missed something.
Big thanks to @0xbc for pointing out the regression to me and for the subsequent discussion.
Line 12 in c73f871
may
-> many
?
Think it would be possible to use something like this to get non-native binaries running with native libraries?
I've seen a few people on IRC and the mailing list ask about Mach-O loading, figured we should mention it on GitHub as well.
Hi Chris,
We're all super swamped preparing for the CGC, so we can't give too much guidance at the moment. You might want to look at the other CLE backends (https://github.com/angr/cle/tree/master/cle/backends) to see how they're implemented. The general idea is to find a helper lib (maybe macholib or machobot?) and then write the glue between it and CLE.
Good luck! We'll be able to offer more guidance after the CGC and DEFCON, too!
- Yan
Source: https://lists.cs.ucsb.edu/pipermail/angr/2016-July/000095.html
I believe there is an error in loader.py, line 78:
self._satisfied_deps = set([] if skip_libs is None else skip_libs)
should be
self._satisfied_deps = set([] if skip_libs is None else [skip_libs]) #wrap skip_libs with []
The former creates a set of each individual character in the library names, while the latter creates a set of the full library names.
Attached is a binary + libc + ld that triggers the bug. It seems to be the interaction of the libraries, loading any one alone is fine.
angr.Project("./libdl.so.2")
triggers it
adding load_options={"auto_load_libs":False}
is the current work around
Traceback looks like:
/home/salls/Projects/angr/cle/cle/loader.pyc in __init__(self, main_binary, auto_load_libs, force_load_libs, skip_libs, main_opts, lib_opts, custom_ld_path, ignore_import_version_numbers, rebase_granularity, except_missing_libs, gdb_map, gdb_fix, aslr)
103 self._load_dependencies()
104 self._load_tls()
--> 105 self._perform_reloc(self.main_bin)
106 self._finalize_tls()
107
/home/salls/Projects/angr/cle/cle/loader.pyc in _perform_reloc(self, obj)
399 dep_objs = [self.shared_objects[dep_name] for dep_name in obj.deps if dep_name in self.shared_objects]
400 for dep_obj in dep_objs:
--> 401 self._perform_reloc(dep_obj)
402
403 if isinstance(obj, (MetaELF, PE)):
/home/salls/Projects/angr/cle/cle/loader.pyc in _perform_reloc(self, obj)
399 dep_objs = [self.shared_objects[dep_name] for dep_name in obj.deps if dep_name in self.shared_objects]
400 for dep_obj in dep_objs:
--> 401 self._perform_reloc(dep_obj)
402
403 if isinstance(obj, (MetaELF, PE)):
/home/salls/Projects/angr/cle/cle/loader.pyc in _perform_reloc(self, obj)
404 for reloc in obj.relocs:
405 if not reloc.resolved:
--> 406 reloc.relocate(([self.main_bin] if self.main_bin is not obj else []) + dep_objs + [obj])
407
408 def provide_symbol(self, owner, name, offset, size=0, sym_type=None):
/home/salls/Projects/angr/cle/cle/backends/relocations/__init__.pyc in relocate(self, solist, bypass_compatibility)
142 :param solist: A list of objects from which to resolve symbols.
143 """
--> 144 if not self.resolve_symbol(solist, bypass_compatibility):
145 return False
146
/home/salls/Projects/angr/cle/cle/backends/relocations/__init__.pyc in resolve_symbol(self, solist, bypass_compatibility)
90 weak_result = None
91 for so in solist:
---> 92 symbol = so.get_symbol(self.symbol.name)
93 if symbol is not None and symbol.is_export:
94 if not symbol.is_weak:
/home/salls/Projects/angr/cle/cle/backends/elf.py in get_symbol(self, symid, symbol_table)
311 if self.hashtable is None:
312 return None
--> 313 re_sym = self.hashtable.get(symid)
314 if re_sym is None:
315 return None
/home/salls/Projects/angr/cle/cle/backends/elf.py in get(self, k)
778 n += 1
779 sym = self.symtab.get_symbol(n)
--> 780 if (self.gnu_hash(sym.name) % self.nbuckets) != (h % self.nbuckets):
781 break
782 except AttributeError: # XXX THIS IS A HACK
/home/salls/Projects/angr/cle/cle/backends/elf.py in gnu_hash(key)
787 def gnu_hash(key):
788 h = 5381
--> 789 for c in key:
790 h = h * 33 + ord(c)
791 return h & 0xFFFFFFFF
TypeError: 'NoneType' object is not iterable
On my system, angr-doc tests fail because they attempt to load an arm binary, depending on libc.so. My machine has /usr/arm-linux-gnueabi/lib/libc.so
, which CLE finds and attempts to load, but it can't be loaded since it's an ldscript, not an actual ELF. When identify_object
fails to find the backend, it raises a CLECompatibilityError
, which is never caught and causes the program to halt.
The correct behavior is that the loader continues to search for a valid libc.so, and if none is found, the dependency goes unresolved (contingent on the value of except_missing_libs
)
I can't assign you for some reason, but this is the job of @subwire.
This may not be a real issue but rather a lack of understanding on my part.
I am trying to load some 32bit ARM binary with dependencies resolution enabled
angr.cle.Loader(..., custom_ld_path=["./lib/"])
the result is
INFO | 2017-05-17 17:04:36,806 | cle.loader | Rebasing ##cle_tls## at 0x192000000
Traceback (most recent call last):
...
File "/home/user/Desktop/repos/angr-dev/cle/cle/tls/elf_tls.py", line 63, in drop_int
drop(struct.pack(self.arch.struct_fmt(), num), offset)
error: argument out of range for 4-bytes integer format
The virtual address 0x1 9200 0000 does not fit in 32 bits.
The problem, as I understand it, arises from the method Loader._get_safe_rebase_addr
def _get_safe_rebase_addr(self):
granularity = self._rebase_granularity
return self.max_addr() + (granularity - self.max_addr() % granularity)
and is located inside Loader.max_addr → backends.get_max_addr
def get_max_addr(self):
out = self.segments.max_addr
if out is None:
out = self.sections.max_addr
if out is None:
return self.rebase_addr
else:
return out + self.rebase_addr
The line I am interested in is the last one
...
return out + self.rebase_addr
it should return new rebased image base address (the one inside Clemory map), but in my case it instead returns
(old image base + maximum rva from ELF objects) out + self.rebase_addr (new image base)
Is this logic correct? Maybe out should be normalized (- old image base)...
Please correct me if I am wrong.
Hi, is there any plan to port it to python3 ?
I was trying to load a PE binary, from a Borland C++/Delphi project, with auto_load_libs
True.
It gives the following error:
Traceback (most recent call last):
File "wpl-angr3.py", line 46, in <module>
proj = angr.Project(sample, load_options={'auto_load_libs':True})#, 'main_opts': {'custom_base_addr': 0x0}})
File "/usr/local/lib/python2.7/dist-packages/angr/project.py", line 107, in __init__
self.loader = cle.Loader(self.filename, **load_options)
File "/usr/local/lib/python2.7/dist-packages/cle-4.6.5.27-py2.7.egg/cle/loader.py", line 101, in __init__
self._load_dependencies()
File "/usr/local/lib/python2.7/dist-packages/cle-4.6.5.27-py2.7.egg/cle/loader.py", line 203, in _load_dependencies
self.add_object(obj, base_addr)
File "/usr/local/lib/python2.7/dist-packages/cle-4.6.5.27-py2.7.egg/cle/loader.py", line 341, in add_object
self.memory.add_backer(base_addr, obj.memory)
File "/usr/local/lib/python2.7/dist-packages/cle-4.6.5.27-py2.7.egg/cle/memory.py", line 36, in add_backer
raise ValueError("Address %#x is already backed!" % start)
ValueError: Address 0x40000000 is already backed!
I've tried to track down the problem, and its caused because of this code
if self.binary is not None and self.binary.endswith('.dll'):
self.provides = os.path.basename(self.binary)
else:
self.provides = None
The problem is, that Borland C++ calls its dll
-s bpl
and so object.provides
will be None
, so cle
will try to load the same library several times, which causes the above exception. I haven't checked this, but I also think .DLL
-s could cause a problem, since windows file names are case-insensitive, while the code above is case-sensitive.
I don't know what would be the correct solution to this problem. I've currently patched cle to support both dll
and bpl
, but I'm not sure one could enumerate all the extensions for that check.
What is the purpose of the check above? What would it break if removed, and object always have a provides field?
I tried to use cle for vmlinux file and got this error. I'm using angr-dev repository.
Traceback (most recent call last):
File "test.py", line 11, in <module>
b = angr.Project(filename, load_options={'auto_load_libs': False})
File "/angr-dev/angr/angr/project.py", line 213, in __init__
self.simos.configure_project()
File "angr-dev/angr/angr/simos/linux.py", line 38, in configure_project
self._loader_addr = self.project.loader.extern_object.allocate()
File "angr-dev/cle/cle/loader.py", line 200, in extern_object
self._map_object(self._extern_object)
File "angr-dev/cle/cle/loader.py", line 682, in _map_object
base_addr = self._find_safe_rebase_addr(obj_size)
File "angr-dev/cle/cle/loader.py", line 741, in _find_safe_rebase_addr
raise CLEOperationError("Ran out of room in address space")
cle.errors.CLEOperationError: Ran out of room in address space
Thank you. Trying this out of curiosity.
I took this package https://archlinuxarm.org/packages/arm/binutils for a test.
It seems that any symbols from the libc is found (tested on the binary size
and strings
).
cle should have a backend similar to idalink but using radare2, as all of the information provided from ida is available through radare2.
Hi. I found that there is an if check in the elf.py
if (ph.p_vaddr - ph.p_offset) & (ph.p_align - 1) != 0:
raise CLEInvalidBinaryError("ELF file %s is loading a segment with an inappropriate alignment" % self.binary)
You mentioned to see https://code.woboq.org/userspace/glibc/elf/dl-load.c.html#1066
But I am still confused as to why you need this check, and which part of the ELF specification this check maps to. Many thanks.
Consider the following case: Binary A depends on two libraries B and C. C provides a function foo
that both A and B import. C is in A's library dependent list, but not in B's (i.e. readelf -d B
does not contain C at all).
In this case, ld
is able to load binary A with libraries B and C, and correctly resolve foo
from both A and B to some address in C. Cle, instead, only resolves foo
for A but not for B
as it does not see C
as a dependent of B
.
To resolve this issue, we need to understand how ld
handles it internally.
EDIT: I cannot distribute the binaries here. angr-core members may ask me for test cases.
As the title mentioned: ld.main_bin.symbols
returns NoneType
. The script I've tried:
In [80]: import cle
In [81]: ld = cle.Loader("/home/riyad/tmp/coreutils-8.24/src/mkdir")
In [82]: ld.main_bin.symbols
In [83]: type(ld.main_bin.symbols)
Out[83]: NoneType
PS Binary isn't stripped.
raise CLEError("All backends failed loading %s!" % path)
cle.errors.CLEError: All backends failed loading /home/hcq/binwalk/bin/c.bin!
Attachment: cle_bug.zip
The current master branch of CLE contains a bug that causes any access to a binary's symbols_by_addr
property to crash for Mach-O binaries.
Attached to this bug report is a zip file containing demo.py
and hw
, a simple "Hello World" executable compiled for MacOS.
To reproduce the error execute demo.py
which should produce the following output:
$ python demo.py
The symbol chosen for this PoC has address 0x1002bc120 and is indeed in _symbols_by_addr
Detected None in _symbols_by_addr, this is gonna crash!
trying to access 0x1002bc120
Traceback (most recent call last):
File "demo.py", line 17, in <module>
binary.symbols_by_addr[known_symbol_addr]
File ".../cle_bug/venv/lib/python2.7/site-packages/cle/backends/__init__.py", line 174, in symbols_by_addr
return {AT.from_rva(x, self).to_mva(): self._symbols_by_addr[x] for x in self._symbols_by_addr}
File ".../cle_bug/venv/lib/python2.7/site-packages/cle/backends/__init__.py", line 174, in <dictcomp>
return {AT.from_rva(x, self).to_mva(): self._symbols_by_addr[x] for x in self._symbols_by_addr}
File ".../cle_bug/venv/lib/python2.7/site-packages/cle/address_translator.py", line 73, in to_mva
return self._rva + self._owner.mapped_base
TypeError: unsupported operand type(s) for +: 'NoneType' and 'int'
$
The issue here is that Symbols in Mach-O files generally do not possess an address until they
went through the binding/resolution process (which may be optional for analysis purposes).
Also several symbols do not have an address at all AFAIK.
Currently the Mach-O backend handles these symbols by assigning None
to their address until they can be resolved, which may happen at various points in a binary's lifecycle.
The Mach-O backend uses None
for this purpose in order to distinguish them from symbols that may be validly (but somewhat uselessly) mapped to address 0.
As a result _symbols_by_addr
will contain None
as a key due to the logic in the constructor for Symbol
(cle/backends/symbol.py
, line 50-51):
if (claripy and isinstance(self.relative_addr, claripy.ast.Base)) or self.relative_addr != 0:
self.owner_obj._symbols_by_addr[self.relative_addr] = self
Furthermore the Mach-O backend currently manually inserts symbols after resolution into _symbols_by_addr
and takes special care to only include those symbols that make sense (compare cle/backends/macho/macho.py
lines 350-356).
Design-wise I feel that as the backend module is responsible for loading a binary, it is the backend module which should decide where to include which symbol (during the loading process)
and not a superclass (especially not the symbol-superclass) somewhere up the hierarchy.
I am uncertain as to how to properly resolve this, a simple fix could extend the check in symbol.py
to ignore all symbols whose relative_addr
is None
. However as stated above I think it would be best if backends had the power to decide what should be included in symbols_by_addr
(at least during the loading process) so it would be nice if that facility had some sort of opt-out mechanism.
Alternatively I could also have the Mach-O backend clean out the erroneous keys by wiping _symbols_by_addr
after symbol resolution but that feels like a very hacky solution.
As this bug currently halts my work I'm gonna need to fix it soon.
I'd appreciate any input for a proper solution and can send a pull request once its done.
Do you work a lot with blobs? I sure do, and I can't stand what a pain it is to use the Blob backend.
There's been a lot of work out there on automatically figuring out what your blob is. We can and should leverage it, to make this a mostly-automatic process.
But what if your blob is not a blob? What if your blob is in fact a collection of binaries? (e.g., a filesystem image, or archive) Tools exist for this too, and we can use them to more accurately construct the environment of the target.
This is a tracking issue for the effort to implement this functionality as a CLE backend, so that we can handle as much as possible automagically. Short-term goals include support for "clean" blobs, that is, intact blobs ready to flash to a microcontroller, but longer-term we'd like to support nasty stuff, like code extracted from real systems in a not-very-precise way, ripping the code right out of a Windows flash tool binary, and so on. Other high-level goals include support for filesystem images, where the environment is pre-constrained based on libraries, etc that exist inside.
Want to help? Working on this doesn't require much/any knowledge about the rest of the angr family of libraries, so it's great for those newer to the project. Read on...
Currently, CLE will try all possible backends until it finds one that works. (or fails, and tells you to go try the Blob loader manually) AutoBlob should be able to more or less guess the parameters that would be needed to use Blob, or some other backend, and load the binary for you if it can.
While many backends load all kinds of fancy metadata, symbols, and what have you, you always need at least these three things around, in order to load a binary:
Architecture: Obviously you need to know what arch and endness the binary is. The more specific we can get, the better. (note: some day we will have better embedded architecture support, and being "more specific" to a sub-architecture will be actually important)
Base address: Where in memory does this blob belong? (but if we are sure it won't matter, we can make one up)
Entry point: What should angr consider to be the beginning of the program?
But how do we get those?
Blobs are not all as opaque as we may think. There are typically bits and pieces of metadata around that we may be able to find and use. Let's give an example:
ARM microcontroller blobs, that is, totally unpacked, fully-formed, clean ARM blobs, will contain, somewhere, the initial Interrupt Vector Table (or Exception Vector Table, if you will). If you're lucky, and your blob is extra clean, it'll be the very first thing in the file. This gives us a surprising amount of information; the very first word in the table is actually the initial stack pointer, which will fall into the normal physical address range for RAM (the low 0x20000000's). The endness of this value tells us, of course, the endness. Let's also assume if we see a pointer in about the right place that falls in that range using either endness, that it's an ARM blob (but we will refine this guess later) The second word is the IRQ handler for IRQ 1, "reset", which is taken always at power-on. That's our entry point! We're done here.
MSP430 binaries have a similar thing, although this time the IVT is at the very top end of memory, and goes backward. Same deal there.
IVT's aren't the only way to figure it out; angr has two existing analyses (girlscout and boyscout) which help with this kind of identification. There is a lot of uncertainty as to how well they work, and they can probably stand to be retrofitted, cleaned, and reapplied inside of CLE.
There's also the very popular tool binwalk, a python library/utility/thingy for carving stuff out of other stuff in a pretty reasonable way. It's a bit more exciting than just libmagic on steroids, there's disassembly-based analysis, specific support for weird firmware formats, compression, etc etc.
There's even a pile of other tools based on binwalk (like firmware_mod_kit) for playing with firmware. Can we leverage any of this to make educated guesses about our blobs, or even unpack entire filesystems?
Here's what we need to do:
Implement the initial AutoBlob backend. [DONE]
Fix it so that we don't call autodetect_initial() twice when loading a binary
Create a framework for adding identification functions.
We will divide our "magic tricks" into two flavors: initial and secondary.
initial means that this is performed to merely get the blob into CLE at all; we use this as part of the is_compatible() method for this backend to tell if AutoBlob will even work on the blob. We call it again in the constructor to actually load the thing. "secondary" techniques let us refine our initial guesses, such as detecting sub-architectures, reorganizing the memory map (what's code, what's data, what goes where), or maybe detecting the OS (is this blob actually VXWorks? Great, we should know that) [DONE]
Add boyscout as an initial technique [DONE]
Add cpu_rec as an initial technique [DONE]
Fix cpu_rec so that it's a real python library
Add girlscout as a secondary technique
Re-implement girlscout so that it's better and modern (check with Paul, this may be done)
Modify CLE's backend registration scheme to be list-based (so AutoBlob can be at the end, always) See cle/backends/__init__.py
Add binwalk as a last-ditch catchall approach, and for filesystem / archive support
Binwalk is really a python library underneath, and my initial exploration showed that this is totally doable. We may want to filter our results to account for the fact that binwalk will generate lots of false-positives, particularly for instruction matching (I bet /dev/urandom produces valid ARM instructions most of the time)
Add support for extracting / mounting binwalk'd filesystem archives and using them as the basis for a concrete FS / libraries in an angr project
Here is the current implementation: https://github.com/subwire/autoblob
It is currently implemented as an out-of-tree CLE backend, but we may merge it in later if the end result is well-behaved enough.
Hello,
I'll try my best to describe this issue. This only happens for ARM Cortex binaries (compiled with -mthumb -mcpu=cortex-mX
, X might be 1, 3, 4 or 7).
If the program does not contain any libc's function, I got this error:
File "/users/cao/vusontuan/OSS/angr-dev/angr/angr/project.py", line 154, in __init__
self.loader = cle.Loader(self.filename, **load_options)
File "/users/cao/vusontuan/OSS/angr-dev/cle/cle/loader.py", line 124, in __init__
self.initial_load_objects = self._internal_load(main_binary, *force_load_libs)
File "/users/cao/vusontuan/OSS/angr-dev/cle/cle/loader.py", line 514, in _internal_load
self._relocate_object(obj)
File "/users/cao/vusontuan/OSS/angr-dev/cle/cle/loader.py", line 619, in _relocate_object
reloc.relocate(([self.main_object] if self.main_object is not obj else []) + dep_objs + [obj])
File "/users/cao/vusontuan/OSS/angr-dev/cle/cle/backends/relocation.py", line 113, in relocate
self.owner_obj.memory.write_addr_at(self.dest_addr, self.value)
File "/users/cao/vusontuan/OSS/angr-dev/cle/cle/memory.py", line 200, in write_addr_at
self.write_bytes(where, by)
File "/users/cao/vusontuan/OSS/angr-dev/cle/cle/memory.py", line 130, in write_bytes
self[addr+i] = c
File "/users/cao/vusontuan/OSS/angr-dev/cle/cle/memory.py", line 91, in __setitem__
raise IndexError(k)
IndexError: 3204444160
Every single time. With exactly the same index 3204444160. Using ipdb to debug it, this corresponds to reloc.dest_addr
of relocation of __libc_start_main
. Interestingly, adding another (only one) libc's function to the program makes the error disappear.... Adding 2 libc's functions makes this problem happen again... So, we have to have an even number of libc's functions in the program....
@rhelmot can you tell me what can be the reason for this really weird behavior please? In fact I have had this issue since last year, but my workaround was to simply put a libc function to a foo()
function that doesn't get called anywhere in the program. But now I have to compile the programs with optimizations (-O2
), those unused functions gets removed...
Here are the binaries whose name is suffixed by the number of libc's functions in the program
weird.zip
Thank you for your help
Hi Guys!
After cle update, I've started getting error below when trying to load some files:
proj = angr.Project(binary, load_options={'auto_load_libs': False})
File "/usr/local/lib/python2.7/dist-packages/angr/project.py", line 164, in __init__
self.loader = cle.Loader(self.filename, **load_options)
File "/usr/local/lib/python2.7/dist-packages/cle/loader.py", line 126, in __init__
self.initial_load_objects = self._internal_load(main_binary, *force_load_libs)
File "/usr/local/lib/python2.7/dist-packages/cle/loader.py", line 574, in _internal_load
main_obj = self._load_object_isolated(main_spec)
File "/usr/local/lib/python2.7/dist-packages/cle/loader.py", line 668, in _load_object_isolated
return backend_cls(full_spec, is_main_bin=self.main_object is None, loader=self, **options)
File "/usr/local/lib/python2.7/dist-packages/cle/backends/elf/elf.py", line 117, in __init__
self.__register_segments()
File "/usr/local/lib/python2.7/dist-packages/cle/backends/elf/elf.py", line 443, in __register_segments
self.__register_dyn(seg)
File "/usr/local/lib/python2.7/dist-packages/cle/backends/elf/elf.py", line 538, in __register_dyn
self.__register_relocs(readelf_relocsec)
File "/usr/local/lib/python2.7/dist-packages/cle/backends/elf/elf.py", line 589, in __register_relocs
for readelf_reloc in section.iter_relocations():
File "/usr/local/lib/python2.7/dist-packages/elftools/elf/relocation.py", line 90, in iter_relocations
yield self.get_relocation(i)
File "/usr/local/lib/python2.7/dist-packages/elftools/elf/relocation.py", line 83, in get_relocation
stream_pos=entry_offset)
File "/usr/local/lib/python2.7/dist-packages/elftools/common/utils.py", line 34, in struct_parse
raise ELFParseError(str(e))
elftools.common.exceptions.ELFParseError: expected 4, found 0
I have attached example file:
libgcc_s.so.zip
A few months ago it worked fine.
Can you please advise what is the possible reason of this problem?
Thanks.
Hi,
I got this error quite systematically, unfortunately I don't really know about this in order to fix it...
/users/cao/vusontuan/OSS/angr-dev/angr-doc/examples/mma_howtouse
Traceback (most recent call last):
File "test.py", line 142, in <module>
exampletest_single('mma_howtouse')
File "test.py", line 29, in exampletest_single
s.test()
File "./solve.py", line 33, in test
File "./solve.py", line 19, in main
File "/users/cao/vusontuan/OSS/angr-dev/angr/angr/project.py", line 142, in __init__
self.loader = cle.Loader(self.filename, **load_options)
File "/users/cao/vusontuan/OSS/angr-dev/cle/cle/loader.py", line 114, in __init__
self._perform_reloc(self.main_bin)
File "/users/cao/vusontuan/OSS/angr-dev/cle/cle/loader.py", line 390, in _perform_reloc
reloc.relocate(([self.main_bin] if self.main_bin is not obj else []) + dep_objs + [obj])
File "/users/cao/vusontuan/OSS/angr-dev/cle/cle/backends/pe.py", line 62, in relocate
org_value = struct.unpack('<I', org_bytes)[0]
struct.error: unpack requires a string argument of length 4
Guess I should ping @rhelmot here
I installed via pip within the last hour...
$ python
Python 2.7.9 (default, Apr 2 2015, 15:33:21)
[GCC 4.9.2] on linux2
Type "help", "copyright", "credits" or "license" for more information.
>>> import cle
>>> ld = cle.Loader('/bin/ls')
No handlers could be found for logger "cle.generic"
Linux ubuntu 3.19.0-18-generic #18-Ubuntu SMP Tue May 19 18:31:35 UTC 2015 x86_64 x86_64 x86_64 GNU/Linux
$ cat /etc/lsb-release
DISTRIB_ID=Ubuntu
DISTRIB_RELEASE=15.04
DISTRIB_CODENAME=vivid
DISTRIB_DESCRIPTION="Ubuntu 15.04"
Any ideas? Thanks!
I am getting following error:
File "srj-prog_ana.py", line 3, in <module>
b = angr.Project('srj-string_format_vul-no_opt')
File "srj-vulnerability_project/srj-angr/angr/angr/angr/project.py", line 98, in __init__
self.loader = cle.Loader(self.filename, **load_options)
File "srj-vulnerability_project/srj-angr/angr/cle/cle/loader.py", line 90, in __init__
self._perform_reloc(self.main_bin)
File "srj-vulnerability_project/srj-angr/angr/cle/cle/loader.py", line 279, in _perform_reloc
self._perform_reloc(dep_obj)
File "srj-vulnerability_project/srj-angr/angr/cle/cle/loader.py", line 279, in _perform_reloc
self._perform_reloc(dep_obj)
File "srj-vulnerability_project/srj-angr/angr/cle/cle/loader.py", line 285, in _perform_reloc
reloc.relocate(self.all_objects)
File "srj-vulnerability_project/srj-angr/angr/cle/cle/absobj.py", line 185, in relocate
elif self.type in self.arch.reloc_tls_doffset:
AttributeError: 'ArchX86' object has no attribute 'reloc_tls_doffset'
I've noticed a few little hacks in CLE for ARM's Thumb stuff so I want to make sure I won't break things by fixing R_ARM_ABS32
. It's currently defined as generic.GenericAbsoluteAddendReloc
which has an operation that the ARM ELF manual would define as S + A
. This is actually the operation for R_ARM_ABS32_NOI
. R_ARM_ABS32
is supposed to be (S + A) | T
where T is 1 if the target is STT_FUNC
and is a Thumb function.
Since I'm working on R_ARM_CALL
and others (see angr issue #546), I figured I'd fix this one as well.
Any easy path to manipulating the loaded state and saving a new binary?
The output log:
self.__program = cle.Loader(run_trace.process, lib_opts=lib_opts)
File "C:\Python27\lib\site-packages\cle-7.8.2.21-py2.7.egg\cle\loader.py", line 111, in __init__
for x in self._lib_opts: self._lib_opts[x.lower()] = self._lib_opts[x]
RuntimeError: dictionary changed size during iteration
As seen in the following code:
Line 491 in 82e33f9
The ELF backend uses sh_addr
for loading objects into the memory; however, for relocatable objects the sh_addr
is always zero (only sh_offset
is set). This causes the binary loading to fail.
A declarative, efficient, and flexible JavaScript library for building user interfaces.
🖖 Vue.js is a progressive, incrementally-adoptable JavaScript framework for building UI on the web.
TypeScript is a superset of JavaScript that compiles to clean JavaScript output.
An Open Source Machine Learning Framework for Everyone
The Web framework for perfectionists with deadlines.
A PHP framework for web artisans
Bring data to life with SVG, Canvas and HTML. 📊📈🎉
JavaScript (JS) is a lightweight interpreted programming language with first-class functions.
Some thing interesting about web. New door for the world.
A server is a program made to process requests and deliver data to clients.
Machine learning is a way of modeling and interpreting data that allows a piece of software to respond intelligently.
Some thing interesting about visualization, use data art
Some thing interesting about game, make everyone happy.
We are working to build community through open source technology. NB: members must have two-factor auth.
Open source projects and samples from Microsoft.
Google ❤️ Open Source for everyone.
Alibaba Open Source for everyone
Data-Driven Documents codes.
China tencent open source team.