分享

procfs environ explained in

 astrotycoon 2019-01-10

We have now seen how env_start and env_end get initialized to the user-space addresses that delimit the environment block on the user stack.

One question remains to be answered: how does “extern char **environ” get initialized, and what does it have to do with this block?

As we saw in the last post, there is a block named “envp” that is initialized as a char *[], where each element is the address of an environment variable string in the format “name=value”.

This block, together with argv and argc, is placed right next to the top of the user stack.

Then the ELF binary format handler looks up the entry point in the ELF file and transfers control to that address, after which we are running in user space.

890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
if (elf_interpreter) {
 unsigned long interp_map_addr = 0;
elf_entry = load_elf_interp(&loc->interp_elf_ex,
 interpreter,
 &interp_map_addr,
 load_bias);
 if (!IS_ERR((void *)elf_entry)) {
 /*
 * load_elf_interp() returns relocation
 * adjustment
 */
 interp_load_addr = elf_entry;
 elf_entry += loc->interp_elf_ex.e_entry;
 }
 if (BAD_ADDR(elf_entry)) {
 force_sig(SIGSEGV, current);
 retval = IS_ERR((void *)elf_entry) ?
 (int)elf_entry : -EINVAL;
 goto out_free_dentry;
 }
 reloc_func_desc = interp_load_addr;
allow_write_access(interpreter);
 fput(interpreter);
 kfree(elf_interpreter);
 } else {
 elf_entry = loc->elf_ex.e_entry;
 if (BAD_ADDR(elf_entry)) {
 force_sig(SIGSEGV, current);
 retval = -EINVAL;
 goto out_free_dentry;
 }
 }
...
start_thread(regs, elf_entry, bprm->p);
 retval = 0;

Nowadays an ELF executable usually has an interpreter section (.interp) that holds the path of the interpreter ELF, which is responsible for loading the other required libraries and then starting the user-provided “main” function.

For example, our sample program has the interpreter set as:

1
2
3
4
5
6
7
$objdump -s -j .interp b
b: file format elf64-x86-64
Contents of section .interp:
 400238 2f6c6962 36342f6c 642d6c69 6e75782d /lib64/ld-linux-
 400248 7838362d 36342e73 6f2e3200 x86-64.so.2.

This means the interpreter program is /lib64/ld-linux-x86-64.so.2 which is also part of glibc.

Here is the interpreter’s _start code in “sysdeps/x86_64/dl-machine.h”:

145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
/* Initial entry point code for the dynamic linker.
 The C function `_dl_start' is the real entry point;
 its return value is the user program's entry point. */
#define RTLD_START asm ("\n\
.text\n\
 .align 16\n\
.globl _start\n\
.globl _dl_start_user\n\
_start:\n\
 movq %rsp, %rdi\n\
 call _dl_start\n\
_dl_start_user:\n\
 # Save the user entry point address in %r12.\n\
 movq %rax, %r12\n\
 # See if we were run as a command with the executable file\n\
 # name as an extra leading argument.\n\
 movl _dl_skip_args(%rip), %eax\n\
 # Pop the original argument count.\n\
 popq %rdx\n\
 # Adjust the stack pointer to skip _dl_skip_args words.\n\
 leaq (%rsp,%rax,8), %rsp\n\
 # Subtract _dl_skip_args from argc.\n\
 subl %eax, %edx\n\
 # Push argc back on the stack.\n\
 pushq %rdx\n\
 # Call _dl_init (struct link_map *main_map, int argc, char **argv, char **env)\n\
 # argc -> rsi\n\
 movq %rdx, %rsi\n\
 # Save %rsp value in %r13.\n\
 movq %rsp, %r13\n\
 # And align stack for the _dl_init_internal call. \n\
 andq $-16, %rsp\n\
 # _dl_loaded -> rdi\n\
 movq _rtld_local(%rip), %rdi\n\
 # env -> rcx\n\
 leaq 16(%r13,%rdx,8), %rcx\n\
 # argv -> rdx\n\
 leaq 8(%r13), %rdx\n\
# Clear %rbp to mark outermost frame obviously even for constructors.\n\
 xorl %ebp, %ebp\n\
 # Call the function to run the initializers.\n\
 call _dl_init_internal@PLT\n\
 # Pass our finalizer function to the user in %rdx, as per ELF ABI.\n\
 leaq _dl_fini(%rip), %rdx\n\
 # And make sure %rsp points to argc stored on the stack.\n\
 movq %r13, %rsp\n\
 # Jump to the user's entry point.\n\
 jmp *%r12\n\
.previous\n\
");

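Note how _dl_start_user derives its arguments directly from the stack the kernel built: argc sits at the stack pointer, argv starts 8 bytes above it, and env starts right after argv’s terminating NULL, i.e. at 16 + 8*argc bytes above the stack pointer (the “leaq 16(%r13,%rdx,8), %rcx” instruction). We can use GDB to help us locate the point at which “environ” gets updated.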

Let’s start with a different toy program, a.C:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
$cat a.C
#include <stdlib.h>
#include <sys/types.h>
#include <unistd.h>
#include <iostream>
extern char * * environ;
using namespace std;
int main(void)
{
 cout << "environ=" << hex << environ << " @ " << (&environ) << endl;
return 0;
}

Running it produces the following output:

1
2
$./a
environ=0x7fff09704098 @ 0x602080

Let’s put GDB into action:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
$gdb ./a
Reading symbols from a...done.
(gdb) b main
Breakpoint 1 at 0x400975: file a.C, line 12.
(gdb) r
Starting program: a
Breakpoint 1, main () at a.C:12
12 cout << "environ=" << hex << environ << " @ " << (&environ) << endl;
(gdb) info variables environ
All variables matching regular expression "environ":
File environ.c:
char **__environ;
char **_environ;
char **environ;
File setenv.c:
static char **last_environ;
File environ.c:
char **__environ;
char **_environ;
char **environ;
Non-debugging symbols:
0x0000000000602080 __environ@@GLIBC_2.2.5
0x0000000000602080 environ@@GLIBC_2.2.5
(gdb) watch *0x602080
Hardware watchpoint 2: *0x602080
(gdb) r
The program being debugged has been started already.
Start it from the beginning? (y or n) y
Starting program: a
Hardware watchpoint 2: *0x602080
Old value = 0
New value = -7016
_init (argc=1, argv=0x7fffffffe488, envp=0x7fffffffe498) at ../sysdeps/unix/sysv/linux/x86_64/../init-first.c:87
87 ../sysdeps/unix/sysv/linux/x86_64/../init-first.c: No such file or directory.
(gdb) bt
#0 _init (argc=1, argv=0x7fffffffe488, envp=0x7fffffffe498) at ../sysdeps/unix/sysv/linux/x86_64/../init-first.c:87
#1 0x00007ffff7de92cb in call_init (l=0x7ffff7ffa9b8, argc=1, argv=0x7fffffffe488, env=0x7fffffffe498) at dl-init.c:70
#2 0x00007ffff7de93ef in call_init (env=<optimized out>, argv=<optimized out>, argc=<optimized out>, l=<optimized out>) at dl-init.c:52
#3 _dl_init (main_map=0x7ffff7ffe2e8, argc=1, argv=0x7fffffffe488, env=0x7fffffffe498) at dl-init.c:134
#4 0x00007ffff7ddb6fa in _dl_start_user () from /lib64/ld-linux-x86-64.so.2
#5 0x0000000000000001 in ?? ()
#6 0x00007fffffffe6c8 in ?? ()
#7 0x0000000000000000 in ?? ()

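The watchpoint fires inside _init(). (Because “watch *0x602080” treats the location as a 32-bit int, GDB shows only the low half of the new pointer: -7016 is 0xffffe498, which matches envp=0x7fffffffe498.) Let’s check the part of the code that actually updates “environ” in “init-first.c”: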

50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
void
attribute_hidden
_init (int argc, char **argv, char **envp)
{
#endif
#ifdef USE_NONOPTION_FLAGS
 extern void __getopt_clean_environment (char **);
#endif
 __libc_multiple_libcs = &_dl_starting_up && !_dl_starting_up;
 /* Make sure we don't initialize twice. */
 if (!__libc_multiple_libcs)
 {
 /* Set the FPU control word to the proper default value if the
 kernel would use a different value. (In a static program we
 don't have this information.) */
#ifdef SHARED
 if (__fpu_control != GLRO(dl_fpu_control))
#endif
 __setfpucw (__fpu_control);
 }
 /* Save the command-line arguments. */
 __libc_argc = argc;
 __libc_argv = argv;
 __environ = envp;

and “environ” is just a weak alias of glibc’s “__environ”:

6
7
8
9
10
11
12
/* This must be initialized; we cannot have a weak alias into bss. */
char **__environ = NULL;
weak_alias (__environ, environ)
/* The SVR4 ABI says `_environ' will be the name to use
in case the user overrides the weak alias `environ'. */
weak_alias (__environ, _environ)

Next we want to understand what happens when we call setenv().

Advertisements

    本站是提供个人知识管理的网络存储空间,所有内容均由用户发布,不代表本站观点。请注意甄别内容中的联系方式、诱导购买等信息,谨防诈骗。如发现有害或侵权内容,请点击一键举报。
    转藏 分享 献花(0

    0条评论

    发表

    请遵守用户 评论公约

    类似文章 更多