/* Source: coregrind/m_syscall.c (blob 627f5f0d0da84653cbedfdfb982c26952d0d7839) */

/*--------------------------------------------------------------------*/
/*--- Doing syscalls.                                  m_syscall.c ---*/
/*--------------------------------------------------------------------*/

/*
   This file is part of Valgrind, a dynamic binary instrumentation
   framework.

   Copyright (C) 2000-2008 Julian Seward 
      jseward@acm.org

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   02111-1307, USA.

   The GNU General Public License is contained in the file COPYING.
*/

#include "pub_core_basics.h"
#include "pub_core_vki.h"
#include "pub_core_vkiscnums.h"
#include "pub_core_syscall.h"

/* ---------------------------------------------------------------------
   Building syscall return values.
   ------------------------------------------------------------------ */

/* Make a SysRes value from a syscall return value.  This is
   Linux-specific.

   From:
   http://sources.redhat.com/cgi-bin/cvsweb.cgi/libc/sysdeps/unix/sysv/
   linux/i386/sysdep.h?
   rev=1.28&content-type=text/x-cvsweb-markup&cvsroot=glibc

   Linux uses a negative return value to indicate syscall errors,
   unlike most Unices, which use the condition codes' carry flag.

   Since version 2.1 the return value of a system call might be
   negative even if the call succeeded.  E.g., the 'lseek' system call
   might return a large offset.  Therefore we must not anymore test
   for < 0, but test for a real error by making sure the value in %eax
   is a real error number.  Linus said he will make sure that no
   syscall returns a value in -1 .. -4095 as a valid result so we can
   safely test with -4095.
*/
SysRes VG_(mk_SysRes_x86_linux) ( UInt val ) {
   /* The kernel's error window -4095 .. -1, seen as an unsigned 32-bit
      quantity, is exactly "everything from (UInt)-4095 upwards". */
   const UInt firstErr = (UInt)-4095;
   SysRes sr;

   sr.isError = val >= firstErr;
   /* A failure carries the negated raw value as its error number and
      no result; a success carries the raw value and no error. */
   sr.err = sr.isError ? (UInt)0 - val : 0;
   sr.res = sr.isError ? 0 : val;
   return sr;
}

/* Similarly .. */
SysRes VG_(mk_SysRes_amd64_linux) ( ULong val ) {
   /* Same -4095 .. -1 error window as on x86, but on a 64-bit value. */
   const ULong firstErr = (ULong)-4095;
   SysRes sr;

   sr.isError = val >= firstErr;
   if (!sr.isError) {
      /* Success: hand the raw value back untouched. */
      sr.res = val;
      sr.err = 0;
      return sr;
   }
   /* Failure: the error number is the negated raw value. */
   sr.res = 0;
   sr.err = (ULong)0 - val;
   return sr;
}

/* PPC uses the CR7.SO bit to flag an error (CR0 in IBM-speak) */
/* Note this must be in the bottom bit of the second arg */
SysRes VG_(mk_SysRes_ppc32_linux) ( UInt val, UInt cr0so ) {
   SysRes sr;

   /* Only the least significant bit of 'cr0so' is consulted. */
   sr.isError = (cr0so & 1) == 1;
   if (!sr.isError) {
      sr.res = val;
      sr.err = 0;
      return sr;
   }
   /* On failure 'val' already is the (positive) error number. */
   sr.res = 0;
   sr.err = val;
   return sr;
}

/* As per ppc32 version, cr0.so must be in l.s.b. of 2nd arg */
SysRes VG_(mk_SysRes_ppc64_linux) ( ULong val, ULong cr0so ) {
   SysRes sr;

   /* Bit 0 of 'cr0so' says whether the call failed; 'val' is then the
      error number, otherwise it is the result. */
   sr.isError = (cr0so & 1) == 1;
   sr.err     = sr.isError ? val : 0;
   sr.res     = sr.isError ? 0   : val;
   return sr;
}

/* AIX scheme: we have to record both 'res' (r3) and 'err' (r4).  If
   'err' is nonzero then the call has failed, but it could still be
   that AIX userspace will ignore 'err' and instead consult 'res' to
   determine if the call failed.  So we have to record both. */
SysRes VG_(mk_SysRes_ppc32_aix5) ( UInt res, UInt err ) {
   SysRes out;

   /* Both values are kept verbatim; a nonzero stored error marks the
      call as failed. */
   out.err     = err;
   out.res     = res;
   out.isError = 0 != out.err;
   return out;
}

/* 64-bit counterpart of the ppc32-aix5 constructor: record both the
   result and the error value, and flag failure when the stored error
   is nonzero. */
SysRes VG_(mk_SysRes_ppc64_aix5) ( ULong res, ULong err ) {
   SysRes out;

   out.err     = err;
   out.res     = res;
   /* Test the stored field, not the parameter, so the flag always
      agrees with what callers will read back. */
   out.isError = 0 != out.err;
   return out;
}

/* Generic constructors. */
/* Build a failed SysRes carrying error number 'err' and a zero result. */
SysRes VG_(mk_SysRes_Error) ( UWord err ) {
   SysRes failure;

   failure.isError = True;
   failure.err     = err;
   failure.res     = 0;
   return failure;
}

/* Build a successful SysRes carrying result 'res' and a zero error. */
SysRes VG_(mk_SysRes_Success) ( UWord res ) {
   SysRes success;

   success.isError = False;
   success.err     = 0;
   success.res     = res;
   return success;
}


/* ---------------------------------------------------------------------
   A function for doing syscalls.
   ------------------------------------------------------------------ */

#if defined(VGP_x86_linux)
/* x86-linux syscall stub.

   Incoming args (syscall number + up to 6 args) come on the stack.
   (ie. the C calling convention).

   The syscall number goes in %eax.  The args are passed to the syscall in
   the regs %ebx, %ecx, %edx, %esi, %edi, %ebp, ie. the kernel's syscall
   calling convention.

   %eax gets the return value.  Not sure which registers the kernel
   clobbers, so we preserve all the callee-save regs (%esi, %edi, %ebx,
   %ebp).

   Whatever the kernel leaves in %eax is returned unmodified; the
   caller is responsible for interpreting it.
*/
extern UWord do_syscall_WRK (
          UWord syscall_no, 
          UWord a1, UWord a2, UWord a3,
          UWord a4, UWord a5, UWord a6
       );
asm(
".text\n"
"do_syscall_WRK:\n"
	/* Save the four callee-save registers.  That is 16 bytes, which
	   is why every stack offset below is written as "16+...". */
"	push	%esi\n"
"	push	%edi\n"
"	push	%ebx\n"
"	push	%ebp\n"
	/* 16+0 is the return address, so the C arguments start at 16+4. */
"	movl	16+ 4(%esp),%eax\n"	/* syscall_no */
"	movl	16+ 8(%esp),%ebx\n"	/* a1 */
"	movl	16+12(%esp),%ecx\n"	/* a2 */
"	movl	16+16(%esp),%edx\n"	/* a3 */
"	movl	16+20(%esp),%esi\n"	/* a4 */
"	movl	16+24(%esp),%edi\n"	/* a5 */
"	movl	16+28(%esp),%ebp\n"	/* a6 */
"	int	$0x80\n"		/* enter the kernel; result comes back in %eax */
	/* Restore the saved registers in the reverse order of the pushes. */
"	popl	%ebp\n"
"	popl	%ebx\n"
"	popl	%edi\n"
"	popl	%esi\n"
"	ret\n"
".previous\n"
);

#elif defined(VGP_amd64_linux)
/* amd64-linux syscall stub.

   Incoming args (syscall number + up to 6 args) come in %rdi, %rsi,
   %rdx, %rcx, %r8, %r9, and the last one on the stack (ie. the C
   calling convention).

   The syscall number goes in %rax.  The args are passed to the syscall in
   the regs %rdi, %rsi, %rdx, %r10, %r8, %r9 (yes, really %r10, not %rcx),
   ie. the kernel's syscall calling convention.

   %rax gets the return value.  %rcx and %r11 are clobbered by the syscall;
   no matter, they are caller-save (the syscall clobbers no callee-save
   regs, so we don't have to do any register saving/restoring).
*/
extern UWord do_syscall_WRK (
          UWord syscall_no, 
          UWord a1, UWord a2, UWord a3,
          UWord a4, UWord a5, UWord a6
       );
asm(
".text\n"
"do_syscall_WRK:\n"
        /* Convert function calling convention --> syscall calling
           convention.  The moves are ordered so that each source
           register is read before it is overwritten. */
"	movq	%rdi, %rax\n"	/* syscall_no */
"	movq	%rsi, %rdi\n"	/* a1 */
"	movq	%rdx, %rsi\n"	/* a2 */
"	movq	%rcx, %rdx\n"	/* a3 */
"	movq	%r8,  %r10\n"	/* a4 */
"	movq	%r9,  %r8\n"	/* a5 */
"	movq    8(%rsp), %r9\n"	 /* last arg from stack (0(%rsp) is the return address) */
"	syscall\n"
"	ret\n"
".previous\n"
);

#elif defined(VGP_ppc32_linux)
/* ppc32-linux syscall stub.

   Incoming args (syscall number + up to 6 args) come in %r3:%r9.

   The syscall number goes in %r0.  The args are passed to the syscall in
   the regs %r3:%r8, i.e. the kernel's syscall calling convention.

   The %cr0.so bit flags an error.
   We return the syscall return value in %r3, and the %cr0.so in 
   the lowest bit of %r4.
   We return a ULong, of which %r3 is the high word, and %r4 the low.
   No callee-save regs are clobbered, so no saving/restoring is needed.
*/
extern ULong do_syscall_WRK (
          UWord syscall_no, 
          UWord a1, UWord a2, UWord a3,
          UWord a4, UWord a5, UWord a6
       );
asm(
".text\n"
"do_syscall_WRK:\n"
        /* Shift every incoming value down by one register: the syscall
           number moves to r0 and a1..a6 move from r4..r9 to r3..r8. */
"        mr      0,3\n"
"        mr      3,4\n"
"        mr      4,5\n"
"        mr      5,6\n"
"        mr      6,7\n"
"        mr      7,8\n"
"        mr      8,9\n"
"        sc\n"                  /* syscall: sets %cr0.so on error         */
"        mfcr    4\n"           /* %cr -> low word of return var          */
"        rlwinm  4,4,4,31,31\n" /* rotate the SO flag to the lsb, mask it */
"        blr\n"                 /* and return                             */
".previous\n"
);

#elif defined(VGP_ppc64_linux)
/* ppc64-linux syscall stub.

   Due to the need to return 65 bits of result, this is completely
   different from the ppc32 case.  The single arg register points to a
   7-word block containing the syscall # and the 6 args.  The syscall
   result proper is put in [0] of the block, and %cr0.so is in the
   bottom bit of [1].  The block is therefore both input and output:
   its first two words are overwritten on return. */
extern void do_syscall_WRK ( ULong* argblock );
asm(
".align   2\n"
".globl   do_syscall_WRK\n"
".section \".opd\",\"aw\"\n"
".align   3\n"
"do_syscall_WRK:\n"
".quad    .do_syscall_WRK,.TOC.@tocbase,0\n"
".previous\n"
".type    .do_syscall_WRK,@function\n"
".globl   .do_syscall_WRK\n"
".do_syscall_WRK:\n"
"        std  3,-16(1)\n"  /* stash arg */
"        ld   8, 48(3)\n"  /* sc arg 6 */
"        ld   7, 40(3)\n"  /* sc arg 5 */
"        ld   6, 32(3)\n"  /* sc arg 4 */
"        ld   5, 24(3)\n"  /* sc arg 3 */
"        ld   4, 16(3)\n"  /* sc arg 2 */
"        ld   0,  0(3)\n"  /* sc number */
"        ld   3,  8(3)\n"  /* sc arg 1 (loaded last: it overwrites the block pointer) */
"        sc\n"             /* result in r3 and cr0.so */
"        ld   5,-16(1)\n"  /* reacquire argblock ptr (r5 is caller-save) */
"        std  3,0(5)\n"    /* argblock[0] = r3 */
"        mfcr 3\n"         /* r3 = condition register */
"        srwi 3,3,28\n"    /* move the cr0 field down to the low 4 bits */
"        andi. 3,3,1\n"    /* keep only the lowest bit, i.e. cr0.so */
"        std  3,8(5)\n"    /* argblock[1] = cr0.so & 1 */
"        blr\n"
);

#elif defined(VGP_ppc32_aix5)
/* ppc32-aix5 syscall stub.  Performs syscall 'sysno' with arguments
   a1..a8 and stores the two values the kernel hands back: r3 (the
   result) into *res_r3 and r4 (the error code) into *res_r4.  No
   interpretation of either value is done here. */
static void do_syscall_WRK ( UWord* res_r3, UWord* res_r4,
                             UWord sysno, 
                             UWord a1, UWord a2, UWord a3,
                             UWord a4, UWord a5, UWord a6,
                             UWord a7, UWord a8 )
{
   /* Syscalls on AIX are very similar to function calls:
      - up to 8 args in r3-r10
      - syscall number in r2
      - kernel resumes at 'lr', so must set it appropriately beforehand
      - r3 holds the result and r4 any applicable error code
      See http://www.cs.utexas.edu/users/cart/publications/tr00-04.ps
      and also 'man truss'.
   */
   /* For some reason gcc-3.3.2 doesn't preserve r31 across the asm
      even though we state it to be trashed.  So use r27 instead. */
   /* The syscall number and arguments are marshalled into one array so
      the asm needs only a single input operand (the array's address).
      The same array is reused for output: slots 0 and 1 receive r3
      and r4 after the call. */
   UWord args[9];
   args[0] = sysno;
   args[1] = a1; args[2] = a2;
   args[3] = a3; args[4] = a4;
   args[5] = a5; args[6] = a6;
   args[7] = a7; args[8] = a8;

   __asm__ __volatile__(

      // establish base ptr
      "mr   28,%0\n\t"

      // save r2, lr
      "mr   27,2\n\t" // save r2 in r27
      "mflr 30\n\t"   // save lr in r30

      // set syscall number and args
      // (4-byte words: args[i] lives at offset 4*i from r28)
      "lwz   2,  0(28)\n\t"
      "lwz   3,  4(28)\n\t"
      "lwz   4,  8(28)\n\t"
      "lwz   5, 12(28)\n\t"
      "lwz   6, 16(28)\n\t"
      "lwz   7, 20(28)\n\t"
      "lwz   8, 24(28)\n\t"
      "lwz   9, 28(28)\n\t"
      "lwz  10, 32(28)\n\t"

      // set bit 3 of CR1 otherwise AIX 5.1 returns to the
      // wrong address after the sc instruction
      "crorc 6,6,6\n\t"

      // set up LR to point just after the sc insn
      // (the bl leaves the address of the following mflr in lr; the
      // insn after sc is 4 insns = 16 bytes further on)
      ".long 0x48000005\n\t" // "bl here+4" -- lr := & next insn
      "mflr 29\n\t"
      "addi 29,29,16\n\t"
      "mtlr 29\n\t"

      // do it!
      "sc\n\t"

      // result is now in r3; save it in args[0]
      "stw  3,0(28)\n\t"
      // error code in r4; save it in args[1]
      "stw  4,4(28)\n\t"

      // restore
      "mr   2,27\n\t"
      "mtlr 30\n\t"

      : /*out*/
      : /*in*/  "b" (&args[0])
      : /*trash*/
           /*temps*/    "r31","r30","r29","r28","r27",
           /*args*/     "r3","r4","r5","r6","r7","r8","r9","r10",
           /*paranoia*/ "memory","cc","r0","r1","r11","r12","r13",
                        "xer","ctr","cr0","cr1","cr2","cr3",
                        "cr4","cr5","cr6","cr7"
   );

   /* Copy out the two values the asm stored back into the array. */
   *res_r3 = args[0];
   *res_r4 = args[1];
}

#elif defined(VGP_ppc64_aix5)
/* ppc64-aix5 syscall stub.  Performs syscall 'sysno' with arguments
   a1..a8 and stores the two values the kernel hands back: r3 (the
   result) into *res_r3 and r4 (the error code) into *res_r4.  No
   interpretation of either value is done here. */
static void do_syscall_WRK ( UWord* res_r3, UWord* res_r4,
                             UWord sysno, 
                             UWord a1, UWord a2, UWord a3,
                             UWord a4, UWord a5, UWord a6,
                             UWord a7, UWord a8 )
{
   /* Same scheme as ppc32-aix5. */
   /* The syscall number and arguments are marshalled into one array so
      the asm needs only a single input operand (the array's address).
      The same array is reused for output: slots 0 and 1 receive r3
      and r4 after the call. */
   UWord args[9];
   args[0] = sysno;
   args[1] = a1; args[2] = a2;
   args[3] = a3; args[4] = a4;
   args[5] = a5; args[6] = a6;
   args[7] = a7; args[8] = a8;

   __asm__ __volatile__(

      // establish base ptr
      "mr   28,%0\n\t"

      // save r2, lr
      "mr   27,2\n\t" // save r2 in r27
      "mflr 30\n\t"   // save lr in r30

      // set syscall number and args
      // (8-byte doublewords: args[i] lives at offset 8*i from r28)
      "ld    2,  0(28)\n\t"
      "ld    3,  8(28)\n\t"
      "ld    4, 16(28)\n\t"
      "ld    5, 24(28)\n\t"
      "ld    6, 32(28)\n\t"
      "ld    7, 40(28)\n\t"
      "ld    8, 48(28)\n\t"
      "ld    9, 56(28)\n\t"
      "ld   10, 64(28)\n\t"

      // set bit 3 of CR1 otherwise AIX 5.1 returns to the
      // wrong address after the sc instruction
      "crorc 6,6,6\n\t"

      // set up LR to point just after the sc insn
      // (the bl leaves the address of the following mflr in lr; the
      // insn after sc is 4 insns = 16 bytes further on)
      ".long 0x48000005\n\t" // "bl here+4" -- lr := & next insn
      "mflr 29\n\t"
      "addi 29,29,16\n\t"
      "mtlr 29\n\t"

      // do it!
      "sc\n\t"

      // result is now in r3; save it in args[0]
      "std  3,0(28)\n\t"
      // error code in r4; save it in args[1]
      "std  4,8(28)\n\t"

      // restore
      "mr   2,27\n\t"
      "mtlr 30\n\t"

      : /*out*/
      : /*in*/  "b" (&args[0])
      : /*trash*/
           /*temps*/    "r31","r30","r29","r28","r27",
           /*args*/     "r3","r4","r5","r6","r7","r8","r9","r10",
           /*paranoia*/ "memory","cc","r0","r1","r11","r12","r13",
                        "xer","ctr","cr0","cr1","cr2","cr3",
                        "cr4","cr5","cr6","cr7"
   );

   /* Copy out the two values the asm stored back into the array. */
   *res_r3 = args[0];
   *res_r4 = args[1];
}

#else
#  error Unknown platform
#endif


/* Issue system call 'sysno' through this platform's do_syscall_WRK
   stub and package the raw outcome as a SysRes.  The Linux stubs are
   handed only a1..a6, so a7 and a8 are ignored there; the AIX stubs
   receive all eight arguments. */
SysRes VG_(do_syscall) ( UWord sysno, UWord a1, UWord a2, UWord a3,
                                      UWord a4, UWord a5, UWord a6,
                                      UWord a7, UWord a8 )
{
#if defined(VGP_x86_linux)
   /* The stub returns the kernel's raw %eax value. */
   return VG_(mk_SysRes_x86_linux)(
             do_syscall_WRK(sysno, a1, a2, a3, a4, a5, a6) );

#elif defined(VGP_amd64_linux)
   /* The stub returns the kernel's raw %rax value. */
   return VG_(mk_SysRes_amd64_linux)(
             do_syscall_WRK(sysno, a1, a2, a3, a4, a5, a6) );

#elif defined(VGP_ppc32_linux)
   /* The stub packs two 32-bit values into one ULong: the syscall
      result in the high half, the cr0.so flag in the low half. */
   ULong packed = do_syscall_WRK(sysno, a1, a2, a3, a4, a5, a6);
   return VG_(mk_SysRes_ppc32_linux)( (UInt)(packed >> 32),
                                      (UInt)packed );

#elif defined(VGP_ppc64_linux)
   /* The stub takes its inputs from, and writes its two outputs back
      into, a single block: [0] = result, [1] = cr0.so flag. */
   ULong blk[7] = { sysno, a1, a2, a3, a4, a5, a6 };
   do_syscall_WRK( &blk[0] );
   return VG_(mk_SysRes_ppc64_linux)( blk[0], blk[1] );

#elif defined(VGP_ppc32_aix5) || defined(VGP_ppc64_aix5)
   /* Both AIX flavours follow the same recipe; only the constructor
      used at the end differs. */
   UWord result;
   UWord errcode;
   do_syscall_WRK( &result, &errcode,
                   sysno, a1, a2, a3, a4, a5, a6, a7, a8 );

   /* Try to set the error number to zero if the syscall hasn't
      really failed. */
   if (sysno == __NR_AIX5_kread
       || sysno == __NR_AIX5_kwrite) {
      /* treated as failed only when the result is -1 */
      if (result != (UWord)-1L)
         errcode = 0;
   }
   else if (sysno == __NR_AIX5_sigprocmask
            || sysno == __NR_AIX5__sigpending) {
      /* a zero result is taken to mean success */
      if (result == 0)
         errcode = 0;
   }

#  if defined(VGP_ppc32_aix5)
   return VG_(mk_SysRes_ppc32_aix5)( result, errcode );
#  else
   return VG_(mk_SysRes_ppc64_aix5)( result, errcode );
#  endif

#else
#  error Unknown platform
#endif
}

/* ---------------------------------------------------------------------
   Names of errors.
   ------------------------------------------------------------------ */

/* Map an error number to a short, human-readable description.  Note,
   unlike the standard C strerror fn, the returned string is not
   malloc-allocated or writable -- treat it as a constant.  Only a
   handful of error numbers are known; all others yield a fixed
   "unknown error" message.
   TODO: implement this properly. */

const HChar* VG_(strerror) ( UWord errnum )
{
   /* Table of the error numbers we can describe. */
   static const struct {
      UWord        code;
      const HChar* text;
   } known[] = {
      { VKI_EPERM,       "Operation not permitted" },
      { VKI_ENOENT,      "No such file or directory" },
      { VKI_ESRCH,       "No such process" },
      { VKI_EINTR,       "Interrupted system call" },
      { VKI_EBADF,       "Bad file number" },
      { VKI_EAGAIN,      "Try again" },
      { VKI_ENOMEM,      "Out of memory" },
      { VKI_EACCES,      "Permission denied" },
      { VKI_EFAULT,      "Bad address" },
      { VKI_EEXIST,      "File exists" },
      { VKI_EINVAL,      "Invalid argument" },
      { VKI_EMFILE,      "Too many open files" },
      { VKI_ENOSYS,      "Function not implemented" },
      { VKI_EOVERFLOW,   "Value too large for defined data type" },
      { VKI_ERESTARTSYS, "ERESTARTSYS" }
   };
   UWord i;

   for (i = 0; i < sizeof(known) / sizeof(known[0]); i++) {
      if (known[i].code == errnum)
         return known[i].text;
   }
   return "VG_(strerror): unknown error";
}


/*--------------------------------------------------------------------*/
/*--- end                                                        ---*/
/*--------------------------------------------------------------------*/