1 /**
2  * Compiler implementation of the
3  * $(LINK2 http://www.dlang.org, D programming language).
4  *
5  * Copyright:   Copyright (c) 1999-2016 by Digital Mars, All Rights Reserved
6  * Authors:     $(LINK2 http://www.digitalmars.com, Walter Bright)
7  * License:     $(LINK2 http://www.boost.org/LICENSE_1_0.txt, Boost License 1.0)
8  * Source:      $(DMDSRC _libmach.d)
9  */
10 
11 module ddmd.libmach;
12 
import core.stdc.time;
import core.stdc.string;
import core.stdc.stdlib;
import core.stdc.stdio;
import core.stdc.config;

import core.sys.posix.sys.stat;
import core.sys.posix.unistd;

import ddmd.globals;
import ddmd.lib;
import ddmd.utils;

import ddmd.root.array;
import ddmd.root.file;
import ddmd.root.filename;
import ddmd.root.outbuffer;
import ddmd.root.port;
import ddmd.root.rmem;
import ddmd.root.stringtable;

import ddmd.scanmach;
35 
/// Compile-time switch for the `static if (LOG)` blocks in this module, which
/// contain printf tracing. With `false` those blocks are not compiled in.
enum LOG = false;
37 
/// One entry of the library's symbol table: a symbol name paired with the
/// object module it is attributed to.
struct MachObjSymbol
{
    const(char)[] name;         // still has a terminating 0
    MachObjModule* om;          // module recorded as the definer of `name`
}
43 
// Growable arrays of pointers to the module and symbol records kept by LibMach.
alias MachObjModules = Array!(MachObjModule*);
alias MachObjSymbols = Array!(MachObjSymbol*);
46 
47 final class LibMach : Library
48 {
49     MachObjModules objmodules; // MachObjModule[]
50     MachObjSymbols objsymbols; // MachObjSymbol[]
51     StringTable tab;
52 
    /// Construct an empty library and initialize the string table `tab`
    /// with an initial size argument of 14000.
    extern (D) this()
    {
        tab._init(14000);
    }
57 
58     /***************************************
59      * Add object module or library to the library.
60      * Examine the buffer to see which it is.
61      * If the buffer is NULL, use module_name as the file name
62      * and load the file.
63      */
64     override void addObject(const(char)* module_name, const ubyte[] buffer)
65     {
66         if (!module_name)
67             module_name = "";
68         static if (LOG)
69         {
70             printf("LibMach::addObject(%s)\n", module_name);
71         }
72 
73         void corrupt(int reason)
74         {
75             error("corrupt Mach object module %s %d", module_name, reason);
76         }
77 
78         int fromfile = 0;
79         auto buf = buffer.ptr;
80         auto buflen = buffer.length;
81         if (!buf)
82         {
83             assert(module_name[0]);
84             File* file = File.create(cast(char*)module_name);
85             readFile(Loc(), file);
86             buf = file.buffer;
87             buflen = file.len;
88             file._ref = 1;
89             fromfile = 1;
90         }
91         int reason = 0;
92         if (buflen < 16)
93         {
94             static if (LOG)
95             {
96                 printf("buf = %p, buflen = %d\n", buf, buflen);
97             }
98             return corrupt(__LINE__);
99         }
100         if (memcmp(buf, cast(char*)"!<arch>\n", 8) == 0)
101         {
102             /* Library file.
103              * Pull each object module out of the library and add it
104              * to the object module array.
105              */
106             static if (LOG)
107             {
108                 printf("archive, buf = %p, buflen = %d\n", buf, buflen);
109             }
110             uint offset = 8;
111             char* symtab = null;
112             uint symtab_size = 0;
113             uint mstart = cast(uint)objmodules.dim;
114             while (offset < buflen)
115             {
116                 if (offset + MachLibHeader.sizeof >= buflen)
117                     return corrupt(__LINE__);
118                 MachLibHeader* header = cast(MachLibHeader*)(cast(ubyte*)buf + offset);
119                 offset += MachLibHeader.sizeof;
120                 char* endptr = null;
121                 uint size = cast(uint)strtoul(header.file_size.ptr, &endptr, 10);
122                 if (endptr >= header.file_size.ptr + 10 || *endptr != ' ')
123                     return corrupt(__LINE__);
124                 if (offset + size > buflen)
125                     return corrupt(__LINE__);
126                 if (memcmp(header.object_name.ptr, cast(char*)"__.SYMDEF       ", 16) == 0 ||
127                     memcmp(header.object_name.ptr, cast(char*)"__.SYMDEF SORTED", 16) == 0)
128                 {
129                     /* Instead of rescanning the object modules we pull from a
130                      * library, just use the already created symbol table.
131                      */
132                     if (symtab)
133                         return corrupt(__LINE__);
134                     symtab = cast(char*)buf + offset;
135                     symtab_size = size;
136                     if (size < 4)
137                         return corrupt(__LINE__);
138                 }
139                 else
140                 {
141                     auto om = new MachObjModule();
142                     om.base = cast(ubyte*)buf + offset - MachLibHeader.sizeof;
143                     om.length = cast(uint)(size + MachLibHeader.sizeof);
144                     om.offset = 0;
145                     const n = cast(const(char)*)(om.base + MachLibHeader.sizeof);
146                     om.name = n[0 .. strlen(n)];
147                     om.file_time = cast(uint)strtoul(header.file_time.ptr, &endptr, 10);
148                     om.user_id = cast(uint)strtoul(header.user_id.ptr, &endptr, 10);
149                     om.group_id = cast(uint)strtoul(header.group_id.ptr, &endptr, 10);
150                     om.file_mode = cast(uint)strtoul(header.file_mode.ptr, &endptr, 8);
151                     om.scan = 0; // don't scan object module for symbols
152                     objmodules.push(om);
153                 }
154                 offset += (size + 1) & ~1;
155             }
156             if (offset != buflen)
157                 return corrupt(__LINE__);
158             /* Scan the library's symbol table, and insert it into our own.
159              * We use this instead of rescanning the object module, because
160              * the library's creator may have a different idea of what symbols
161              * go into the symbol table than we do.
162              * This is also probably faster.
163              */
164             uint nsymbols = Port.readlongLE(symtab) / 8;
165             char* s = symtab + 4 + nsymbols * 8 + 4;
166             if (4 + nsymbols * 8 + 4 > symtab_size)
167                 return corrupt(__LINE__);
168             for (uint i = 0; i < nsymbols; i++)
169             {
170                 uint soff = Port.readlongLE(symtab + 4 + i * 8);
171                 const(char)* name = s + soff;
172                 size_t namelen = strlen(name);
173                 //printf("soff = x%x name = %s\n", soff, name);
174                 if (s + namelen + 1 - symtab > symtab_size)
175                     return corrupt(__LINE__);
176                 uint moff = Port.readlongLE(symtab + 4 + i * 8 + 4);
177                 //printf("symtab[%d] moff = x%x  x%x, name = %s\n", i, moff, moff + sizeof(Header), name);
178                 for (uint m = mstart; 1; m++)
179                 {
180                     if (m == objmodules.dim)
181                         return corrupt(__LINE__);       // didn't find it
182                     MachObjModule* om = objmodules[m];
183                     //printf("\tom offset = x%x\n", (char *)om->base - (char *)buf);
184                     if (moff == cast(char*)om.base - cast(char*)buf)
185                     {
186                         addSymbol(om, name[0 .. namelen], 1);
187                         //if (mstart == m)
188                         //    mstart++;
189                         break;
190                     }
191                 }
192             }
193             return;
194         }
195         /* It's an object module
196          */
197         auto om = new MachObjModule();
198         om.base = cast(ubyte*)buf;
199         om.length = cast(uint)buflen;
200         om.offset = 0;
201         const n = cast(const(char)*)FileName.name(module_name); // remove path, but not extension
202         om.name = n[0 .. strlen(n)];
203         om.scan = 1;
204         if (fromfile)
205         {
206             stat_t statbuf;
207             int i = stat(module_name, &statbuf);
208             if (i == -1) // error, errno is set
209                 return corrupt(__LINE__);
210             om.file_time = statbuf.st_ctime;
211             om.user_id = statbuf.st_uid;
212             om.group_id = statbuf.st_gid;
213             om.file_mode = statbuf.st_mode;
214         }
215         else
216         {
217             /* Mock things up for the object module file that never was
218              * actually written out.
219              */
220             static __gshared uid_t uid;
221             static __gshared gid_t gid;
222             static __gshared int _init;
223             if (!_init)
224             {
225                 _init = 1;
226                 uid = getuid();
227                 gid = getgid();
228             }
229             time(&om.file_time);
230             om.user_id = uid;
231             om.group_id = gid;
232             om.file_mode = (1 << 15) | (6 << 6) | (4 << 3) | (4 << 0); // 0100644
233         }
234         objmodules.push(om);
235     }
236 
237     /*****************************************************************************/
238 
239     void addSymbol(MachObjModule* om, const(char)[] name, int pickAny = 0)
240     {
241         static if (LOG)
242         {
243             printf("LibMach::addSymbol(%s, %s, %d)\n", om.name.ptr, name.ptr, pickAny);
244         }
245         version (none)
246         {
247             // let linker sort out duplicates
248             StringValue* s = tab.insert(name.ptr, name.length, null);
249             if (!s)
250             {
251                 // already in table
252                 if (!pickAny)
253                 {
254                     s = tab.lookup(name.ptr, name.length);
255                     assert(s);
256                     MachObjSymbol* os = cast(MachObjSymbol*)s.ptrvalue;
257                     error("multiple definition of %s: %s and %s: %s", om.name.ptr, name.ptr, os.om.name.ptr, os.name.ptr);
258                 }
259             }
260             else
261             {
262                 auto os = new MachObjSymbol();
263                 os.name = xarraydup(name);
264                 os.om = om;
265                 s.ptrvalue = cast(void*)os;
266                 objsymbols.push(os);
267             }
268         }
269         else
270         {
271             auto os = new MachObjSymbol();
272             os.name = xarraydup(name);
273             os.om = om;
274             objsymbols.push(os);
275         }
276     }
277 
278 private:
279     /************************************
280      * Scan single object module for dictionary symbols.
281      * Send those symbols to LibMach::addSymbol().
282      */
283     void scanObjModule(MachObjModule* om)
284     {
285         static if (LOG)
286         {
287             printf("LibMach::scanObjModule(%s)\n", om.name.ptr);
288         }
289 
290         extern (D) void addSymbol(const(char)[] name, int pickAny)
291         {
292             this.addSymbol(om, name, pickAny);
293         }
294 
295         scanMachObjModule(&addSymbol, om.base[0 .. om.length], om.name.ptr, loc);
296     }
297 
298     /*****************************************************************************/
299     /*****************************************************************************/
300     /**********************************************
301      * Create and write library to libbuf.
302      * The library consists of:
303      *      !<arch>\n
304      *      header
305      *      dictionary
306      *      object modules...
307      */
308     protected override void WriteLibToBuffer(OutBuffer* libbuf)
309     {
310         static if (LOG)
311         {
312             printf("LibMach::WriteLibToBuffer()\n");
313         }
314         static __gshared char* pad = [0x0A, 0x0A, 0x0A, 0x0A, 0x0A, 0x0A, 0x0A];
315         /************* Scan Object Modules for Symbols ******************/
316         for (size_t i = 0; i < objmodules.dim; i++)
317         {
318             MachObjModule* om = objmodules[i];
319             if (om.scan)
320             {
321                 scanObjModule(om);
322             }
323         }
324         /************* Determine module offsets ******************/
325         uint moffset = 8 + MachLibHeader.sizeof + 4 + 4;
326         for (size_t i = 0; i < objsymbols.dim; i++)
327         {
328             MachObjSymbol* os = objsymbols[i];
329             moffset += 8 + os.name.length + 1;
330         }
331         moffset = (moffset + 3) & ~3;
332         //if (moffset & 4)
333         //    moffset += 4;
334         uint hoffset = moffset;
335         static if (LOG)
336         {
337             printf("\tmoffset = x%x\n", moffset);
338         }
339         for (size_t i = 0; i < objmodules.dim; i++)
340         {
341             MachObjModule* om = objmodules[i];
342             moffset += moffset & 1;
343             om.offset = moffset;
344             if (om.scan)
345             {
346                 const slen = om.name.length;
347                 int nzeros = 8 - ((slen + 4) & 7);
348                 if (nzeros < 4)
349                     nzeros += 8; // emulate mysterious behavior of ar
350                 int filesize = om.length;
351                 filesize = (filesize + 7) & ~7;
352                 moffset += MachLibHeader.sizeof + slen + nzeros + filesize;
353             }
354             else
355             {
356                 moffset += om.length;
357             }
358         }
359         libbuf.reserve(moffset);
360         /************* Write the library ******************/
361         libbuf.write(cast(const(char)*)"!<arch>\n", 8);
362         MachObjModule om;
363         om.base = null;
364         om.length = cast(uint)(hoffset - (8 + MachLibHeader.sizeof));
365         om.offset = 8;
366         om.name = "";
367         .time(&om.file_time);
368         om.user_id = getuid();
369         om.group_id = getgid();
370         om.file_mode = (1 << 15) | (6 << 6) | (4 << 3) | (4 << 0); // 0100644
371         MachLibHeader h;
372         MachOmToHeader(&h, &om);
373         memcpy(h.object_name.ptr, cast(const(char)*)"__.SYMDEF", 9);
374         int len = sprintf(h.file_size.ptr, "%u", om.length);
375         assert(len <= 10);
376         memset(h.file_size.ptr + len, ' ', 10 - len);
377         libbuf.write(&h, h.sizeof);
378         char[4] buf;
379         Port.writelongLE(cast(uint)(objsymbols.dim * 8), buf.ptr);
380         libbuf.write(buf.ptr, 4);
381         int stringoff = 0;
382         for (size_t i = 0; i < objsymbols.dim; i++)
383         {
384             MachObjSymbol* os = objsymbols[i];
385             Port.writelongLE(stringoff, buf.ptr);
386             libbuf.write(buf.ptr, 4);
387             Port.writelongLE(os.om.offset, buf.ptr);
388             libbuf.write(buf.ptr, 4);
389             stringoff += os.name.length + 1;
390         }
391         Port.writelongLE(stringoff, buf.ptr);
392         libbuf.write(buf.ptr, 4);
393         for (size_t i = 0; i < objsymbols.dim; i++)
394         {
395             MachObjSymbol* os = objsymbols[i];
396             libbuf.writestring(os.name);
397             libbuf.writeByte(0);
398         }
399         while (libbuf.offset & 3)
400             libbuf.writeByte(0);
401         //if (libbuf->offset & 4)
402         //    libbuf->write(pad, 4);
403         static if (LOG)
404         {
405             printf("\tlibbuf->moffset = x%x\n", libbuf.offset);
406         }
407         assert(libbuf.offset == hoffset);
408         /* Write out each of the object modules
409          */
410         for (size_t i = 0; i < objmodules.dim; i++)
411         {
412             MachObjModule* om2 = objmodules[i];
413             if (libbuf.offset & 1)
414                 libbuf.writeByte('\n'); // module alignment
415             assert(libbuf.offset == om2.offset);
416             if (om2.scan)
417             {
418                 MachOmToHeader(&h, om2);
419                 libbuf.write(&h, h.sizeof); // module header
420                 libbuf.write(om2.name.ptr, om2.name.length);
421                 int nzeros = 8 - ((om2.name.length + 4) & 7);
422                 if (nzeros < 4)
423                     nzeros += 8; // emulate mysterious behavior of ar
424                 libbuf.fill0(nzeros);
425                 libbuf.write(om2.base, om2.length); // module contents
426                 // obj modules are padded out to 8 bytes in length with 0x0A
427                 int filealign = om2.length & 7;
428                 if (filealign)
429                 {
430                     libbuf.write(pad, 8 - filealign);
431                 }
432             }
433             else
434             {
435                 libbuf.write(om2.base, om2.length); // module contents
436             }
437         }
438         static if (LOG)
439         {
440             printf("moffset = x%x, libbuf->offset = x%x\n", moffset, libbuf.offset);
441         }
442         assert(libbuf.offset == moffset);
443     }
444 }
445 
/// Factory with C++ linkage: creates a new LibMach and returns it as a Library.
extern (C++) Library LibMach_factory()
{
    return new LibMach();
}
450 
451 /*****************************************************************************/
452 /*****************************************************************************/
/// Description of one member of the library.
/// LibMach.addObject creates these in two flavors: members taken from an
/// existing archive (`scan` == 0, `base`/`length` include the member's
/// archive header) and stand-alone object modules (`scan` == 1,
/// `base`/`length` cover just the object data).
struct MachObjModule
{
    ubyte* base; // where are we holding it in memory
    uint length; // in bytes
    uint offset; // offset from start of library
    const(char)[] name; // module name (file name) with terminating 0
    c_long file_time; // file time
    uint user_id;   // owner id written into the member header
    uint group_id;  // group id written into the member header
    uint file_mode; // mode bits; written into the member header in octal
    int scan; // 1 means scan for symbols
}
465 
/// Width in characters of the object_name field of MachLibHeader.
enum MACH_OBJECT_NAME_SIZE = 16;
467 
/// Header that precedes every member of the archive (60 bytes).
/// All fields are fixed-width text; MachOmToHeader() fills each one with a
/// formatted number and pads the rest of the field with blanks.
struct MachLibHeader
{
    char[MACH_OBJECT_NAME_SIZE] object_name; // "#1/<length>" or "__.SYMDEF"
    char[12] file_time; // decimal
    char[6] user_id;    // decimal
    char[6] group_id;   // decimal
    char[8] file_mode; // in octal
    char[10] file_size; // decimal; size of what follows the header
    char[2] trailer;    // "`\n"
}
478 
/**
 * Fill the archive member header `h` from the module description `om`.
 *
 * The member name is not stored in the header itself: object_name receives
 * "#1/<n>", where <n> is the length of the name plus its zero padding, and
 * file_size covers that padded name plus the module length rounded up to a
 * multiple of 8. Every field is text, padded with blanks to its full width.
 *
 * Params:
 *      h  = header to fill in; every field is overwritten
 *      om = module to describe. Side effect: a user_id or group_id above
 *           999999 is reset to 0 in `om` itself, as it would not fit into
 *           the 6 character field.
 */
extern (C++) void MachOmToHeader(MachLibHeader* h, MachObjModule* om)
{
    // Size of the name as it is stored behind the header: the name itself
    // followed by `zeros` 0 bytes.
    const nameLen = om.name.length;
    int zeros = 8 - ((nameLen + 4) & 7);
    if (zeros < 4)
        zeros += 8; // emulate mysterious behavior of ar

    size_t n = sprintf(h.object_name.ptr, "#1/%ld", nameLen + zeros);
    memset(h.object_name.ptr + n, ' ', h.object_name.length - n);

    /* sprintf also writes a terminating 0, which may fall just past the end
     * of the field being formatted. That is harmless as long as the fields
     * are written in ascending order: the 0 then lands in the next field,
     * which is overwritten right afterwards.
     */
    n = sprintf(h.file_time.ptr, "%llu", cast(long)om.file_time);
    assert(n <= h.file_time.length);
    memset(h.file_time.ptr + n, ' ', h.file_time.length - n);

    if (om.user_id > 999999) // yes, it happens
        om.user_id = 0; // don't really know what to do here
    n = sprintf(h.user_id.ptr, "%u", om.user_id);
    assert(n <= h.user_id.length);
    memset(h.user_id.ptr + n, ' ', h.user_id.length - n);

    if (om.group_id > 999999) // yes, it happens
        om.group_id = 0; // don't really know what to do here
    n = sprintf(h.group_id.ptr, "%u", om.group_id);
    assert(n <= h.group_id.length);
    memset(h.group_id.ptr + n, ' ', h.group_id.length - n);

    n = sprintf(h.file_mode.ptr, "%o", om.file_mode);
    assert(n <= h.file_mode.length);
    memset(h.file_mode.ptr + n, ' ', h.file_mode.length - n);

    // Module contents are padded to a multiple of 8 bytes in the archive
    const int roundedSize = (cast(int)om.length + 7) & ~7;
    n = sprintf(h.file_size.ptr, "%lu", nameLen + zeros + roundedSize);
    assert(n <= h.file_size.length);
    memset(h.file_size.ptr + n, ' ', h.file_size.length - n);

    // Written last, so a terminating 0 from file_size cannot survive here
    h.trailer[0] = '`';
    h.trailer[1] = '\n';
}