xref: /PHP-5.3/ext/sqlite/libsqlite/src/os.c (revision 61c9b225)
1 /*
2 ** 2001 September 16
3 **
4 ** The author disclaims copyright to this source code.  In place of
5 ** a legal notice, here is a blessing:
6 **
7 **    May you do good and not evil.
8 **    May you find forgiveness for yourself and forgive others.
9 **    May you share freely, never taking more than you give.
10 **
11 ******************************************************************************
12 **
13 ** This file contains code that is specific to particular operating
14 ** systems.  The purpose of this file is to provide a uniform abstraction
15 ** on which the rest of SQLite can operate.
16 */
17 #include "os.h"          /* Must be first to enable large file support */
18 #include "sqliteInt.h"
19 
20 #if OS_UNIX
21 # include <time.h>
22 # include <errno.h>
23 # include <unistd.h>
24 # ifndef O_LARGEFILE
25 #  define O_LARGEFILE 0
26 # endif
27 # ifdef SQLITE_DISABLE_LFS
28 #  undef O_LARGEFILE
29 #  define O_LARGEFILE 0
30 # endif
31 # ifndef O_NOFOLLOW
32 #  define O_NOFOLLOW 0
33 # endif
34 # ifndef O_BINARY
35 #  define O_BINARY 0
36 # endif
37 #endif
38 
39 
40 #if OS_WIN
41 # include <winbase.h>
42 #endif
43 
44 #if OS_MAC
45 # include <extras.h>
46 # include <path2fss.h>
47 # include <TextUtils.h>
48 # include <FinderRegistry.h>
49 # include <Folders.h>
50 # include <Timer.h>
51 # include <OSUtils.h>
52 #endif
53 
54 /*
55 ** The DJGPP compiler environment looks mostly like Unix, but it
56 ** lacks the fcntl() system call.  So redefine fcntl() to be something
57 ** that always succeeds.  This means that locking does not occur under
58 ** DJGPP.  But it's DOS - what did you expect?
59 */
60 #ifdef __DJGPP__
61 # define fcntl(A,B,C) 0
62 #endif
63 
64 /*
65 ** Macros used to determine whether or not to use threads.  The
66 ** SQLITE_UNIX_THREADS macro is defined if we are synchronizing for
67 ** Posix threads and SQLITE_W32_THREADS is defined if we are
68 ** synchronizing using Win32 threads.
69 */
70 #if OS_UNIX && defined(THREADSAFE) && THREADSAFE
71 # include <pthread.h>
72 # define SQLITE_UNIX_THREADS 1
73 #endif
74 #if OS_WIN && defined(THREADSAFE) && THREADSAFE
75 # define SQLITE_W32_THREADS 1
76 #endif
77 #if OS_MAC && defined(THREADSAFE) && THREADSAFE
78 # include <Multiprocessing.h>
79 # define SQLITE_MACOS_MULTITASKING 1
80 #endif
81 
/*
** Macros for performance tracing.  Normally turned off: with the "#if 0"
** below, every macro expands to nothing.  When enabled, TIMER_START and
** TIMER_END bracket an operation and leave the elapsed cycle count in
** "elapse", SEEK() records the page most recently sought, and the
** TRACEn() macros print to stderr through fprintf().
*/
#if 0
static int last_page = 0;
/*
** Read the x86 time-stamp counter.  The asm statement is volatile so the
** compiler must execute it on every call and cannot merge two readings
** into one (TIMER_END depends on a fresh reading).  Only the output
** operand is written, so no clobber list is needed.
** NOTE(review): the "=A" constraint names the edx:eax pair only on 32-bit
** x86 - confirm before enabling this on another target.
*/
__inline__ unsigned long long int hwtime(void){
  unsigned long long int x;
  __asm__ __volatile__("rdtsc" : "=A" (x));
  return x;
}
static unsigned long long int g_start;
static unsigned int elapse;
#define TIMER_START       g_start=hwtime()
#define TIMER_END         elapse=hwtime()-g_start
#define SEEK(X)           last_page=(X)
#define TRACE1(X)         fprintf(stderr,X)
#define TRACE2(X,Y)       fprintf(stderr,X,Y)
#define TRACE3(X,Y,Z)     fprintf(stderr,X,Y,Z)
#define TRACE4(X,Y,Z,A)   fprintf(stderr,X,Y,Z,A)
#define TRACE5(X,Y,Z,A,B) fprintf(stderr,X,Y,Z,A,B)
#else
#define TIMER_START
#define TIMER_END
#define SEEK(X)
#define TRACE1(X)
#define TRACE2(X,Y)
#define TRACE3(X,Y,Z)
#define TRACE4(X,Y,Z,A)
#define TRACE5(X,Y,Z,A,B)
#endif
114 
115 
116 #if OS_UNIX
117 /*
118 ** Here is the dirt on POSIX advisory locks:  ANSI STD 1003.1 (1996)
119 ** section 6.5.2.2 lines 483 through 490 specify that when a process
120 ** sets or clears a lock, that operation overrides any prior locks set
121 ** by the same process.  It does not explicitly say so, but this implies
122 ** that it overrides locks set by the same process using a different
123 ** file descriptor.  Consider this test case:
124 **
125 **       int fd1 = open("./file1", O_RDWR|O_CREAT, 0644);
126 **       int fd2 = open("./file2", O_RDWR|O_CREAT, 0644);
127 **
128 ** Suppose ./file1 and ./file2 are really the same file (because
129 ** one is a hard or symbolic link to the other) then if you set
130 ** an exclusive lock on fd1, then try to get an exclusive lock
131 ** on fd2, it works.  I would have expected the second lock to
132 ** fail since there was already a lock on the file due to fd1.
133 ** But not so.  Since both locks came from the same process, the
134 ** second overrides the first, even though they were on different
135 ** file descriptors opened on different file names.
136 **
137 ** Bummer.  If you ask me, this is broken.  Badly broken.  It means
138 ** that we cannot use POSIX locks to synchronize file access among
139 ** competing threads of the same process.  POSIX locks will work fine
140 ** to synchronize access for threads in separate processes, but not
141 ** threads within the same process.
142 **
143 ** To work around the problem, SQLite has to manage file locks internally
144 ** on its own.  Whenever a new database is opened, we have to find the
145 ** specific inode of the database file (the inode is determined by the
146 ** st_dev and st_ino fields of the stat structure that fstat() fills in)
147 ** and check for locks already existing on that inode.  When locks are
148 ** created or removed, we have to look at our own internal record of the
149 ** locks to see if another thread has previously set a lock on that same
150 ** inode.
151 **
152 ** The OsFile structure for POSIX is no longer just an integer file
153 ** descriptor.  It is now a structure that holds the integer file
154 ** descriptor and a pointer to a structure that describes the internal
155 ** locks on the corresponding inode.  There is one locking structure
156 ** per inode, so if the same inode is opened twice, both OsFile structures
157 ** point to the same locking structure.  The locking structure keeps
158 ** a reference count (so we will know when to delete it) and a "cnt"
159 ** field that tells us its internal lock status.  cnt==0 means the
160 ** file is unlocked.  cnt==-1 means the file has an exclusive lock.
161 ** cnt>0 means there are cnt shared locks on the file.
162 **
163 ** Any attempt to lock or unlock a file first checks the locking
164 ** structure.  The fcntl() system call is only invoked to set a
165 ** POSIX lock if the internal lock structure transitions between
166 ** a locked and an unlocked state.
167 **
168 ** 2004-Jan-11:
169 ** More recent discoveries about POSIX advisory locks.  (The more
170 ** I discover, the more I realize the a POSIX advisory locks are
171 ** an abomination.)
172 **
173 ** If you close a file descriptor that points to a file that has locks,
174 ** all locks on that file that are owned by the current process are
175 ** released.  To work around this problem, each OsFile structure contains
176 ** a pointer to an openCnt structure.  There is one openCnt structure
177 ** per open inode, which means that multiple OsFiles can point to a single
178 ** openCnt.  When an attempt is made to close an OsFile, if there are
179 ** other OsFiles open on the same inode that are holding locks, the call
180 ** to close() the file descriptor is deferred until all of the locks clear.
181 ** The openCnt structure keeps a list of file descriptors that need to
182 ** be closed and that list is walked (and cleared) when the last lock
183 ** clears.
184 **
185 ** Furthermore, under Linux threads, because each thread has a separate
186 ** process ID, lock operations in one thread do not override locks
187 ** to the same file in other threads.  Linux threads behave like
188 ** separate processes in this respect.  But, if you close a file
189 ** descriptor in linux threads, all locks are cleared, even locks
190 ** on other threads and even though the other threads have different
191 ** process IDs.  Linux threads is inconsistent in this respect.
192 ** (I'm beginning to think that linux threads is an abomination too.)
193 ** The consequence of this all is that the hash table for the lockInfo
194 ** structure has to include the process id as part of its key because
195 ** locks in different threads are treated as distinct.  But the
196 ** openCnt structure should not include the process id in its
197 ** key because close() clears lock on all threads, not just the current
198 ** thread.  Were it not for this goofiness in linux threads, we could
199 ** combine the lockInfo and openCnt structures into a single structure.
200 */
201 
/*
** Lookup key for a lockInfo structure.  A lock record is identified by
** the device and inode numbers of the file together with the process ID.
** The process ID has to be part of the key because on some threading
** implementations (ex: linux) each thread has a separate process ID.
*/
struct lockKey {
  /* Device number */
  dev_t dev;
  /* Inode number */
  ino_t ino;
  /* Process ID */
  pid_t pid;
};
214 
215 /*
216 ** An instance of the following structure is allocated for each open
217 ** inode on each thread with a different process ID.  (Threads have
218 ** different process IDs on linux, but not on most other unixes.)
219 **
220 ** A single inode can have multiple file descriptors, so each OsFile
221 ** structure contains a pointer to an instance of this object and this
222 ** object keeps a count of the number of OsFiles pointing to it.
223 */
224 struct lockInfo {
225   struct lockKey key;  /* The lookup key */
226   int cnt;             /* 0: unlocked.  -1: write lock.  1...: read lock. */
227   int nRef;            /* Number of pointers to this structure */
228 };
229 
/*
** Lookup key for an openCnt structure.  It holds the same device and
** inode numbers as a lockKey, but leaves out the process ID.
*/
struct openKey {
  /* Device number */
  dev_t dev;
  /* Inode number */
  ino_t ino;
};
239 
240 /*
241 ** An instance of the following structure is allocated for each open
242 ** inode.  This structure keeps track of the number of locks on that
243 ** inode.  If a close is attempted against an inode that is holding
244 ** locks, the close is deferred until all locks clear by adding the
245 ** file descriptor to be closed to the pending list.
246 */
247 struct openCnt {
248   struct openKey key;   /* The lookup key */
249   int nRef;             /* Number of pointers to this structure */
250   int nLock;            /* Number of outstanding locks */
251   int nPending;         /* Number of pending close() operations */
252   int *aPending;        /* Malloced space holding fd's awaiting a close() */
253 };
254 
255 /*
256 ** These hash table maps inodes and process IDs into lockInfo and openCnt
257 ** structures.  Access to these hash tables must be protected by a mutex.
258 */
259 static Hash lockHash = { SQLITE_HASH_BINARY, 0, 0, 0, 0, 0 };
260 static Hash openHash = { SQLITE_HASH_BINARY, 0, 0, 0, 0, 0 };
261 
262 /*
263 ** Release a lockInfo structure previously allocated by findLockInfo().
264 */
releaseLockInfo(struct lockInfo * pLock)265 static void releaseLockInfo(struct lockInfo *pLock){
266   pLock->nRef--;
267   if( pLock->nRef==0 ){
268     sqliteHashInsert(&lockHash, &pLock->key, sizeof(pLock->key), 0);
269     sqliteFree(pLock);
270   }
271 }
272 
273 /*
274 ** Release a openCnt structure previously allocated by findLockInfo().
275 */
releaseOpenCnt(struct openCnt * pOpen)276 static void releaseOpenCnt(struct openCnt *pOpen){
277   pOpen->nRef--;
278   if( pOpen->nRef==0 ){
279     sqliteHashInsert(&openHash, &pOpen->key, sizeof(pOpen->key), 0);
280     sqliteFree(pOpen->aPending);
281     sqliteFree(pOpen);
282   }
283 }
284 
285 /*
286 ** Given a file descriptor, locate lockInfo and openCnt structures that
287 ** describes that file descriptor.  Create a new ones if necessary.  The
288 ** return values might be unset if an error occurs.
289 **
290 ** Return the number of errors.
291 */
findLockInfo(int fd,struct lockInfo ** ppLock,struct openCnt ** ppOpen)292 int findLockInfo(
293   int fd,                      /* The file descriptor used in the key */
294   struct lockInfo **ppLock,    /* Return the lockInfo structure here */
295   struct openCnt **ppOpen   /* Return the openCnt structure here */
296 ){
297   int rc;
298   struct lockKey key1;
299   struct openKey key2;
300   struct stat statbuf;
301   struct lockInfo *pLock;
302   struct openCnt *pOpen;
303   rc = fstat(fd, &statbuf);
304   if( rc!=0 ) return 1;
305   memset(&key1, 0, sizeof(key1));
306   key1.dev = statbuf.st_dev;
307   key1.ino = statbuf.st_ino;
308   key1.pid = getpid();
309   memset(&key2, 0, sizeof(key2));
310   key2.dev = statbuf.st_dev;
311   key2.ino = statbuf.st_ino;
312   pLock = (struct lockInfo*)sqliteHashFind(&lockHash, &key1, sizeof(key1));
313   if( pLock==0 ){
314     struct lockInfo *pOld;
315     pLock = sqliteMallocRaw( sizeof(*pLock) );
316     if( pLock==0 ) return 1;
317     pLock->key = key1;
318     pLock->nRef = 1;
319     pLock->cnt = 0;
320     pOld = sqliteHashInsert(&lockHash, &pLock->key, sizeof(key1), pLock);
321     if( pOld!=0 ){
322       assert( pOld==pLock );
323       sqliteFree(pLock);
324       return 1;
325     }
326   }else{
327     pLock->nRef++;
328   }
329   *ppLock = pLock;
330   pOpen = (struct openCnt*)sqliteHashFind(&openHash, &key2, sizeof(key2));
331   if( pOpen==0 ){
332     struct openCnt *pOld;
333     pOpen = sqliteMallocRaw( sizeof(*pOpen) );
334     if( pOpen==0 ){
335       releaseLockInfo(pLock);
336       return 1;
337     }
338     pOpen->key = key2;
339     pOpen->nRef = 1;
340     pOpen->nLock = 0;
341     pOpen->nPending = 0;
342     pOpen->aPending = 0;
343     pOld = sqliteHashInsert(&openHash, &pOpen->key, sizeof(key2), pOpen);
344     if( pOld!=0 ){
345       assert( pOld==pOpen );
346       sqliteFree(pOpen);
347       releaseLockInfo(pLock);
348       return 1;
349     }
350   }else{
351     pOpen->nRef++;
352   }
353   *ppOpen = pOpen;
354   return 0;
355 }
356 
357 #endif  /** POSIX advisory lock work-around **/
358 
/*
** If we compile with the SQLITE_TEST macro set, then the following block
** of code gives us the ability to simulate a disk I/O error.  This is
** used for testing the I/O recovery logic.
**
** sqlite_io_error_pending is a countdown.  While it is non-zero, each
** SimulateIOError(A) decrements it, and the invocation that finds it equal
** to 1 calls local_ioerr() and makes the enclosing function return A.
**
** The macro is wrapped in do{...}while(0) so that it is a single
** statement: "if( x ) SimulateIOError(A); else ..." binds the else to the
** outer if, as written.  Every use must be followed by a semicolon.
**
** Without SQLITE_TEST the macro expands to nothing.
*/
#ifdef SQLITE_TEST
int sqlite_io_error_pending = 0;
static void local_ioerr(void){
  sqlite_io_error_pending = 0;  /* Really just a place to set a breakpoint */
}
#define SimulateIOError(A)  \
   do{ \
     if( sqlite_io_error_pending ){ \
       if( sqlite_io_error_pending-- == 1 ){ local_ioerr(); return A; } \
     } \
   }while(0)
#else
#define SimulateIOError(A)
#endif
375 
/*
** When testing, keep a count of the number of open files.  OpenCounter(X)
** adds X to sqlite_open_file_count; the open routines pass +1 and the
** close routine passes -1.  The expansion is fully parenthesized so it
** behaves as one expression wherever it is used.  Without SQLITE_TEST the
** macro expands to nothing.
*/
#ifdef SQLITE_TEST
int sqlite_open_file_count = 0;
#define OpenCounter(X)  (sqlite_open_file_count+=(X))
#else
#define OpenCounter(X)
#endif
385 
386 
387 /*
388 ** Delete the named file
389 */
sqliteOsDelete(const char * zFilename)390 int sqliteOsDelete(const char *zFilename){
391 #if OS_UNIX
392   unlink(zFilename);
393 #endif
394 #if OS_WIN
395   DeleteFile(zFilename);
396 #endif
397 #if OS_MAC
398   unlink(zFilename);
399 #endif
400   return SQLITE_OK;
401 }
402 
/*
** Return TRUE (non-zero) if a file named zFilename exists and FALSE (zero)
** otherwise.  Unix and Mac probe with access() using mode 0; Windows asks
** for the file attributes and treats the 0xffffffff result as "no file".
*/
int sqliteOsFileExists(const char *zFilename){
  int exists = 0;
#if OS_UNIX
  exists = access(zFilename, 0)==0;
#elif OS_WIN
  exists = GetFileAttributes(zFilename) != 0xffffffff;
#elif OS_MAC
  exists = access(zFilename, 0)==0;
#endif
  return exists;
}
417 
418 
#if 0 /* NOT USED */
/*
** Change the name of an existing file from zOldName to zNewName.
**
** Unix creates a hard link under the new name and then unlinks the old
** name (the result of the unlink is not checked).  Windows uses
** MoveFile().  The Mac version is not implemented and always fails.
**
** Returns SQLITE_OK on success and SQLITE_ERROR on failure.
*/
int sqliteOsFileRename(const char *zOldName, const char *zNewName){
#if OS_UNIX
  if( link(zOldName, zNewName)!=0 ) return SQLITE_ERROR;
  unlink(zOldName);
  return SQLITE_OK;
#endif
#if OS_WIN
  return MoveFile(zOldName, zNewName) ? SQLITE_OK : SQLITE_ERROR;
#endif
#if OS_MAC
  /**** FIX ME ***/
  return SQLITE_ERROR;
#endif
}
#endif /* NOT USED */
443 
444 /*
445 ** Attempt to open a file for both reading and writing.  If that
446 ** fails, try opening it read-only.  If the file does not exist,
447 ** try to create it.
448 **
449 ** On success, a handle for the open file is written to *id
450 ** and *pReadonly is set to 0 if the file was opened for reading and
451 ** writing or 1 if the file was opened read-only.  The function returns
452 ** SQLITE_OK.
453 **
454 ** On failure, the function returns SQLITE_CANTOPEN and leaves
455 ** *id and *pReadonly unchanged.
456 */
sqliteOsOpenReadWrite(const char * zFilename,OsFile * id,int * pReadonly)457 int sqliteOsOpenReadWrite(
458   const char *zFilename,
459   OsFile *id,
460   int *pReadonly
461 ){
462 #if OS_UNIX
463   int rc;
464   id->dirfd = -1;
465   id->fd = open(zFilename, O_RDWR|O_CREAT|O_LARGEFILE|O_BINARY, 0644);
466   if( id->fd<0 ){
467 #ifdef EISDIR
468     if( errno==EISDIR ){
469       return SQLITE_CANTOPEN;
470     }
471 #endif
472     id->fd = open(zFilename, O_RDONLY|O_LARGEFILE|O_BINARY);
473     if( id->fd<0 ){
474       return SQLITE_CANTOPEN;
475     }
476     *pReadonly = 1;
477   }else{
478     *pReadonly = 0;
479   }
480   sqliteOsEnterMutex();
481   rc = findLockInfo(id->fd, &id->pLock, &id->pOpen);
482   sqliteOsLeaveMutex();
483   if( rc ){
484     close(id->fd);
485     return SQLITE_NOMEM;
486   }
487   id->locked = 0;
488   TRACE3("OPEN    %-3d %s\n", id->fd, zFilename);
489   OpenCounter(+1);
490   return SQLITE_OK;
491 #endif
492 #if OS_WIN
493   HANDLE h = CreateFile(zFilename,
494      GENERIC_READ | GENERIC_WRITE,
495      FILE_SHARE_READ | FILE_SHARE_WRITE,
496      NULL,
497      OPEN_ALWAYS,
498      FILE_ATTRIBUTE_NORMAL | FILE_FLAG_RANDOM_ACCESS,
499      NULL
500   );
501   if( h==INVALID_HANDLE_VALUE ){
502     h = CreateFile(zFilename,
503        GENERIC_READ,
504        FILE_SHARE_READ,
505        NULL,
506        OPEN_ALWAYS,
507        FILE_ATTRIBUTE_NORMAL | FILE_FLAG_RANDOM_ACCESS,
508        NULL
509     );
510     if( h==INVALID_HANDLE_VALUE ){
511       return SQLITE_CANTOPEN;
512     }
513     *pReadonly = 1;
514   }else{
515     *pReadonly = 0;
516   }
517   id->h = h;
518   id->locked = 0;
519   OpenCounter(+1);
520   return SQLITE_OK;
521 #endif
522 #if OS_MAC
523   FSSpec fsSpec;
524 # ifdef _LARGE_FILE
525   HFSUniStr255 dfName;
526   FSRef fsRef;
527   if( __path2fss(zFilename, &fsSpec) != noErr ){
528     if( HCreate(fsSpec.vRefNum, fsSpec.parID, fsSpec.name, 'SQLI', cDocumentFile) != noErr )
529       return SQLITE_CANTOPEN;
530   }
531   if( FSpMakeFSRef(&fsSpec, &fsRef) != noErr )
532     return SQLITE_CANTOPEN;
533   FSGetDataForkName(&dfName);
534   if( FSOpenFork(&fsRef, dfName.length, dfName.unicode,
535                  fsRdWrShPerm, &(id->refNum)) != noErr ){
536     if( FSOpenFork(&fsRef, dfName.length, dfName.unicode,
537                    fsRdWrPerm, &(id->refNum)) != noErr ){
538       if (FSOpenFork(&fsRef, dfName.length, dfName.unicode,
539                    fsRdPerm, &(id->refNum)) != noErr )
540         return SQLITE_CANTOPEN;
541       else
542         *pReadonly = 1;
543     } else
544       *pReadonly = 0;
545   } else
546     *pReadonly = 0;
547 # else
548   __path2fss(zFilename, &fsSpec);
549   if( !sqliteOsFileExists(zFilename) ){
550     if( HCreate(fsSpec.vRefNum, fsSpec.parID, fsSpec.name, 'SQLI', cDocumentFile) != noErr )
551       return SQLITE_CANTOPEN;
552   }
553   if( HOpenDF(fsSpec.vRefNum, fsSpec.parID, fsSpec.name, fsRdWrShPerm, &(id->refNum)) != noErr ){
554     if( HOpenDF(fsSpec.vRefNum, fsSpec.parID, fsSpec.name, fsRdWrPerm, &(id->refNum)) != noErr ){
555       if( HOpenDF(fsSpec.vRefNum, fsSpec.parID, fsSpec.name, fsRdPerm, &(id->refNum)) != noErr )
556         return SQLITE_CANTOPEN;
557       else
558         *pReadonly = 1;
559     } else
560       *pReadonly = 0;
561   } else
562     *pReadonly = 0;
563 # endif
564   if( HOpenRF(fsSpec.vRefNum, fsSpec.parID, fsSpec.name, fsRdWrShPerm, &(id->refNumRF)) != noErr){
565     id->refNumRF = -1;
566   }
567   id->locked = 0;
568   id->delOnClose = 0;
569   OpenCounter(+1);
570   return SQLITE_OK;
571 #endif
572 }
573 
574 
575 /*
576 ** Attempt to open a new file for exclusive access by this process.
577 ** The file will be opened for both reading and writing.  To avoid
578 ** a potential security problem, we do not allow the file to have
579 ** previously existed.  Nor do we allow the file to be a symbolic
580 ** link.
581 **
582 ** If delFlag is true, then make arrangements to automatically delete
583 ** the file when it is closed.
584 **
585 ** On success, write the file handle into *id and return SQLITE_OK.
586 **
587 ** On failure, return SQLITE_CANTOPEN.
588 */
sqliteOsOpenExclusive(const char * zFilename,OsFile * id,int delFlag)589 int sqliteOsOpenExclusive(const char *zFilename, OsFile *id, int delFlag){
590 #if OS_UNIX
591   int rc;
592   if( access(zFilename, 0)==0 ){
593     return SQLITE_CANTOPEN;
594   }
595   id->dirfd = -1;
596   id->fd = open(zFilename,
597                 O_RDWR|O_CREAT|O_EXCL|O_NOFOLLOW|O_LARGEFILE|O_BINARY, 0600);
598   if( id->fd<0 ){
599     return SQLITE_CANTOPEN;
600   }
601   sqliteOsEnterMutex();
602   rc = findLockInfo(id->fd, &id->pLock, &id->pOpen);
603   sqliteOsLeaveMutex();
604   if( rc ){
605     close(id->fd);
606     unlink(zFilename);
607     return SQLITE_NOMEM;
608   }
609   id->locked = 0;
610   if( delFlag ){
611     unlink(zFilename);
612   }
613   TRACE3("OPEN-EX %-3d %s\n", id->fd, zFilename);
614   OpenCounter(+1);
615   return SQLITE_OK;
616 #endif
617 #if OS_WIN
618   HANDLE h;
619   int fileflags;
620   if( delFlag ){
621     fileflags = FILE_ATTRIBUTE_TEMPORARY | FILE_FLAG_RANDOM_ACCESS
622                      | FILE_FLAG_DELETE_ON_CLOSE;
623   }else{
624     fileflags = FILE_FLAG_RANDOM_ACCESS;
625   }
626   h = CreateFile(zFilename,
627      GENERIC_READ | GENERIC_WRITE,
628      0,
629      NULL,
630      CREATE_ALWAYS,
631      fileflags,
632      NULL
633   );
634   if( h==INVALID_HANDLE_VALUE ){
635     return SQLITE_CANTOPEN;
636   }
637   id->h = h;
638   id->locked = 0;
639   OpenCounter(+1);
640   return SQLITE_OK;
641 #endif
642 #if OS_MAC
643   FSSpec fsSpec;
644 # ifdef _LARGE_FILE
645   HFSUniStr255 dfName;
646   FSRef fsRef;
647   __path2fss(zFilename, &fsSpec);
648   if( HCreate(fsSpec.vRefNum, fsSpec.parID, fsSpec.name, 'SQLI', cDocumentFile) != noErr )
649     return SQLITE_CANTOPEN;
650   if( FSpMakeFSRef(&fsSpec, &fsRef) != noErr )
651     return SQLITE_CANTOPEN;
652   FSGetDataForkName(&dfName);
653   if( FSOpenFork(&fsRef, dfName.length, dfName.unicode,
654                  fsRdWrPerm, &(id->refNum)) != noErr )
655     return SQLITE_CANTOPEN;
656 # else
657   __path2fss(zFilename, &fsSpec);
658   if( HCreate(fsSpec.vRefNum, fsSpec.parID, fsSpec.name, 'SQLI', cDocumentFile) != noErr )
659     return SQLITE_CANTOPEN;
660   if( HOpenDF(fsSpec.vRefNum, fsSpec.parID, fsSpec.name, fsRdWrPerm, &(id->refNum)) != noErr )
661     return SQLITE_CANTOPEN;
662 # endif
663   id->refNumRF = -1;
664   id->locked = 0;
665   id->delOnClose = delFlag;
666   if (delFlag)
667     id->pathToDel = sqliteOsFullPathname(zFilename);
668   OpenCounter(+1);
669   return SQLITE_OK;
670 #endif
671 }
672 
673 /*
674 ** Attempt to open a new file for read-only access.
675 **
676 ** On success, write the file handle into *id and return SQLITE_OK.
677 **
678 ** On failure, return SQLITE_CANTOPEN.
679 */
sqliteOsOpenReadOnly(const char * zFilename,OsFile * id)680 int sqliteOsOpenReadOnly(const char *zFilename, OsFile *id){
681 #if OS_UNIX
682   int rc;
683   id->dirfd = -1;
684   id->fd = open(zFilename, O_RDONLY|O_LARGEFILE|O_BINARY);
685   if( id->fd<0 ){
686     return SQLITE_CANTOPEN;
687   }
688   sqliteOsEnterMutex();
689   rc = findLockInfo(id->fd, &id->pLock, &id->pOpen);
690   sqliteOsLeaveMutex();
691   if( rc ){
692     close(id->fd);
693     return SQLITE_NOMEM;
694   }
695   id->locked = 0;
696   TRACE3("OPEN-RO %-3d %s\n", id->fd, zFilename);
697   OpenCounter(+1);
698   return SQLITE_OK;
699 #endif
700 #if OS_WIN
701   HANDLE h = CreateFile(zFilename,
702      GENERIC_READ,
703      0,
704      NULL,
705      OPEN_EXISTING,
706      FILE_ATTRIBUTE_NORMAL | FILE_FLAG_RANDOM_ACCESS,
707      NULL
708   );
709   if( h==INVALID_HANDLE_VALUE ){
710     return SQLITE_CANTOPEN;
711   }
712   id->h = h;
713   id->locked = 0;
714   OpenCounter(+1);
715   return SQLITE_OK;
716 #endif
717 #if OS_MAC
718   FSSpec fsSpec;
719 # ifdef _LARGE_FILE
720   HFSUniStr255 dfName;
721   FSRef fsRef;
722   if( __path2fss(zFilename, &fsSpec) != noErr )
723     return SQLITE_CANTOPEN;
724   if( FSpMakeFSRef(&fsSpec, &fsRef) != noErr )
725     return SQLITE_CANTOPEN;
726   FSGetDataForkName(&dfName);
727   if( FSOpenFork(&fsRef, dfName.length, dfName.unicode,
728                  fsRdPerm, &(id->refNum)) != noErr )
729     return SQLITE_CANTOPEN;
730 # else
731   __path2fss(zFilename, &fsSpec);
732   if( HOpenDF(fsSpec.vRefNum, fsSpec.parID, fsSpec.name, fsRdPerm, &(id->refNum)) != noErr )
733     return SQLITE_CANTOPEN;
734 # endif
735   if( HOpenRF(fsSpec.vRefNum, fsSpec.parID, fsSpec.name, fsRdWrShPerm, &(id->refNumRF)) != noErr){
736     id->refNumRF = -1;
737   }
738   id->locked = 0;
739   id->delOnClose = 0;
740   OpenCounter(+1);
741   return SQLITE_OK;
742 #endif
743 }
744 
745 /*
746 ** Attempt to open a file descriptor for the directory that contains a
747 ** file.  This file descriptor can be used to fsync() the directory
748 ** in order to make sure the creation of a new file is actually written
749 ** to disk.
750 **
751 ** This routine is only meaningful for Unix.  It is a no-op under
752 ** windows since windows does not support hard links.
753 **
754 ** On success, a handle for a previously open file is at *id is
755 ** updated with the new directory file descriptor and SQLITE_OK is
756 ** returned.
757 **
758 ** On failure, the function returns SQLITE_CANTOPEN and leaves
759 ** *id unchanged.
760 */
sqliteOsOpenDirectory(const char * zDirname,OsFile * id)761 int sqliteOsOpenDirectory(
762   const char *zDirname,
763   OsFile *id
764 ){
765 #if OS_UNIX
766   if( id->fd<0 ){
767     /* Do not open the directory if the corresponding file is not already
768     ** open. */
769     return SQLITE_CANTOPEN;
770   }
771   assert( id->dirfd<0 );
772   id->dirfd = open(zDirname, O_RDONLY|O_BINARY, 0644);
773   if( id->dirfd<0 ){
774     return SQLITE_CANTOPEN;
775   }
776   TRACE3("OPENDIR %-3d %s\n", id->dirfd, zDirname);
777 #endif
778   return SQLITE_OK;
779 }
780 
781 /*
782 ** If the following global variable points to a string which is the
783 ** name of a directory, then that directory will be used to store
784 ** temporary files.
785 */
786 const char *sqlite_temp_directory = 0;
787 
788 /*
789 ** Create a temporary file name in zBuf.  zBuf must be big enough to
790 ** hold at least SQLITE_TEMPNAME_SIZE characters.
791 */
sqliteOsTempFileName(char * zBuf)792 int sqliteOsTempFileName(char *zBuf){
793 #if OS_UNIX
794   static const char *azDirs[] = {
795      0,
796      "/var/tmp",
797      "/usr/tmp",
798      "/tmp",
799      ".",
800   };
801   static unsigned char zChars[] =
802     "abcdefghijklmnopqrstuvwxyz"
803     "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
804     "0123456789";
805   int i, j;
806   struct stat buf;
807   const char *zDir = ".";
808   azDirs[0] = sqlite_temp_directory;
809   for(i=0; i<sizeof(azDirs)/sizeof(azDirs[0]); i++){
810     if( azDirs[i]==0 ) continue;
811     if( stat(azDirs[i], &buf) ) continue;
812     if( !S_ISDIR(buf.st_mode) ) continue;
813     if( access(azDirs[i], 07) ) continue;
814     zDir = azDirs[i];
815     break;
816   }
817   do{
818     sprintf(zBuf, "%s/"TEMP_FILE_PREFIX, zDir);
819     j = strlen(zBuf);
820     sqliteRandomness(15, &zBuf[j]);
821     for(i=0; i<15; i++, j++){
822       zBuf[j] = (char)zChars[ ((unsigned char)zBuf[j])%(sizeof(zChars)-1) ];
823     }
824     zBuf[j] = 0;
825   }while( access(zBuf,0)==0 );
826 #endif
827 #if OS_WIN
828   static char zChars[] =
829     "abcdefghijklmnopqrstuvwxyz"
830     "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
831     "0123456789";
832   int i, j;
833   const char *zDir;
834   char zTempPath[SQLITE_TEMPNAME_SIZE];
835   if( sqlite_temp_directory==0 ){
836     GetTempPath(SQLITE_TEMPNAME_SIZE-30, zTempPath);
837     for(i=strlen(zTempPath); i>0 && zTempPath[i-1]=='\\'; i--){}
838     zTempPath[i] = 0;
839     zDir = zTempPath;
840   }else{
841     zDir = sqlite_temp_directory;
842   }
843   for(;;){
844     sprintf(zBuf, "%s\\"TEMP_FILE_PREFIX, zDir);
845     j = strlen(zBuf);
846     sqliteRandomness(15, &zBuf[j]);
847     for(i=0; i<15; i++, j++){
848       zBuf[j] = (char)zChars[ ((unsigned char)zBuf[j])%(sizeof(zChars)-1) ];
849     }
850     zBuf[j] = 0;
851     if( !sqliteOsFileExists(zBuf) ) break;
852   }
853 #endif
854 #if OS_MAC
855   static char zChars[] =
856     "abcdefghijklmnopqrstuvwxyz"
857     "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
858     "0123456789";
859   int i, j;
860   char *zDir;
861   char zTempPath[SQLITE_TEMPNAME_SIZE];
862   char zdirName[32];
863   CInfoPBRec infoRec;
864   Str31 dirName;
865   memset(&infoRec, 0, sizeof(infoRec));
866   memset(zTempPath, 0, SQLITE_TEMPNAME_SIZE);
867   if( sqlite_temp_directory!=0 ){
868     zDir = sqlite_temp_directory;
869   }else if( FindFolder(kOnSystemDisk, kTemporaryFolderType,  kCreateFolder,
870        &(infoRec.dirInfo.ioVRefNum), &(infoRec.dirInfo.ioDrParID)) == noErr ){
871     infoRec.dirInfo.ioNamePtr = dirName;
872     do{
873       infoRec.dirInfo.ioFDirIndex = -1;
874       infoRec.dirInfo.ioDrDirID = infoRec.dirInfo.ioDrParID;
875       if( PBGetCatInfoSync(&infoRec) == noErr ){
876         CopyPascalStringToC(dirName, zdirName);
877         i = strlen(zdirName);
878         memmove(&(zTempPath[i+1]), zTempPath, strlen(zTempPath));
879         strcpy(zTempPath, zdirName);
880         zTempPath[i] = ':';
881       }else{
882         *zTempPath = 0;
883         break;
884       }
885     } while( infoRec.dirInfo.ioDrDirID != fsRtDirID );
886     zDir = zTempPath;
887   }
888   if( zDir[0]==0 ){
889     getcwd(zTempPath, SQLITE_TEMPNAME_SIZE-24);
890     zDir = zTempPath;
891   }
892   for(;;){
893     sprintf(zBuf, "%s"TEMP_FILE_PREFIX, zDir);
894     j = strlen(zBuf);
895     sqliteRandomness(15, &zBuf[j]);
896     for(i=0; i<15; i++, j++){
897       zBuf[j] = (char)zChars[ ((unsigned char)zBuf[j])%(sizeof(zChars)-1) ];
898     }
899     zBuf[j] = 0;
900     if( !sqliteOsFileExists(zBuf) ) break;
901   }
902 #endif
903   return SQLITE_OK;
904 }
905 
906 /*
907 ** Close a file.
908 */
sqliteOsClose(OsFile * id)909 int sqliteOsClose(OsFile *id){
910 #if OS_UNIX
911   sqliteOsUnlock(id);
912   if( id->dirfd>=0 ) close(id->dirfd);
913   id->dirfd = -1;
914   sqliteOsEnterMutex();
915   if( id->pOpen->nLock ){
916     /* If there are outstanding locks, do not actually close the file just
917     ** yet because that would clear those locks.  Instead, add the file
918     ** descriptor to pOpen->aPending.  It will be automatically closed when
919     ** the last lock is cleared.
920     */
921     int *aNew;
922     struct openCnt *pOpen = id->pOpen;
923     pOpen->nPending++;
924     aNew = sqliteRealloc( pOpen->aPending, pOpen->nPending*sizeof(int) );
925     if( aNew==0 ){
926       /* If a malloc fails, just leak the file descriptor */
927     }else{
928       pOpen->aPending = aNew;
929       pOpen->aPending[pOpen->nPending-1] = id->fd;
930     }
931   }else{
932     /* There are no outstanding locks so we can close the file immediately */
933     close(id->fd);
934   }
935   releaseLockInfo(id->pLock);
936   releaseOpenCnt(id->pOpen);
937   sqliteOsLeaveMutex();
938   TRACE2("CLOSE   %-3d\n", id->fd);
939   OpenCounter(-1);
940   return SQLITE_OK;
941 #endif
942 #if OS_WIN
943   CloseHandle(id->h);
944   OpenCounter(-1);
945   return SQLITE_OK;
946 #endif
947 #if OS_MAC
948   if( id->refNumRF!=-1 )
949     FSClose(id->refNumRF);
950 # ifdef _LARGE_FILE
951   FSCloseFork(id->refNum);
952 # else
953   FSClose(id->refNum);
954 # endif
955   if( id->delOnClose ){
956     unlink(id->pathToDel);
957     sqliteFree(id->pathToDel);
958   }
959   OpenCounter(-1);
960   return SQLITE_OK;
961 #endif
962 }
963 
964 /*
965 ** Read data from a file into a buffer.  Return SQLITE_OK if all
966 ** bytes were read successfully and SQLITE_IOERR if anything goes
967 ** wrong.
968 */
sqliteOsRead(OsFile * id,void * pBuf,int amt)969 int sqliteOsRead(OsFile *id, void *pBuf, int amt){
970 #if OS_UNIX
971   int got;
972   SimulateIOError(SQLITE_IOERR);
973   TIMER_START;
974   got = read(id->fd, pBuf, amt);
975   TIMER_END;
976   TRACE4("READ    %-3d %7d %d\n", id->fd, last_page, elapse);
977   SEEK(0);
978   /* if( got<0 ) got = 0; */
979   if( got==amt ){
980     return SQLITE_OK;
981   }else{
982     return SQLITE_IOERR;
983   }
984 #endif
985 #if OS_WIN
986   DWORD got;
987   SimulateIOError(SQLITE_IOERR);
988   TRACE2("READ %d\n", last_page);
989   if( !ReadFile(id->h, pBuf, amt, &got, 0) ){
990     got = 0;
991   }
992   if( got==(DWORD)amt ){
993     return SQLITE_OK;
994   }else{
995     return SQLITE_IOERR;
996   }
997 #endif
998 #if OS_MAC
999   int got;
1000   SimulateIOError(SQLITE_IOERR);
1001   TRACE2("READ %d\n", last_page);
1002 # ifdef _LARGE_FILE
1003   FSReadFork(id->refNum, fsAtMark, 0, (ByteCount)amt, pBuf, (ByteCount*)&got);
1004 # else
1005   got = amt;
1006   FSRead(id->refNum, &got, pBuf);
1007 # endif
1008   if( got==amt ){
1009     return SQLITE_OK;
1010   }else{
1011     return SQLITE_IOERR;
1012   }
1013 #endif
1014 }
1015 
1016 /*
1017 ** Write data from a buffer into a file.  Return SQLITE_OK on success
1018 ** or some other error code on failure.
1019 */
sqliteOsWrite(OsFile * id,const void * pBuf,int amt)1020 int sqliteOsWrite(OsFile *id, const void *pBuf, int amt){
1021 #if OS_UNIX
1022   int wrote = 0;
1023   SimulateIOError(SQLITE_IOERR);
1024   TIMER_START;
1025   while( amt>0 && (wrote = write(id->fd, pBuf, amt))>0 ){
1026     amt -= wrote;
1027     pBuf = &((char*)pBuf)[wrote];
1028   }
1029   TIMER_END;
1030   TRACE4("WRITE   %-3d %7d %d\n", id->fd, last_page, elapse);
1031   SEEK(0);
1032   if( amt>0 ){
1033     return SQLITE_FULL;
1034   }
1035   return SQLITE_OK;
1036 #endif
1037 #if OS_WIN
1038   int rc;
1039   DWORD wrote;
1040   SimulateIOError(SQLITE_IOERR);
1041   TRACE2("WRITE %d\n", last_page);
1042   while( amt>0 && (rc = WriteFile(id->h, pBuf, amt, &wrote, 0))!=0 && wrote>0 ){
1043     amt -= wrote;
1044     pBuf = &((char*)pBuf)[wrote];
1045   }
1046   if( !rc || amt>(int)wrote ){
1047     return SQLITE_FULL;
1048   }
1049   return SQLITE_OK;
1050 #endif
1051 #if OS_MAC
1052   OSErr oserr;
1053   int wrote = 0;
1054   SimulateIOError(SQLITE_IOERR);
1055   TRACE2("WRITE %d\n", last_page);
1056   while( amt>0 ){
1057 # ifdef _LARGE_FILE
1058     oserr = FSWriteFork(id->refNum, fsAtMark, 0,
1059                         (ByteCount)amt, pBuf, (ByteCount*)&wrote);
1060 # else
1061     wrote = amt;
1062     oserr = FSWrite(id->refNum, &wrote, pBuf);
1063 # endif
1064     if( wrote == 0 || oserr != noErr)
1065       break;
1066     amt -= wrote;
1067     pBuf = &((char*)pBuf)[wrote];
1068   }
1069   if( oserr != noErr || amt>wrote ){
1070     return SQLITE_FULL;
1071   }
1072   return SQLITE_OK;
1073 #endif
1074 }
1075 
1076 /*
1077 ** Move the read/write pointer in a file.
1078 */
sqliteOsSeek(OsFile * id,off_t offset)1079 int sqliteOsSeek(OsFile *id, off_t offset){
1080   SEEK(offset/1024 + 1);
1081 #if OS_UNIX
1082   lseek(id->fd, offset, SEEK_SET);
1083   return SQLITE_OK;
1084 #endif
1085 #if OS_WIN
1086   {
1087     LONG upperBits = offset>>32;
1088     LONG lowerBits = offset & 0xffffffff;
1089     DWORD rc;
1090     rc = SetFilePointer(id->h, lowerBits, &upperBits, FILE_BEGIN);
1091     /* TRACE3("SEEK rc=0x%x upper=0x%x\n", rc, upperBits); */
1092   }
1093   return SQLITE_OK;
1094 #endif
1095 #if OS_MAC
1096   {
1097     off_t curSize;
1098     if( sqliteOsFileSize(id, &curSize) != SQLITE_OK ){
1099       return SQLITE_IOERR;
1100     }
1101     if( offset >= curSize ){
1102       if( sqliteOsTruncate(id, offset+1) != SQLITE_OK ){
1103         return SQLITE_IOERR;
1104       }
1105     }
1106 # ifdef _LARGE_FILE
1107     if( FSSetForkPosition(id->refNum, fsFromStart, offset) != noErr ){
1108 # else
1109     if( SetFPos(id->refNum, fsFromStart, offset) != noErr ){
1110 # endif
1111       return SQLITE_IOERR;
1112     }else{
1113       return SQLITE_OK;
1114     }
1115   }
1116 #endif
1117 }
1118 
/*
** When SQLITE_NOSYNC is defined, every fsync() call that follows in this
** file is replaced by the constant 0.  sqliteOsSync() treats a zero
** result from fsync() as success, so syncing becomes a no-op.
*/
#ifdef SQLITE_NOSYNC
# define fsync(X) 0
#endif
1122 
1123 /*
1124 ** Make sure all writes to a particular file are committed to disk.
1125 **
1126 ** Under Unix, also make sure that the directory entry for the file
1127 ** has been created by fsync-ing the directory that contains the file.
1128 ** If we do not do this and we encounter a power failure, the directory
1129 ** entry for the journal might not exist after we reboot.  The next
1130 ** SQLite to access the file will not know that the journal exists (because
1131 ** the directory entry for the journal was never created) and the transaction
1132 ** will not roll back - possibly leading to database corruption.
1133 */
1134 int sqliteOsSync(OsFile *id){
1135 #if OS_UNIX
1136   SimulateIOError(SQLITE_IOERR);
1137   TRACE2("SYNC    %-3d\n", id->fd);
1138   if( fsync(id->fd) ){
1139     return SQLITE_IOERR;
1140   }else{
1141     if( id->dirfd>=0 ){
1142       TRACE2("DIRSYNC %-3d\n", id->dirfd);
1143       fsync(id->dirfd);
1144       close(id->dirfd);  /* Only need to sync once, so close the directory */
1145       id->dirfd = -1;    /* when we are done. */
1146     }
1147     return SQLITE_OK;
1148   }
1149 #endif
1150 #if OS_WIN
1151   if( FlushFileBuffers(id->h) ){
1152     return SQLITE_OK;
1153   }else{
1154     return SQLITE_IOERR;
1155   }
1156 #endif
1157 #if OS_MAC
1158 # ifdef _LARGE_FILE
1159   if( FSFlushFork(id->refNum) != noErr ){
1160 # else
1161   ParamBlockRec params;
1162   memset(&params, 0, sizeof(ParamBlockRec));
1163   params.ioParam.ioRefNum = id->refNum;
1164   if( PBFlushFileSync(&params) != noErr ){
1165 # endif
1166     return SQLITE_IOERR;
1167   }else{
1168     return SQLITE_OK;
1169   }
1170 #endif
1171 }
1172 
1173 /*
1174 ** Truncate an open file to a specified size
1175 */
1176 int sqliteOsTruncate(OsFile *id, off_t nByte){
1177   SimulateIOError(SQLITE_IOERR);
1178 #if OS_UNIX
1179   return ftruncate(id->fd, nByte)==0 ? SQLITE_OK : SQLITE_IOERR;
1180 #endif
1181 #if OS_WIN
1182   {
1183     LONG upperBits = nByte>>32;
1184     SetFilePointer(id->h, nByte, &upperBits, FILE_BEGIN);
1185     SetEndOfFile(id->h);
1186   }
1187   return SQLITE_OK;
1188 #endif
1189 #if OS_MAC
1190 # ifdef _LARGE_FILE
1191   if( FSSetForkSize(id->refNum, fsFromStart, nByte) != noErr){
1192 # else
1193   if( SetEOF(id->refNum, nByte) != noErr ){
1194 # endif
1195     return SQLITE_IOERR;
1196   }else{
1197     return SQLITE_OK;
1198   }
1199 #endif
1200 }
1201 
1202 /*
1203 ** Determine the current size of a file in bytes
1204 */
1205 int sqliteOsFileSize(OsFile *id, off_t *pSize){
1206 #if OS_UNIX
1207   struct stat buf;
1208   SimulateIOError(SQLITE_IOERR);
1209   if( fstat(id->fd, &buf)!=0 ){
1210     return SQLITE_IOERR;
1211   }
1212   *pSize = buf.st_size;
1213   return SQLITE_OK;
1214 #endif
1215 #if OS_WIN
1216   DWORD upperBits, lowerBits;
1217   SimulateIOError(SQLITE_IOERR);
1218   lowerBits = GetFileSize(id->h, &upperBits);
1219   *pSize = (((off_t)upperBits)<<32) + lowerBits;
1220   return SQLITE_OK;
1221 #endif
1222 #if OS_MAC
1223 # ifdef _LARGE_FILE
1224   if( FSGetForkSize(id->refNum, pSize) != noErr){
1225 # else
1226   if( GetEOF(id->refNum, pSize) != noErr ){
1227 # endif
1228     return SQLITE_IOERR;
1229   }else{
1230     return SQLITE_OK;
1231   }
1232 #endif
1233 }
1234 
#if OS_WIN
/*
** Report whether the host belongs to the WinNT family (WinNT, Win2K,
** WinXP): non-zero if so, zero for Win95, Win98 and WinME.
**
** Win95/98/ME lack the LockFileEx() API.  It is still possible to link
** statically against that API as long as it is never called when running
** on Win95/98/ME.  Callers use this routine to find out whether calling
** LockFileEx() is safe on the current host.
**
** The platform is queried once and the answer is cached in a static.
*/
int isNT(void){
  static int osType = 0;   /* 0=unknown 1=win95 2=winNT */
  if( osType!=0 ){
    return osType==2;
  }
  {
    OSVERSIONINFO sInfo;
    sInfo.dwOSVersionInfoSize = sizeof(sInfo);
    GetVersionEx(&sInfo);
    if( sInfo.dwPlatformId==VER_PLATFORM_WIN32_NT ){
      osType = 2;
    }else{
      osType = 1;
    }
  }
  return osType==2;
}
#endif
1258 
/*
** Windows file locking notes:  [similar issues apply to MacOS]
**
** We cannot use LockFileEx() or UnlockFileEx() on Win95/98/ME because
** those functions are not available.  So we use only LockFile() and
** UnlockFile().
**
** LockFile() prevents not just writing but also reading by other processes.
** (This is a design error on the part of Windows, but there is nothing
** we can do about that.)  So the region used for locking is at the
** end of the file where it is unlikely to ever interfere with an
** actual read attempt.
**
** A database read lock is obtained by locking a single randomly-chosen
** byte out of a specific range of bytes.  The lock byte is chosen at
** random so two separate readers can probably access the file at the
** same time, unless they are unlucky and choose the same lock byte.
** A database write lock is obtained by locking all bytes in the range.
** There can only be one writer.
**
** A lock is obtained on the first byte of the lock range before acquiring
** either a read lock or a write lock.  This prevents two processes from
** attempting to get a lock at the same time.  The semantics of
** sqliteOsReadLock() require that if there is already a write lock, that
** lock is converted into a read lock atomically.  The lock on the first
** byte allows us to drop the old write lock and get the read lock without
** another process jumping into the middle and messing us up.  The same
** argument applies to sqliteOsWriteLock().
**
** On WinNT/2K/XP systems, LockFileEx() and UnlockFileEx() are available,
** which means we can use reader/writer locks.  When reader/writer locks
** are used, the lock is placed on the same range of bytes that is used
** for probabilistic locking in Win95/98/ME.  Hence, the locking scheme
** will support two or more Win95 readers or two or more WinNT readers.
** But a single Win95 reader will lock out all WinNT readers and a single
** WinNT reader will lock out all other Win95 readers.
**
** Note: On MacOS we use the resource fork for locking.
**
** The following #defines specify the range of bytes used for locking.
** N_LOCKBYTE is the number of bytes available for doing the locking.
** The first byte, used to hold the lock while the lock is changing, does
** not count toward this number.  FIRST_LOCKBYTE is the address of
** the first byte in the range of bytes used for locking.
**
** Resulting layout: byte FIRST_LOCKBYTE is the guard byte, and bytes
** FIRST_LOCKBYTE+1 through FIRST_LOCKBYTE+N_LOCKBYTE are the lock bytes.
*/
#define N_LOCKBYTE       10239
#if OS_MAC
# define FIRST_LOCKBYTE   (0x000fffff - N_LOCKBYTE)
#else
# define FIRST_LOCKBYTE   (0xffffffff - N_LOCKBYTE)
#endif
1310 
/*
** Change the status of the lock on the file "id" to be a readlock.
** If the file was write locked, then this reduces the lock to a read.
** If the file was read locked, then this acquires a new read lock.
**
** Return SQLITE_OK on success and SQLITE_BUSY on failure.  If this
** library was compiled with large file support (LFS) but LFS is not
** available on the host, then an SQLITE_NOLFS is returned.
*/
int sqliteOsReadLock(OsFile *id){
#if OS_UNIX
  int rc;
  sqliteOsEnterMutex();
  if( id->pLock->cnt>0 ){
    /* A positive count: the file is already read locked through the
    ** shared lock record, so no system call is needed.  If this OsFile
    ** does not hold a lock yet, just register it as one more holder.
    */
    if( !id->locked ){
      id->pLock->cnt++;
      id->locked = 1;
      id->pOpen->nLock++;
    }
    rc = SQLITE_OK;
  }else if( id->locked || id->pLock->cnt==0 ){
    /* Either nothing is locked (cnt==0), or cnt is negative and this
    ** OsFile is the lock holder (sqliteOsWriteLock() sets cnt to -1).
    ** Ask the kernel for a read lock; l_start = l_len = 0 covers the
    ** whole file.  F_SETLK does not block.
    */
    struct flock lock;
    int s;
    lock.l_type = F_RDLCK;
    lock.l_whence = SEEK_SET;
    lock.l_start = lock.l_len = 0L;
    s = fcntl(id->fd, F_SETLK, &lock);
    if( s!=0 ){
      rc = (errno==EINVAL) ? SQLITE_NOLFS : SQLITE_BUSY;
    }else{
      rc = SQLITE_OK;
      if( !id->locked ){
        id->pOpen->nLock++;
        id->locked = 1;
      }
      id->pLock->cnt = 1;
    }
  }else{
    /* cnt is negative and this OsFile is not the holder */
    rc = SQLITE_BUSY;
  }
  sqliteOsLeaveMutex();
  return rc;
#endif
#if OS_WIN
  int rc;
  if( id->locked>0 ){
    /* Already holding a read lock */
    rc = SQLITE_OK;
  }else{
    int lk;
    int res;
    int cnt = 100;
    /* Pick a random lock byte index in the range 1..N_LOCKBYTE */
    sqliteRandomness(sizeof(lk), &lk);
    lk = (lk & 0x7fffffff)%N_LOCKBYTE + 1;
    /* Try up to 100 times, 1ms apart, to take the guard byte */
    while( cnt-->0 && (res = LockFile(id->h, FIRST_LOCKBYTE, 0, 1, 0))==0 ){
      Sleep(1);
    }
    if( res ){
      /* With the guard byte held, drop any lock on the whole range and
      ** then take the read lock: a LockFileEx() lock on the range on NT
      ** (no exclusive flag is passed), or a single random byte otherwise.
      */
      UnlockFile(id->h, FIRST_LOCKBYTE+1, 0, N_LOCKBYTE, 0);
      if( isNT() ){
        OVERLAPPED ovlp;
        ovlp.Offset = FIRST_LOCKBYTE+1;
        ovlp.OffsetHigh = 0;
        ovlp.hEvent = 0;
        res = LockFileEx(id->h, LOCKFILE_FAIL_IMMEDIATELY,
                          0, N_LOCKBYTE, 0, &ovlp);
      }else{
        res = LockFile(id->h, FIRST_LOCKBYTE+lk, 0, 1, 0);
      }
      /* Release the guard byte whether or not the read lock succeeded */
      UnlockFile(id->h, FIRST_LOCKBYTE, 0, 1, 0);
    }
    if( res ){
      /* Remember which byte was chosen; a positive value marks a read lock */
      id->locked = lk;
      rc = SQLITE_OK;
    }else{
      rc = SQLITE_BUSY;
    }
  }
  return rc;
#endif
#if OS_MAC
  int rc;
  /* Nothing to do if a read lock is already held, or if refNumRF is -1
  ** (no reference available to lock against).
  */
  if( id->locked>0 || id->refNumRF == -1 ){
    rc = SQLITE_OK;
  }else{
    int lk;
    OSErr res;
    int cnt = 5;
    ParamBlockRec params;
    /* Pick a random lock byte index in the range 1..N_LOCKBYTE */
    sqliteRandomness(sizeof(lk), &lk);
    lk = (lk & 0x7fffffff)%N_LOCKBYTE + 1;
    memset(&params, 0, sizeof(params));
    params.ioParam.ioRefNum = id->refNumRF;
    params.ioParam.ioPosMode = fsFromStart;
    params.ioParam.ioPosOffset = FIRST_LOCKBYTE;
    params.ioParam.ioReqCount = 1;
    /* Try up to 5 times to take the guard byte */
    while( cnt-->0 && (res = PBLockRangeSync(&params))!=noErr ){
      UInt32 finalTicks;
      Delay(1, &finalTicks); /* 1/60 sec */
    }
    if( res == noErr ){
      /* Drop any lock on the whole range, lock the one random byte,
      ** then release the guard byte.
      */
      params.ioParam.ioPosOffset = FIRST_LOCKBYTE+1;
      params.ioParam.ioReqCount = N_LOCKBYTE;
      PBUnlockRangeSync(&params);
      params.ioParam.ioPosOffset = FIRST_LOCKBYTE+lk;
      params.ioParam.ioReqCount = 1;
      res = PBLockRangeSync(&params);
      params.ioParam.ioPosOffset = FIRST_LOCKBYTE;
      params.ioParam.ioReqCount = 1;
      PBUnlockRangeSync(&params);
    }
    if( res == noErr ){
      id->locked = lk;
      rc = SQLITE_OK;
    }else{
      rc = SQLITE_BUSY;
    }
  }
  return rc;
#endif
}
1431 
/*
** Change the lock status to be an exclusive or write lock.  Return
** SQLITE_OK on success and SQLITE_BUSY on a failure.  If this
** library was compiled with large file support (LFS) but LFS is not
** available on the host, then an SQLITE_NOLFS is returned.
*/
int sqliteOsWriteLock(OsFile *id){
#if OS_UNIX
  int rc;
  sqliteOsEnterMutex();
  /* A write lock is attempted only if nothing is locked (cnt==0) or if
  ** this OsFile is the sole holder of a read lock (cnt==1 and locked).
  ** In every other state the request fails with SQLITE_BUSY without a
  ** system call.
  */
  if( id->pLock->cnt==0 || (id->pLock->cnt==1 && id->locked==1) ){
    struct flock lock;
    int s;
    /* Non-blocking request; l_start = l_len = 0 covers the whole file */
    lock.l_type = F_WRLCK;
    lock.l_whence = SEEK_SET;
    lock.l_start = lock.l_len = 0L;
    s = fcntl(id->fd, F_SETLK, &lock);
    if( s!=0 ){
      rc = (errno==EINVAL) ? SQLITE_NOLFS : SQLITE_BUSY;
    }else{
      rc = SQLITE_OK;
      if( !id->locked ){
        id->pOpen->nLock++;
        id->locked = 1;
      }
      /* A count of -1 marks the shared lock record as write locked */
      id->pLock->cnt = -1;
    }
  }else{
    rc = SQLITE_BUSY;
  }
  sqliteOsLeaveMutex();
  return rc;
#endif
#if OS_WIN
  int rc;
  if( id->locked<0 ){
    /* A negative value means the write lock is already held */
    rc = SQLITE_OK;
  }else{
    int res;
    int cnt = 100;
    /* Try up to 100 times, 1ms apart, to take the guard byte */
    while( cnt-->0 && (res = LockFile(id->h, FIRST_LOCKBYTE, 0, 1, 0))==0 ){
      Sleep(1);
    }
    if( res ){
      /* Give up our own read lock first, if we hold one.  On NT the
      ** whole range is unlocked and the result is not checked; elsewhere
      ** only the byte recorded in id->locked is unlocked and the result
      ** is kept in res.
      */
      if( id->locked>0 ){
        if( isNT() ){
          UnlockFile(id->h, FIRST_LOCKBYTE+1, 0, N_LOCKBYTE, 0);
        }else{
          res = UnlockFile(id->h, FIRST_LOCKBYTE + id->locked, 0, 1, 0);
        }
      }
      /* Lock every byte in the range to obtain the write lock */
      if( res ){
        res = LockFile(id->h, FIRST_LOCKBYTE+1, 0, N_LOCKBYTE, 0);
      }else{
        res = 0;
      }
      /* Release the guard byte whether or not the write lock succeeded */
      UnlockFile(id->h, FIRST_LOCKBYTE, 0, 1, 0);
    }
    if( res ){
      id->locked = -1;
      rc = SQLITE_OK;
    }else{
      rc = SQLITE_BUSY;
    }
  }
  return rc;
#endif
#if OS_MAC
  int rc;
  /* Nothing to do if the write lock is already held, or if refNumRF is
  ** -1 (no reference available to lock against).
  */
  if( id->locked<0 || id->refNumRF == -1 ){
    rc = SQLITE_OK;
  }else{
    OSErr res;
    int cnt = 5;
    ParamBlockRec params;
    memset(&params, 0, sizeof(params));
    params.ioParam.ioRefNum = id->refNumRF;
    params.ioParam.ioPosMode = fsFromStart;
    params.ioParam.ioPosOffset = FIRST_LOCKBYTE;
    params.ioParam.ioReqCount = 1;
    /* Try up to 5 times to take the guard byte */
    while( cnt-->0 && (res = PBLockRangeSync(&params))!=noErr ){
      UInt32 finalTicks;
      Delay(1, &finalTicks); /* 1/60 sec */
    }
    if( res == noErr ){
      /* Release our own read-lock byte (skipped when id->locked==0),
      ** then lock the entire range.
      */
      params.ioParam.ioPosOffset = FIRST_LOCKBYTE + id->locked;
      params.ioParam.ioReqCount = 1;
      if( id->locked==0
            || PBUnlockRangeSync(&params)==noErr ){
        params.ioParam.ioPosOffset = FIRST_LOCKBYTE+1;
        params.ioParam.ioReqCount = N_LOCKBYTE;
        res = PBLockRangeSync(&params);
      }else{
        res = afpRangeNotLocked;
      }
      /* Release the guard byte whether or not the write lock succeeded */
      params.ioParam.ioPosOffset = FIRST_LOCKBYTE;
      params.ioParam.ioReqCount = 1;
      PBUnlockRangeSync(&params);
    }
    if( res == noErr ){
      id->locked = -1;
      rc = SQLITE_OK;
    }else{
      rc = SQLITE_BUSY;
    }
  }
  return rc;
#endif
}
1541 
/*
** Unlock the given file descriptor.  If the file descriptor was
** not previously locked, then this routine is a no-op.  If this
** library was compiled with large file support (LFS) but LFS is not
** available on the host, then an SQLITE_NOLFS is returned.
**
** On Unix, releasing the last lock on a file also closes any file
** descriptors that sqliteOsClose() queued on pOpen->aPending.
*/
int sqliteOsUnlock(OsFile *id){
#if OS_UNIX
  int rc;
  if( !id->locked ) return SQLITE_OK;
  sqliteOsEnterMutex();
  assert( id->pLock->cnt!=0 );
  if( id->pLock->cnt>1 ){
    /* Other holders of the read lock remain, so only the count is
    ** reduced and the system lock is left in place.
    */
    id->pLock->cnt--;
    rc = SQLITE_OK;
  }else{
    /* Last holder (cnt is 1 or negative): release the system lock.
    ** l_start = l_len = 0 covers the whole file.
    */
    struct flock lock;
    int s;
    lock.l_type = F_UNLCK;
    lock.l_whence = SEEK_SET;
    lock.l_start = lock.l_len = 0L;
    s = fcntl(id->fd, F_SETLK, &lock);
    if( s!=0 ){
      rc = (errno==EINVAL) ? SQLITE_NOLFS : SQLITE_BUSY;
    }else{
      rc = SQLITE_OK;
      id->pLock->cnt = 0;
    }
  }
  if( rc==SQLITE_OK ){
    /* Decrement the count of locks against this same file.  When the
    ** count reaches zero, close any other file descriptors whose close
    ** was deferred because of outstanding locks.
    */
    struct openCnt *pOpen = id->pOpen;
    pOpen->nLock--;
    assert( pOpen->nLock>=0 );
    if( pOpen->nLock==0 && pOpen->nPending>0 ){
      int i;
      for(i=0; i<pOpen->nPending; i++){
        close(pOpen->aPending[i]);
      }
      sqliteFree(pOpen->aPending);
      pOpen->nPending = 0;
      pOpen->aPending = 0;
    }
  }
  sqliteOsLeaveMutex();
  /* The per-handle flag is cleared even when the fcntl() call failed */
  id->locked = 0;
  return rc;
#endif
#if OS_WIN
  int rc;
  if( id->locked==0 ){
    rc = SQLITE_OK;
  }else if( isNT() || id->locked<0 ){
    /* NT locks and all write locks cover the whole range */
    UnlockFile(id->h, FIRST_LOCKBYTE+1, 0, N_LOCKBYTE, 0);
    rc = SQLITE_OK;
    id->locked = 0;
  }else{
    /* Read lock without LockFileEx(): only the single byte recorded in
    ** id->locked is held.
    */
    UnlockFile(id->h, FIRST_LOCKBYTE+id->locked, 0, 1, 0);
    rc = SQLITE_OK;
    id->locked = 0;
  }
  return rc;
#endif
#if OS_MAC
  int rc;
  ParamBlockRec params;
  memset(&params, 0, sizeof(params));
  params.ioParam.ioRefNum = id->refNumRF;
  params.ioParam.ioPosMode = fsFromStart;
  if( id->locked==0 || id->refNumRF == -1 ){
    rc = SQLITE_OK;
  }else if( id->locked<0 ){
    /* Write lock: the whole range is held */
    params.ioParam.ioPosOffset = FIRST_LOCKBYTE+1;
    params.ioParam.ioReqCount = N_LOCKBYTE;
    PBUnlockRangeSync(&params);
    rc = SQLITE_OK;
    id->locked = 0;
  }else{
    /* Read lock: only the single byte recorded in id->locked is held */
    params.ioParam.ioPosOffset = FIRST_LOCKBYTE+id->locked;
    params.ioParam.ioReqCount = 1;
    PBUnlockRangeSync(&params);
    rc = SQLITE_OK;
    id->locked = 0;
  }
  return rc;
#endif
}
1632 
1633 /*
1634 ** Get information to seed the random number generator.  The seed
1635 ** is written into the buffer zBuf[256].  The calling function must
1636 ** supply a sufficiently large buffer.
1637 */
1638 int sqliteOsRandomSeed(char *zBuf){
1639   /* We have to initialize zBuf to prevent valgrind from reporting
1640   ** errors.  The reports issued by valgrind are incorrect - we would
1641   ** prefer that the randomness be increased by making use of the
1642   ** uninitialized space in zBuf - but valgrind errors tend to worry
1643   ** some users.  Rather than argue, it seems easier just to initialize
1644   ** the whole array and silence valgrind, even if that means less randomness
1645   ** in the random seed.
1646   **
1647   ** When testing, initializing zBuf[] to zero is all we do.  That means
1648   ** that we always use the same random number sequence.* This makes the
1649   ** tests repeatable.
1650   */
1651   memset(zBuf, 0, 256);
1652 #if OS_UNIX && !defined(SQLITE_TEST)
1653   {
1654     int pid;
1655     time((time_t*)zBuf);
1656     pid = getpid();
1657     memcpy(&zBuf[sizeof(time_t)], &pid, sizeof(pid));
1658   }
1659 #endif
1660 #if OS_WIN && !defined(SQLITE_TEST)
1661   GetSystemTime((LPSYSTEMTIME)zBuf);
1662 #endif
1663 #if OS_MAC
1664   {
1665     int pid;
1666     Microseconds((UnsignedWide*)zBuf);
1667     pid = getpid();
1668     memcpy(&zBuf[sizeof(UnsignedWide)], &pid, sizeof(pid));
1669   }
1670 #endif
1671   return SQLITE_OK;
1672 }
1673 
/*
** Pause the caller for roughly "ms" milliseconds.  The return value is
** the length of the pause that was requested from the operating system,
** in milliseconds, which can be longer than "ms" where the platform
** timer is coarser than one millisecond.
*/
int sqliteOsSleep(int ms){
#if OS_UNIX
#if defined(HAVE_USLEEP) && HAVE_USLEEP
  usleep(ms*1000);
  return ms;
#else
  /* Only whole seconds are available here: round up */
  int nSec = (ms+999)/1000;
  sleep(nSec);
  return 1000*nSec;
#endif
#endif
#if OS_WIN
  Sleep(ms);
  return ms;
#endif
#if OS_MAC
  UInt32 finalTicks;
  UInt32 nTick = (((UInt32)ms+16)*3)/50;  /* 1/60 sec per tick */
  Delay(nTick, &finalTicks);
  return (int)((nTick*50)/3);
#endif
}
1698 
/*
** Static variables used for thread synchronization.
**
** inMutex is set to 1 by sqliteOsEnterMutex() and cleared by
** sqliteOsLeaveMutex(); both routines assert on it to catch unbalanced
** calls.  At most one of the platform objects below is compiled in,
** depending on which threading macro is defined.
*/
static int inMutex = 0;
#ifdef SQLITE_UNIX_THREADS
  static pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER;
#endif
#ifdef SQLITE_W32_THREADS
  static CRITICAL_SECTION cs;
#endif
#ifdef SQLITE_MACOS_MULTITASKING
  static MPCriticalRegionID criticalRegion;
#endif
1712 
/*
** The following pair of routines implements mutual exclusion for
** multi-threaded processes.  Only a single thread is allowed to
** execute code that is surrounded by EnterMutex() and LeaveMutex().
**
** SQLite uses only a single Mutex.  There is not much critical
** code and what little there is executes quickly and without blocking.
**
** When none of the threading macros is defined, this routine only
** maintains the inMutex flag.
*/
void sqliteOsEnterMutex(){
#ifdef SQLITE_UNIX_THREADS
  pthread_mutex_lock(&mutex);
#endif
#ifdef SQLITE_W32_THREADS
  /* The critical section is created on first use.  The caller for whom
  ** InterlockedIncrement() returns 1 performs the initialization; any
  ** other caller arriving before isInit is set sleeps 1ms and re-checks.
  */
  static int isInit = 0;
  while( !isInit ){
    static long lock = 0;
    if( InterlockedIncrement(&lock)==1 ){
      InitializeCriticalSection(&cs);
      isInit = 1;
    }else{
      Sleep(1);
    }
  }
  EnterCriticalSection(&cs);
#endif
#ifdef SQLITE_MACOS_MULTITASKING
  /* notInit: 1 = region not created yet, 2 = creation in progress,
  ** 0 = ready.  A caller that sees 2 yields once and then goes on to
  ** enter the region.
  */
  static volatile int notInit = 1;
  if( notInit ){
    if( notInit == 2 ) /* as close as you can get to thread safe init */
      MPYield();
    else{
      notInit = 2;
      MPCreateCriticalRegion(&criticalRegion);
      notInit = 0;
    }
  }
  MPEnterCriticalRegion(criticalRegion, kDurationForever);
#endif
  /* The flag is checked and set only after the platform lock (if any)
  ** has been taken.  The assert fires on a nested call.
  */
  assert( !inMutex );
  inMutex = 1;
}
/*
** Leave the mutex entered by sqliteOsEnterMutex().  The inMutex flag is
** cleared before the platform lock (if any) is released, so it is only
** ever modified while that lock is held.  The assert fires if the mutex
** was not entered.
*/
void sqliteOsLeaveMutex(){
  assert( inMutex );
  inMutex = 0;
#ifdef SQLITE_UNIX_THREADS
  pthread_mutex_unlock(&mutex);
#endif
#ifdef SQLITE_W32_THREADS
  LeaveCriticalSection(&cs);
#endif
#ifdef SQLITE_MACOS_MULTITASKING
  MPExitCriticalRegion(criticalRegion);
#endif
}
1767 
/*
** Turn a relative pathname into a full pathname.  Return a pointer
** to the full pathname stored in space obtained from sqliteMalloc().
** The calling function is responsible for freeing this space once it
** is no longer needed.
**
** Returns 0 if the memory cannot be obtained or, on Unix and MacOS, if
** the current working directory cannot be determined.
*/
char *sqliteOsFullPathname(const char *zRelative){
#if OS_UNIX
  char *zFull = 0;
  if( zRelative[0]=='/' ){
    /* Already absolute: just hand back a copy */
    sqliteSetString(&zFull, zRelative, (char*)0);
  }else{
    char zBuf[5000];
    /* getcwd() returns NULL on failure (for example when the path does
    ** not fit in zBuf).  That NULL must not be passed on as a string
    ** argument, so fail the same way an allocation failure does.
    */
    if( getcwd(zBuf, sizeof(zBuf))==0 ){
      return 0;
    }
    sqliteSetString(&zFull, zBuf, "/", zRelative, (char*)0);
  }
  return zFull;
#endif
#if OS_WIN
  char *zNotUsed;
  char *zFull;
  int nByte;
  /* First call measures the required buffer, second call fills it */
  nByte = GetFullPathName(zRelative, 0, 0, &zNotUsed) + 1;
  zFull = sqliteMalloc( nByte );
  if( zFull==0 ) return 0;
  GetFullPathName(zRelative, nByte, zFull, &zNotUsed);
  return zFull;
#endif
#if OS_MAC
  char *zFull = 0;
  if( zRelative[0]==':' ){
    /* Leading ':' - append the rest of the name to the working directory */
    char zBuf[_MAX_PATH+1];
    if( getcwd(zBuf, sizeof(zBuf))==0 ){
      return 0;
    }
    sqliteSetString(&zFull, zBuf, &(zRelative[1]), (char*)0);
  }else{
    if( strchr(zRelative, ':') ){
      /* A ':' elsewhere in the name: the name is used unchanged */
      sqliteSetString(&zFull, zRelative, (char*)0);
    }else{
      /* No ':' at all - a bare name inside the working directory */
      char zBuf[_MAX_PATH+1];
      if( getcwd(zBuf, sizeof(zBuf))==0 ){
        return 0;
      }
      sqliteSetString(&zFull, zBuf, zRelative, (char*)0);
    }
  }
  return zFull;
#endif
}
1814 
/*
** The following variable, if set to a non-zero value, becomes the result
** returned from sqliteOsCurrentTime().  This is used for testing.
**
** sqliteOsCurrentTime() converts the value to a Julian Day number with
** the same formula it applies to a Unix time() result.  The variable
** exists only in builds that define SQLITE_TEST.
*/
#ifdef SQLITE_TEST
int sqlite_current_time = 0;
#endif
1822 
/*
** Find the current time (in Universal Coordinated Time).  Write the
** current time and date as a Julian Day number into *prNow and
** return 0.  Return 1 if the time and date cannot be found.
**
** NOTE(review): no code path here returns 1, and *prNow is written only
** when OS_UNIX or OS_WIN is set (or when the SQLITE_TEST override is
** active).
*/
int sqliteOsCurrentTime(double *prNow){
#if OS_UNIX
  /* Seconds since 1970-01-01 (JD 2440587.5), scaled to days */
  time_t nowSec;
  time(&nowSec);
  *prNow = nowSec/86400.0 + 2440587.5;
#endif
#if OS_WIN
  /* FILETIME structure is a 64-bit value representing the number of
     100-nanosecond intervals since January 1, 1601 (= JD 2305813.5).
  */
  FILETIME ft;
  double hiPart;
  GetSystemTimeAsFileTime( &ft );
  hiPart = ((double)ft.dwHighDateTime) * 4294967296.0;
  *prNow = (hiPart + ft.dwLowDateTime)/864000000000.0 + 2305813.5;
#endif
#ifdef SQLITE_TEST
  /* A non-zero test override replaces whatever was computed above */
  if( sqlite_current_time ){
    *prNow = sqlite_current_time/86400.0 + 2440587.5;
  }
#endif
  return 0;
}
1851