1 /*
2 ** 2001 September 16
3 **
4 ** The author disclaims copyright to this source code. In place of
5 ** a legal notice, here is a blessing:
6 **
7 ** May you do good and not evil.
8 ** May you find forgiveness for yourself and forgive others.
9 ** May you share freely, never taking more than you give.
10 **
11 ******************************************************************************
12 **
13 ** This file contains code that is specific to particular operating
14 ** systems. The purpose of this file is to provide a uniform abstraction
15 ** on which the rest of SQLite can operate.
16 */
17 #include "os.h" /* Must be first to enable large file support */
18 #include "sqliteInt.h"
19
20 #if OS_UNIX
21 # include <time.h>
22 # include <errno.h>
23 # include <unistd.h>
24 # ifndef O_LARGEFILE
25 # define O_LARGEFILE 0
26 # endif
27 # ifdef SQLITE_DISABLE_LFS
28 # undef O_LARGEFILE
29 # define O_LARGEFILE 0
30 # endif
31 # ifndef O_NOFOLLOW
32 # define O_NOFOLLOW 0
33 # endif
34 # ifndef O_BINARY
35 # define O_BINARY 0
36 # endif
37 #endif
38
39
40 #if OS_WIN
41 # include <winbase.h>
42 #endif
43
44 #if OS_MAC
45 # include <extras.h>
46 # include <path2fss.h>
47 # include <TextUtils.h>
48 # include <FinderRegistry.h>
49 # include <Folders.h>
50 # include <Timer.h>
51 # include <OSUtils.h>
52 #endif
53
54 /*
55 ** The DJGPP compiler environment looks mostly like Unix, but it
56 ** lacks the fcntl() system call. So redefine fcntl() to be something
57 ** that always succeeds. This means that locking does not occur under
** DJGPP. But it's DOS - what did you expect?
59 */
60 #ifdef __DJGPP__
61 # define fcntl(A,B,C) 0
62 #endif
63
64 /*
65 ** Macros used to determine whether or not to use threads. The
66 ** SQLITE_UNIX_THREADS macro is defined if we are synchronizing for
67 ** Posix threads and SQLITE_W32_THREADS is defined if we are
68 ** synchronizing using Win32 threads.
69 */
70 #if OS_UNIX && defined(THREADSAFE) && THREADSAFE
71 # include <pthread.h>
72 # define SQLITE_UNIX_THREADS 1
73 #endif
74 #if OS_WIN && defined(THREADSAFE) && THREADSAFE
75 # define SQLITE_W32_THREADS 1
76 #endif
77 #if OS_MAC && defined(THREADSAFE) && THREADSAFE
78 # include <Multiprocessing.h>
79 # define SQLITE_MACOS_MULTITASKING 1
80 #endif
81
82 /*
83 ** Macros for performance tracing. Normally turned off
84 */
#if 0
/*
** Tracing enabled: time operations with the processor cycle counter and
** print trace messages on stderr.
*/
static int last_page = 0;               /* Value most recently stored by SEEK() */
static unsigned long long int g_start;  /* hwtime() sample taken by TIMER_START */
static unsigned int elapse;             /* Counter delta computed by TIMER_END */

/* Read the cycle counter using the rdtsc instruction. */
__inline__ unsigned long long int hwtime(void){
  unsigned long long int ticks;
  __asm__("rdtsc\n\t"
          "mov %%edx, %%ecx\n\t"
          :"=A" (ticks));
  return ticks;
}

#define SEEK(X)           last_page=(X)
#define TIMER_START       g_start=hwtime()
#define TIMER_END         elapse=hwtime()-g_start

#define TRACE1(X)         fprintf(stderr,X)
#define TRACE2(X,Y)       fprintf(stderr,X,Y)
#define TRACE3(X,Y,Z)     fprintf(stderr,X,Y,Z)
#define TRACE4(X,Y,Z,A)   fprintf(stderr,X,Y,Z,A)
#define TRACE5(X,Y,Z,A,B) fprintf(stderr,X,Y,Z,A,B)
#else
/*
** Tracing disabled (the normal case): every macro expands to nothing, so
** the arguments are never evaluated.
*/
#define SEEK(X)
#define TIMER_START
#define TIMER_END

#define TRACE1(X)
#define TRACE2(X,Y)
#define TRACE3(X,Y,Z)
#define TRACE4(X,Y,Z,A)
#define TRACE5(X,Y,Z,A,B)
#endif
114
115
116 #if OS_UNIX
117 /*
118 ** Here is the dirt on POSIX advisory locks: ANSI STD 1003.1 (1996)
119 ** section 6.5.2.2 lines 483 through 490 specify that when a process
120 ** sets or clears a lock, that operation overrides any prior locks set
121 ** by the same process. It does not explicitly say so, but this implies
122 ** that it overrides locks set by the same process using a different
123 ** file descriptor. Consider this test case:
124 **
125 ** int fd1 = open("./file1", O_RDWR|O_CREAT, 0644);
126 ** int fd2 = open("./file2", O_RDWR|O_CREAT, 0644);
127 **
128 ** Suppose ./file1 and ./file2 are really the same file (because
129 ** one is a hard or symbolic link to the other) then if you set
130 ** an exclusive lock on fd1, then try to get an exclusive lock
131 ** on fd2, it works. I would have expected the second lock to
132 ** fail since there was already a lock on the file due to fd1.
133 ** But not so. Since both locks came from the same process, the
134 ** second overrides the first, even though they were on different
135 ** file descriptors opened on different file names.
136 **
137 ** Bummer. If you ask me, this is broken. Badly broken. It means
138 ** that we cannot use POSIX locks to synchronize file access among
139 ** competing threads of the same process. POSIX locks will work fine
140 ** to synchronize access for threads in separate processes, but not
141 ** threads within the same process.
142 **
143 ** To work around the problem, SQLite has to manage file locks internally
144 ** on its own. Whenever a new database is opened, we have to find the
145 ** specific inode of the database file (the inode is determined by the
146 ** st_dev and st_ino fields of the stat structure that fstat() fills in)
147 ** and check for locks already existing on that inode. When locks are
148 ** created or removed, we have to look at our own internal record of the
149 ** locks to see if another thread has previously set a lock on that same
150 ** inode.
151 **
152 ** The OsFile structure for POSIX is no longer just an integer file
153 ** descriptor. It is now a structure that holds the integer file
154 ** descriptor and a pointer to a structure that describes the internal
155 ** locks on the corresponding inode. There is one locking structure
156 ** per inode, so if the same inode is opened twice, both OsFile structures
157 ** point to the same locking structure. The locking structure keeps
158 ** a reference count (so we will know when to delete it) and a "cnt"
159 ** field that tells us its internal lock status. cnt==0 means the
160 ** file is unlocked. cnt==-1 means the file has an exclusive lock.
161 ** cnt>0 means there are cnt shared locks on the file.
162 **
163 ** Any attempt to lock or unlock a file first checks the locking
164 ** structure. The fcntl() system call is only invoked to set a
165 ** POSIX lock if the internal lock structure transitions between
166 ** a locked and an unlocked state.
167 **
168 ** 2004-Jan-11:
169 ** More recent discoveries about POSIX advisory locks. (The more
** I discover, the more I realize that POSIX advisory locks are
171 ** an abomination.)
172 **
173 ** If you close a file descriptor that points to a file that has locks,
174 ** all locks on that file that are owned by the current process are
175 ** released. To work around this problem, each OsFile structure contains
176 ** a pointer to an openCnt structure. There is one openCnt structure
177 ** per open inode, which means that multiple OsFiles can point to a single
178 ** openCnt. When an attempt is made to close an OsFile, if there are
179 ** other OsFiles open on the same inode that are holding locks, the call
180 ** to close() the file descriptor is deferred until all of the locks clear.
181 ** The openCnt structure keeps a list of file descriptors that need to
182 ** be closed and that list is walked (and cleared) when the last lock
183 ** clears.
184 **
185 ** First, under Linux threads, because each thread has a separate
186 ** process ID, lock operations in one thread do not override locks
187 ** to the same file in other threads. Linux threads behave like
188 ** separate processes in this respect. But, if you close a file
189 ** descriptor in linux threads, all locks are cleared, even locks
190 ** on other threads and even though the other threads have different
191 ** process IDs. Linux threads is inconsistent in this respect.
192 ** (I'm beginning to think that linux threads is an abomination too.)
193 ** The consequence of this all is that the hash table for the lockInfo
194 ** structure has to include the process id as part of its key because
195 ** locks in different threads are treated as distinct. But the
196 ** openCnt structure should not include the process id in its
197 ** key because close() clears lock on all threads, not just the current
198 ** thread. Were it not for this goofiness in linux threads, we could
199 ** combine the lockInfo and openCnt structures into a single structure.
200 */
201
202 /*
203 ** An instance of the following structure serves as the key used
204 ** to locate a particular lockInfo structure given its inode. Note
205 ** that we have to include the process ID as part of the key. On some
206 ** threading implementations (ex: linux), each thread has a separate
207 ** process ID.
208 */
struct lockKey {
  dev_t dev;   /* Device holding the file (st_dev from fstat()) */
  ino_t ino;   /* Inode of the file (st_ino from fstat()) */
  pid_t pid;   /* Result of getpid() for the thread that opened the file */
};
214
215 /*
216 ** An instance of the following structure is allocated for each open
217 ** inode on each thread with a different process ID. (Threads have
218 ** different process IDs on linux, but not on most other unixes.)
219 **
220 ** A single inode can have multiple file descriptors, so each OsFile
221 ** structure contains a pointer to an instance of this object and this
222 ** object keeps a count of the number of OsFiles pointing to it.
223 */
224 struct lockInfo {
225 struct lockKey key; /* The lookup key */
226 int cnt; /* 0: unlocked. -1: write lock. 1...: read lock. */
227 int nRef; /* Number of pointers to this structure */
228 };
229
230 /*
231 ** An instance of the following structure serves as the key used
232 ** to locate a particular openCnt structure given its inode. This
233 ** is the same as the lockKey except that the process ID is omitted.
234 */
struct openKey {
  dev_t dev;   /* Device holding the file (st_dev from fstat()) */
  ino_t ino;   /* Inode of the file (st_ino from fstat()) */
};
239
240 /*
241 ** An instance of the following structure is allocated for each open
242 ** inode. This structure keeps track of the number of locks on that
243 ** inode. If a close is attempted against an inode that is holding
244 ** locks, the close is deferred until all locks clear by adding the
245 ** file descriptor to be closed to the pending list.
246 */
247 struct openCnt {
248 struct openKey key; /* The lookup key */
249 int nRef; /* Number of pointers to this structure */
250 int nLock; /* Number of outstanding locks */
251 int nPending; /* Number of pending close() operations */
252 int *aPending; /* Malloced space holding fd's awaiting a close() */
253 };
254
255 /*
** These hash tables map inodes and process IDs into lockInfo and openCnt
257 ** structures. Access to these hash tables must be protected by a mutex.
258 */
259 static Hash lockHash = { SQLITE_HASH_BINARY, 0, 0, 0, 0, 0 };
260 static Hash openHash = { SQLITE_HASH_BINARY, 0, 0, 0, 0, 0 };
261
262 /*
263 ** Release a lockInfo structure previously allocated by findLockInfo().
264 */
releaseLockInfo(struct lockInfo * pLock)265 static void releaseLockInfo(struct lockInfo *pLock){
266 pLock->nRef--;
267 if( pLock->nRef==0 ){
268 sqliteHashInsert(&lockHash, &pLock->key, sizeof(pLock->key), 0);
269 sqliteFree(pLock);
270 }
271 }
272
273 /*
** Release an openCnt structure previously allocated by findLockInfo().
275 */
releaseOpenCnt(struct openCnt * pOpen)276 static void releaseOpenCnt(struct openCnt *pOpen){
277 pOpen->nRef--;
278 if( pOpen->nRef==0 ){
279 sqliteHashInsert(&openHash, &pOpen->key, sizeof(pOpen->key), 0);
280 sqliteFree(pOpen->aPending);
281 sqliteFree(pOpen);
282 }
283 }
284
285 /*
** Given a file descriptor, locate the lockInfo and openCnt structures that
** describe that file descriptor. Create new ones if necessary. The
** return values might be unset if an error occurs.
289 **
290 ** Return the number of errors.
291 */
findLockInfo(int fd,struct lockInfo ** ppLock,struct openCnt ** ppOpen)292 int findLockInfo(
293 int fd, /* The file descriptor used in the key */
294 struct lockInfo **ppLock, /* Return the lockInfo structure here */
295 struct openCnt **ppOpen /* Return the openCnt structure here */
296 ){
297 int rc;
298 struct lockKey key1;
299 struct openKey key2;
300 struct stat statbuf;
301 struct lockInfo *pLock;
302 struct openCnt *pOpen;
303 rc = fstat(fd, &statbuf);
304 if( rc!=0 ) return 1;
305 memset(&key1, 0, sizeof(key1));
306 key1.dev = statbuf.st_dev;
307 key1.ino = statbuf.st_ino;
308 key1.pid = getpid();
309 memset(&key2, 0, sizeof(key2));
310 key2.dev = statbuf.st_dev;
311 key2.ino = statbuf.st_ino;
312 pLock = (struct lockInfo*)sqliteHashFind(&lockHash, &key1, sizeof(key1));
313 if( pLock==0 ){
314 struct lockInfo *pOld;
315 pLock = sqliteMallocRaw( sizeof(*pLock) );
316 if( pLock==0 ) return 1;
317 pLock->key = key1;
318 pLock->nRef = 1;
319 pLock->cnt = 0;
320 pOld = sqliteHashInsert(&lockHash, &pLock->key, sizeof(key1), pLock);
321 if( pOld!=0 ){
322 assert( pOld==pLock );
323 sqliteFree(pLock);
324 return 1;
325 }
326 }else{
327 pLock->nRef++;
328 }
329 *ppLock = pLock;
330 pOpen = (struct openCnt*)sqliteHashFind(&openHash, &key2, sizeof(key2));
331 if( pOpen==0 ){
332 struct openCnt *pOld;
333 pOpen = sqliteMallocRaw( sizeof(*pOpen) );
334 if( pOpen==0 ){
335 releaseLockInfo(pLock);
336 return 1;
337 }
338 pOpen->key = key2;
339 pOpen->nRef = 1;
340 pOpen->nLock = 0;
341 pOpen->nPending = 0;
342 pOpen->aPending = 0;
343 pOld = sqliteHashInsert(&openHash, &pOpen->key, sizeof(key2), pOpen);
344 if( pOld!=0 ){
345 assert( pOld==pOpen );
346 sqliteFree(pOpen);
347 releaseLockInfo(pLock);
348 return 1;
349 }
350 }else{
351 pOpen->nRef++;
352 }
353 *ppOpen = pOpen;
354 return 0;
355 }
356
357 #endif /** POSIX advisory lock work-around **/
358
359 /*
360 ** If we compile with the SQLITE_TEST macro set, then the following block
361 ** of code will give us the ability to simulate a disk I/O error. This
362 ** is used for testing the I/O recovery logic.
363 */
#ifdef SQLITE_TEST
/*
** Countdown to the next simulated I/O error.  Zero means no error is
** scheduled.  Each SimulateIOError() reached while the counter is non-zero
** decrements it; the invocation that finds it equal to 1 fails.
*/
int sqlite_io_error_pending = 0;

/* Called just before a simulated failure is reported. */
static void local_ioerr(void){
  sqlite_io_error_pending = 0;  /* Really just a place to set a breakpoint */
}

/*
** Return A from the enclosing function if a simulated I/O error is due.
** The do{}while(0) wrapper makes the macro a single statement, so it is
** safe to use as the body of an if() that is followed by an else.
*/
#define SimulateIOError(A)  \
   do{ \
     if( sqlite_io_error_pending && sqlite_io_error_pending-- == 1 ){ \
       local_ioerr(); \
       return A; \
     } \
   }while(0)
#else
#define SimulateIOError(A)
#endif
375
376 /*
377 ** When testing, keep a count of the number of open files.
378 */
#ifdef SQLITE_TEST
/* Net number of files opened so far: successful opens minus closes. */
int sqlite_open_file_count = 0;
/* Adjust the open-file counter by X (+1 on open, -1 on close). */
#define OpenCounter(X)  (sqlite_open_file_count += (X))
#else
/* Not testing: the counter does not exist and the macro expands to nothing. */
#define OpenCounter(X)
#endif
385
386
387 /*
388 ** Delete the named file
389 */
sqliteOsDelete(const char * zFilename)390 int sqliteOsDelete(const char *zFilename){
391 #if OS_UNIX
392 unlink(zFilename);
393 #endif
394 #if OS_WIN
395 DeleteFile(zFilename);
396 #endif
397 #if OS_MAC
398 unlink(zFilename);
399 #endif
400 return SQLITE_OK;
401 }
402
403 /*
404 ** Return TRUE if the named file exists.
405 */
int sqliteOsFileExists(const char *zFilename){
#if OS_UNIX
  /* Mode 0 asks access() only whether the path can be found. */
  if( access(zFilename, 0)!=0 ) return 0;
  return 1;
#endif
#if OS_WIN
  /* 0xffffffff is the value compared against to detect a failed lookup. */
  DWORD attr = GetFileAttributes(zFilename);
  return attr != 0xffffffff;
#endif
#if OS_MAC
  if( access(zFilename, 0)!=0 ) return 0;
  return 1;
#endif
}
417
418
#if 0 /* NOT USED */
/*
** Rename zOldName to zNewName.  Return SQLITE_OK on success and
** SQLITE_ERROR on failure.  Not implemented for OS_MAC.
*/
int sqliteOsFileRename(const char *zOldName, const char *zNewName){
#if OS_UNIX
  /* Make the new name first, then remove the old one. */
  if( link(zOldName, zNewName)!=0 ){
    return SQLITE_ERROR;
  }
  unlink(zOldName);
  return SQLITE_OK;
#endif
#if OS_WIN
  if( MoveFile(zOldName, zNewName) ){
    return SQLITE_OK;
  }
  return SQLITE_ERROR;
#endif
#if OS_MAC
  /**** FIX ME ***/
  return SQLITE_ERROR;
#endif
}
#endif /* NOT USED */
443
444 /*
445 ** Attempt to open a file for both reading and writing. If that
446 ** fails, try opening it read-only. If the file does not exist,
447 ** try to create it.
448 **
449 ** On success, a handle for the open file is written to *id
450 ** and *pReadonly is set to 0 if the file was opened for reading and
451 ** writing or 1 if the file was opened read-only. The function returns
452 ** SQLITE_OK.
453 **
454 ** On failure, the function returns SQLITE_CANTOPEN and leaves
455 ** *id and *pReadonly unchanged.
456 */
int sqliteOsOpenReadWrite(
  const char *zFilename,
  OsFile *id,
  int *pReadonly
){
#if OS_UNIX
  int lockErr;
  id->dirfd = -1;
  id->fd = open(zFilename, O_RDWR|O_CREAT|O_LARGEFILE|O_BINARY, 0644);
  if( id->fd>=0 ){
    *pReadonly = 0;
  }else{
#ifdef EISDIR
    /* A directory can never be opened as a database; do not retry. */
    if( errno==EISDIR ){
      return SQLITE_CANTOPEN;
    }
#endif
    /* Read/write failed, so fall back to read-only. */
    id->fd = open(zFilename, O_RDONLY|O_LARGEFILE|O_BINARY);
    if( id->fd<0 ){
      return SQLITE_CANTOPEN;
    }
    *pReadonly = 1;
  }
  /* Attach the per-inode lock bookkeeping while holding the mutex. */
  sqliteOsEnterMutex();
  lockErr = findLockInfo(id->fd, &id->pLock, &id->pOpen);
  sqliteOsLeaveMutex();
  if( lockErr!=0 ){
    /* Note: this path returns SQLITE_NOMEM, not SQLITE_CANTOPEN. */
    close(id->fd);
    return SQLITE_NOMEM;
  }
  id->locked = 0;
  TRACE3("OPEN %-3d %s\n", id->fd, zFilename);
  OpenCounter(+1);
  return SQLITE_OK;
#endif
#if OS_WIN
  HANDLE hFile = CreateFile(zFilename,
     GENERIC_READ | GENERIC_WRITE,
     FILE_SHARE_READ | FILE_SHARE_WRITE,
     NULL,
     OPEN_ALWAYS,
     FILE_ATTRIBUTE_NORMAL | FILE_FLAG_RANDOM_ACCESS,
     NULL
  );
  if( hFile!=INVALID_HANDLE_VALUE ){
    *pReadonly = 0;
  }else{
    /* Read/write failed, so fall back to read-only. */
    hFile = CreateFile(zFilename,
       GENERIC_READ,
       FILE_SHARE_READ,
       NULL,
       OPEN_ALWAYS,
       FILE_ATTRIBUTE_NORMAL | FILE_FLAG_RANDOM_ACCESS,
       NULL
    );
    if( hFile==INVALID_HANDLE_VALUE ){
      return SQLITE_CANTOPEN;
    }
    *pReadonly = 1;
  }
  id->h = hFile;
  id->locked = 0;
  OpenCounter(+1);
  return SQLITE_OK;
#endif
#if OS_MAC
  FSSpec fsSpec;
# ifdef _LARGE_FILE
  HFSUniStr255 dfName;
  FSRef fsRef;
  /* If the path cannot be resolved, try to create the file. */
  if( __path2fss(zFilename, &fsSpec) != noErr
   && HCreate(fsSpec.vRefNum, fsSpec.parID, fsSpec.name, 'SQLI', cDocumentFile) != noErr ){
    return SQLITE_CANTOPEN;
  }
  if( FSpMakeFSRef(&fsSpec, &fsRef) != noErr ){
    return SQLITE_CANTOPEN;
  }
  FSGetDataForkName(&dfName);
  /* Try the permissions in order: shared read/write, read/write,
  ** and finally read-only. */
  if( FSOpenFork(&fsRef, dfName.length, dfName.unicode,
                 fsRdWrShPerm, &(id->refNum)) == noErr
   || FSOpenFork(&fsRef, dfName.length, dfName.unicode,
                 fsRdWrPerm, &(id->refNum)) == noErr ){
    *pReadonly = 0;
  }else if( FSOpenFork(&fsRef, dfName.length, dfName.unicode,
                       fsRdPerm, &(id->refNum)) == noErr ){
    *pReadonly = 1;
  }else{
    return SQLITE_CANTOPEN;
  }
# else
  __path2fss(zFilename, &fsSpec);
  /* Create the file if it does not exist yet. */
  if( !sqliteOsFileExists(zFilename)
   && HCreate(fsSpec.vRefNum, fsSpec.parID, fsSpec.name, 'SQLI', cDocumentFile) != noErr ){
    return SQLITE_CANTOPEN;
  }
  /* Try the permissions in order: shared read/write, read/write,
  ** and finally read-only. */
  if( HOpenDF(fsSpec.vRefNum, fsSpec.parID, fsSpec.name, fsRdWrShPerm, &(id->refNum)) == noErr
   || HOpenDF(fsSpec.vRefNum, fsSpec.parID, fsSpec.name, fsRdWrPerm, &(id->refNum)) == noErr ){
    *pReadonly = 0;
  }else if( HOpenDF(fsSpec.vRefNum, fsSpec.parID, fsSpec.name, fsRdPerm, &(id->refNum)) == noErr ){
    *pReadonly = 1;
  }else{
    return SQLITE_CANTOPEN;
  }
# endif
  /* The resource fork is optional; -1 records that it is not open. */
  if( HOpenRF(fsSpec.vRefNum, fsSpec.parID, fsSpec.name, fsRdWrShPerm, &(id->refNumRF)) != noErr){
    id->refNumRF = -1;
  }
  id->locked = 0;
  id->delOnClose = 0;
  OpenCounter(+1);
  return SQLITE_OK;
#endif
}
573
574
575 /*
576 ** Attempt to open a new file for exclusive access by this process.
577 ** The file will be opened for both reading and writing. To avoid
578 ** a potential security problem, we do not allow the file to have
579 ** previously existed. Nor do we allow the file to be a symbolic
580 ** link.
581 **
582 ** If delFlag is true, then make arrangements to automatically delete
583 ** the file when it is closed.
584 **
585 ** On success, write the file handle into *id and return SQLITE_OK.
586 **
587 ** On failure, return SQLITE_CANTOPEN.
588 */
int sqliteOsOpenExclusive(const char *zFilename, OsFile *id, int delFlag){
#if OS_UNIX
  int lockErr;
  /* Refuse to reuse a name that already exists. */
  if( access(zFilename, 0)==0 ){
    return SQLITE_CANTOPEN;
  }
  id->dirfd = -1;
  id->fd = open(zFilename,
                O_RDWR|O_CREAT|O_EXCL|O_NOFOLLOW|O_LARGEFILE|O_BINARY, 0600);
  if( id->fd<0 ){
    return SQLITE_CANTOPEN;
  }
  /* Attach the per-inode lock bookkeeping while holding the mutex. */
  sqliteOsEnterMutex();
  lockErr = findLockInfo(id->fd, &id->pLock, &id->pOpen);
  sqliteOsLeaveMutex();
  if( lockErr!=0 ){
    /* Undo the creation.  Note: this path returns SQLITE_NOMEM. */
    close(id->fd);
    unlink(zFilename);
    return SQLITE_NOMEM;
  }
  id->locked = 0;
  if( delFlag ){
    /* Remove the name now; the descriptor stays usable until closed. */
    unlink(zFilename);
  }
  TRACE3("OPEN-EX %-3d %s\n", id->fd, zFilename);
  OpenCounter(+1);
  return SQLITE_OK;
#endif
#if OS_WIN
  HANDLE hFile;
  int fileflags = FILE_FLAG_RANDOM_ACCESS;
  if( delFlag ){
    fileflags |= FILE_ATTRIBUTE_TEMPORARY | FILE_FLAG_DELETE_ON_CLOSE;
  }
  /* NOTE(review): CREATE_ALWAYS does not reject an existing file, unlike
  ** the OS_UNIX branch above - confirm whether that is intended. */
  hFile = CreateFile(zFilename,
     GENERIC_READ | GENERIC_WRITE,
     0,
     NULL,
     CREATE_ALWAYS,
     fileflags,
     NULL
  );
  if( hFile==INVALID_HANDLE_VALUE ){
    return SQLITE_CANTOPEN;
  }
  id->h = hFile;
  id->locked = 0;
  OpenCounter(+1);
  return SQLITE_OK;
#endif
#if OS_MAC
  FSSpec fsSpec;
# ifdef _LARGE_FILE
  HFSUniStr255 dfName;
  FSRef fsRef;
  __path2fss(zFilename, &fsSpec);
  if( HCreate(fsSpec.vRefNum, fsSpec.parID, fsSpec.name, 'SQLI', cDocumentFile) != noErr ){
    return SQLITE_CANTOPEN;
  }
  if( FSpMakeFSRef(&fsSpec, &fsRef) != noErr ){
    return SQLITE_CANTOPEN;
  }
  FSGetDataForkName(&dfName);
  if( FSOpenFork(&fsRef, dfName.length, dfName.unicode,
                 fsRdWrPerm, &(id->refNum)) != noErr ){
    return SQLITE_CANTOPEN;
  }
# else
  __path2fss(zFilename, &fsSpec);
  if( HCreate(fsSpec.vRefNum, fsSpec.parID, fsSpec.name, 'SQLI', cDocumentFile) != noErr ){
    return SQLITE_CANTOPEN;
  }
  if( HOpenDF(fsSpec.vRefNum, fsSpec.parID, fsSpec.name, fsRdWrPerm, &(id->refNum)) != noErr ){
    return SQLITE_CANTOPEN;
  }
# endif
  id->refNumRF = -1;    /* no resource fork is opened for exclusive files */
  id->locked = 0;
  id->delOnClose = delFlag;
  if( delFlag ){
    /* Record the full path now; sqliteOsClose() unlinks and frees it. */
    id->pathToDel = sqliteOsFullPathname(zFilename);
  }
  OpenCounter(+1);
  return SQLITE_OK;
#endif
}
672
673 /*
674 ** Attempt to open a new file for read-only access.
675 **
676 ** On success, write the file handle into *id and return SQLITE_OK.
677 **
678 ** On failure, return SQLITE_CANTOPEN.
679 */
int sqliteOsOpenReadOnly(const char *zFilename, OsFile *id){
#if OS_UNIX
  int lockErr;
  id->dirfd = -1;
  id->fd = open(zFilename, O_RDONLY|O_LARGEFILE|O_BINARY);
  if( id->fd<0 ){
    return SQLITE_CANTOPEN;
  }
  /* Attach the per-inode lock bookkeeping while holding the mutex. */
  sqliteOsEnterMutex();
  lockErr = findLockInfo(id->fd, &id->pLock, &id->pOpen);
  sqliteOsLeaveMutex();
  if( lockErr!=0 ){
    /* Note: this path returns SQLITE_NOMEM, not SQLITE_CANTOPEN. */
    close(id->fd);
    return SQLITE_NOMEM;
  }
  id->locked = 0;
  TRACE3("OPEN-RO %-3d %s\n", id->fd, zFilename);
  OpenCounter(+1);
  return SQLITE_OK;
#endif
#if OS_WIN
  HANDLE hFile = CreateFile(zFilename,
     GENERIC_READ,
     0,
     NULL,
     OPEN_EXISTING,
     FILE_ATTRIBUTE_NORMAL | FILE_FLAG_RANDOM_ACCESS,
     NULL
  );
  if( hFile==INVALID_HANDLE_VALUE ){
    return SQLITE_CANTOPEN;
  }
  id->h = hFile;
  id->locked = 0;
  OpenCounter(+1);
  return SQLITE_OK;
#endif
#if OS_MAC
  FSSpec fsSpec;
# ifdef _LARGE_FILE
  HFSUniStr255 dfName;
  FSRef fsRef;
  if( __path2fss(zFilename, &fsSpec) != noErr ){
    return SQLITE_CANTOPEN;
  }
  if( FSpMakeFSRef(&fsSpec, &fsRef) != noErr ){
    return SQLITE_CANTOPEN;
  }
  FSGetDataForkName(&dfName);
  if( FSOpenFork(&fsRef, dfName.length, dfName.unicode,
                 fsRdPerm, &(id->refNum)) != noErr ){
    return SQLITE_CANTOPEN;
  }
# else
  __path2fss(zFilename, &fsSpec);
  if( HOpenDF(fsSpec.vRefNum, fsSpec.parID, fsSpec.name, fsRdPerm, &(id->refNum)) != noErr ){
    return SQLITE_CANTOPEN;
  }
# endif
  /* The resource fork is optional; -1 records that it is not open. */
  if( HOpenRF(fsSpec.vRefNum, fsSpec.parID, fsSpec.name, fsRdWrShPerm, &(id->refNumRF)) != noErr){
    id->refNumRF = -1;
  }
  id->locked = 0;
  id->delOnClose = 0;
  OpenCounter(+1);
  return SQLITE_OK;
#endif
}
744
745 /*
746 ** Attempt to open a file descriptor for the directory that contains a
747 ** file. This file descriptor can be used to fsync() the directory
748 ** in order to make sure the creation of a new file is actually written
749 ** to disk.
750 **
751 ** This routine is only meaningful for Unix. It is a no-op under
752 ** windows since windows does not support hard links.
753 **
** On success, the handle at *id (which must refer to a previously
** opened file) is updated with the new directory file descriptor and
** SQLITE_OK is returned.
757 **
758 ** On failure, the function returns SQLITE_CANTOPEN and leaves
759 ** *id unchanged.
760 */
int sqliteOsOpenDirectory(
  const char *zDirname,
  OsFile *id
){
#if OS_UNIX
  int dirfd;
  if( id->fd<0 ){
    /* The file itself must already be open before its directory
    ** may be opened. */
    return SQLITE_CANTOPEN;
  }
  assert( id->dirfd<0 );   /* no directory descriptor may be open yet */
  dirfd = open(zDirname, O_RDONLY|O_BINARY, 0644);
  id->dirfd = dirfd;
  if( dirfd<0 ){
    return SQLITE_CANTOPEN;
  }
  TRACE3("OPENDIR %-3d %s\n", id->dirfd, zDirname);
#endif
  /* On other systems there is nothing to do. */
  return SQLITE_OK;
}
780
781 /*
782 ** If the following global variable points to a string which is the
783 ** name of a directory, then that directory will be used to store
784 ** temporary files.
785 */
const char *sqlite_temp_directory = 0;  /* 0: no directory has been set */
787
788 /*
789 ** Create a temporary file name in zBuf. zBuf must be big enough to
790 ** hold at least SQLITE_TEMPNAME_SIZE characters.
791 */
sqliteOsTempFileName(char * zBuf)792 int sqliteOsTempFileName(char *zBuf){
793 #if OS_UNIX
794 static const char *azDirs[] = {
795 0,
796 "/var/tmp",
797 "/usr/tmp",
798 "/tmp",
799 ".",
800 };
801 static unsigned char zChars[] =
802 "abcdefghijklmnopqrstuvwxyz"
803 "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
804 "0123456789";
805 int i, j;
806 struct stat buf;
807 const char *zDir = ".";
808 azDirs[0] = sqlite_temp_directory;
809 for(i=0; i<sizeof(azDirs)/sizeof(azDirs[0]); i++){
810 if( azDirs[i]==0 ) continue;
811 if( stat(azDirs[i], &buf) ) continue;
812 if( !S_ISDIR(buf.st_mode) ) continue;
813 if( access(azDirs[i], 07) ) continue;
814 zDir = azDirs[i];
815 break;
816 }
817 do{
818 sprintf(zBuf, "%s/"TEMP_FILE_PREFIX, zDir);
819 j = strlen(zBuf);
820 sqliteRandomness(15, &zBuf[j]);
821 for(i=0; i<15; i++, j++){
822 zBuf[j] = (char)zChars[ ((unsigned char)zBuf[j])%(sizeof(zChars)-1) ];
823 }
824 zBuf[j] = 0;
825 }while( access(zBuf,0)==0 );
826 #endif
827 #if OS_WIN
828 static char zChars[] =
829 "abcdefghijklmnopqrstuvwxyz"
830 "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
831 "0123456789";
832 int i, j;
833 const char *zDir;
834 char zTempPath[SQLITE_TEMPNAME_SIZE];
835 if( sqlite_temp_directory==0 ){
836 GetTempPath(SQLITE_TEMPNAME_SIZE-30, zTempPath);
837 for(i=strlen(zTempPath); i>0 && zTempPath[i-1]=='\\'; i--){}
838 zTempPath[i] = 0;
839 zDir = zTempPath;
840 }else{
841 zDir = sqlite_temp_directory;
842 }
843 for(;;){
844 sprintf(zBuf, "%s\\"TEMP_FILE_PREFIX, zDir);
845 j = strlen(zBuf);
846 sqliteRandomness(15, &zBuf[j]);
847 for(i=0; i<15; i++, j++){
848 zBuf[j] = (char)zChars[ ((unsigned char)zBuf[j])%(sizeof(zChars)-1) ];
849 }
850 zBuf[j] = 0;
851 if( !sqliteOsFileExists(zBuf) ) break;
852 }
853 #endif
854 #if OS_MAC
855 static char zChars[] =
856 "abcdefghijklmnopqrstuvwxyz"
857 "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
858 "0123456789";
859 int i, j;
860 char *zDir;
861 char zTempPath[SQLITE_TEMPNAME_SIZE];
862 char zdirName[32];
863 CInfoPBRec infoRec;
864 Str31 dirName;
865 memset(&infoRec, 0, sizeof(infoRec));
866 memset(zTempPath, 0, SQLITE_TEMPNAME_SIZE);
867 if( sqlite_temp_directory!=0 ){
868 zDir = sqlite_temp_directory;
869 }else if( FindFolder(kOnSystemDisk, kTemporaryFolderType, kCreateFolder,
870 &(infoRec.dirInfo.ioVRefNum), &(infoRec.dirInfo.ioDrParID)) == noErr ){
871 infoRec.dirInfo.ioNamePtr = dirName;
872 do{
873 infoRec.dirInfo.ioFDirIndex = -1;
874 infoRec.dirInfo.ioDrDirID = infoRec.dirInfo.ioDrParID;
875 if( PBGetCatInfoSync(&infoRec) == noErr ){
876 CopyPascalStringToC(dirName, zdirName);
877 i = strlen(zdirName);
878 memmove(&(zTempPath[i+1]), zTempPath, strlen(zTempPath));
879 strcpy(zTempPath, zdirName);
880 zTempPath[i] = ':';
881 }else{
882 *zTempPath = 0;
883 break;
884 }
885 } while( infoRec.dirInfo.ioDrDirID != fsRtDirID );
886 zDir = zTempPath;
887 }
888 if( zDir[0]==0 ){
889 getcwd(zTempPath, SQLITE_TEMPNAME_SIZE-24);
890 zDir = zTempPath;
891 }
892 for(;;){
893 sprintf(zBuf, "%s"TEMP_FILE_PREFIX, zDir);
894 j = strlen(zBuf);
895 sqliteRandomness(15, &zBuf[j]);
896 for(i=0; i<15; i++, j++){
897 zBuf[j] = (char)zChars[ ((unsigned char)zBuf[j])%(sizeof(zChars)-1) ];
898 }
899 zBuf[j] = 0;
900 if( !sqliteOsFileExists(zBuf) ) break;
901 }
902 #endif
903 return SQLITE_OK;
904 }
905
906 /*
907 ** Close a file.
908 */
int sqliteOsClose(OsFile *id){
  /*
  ** Release any lock held through id, close its handles and drop its
  ** references to the shared bookkeeping.  Always returns SQLITE_OK.
  */
#if OS_UNIX
  sqliteOsUnlock(id);
  if( id->dirfd>=0 ) close(id->dirfd);
  id->dirfd = -1;
  sqliteOsEnterMutex();
  if( id->pOpen->nLock ){
    /* If there are outstanding locks, do not actually close the file just
    ** yet because that would clear those locks.  Instead, add the file
    ** descriptor to pOpen->aPending.  It will be automatically closed when
    ** the last lock is cleared.
    */
    int *aNew;
    struct openCnt *pOpen = id->pOpen;
    aNew = sqliteRealloc( pOpen->aPending, (pOpen->nPending+1)*sizeof(int) );
    if( aNew==0 ){
      /* If a malloc fails, just leak the file descriptor.  nPending is
      ** left alone so that it never exceeds the number of slots that
      ** have actually been filled in aPending[]. */
    }else{
      pOpen->aPending = aNew;
      pOpen->aPending[pOpen->nPending] = id->fd;
      pOpen->nPending++;
    }
  }else{
    /* There are no outstanding locks so we can close the file immediately */
    close(id->fd);
  }
  releaseLockInfo(id->pLock);
  releaseOpenCnt(id->pOpen);
  sqliteOsLeaveMutex();
  TRACE2("CLOSE %-3d\n", id->fd);
  OpenCounter(-1);
  return SQLITE_OK;
#endif
#if OS_WIN
  CloseHandle(id->h);
  OpenCounter(-1);
  return SQLITE_OK;
#endif
#if OS_MAC
  /* refNumRF is -1 when no resource fork was opened. */
  if( id->refNumRF!=-1 )
    FSClose(id->refNumRF);
# ifdef _LARGE_FILE
  FSCloseFork(id->refNum);
# else
  FSClose(id->refNum);
# endif
  if( id->delOnClose ){
    /* Delete-on-close is emulated by removing the recorded path. */
    unlink(id->pathToDel);
    sqliteFree(id->pathToDel);
  }
  OpenCounter(-1);
  return SQLITE_OK;
#endif
}
963
964 /*
965 ** Read data from a file into a buffer. Return SQLITE_OK if all
966 ** bytes were read successfully and SQLITE_IOERR if anything goes
967 ** wrong.
968 */
int sqliteOsRead(OsFile *id, void *pBuf, int amt){
  /* In every branch, anything other than a complete read of amt bytes
  ** is reported as SQLITE_IOERR. */
#if OS_UNIX
  int nRead;
  SimulateIOError(SQLITE_IOERR);
  TIMER_START;
  nRead = read(id->fd, pBuf, amt);
  TIMER_END;
  TRACE4("READ %-3d %7d %d\n", id->fd, last_page, elapse);
  SEEK(0);
  return nRead==amt ? SQLITE_OK : SQLITE_IOERR;
#endif
#if OS_WIN
  DWORD nRead;
  SimulateIOError(SQLITE_IOERR);
  TRACE2("READ %d\n", last_page);
  if( !ReadFile(id->h, pBuf, amt, &nRead, 0) ){
    nRead = 0;   /* a failed call counts as having read nothing */
  }
  return nRead==(DWORD)amt ? SQLITE_OK : SQLITE_IOERR;
#endif
#if OS_MAC
  int nRead;
  SimulateIOError(SQLITE_IOERR);
  TRACE2("READ %d\n", last_page);
# ifdef _LARGE_FILE
  FSReadFork(id->refNum, fsAtMark, 0, (ByteCount)amt, pBuf, (ByteCount*)&nRead);
# else
  /* FSRead() takes the requested count in and returns the actual count. */
  nRead = amt;
  FSRead(id->refNum, &nRead, pBuf);
# endif
  return nRead==amt ? SQLITE_OK : SQLITE_IOERR;
#endif
}
1015
1016 /*
1017 ** Write data from a buffer into a file. Return SQLITE_OK on success
1018 ** or some other error code on failure.
1019 */
int sqliteOsWrite(OsFile *id, const void *pBuf, int amt){
  /*
  ** Write amt bytes from pBuf, looping over partial writes.  Returns
  ** SQLITE_OK once everything has been written and SQLITE_FULL if the
  ** write stops early.  A request with amt<=0 writes nothing and
  ** returns SQLITE_OK in every branch.
  */
#if OS_UNIX
  int wrote = 0;
  SimulateIOError(SQLITE_IOERR);
  TIMER_START;
  while( amt>0 && (wrote = write(id->fd, pBuf, amt))>0 ){
    amt -= wrote;
    pBuf = (const char*)pBuf + wrote;
  }
  TIMER_END;
  TRACE4("WRITE %-3d %7d %d\n", id->fd, last_page, elapse);
  SEEK(0);
  if( amt>0 ){
    return SQLITE_FULL;
  }
  return SQLITE_OK;
#endif
#if OS_WIN
  int rc = 1;       /* Non-zero: no WriteFile() call has failed so far */
  DWORD wrote = 0;
  SimulateIOError(SQLITE_IOERR);
  TRACE2("WRITE %d\n", last_page);
  while( amt>0 && (rc = WriteFile(id->h, pBuf, amt, &wrote, 0))!=0 && wrote>0 ){
    amt -= wrote;
    pBuf = (const char*)pBuf + wrote;
  }
  /* Fail if a call failed or if bytes remain unwritten.  rc and wrote are
  ** initialized above so this test is well defined even when the loop
  ** body never runs. */
  if( !rc || amt>0 ){
    return SQLITE_FULL;
  }
  return SQLITE_OK;
#endif
#if OS_MAC
  OSErr oserr = noErr;   /* noErr until a write call reports otherwise */
  int wrote = 0;
  SimulateIOError(SQLITE_IOERR);
  TRACE2("WRITE %d\n", last_page);
  while( amt>0 ){
# ifdef _LARGE_FILE
    oserr = FSWriteFork(id->refNum, fsAtMark, 0,
                        (ByteCount)amt, pBuf, (ByteCount*)&wrote);
# else
    /* FSWrite() takes the requested count in and returns the actual count. */
    wrote = amt;
    oserr = FSWrite(id->refNum, &wrote, pBuf);
# endif
    if( wrote == 0 || oserr != noErr)
      break;
    amt -= wrote;
    pBuf = (const char*)pBuf + wrote;
  }
  /* Fail if a call failed or if bytes remain unwritten. */
  if( oserr != noErr || amt>0 ){
    return SQLITE_FULL;
  }
  return SQLITE_OK;
#endif
}
1075
1076 /*
1077 ** Move the read/write pointer in a file.
1078 */
sqliteOsSeek(OsFile * id,off_t offset)1079 int sqliteOsSeek(OsFile *id, off_t offset){
1080 SEEK(offset/1024 + 1);
1081 #if OS_UNIX
1082 lseek(id->fd, offset, SEEK_SET);
1083 return SQLITE_OK;
1084 #endif
1085 #if OS_WIN
1086 {
1087 LONG upperBits = offset>>32;
1088 LONG lowerBits = offset & 0xffffffff;
1089 DWORD rc;
1090 rc = SetFilePointer(id->h, lowerBits, &upperBits, FILE_BEGIN);
1091 /* TRACE3("SEEK rc=0x%x upper=0x%x\n", rc, upperBits); */
1092 }
1093 return SQLITE_OK;
1094 #endif
1095 #if OS_MAC
1096 {
1097 off_t curSize;
1098 if( sqliteOsFileSize(id, &curSize) != SQLITE_OK ){
1099 return SQLITE_IOERR;
1100 }
1101 if( offset >= curSize ){
1102 if( sqliteOsTruncate(id, offset+1) != SQLITE_OK ){
1103 return SQLITE_IOERR;
1104 }
1105 }
1106 # ifdef _LARGE_FILE
1107 if( FSSetForkPosition(id->refNum, fsFromStart, offset) != noErr ){
1108 # else
1109 if( SetFPos(id->refNum, fsFromStart, offset) != noErr ){
1110 # endif
1111 return SQLITE_IOERR;
1112 }else{
1113 return SQLITE_OK;
1114 }
1115 }
1116 #endif
1117 }
1118
1119 #ifdef SQLITE_NOSYNC
1120 # define fsync(X) 0
1121 #endif
1122
1123 /*
1124 ** Make sure all writes to a particular file are committed to disk.
1125 **
1126 ** Under Unix, also make sure that the directory entry for the file
1127 ** has been created by fsync-ing the directory that contains the file.
1128 ** If we do not do this and we encounter a power failure, the directory
1129 ** entry for the journal might not exist after we reboot. The next
1130 ** SQLite to access the file will not know that the journal exists (because
1131 ** the directory entry for the journal was never created) and the transaction
1132 ** will not roll back - possibly leading to database corruption.
1133 */
1134 int sqliteOsSync(OsFile *id){
1135 #if OS_UNIX
1136 SimulateIOError(SQLITE_IOERR);
1137 TRACE2("SYNC %-3d\n", id->fd);
1138 if( fsync(id->fd) ){
1139 return SQLITE_IOERR;
1140 }else{
1141 if( id->dirfd>=0 ){
1142 TRACE2("DIRSYNC %-3d\n", id->dirfd);
1143 fsync(id->dirfd);
1144 close(id->dirfd); /* Only need to sync once, so close the directory */
1145 id->dirfd = -1; /* when we are done. */
1146 }
1147 return SQLITE_OK;
1148 }
1149 #endif
1150 #if OS_WIN
1151 if( FlushFileBuffers(id->h) ){
1152 return SQLITE_OK;
1153 }else{
1154 return SQLITE_IOERR;
1155 }
1156 #endif
1157 #if OS_MAC
1158 # ifdef _LARGE_FILE
1159 if( FSFlushFork(id->refNum) != noErr ){
1160 # else
1161 ParamBlockRec params;
1162 memset(¶ms, 0, sizeof(ParamBlockRec));
1163 params.ioParam.ioRefNum = id->refNum;
1164 if( PBFlushFileSync(¶ms) != noErr ){
1165 # endif
1166 return SQLITE_IOERR;
1167 }else{
1168 return SQLITE_OK;
1169 }
1170 #endif
1171 }
1172
1173 /*
1174 ** Truncate an open file to a specified size
1175 */
1176 int sqliteOsTruncate(OsFile *id, off_t nByte){
1177 SimulateIOError(SQLITE_IOERR);
1178 #if OS_UNIX
1179 return ftruncate(id->fd, nByte)==0 ? SQLITE_OK : SQLITE_IOERR;
1180 #endif
1181 #if OS_WIN
1182 {
1183 LONG upperBits = nByte>>32;
1184 SetFilePointer(id->h, nByte, &upperBits, FILE_BEGIN);
1185 SetEndOfFile(id->h);
1186 }
1187 return SQLITE_OK;
1188 #endif
1189 #if OS_MAC
1190 # ifdef _LARGE_FILE
1191 if( FSSetForkSize(id->refNum, fsFromStart, nByte) != noErr){
1192 # else
1193 if( SetEOF(id->refNum, nByte) != noErr ){
1194 # endif
1195 return SQLITE_IOERR;
1196 }else{
1197 return SQLITE_OK;
1198 }
1199 #endif
1200 }
1201
1202 /*
1203 ** Determine the current size of a file in bytes
1204 */
1205 int sqliteOsFileSize(OsFile *id, off_t *pSize){
1206 #if OS_UNIX
1207 struct stat buf;
1208 SimulateIOError(SQLITE_IOERR);
1209 if( fstat(id->fd, &buf)!=0 ){
1210 return SQLITE_IOERR;
1211 }
1212 *pSize = buf.st_size;
1213 return SQLITE_OK;
1214 #endif
1215 #if OS_WIN
1216 DWORD upperBits, lowerBits;
1217 SimulateIOError(SQLITE_IOERR);
1218 lowerBits = GetFileSize(id->h, &upperBits);
1219 *pSize = (((off_t)upperBits)<<32) + lowerBits;
1220 return SQLITE_OK;
1221 #endif
1222 #if OS_MAC
1223 # ifdef _LARGE_FILE
1224 if( FSGetForkSize(id->refNum, pSize) != noErr){
1225 # else
1226 if( GetEOF(id->refNum, pSize) != noErr ){
1227 # endif
1228 return SQLITE_IOERR;
1229 }else{
1230 return SQLITE_OK;
1231 }
1232 #endif
1233 }
1234
#if OS_WIN
/*
** Return true (non-zero) when the host reports the NT platform
** (WinNT, Win2K or WinXP) and false (zero) otherwise (Win95, Win98
** or WinME).
**
** Win95/98/ME lack the LockFileEx() API.  We can still link statically
** against that API as long as it is never called when running on
** Win95/98/ME, so this routine is consulted before every use of
** LockFileEx().
**
** The platform is queried once and the answer is cached in a static
** variable for all later calls.
*/
int isNT(void){
  static int osType = 0;     /* 0=unknown 1=win95 2=winNT */
  OSVERSIONINFO ver;

  if( osType!=0 ){
    return osType==2;        /* already determined on an earlier call */
  }
  ver.dwOSVersionInfoSize = sizeof(ver);
  GetVersionEx(&ver);
  if( ver.dwPlatformId==VER_PLATFORM_WIN32_NT ){
    osType = 2;
  }else{
    osType = 1;
  }
  return osType==2;
}
#endif
1258
/*
** Windows file locking notes:  [similar issues apply to MacOS]
**
** LockFileEx() and UnlockFileEx() are not available on Win95/98/ME, so
** on those systems only LockFile() and UnlockFile() can be used.
**
** LockFile() prevents not just writing but also reading by other
** processes.  (This is a design error on the part of Windows, but there
** is nothing we can do about that.)  So the region used for locking is
** at the end of the file where it is unlikely to ever interfere with an
** actual read attempt.
**
** A database read lock is obtained by locking a single randomly-chosen
** byte out of a specific range of bytes.  The lock byte is chosen at
** random so two separate readers can probably access the file at the
** same time, unless they are unlucky and choose the same lock byte.
** A database write lock is obtained by locking all bytes in the range.
** There can only be one writer.
**
** A lock is obtained on the first byte of the lock range before
** acquiring either a read lock or a write lock.  This prevents two
** processes from attempting to get a lock at the same time.  The
** semantics of sqliteOsReadLock() require that if there is already a
** write lock, that lock is converted into a read lock atomically.  The
** lock on the first byte allows us to drop the old write lock and get
** the read lock without another process jumping into the middle and
** messing us up.  The same argument applies to sqliteOsWriteLock().
**
** On WinNT/2K/XP systems, LockFileEx() and UnlockFileEx() are available,
** which means we can use reader/writer locks.  When reader/writer locks
** are used, the lock is placed on the same range of bytes that is used
** for probabilistic locking in Win95/98/ME.  Hence, the locking scheme
** will support two or more Win95 readers or two or more WinNT readers.
** But a single Win95 reader will lock out all WinNT readers and a single
** WinNT reader will lock out all other Win95 readers.
**
** Note:  On MacOS we use the resource fork for locking.
**
** The following #defines specify the range of bytes used for locking.
** N_LOCKBYTE is the number of bytes available for doing the locking.
** The first byte, which is held while the lock is changing, does not
** count toward this number.  FIRST_LOCKBYTE is the address of the first
** byte in the range of bytes used for locking.
*/
#define N_LOCKBYTE       10239
#if !OS_MAC
# define FIRST_LOCKBYTE  (0xffffffff - N_LOCKBYTE)
#else
# define FIRST_LOCKBYTE  (0x000fffff - N_LOCKBYTE)
#endif
1310
1311 /*
1312 ** Change the status of the lock on the file "id" to be a readlock.
1313 ** If the file was write locked, then this reduces the lock to a read.
1314 ** If the file was read locked, then this acquires a new read lock.
1315 **
1316 ** Return SQLITE_OK on success and SQLITE_BUSY on failure. If this
1317 ** library was compiled with large file support (LFS) but LFS is not
1318 ** available on the host, then an SQLITE_NOLFS is returned.
1319 */
1320 int sqliteOsReadLock(OsFile *id){
1321 #if OS_UNIX
1322 int rc;
1323 sqliteOsEnterMutex();
1324 if( id->pLock->cnt>0 ){
1325 if( !id->locked ){
1326 id->pLock->cnt++;
1327 id->locked = 1;
1328 id->pOpen->nLock++;
1329 }
1330 rc = SQLITE_OK;
1331 }else if( id->locked || id->pLock->cnt==0 ){
1332 struct flock lock;
1333 int s;
1334 lock.l_type = F_RDLCK;
1335 lock.l_whence = SEEK_SET;
1336 lock.l_start = lock.l_len = 0L;
1337 s = fcntl(id->fd, F_SETLK, &lock);
1338 if( s!=0 ){
1339 rc = (errno==EINVAL) ? SQLITE_NOLFS : SQLITE_BUSY;
1340 }else{
1341 rc = SQLITE_OK;
1342 if( !id->locked ){
1343 id->pOpen->nLock++;
1344 id->locked = 1;
1345 }
1346 id->pLock->cnt = 1;
1347 }
1348 }else{
1349 rc = SQLITE_BUSY;
1350 }
1351 sqliteOsLeaveMutex();
1352 return rc;
1353 #endif
1354 #if OS_WIN
1355 int rc;
1356 if( id->locked>0 ){
1357 rc = SQLITE_OK;
1358 }else{
1359 int lk;
1360 int res;
1361 int cnt = 100;
1362 sqliteRandomness(sizeof(lk), &lk);
1363 lk = (lk & 0x7fffffff)%N_LOCKBYTE + 1;
1364 while( cnt-->0 && (res = LockFile(id->h, FIRST_LOCKBYTE, 0, 1, 0))==0 ){
1365 Sleep(1);
1366 }
1367 if( res ){
1368 UnlockFile(id->h, FIRST_LOCKBYTE+1, 0, N_LOCKBYTE, 0);
1369 if( isNT() ){
1370 OVERLAPPED ovlp;
1371 ovlp.Offset = FIRST_LOCKBYTE+1;
1372 ovlp.OffsetHigh = 0;
1373 ovlp.hEvent = 0;
1374 res = LockFileEx(id->h, LOCKFILE_FAIL_IMMEDIATELY,
1375 0, N_LOCKBYTE, 0, &ovlp);
1376 }else{
1377 res = LockFile(id->h, FIRST_LOCKBYTE+lk, 0, 1, 0);
1378 }
1379 UnlockFile(id->h, FIRST_LOCKBYTE, 0, 1, 0);
1380 }
1381 if( res ){
1382 id->locked = lk;
1383 rc = SQLITE_OK;
1384 }else{
1385 rc = SQLITE_BUSY;
1386 }
1387 }
1388 return rc;
1389 #endif
1390 #if OS_MAC
1391 int rc;
1392 if( id->locked>0 || id->refNumRF == -1 ){
1393 rc = SQLITE_OK;
1394 }else{
1395 int lk;
1396 OSErr res;
1397 int cnt = 5;
1398 ParamBlockRec params;
1399 sqliteRandomness(sizeof(lk), &lk);
1400 lk = (lk & 0x7fffffff)%N_LOCKBYTE + 1;
1401 memset(¶ms, 0, sizeof(params));
1402 params.ioParam.ioRefNum = id->refNumRF;
1403 params.ioParam.ioPosMode = fsFromStart;
1404 params.ioParam.ioPosOffset = FIRST_LOCKBYTE;
1405 params.ioParam.ioReqCount = 1;
1406 while( cnt-->0 && (res = PBLockRangeSync(¶ms))!=noErr ){
1407 UInt32 finalTicks;
1408 Delay(1, &finalTicks); /* 1/60 sec */
1409 }
1410 if( res == noErr ){
1411 params.ioParam.ioPosOffset = FIRST_LOCKBYTE+1;
1412 params.ioParam.ioReqCount = N_LOCKBYTE;
1413 PBUnlockRangeSync(¶ms);
1414 params.ioParam.ioPosOffset = FIRST_LOCKBYTE+lk;
1415 params.ioParam.ioReqCount = 1;
1416 res = PBLockRangeSync(¶ms);
1417 params.ioParam.ioPosOffset = FIRST_LOCKBYTE;
1418 params.ioParam.ioReqCount = 1;
1419 PBUnlockRangeSync(¶ms);
1420 }
1421 if( res == noErr ){
1422 id->locked = lk;
1423 rc = SQLITE_OK;
1424 }else{
1425 rc = SQLITE_BUSY;
1426 }
1427 }
1428 return rc;
1429 #endif
1430 }
1431
1432 /*
1433 ** Change the lock status to be an exclusive or write lock. Return
1434 ** SQLITE_OK on success and SQLITE_BUSY on a failure. If this
1435 ** library was compiled with large file support (LFS) but LFS is not
1436 ** available on the host, then an SQLITE_NOLFS is returned.
1437 */
1438 int sqliteOsWriteLock(OsFile *id){
1439 #if OS_UNIX
1440 int rc;
1441 sqliteOsEnterMutex();
1442 if( id->pLock->cnt==0 || (id->pLock->cnt==1 && id->locked==1) ){
1443 struct flock lock;
1444 int s;
1445 lock.l_type = F_WRLCK;
1446 lock.l_whence = SEEK_SET;
1447 lock.l_start = lock.l_len = 0L;
1448 s = fcntl(id->fd, F_SETLK, &lock);
1449 if( s!=0 ){
1450 rc = (errno==EINVAL) ? SQLITE_NOLFS : SQLITE_BUSY;
1451 }else{
1452 rc = SQLITE_OK;
1453 if( !id->locked ){
1454 id->pOpen->nLock++;
1455 id->locked = 1;
1456 }
1457 id->pLock->cnt = -1;
1458 }
1459 }else{
1460 rc = SQLITE_BUSY;
1461 }
1462 sqliteOsLeaveMutex();
1463 return rc;
1464 #endif
1465 #if OS_WIN
1466 int rc;
1467 if( id->locked<0 ){
1468 rc = SQLITE_OK;
1469 }else{
1470 int res;
1471 int cnt = 100;
1472 while( cnt-->0 && (res = LockFile(id->h, FIRST_LOCKBYTE, 0, 1, 0))==0 ){
1473 Sleep(1);
1474 }
1475 if( res ){
1476 if( id->locked>0 ){
1477 if( isNT() ){
1478 UnlockFile(id->h, FIRST_LOCKBYTE+1, 0, N_LOCKBYTE, 0);
1479 }else{
1480 res = UnlockFile(id->h, FIRST_LOCKBYTE + id->locked, 0, 1, 0);
1481 }
1482 }
1483 if( res ){
1484 res = LockFile(id->h, FIRST_LOCKBYTE+1, 0, N_LOCKBYTE, 0);
1485 }else{
1486 res = 0;
1487 }
1488 UnlockFile(id->h, FIRST_LOCKBYTE, 0, 1, 0);
1489 }
1490 if( res ){
1491 id->locked = -1;
1492 rc = SQLITE_OK;
1493 }else{
1494 rc = SQLITE_BUSY;
1495 }
1496 }
1497 return rc;
1498 #endif
1499 #if OS_MAC
1500 int rc;
1501 if( id->locked<0 || id->refNumRF == -1 ){
1502 rc = SQLITE_OK;
1503 }else{
1504 OSErr res;
1505 int cnt = 5;
1506 ParamBlockRec params;
1507 memset(¶ms, 0, sizeof(params));
1508 params.ioParam.ioRefNum = id->refNumRF;
1509 params.ioParam.ioPosMode = fsFromStart;
1510 params.ioParam.ioPosOffset = FIRST_LOCKBYTE;
1511 params.ioParam.ioReqCount = 1;
1512 while( cnt-->0 && (res = PBLockRangeSync(¶ms))!=noErr ){
1513 UInt32 finalTicks;
1514 Delay(1, &finalTicks); /* 1/60 sec */
1515 }
1516 if( res == noErr ){
1517 params.ioParam.ioPosOffset = FIRST_LOCKBYTE + id->locked;
1518 params.ioParam.ioReqCount = 1;
1519 if( id->locked==0
1520 || PBUnlockRangeSync(¶ms)==noErr ){
1521 params.ioParam.ioPosOffset = FIRST_LOCKBYTE+1;
1522 params.ioParam.ioReqCount = N_LOCKBYTE;
1523 res = PBLockRangeSync(¶ms);
1524 }else{
1525 res = afpRangeNotLocked;
1526 }
1527 params.ioParam.ioPosOffset = FIRST_LOCKBYTE;
1528 params.ioParam.ioReqCount = 1;
1529 PBUnlockRangeSync(¶ms);
1530 }
1531 if( res == noErr ){
1532 id->locked = -1;
1533 rc = SQLITE_OK;
1534 }else{
1535 rc = SQLITE_BUSY;
1536 }
1537 }
1538 return rc;
1539 #endif
1540 }
1541
1542 /*
1543 ** Unlock the given file descriptor. If the file descriptor was
1544 ** not previously locked, then this routine is a no-op. If this
1545 ** library was compiled with large file support (LFS) but LFS is not
1546 ** available on the host, then an SQLITE_NOLFS is returned.
1547 */
1548 int sqliteOsUnlock(OsFile *id){
1549 #if OS_UNIX
1550 int rc;
1551 if( !id->locked ) return SQLITE_OK;
1552 sqliteOsEnterMutex();
1553 assert( id->pLock->cnt!=0 );
1554 if( id->pLock->cnt>1 ){
1555 id->pLock->cnt--;
1556 rc = SQLITE_OK;
1557 }else{
1558 struct flock lock;
1559 int s;
1560 lock.l_type = F_UNLCK;
1561 lock.l_whence = SEEK_SET;
1562 lock.l_start = lock.l_len = 0L;
1563 s = fcntl(id->fd, F_SETLK, &lock);
1564 if( s!=0 ){
1565 rc = (errno==EINVAL) ? SQLITE_NOLFS : SQLITE_BUSY;
1566 }else{
1567 rc = SQLITE_OK;
1568 id->pLock->cnt = 0;
1569 }
1570 }
1571 if( rc==SQLITE_OK ){
1572 /* Decrement the count of locks against this same file. When the
1573 ** count reaches zero, close any other file descriptors whose close
1574 ** was deferred because of outstanding locks.
1575 */
1576 struct openCnt *pOpen = id->pOpen;
1577 pOpen->nLock--;
1578 assert( pOpen->nLock>=0 );
1579 if( pOpen->nLock==0 && pOpen->nPending>0 ){
1580 int i;
1581 for(i=0; i<pOpen->nPending; i++){
1582 close(pOpen->aPending[i]);
1583 }
1584 sqliteFree(pOpen->aPending);
1585 pOpen->nPending = 0;
1586 pOpen->aPending = 0;
1587 }
1588 }
1589 sqliteOsLeaveMutex();
1590 id->locked = 0;
1591 return rc;
1592 #endif
1593 #if OS_WIN
1594 int rc;
1595 if( id->locked==0 ){
1596 rc = SQLITE_OK;
1597 }else if( isNT() || id->locked<0 ){
1598 UnlockFile(id->h, FIRST_LOCKBYTE+1, 0, N_LOCKBYTE, 0);
1599 rc = SQLITE_OK;
1600 id->locked = 0;
1601 }else{
1602 UnlockFile(id->h, FIRST_LOCKBYTE+id->locked, 0, 1, 0);
1603 rc = SQLITE_OK;
1604 id->locked = 0;
1605 }
1606 return rc;
1607 #endif
1608 #if OS_MAC
1609 int rc;
1610 ParamBlockRec params;
1611 memset(¶ms, 0, sizeof(params));
1612 params.ioParam.ioRefNum = id->refNumRF;
1613 params.ioParam.ioPosMode = fsFromStart;
1614 if( id->locked==0 || id->refNumRF == -1 ){
1615 rc = SQLITE_OK;
1616 }else if( id->locked<0 ){
1617 params.ioParam.ioPosOffset = FIRST_LOCKBYTE+1;
1618 params.ioParam.ioReqCount = N_LOCKBYTE;
1619 PBUnlockRangeSync(¶ms);
1620 rc = SQLITE_OK;
1621 id->locked = 0;
1622 }else{
1623 params.ioParam.ioPosOffset = FIRST_LOCKBYTE+id->locked;
1624 params.ioParam.ioReqCount = 1;
1625 PBUnlockRangeSync(¶ms);
1626 rc = SQLITE_OK;
1627 id->locked = 0;
1628 }
1629 return rc;
1630 #endif
1631 }
1632
1633 /*
1634 ** Get information to seed the random number generator. The seed
1635 ** is written into the buffer zBuf[256]. The calling function must
1636 ** supply a sufficiently large buffer.
1637 */
1638 int sqliteOsRandomSeed(char *zBuf){
1639 /* We have to initialize zBuf to prevent valgrind from reporting
1640 ** errors. The reports issued by valgrind are incorrect - we would
1641 ** prefer that the randomness be increased by making use of the
1642 ** uninitialized space in zBuf - but valgrind errors tend to worry
1643 ** some users. Rather than argue, it seems easier just to initialize
1644 ** the whole array and silence valgrind, even if that means less randomness
1645 ** in the random seed.
1646 **
1647 ** When testing, initializing zBuf[] to zero is all we do. That means
1648 ** that we always use the same random number sequence.* This makes the
1649 ** tests repeatable.
1650 */
1651 memset(zBuf, 0, 256);
1652 #if OS_UNIX && !defined(SQLITE_TEST)
1653 {
1654 int pid;
1655 time((time_t*)zBuf);
1656 pid = getpid();
1657 memcpy(&zBuf[sizeof(time_t)], &pid, sizeof(pid));
1658 }
1659 #endif
1660 #if OS_WIN && !defined(SQLITE_TEST)
1661 GetSystemTime((LPSYSTEMTIME)zBuf);
1662 #endif
1663 #if OS_MAC
1664 {
1665 int pid;
1666 Microseconds((UnsignedWide*)zBuf);
1667 pid = getpid();
1668 memcpy(&zBuf[sizeof(UnsignedWide)], &pid, sizeof(pid));
1669 }
1670 #endif
1671 return SQLITE_OK;
1672 }
1673
/*
** Suspend the caller for roughly "ms" milliseconds and return the
** number of milliseconds that were requested from the operating system.
**
** On Unix without usleep() the delay is rounded up to whole seconds.
** On MacOS the delay is converted to ticks of 1/60 second.
*/
int sqliteOsSleep(int ms){
#if OS_UNIX
# if defined(HAVE_USLEEP) && HAVE_USLEEP
  usleep(ms*1000);
  return ms;
# else
  int nSec = (ms+999)/1000;   /* round up to a whole second */
  sleep(nSec);
  return 1000*nSec;
# endif
#endif
#if OS_WIN
  Sleep(ms);
  return ms;
#endif
#if OS_MAC
  UInt32 endTicks;
  UInt32 nTick = (((UInt32)ms+16)*3)/50;   /* 1/60 sec per tick */
  Delay(nTick, &endTicks);
  return (int)((nTick*50)/3);
#endif
}
1698
/*
** File-scope state used for thread synchronization.  inMutex records
** whether the mutex is currently held and is checked by the assert()
** calls in the enter/leave routines below.  The remaining objects are
** the platform-specific lock primitives.
*/
static int inMutex = 0;
#ifdef SQLITE_UNIX_THREADS
static pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER;
#endif
#ifdef SQLITE_W32_THREADS
static CRITICAL_SECTION cs;
#endif
#ifdef SQLITE_MACOS_MULTITASKING
static MPCriticalRegionID criticalRegion;
#endif

/*
** sqliteOsEnterMutex() and sqliteOsLeaveMutex() implement mutual
** exclusion for multi-threaded processes.  Only one thread at a time may
** execute code bracketed by this pair of calls.
**
** SQLite uses only a single mutex.  There is not much critical code and
** what little there is executes quickly and without blocking.  The mutex
** must not already be held when sqliteOsEnterMutex() is called.
*/
void sqliteOsEnterMutex(){
#ifdef SQLITE_UNIX_THREADS
  pthread_mutex_lock(&mutex);
#endif
#ifdef SQLITE_W32_THREADS
  /* The critical section has no static initializer, so the first caller
  ** to bump the counter to 1 creates it while any others sleep and retry. */
  static int csReady = 0;
  while( !csReady ){
    static long initCount = 0;
    if( InterlockedIncrement(&initCount)!=1 ){
      Sleep(1);
    }else{
      InitializeCriticalSection(&cs);
      csReady = 1;
    }
  }
  EnterCriticalSection(&cs);
#endif
#ifdef SQLITE_MACOS_MULTITASKING
  static volatile int notInit = 1;   /* 1=not created 2=being created 0=ready */
  if( notInit ){
    if( notInit!=2 ){
      notInit = 2;
      MPCreateCriticalRegion(&criticalRegion);
      notInit = 0;
    }else{
      /* as close as you can get to thread safe init */
      MPYield();
    }
  }
  MPEnterCriticalRegion(criticalRegion, kDurationForever);
#endif
  assert( !inMutex );
  inMutex = 1;
}

/*
** Release the mutex taken by sqliteOsEnterMutex().  The mutex must be
** held when this routine is called.
*/
void sqliteOsLeaveMutex(){
  assert( inMutex );
  inMutex = 0;
#ifdef SQLITE_UNIX_THREADS
  pthread_mutex_unlock(&mutex);
#endif
#ifdef SQLITE_W32_THREADS
  LeaveCriticalSection(&cs);
#endif
#ifdef SQLITE_MACOS_MULTITASKING
  MPExitCriticalRegion(criticalRegion);
#endif
}
1767
/*
** Convert the pathname zRelative into a full pathname and return it in
** space obtained from sqliteMalloc().  The caller owns the returned
** string and must free it when it is no longer needed.
**
** Unix:    names starting with '/' are copied unchanged; anything else
**          is prefixed with the current working directory and a '/'.
** Windows: the conversion is delegated to GetFullPathName().  Returns 0
**          if the result buffer cannot be allocated.
** MacOS:   names starting with ':' have the ':' removed and the current
**          working directory prepended; names containing a ':' elsewhere
**          are copied unchanged; all other names get the current working
**          directory prepended.
**
** NOTE(review): the result of getcwd() is passed straight to
** sqliteSetString() without a NULL check - presumably a NULL ends the
** argument list early; confirm what callers get when getcwd() fails.
*/
char *sqliteOsFullPathname(const char *zRelative){
#if OS_UNIX
  char *zFull = 0;
  if( zRelative[0]!='/' ){
    char zCwd[5000];
    zCwd[0] = 0;
    sqliteSetString(&zFull, getcwd(zCwd, sizeof(zCwd)), "/", zRelative,
                    (char*)0);
  }else{
    /* Already absolute: just make a copy */
    sqliteSetString(&zFull, zRelative, (char*)0);
  }
  return zFull;
#endif
#if OS_WIN
  char *zFilePart;   /* required by the API; value is not used */
  char *zFull;
  int nByte;
  /* First call reports the size needed, second call fills the buffer */
  nByte = GetFullPathName(zRelative, 0, 0, &zFilePart) + 1;
  zFull = sqliteMalloc( nByte );
  if( zFull!=0 ){
    GetFullPathName(zRelative, nByte, zFull, &zFilePart);
  }
  return zFull;
#endif
#if OS_MAC
  char *zFull = 0;
  char zCwd[_MAX_PATH+1];
  if( zRelative[0]==':' ){
    sqliteSetString(&zFull, getcwd(zCwd, sizeof(zCwd)), &(zRelative[1]),
                    (char*)0);
  }else if( strchr(zRelative, ':') ){
    sqliteSetString(&zFull, zRelative, (char*)0);
  }else{
    sqliteSetString(&zFull, getcwd(zCwd, sizeof(zCwd)), zRelative, (char*)0);
  }
  return zFull;
#endif
}
1814
/*
** Test hook: when this variable is set to a non-zero value, it is used
** in place of the system clock by sqliteOsCurrentTime().  It is converted
** the same way as the Unix time() value.
*/
#ifdef SQLITE_TEST
int sqlite_current_time = 0;
#endif

/*
** Find the current time (in Universal Coordinated Time).  Write the
** current time and date as a Julian Day number into *prNow.
**
** The interface reserves a return value of 1 for "time not available",
** but this implementation always returns 0.
**
** NOTE(review): there is no OS_MAC branch here, so on MacOS *prNow
** appears to be left untouched unless the test hook is active - confirm.
*/
int sqliteOsCurrentTime(double *prNow){
#if OS_UNIX
  time_t now;
  time(&now);
  /* 86400 seconds per day; 2440587.5 is the offset added to the
  ** day count derived from time() */
  *prNow = now/86400.0 + 2440587.5;
#endif
#if OS_WIN
  /* FILETIME structure is a 64-bit value representing the number of
  ** 100-nanosecond intervals since January 1, 1601 (= JD 2305813.5).
  */
  FILETIME ft;
  double hiPart;
  GetSystemTimeAsFileTime( &ft );
  hiPart = ((double)ft.dwHighDateTime) * 4294967296.0;
  *prNow = (hiPart + ft.dwLowDateTime)/864000000000.0 + 2305813.5;
#endif
#ifdef SQLITE_TEST
  if( sqlite_current_time!=0 ){
    *prNow = sqlite_current_time/86400.0 + 2440587.5;
  }
#endif
  return 0;
}
1851