adium 3842:c6a0c0da810e: Pipeline reading from disk and file parsing.

commits at adium.im commits at adium.im
Wed Apr 6 04:56:52 UTC 2011


details:	http://hg.adium.im/adium/rev/c6a0c0da810e
revision:	3842:c6a0c0da810e
branch:		AILoggerWithBlocks
author:		Stephen Holt <sholt at adium.im>
date:		Tue Apr 05 21:56:32 2011 -0700

Pipeline reading from disk and file parsing.  Save a few file IOs and allow for more efficient disk reads.

diffs (427 lines):

diff -r c6fcf1eb8953 -r c6a0c0da810e Other/Adium Spotlight Importer/GetMetadataForFile.m
--- a/Other/Adium Spotlight Importer/GetMetadataForFile.m	Sat Apr 02 16:35:33 2011 -0700
+++ b/Other/Adium Spotlight Importer/GetMetadataForFile.m	Tue Apr 05 21:56:32 2011 -0700
@@ -45,7 +45,7 @@
 ----------------------------------------------------------------------------- */
 
 Boolean GetMetadataForXMLLog(NSMutableDictionary *attributes, NSString *pathToFile);
-NSString *GetTextContentForXMLLog(NSString *pathToFile);
+NSString *CopyTextContentForXMLLogData(NSData *logData);
 
 Boolean GetMetadataForFile(void* thisInterface, 
 						   CFMutableDictionaryRef attributes, 
@@ -73,6 +73,73 @@
     return success;
 }
 
+static CFStringRef ResolveUTI(CFStringRef contentTypeUTI, NSURL *urlToFile) {
+    //Determine the UTI type if we weren't passed one
+    CFStringRef pathExtension = (CFStringRef)[urlToFile pathExtension];
+	if (contentTypeUTI == NULL) {
+		if (CFStringCompare(pathExtension, CFSTR("chatLog"), (kCFCompareBackwards | kCFCompareCaseInsensitive)) == kCFCompareEqualTo) {
+			contentTypeUTI = CFSTR("com.adiumx.xmllog");
+		} else if (CFStringCompare(pathExtension, CFSTR("AdiumXMLLog"), (kCFCompareBackwards | kCFCompareCaseInsensitive)) == kCFCompareEqualTo) {
+			contentTypeUTI = CFSTR("com.adiumx.xmllog");
+		} else {
+			//Treat all other log extensions as HTML logs (plaintext will come out fine this way, too)
+			contentTypeUTI = CFSTR("com.adiumx.htmllog");
+		}
+	}
+    return contentTypeUTI;
+}
+
+NSData *CopyDataForURL(CFStringRef contentTypeUTI, NSURL *urlToFile) {
+    NSAutoreleasePool	*pool = [[NSAutoreleasePool alloc] init];
+	NSData			*content;
+	contentTypeUTI = ResolveUTI(contentTypeUTI, urlToFile);
+    
+	if (CFEqual(contentTypeUTI, CFSTR("com.adiumx.htmllog"))) {
+		content = [[NSData alloc] initWithContentsOfURL:urlToFile options:NSDataReadingUncached error:NULL];
+	} else if (CFEqual(contentTypeUTI, CFSTR("com.adiumx.xmllog"))) {
+		BOOL isDir;
+        NSString *path = [urlToFile path];
+		if ([[NSFileManager defaultManager] fileExistsAtPath:path isDirectory:&isDir]) {
+            if (isDir) {
+                /* If we have a chatLog bundle, we want to get the text content for the xml file inside */
+                urlToFile = [NSURL fileURLWithPath:[path stringByAppendingPathComponent:[[[path lastPathComponent] stringByDeletingPathExtension] stringByAppendingPathExtension:@"xml"]]];
+            }
+			
+			content = [[NSData alloc] initWithContentsOfURL:urlToFile options:NSUncachedRead error:NULL];
+            
+		} else {
+			content = nil;
+		}
+		
+	} else {
+		content = nil;
+		NSLog(@"We were passed %@, of type %@, which is an unknown type", urlToFile, contentTypeUTI);
+	}
+    
+	[pool release];
+	
+	return content;
+}
+
+NSData *CopyDataForFile(CFStringRef contentTypeUTI, CFStringRef pathToFile) {
+    return CopyDataForURL(contentTypeUTI, [NSURL fileURLWithPath:(NSString *)pathToFile]);
+}
+
+CFStringRef CopyTextContentForFileData(CFStringRef contentTypeUTI, NSURL *urlToFile, NSData *fileData) {
+    if (!fileData) return NULL;
+        
+    contentTypeUTI = ResolveUTI(contentTypeUTI, urlToFile);
+    
+    NSString *result = nil;
+    
+    if (CFEqual(contentTypeUTI,CFSTR("com.adiumx.htmllog"))) {
+        result = CopyTextContentForHTMLLogData(fileData);
+	} else if (CFEqual(contentTypeUTI, CFSTR("com.adiumx.xmllog"))) {
+        result = CopyTextContentForXMLLogData(fileData);
+    }
+    return (CFStringRef)result;
+}
+
 /*!
  * @brief Copy the text content for a file
  *
@@ -86,46 +153,9 @@
 CFStringRef CopyTextContentForFile(CFStringRef contentTypeUTI,
 								   CFStringRef pathToFile)
 {
-	NSAutoreleasePool	*pool;
-	CFStringRef			textContent;
-	pool = [[NSAutoreleasePool alloc] init];
-	
-	//Deteremine the UTI type if we weren't passed one
-	if (contentTypeUTI == NULL) {
-		if (CFStringCompare((CFStringRef)[(NSString *)pathToFile pathExtension],
-							CFSTR("chatLog"),
-							(kCFCompareBackwards | kCFCompareCaseInsensitive)) == kCFCompareEqualTo) {
-			contentTypeUTI = CFSTR("com.adiumx.xmllog");
-		} else if (CFStringCompare((CFStringRef)[(NSString *)pathToFile pathExtension],
-							CFSTR("AdiumXMLLog"),
-							(kCFCompareBackwards | kCFCompareCaseInsensitive)) == kCFCompareEqualTo) {
-			contentTypeUTI = CFSTR("com.adiumx.xmllog");
-		} else {
-			//Treat all other log extensions as HTML logs (plaintext will come out fine this way, too)
-			contentTypeUTI = CFSTR("com.adiumx.htmllog");
-		}
-	}
-		
-	if (CFStringCompare(contentTypeUTI, CFSTR("com.adiumx.htmllog"), kCFCompareBackwards) == kCFCompareEqualTo) {
-		textContent = (CFStringRef)GetTextContentForHTMLLog((NSString *)pathToFile);
-	} else if (CFStringCompare(contentTypeUTI, (CFStringRef)@"com.adiumx.xmllog", kCFCompareBackwards) == kCFCompareEqualTo) {
-		BOOL isDir;
-		if ([[NSFileManager defaultManager] fileExistsAtPath:(NSString *)pathToFile isDirectory:&isDir]) {
-			/* If we have a chatLog bundle, we want to get the text content for the xml file inside */
-			if (isDir) pathToFile = (CFStringRef)[(NSString *)pathToFile stringByAppendingPathComponent:
-									 [[[(NSString *)pathToFile lastPathComponent] stringByDeletingPathExtension] stringByAppendingPathExtension:@"xml"]];
-			
-			textContent = (CFStringRef)GetTextContentForXMLLog((NSString *)pathToFile);
-		} else {
-			textContent = nil;
-		}
-		
-	} else {
-		textContent = nil;
-		NSLog(@"We were passed %@, of type %@, which is an unknown type",pathToFile,contentTypeUTI);
-	}
-
-	if (textContent) CFRetain(textContent);
+	NSAutoreleasePool	*pool = [[NSAutoreleasePool alloc] init];
+    NSData *logData = CopyDataForFile(contentTypeUTI, pathToFile);
+	CFStringRef	textContent = CopyTextContentForFileData(contentTypeUTI, [NSURL fileURLWithPath:(NSString *)pathToFile], logData);
 	[pool release];
 	
 	return textContent;
@@ -275,19 +305,13 @@
     return ret;
 }
 
-NSString *GetTextContentForXMLLog(NSString *pathToFile)
-{
-	NSError *err=nil;
-	NSURL *furl = [NSURL fileURLWithPath:(NSString *)pathToFile];
-	NSString *contentString = nil;
-	NSXMLDocument *xmlDoc = nil;
-	NSData *data = [NSData dataWithContentsOfURL:furl options:NSUncachedRead error:&err];
-	if (data) {
-		xmlDoc = [[NSXMLDocument alloc] initWithData:data options:NSXMLNodePreserveCDATA error:&err];
-		if (xmlDoc) {
-			contentString = [xmlDoc stringValue];
-			[xmlDoc release];
-		}
-	}
-	return contentString;
+NSString *CopyTextContentForXMLLogData(NSData *data) {
+    NSString *contentString = nil;
+    NSXMLDocument *xmlDoc = [[NSXMLDocument alloc] initWithData:data options:NSXMLNodePreserveCDATA error:NULL];
+    if (xmlDoc) {
+        NSArray *contentArray = [xmlDoc nodesForXPath:@"//message//text()" error:NULL];
+		contentString = [contentArray componentsJoinedByString:@" "];
+        [xmlDoc release];
+    }
+    return contentString;
 }
diff -r c6fcf1eb8953 -r c6a0c0da810e Other/Adium Spotlight Importer/GetMetadataForHTMLLog.h
--- a/Other/Adium Spotlight Importer/GetMetadataForHTMLLog.h	Sat Apr 02 16:35:33 2011 -0700
+++ b/Other/Adium Spotlight Importer/GetMetadataForHTMLLog.h	Tue Apr 05 21:56:32 2011 -0700
@@ -17,4 +17,4 @@
 #import <Foundation/Foundation.h>
 
 Boolean GetMetadataForHTMLLog(NSMutableDictionary *attributes, NSString *pathToFile);
-NSString *GetTextContentForHTMLLog(NSString *pathToFile);
+NSString *CopyTextContentForHTMLLogData(NSData *logData);
diff -r c6fcf1eb8953 -r c6a0c0da810e Other/Adium Spotlight Importer/GetMetadataForHTMLLog.m
--- a/Other/Adium Spotlight Importer/GetMetadataForHTMLLog.m	Sat Apr 02 16:35:33 2011 -0700
+++ b/Other/Adium Spotlight Importer/GetMetadataForHTMLLog.m	Tue Apr 05 21:56:32 2011 -0700
@@ -51,38 +51,20 @@
 	return date;
 }
 
-NSString *GetTextContentForHTMLLog(NSString *pathToFile)
-{
-	/* Perhaps we want to decode the HTML instead of stripping it so we can process
+NSString *CopyTextContentForHTMLLogData(NSData *logData) {
+    /* Perhaps we want to decode the HTML instead of stripping it so we can process
 	 * the attributed contents to turn links into link (URL) for searching purposes...
 	 */
-	NSString *textContent;
-
-	NSMutableData *UTF8Data = nil;
-	char *UTF8HTMLCString = nil;
-
-	int fd = open([pathToFile fileSystemRepresentation], O_RDONLY);
-	if (fd > -1) {
-		struct stat sb;
-		if (fstat(fd, &sb) == 0) {
-			UTF8Data = [NSMutableData dataWithLength:(NSUInteger)(sb.st_size + 1ULL)];
-			UTF8HTMLCString = [UTF8Data mutableBytes];
-			if (UTF8HTMLCString != NULL)
-				read(fd, UTF8HTMLCString, (size_t)sb.st_size);
-		}
-		close(fd);
-	}
-
-	if (UTF8HTMLCString) {
+	NSString *textContent = nil;
+    const char* UTF8HTMLCString = [logData bytes];
+    if (UTF8HTMLCString) {
 		//Strip the HTML markup
 		char *plainText = gaim_markup_strip_html(UTF8HTMLCString);
-		textContent = [NSString stringWithUTF8String:plainText];
+		textContent = [[NSString alloc] initWithUTF8String:plainText];
 		free((void *)plainText);
-	} else {
-		textContent = nil;
 	}
-
-	return textContent;
+    
+    return textContent;
 }
 
 Boolean GetMetadataForHTMLLog(NSMutableDictionary *attributes, NSString *pathToFile)
@@ -122,11 +104,15 @@
 					   forKey:(NSString *)kMDItemLastUsedDate];
 	}
 	
-	NSString *textContent;
-	if ((textContent = GetTextContentForHTMLLog(pathToFile))) {
+    NSData *logData = [[NSData alloc] initWithContentsOfURL:[NSURL fileURLWithPath:pathToFile isDirectory:NO]
+                                                    options:NSDataReadingUncached error:NULL];
+    NSString *textContent = nil;
+	if ((textContent = CopyTextContentForHTMLLogData(logData))) {
 		[attributes setObject:textContent
 					   forKey:(NSString *)kMDItemTextContent];
 	}
+    [logData release];
+    [textContent release];
 	
 	[attributes setObject:serviceClass
 				   forKey:@"com_adiumX_service"];
diff -r c6fcf1eb8953 -r c6a0c0da810e Source/AILoggerPlugin.m
--- a/Source/AILoggerPlugin.m	Sat Apr 02 16:35:33 2011 -0700
+++ b/Source/AILoggerPlugin.m	Tue Apr 05 21:56:32 2011 -0700
@@ -83,6 +83,11 @@
 #pragma mark -
 #pragma mark Private Interface
 #pragma mark -
+
+//GetMetadataForFile.m
+NSData *CopyDataForURL(CFStringRef contentTypeUTI, NSURL *urlToFile);
+CFStringRef CopyTextContentForFileData(CFStringRef contentTypeUTI, NSURL *urlToFile, NSData *fileData);
+
 @interface AILoggerPlugin ()
 // class methods
 + (NSString *)pathForLogsLikeChat:(AIChat *)chat;
@@ -155,7 +160,7 @@
 
 #pragma mark Private Function Prototypes
 void runWithAutoreleasePool(dispatch_block_t block);
-dispatch_block_t blockWithAutoreleasePool(dispatch_block_t block);
+static inline dispatch_block_t blockWithAutoreleasePool(dispatch_block_t block);
 NSCalendarDate* getDateFromPath(NSString *path);
 NSComparisonResult sortPaths(NSString *path1, NSString *path2, void *context);
 
@@ -173,6 +178,7 @@
 static dispatch_queue_t     dirtyLogSetMutationQueue;
 static dispatch_queue_t     searchIndexQueue;
 static dispatch_queue_t     activeAppendersMutationQueue;
+static dispatch_queue_t     addToSearchKitQueue;
 
 static dispatch_queue_t     ioQueue;
 
@@ -182,6 +188,7 @@
 static dispatch_group_t		loggerPluginGroup;
 
 static dispatch_semaphore_t jobSemaphore;
+static dispatch_semaphore_t logLoadingPrefetchSemaphore; //limit prefetching log data to N-1 ahead
 
 @implementation AILoggerPlugin
 @synthesize dirtyLogSet, indexingAllowed, loggingEnabled, logsToIndex, logsIndexed, canCloseIndex, canSaveDirtyLogSet, activeAppenders, logHTML, xhtmlDecoder, statusTranslation, isIndexing, indexIsFlushing;
@@ -208,6 +215,8 @@
 	dirtyLogSetMutationQueue = dispatch_queue_create("im.adium.AILoggerPlugin.dirtyLogSetMutationQueue", 0);
 	searchIndexQueue = dispatch_queue_create("im.adium.AILoggerPlugin.searchIndexFlushingQueue", 0);
 	activeAppendersMutationQueue = dispatch_queue_create("im.adium.AILoggerPlugin.activeAppendersMutationQueue", 0);
+    addToSearchKitQueue = dispatch_queue_create("im.adium.AILoggerPlugin.searchIndexAddingQueue", 0);
+    
 	logIndexingGroup = dispatch_group_create();
 	closingIndexGroup = dispatch_group_create();
 	logAppendingGroup = dispatch_group_create();
@@ -217,6 +226,7 @@
 	
 	NSUInteger cpuCount = [[NSProcessInfo processInfo] activeProcessorCount];	
 	jobSemaphore = dispatch_semaphore_create(cpuCount + AIfloor(cpuCount/2));
+    logLoadingPrefetchSemaphore = dispatch_semaphore_create(2); //prefetch one log
 	
 	
 	self.xhtmlDecoder = [[AIHTMLDecoder alloc] initWithHeaders:NO
@@ -532,7 +542,7 @@
 	[pool release];
 }
 
-dispatch_block_t blockWithAutoreleasePool(dispatch_block_t block)
+static inline dispatch_block_t blockWithAutoreleasePool(dispatch_block_t block)
 {
 	return [[^{
 		runWithAutoreleasePool(block);
@@ -1436,10 +1446,13 @@
 				});
 				logPath = [[__logPath copy] autorelease];
 				if (logPath) {
+                    NSURL *logURL = [NSURL fileURLWithPath:logPath];
+                    if (!logURL)
+                        NSLog(@"Uh oh");
 					dispatch_semaphore_wait(jobSemaphore, DISPATCH_TIME_FOREVER);
 					dispatch_group_async(logIndexingGroup, ioQueue, blockWithAutoreleasePool(^{
 						CFRetain(searchIndex);
-						__block SKDocumentRef document = SKDocumentCreateWithURL((CFURLRef)[NSURL fileURLWithPath:logPath]);
+						__block SKDocumentRef document = SKDocumentCreateWithURL((CFURLRef)logURL);
 						if (document && bself.indexingAllowed) {
 							/* We _could_ use SKIndexAddDocument() and depend on our Spotlight plugin for importing.
 							 * However, this has three problems:
@@ -1448,39 +1461,44 @@
 							 *  2. Sometimes logs don't appear to be associated with the right URI type and therefore don't get indexed.
 							 *  3. On 10.3, this means that logs' markup is indexed in addition to their text, which is undesireable.
 							 */
-							__block CFStringRef documentText = CopyTextContentForFile(NULL, (CFStringRef)logPath);
-							dispatch_group_async(logIndexingGroup, defaultDispatchQueue, blockWithAutoreleasePool(^{
-								CFRetain(searchIndex);
-								if (documentText && bself.indexingAllowed) {
-									SKIndexAddDocumentWithText(searchIndex,
-															   document,
-															   documentText,
-															   YES);
-									CFRelease(documentText);
-								} else if (documentText) {
-									CFRelease(documentText);
-								}
+                            NSData *documentData = [CopyDataForURL(NULL, logURL) autorelease];
+                            dispatch_group_async(logIndexingGroup, defaultDispatchQueue, blockWithAutoreleasePool(^{
+                                __block CFStringRef documentText = CopyTextContentForFileData(NULL, logURL, documentData);
+								if (documentText)
+									CFRetain(documentText);
+                                dispatch_group_async(logIndexingGroup, defaultDispatchQueue, blockWithAutoreleasePool(^{
+                                    CFRetain(searchIndex);
+                                    if (documentText && bself.indexingAllowed) {
+                                        SKIndexAddDocumentWithText(searchIndex,
+                                                                   document,
+                                                                   documentText,
+                                                                   YES);
+                                        CFRelease(documentText);
+                                    } else if (documentText) {
+                                        CFRelease(documentText);
+                                    }
 
-								CFRelease(document);
-								
-								OSAtomicIncrement64Barrier((int64_t *)&(bself->logsIndexed));
-								OSAtomicDecrement64Barrier((int64_t *)&_remainingLogs);
-								if (lastUpdate == 0 || TickCount() > lastUpdate + LOG_INDEX_STATUS_INTERVAL || _remainingLogs == 0) {
-									dispatch_async(dispatch_get_main_queue(), ^{
-										[[AILogViewerWindowController existingWindowController] logIndexingProgressUpdate];
-									});
-									UInt32 tick = TickCount();
-									OSAtomicCompareAndSwap32Barrier(lastUpdate, tick, (int32_t *)&lastUpdate);
-								}
-								
-								OSAtomicIncrement32Barrier((int32_t *)&unsavedChanges);
-								if (unsavedChanges > LOG_CLEAN_SAVE_INTERVAL) {
-									[bself _saveDirtyLogSet];
-									OSAtomicCompareAndSwap32Barrier(unsavedChanges, 0, (int32_t *)&unsavedChanges);
-								}
-								CFRelease(searchIndex);
-								dispatch_semaphore_signal(jobSemaphore);
-							}));
+                                    dispatch_semaphore_signal(jobSemaphore);
+                                    CFRelease(document);
+                                    
+                                    OSAtomicIncrement64Barrier((int64_t *)&(bself->logsIndexed));
+                                    OSAtomicDecrement64Barrier((int64_t *)&_remainingLogs);
+                                    if (lastUpdate == 0 || TickCount() > lastUpdate + LOG_INDEX_STATUS_INTERVAL || _remainingLogs == 0) {
+                                        dispatch_async(dispatch_get_main_queue(), ^{
+                                            [[AILogViewerWindowController existingWindowController] logIndexingProgressUpdate];
+                                        });
+                                        UInt32 tick = TickCount();
+                                        OSAtomicCompareAndSwap32Barrier(lastUpdate, tick, (int32_t *)&lastUpdate);
+                                    }
+                                    
+                                    OSAtomicIncrement32Barrier((int32_t *)&unsavedChanges);
+                                    if (unsavedChanges > LOG_CLEAN_SAVE_INTERVAL) {
+                                        [bself _saveDirtyLogSet];
+                                        OSAtomicCompareAndSwap32Barrier(unsavedChanges, 0, (int32_t *)&unsavedChanges);
+                                    }
+                                    CFRelease(searchIndex);
+                                }));
+                            }));
 						} else {
 							AILogWithSignature(@"Could not create document for %@ [%@]",logPath,[NSURL fileURLWithPath:logPath]);
 							CFRelease(document);
@@ -1502,6 +1520,7 @@
 				dispatch_async(dispatch_get_main_queue(), ^{
 					[[AILogViewerWindowController existingWindowController] logIndexingProgressUpdate];
 				});
+				[bself _flushIndex:searchIndex];
 				AILogWithSignature(@"After cleaning dirty logs, the search index has a max ID of %i and a count of %i",
 								   SKIndexGetMaximumDocumentID(searchIndex),
 								   SKIndexGetDocumentCount(searchIndex));
diff -r c6fcf1eb8953 -r c6a0c0da810e Source/AIObjectDebug.m
--- a/Source/AIObjectDebug.m	Sat Apr 02 16:35:33 2011 -0700
+++ b/Source/AIObjectDebug.m	Tue Apr 05 21:56:32 2011 -0700
@@ -45,13 +45,13 @@
 	if (sel_isEqual(aSelector, @selector(description)) || sel_isEqual(aSelector, @selector(doesNotRecognizeSelector:))) {
 		//we're hosed.
 		NSLog(@"Avoiding infinite recursion in doesNotRecognizeSelector:");
-		*((int*)0xdeadbeef) = 42;
+		abort();
 		return;
 	} else {
 		NSLog(@"%@ of class %@ does not respond to selector %@", self, [self class], NSStringFromSelector(aSelector));
 	}
 	__crashreporter_info__ = (char *)[[NSString stringWithFormat:@"Dear crash reporter team: We only put stuff here in debug builds of Adium. Don't Panic, it won't ship in a release unless there's public API for it.\n\n %@ of class %@ does not respond to selector %s", self, [self class], aSelector] cStringUsingEncoding:NSASCIIStringEncoding];
-	*((int*)0xdeadbeef) = 42;
+    abort();
 }
 
 @end




More information about the commits mailing list