//  CMetadataImporter.m
//
//  Lisp Metadata Importer
//
//  Created by John Wiseman on 9/1/05.
//  Copyright 2005 John Wiseman.
//
//  Licensed under the MIT license--see the accompanying LICENSE.txt
//  file.

#import "CMetadataImporter.h"
#import "NSString_HMext.h"
#import "NSData_HMext.h"
#import "DebugLog.h"
#include "tree_sitter/api.h"
#include "tree-sitter/lib/src/lib.c"
#include "tree-sitter-java/src/parser.c"

@implementation CMetadataImporter

int MaxSourceSize = 500000; // Default maximum number of bytes that will be read for indexing purposes.
long NO_MAXIMUM = -1;       // Value of MaxSourceSize that means "read the whole file".

// All sorts of static data that we initialize once, then use many many times.
static BOOL StaticDataIsInitialized = NO;

// The tree-sitter Java grammar that every query below is compiled against.
static const TSLanguage* ts_language_java;

// Captures the scoped identifier of a package declaration as @package.
char* package_query_str = "(package_declaration (scoped_identifier) @package)";
TSQuery *package_query;

// Captures the name of each class declaration as @class.
char* class_query_str = "(class_declaration name: (identifier) @class)";
TSQuery* class_query;

// Captures the name of each interface declaration as @interface.
char* interface_query_str = "(interface_declaration name: (identifier) @interface)";
TSQuery* interface_query;

// Two patterns, one for classes and one for interfaces.  Each captures the
// enclosing type name, then the method name (@method), then the method's
// formal parameter list (@type).
char* method_query_str = "(class_declaration name: (identifier) @class body: (class_body (method_declaration name: (identifier) @method (formal_parameters) @type)))(interface_declaration name: (identifier) @interface body: (interface_body (method_declaration name: (identifier) @method (formal_parameters) @type)))";
TSQuery* method_query;

//static TSQuery method_query;
//static TSQuery field_query;
//
//static NSError *err = nil;

// Performs the one-time setup shared by every import: compiles the
// tree-sitter queries and reads the DebugLevel and MaxSourceSizeToIndex
// settings from the bundle's Info.plist.  Calls after the first return
// immediately.  A query that fails to compile is logged and left NULL.
- (void)initStaticData
{
    if (StaticDataIsInitialized) {
        return;
    }
    StaticDataIsInitialized = YES;

    // The queries are compiled against the Java grammar, so make sure the
    // language handle exists even if no caller has assigned it yet.
    if (ts_language_java == NULL) {
        ts_language_java = tree_sitter_java();
    }

    uint32_t error_offset = 0;
    TSQueryError error_type = TSQueryErrorNone;

    method_query = ts_query_new(ts_language_java, method_query_str,
                                (uint32_t)strlen(method_query_str),
                                &error_offset, &error_type);
    if (method_query == NULL || error_type != 0) {
        NSLog(@"error while initializing method query offset: %d, type: %d", error_offset, error_type);
    }

    error_offset = 0;
    error_type = TSQueryErrorNone;
    interface_query = ts_query_new(ts_language_java, interface_query_str,
                                   (uint32_t)strlen(interface_query_str),
                                   &error_offset, &error_type);
    if (interface_query == NULL || error_type != 0) {
        NSLog(@"error while initializing interface query offset: %d, type: %d", error_offset, error_type);
    }

    error_offset = 0;
    error_type = TSQueryErrorNone;
    class_query = ts_query_new(ts_language_java, class_query_str,
                               (uint32_t)strlen(class_query_str),
                               &error_offset, &error_type);
    if (class_query == NULL || error_type != 0) {
        NSLog(@"error while initializing class query offset: %d, type: %d", error_offset, error_type);
    }

    error_offset = 0;
    error_type = TSQueryErrorNone;
    package_query = ts_query_new(ts_language_java, package_query_str,
                                 (uint32_t)strlen(package_query_str),
                                 &error_offset, &error_type);
    if (package_query == NULL || error_type != 0) {
        NSLog(@"error while initializing package query offset: %d type: %d", error_offset, error_type);
    }

    // Find the bundle, and Info.plist.  Set the debug level specified
    // there, as well as the maximum file length to index.
    NSBundle *theBundle = [NSBundle bundleForClass:[self class]];
    NSObject *debugLevelObj = [theBundle objectForInfoDictionaryKey:@"DebugLevel"];
    if (debugLevelObj != nil) {
        SetDebugLogLevel(DebugLevelNameToValue((NSString*)debugLevelObj));
    }

    NSObject *maxSourceSizeObj = [theBundle objectForInfoDictionaryKey:@"MaxSourceSizeToIndex"];
    int max = 0;
    // The plist value may be missing or of an unexpected type; only ask it
    // for an int when it can actually answer.
    if ([maxSourceSizeObj respondsToSelector:@selector(intValue)]) {
        max = [(NSNumber*)maxSourceSizeObj intValue];
    }
    if (max != 0) {
        DebugLog(DEBUG_LEVEL_DEBUG, @"Using MaxSourceSize=%d", max);
        MaxSourceSize = max;
    } else {
        NSLog(@"Error parsing MaxSourceSizeToIndex, using %d", MaxSourceSize);
    }

    DebugLog(DEBUG_LEVEL_DEBUG, @"Static data has been initialized.");
}

// Candidate text encodings, in the order they are tried.
static NSStringEncoding PossibleSourceTextEncodings[] = {
    NSUTF8StringEncoding,
    NSMacOSRomanStringEncoding,
    NSISOLatin1StringEncoding,
    NSWindowsCP1252StringEncoding
};

// Tries to read the file using the encodings specified in
// PossibleSourceTextEncodings, in order, until one succeeds.
//
// There's probably a better way to do this (TEC Sniffers?).  The
// seemingly obvious way, stringWithContentsOfFile:usedEncoding:error,
// doesn't work--apparently it just does something minimal, like
// decide between UTF-8 and UTF-16 or something.
- (NSString*)readContentsOfFile:(NSString*)pathToFile error:(NSError**)theError
{
    // Returns the file's text, or nil if the file could not be read or
    // could not be decoded with any candidate encoding.  theError is only
    // passed through to the NSData read; a decoding failure does not set it.
    NSString *theSource = nil;
    NSData *data = nil;

    DebugLog(DEBUG_LEVEL_DEBUG, @"Indexing %@", pathToFile);

    // Read the file.
    if (MaxSourceSize == NO_MAXIMUM) {
        data = [NSData dataWithContentsOfFile:pathToFile options:0 error:theError];
    } else {
        data = [NSData dataWithContentsOfFile:pathToFile maxSize:MaxSourceSize error:theError];
        if ([data length] == (NSUInteger)MaxSourceSize) {
            // This is not absolutely certain to be correct, since the file might just have been
            // MaxSourceSize bytes long.
            DebugLog(DEBUG_LEVEL_DEBUG, @"Truncated indexing of '%@' to %d bytes", pathToFile, MaxSourceSize);
        }
    }
    if (data == nil) {
        return nil;
    }

    // Try to convert the file contents to a string by trying the candidate
    // encodings, in order.  The bound is the element count: sizeof() on the
    // array alone yields its size in bytes and would run past the end.
    const size_t encodingCount =
        sizeof(PossibleSourceTextEncodings) / sizeof(PossibleSourceTextEncodings[0]);
    for (size_t i = 0; i < encodingCount; i++) {
        NSStringEncoding theEncoding = PossibleSourceTextEncodings[i];
        DebugLog(DEBUG_LEVEL_VERBOSE, @"Trying encoding %lu", (unsigned long)theEncoding);
        theSource = [[[NSString alloc] initWithData:data encoding:theEncoding] autorelease];
        if (theSource != nil) {
            break;
        }
        DebugLog(DEBUG_LEVEL_DEBUG, @"Reading with encoding %lu failed.", (unsigned long)theEncoding);
    }

    return theSource;
}

// Appends the UTF-8 C string `inp` to the mutable array stored in
// `attributes` under `key`, unless the array already contains an equal
// string.  Returns NO (adding nothing) when `inp` is NULL or is not valid
// UTF-8; returns YES otherwise.
- (BOOL)addMatchesTo:(NSMutableDictionary *)attributes fromCString:(const char *)inp forKey:(NSString *)key
{
    if (inp == NULL) {
        return NO;
    }

    NSString *match = [NSString stringWithUTF8String:inp];
    if (match == nil) {
        // Adding nil to an NSMutableArray raises, so refuse it here.
        return NO;
    }

    NSMutableArray *values = [attributes objectForKey:key];
    if (![values containsObject:match]) {
        [values addObject:match];
    }
    return YES;
}

// This is the method that does all the importing and indexing work.
// It stuffs attributes into the specified dictionary.
// Parses the Java source at inPathToFile with tree-sitter and adds to
// inAttributes:
//   co_fwoar_java_package      the package name, if one is matched
//   co_fwoar_java_classes      qualified class names ("pkg.Class")
//   co_fwoar_java_interfaces   qualified interface names
//   co_fwoar_java_methods      "pkg.Type.method" followed by the parameter list text
//   co_fwoar_java_definitions  everything in the three lists above
//   kMDItemTextContent         the source text that was indexed
// When no package declaration is matched, names are left unqualified.
// inContentType is not used.  Returns YES on success; on any failure
// returns NO and leaves inAttributes untouched.
- (BOOL)importFile:(NSString *)inPathToFile contentType:(NSString *)inContentType attributes:(NSMutableDictionary *)inAttributes
{
    BOOL theResult = NO;
    TSTree *tree = NULL;

    TSParser *ts_parser = ts_parser_new();
    // initStaticData compiles its queries against ts_language_java, so the
    // language has to be assigned before that method is called below.
    ts_language_java = tree_sitter_java();
    bool languageAccepted = ts_parser_set_language(ts_parser, ts_language_java);

    TSQueryCursor *class_cursor = ts_query_cursor_new();
    TSQueryCursor *package_cursor = ts_query_cursor_new();

    NSAutoreleasePool *theAutoreleasePool = [[NSAutoreleasePool alloc] init];

    @try {
        // Everything is declared up front so that the early exits below
        // never jump over an initialization.
        NSError *error = nil;
        NSString *source = nil;
        NSString *qualifier = @"";   // "pkg." when a package was found, else empty
        NSMutableDictionary *moreAttributes = nil;
        const char *cstring_source = NULL;
        uint32_t source_length = 0;
        TSNode root_node;
        TSQueryMatch ts_match;
        TSQuery *typeQueries[2];
        NSString *typeKeys[2];
        int typeIndex;

        [self initStaticData];

        if (!languageAccepted || package_query == NULL || class_query == NULL ||
            interface_query == NULL || method_query == NULL) {
            NSLog(@"Tree Sitter (java) Metadata Importer: Could not process file '%@': parser or queries unavailable", inPathToFile);
            goto done;
        }

        source = [self readContentsOfFile:inPathToFile error:&error];
        if (source == nil) {
            if (error) {
                NSLog(@"Tree Sitter (java) Metadata Importer: Could not process file '%@': %@", inPathToFile, error);
            } else {
                NSLog(@"Tree Sitter (java) Metadata Importer: Could not process file '%@': unknown error", inPathToFile);
            }
            goto done;
        }
        NSLog(@"Processing file '%@'", inPathToFile);

        // Only process the first MaxSourceSize of the file.  To try to do more
        // invites the swapping death.  A negative MaxSourceSize means no limit.
        if (MaxSourceSize >= 0 && [source length] > (NSUInteger)MaxSourceSize) {
            source = [source substringToIndex:(NSUInteger)MaxSourceSize];
        }

        cstring_source = [source UTF8String];
        if (cstring_source == NULL) {
            NSLog(@"Tree Sitter (java) Metadata Importer: Could not process file '%@': source is not representable as UTF-8", inPathToFile);
            goto done;
        }
        source_length = (uint32_t)strlen(cstring_source);

        tree = ts_parser_parse_string(ts_parser, NULL, cstring_source, source_length);
        if (tree == NULL) {
            NSLog(@"Tree Sitter (java) Metadata Importer: Could not process file '%@': parse failed", inPathToFile);
            goto done;
        }
        root_node = ts_tree_root_node(tree);

        moreAttributes = [[[NSMutableDictionary alloc] initWithCapacity:6] autorelease];
        [moreAttributes setObject:[NSMutableArray arrayWithCapacity:100] forKey:@"co_fwoar_java_package"];
        [moreAttributes setObject:[NSMutableArray arrayWithCapacity:100] forKey:@"co_fwoar_java_classes"];
        [moreAttributes setObject:[NSMutableArray arrayWithCapacity:100] forKey:@"co_fwoar_java_interfaces"];
        [moreAttributes setObject:[NSMutableArray arrayWithCapacity:100] forKey:@"co_fwoar_java_methods"];
        [moreAttributes setObject:[NSMutableArray arrayWithCapacity:100] forKey:@"co_fwoar_java_definitions"];

        // Package: only the first match is used.
        ts_query_cursor_exec(package_cursor, package_query, root_node);
        if (ts_query_cursor_next_match(package_cursor, &ts_match) && ts_match.capture_count > 0) {
            NSString *packageName = [self textOfNode:ts_match.captures[0].node
                                            inSource:cstring_source
                                              length:source_length];
            if (packageName != nil) {
                [self addMatchesTo:moreAttributes fromCString:[packageName UTF8String] forKey:@"co_fwoar_java_package"];
                qualifier = [packageName stringByAppendingString:@"."];
            }
        }

        // Classes, then interfaces: the two passes differ only in the query
        // that is run and the key the names are filed under.
        typeQueries[0] = class_query;
        typeKeys[0] = @"co_fwoar_java_classes";
        typeQueries[1] = interface_query;
        typeKeys[1] = @"co_fwoar_java_interfaces";

        for (typeIndex = 0; typeIndex < 2; typeIndex++) {
            ts_query_cursor_exec(class_cursor, typeQueries[typeIndex], root_node);
            while (ts_query_cursor_next_match(class_cursor, &ts_match)) {
                if (ts_match.capture_count < 1) {
                    continue;
                }
                NSString *typeName = [self textOfNode:ts_match.captures[0].node
                                             inSource:cstring_source
                                               length:source_length];
                if (typeName == nil) {
                    continue;
                }
                const char *qualifiedName = [[qualifier stringByAppendingString:typeName] UTF8String];
                [self addMatchesTo:moreAttributes fromCString:qualifiedName forKey:typeKeys[typeIndex]];
                [self addMatchesTo:moreAttributes fromCString:qualifiedName forKey:@"co_fwoar_java_definitions"];
            }
        }

        // Methods: capture 0 is the enclosing type name, capture 1 the method
        // name, and any remaining captures are appended verbatim.
        ts_query_cursor_exec(class_cursor, method_query, root_node);
        while (ts_query_cursor_next_match(class_cursor, &ts_match)) {
            if (ts_match.capture_count < 2) {
                continue;
            }
            NSString *ownerName = [self textOfNode:ts_match.captures[0].node
                                          inSource:cstring_source
                                            length:source_length];
            NSString *methodName = [self textOfNode:ts_match.captures[1].node
                                           inSource:cstring_source
                                             length:source_length];
            if (ownerName == nil || methodName == nil) {
                continue;
            }

            NSMutableString *signature =
                [NSMutableString stringWithFormat:@"%@%@.%@", qualifier, ownerName, methodName];
            for (uint16_t next = 2; next < ts_match.capture_count; next++) {
                NSString *piece = [self textOfNode:ts_match.captures[next].node
                                          inSource:cstring_source
                                            length:source_length];
                if (piece != nil) {
                    [signature appendString:piece];
                }
            }

            const char *signatureCString = [signature UTF8String];
            [self addMatchesTo:moreAttributes fromCString:signatureCString forKey:@"co_fwoar_java_methods"];
            [self addMatchesTo:moreAttributes fromCString:signatureCString forKey:@"co_fwoar_java_definitions"];
        }

        // Add the complete source code as metadata.
        [moreAttributes setObject:source forKey:@"kMDItemTextContent"];
        [inAttributes addEntriesFromDictionary:moreAttributes];
        theResult = YES;

    done:
        ;
    }
    @catch (NSException *localException) {
        NSLog(@"Tree Sitter (java) Metadata Importer: Could not process file '%@' (Exception: %@)", inPathToFile, localException);
    }
    @finally {
        // Runs on success, early exit and exception alike, so nothing
        // allocated above outlives this call.
        if (tree != NULL) {
            ts_tree_delete(tree);
        }
        ts_query_cursor_delete(class_cursor);
        ts_query_cursor_delete(package_cursor);
        ts_parser_delete(ts_parser);
        [theAutoreleasePool release];
    }

    return theResult;
}

// Private helper: returns the text that `node` spans within the UTF-8
// buffer `source` (`length` bytes long), or nil if the node's byte range
// lies outside the buffer or the bytes do not decode as UTF-8.
- (NSString *)textOfNode:(TSNode)node inSource:(const char *)source length:(uint32_t)length
{
    uint32_t startByte = ts_node_start_byte(node);
    uint32_t endByte = ts_node_end_byte(node);

    if (source == NULL || startByte > endByte || endByte > length) {
        return nil;
    }
    return [[[NSString alloc] initWithBytes:source + startByte
                                     length:endByte - startByte
                                   encoding:NSUTF8StringEncoding] autorelease];
}

@end