git.fiddlerwoaroof.com
Source/CMetadataImporter.m
9cd4cff1
 //  CMetadataImporter.m
 //
 //  Lisp Metadata Importer
 //
 //  Created by John Wiseman on 9/1/05.
 //  Copyright 2005 John Wiseman.
 //
 //  Licensed under the MIT license--see the accompanying LICENSE.txt
 //  file.
 
 #import "CMetadataImporter.h"
 
 #import "NSString_HMext.h"
 #import "NSData_HMext.h"
 #import "DebugLog.h"
6de0256e
 #include "tree_sitter/api.h"
 #include "tree-sitter/lib/src/lib.c"
 #include "tree-sitter-java/src/parser.c"
9cd4cff1
 
 @implementation CMetadataImporter
 
 
 // Default maximum number of bytes that will be read for indexing purposes.
 // -initStaticData overwrites this with the bundle's MaxSourceSizeToIndex
 // Info.plist value whenever that value is non-zero.
 int MaxSourceSize = 500000;
 // Sentinel: when MaxSourceSize equals this value, -readContentsOfFile:error:
 // reads the whole file instead of passing a size cap.
 long NO_MAXIMUM = -1;
 
 
 // Static data that is initialized once and then reused for every file.

 // Set to YES by -initStaticData the first time it runs.
 static BOOL StaticDataIsInitialized = NO;

 // The tree-sitter Java grammar; assigned from tree_sitter_java() before the
 // queries below are compiled.
 static const TSLanguage* ts_language_java;

 // Each query is kept as its source text plus the compiled TSQuery that
 // -initStaticData builds from it.  The source strings are literals, so they
 // are declared const.

 // Captures the scoped identifier of a package declaration as @package.
 const char* package_query_str = "(package_declaration (scoped_identifier) @package)";
 TSQuery *package_query;

 // Captures the name of each class declaration as @class.
 const char* class_query_str = "(class_declaration name: (identifier) @class)";
 TSQuery* class_query;

 // Captures the name of each interface declaration as @interface.
 const char* interface_query_str = "(interface_declaration name: (identifier) @interface)";
 TSQuery* interface_query;

 // Two patterns: for a method declared directly in a class body or in an
 // interface body, captures the owning type's name (@class / @interface), the
 // method name (@method) and the formal parameter list (@type).
 const char* method_query_str = "(class_declaration name: (identifier) @class body: (class_body (method_declaration name: (identifier) @method (formal_parameters) @type)))(interface_declaration name: (identifier) @interface body: (interface_body (method_declaration name: (identifier) @method (formal_parameters) @type)))";
 TSQuery* method_query;
9cd4cff1
 
 // Compiles `source` into a tree-sitter query for `language`.
 //
 // `label` is only used in the log message.  Returns the compiled query, or
 // NULL after logging the error offset and type when compilation fails.
 static TSQuery *CompileJavaQuery(const TSLanguage *language, const char *label, const char *source)
 {
     // Pre-set both out-parameters so the log below never prints garbage.
     uint32_t errorOffset = 0;
     TSQueryError errorType = TSQueryErrorNone;

     TSQuery *query = ts_query_new(language, source, (uint32_t)strlen(source), &errorOffset, &errorType);
     // A NULL return is the failure signal; the out-parameters only describe it.
     if (query == NULL)
     {
         NSLog(@"error while initializing %s query offset: %u, type: %d", label, errorOffset, (int)errorType);
     }
     return query;
 }

 // One-time setup: compiles the four tree-sitter queries and reads the
 // DebugLevel and MaxSourceSizeToIndex settings from the bundle's Info.plist.
 // Subsequent calls return immediately.
 //
 // A query that fails to compile is logged and left NULL.
 - (void)initStaticData
 {
     if (StaticDataIsInitialized)
     {
         return;
     }
     StaticDataIsInitialized = YES;

     // The queries are compiled against the Java grammar, so make sure it is
     // available even if the caller has not assigned it yet.
     if (ts_language_java == NULL)
     {
         ts_language_java = tree_sitter_java();
     }

     method_query = CompileJavaQuery(ts_language_java, "method", method_query_str);
     interface_query = CompileJavaQuery(ts_language_java, "interface", interface_query_str);
     class_query = CompileJavaQuery(ts_language_java, "class", class_query_str);
     package_query = CompileJavaQuery(ts_language_java, "package", package_query_str);

     // Find the bundle, and Info.plist.  Set the debug level specified
     // there, as well as the maximum file length to index.
     NSBundle *theBundle = [NSBundle bundleForClass:[self class]];

     id debugLevelObj = [theBundle objectForInfoDictionaryKey:@"DebugLevel"];
     // Only hand the value on when it really is a string.
     if ([debugLevelObj isKindOfClass:[NSString class]])
     {
         SetDebugLogLevel(DebugLevelNameToValue((NSString *)debugLevelObj));
     }

     id maxSourceSizeObj = [theBundle objectForInfoDictionaryKey:@"MaxSourceSizeToIndex"];
     // A missing key, or a value that has no -intValue, is treated as 0 and
     // therefore keeps the built-in default.
     int max = 0;
     if ([maxSourceSizeObj respondsToSelector:@selector(intValue)])
     {
         max = [maxSourceSizeObj intValue];
     }
     if (max != 0)
     {
         DebugLog(DEBUG_LEVEL_DEBUG, @"Using MaxSourceSize=%d", max);
         MaxSourceSize = max;
     }
     else
     {
         NSLog(@"Error parsing MaxSourceSizeToIndex, using %d", MaxSourceSize);
     }

     DebugLog(DEBUG_LEVEL_DEBUG, @"Static data has been initialized.");
 }
 
 
 
 // Candidate text encodings for source files, in the order they are listed.
 static NSStringEncoding PossibleSourceTextEncodings[] = {
     NSUTF8StringEncoding,
     NSMacOSRomanStringEncoding,
     NSISOLatin1StringEncoding,
     NSWindowsCP1252StringEncoding
 };
 
 // Reads the file at `pathToFile` and decodes it by trying the encodings in
 // PossibleSourceTextEncodings, in order, until one succeeds.
 //
 // At most MaxSourceSize bytes are read unless MaxSourceSize is NO_MAXIMUM.
 //
 // Returns the decoded text (autoreleased), or nil when the file could not
 // be read or none of the encodings could decode it.  `theError` is passed
 // straight to the NSData read call; this method does not set it itself, so
 // a decoding failure returns nil without touching it.
 //
 // There's probably a better way to do this (TEC Sniffers?).  The
 // seemingly obvious way, stringWithContentsOfFile:usedEncoding:error,
 // doesn't work--apparently it just does something minimal, like
 // decide between UTF-8 and UCS-16 or something.

 - (NSString*)readContentsOfFile:(NSString*)pathToFile error:(NSError**)theError
 {
     NSData *data;

     DebugLog(DEBUG_LEVEL_DEBUG, @"Indexing %@", pathToFile);

     // Read the file.
     if (MaxSourceSize == NO_MAXIMUM)
     {
         data = [NSData dataWithContentsOfFile:pathToFile options:0 error:theError];
     }
     else
     {
         data = [NSData dataWithContentsOfFile:pathToFile maxSize:MaxSourceSize error:theError];
         if ([data length] == (NSUInteger)MaxSourceSize)
         {
             // This is not absolutely certain to be correct, since the file might just have been
             // MaxSourceSize bytes long.
             DebugLog(DEBUG_LEVEL_DEBUG, @"Truncated indexing of '%@' to %d bytes", pathToFile, MaxSourceSize);
         }
     }

     if (data == nil)
     {
         return nil;
     }

     // Try to convert the file contents to a string by trying the candidate
     // encodings, in order.  The bound is the element count of the table;
     // sizeof() alone would be its size in bytes and run off the end.
     const size_t encodingCount = sizeof(PossibleSourceTextEncodings) / sizeof(PossibleSourceTextEncodings[0]);
     for (size_t i = 0; i < encodingCount; i++)
     {
         NSStringEncoding theEncoding = PossibleSourceTextEncodings[i];
         DebugLog(DEBUG_LEVEL_VERBOSE, @"Trying encoding %lu", (unsigned long)theEncoding);

         NSString *theSource = [[[NSString alloc] initWithData:data encoding:theEncoding] autorelease];
         if (theSource != nil)
         {
             return theSource;
         }
         DebugLog(DEBUG_LEVEL_DEBUG, @"Reading with encoding %lu failed.", (unsigned long)theEncoding);
     }
     return nil;
 }
 
 
 // Converts the UTF-8 C string `inp` to an NSString and appends it to the
 // collection stored in `attributes` under `key`, unless that collection
 // already contains an equal string.
 //
 // Returns NO, adding nothing, when `inp` is NULL or cannot be decoded as
 // UTF-8.  Returns YES otherwise.  When `attributes` has no object for `key`
 // the messages below go to nil, so nothing is added and YES is returned.

 - (BOOL)addMatchesTo:(NSMutableDictionary *)attributes fromCString:(const char *)inp forKey:(NSString *)key
 {
     // +stringWithUTF8String: must not be handed NULL.
     if (inp == NULL)
     {
         return NO;
     }

     NSString *match = [NSString stringWithUTF8String:inp];
     // A nil string must never reach -addObject:, which raises on nil.
     if (match == nil)
     {
         return NO;
     }

     id values = [attributes objectForKey:key];
     if (![values containsObject:match])
     {
         [values addObject:match];
     }
     return YES;
 }
 
 
 // Returns the text that `node` spans inside `utf8Source` as an autoreleased
 // NSString.  Returns nil when the node's byte range does not fit inside the
 // first `sourceLength` bytes, or when those bytes are not valid UTF-8.
 static NSString *JavaSourceTextForNode(TSNode node, const char *utf8Source, size_t sourceLength)
 {
     uint32_t startByte = ts_node_start_byte(node);
     uint32_t endByte = ts_node_end_byte(node);
     if (endByte < startByte || endByte > sourceLength)
     {
         return nil;
     }
     return [[[NSString alloc] initWithBytes:utf8Source + startByte
                                      length:endByte - startByte
                                    encoding:NSUTF8StringEncoding] autorelease];
 }

 // This is the method that does all the importing and indexing work.
 // It stuffs attributes into the specified dictionary.
 //
 // The file is read, parsed with the tree-sitter Java grammar, and the
 // following keys are added to `inAttributes`:
 //   co_fwoar_java_package     - the package name
 //   co_fwoar_java_classes     - "package.Class" for each class
 //   co_fwoar_java_interfaces  - "package.Interface" for each interface
 //   co_fwoar_java_methods     - "package.Type.method(params)" for each method
 //   co_fwoar_java_definitions - every class, interface and method entry
 //   kMDItemTextContent        - the (possibly truncated) source text
 // When the file has no package declaration the names are emitted without a
 // package prefix.
 //
 // `inContentType` is not used.  Returns YES when the attributes were added,
 // NO when the file could not be read or parsed or an exception was raised.
 - (BOOL)importFile:(NSString *)inPathToFile contentType:(NSString *)inContentType attributes:(NSMutableDictionary *)inAttributes
 {
     BOOL theResult = NO;
     TSParser *parser = NULL;
     TSTree *tree = NULL;
     TSQueryCursor *cursor = NULL;
     NSAutoreleasePool *theAutoreleasePool = [[NSAutoreleasePool alloc] init];

     // Every exit from the @try block, including the early returns, passes
     // through @finally, which owns all of the cleanup.
     @try
     {
         NSError *error = nil;

         // The grammar must be set before the queries are compiled.
         ts_language_java = tree_sitter_java();
         [self initStaticData];

         NSString *source = [self readContentsOfFile:inPathToFile error:&error];
         if (source == nil)
         {
             // NOTE(review): the "Lisp Metadata Importer" prefix in these two
             // messages looks left over from an earlier version; it is kept
             // so existing log output does not change.
             if (error)
             {
                 NSLog(@"Lisp Metadata Importer: Could not process file '%@': %@", inPathToFile, error);
             }
             else
             {
                 NSLog(@"Lisp Metadata Importer: Could not process file '%@': unknown error", inPathToFile);
             }
             return NO;
         }
         NSLog(@"Processing file '%@'", inPathToFile);

         // A query that failed to compile is NULL and must not be executed.
         if (package_query == NULL || class_query == NULL || interface_query == NULL || method_query == NULL)
         {
             NSLog(@"Tree Sitter (java) Metadata Importer: Could not process file '%@': queries are not available", inPathToFile);
             return NO;
         }

         // Only process the first MaxSourceSize of the file.  To try to do more
         // invites the swapping death.  A negative MaxSourceSize (NO_MAXIMUM)
         // means no limit.
         if (MaxSourceSize >= 0 && [source length] > (NSUInteger)MaxSourceSize)
         {
             source = [source substringToIndex:MaxSourceSize];
         }

         const char *utf8Source = [source UTF8String];
         if (utf8Source == NULL)
         {
             return NO;
         }
         size_t sourceLength = strlen(utf8Source);
         // tree-sitter addresses the buffer with 32-bit byte offsets.
         if (sourceLength > UINT32_MAX)
         {
             return NO;
         }

         parser = ts_parser_new();
         if (parser == NULL || !ts_parser_set_language(parser, ts_language_java))
         {
             NSLog(@"Tree Sitter (java) Metadata Importer: Could not process file '%@': parser setup failed", inPathToFile);
             return NO;
         }

         tree = ts_parser_parse_string(parser, NULL, utf8Source, (uint32_t)sourceLength);
         if (tree == NULL)
         {
             NSLog(@"Tree Sitter (java) Metadata Importer: Could not process file '%@': parse failed", inPathToFile);
             return NO;
         }
         TSNode rootNode = ts_tree_root_node(tree);

         cursor = ts_query_cursor_new();
         if (cursor == NULL)
         {
             return NO;
         }

         NSMutableDictionary *moreAttributes = [NSMutableDictionary dictionaryWithCapacity:6];
         [moreAttributes setObject:[NSMutableArray array] forKey:@"co_fwoar_java_package"];
         [moreAttributes setObject:[NSMutableArray array] forKey:@"co_fwoar_java_classes"];
         [moreAttributes setObject:[NSMutableArray array] forKey:@"co_fwoar_java_interfaces"];
         [moreAttributes setObject:[NSMutableArray array] forKey:@"co_fwoar_java_methods"];
         [moreAttributes setObject:[NSMutableArray array] forKey:@"co_fwoar_java_definitions"];

         TSQueryMatch match;

         // Package: only the first match is used.  `packagePrefix` is either
         // empty or "package.", so the code below can prepend it blindly.
         NSString *packagePrefix = @"";
         ts_query_cursor_exec(cursor, package_query, rootNode);
         if (ts_query_cursor_next_match(cursor, &match) && match.capture_count > 0)
         {
             NSString *packageName = JavaSourceTextForNode(match.captures[0].node, utf8Source, sourceLength);
             if ([packageName length] > 0)
             {
                 [self addMatchesTo:moreAttributes fromCString:[packageName UTF8String] forKey:@"co_fwoar_java_package"];
                 packagePrefix = [packageName stringByAppendingString:@"."];
             }
         }

         // Classes, then interfaces: same handling, different query and key.
         TSQuery *typeQueries[] = { class_query, interface_query };
         NSString *typeKeys[] = { @"co_fwoar_java_classes", @"co_fwoar_java_interfaces" };
         for (size_t t = 0; t < sizeof(typeQueries) / sizeof(typeQueries[0]); t++)
         {
             ts_query_cursor_exec(cursor, typeQueries[t], rootNode);
             while (ts_query_cursor_next_match(cursor, &match))
             {
                 if (match.captures == NULL || match.capture_count < 1)
                 {
                     continue;
                 }
                 NSString *typeName = JavaSourceTextForNode(match.captures[0].node, utf8Source, sourceLength);
                 if (typeName == nil)
                 {
                     continue;
                 }
                 const char *qualifiedName = [[packagePrefix stringByAppendingString:typeName] UTF8String];
                 [self addMatchesTo:moreAttributes fromCString:qualifiedName forKey:typeKeys[t]];
                 [self addMatchesTo:moreAttributes fromCString:qualifiedName forKey:@"co_fwoar_java_definitions"];
             }
         }

         // Methods: capture 0 is the owning type's name, capture 1 the method
         // name; the text of every further capture is appended verbatim.
         ts_query_cursor_exec(cursor, method_query, rootNode);
         while (ts_query_cursor_next_match(cursor, &match))
         {
             if (match.captures == NULL || match.capture_count < 2)
             {
                 continue;
             }
             NSString *ownerName = JavaSourceTextForNode(match.captures[0].node, utf8Source, sourceLength);
             NSString *methodName = JavaSourceTextForNode(match.captures[1].node, utf8Source, sourceLength);
             if (ownerName == nil || methodName == nil)
             {
                 continue;
             }

             NSMutableString *qualifiedMethod = [NSMutableString stringWithFormat:@"%@%@.%@", packagePrefix, ownerName, methodName];
             for (uint16_t next = 2; next < match.capture_count; next++)
             {
                 NSString *captureText = JavaSourceTextForNode(match.captures[next].node, utf8Source, sourceLength);
                 if (captureText != nil)
                 {
                     [qualifiedMethod appendString:captureText];
                 }
             }

             const char *qualifiedName = [qualifiedMethod UTF8String];
             [self addMatchesTo:moreAttributes fromCString:qualifiedName forKey:@"co_fwoar_java_methods"];
             [self addMatchesTo:moreAttributes fromCString:qualifiedName forKey:@"co_fwoar_java_definitions"];
         }

         // Add the complete source code as metadata.
         [moreAttributes setObject:source forKey:@"kMDItemTextContent"];

         [inAttributes addEntriesFromDictionary:moreAttributes];
         theResult = YES;
     }
     @catch (NSException *localException)
     {
         NSLog(@"Tree Sitter (java) Metadata Importer: Could not process file '%@' (Exception: %@)", inPathToFile, localException);
     }
     @finally
     {
         if (cursor != NULL)
         {
             ts_query_cursor_delete(cursor);
         }
         if (tree != NULL)
         {
             ts_tree_delete(tree);
         }
         if (parser != NULL)
         {
             ts_parser_delete(parser);
         }
         // Released last: the strings used above live in this pool.
         [theAutoreleasePool release];
     }
     return(theResult);
 }
 
 @end