// Retrieved from git.fiddlerwoaroof.com (web viewer header: Raw / Blame / History).
//  CMetadataImporter.m
//
//  Lisp Metadata Importer
//
//  Created by John Wiseman on 9/1/05.
//  Copyright 2005 John Wiseman.
//
//  Licensed under the MIT license--see the accompanying LICENSE.txt
//  file.

#import "CMetadataImporter.h"

#import "NSString_HMext.h"
#import "NSData_HMext.h"
#import "DebugLog.h"
#include "tree_sitter/api.h"
#include "tree-sitter/lib/src/lib.c"
#include "tree-sitter-java/src/parser.c"

@implementation CMetadataImporter


// Upper bound, in bytes, on how much of a file is read for indexing.
// 500000 is the built-in default; -initStaticData replaces it with the
// bundle's MaxSourceSizeToIndex Info.plist value when that value is non-zero.
int MaxSourceSize = 500000; // Default maximum number of bytes that will be read for indexing purposes.
// Sentinel: when MaxSourceSize equals this value, -readContentsOfFile:error:
// reads the whole file instead of capping the read.
long NO_MAXIMUM = -1;


// All sorts of static data that we initialize once, then use many many times.

// Set to YES by the first -initStaticData call so later calls return at once.
static BOOL StaticDataIsInitialized = NO;
// Tree-sitter Java grammar.  Assigned in -importFile:contentType:attributes:
// and passed to ts_query_new() in -initStaticData to compile the queries below.
static const TSLanguage* ts_language_java;
// Each *_query_str is tree-sitter query source; the matching TSQuery pointer
// is filled in by -initStaticData.
//
// Captures the scoped identifier of a package declaration as @package.
char* package_query_str = "(package_declaration (scoped_identifier) @package)";
TSQuery *package_query;

// Captures the name identifier of every class declaration as @class.
char* class_query_str = "(class_declaration name: (identifier) @class)";
TSQuery* class_query;

// Captures the name identifier of every interface declaration as @interface.
char* interface_query_str = "(interface_declaration name: (identifier) @interface)";
TSQuery* interface_query;

// Two patterns: methods declared directly in a class body and methods declared
// directly in an interface body.  Each match captures, in pattern order, the
// enclosing type name (@class / @interface), the method name (@method) and
// the formal parameter list (@type).
char* method_query_str = "(class_declaration name: (identifier) @class body: (class_body (method_declaration name: (identifier) @method (formal_parameters) @type)))(interface_declaration name: (identifier) @interface body: (interface_body (method_declaration name: (identifier) @method (formal_parameters) @type)))";
TSQuery* method_query;
// Disabled declarations kept from an earlier revision:
//static TSQuery method_query;
//static TSQuery field_query;
//
//static NSError *err = nil;

// Performs the one-time setup shared by every import: compiles the
// tree-sitter queries (method, interface, class, package) and reads the
// DebugLevel and MaxSourceSizeToIndex settings from the bundle's Info.plist.
// Every call after the first returns immediately.
//
// A query that fails to compile is logged and left NULL; the remaining setup
// still runs.
- (void)initStaticData
{
    if (StaticDataIsInitialized)
    {
        return;
    }
    StaticDataIsInitialized = YES;
    
    // ts_query_new() needs the grammar.  Load it here if no caller has done
    // so yet instead of handing a NULL language to tree-sitter.
    if (ts_language_java == NULL)
    {
        ts_language_java = tree_sitter_java();
    }
    
    uint32_t error_offset = 0;
    TSQueryError error_type = TSQueryErrorNone;
    
    // ts_query_new() returns NULL on failure, so test the result itself as
    // well as the reported error type.
    method_query = ts_query_new(ts_language_java, method_query_str, (uint32_t)strlen(method_query_str), &error_offset, &error_type);
    if (method_query == NULL || error_type != TSQueryErrorNone) {
        NSLog(@"error while initializing method query offset: %u, type: %d", (unsigned int)error_offset, (int)error_type);
    }
    interface_query = ts_query_new(ts_language_java, interface_query_str, (uint32_t)strlen(interface_query_str), &error_offset, &error_type);
    if (interface_query == NULL || error_type != TSQueryErrorNone) {
        NSLog(@"error while initializing interface query offset: %u, type: %d", (unsigned int)error_offset, (int)error_type);
    }
    class_query = ts_query_new(ts_language_java, class_query_str, (uint32_t)strlen(class_query_str), &error_offset, &error_type);
    if (class_query == NULL || error_type != TSQueryErrorNone) {
        NSLog(@"error while initializing class query offset: %u, type: %d", (unsigned int)error_offset, (int)error_type);
    }
    package_query = ts_query_new(ts_language_java, package_query_str, (uint32_t)strlen(package_query_str), &error_offset, &error_type);
    if (package_query == NULL || error_type != TSQueryErrorNone) {
        NSLog(@"error while initializing package query offset: %u type: %d", (unsigned int)error_offset, (int)error_type);
    }

    // Find the bundle, and Info.plist.  Set the debug level specified
    // there, as well as the maximum file length to index.
    NSBundle *theBundle = [NSBundle bundleForClass:[self class]];
    
    // Info.plist values are untyped; only hand a real string to
    // DebugLevelNameToValue().
    id debugLevelObj = [theBundle objectForInfoDictionaryKey:@"DebugLevel"];
    if ([debugLevelObj isKindOfClass:[NSString class]])
    {
        SetDebugLogLevel(DebugLevelNameToValue((NSString *)debugLevelObj));
    }
    
    // Both NSNumber and NSString answer -intValue; anything else (or a
    // missing key) is treated as 0 and falls through to the default.
    id maxSourceSizeObj = [theBundle objectForInfoDictionaryKey:@"MaxSourceSizeToIndex"];
    int max = 0;
    if ([maxSourceSizeObj respondsToSelector:@selector(intValue)])
    {
        max = [maxSourceSizeObj intValue];
    }
    if (max != 0)
    {
        DebugLog(DEBUG_LEVEL_DEBUG, @"Using MaxSourceSize=%d", max);
        MaxSourceSize = max;
    }
    else
    {
        NSLog(@"Error parsing MaxSourceSizeToIndex, using %d", MaxSourceSize);
    }
    
    DebugLog(DEBUG_LEVEL_DEBUG, @"Static data has been initialized.");
}



static NSStringEncoding PossibleSourceTextEncodings[] = {	NSUTF8StringEncoding,
    NSMacOSRomanStringEncoding,
    NSISOLatin1StringEncoding,
    NSWindowsCP1252StringEncoding };

// Tries to read the file using the encodings specified in
// PossibleSourceTextEncodings, in order, until one succeeds.
//
// There's probably a better way to do this (TEC Sniffers?).  The
// seemingly obvious way, stringWithContentsOfFile:usedEncoding:error,
// doesn't work--apparently it just does something minimal, like
// decide between UTF-8 and UCS-16 or something.
//
// Reads at most MaxSourceSize bytes unless MaxSourceSize is NO_MAXIMUM, in
// which case the whole file is read.
//
// Returns an autoreleased string, or nil if the file could not be read
// (theError is passed straight through to NSData) or if none of the
// candidate encodings could decode the bytes (theError is not touched in
// that case).

- (NSString*)readContentsOfFile:(NSString*)pathToFile error:(NSError**)theError
{
    // Element count, not byte size: sizeof on the array alone yields bytes
    // and would walk the loop below off the end of the table.
    const size_t encodingCount = sizeof(PossibleSourceTextEncodings) / sizeof(PossibleSourceTextEncodings[0]);
    size_t i;
    NSStringEncoding theEncoding;
    NSString *theSource = nil;
    NSData *data;
    
    DebugLog(DEBUG_LEVEL_DEBUG, @"Indexing %@", pathToFile);
    
    // Read the file.
    if (MaxSourceSize == NO_MAXIMUM)
    {
        data = [NSData dataWithContentsOfFile:pathToFile options:0 error:theError];
    }
    else
    {
        data = [NSData dataWithContentsOfFile:pathToFile maxSize:MaxSourceSize error:theError];
        if ([data length] == (NSUInteger)MaxSourceSize)
        {
            // This is not absolutely certain to be correct, since the file might just have been
            // MaxSourceSize bytes long.
            DebugLog(DEBUG_LEVEL_DEBUG, @"Truncated indexing of '%@' to %d bytes", pathToFile, MaxSourceSize);
        }
    }
    
    if (data == nil)
    {
        return nil;
    }
    
    // Try to convert the file contents to a string by trying the candidate
    // encodings, in order.
    for (i = 0; i < encodingCount; i++)
    {
        theEncoding = PossibleSourceTextEncodings[i];
        // NSStringEncoding is an NSUInteger, so print it as unsigned long.
        DebugLog(DEBUG_LEVEL_VERBOSE, @"Trying encoding %lu", (unsigned long)theEncoding);
        theSource = [[[NSString alloc] initWithData:data encoding:theEncoding] autorelease];
        if (theSource != nil)
        {
            break;
        }
        else
        {
            DebugLog(DEBUG_LEVEL_DEBUG, @"Reading with encoding %lu failed.", (unsigned long)theEncoding);
        }
    }
    return theSource;
}


// Appends the UTF-8 C string `inp` to the mutable array stored under `key`
// in `attributes`, unless an equal string is already in that array.
//
// Returns NO, leaving `attributes` untouched, when `inp` is NULL or is not
// valid UTF-8; returns YES otherwise.  If `attributes` holds no array for
// `key`, nothing is added and YES is still returned.

- (BOOL)addMatchesTo:(NSMutableDictionary *)attributes fromCString:(const char *)inp forKey:(NSString *)key
{
    // +stringWithUTF8String: raises on NULL, so reject it up front.
    if (inp == NULL)
    {
        return NO;
    }
    
    // nil here means the bytes were not valid UTF-8; passing nil on to
    // -addObject: would raise NSInvalidArgumentException.
    NSString *match = [NSString stringWithUTF8String:inp];
    if (match == nil)
    {
        return NO;
    }
    
    NSMutableArray *values = [attributes objectForKey:key];
    if (![values containsObject:match]) {
        [values addObject:match];
    }
    return YES;
}


// Returns a freshly malloc'd, NUL-terminated copy of the bytes of `source`
// covered by `node`.  Returns NULL if the node's byte range does not lie
// inside the first `sourceLength` bytes of `source` or if the allocation
// fails.  The caller owns the result and must free() it.
static char *CMICopyNodeText(const char *source, size_t sourceLength, TSNode node)
{
    uint32_t start = ts_node_start_byte(node);
    uint32_t end = ts_node_end_byte(node);
    if (end < start || end > sourceLength)
    {
        return NULL;
    }
    
    size_t length = end - start;
    char *text = malloc(length + 1);
    if (text != NULL)
    {
        memcpy(text, source + start, length);
        text[length] = '\0';
    }
    return text;
}

// Returns a freshly malloc'd string holding prefix + separator + suffix.
// The separator is left out when `prefix` is empty, so a name with no
// qualifier does not start with a stray separator.  Returns NULL if the
// allocation fails.  The caller owns the result and must free() it.
static char *CMICopyJoinedName(const char *prefix, const char *separator, const char *suffix)
{
    size_t prefixLength = strlen(prefix);
    size_t separatorLength = (prefixLength > 0) ? strlen(separator) : 0;
    size_t suffixLength = strlen(suffix);
    
    char *joined = malloc(prefixLength + separatorLength + suffixLength + 1);
    if (joined != NULL)
    {
        memcpy(joined, prefix, prefixLength);
        memcpy(joined + prefixLength, separator, separatorLength);
        // +1 copies the suffix's terminating NUL as well.
        memcpy(joined + prefixLength + separatorLength, suffix, suffixLength + 1);
    }
    return joined;
}

// This is the method that does all the importing and indexing work.
// It stuffs attributes into the specified dictionary.
//
// The file is read, parsed as Java with tree-sitter, and the following
// attributes are merged into inAttributes:
//   co_fwoar_java_package      the package name, if the file declares one
//   co_fwoar_java_classes      "package.Class" for each class declaration
//   co_fwoar_java_interfaces   "package.Interface" for each interface
//   co_fwoar_java_methods      "package.Type.method(params)" for each method
//                              matched by method_query
//   co_fwoar_java_definitions  every class, interface and method entry above
//   kMDItemTextContent         the (possibly truncated) source text
// When the file has no package declaration the names carry no "package."
// prefix.
//
// Returns YES once the attributes have been merged into inAttributes, and NO
// if the file could not be read, could not be parsed, memory ran out, or an
// exception was raised (inAttributes is left untouched in those cases).
- (BOOL)importFile:(NSString *)inPathToFile contentType:(NSString *)inContentType attributes:(NSMutableDictionary *)inAttributes
{
    BOOL theResult = NO;
    TSTree *tree = NULL;
    
    // C buffers are tracked here so that @finally can free them no matter
    // how the @try block is left.
    char *packagename_buf = NULL;   // text of the package declaration, if any
    char *piece_buf = NULL;         // text of the capture currently being copied
    char *qualified_buf = NULL;     // dotted name currently being assembled
    
    // Created outside @try and released in @finally so that it is drained on
    // every exit path, including early returns and caught exceptions.
    NSAutoreleasePool *theAutoreleasePool = [[NSAutoreleasePool alloc] init];
    
    TSParser *ts_parser = ts_parser_new();
    // Must be assigned before -initStaticData runs: the queries are compiled
    // against this grammar.
    ts_language_java = tree_sitter_java();
    ts_parser_set_language(ts_parser, ts_language_java);
    
    TSQueryCursor *cursor = ts_query_cursor_new();
    
    @try
    {
        NSError *error = nil;
        NSString *source;
        
        [self initStaticData];
        
        source = [self readContentsOfFile:inPathToFile error:&error];
        if (source == nil)
        {
            if (error)
            {
                NSLog(@"Lisp Metadata Importer: Could not process file '%@': %@", inPathToFile, error);
            }
            else
            {
                NSLog(@"Lisp Metadata Importer: Could not process file '%@': unknown error", inPathToFile);
            }
            return NO;
        }
        NSLog(@"Processing file '%@'", inPathToFile);
        
        // Only process the first MaxSourceSize of the file.  To try to do more
        // invites the swapping death.  A non-positive MaxSourceSize (such as
        // NO_MAXIMUM) means no limit.
        if (MaxSourceSize > 0 && [source length] > (NSUInteger)MaxSourceSize)
        {
            source = [source substringToIndex:MaxSourceSize];
        }
        
        const char *cstring_source = [source UTF8String];
        if (cstring_source == NULL)
        {
            NSLog(@"Tree Sitter (java) Metadata Importer: Could not convert '%@' to UTF-8", inPathToFile);
            return NO;
        }
        size_t source_length = strlen(cstring_source);
        
        // ts_parser_parse_string() returns NULL when no usable language is
        // set on the parser; the root node of a NULL tree must not be taken.
        tree = ts_parser_parse_string(ts_parser, NULL, cstring_source, (uint32_t)source_length);
        if (tree == NULL)
        {
            NSLog(@"Tree Sitter (java) Metadata Importer: Could not parse file '%@'", inPathToFile);
            return NO;
        }
        TSNode root_node = ts_tree_root_node(tree);
        
        NSMutableDictionary *moreAttributes = [[[NSMutableDictionary alloc] initWithCapacity:6] autorelease];
        
        [moreAttributes setObject:[NSMutableArray arrayWithCapacity:100] forKey:@"co_fwoar_java_package"];
        [moreAttributes setObject:[NSMutableArray arrayWithCapacity:100] forKey:@"co_fwoar_java_classes"];
        [moreAttributes setObject:[NSMutableArray arrayWithCapacity:100] forKey:@"co_fwoar_java_interfaces"];
        [moreAttributes setObject:[NSMutableArray arrayWithCapacity:100] forKey:@"co_fwoar_java_methods"];
        [moreAttributes setObject:[NSMutableArray arrayWithCapacity:100] forKey:@"co_fwoar_java_definitions"];
        
        TSQueryMatch ts_match;
        
        // Package name.  Files in the default package have no declaration;
        // the prefix then stays empty.  A query that failed to compile is
        // NULL and its pass is skipped.
        const char *package_prefix = "";
        if (package_query != NULL)
        {
            ts_query_cursor_exec(cursor, package_query, root_node);
            if (ts_query_cursor_next_match(cursor, &ts_match)
                && ts_match.captures != NULL && ts_match.capture_count > 0)
            {
                packagename_buf = CMICopyNodeText(cstring_source, source_length, ts_match.captures[0].node);
                if (packagename_buf == NULL)
                {
                    return NO;
                }
                [self addMatchesTo:moreAttributes fromCString:packagename_buf forKey:@"co_fwoar_java_package"];
                package_prefix = packagename_buf;
            }
        }
        
        // Classes and interfaces are handled identically apart from the
        // query that finds them and the attribute that receives them.
        struct { TSQuery *query; NSString *key; } type_passes[] = {
            { class_query,     @"co_fwoar_java_classes" },
            { interface_query, @"co_fwoar_java_interfaces" },
        };
        size_t pass;
        for (pass = 0; pass < sizeof(type_passes) / sizeof(type_passes[0]); pass++)
        {
            if (type_passes[pass].query == NULL)
            {
                continue;
            }
            ts_query_cursor_exec(cursor, type_passes[pass].query, root_node);
            while (ts_query_cursor_next_match(cursor, &ts_match))
            {
                if (ts_match.captures == NULL || ts_match.capture_count < 1)
                {
                    continue;
                }
                
                free(piece_buf);
                piece_buf = CMICopyNodeText(cstring_source, source_length, ts_match.captures[0].node);
                if (piece_buf == NULL)
                {
                    return NO;
                }
                
                free(qualified_buf);
                qualified_buf = CMICopyJoinedName(package_prefix, ".", piece_buf);
                if (qualified_buf == NULL)
                {
                    return NO;
                }
                
                [self addMatchesTo:moreAttributes fromCString:qualified_buf forKey:type_passes[pass].key];
                [self addMatchesTo:moreAttributes fromCString:qualified_buf forKey:@"co_fwoar_java_definitions"];
            }
        }
        
        // Methods.  Capture 0 is the enclosing type, capture 1 the method
        // name; any further captures (the parameter list) are appended to
        // the name verbatim, with no separator.
        if (method_query != NULL)
        {
            ts_query_cursor_exec(cursor, method_query, root_node);
            while (ts_query_cursor_next_match(cursor, &ts_match))
            {
                if (ts_match.captures == NULL || ts_match.capture_count < 2)
                {
                    continue;
                }
                
                free(piece_buf);
                piece_buf = CMICopyNodeText(cstring_source, source_length, ts_match.captures[0].node);
                if (piece_buf == NULL)
                {
                    return NO;
                }
                
                free(qualified_buf);
                qualified_buf = CMICopyJoinedName(package_prefix, ".", piece_buf);
                if (qualified_buf == NULL)
                {
                    return NO;
                }
                
                uint16_t capture_index;
                for (capture_index = 1; capture_index < ts_match.capture_count; capture_index++)
                {
                    free(piece_buf);
                    piece_buf = CMICopyNodeText(cstring_source, source_length, ts_match.captures[capture_index].node);
                    if (piece_buf == NULL)
                    {
                        return NO;
                    }
                    
                    // Build into a temporary so qualified_buf stays valid
                    // (and gets freed in @finally) if the allocation fails.
                    char *extended = CMICopyJoinedName(qualified_buf, (capture_index == 1) ? "." : "", piece_buf);
                    if (extended == NULL)
                    {
                        return NO;
                    }
                    free(qualified_buf);
                    qualified_buf = extended;
                }
                
                [self addMatchesTo:moreAttributes fromCString:qualified_buf forKey:@"co_fwoar_java_methods"];
                [self addMatchesTo:moreAttributes fromCString:qualified_buf forKey:@"co_fwoar_java_definitions"];
            }
        }
        
        // Add the complete source code as metadata.
        [moreAttributes setObject:source forKey:@"kMDItemTextContent"];
        
        [inAttributes addEntriesFromDictionary:moreAttributes];
        theResult = YES;
    }
    @catch (NSException *localException)
    {
        NSLog(@"Tree Sitter (java) Metadata Importer: Could not process file '%@' (Exception: %@)", inPathToFile, localException);
    }
    @finally
    {
        // free(NULL) is a no-op, so the buffers need no individual checks.
        free(packagename_buf);
        free(piece_buf);
        free(qualified_buf);
        if (tree != NULL)
        {
            ts_tree_delete(tree);
        }
        ts_query_cursor_delete(cursor);
        ts_parser_delete(ts_parser);
        [theAutoreleasePool release];
    }
    return(theResult);
}

@end