9cd4cff1 |
// CMetadataImporter.m
//
// Lisp Metadata Importer
//
// Created by John Wiseman on 9/1/05.
// Copyright 2005 John Wiseman.
//
// Licensed under the MIT license--see the accompanying LICENSE.txt
// file.
#import "CMetadataImporter.h"
#import "NSString_HMext.h"
#import "NSData_HMext.h"
#import "DebugLog.h"
|
6de0256e |
#include "tree_sitter/api.h"
#include "tree-sitter/lib/src/lib.c"
#include "tree-sitter-java/src/parser.c"
|
9cd4cff1 |
@implementation CMetadataImporter
// Upper bound, in bytes, on how much of a file is read for indexing. The value
// below is only the default: initStaticData overwrites it with the bundle's
// MaxSourceSizeToIndex entry when that entry yields a non-zero integer.
int MaxSourceSize = 500000; // Default maximum number of bytes that will be read for indexing purposes.
// Sentinel for MaxSourceSize: when the two are equal, readContentsOfFile:error:
// reads the whole file instead of capping the read.
long NO_MAXIMUM = -1;
// All sorts of static data that we initialize once, then use many many times.
// Set to YES by the first call to initStaticData; later calls return at once.
static BOOL StaticDataIsInitialized = NO;
|
6de0256e |
// The tree-sitter Java grammar. Assigned at the top of
// importFile:contentType:attributes: and read by initStaticData when it
// compiles the queries below.
static const TSLanguage* ts_language_java;
// Query source that captures the scoped identifier of a package declaration
// as @package.
char* package_query_str = "(package_declaration (scoped_identifier) @package)";
// Compiled form of package_query_str; created in initStaticData.
TSQuery *package_query;
|
9cd4cff1 |
|
6de0256e |
// Query source that captures the name identifier of every class declaration
// as @class.
char* class_query_str = "(class_declaration name: (identifier) @class)";
// Compiled form of class_query_str; created in initStaticData.
TSQuery* class_query;
|
9cd4cff1 |
|
6de0256e |
// Query source that captures the name identifier of every interface
// declaration as @interface.
char* interface_query_str = "(interface_declaration name: (identifier) @interface)";
// Compiled form of interface_query_str; created in initStaticData.
TSQuery* interface_query;
|
9cd4cff1 |
|
6de0256e |
// Query source with two patterns: method declarations found directly in a
// class body and method declarations found directly in an interface body.
// Each pattern captures the enclosing type's name (@class / @interface), the
// method's name (@method) and its formal parameter list (@type).
char* method_query_str = "(class_declaration name: (identifier) @class body: (class_body (method_declaration name: (identifier) @method (formal_parameters) @type)))(interface_declaration name: (identifier) @interface body: (interface_body (method_declaration name: (identifier) @method (formal_parameters) @type)))";
// Compiled form of method_query_str; created in initStaticData.
TSQuery* method_query;
//static TSQuery method_query;
//static TSQuery field_query;
//
//static NSError *err = nil;
|
9cd4cff1 |
// One-time setup shared by every import performed in this process.
//
// Compiles the four tree-sitter queries (method, interface, class, package)
// against ts_language_java, then reads the DebugLevel and
// MaxSourceSizeToIndex entries from the bundle's Info.plist. The caller is
// expected to have assigned ts_language_java beforehand; if it has not, every
// query fails to compile and is left NULL.
//
// Calls after the first return immediately. A query that fails to compile is
// logged and its global stays NULL.
- (void)initStaticData
{
    if (StaticDataIsInitialized)
    {
        return;
    }
    StaticDataIsInitialized = YES;

    // ts_query_new signals failure by returning NULL; the out-parameters are
    // only meaningful in that case, so test the return value rather than
    // error_type. They are still initialised so the log never prints garbage.
    uint32_t error_offset = 0;
    TSQueryError error_type = TSQueryErrorNone;

    method_query = ts_query_new(ts_language_java, method_query_str, (uint32_t)strlen(method_query_str), &error_offset, &error_type);
    if (method_query == NULL)
    {
        NSLog(@"error while initializing method query offset: %u, type: %d", error_offset, (int)error_type);
    }

    interface_query = ts_query_new(ts_language_java, interface_query_str, (uint32_t)strlen(interface_query_str), &error_offset, &error_type);
    if (interface_query == NULL)
    {
        NSLog(@"error while initializing interface query offset: %u, type: %d", error_offset, (int)error_type);
    }

    class_query = ts_query_new(ts_language_java, class_query_str, (uint32_t)strlen(class_query_str), &error_offset, &error_type);
    if (class_query == NULL)
    {
        NSLog(@"error while initializing class query offset: %u, type: %d", error_offset, (int)error_type);
    }

    package_query = ts_query_new(ts_language_java, package_query_str, (uint32_t)strlen(package_query_str), &error_offset, &error_type);
    if (package_query == NULL)
    {
        NSLog(@"error while initializing package query offset: %u type: %d", error_offset, (int)error_type);
    }

    // Find the bundle, and Info.plist. Set the debug level specified
    // there, as well as the maximum file length to index.
    NSBundle *theBundle = [NSBundle bundleForClass:[self class]];

    NSObject *debugLevelObj = [theBundle objectForInfoDictionaryKey:@"DebugLevel"];
    if ([debugLevelObj isKindOfClass:[NSString class]])
    {
        SetDebugLogLevel(DebugLevelNameToValue((NSString *)debugLevelObj));
    }
    else if (debugLevelObj != nil)
    {
        // A non-string plist value would otherwise be passed on as an NSString.
        NSLog(@"Ignoring DebugLevel: expected a string, got %@", [debugLevelObj class]);
    }

    // Both NSNumber and NSString plist values answer -intValue; anything else
    // (or a missing key) is treated as "not specified" and leaves max at 0.
    NSObject *maxSourceSizeObj = [theBundle objectForInfoDictionaryKey:@"MaxSourceSizeToIndex"];
    int max = 0;
    if ([maxSourceSizeObj respondsToSelector:@selector(intValue)])
    {
        max = [(NSNumber *)maxSourceSizeObj intValue];
    }

    if (max != 0)
    {
        DebugLog(DEBUG_LEVEL_DEBUG, @"Using MaxSourceSize=%d", max);
        MaxSourceSize = max;
    }
    else
    {
        NSLog(@"Error parsing MaxSourceSizeToIndex, using %d", MaxSourceSize);
    }

    DebugLog(DEBUG_LEVEL_DEBUG, @"Static data has been initialized.");
}
// Candidate text encodings for source files, listed in the order
// readContentsOfFile:error: attempts them; the first one that decodes the
// data wins.
static NSStringEncoding PossibleSourceTextEncodings[] = { NSUTF8StringEncoding,
NSMacOSRomanStringEncoding,
NSISOLatin1StringEncoding,
NSWindowsCP1252StringEncoding };
// Tries to read the file using the encodings specified in
// PossibleSourceTextEncodings, in order, until one succeeds.
//
// There's probably a better way to do this (TEC Sniffers?). The
// seemingly obvious way, stringWithContentsOfFile:usedEncoding:error,
// doesn't work--apparently it just does something minimal, like
// decide between UTF-8 and UCS-16 or something.
// Reads a file and decodes it to a string.
//
// At most MaxSourceSize bytes are read, unless MaxSourceSize equals
// NO_MAXIMUM, in which case the whole file is read. The bytes are then decoded
// with each entry of PossibleSourceTextEncodings in turn until one succeeds.
//
// pathToFile - path of the file to read.
// theError   - passed through to the NSData read call, which is the only
//              thing in this method that is given the chance to fill it in.
//
// Returns the decoded text, or nil if the file could not be read or none of
// the candidate encodings could decode it. In the latter case *theError is
// not set by this method.
- (NSString*)readContentsOfFile:(NSString*)pathToFile error:(NSError**)theError
{
    NSString *theSource = nil;
    NSData *data = nil;

    DebugLog(DEBUG_LEVEL_DEBUG, @"Indexing %@", pathToFile);

    // Read the file.
    if (MaxSourceSize == NO_MAXIMUM)
    {
        data = [NSData dataWithContentsOfFile:pathToFile options:0 error:theError];
    }
    else
    {
        data = [NSData dataWithContentsOfFile:pathToFile maxSize:MaxSourceSize error:theError];
        if ([data length] == MaxSourceSize)
        {
            // This is not absolutely certain to be correct, since the file might just have been
            // MaxSourceSize bytes long.
            DebugLog(DEBUG_LEVEL_DEBUG, @"Truncated indexing of '%@' to %d bytes", pathToFile, MaxSourceSize);
        }
    }
    if (data == nil)
    {
        return nil;
    }

    // Try to convert the file contents to a string by trying the candidate
    // encodings, in order. The bound must be the element count: sizeof on the
    // array alone yields its size in bytes and would walk off the end.
    const size_t encodingCount = sizeof(PossibleSourceTextEncodings) / sizeof(PossibleSourceTextEncodings[0]);
    for (size_t i = 0; i < encodingCount; i++)
    {
        NSStringEncoding theEncoding = PossibleSourceTextEncodings[i];
        // NSStringEncoding is an NSUInteger, so print it as unsigned long.
        DebugLog(DEBUG_LEVEL_VERBOSE, @"Trying encoding %lu", (unsigned long)theEncoding);
        theSource = [[[NSString alloc] initWithData:data encoding:theEncoding] autorelease];
        if (theSource != nil)
        {
            break;
        }
        DebugLog(DEBUG_LEVEL_DEBUG, @"Reading with encoding %lu failed.", (unsigned long)theEncoding);
    }
    return theSource;
}
// Adds a metadata value, given as a C string, to the array stored in the
// specified dictionary under the specified key, unless it is already there.
|
6de0256e |
// Appends one value to the array stored under `key`, skipping duplicates.
//
// attributes - dictionary whose value for `key` is expected to be a mutable
//              array. If there is no value for `key`, nothing is stored
//              (messages to nil are no-ops) and YES is still returned.
// inp        - NUL-terminated UTF-8 text to add.
// key        - dictionary key selecting the target array.
//
// Returns NO when `inp` is NULL or is not valid UTF-8, YES otherwise.
- (BOOL)addMatchesTo:(NSMutableDictionary *)attributes fromCString:(const char *)inp forKey:(NSString *)key
{
    // stringWithUTF8String: raises on NULL and returns nil for bytes that are
    // not valid UTF-8; addObject: raises on nil. Reject both up front.
    if (inp == NULL)
    {
        return NO;
    }
    NSString *match = [NSString stringWithUTF8String:inp];
    if (match == nil)
    {
        return NO;
    }

    NSMutableArray *values = [attributes objectForKey:key];
    if (![values containsObject:match])
    {
        [values addObject:match];
    }
    return YES;
}
// This is the method that does all the importing and indexing work.
// It stuffs attributes into the specified dictionary.
// Returns the source text spanned by `node` as an autoreleased string, or nil
// when the node's byte range is inverted, lies outside the buffer, or does not
// decode as UTF-8.
static NSString *CMISourceTextForNode(TSNode node, const char *sourceBytes, uint32_t sourceLength)
{
    uint32_t start = ts_node_start_byte(node);
    uint32_t end = ts_node_end_byte(node);
    if (end < start || end > sourceLength)
    {
        return nil;
    }
    return [[[NSString alloc] initWithBytes:sourceBytes + start
                                     length:end - start
                                   encoding:NSUTF8StringEncoding] autorelease];
}

// Joins a package name and a simple name with a dot. A nil or empty package
// (a file with no package declaration) yields the simple name on its own.
static NSString *CMIQualifiedName(NSString *packageName, NSString *simpleName)
{
    if ([packageName length] == 0)
    {
        return simpleName;
    }
    return [NSString stringWithFormat:@"%@.%@", packageName, simpleName];
}

// This is the method that does all the importing and indexing work.
// It stuffs attributes into the specified dictionary.
//
// The file is read, parsed with the tree-sitter Java grammar and queried for
// its package, classes, interfaces and methods. Names are qualified with the
// package name when the file declares one. The results are stored under the
// co_fwoar_java_* keys and the (possibly truncated) source text under
// kMDItemTextContent.
//
// inPathToFile  - path of the file to index.
// inContentType - not used by this method.
// inAttributes  - dictionary that receives the attributes; only modified when
//                 the whole import succeeds.
//
// Returns YES on success; NO if the file could not be read, could not be
// parsed, or an exception was raised (exceptions are logged, not propagated).
- (BOOL)importFile:(NSString *)inPathToFile contentType:(NSString *)inContentType attributes:(NSMutableDictionary *)inAttributes
{
    BOOL theResult = NO;
    TSTree *tree = NULL;

    ts_language_java = tree_sitter_java();
    TSParser *ts_parser = ts_parser_new();
    // A single cursor is enough: ts_query_cursor_exec restarts it for each query.
    TSQueryCursor *cursor = ts_query_cursor_new();
    // Created outside @try so that @finally releases it on every path.
    NSAutoreleasePool *theAutoreleasePool = [[NSAutoreleasePool alloc] init];

    @try
    {
        NSError *error = nil;
        [self initStaticData];
        NSString *source = [self readContentsOfFile:inPathToFile error:&error];
        if (source == nil)
        {
            if (error)
            {
                NSLog(@"Lisp Metadata Importer: Could not process file '%@': %@", inPathToFile, error);
            }
            else
            {
                NSLog(@"Lisp Metadata Importer: Could not process file '%@': unknown error", inPathToFile);
            }
        }
        else
        {
            NSLog(@"Processing file '%@'", inPathToFile);

            // Only process the first MaxSourceSize of the file. To try to do more
            // invites the swapping death. Cut at a composed-character boundary so
            // a surrogate pair is never split, which would break UTF-8 conversion.
            if (MaxSourceSize > 0 && [source length] > (NSUInteger)MaxSourceSize)
            {
                NSRange boundary = [source rangeOfComposedCharacterSequenceAtIndex:(NSUInteger)MaxSourceSize];
                source = [source substringToIndex:boundary.location];
            }

            const char *cstring_source = [source UTF8String];
            uint32_t source_length = 0;
            if (cstring_source != NULL && ts_parser_set_language(ts_parser, ts_language_java))
            {
                source_length = (uint32_t)strlen(cstring_source);
                tree = ts_parser_parse_string(ts_parser, NULL, cstring_source, source_length);
            }

            if (tree == NULL)
            {
                NSLog(@"Tree Sitter (java) Metadata Importer: Could not parse file '%@'", inPathToFile);
            }
            else
            {
                TSNode root_node = ts_tree_root_node(tree);
                TSQueryMatch ts_match;

                NSMutableDictionary *moreAttributes = [NSMutableDictionary dictionaryWithCapacity:6];
                NSString *listKeys[] = { @"co_fwoar_java_package",
                                         @"co_fwoar_java_classes",
                                         @"co_fwoar_java_interfaces",
                                         @"co_fwoar_java_methods",
                                         @"co_fwoar_java_definitions" };
                for (size_t k = 0; k < sizeof(listKeys) / sizeof(listKeys[0]); k++)
                {
                    [moreAttributes setObject:[NSMutableArray arrayWithCapacity:100] forKey:listKeys[k]];
                }

                // Package: only the first match is used. packageName stays nil when
                // the file has no package declaration.
                NSString *packageName = nil;
                if (package_query != NULL)
                {
                    ts_query_cursor_exec(cursor, package_query, root_node);
                    if (ts_query_cursor_next_match(cursor, &ts_match) && ts_match.capture_count > 0)
                    {
                        packageName = CMISourceTextForNode(ts_match.captures[0].node, cstring_source, source_length);
                        if (packageName != nil)
                        {
                            [self addMatchesTo:moreAttributes fromCString:[packageName UTF8String] forKey:@"co_fwoar_java_package"];
                        }
                    }
                }

                // Classes, then interfaces: both record the qualified type name
                // under their own key and under the combined definitions key.
                TSQuery *typeQueries[] = { class_query, interface_query };
                NSString *typeKeys[] = { @"co_fwoar_java_classes", @"co_fwoar_java_interfaces" };
                for (size_t q = 0; q < sizeof(typeQueries) / sizeof(typeQueries[0]); q++)
                {
                    if (typeQueries[q] == NULL)
                    {
                        continue; // this query failed to compile in initStaticData
                    }
                    ts_query_cursor_exec(cursor, typeQueries[q], root_node);
                    while (ts_query_cursor_next_match(cursor, &ts_match))
                    {
                        if (ts_match.capture_count < 1)
                        {
                            continue;
                        }
                        NSString *typeName = CMISourceTextForNode(ts_match.captures[0].node, cstring_source, source_length);
                        if (typeName == nil)
                        {
                            continue;
                        }
                        const char *qualified = [CMIQualifiedName(packageName, typeName) UTF8String];
                        [self addMatchesTo:moreAttributes fromCString:qualified forKey:typeKeys[q]];
                        [self addMatchesTo:moreAttributes fromCString:qualified forKey:@"co_fwoar_java_definitions"];
                    }
                }

                // Methods: capture 0 is the enclosing type, capture 1 the method
                // name, and any further captures (the parameter list) are appended
                // verbatim, giving e.g. pkg.Type.method(int a).
                if (method_query != NULL)
                {
                    ts_query_cursor_exec(cursor, method_query, root_node);
                    while (ts_query_cursor_next_match(cursor, &ts_match))
                    {
                        if (ts_match.capture_count < 2)
                        {
                            continue;
                        }
                        NSString *typeName = CMISourceTextForNode(ts_match.captures[0].node, cstring_source, source_length);
                        NSString *methodName = CMISourceTextForNode(ts_match.captures[1].node, cstring_source, source_length);
                        if (typeName == nil || methodName == nil)
                        {
                            continue;
                        }
                        NSMutableString *signature = [NSMutableString stringWithString:CMIQualifiedName(packageName, typeName)];
                        [signature appendFormat:@".%@", methodName];
                        for (uint16_t next = 2; next < ts_match.capture_count; next++)
                        {
                            NSString *captureText = CMISourceTextForNode(ts_match.captures[next].node, cstring_source, source_length);
                            if (captureText != nil)
                            {
                                [signature appendString:captureText];
                            }
                        }
                        const char *signature_cstring = [signature UTF8String];
                        [self addMatchesTo:moreAttributes fromCString:signature_cstring forKey:@"co_fwoar_java_methods"];
                        [self addMatchesTo:moreAttributes fromCString:signature_cstring forKey:@"co_fwoar_java_definitions"];
                    }
                }

                // Add the complete source code as metadata.
                [moreAttributes setObject:source forKey:@"kMDItemTextContent"];
                [inAttributes addEntriesFromDictionary:moreAttributes];
                theResult = YES;
            }
        }
    }
    @catch (NSException *localException)
    {
        NSLog(@"Tree Sitter (java) Metadata Importer: Could not process file '%@' (Exception: %@)", inPathToFile, localException);
    }
    @finally
    {
        // The tree owns the nodes handed out by the queries, so it is deleted
        // only here, after all query work is finished.
        if (tree != NULL)
        {
            ts_tree_delete(tree);
        }
        ts_query_cursor_delete(cursor);
        ts_parser_delete(ts_parser);
        [theAutoreleasePool release];
    }
    return(theResult);
}
@end
|