diff --git a/configure.ac b/configure.ac index 0dea296cb..57af81cd4 100644 --- a/configure.ac +++ b/configure.ac @@ -22,6 +22,8 @@ AC_PROG_INSTALL LDFLAGS="$LDFLAGS -L/usr/local/lib" # Checks for libraries. +AC_CHECK_LIB([json-c], [json_tokener_parse], [have_json_c=yes], [have_json_c=no]) +AM_CONDITIONAL([USE_LIBJSON_C], [test "x$have_json_c" = xyes]) AC_SEARCH_LIBS([log], [m],,[AC_MSG_ERROR([Could not find math library])]) diff --git a/src/Makefile.am b/src/Makefile.am index 9b5f4887e..3ca646dbe 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -42,6 +42,15 @@ near_dupe_test_SOURCES = strndup.c near_dupe_test.c string_utils.c utf8proc/utf8 near_dupe_test_LDADD = libpostal.la near_dupe_test_CFLAGS = $(CFLAGS_O3) +if USE_LIBJSON_C +noinst_PROGRAMS += thread_test + +thread_test_SOURCES = thread_test.c +thread_test_CPPFLAGS = -pthread +thread_test_CFLAGS = $(CFLAGS_O3) -I/usr/include/json-c +thread_test_LDADD = libpostal.la -ljson-c +thread_test_LDFLAGS = -pthread +endif build_address_dictionary_SOURCES = strndup.c address_dictionary_builder.c address_dictionary.c file_utils.c string_utils.c trie.c trie_search.c utf8proc/utf8proc.c build_address_dictionary_CFLAGS = $(CFLAGS_O3) diff --git a/src/address_parser.c b/src/address_parser.c index 2b7c8811a..3cf95fa34 100644 --- a/src/address_parser.c +++ b/src/address_parser.c @@ -202,7 +202,7 @@ bool address_parser_load(char *dir) { parser->model.crf = crf_model; } else { char_array_destroy(path); - log_error("Averaged perceptron model could not be loaded\n"); + log_error("CRF model could not be loaded\n"); return false; } } else { @@ -294,11 +294,6 @@ bool address_parser_load(char *dir) { fclose(postal_codes_file); - parser->context = address_parser_context_new(); - if (parser->context == NULL) { - goto exit_address_parser_created; - } - char_array_destroy(path); return true; @@ -317,10 +312,6 @@ void address_parser_destroy(address_parser_t *self) { crf_destroy(self->model.crf); } - if (self->context != NULL) { - address_parser_context_destroy(self->context); -
} - if (self->vocab != NULL) { trie_destroy(self->vocab); } @@ -1662,12 +1653,16 @@ libpostal_address_parser_response_t *address_parser_parse(char *address, char *l if (address == NULL) return NULL; address_parser_t *parser = get_address_parser(); - if (parser == NULL || parser->context == NULL) { + if (parser == NULL) { log_error("parser is not setup, call libpostal_setup_address_parser()\n"); return NULL; } - address_parser_context_t *context = parser->context; + address_parser_context_t *const context = address_parser_context_new(); + if (!context) { + log_error("error creating address parser context\n"); + return NULL; + } char *normalized = address_parser_normalize_string(address); bool is_normalized = normalized != NULL; @@ -1679,6 +1674,8 @@ libpostal_address_parser_response_t *address_parser_parse(char *address, char *l tokenized_string_t *tokenized_str = tokenized_string_new_from_str_size(normalized, strlen(normalized), tokens->n); + // It seems like we might be needing to clear context->separators somewhere + // (in the case where we re-use the context). for (size_t i = 0; i < tokens->n; i++) { token_t token = tokens->a[i]; if (ADDRESS_PARSER_IS_SEPARATOR(token.type)) { @@ -1709,6 +1706,8 @@ libpostal_address_parser_response_t *address_parser_parse(char *address, char *l language = NULL; country = NULL; + // We could probably do less work in this function if we are allocating a + // new context each call. 
address_parser_context_fill(context, parser, tokenized_str, language, country); libpostal_address_parser_response_t *response = NULL; @@ -1774,6 +1773,7 @@ libpostal_address_parser_response_t *address_parser_parse(char *address, char *l if (is_normalized) { free(normalized); } + address_parser_context_destroy(context); return response; } } @@ -1835,6 +1835,7 @@ libpostal_address_parser_response_t *address_parser_parse(char *address, char *l free(normalized); } + address_parser_context_destroy(context); return response; } diff --git a/src/address_parser.h b/src/address_parser.h index b059a246e..2d139fcad 100644 --- a/src/address_parser.h +++ b/src/address_parser.h @@ -209,7 +209,6 @@ typedef struct address_parser { averaged_perceptron_t *ap; crf_t *crf; } model; - address_parser_context_t *context; trie_t *vocab; trie_t *phrases; address_parser_types_array *phrase_types; diff --git a/src/address_parser_train.c b/src/address_parser_train.c index 3a1d22de5..4075e684d 100644 --- a/src/address_parser_train.c +++ b/src/address_parser_train.c @@ -328,13 +328,6 @@ address_parser_t *address_parser_init(char *filename) { return NULL; } - address_parser_context_t *context = address_parser_context_new(); - if (context == NULL) { - log_error("Error allocating context\n"); - return NULL; - } - parser->context = context; - khash_t(str_uint32) *vocab = kh_init(str_uint32); if (vocab == NULL) { log_error("Could not allocate vocab\n"); @@ -1052,7 +1045,11 @@ bool address_parser_train_epoch(address_parser_t *self, void *trainer, char *fil return false; } - address_parser_context_t *context = self->context; + address_parser_context_t *const context = address_parser_context_new(); + if (!context) { + log_error("error creating address parser context\n"); + return false; + } size_t examples = 0; uint64_t errors = address_parser_train_num_errors(self, trainer); @@ -1097,6 +1094,7 @@ bool address_parser_train_epoch(address_parser_t *self, void *trainer, char *fil 
exit_epoch_training_started: address_parser_data_set_destroy(data_set); + address_parser_context_destroy(context); return true; } diff --git a/src/averaged_perceptron.c b/src/averaged_perceptron.c index 4570f73bf..5e9e03ac9 100644 --- a/src/averaged_perceptron.c +++ b/src/averaged_perceptron.c @@ -7,8 +7,11 @@ static inline bool averaged_perceptron_get_feature_id(averaged_perceptron_t *sel } inline double_array *averaged_perceptron_predict_scores(averaged_perceptron_t *self, cstring_array *features) { + // Possible leak if (self->scores == NULL || self->scores->n == 0) self->scores = double_array_new_zeros((size_t)self->num_classes); + // TODO(horgh): Mutating scores makes this not thread safe. We could + // allocate it each call to resolve this. double_array_zero(self->scores->a, self->scores->n); double *scores = self->scores->a; diff --git a/src/crf.c b/src/crf.c index e668209e3..04d87fddf 100644 --- a/src/crf.c +++ b/src/crf.c @@ -12,14 +12,14 @@ static inline bool crf_get_state_trans_feature_id(crf_t *self, char *feature, ui return trie_get_data(self->state_trans_features, feature, feature_id); } -bool crf_tagger_score(crf_t *self, void *tagger, void *tagger_context, cstring_array *features, cstring_array *prev_tag_features, tagger_feature_function feature_function, tokenized_string_t *tokenized, bool print_features) { +bool crf_tagger_score(crf_t *self, void *tagger, void *tagger_context, cstring_array *features, cstring_array *prev_tag_features, tagger_feature_function feature_function, tokenized_string_t *tokenized, bool print_features, crf_context_t *const crf_context) { if (self == NULL || feature_function == NULL || tokenized == NULL ) { return false; } size_t num_tokens = tokenized->tokens->n; - crf_context_t *crf_context = self->context; crf_context_set_num_items(crf_context, num_tokens); + // We might not need this if we allocate one each lookup crf_context_reset(crf_context, CRF_CONTEXT_RESET_ALL); if (!double_matrix_copy(self->trans_weights, 
crf_context->trans)) { @@ -97,19 +97,24 @@ bool crf_tagger_score(crf_t *self, void *tagger, void *tagger_context, cstring_a return true; } -bool crf_tagger_score_viterbi(crf_t *self, void *tagger, void *tagger_context, cstring_array *features, cstring_array *prev_tag_features, tagger_feature_function feature_function, tokenized_string_t *tokenized, double *score, bool print_features) { - if (!crf_tagger_score(self, tagger, tagger_context, features, prev_tag_features, feature_function, tokenized, print_features)) { - return false; +uint32_array *crf_tagger_score_viterbi(crf_t *self, void *tagger, void *tagger_context, cstring_array *features, cstring_array *prev_tag_features, tagger_feature_function feature_function, tokenized_string_t *tokenized, double *score, bool print_features, crf_context_t *const crf_context) { + if (!crf_tagger_score(self, tagger, tagger_context, features, prev_tag_features, feature_function, tokenized, print_features, crf_context)) { + return NULL; } size_t num_tokens = tokenized->tokens->n; - uint32_array_resize_fixed(self->viterbi, num_tokens); - double viterbi_score = crf_context_viterbi(self->context, self->viterbi->a); + uint32_array *const viterbi = uint32_array_new_size_fixed(num_tokens); + if (!viterbi) { + log_error("error allocating viterbi array\n"); + return NULL; + } + + double viterbi_score = crf_context_viterbi(crf_context, viterbi->a); *score = viterbi_score; - return true; + return viterbi; } @@ -117,17 +122,28 @@ bool crf_tagger_predict(crf_t *self, void *tagger, void *context, cstring_array double score; if (labels == NULL) return false; - if (!crf_tagger_score_viterbi(self, tagger, context, features, prev_tag_features, feature_function, tokenized, &score, print_features)) { + + crf_context_t *const crf_context = crf_context_new(CRF_CONTEXT_VITERBI | CRF_CONTEXT_MARGINALS, self->num_classes, CRF_CONTEXT_DEFAULT_NUM_ITEMS); + if (!crf_context) { /* check the CRF context allocated above, not the caller's tagger `context` */ + log_error("error creating crf context\n"); return false; } - uint32_t *viterbi =
self->viterbi->a; + uint32_array *viterbi = crf_tagger_score_viterbi(self, tagger, context, features, prev_tag_features, feature_function, tokenized, &score, print_features, crf_context); + if (!viterbi) { + crf_context_destroy(crf_context); + return false; + } - for (size_t i = 0; i < self->viterbi->n; i++) { - char *predicted = cstring_array_get_string(self->classes, viterbi[i]); + for (size_t i = 0; i < viterbi->n; i++) { + char *predicted = cstring_array_get_string(self->classes, + viterbi->a[i]); cstring_array_add_string(labels, predicted); } + crf_context_destroy(crf_context); + uint32_array_destroy(viterbi); + return true; } @@ -265,16 +281,6 @@ crf_t *crf_read(FILE *f) { goto exit_crf_created; } - crf->viterbi = uint32_array_new(); - if (crf->viterbi == NULL) { - goto exit_crf_created; - } - - crf->context = crf_context_new(CRF_CONTEXT_VITERBI | CRF_CONTEXT_MARGINALS, crf->num_classes, CRF_CONTEXT_DEFAULT_NUM_ITEMS); - if (crf->context == NULL) { - goto exit_crf_created; - } - return crf; exit_crf_created: @@ -318,13 +324,5 @@ void crf_destroy(crf_t *self) { double_matrix_destroy(self->trans_weights); } - if (self->viterbi != NULL) { - uint32_array_destroy(self->viterbi); - } - - if (self->context != NULL) { - crf_context_destroy(self->context); - } - free(self); } diff --git a/src/crf.h b/src/crf.h index af0dabeb2..eee8a5d4e 100644 --- a/src/crf.h +++ b/src/crf.h @@ -31,14 +31,12 @@ typedef struct crf { trie_t *state_trans_features; sparse_matrix_t *state_trans_weights; double_matrix_t *trans_weights; - uint32_array *viterbi; - crf_context_t *context; } crf_t; bool crf_tagger_predict(crf_t *model, void *tagger, void *tagger_context, cstring_array *features, cstring_array *prev_tag_features, cstring_array *labels, tagger_feature_function feature_function, tokenized_string_t *tokenized, bool print_features); -bool crf_tagger_score(crf_t *self, void *tagger, void *tagger_context, cstring_array *features, cstring_array *prev_tag_features, 
tagger_feature_function feature_function, tokenized_string_t *tokenized, bool print_features); -bool crf_tagger_score_viterbi(crf_t *self, void *tagger, void *tagger_context, cstring_array *features, cstring_array *prev_tag_features, tagger_feature_function feature_function, tokenized_string_t *tokenized, double *score, bool print_features); +bool crf_tagger_score(crf_t *self, void *tagger, void *tagger_context, cstring_array *features, cstring_array *prev_tag_features, tagger_feature_function feature_function, tokenized_string_t *tokenized, bool print_features, crf_context_t *const); +uint32_array *crf_tagger_score_viterbi(crf_t *self, void *tagger, void *tagger_context, cstring_array *features, cstring_array *prev_tag_features, tagger_feature_function feature_function, tokenized_string_t *tokenized, double *score, bool print_features, crf_context_t *const); bool crf_tagger_predict(crf_t *self, void *tagger, void *context, cstring_array *features, cstring_array *prev_tag_features, cstring_array *labels, tagger_feature_function feature_function, tokenized_string_t *tokenized, bool print_features); @@ -50,4 +48,4 @@ crf_t *crf_load(char *filename); void crf_destroy(crf_t *self); -#endif \ No newline at end of file +#endif diff --git a/src/crf_trainer_averaged_perceptron.c b/src/crf_trainer_averaged_perceptron.c index fdd7f0c02..10f78cd17 100644 --- a/src/crf_trainer_averaged_perceptron.c +++ b/src/crf_trainer_averaged_perceptron.c @@ -941,10 +941,6 @@ crf_t *crf_averaged_perceptron_trainer_finalize(crf_averaged_perceptron_trainer_ crf->state_trans_features = state_trans_features; - crf->viterbi = uint32_array_new(); - - crf->context = crf_context_new(CRF_CONTEXT_VITERBI | CRF_CONTEXT_MARGINALS, num_classes, CRF_CONTEXT_DEFAULT_NUM_ITEMS); - crf_averaged_perceptron_trainer_destroy(self); return crf; diff --git a/src/log/log.h b/src/log/log.h index 4baf97639..65439c799 100644 --- a/src/log/log.h +++ b/src/log/log.h @@ -18,6 +18,7 @@ #define __FILENAME__ 
(strrchr(__FILE__, '/') ? strrchr(__FILE__, '/') + 1 : __FILE__) /* safe readable version of errno */ +// TODO(horgh): strerror() is not thread safe #define clean_errno() (errno == 0 ? "None" : strerror(errno)) #if defined (LOG_NO_COLORS) || defined (_WIN32) diff --git a/src/thread_test.c b/src/thread_test.c new file mode 100644 index 000000000..9adaefc18 --- /dev/null +++ b/src/thread_test.c
@@ -0,0 +1,509 @@
+// This program is for running multithreaded tests.
+//
+// Prerequisites: json-c (Ubuntu package libjson-c-dev)
+
+#define _POSIX_C_SOURCE 200809L
+
+#include <errno.h>
+#include <json.h>
+#include "libpostal.h"
+#include <pthread.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+// Parsed command line options. Allocated by get_args() and released with
+// args_destroy().
+struct Args {
+    char *input_file;
+    int thread_count;
+    int iterations;
+    bool verbose;
+};
+
+// One lookup input. Both strings are heap copies owned by the struct.
+struct Address {
+    char *address;
+    char *country;
+};
+
+static struct Args *get_args(int, char **);
+static void args_destroy(struct Args *const);
+static void print_usage(char const *const);
+static struct Address **addresses_load(
+    bool const,
+    char const *const);
+static char *read_file(char const *const);
+static void addresses_destroy(struct Address **const);
+static bool start_threaded_lookups(
+    int const,
+    int const,
+    bool const,
+    struct Address **const);
+static void *thread(void *);
+static bool run_lookups(bool const, struct Address **const);
+
+int main(int argc, char **argv)
+{
+    struct Args *const args = get_args(argc, argv);
+    if (!args) {
+        return 1;
+    }
+
+    if (args->verbose) {
+        printf("reading JSON...\n");
+    }
+
+    struct Address **const addresses = addresses_load(args->verbose,
+        args->input_file);
+    if (!addresses) {
+        fprintf(stderr, "error loading addresses\n");
+        args_destroy(args);
+        return 1;
+    }
+
+    if (args->verbose) {
+        printf("done reading JSON\n");
+    }
+
+    if (args->verbose) {
+        printf("setting up libpostal...\n");
+    }
+
+    if (!libpostal_setup() || !libpostal_setup_parser() ||
+        !libpostal_setup_language_classifier()) {
+        fprintf(stderr, "libpostal setup failed\n");
+        args_destroy(args);
+        addresses_destroy(addresses);
+        return 1;
+    }
+
+    if (args->verbose) {
+        printf("done setting up libpostal\n");
+    }
+
+    if (!start_threaded_lookups(args->thread_count, args->iterations,
+        args->verbose, addresses)) {
+        fprintf(stderr, "error running lookups\n");
+        args_destroy(args);
+        addresses_destroy(addresses);
+        libpostal_teardown();
+        libpostal_teardown_parser();
+        libpostal_teardown_language_classifier();
+        return 1;
+    }
+
+    args_destroy(args);
+    addresses_destroy(addresses);
+    libpostal_teardown();
+    libpostal_teardown_parser();
+    libpostal_teardown_language_classifier();
+    return 0;
+}
+
+// Parse argv into a heap-allocated Args. Returns NULL, after printing a
+// message or the usage text, when the arguments are missing or invalid.
+static struct Args *get_args(int argc, char **argv)
+{
+    struct Args *const args = calloc(1, sizeof(struct Args));
+    if (!args) {
+        fprintf(stderr, "%s: error allocating argument memory: %s\n", __func__,
+            strerror(errno));
+        return NULL;
+    }
+
+    while (1) {
+        int const opt = getopt(argc, argv, "f:t:i:vh");
+        if (opt == -1) {
+            break;
+        }
+
+        switch (opt) {
+        case 'f':
+            // A repeated -f replaces the earlier value instead of leaking it.
+            free(args->input_file);
+            args->input_file = strdup(optarg);
+            if (!args->input_file) {
+                fprintf(stderr, "%s: error allocating memory: %s\n", __func__,
+                    strerror(errno));
+                args_destroy(args);
+                return NULL;
+            }
+            break;
+        case 't':
+            args->thread_count = atoi(optarg);
+            break;
+        case 'i':
+            args->iterations = atoi(optarg);
+            break;
+        case 'v':
+            args->verbose = true;
+            break;
+        case 'h':
+        default:
+            // -h and unknown options both end the run; release args first.
+            print_usage(argv[0]);
+            args_destroy(args);
+            return NULL;
+        }
+    }
+
+    if (!args->input_file) {
+        fprintf(stderr, "you must provide an input file\n");
+        args_destroy(args);
+        return NULL;
+    }
+
+    if (args->thread_count <= 0) {
+        fprintf(stderr, "thread count must be at least 1\n");
+        args_destroy(args);
+        return NULL;
+    }
+
+    if (args->iterations <= 0) {
+        fprintf(stderr, "iterations must be at least 1\n");
+        args_destroy(args);
+        return NULL;
+    }
+
+    return args;
+}
+
+// Release an Args and everything it owns. NULL is accepted.
+static void args_destroy(struct Args *const args)
+{
+    if (!args) {
+        return;
+    }
+
+    free(args->input_file);
+    free(args);
+}
+
+// Print the command line help to stderr.
+static void print_usage(char const *const program_name)
+{
+    fprintf(stderr, "Usage: %s <arguments>\n", program_name);
+    fprintf(stderr, "\n");
+    fprintf(stderr, "Arguments:\n");
+    fprintf(stderr, " -f <file> Input file containing JSON with addresses to look up. \n");
+    fprintf(stderr, " -t <#> How many threads to run.\n");
+    fprintf(stderr, " -i <#> Number of times to perform each lookup in each thread.\n");
+    fprintf(stderr, " -v Enable verbose output.\n");
+    fprintf(stderr, " -h This help.\n");
+    fprintf(stderr, "\n");
+    fprintf(stderr, "JSON should look like this:\n");
+    fprintf(stderr, " [ {\"address\": \"123 Main St.\", \"country\": \"CA\"}, ... ]\n");
+}
+
+// Load addresses from a JSON file shaped like
+//   [ {"address": "...", "country": "..."}, ... ]
+// Returns a NULL-terminated array to release with addresses_destroy(), or
+// NULL on error.
+static struct Address **addresses_load(
+    bool const verbose,
+    char const *const filename)
+{
+    if (!filename || strlen(filename) == 0) {
+        fprintf(stderr, "%s: %s\n", __func__, strerror(EINVAL));
+        return NULL;
+    }
+
+    char *const contents = read_file(filename);
+    if (!contents) {
+        fprintf(stderr, "%s: error reading file: %s\n", __func__, filename);
+        return NULL;
+    }
+
+    json_object *const top_obj = json_tokener_parse(contents);
+    free(contents);
+    if (!top_obj) {
+        fprintf(stderr, "%s: invalid JSON\n", __func__);
+        return NULL;
+    }
+
+    // The array accessors used below are only valid on a JSON array.
+    if (!json_object_is_type(top_obj, json_type_array)) {
+        fprintf(stderr, "%s: top-level JSON value must be an array\n",
+            __func__);
+        json_object_put(top_obj);
+        return NULL;
+    }
+
+    size_t const count = json_object_array_length(top_obj);
+
+    // The extra zeroed slot is the NULL terminator that addresses_destroy()
+    // and run_lookups() stop at.
+    struct Address **const addresses = calloc(count + 1,
+        sizeof(struct Address *));
+    if (!addresses) {
+        fprintf(stderr, "%s: error allocating memory for addresses: %s\n",
+            __func__, strerror(errno));
+        json_object_put(top_obj);
+        return NULL;
+    }
+
+    for (size_t i = 0; i < count; i++) {
+        json_object *const ele_obj = json_object_array_get_idx(top_obj, i);
+
+        json_object *address_obj = NULL;
+        if (!json_object_object_get_ex(ele_obj, "address", &address_obj)) {
+            fprintf(stderr, "%s: `address' property not found\n", __func__);
+            json_object_put(top_obj);
+            addresses_destroy(addresses);
+            return NULL;
+        }
+
+        char const *const address = json_object_get_string(address_obj);
+        if (!address) {
+            fprintf(stderr, "%s: null `address' property found\n", __func__);
+            json_object_put(top_obj);
+            addresses_destroy(addresses);
+            return NULL;
+        }
+
+        json_object *country_obj = NULL;
+        if (!json_object_object_get_ex(ele_obj, "country", &country_obj)) {
+            fprintf(stderr, "%s: `country' property not found\n", __func__);
+            json_object_put(top_obj);
+            addresses_destroy(addresses);
+            return NULL;
+        }
+
+        char const *const country = json_object_get_string(country_obj);
+        if (!country) {
+            fprintf(stderr, "%s: null `country' property found\n", __func__);
+            json_object_put(top_obj);
+            addresses_destroy(addresses);
+            return NULL;
+        }
+
+        addresses[i] = calloc(1, sizeof(struct Address));
+        if (!addresses[i]) {
+            fprintf(stderr, "%s: error allocating address memory: %s\n",
+                __func__, strerror(errno));
+            json_object_put(top_obj);
+            addresses_destroy(addresses);
+            return NULL;
+        }
+        addresses[i]->address = strdup(address);
+        if (!addresses[i]->address) {
+            fprintf(stderr, "%s: error allocating memory for address: %s\n",
+                __func__, strerror(errno));
+            json_object_put(top_obj);
+            addresses_destroy(addresses);
+            return NULL;
+        }
+        addresses[i]->country = strdup(country);
+        if (!addresses[i]->country) {
+            fprintf(stderr, "%s: error allocating memory for country: %s\n",
+                __func__, strerror(errno));
+            json_object_put(top_obj);
+            addresses_destroy(addresses);
+            return NULL;
+        }
+
+        if (verbose) {
+            printf("address: [%s] country: [%s]\n", addresses[i]->address,
+                addresses[i]->country);
+        }
+    }
+
+    json_object_put(top_obj);
+
+    return addresses;
+}
+
+// Read a whole file into a zero-filled, NUL-terminated heap buffer that the
+// caller frees. Returns NULL on error or when the file does not fit.
+static char *read_file(char const *const filename)
+{
+    if (!filename || strlen(filename) == 0) {
+        fprintf(stderr, "%s: %s\n", __func__, strerror(EINVAL));
+        return NULL;
+    }
+
+    FILE *const fh = fopen(filename, "r");
+    if (!fh) {
+        fprintf(stderr, "%s: fopen(%s): %s\n", __func__, filename,
+            strerror(errno));
+        return NULL;
+    }
+
+    size_t const sz = 10240000;
+    char *const buf = calloc(sz, sizeof(char));
+    if (!buf) {
+        fprintf(stderr, "%s: %s\n", __func__, strerror(errno));
+        fclose(fh);
+        return NULL;
+    }
+
+    // Ask for one byte less than the buffer holds so the last byte always
+    // stays NUL, then require end-of-file without a read error: anything else
+    // means the file is too large or could not be read.
+    (void) fread(buf, sizeof(char), sz - 1, fh);
+    if (ferror(fh) || !feof(fh)) {
+        fprintf(stderr, "%s: error fully reading file: %s\n", __func__,
+            filename);
+        fclose(fh);
+        free(buf);
+        return NULL;
+    }
+
+    if (fclose(fh) != 0) {
+        fprintf(stderr, "%s: error closing file: %s: %s\n", __func__, filename,
+            strerror(errno));
+        free(buf);
+        return NULL;
+    }
+
+    return buf;
+}
+
+// Free a NULL-terminated array built by addresses_load(). NULL is accepted.
+static void addresses_destroy(struct Address **const addresses)
+{
+    if (!addresses) {
+        return;
+    }
+
+    for (size_t i = 0; addresses[i]; i++) {
+        struct Address *const address = addresses[i];
+        free(address->address);
+        free(address->country);
+        free(address);
+    }
+
+    free(addresses);
+}
+
+// Per-thread state. `failed` is written by the worker and read by
+// start_threaded_lookups() only after pthread_join() on that worker.
+struct thread_info {
+    pthread_t id;
+    int iterations;
+    bool verbose;
+    bool failed;
+    struct Address **addresses;
+};
+
+// Run the lookups on `thread_count` threads at once. Returns true only when
+// every thread was started, joined, and completed all of its lookups.
+static bool start_threaded_lookups(
+    int const thread_count,
+    int const iterations,
+    bool const verbose,
+    struct Address **const addresses)
+{
+    struct thread_info *const tinfo = calloc(thread_count,
+        sizeof(struct thread_info));
+    if (!tinfo) {
+        fprintf(stderr, "%s: calloc(): %s\n", __func__, strerror(errno));
+        return false;
+    }
+
+    bool ok = true;
+    int started = 0;
+
+    for (; started < thread_count; started++) {
+        tinfo[started].iterations = iterations;
+        tinfo[started].verbose = verbose;
+        tinfo[started].addresses = addresses;
+
+        if (pthread_create(&tinfo[started].id, NULL, &thread,
+            &tinfo[started]) != 0) {
+            fprintf(stderr, "%s: pthread_create() failed\n", __func__);
+            ok = false;
+            break;
+        }
+    }
+
+    // Join every thread that was started, even after a failure: the workers
+    // hold pointers into tinfo, so it must outlive all of them.
+    for (int i = 0; i < started; i++) {
+        if (pthread_join(tinfo[i].id, NULL) != 0) {
+            fprintf(stderr, "%s: pthread_join() failed\n", __func__);
+            ok = false;
+            continue;
+        }
+        if (tinfo[i].failed) {
+            ok = false;
+        }
+    }
+
+    free(tinfo);
+
+    return ok;
+}
+
+// Thread entry point: run every lookup `iterations` times and record a
+// failure in this thread's thread_info.
+static void *thread(void *arg)
+{
+    struct thread_info *const tinfo = arg;
+    if (!tinfo) {
+        fprintf(stderr, "%s: %s\n", __func__, strerror(EINVAL));
+        return NULL;
+    }
+
+    for (int i = 0; i < tinfo->iterations; i++) {
+        if (!run_lookups(tinfo->verbose, tinfo->addresses)) {
+            fprintf(stderr, "%s: run_lookups() failed\n", __func__);
+            tinfo->failed = true;
+            return NULL;
+        }
+    }
+
+    return NULL;
+}
+
+// Parse each address and expand every parsed component, printing the results
+// when verbose. Returns false as soon as one address fails to parse.
+static bool run_lookups(bool const verbose, struct Address **const addresses)
+{
+    for (size_t i = 0; addresses[i]; i++) {
+        struct Address const *const address = addresses[i];
+        if (verbose) {
+            printf("address [%s] country [%s]:\n", address->address,
+                address->country);
+        }
+
+        libpostal_address_parser_options_t options =
+            libpostal_get_address_parser_default_options();
+        options.country = address->country;
+
+        libpostal_address_parser_response_t *const parsed =
+            libpostal_parse_address(address->address, options);
+        if (!parsed) {
+            fprintf(stderr, "error parsing: address [%s] country [%s]\n",
+                address->address, address->country);
+            return false;
+        }
+
+        libpostal_normalize_options_t normalize_options =
+            libpostal_get_default_options();
+
+        for (size_t j = 0; j < parsed->num_components; j++) {
+            if (verbose) {
+                printf(" %s: %s\n", parsed->labels[j], parsed->components[j]);
+            }
+
+            size_t num_expansions = 0;
+            char **const expansions = libpostal_expand_address(
+                parsed->components[j], normalize_options, &num_expansions);
+            if (verbose) {
+                for (size_t k = 0; k < num_expansions; k++) {
+                    printf(" -> %s\n", expansions[k]);
+                }
+            }
+            libpostal_expansion_array_destroy(expansions, num_expansions);
+        }
+
+        libpostal_address_parser_response_destroy(parsed);
+    }
+
+    return true;
+}
diff --git a/src/transliterate.c b/src/transliterate.c index bd8cb003e..8725debff 100644 --- a/src/transliterate.c +++ b/src/transliterate.c @@ -734,6 +734,7 @@ char *transliterate(char *trans_name, char *str, size_t len) { step = trans_table->steps->a[i]; step_name = step->name; if (step->type == STEP_RULESET && trans_node_id == NULL_NODE_ID) { + // TODO(horgh): Possible use after free log_warn("transliterator \"%s\" does not exist in trie\n", trans_name);
free(str); return NULL; @@ -793,6 +794,7 @@ char *transliterate(char *trans_name, char *str, size_t len) { ch = (int32_t)*ptr; } else if (char_len <= 0) { log_warn("char_len=%zd at idx=%zu\n", char_len, idx); + // TODO(horgh): Possible double free free(trans_name); free(str); return NULL; diff --git a/src/trie.c b/src/trie.c index 2e11ba6e3..c960b3525 100644 --- a/src/trie.c +++ b/src/trie.c @@ -850,6 +850,7 @@ uint32_t trie_get_from_index(trie_t *self, char *word, size_t len, uint32_t i) { trie_node_t node = trie_get_node(self, i); if (node.base == NULL_NODE_ID) return NULL_NODE_ID; + // Possibly uninitialized? uint32_t next_id; // Include NUL-byte. It may be stored if this phrase is a prefix of a longer one diff --git a/src/vector_math.h b/src/vector_math.h index 3c964581a..3b854f116 100644 --- a/src/vector_math.h +++ b/src/vector_math.h @@ -490,7 +490,9 @@ static inline void remez9_0_log2_sse(double *values, size_t num) } \ \ static inline void name##_exp(type *array, size_t n) { \ - remez9_0_log2_sse(array, n); \ + for (size_t i = 0; i < n; i++) { \ + array[i] = exp(array[i]); \ + } \ } \ \ static inline type name##_sum_log(type *array, size_t n) { \ @@ -516,4 +518,4 @@ static inline void remez9_0_log2_sse(double *values, size_t num) -#endif \ No newline at end of file +#endif diff --git a/test/Makefile.am b/test/Makefile.am index f2e911f2a..e1cf4d702 100644 --- a/test/Makefile.am +++ b/test/Makefile.am @@ -1,4 +1,5 @@ -CFLAGS_BASE = -Wfloat-equal -Wpointer-arith -std=gnu99 -DLIBPOSTAL_DATA_DIR='"$(datadir)/libpostal"' -g +CFLAGS_CONF = @CFLAGS@ +CFLAGS_BASE = -Wfloat-equal -Wpointer-arith -std=gnu99 -DLIBPOSTAL_DATA_DIR='"$(datadir)/libpostal"' -g $(CFLAGS_CONF) CFLAGS_O0 = $(CFLAGS_BASE) -O0 CFLAGS_O1 = $(CFLAGS_BASE) -O1 CFLAGS_O2 = $(CFLAGS_BASE) -O2 diff --git a/windows/configure.ac b/windows/configure.ac index 0a964cf55..58c0b7930 100644 --- a/windows/configure.ac +++ b/windows/configure.ac @@ -22,6 +22,8 @@ AC_PROG_INSTALL LDFLAGS="$LDFLAGS 
-L/usr/local/lib" # Checks for libraries. +AC_CHECK_LIB([json-c], [json_tokener_parse], [have_json_c=yes], [have_json_c=no]) +AM_CONDITIONAL([USE_LIBJSON_C], [test "x$have_json_c" = xyes]) AC_SEARCH_LIBS([log], [m],,[AC_MSG_ERROR([Could not find math library])])