QI_BUILD 1-SEP-1993 10:03:31 VAX C V3.2-044 Page 1 V1.0 31-AUG-1993 14:26:31 X$SRC:[CSO]QI_BUILD.C;2 (1) 1 /* index qi sequential data file */ 2 /* Bruce Tanner - Cerritos College */ 3 4 /* 1.0 1993/08/14 Start with build_index */ 5 /* 1.1 1993/08/30 Make fopen failure more explicit */ 6 7 #include ssdef 649 #include stdio 911 #include string 957 #include ctype 1020 #include rms 3586 #include descrip 4019 #include climsgdef 4183 #include assert 4194 #include "qi.h" 4290 4291 char idx_record[IDX_RECORD_SIZE + 1]; 4292 char idx_key[IDX_KEY_SIZE + 1]; 4293 char dat_record[DAT_RECORD_SIZE + 1]; 4294 char dat_key[DAT_KEY_SIZE + 1]; 4295 4296 void read_fields(char *, int []); 4297 void index_words(char *, struct RAB *); 4298 struct dsc$descriptor_s *descr(char *); 4299 void build_commands(); 4300 int lib$get_foreign(), lib$get_input(); 4301 char *soundex(char *, char *, int); 4302 4303 4304 main(int argc, char *argv[]) 4305 { 4306 1 4307 1 FILE *src; 4308 1 char cli_input[256], file_arg[256], file_spec[256]; 4309 1 char idx_name[256], dat_name[256]; 4310 1 char *ptr, field[DATA_SIZE + 1]; 4311 1 int status, context = 0, count = 0, indexed[MAX_FIELD]; 4312 1 short leng; 4313 1 struct FAB idxfab, datfab; 4314 1 struct RAB idxrab, datrab; 4315 1 struct XABKEY idxxab, datxab; 4316 1 $DESCRIPTOR(input_dsc, cli_input); 4317 1 $DESCRIPTOR(file_dsc, file_arg); 4318 1 $DESCRIPTOR(file_spec_dsc, file_spec); 4319 1 $DESCRIPTOR(idx_dsc, idx_name); 4320 1 4321 1 status = lib$get_foreign(&input_dsc, 0, &leng, 0); 4322 1 4323 1 strncpy(cli_input+6, cli_input, leng); 4324 1 strncpy(cli_input, "build ", 6); 4325 1 4326 1 status = cli$dcl_parse(&input_dsc, build_commands, lib$get_input); 4327 1 4328 1 if (status != CLI$_NORMAL) /* error in parse, exit */ 4329 1 exit(1); QI_BUILD 1-SEP-1993 10:03:31 VAX C V3.2-044 Page 2 V1.0 31-AUG-1993 14:26:31 X$SRC:[CSO]QI_BUILD.C;2 (1) 4330 1 4331 1 if ((cli$present(descr("file")) & 1) == 0) { 4332 2 printf("Usage: build data_file /output=... /data\n"); 4333 2 exit(3); 4334 2 } 4335 1 4336 1 status = cli$get_value(descr("file"), &file_dsc, &leng); /* get source */ 4337 1 4338 1 status = lib$find_file(&file_dsc, &file_spec_dsc, &context, 0, 0, 0, 0); 4339 1 ptr = strchr(file_spec, ' '); 4340 1 if (ptr) *ptr = '\0'; /* chop off trailing spaces */ 4341 1 strcpy(idx_name, file_spec); /* make copy for output spec */ 4342 1 4343 1 if (cli$present(descr("output")) & 1) { /* if /output, overwrite out_name */ 4344 2 status = cli$get_value(descr("output"), &idx_dsc, &leng); 4345 2 idx_name[leng] = '\0'; 4346 2 } 4347 1 4348 1 ptr = strrchr(idx_name, '.'); /* just get file name */ 4349 1 if (ptr) *ptr = '\0'; 4350 1 strcat(idx_name, ".INDEX"); 4351 1 4352 1 idxfab = cc$rms_fab; 4353 1 idxfab.fab$b_bks = 6; 4354 1 idxfab.fab$b_fac = FAB$M_GET | FAB$M_PUT | FAB$M_UPD; 4355 1 idxfab.fab$l_fna = idx_name; 4356 1 idxfab.fab$b_fns = strlen(idx_name); 4357 1 idxfab.fab$l_fop = FAB$M_CBT | FAB$M_DFW; 4358 1 idxfab.fab$w_mrs = IDX_RECORD_SIZE; 4359 1 idxfab.fab$b_org = FAB$C_IDX; 4360 1 idxfab.fab$b_rat = FAB$M_CR; 4361 1 idxfab.fab$b_rfm = FAB$C_FIX; 4362 1 idxfab.fab$b_shr = FAB$M_NIL; 4363 1 idxfab.fab$l_xab = &idxxab; 4364 1 4365 1 idxrab = cc$rms_rab; 4366 1 idxrab.rab$l_fab = &idxfab; 4367 1 idxrab.rab$b_krf = 0; 4368 1 idxrab.rab$l_kbf = idx_key; 4369 1 idxrab.rab$b_ksz = IDX_KEY_SIZE; 4370 1 idxrab.rab$b_rac = RAB$C_KEY; 4371 1 idxrab.rab$l_rbf = idx_record; 4372 1 idxrab.rab$w_rsz = IDX_RECORD_SIZE; 4373 1 idxrab.rab$l_ubf = idx_record; 4374 1 idxrab.rab$w_usz = IDX_RECORD_SIZE; 4375 1 idxrab.rab$b_mbf = 20; 4376 1 idxrab.rab$l_rop = RAB$M_RAH | RAB$M_WBH; 4377 1 4378 1 idxxab = cc$rms_xabkey; 4379 1 idxxab.xab$b_dtp = XAB$C_STG; 4380 1 idxxab.xab$b_flg = XAB$M_DUP | XAB$M_IDX_NCMPR; 4381 1 idxxab.xab$w_pos0 = 0; 4382 1 idxxab.xab$b_siz0 = IDX_KEY_SIZE; 4383 1 idxxab.xab$b_ref = 0; 4384 1 4385 1 strcpy(dat_name, idx_name); 4386 1 ptr = strrchr(dat_name, '.'); /* just get file name */ QI_BUILD 1-SEP-1993 10:03:31 VAX C V3.2-044 Page 3 V1.0 31-AUG-1993 14:26:31 X$SRC:[CSO]QI_BUILD.C;2 (1) 4387 1 if (ptr) *ptr = '\0'; 4388 1 strcat(dat_name, ".DATA"); 4389 1 4390 1 datfab = cc$rms_fab; 4391 1 datfab.fab$b_bks = 9; 4392 1 datfab.fab$b_fac = FAB$M_PUT; 4393 1 datfab.fab$l_fna = dat_name; 4394 1 datfab.fab$b_fns = strlen(dat_name); 4395 1 datfab.fab$l_fop = FAB$M_CBT | FAB$M_DFW; 4396 1 datfab.fab$w_mrs = DAT_RECORD_SIZE; 4397 1 datfab.fab$b_org = FAB$C_IDX; 4398 1 datfab.fab$b_rat = FAB$M_CR; 4399 1 datfab.fab$b_rfm = FAB$C_VAR; 4400 1 datfab.fab$b_shr = FAB$M_NIL; 4401 1 datfab.fab$l_xab = &datxab; 4402 1 4403 1 datrab = cc$rms_rab; 4404 1 datrab.rab$l_fab = &datfab; 4405 1 datrab.rab$b_krf = 0; 4406 1 datrab.rab$l_kbf = dat_key; 4407 1 datrab.rab$b_ksz = DAT_KEY_SIZE; 4408 1 datrab.rab$b_rac = RAB$C_KEY; 4409 1 datrab.rab$l_rbf = dat_record; 4410 1 datrab.rab$b_mbf = 20; 4411 1 datrab.rab$l_rop = RAB$M_RAH | RAB$M_WBH; 4412 1 4413 1 datxab = cc$rms_xabkey; 4414 1 datxab.xab$b_dtp = XAB$C_STG; 4415 1 datxab.xab$b_flg = XAB$M_DAT_NCMPR | XAB$M_IDX_NCMPR; 4416 1 datxab.xab$w_pos0 = 0; 4417 1 datxab.xab$b_siz0 = DAT_KEY_SIZE; 4418 1 datxab.xab$b_ref = 0; 4419 1 4420 1 4421 1 /* open index file */ 4422 1 if (((status = sys$create(&idxfab)) & 1) != SS$_NORMAL) 4423 1 lib$stop(status); 4424 1 if (((status = sys$connect(&idxrab)) & 1) != SS$_NORMAL) 4425 1 lib$stop(status); 4426 1 4427 1 /* open data file if /DATA selected */ 4428 1 if (cli$present(descr("data")) & 1) { 4429 2 if (((status = sys$create(&datfab)) & 1) != SS$_NORMAL) 4430 2 lib$stop(status); 4431 2 if (((status = sys$connect(&datrab)) & 1) != SS$_NORMAL) 4432 2 lib$stop(status); 4433 2 } 4434 1 4435 1 /* record the fields with Indexed attribute */ 4436 1 read_fields(file_spec, indexed); 4437 1 4438 1 for (;;) { /* process all files in input spec, first one already found */ 4439 2 4440 2 if ((src = fopen(file_spec, "r", "mbc=50", "mbf=20")) == NULL) { 4441 3 printf("Can't read input file %s\n", file_spec); 4442 3 exit(5); 4443 3 } QI_BUILD 1-SEP-1993 10:03:31 VAX C V3.2-044 Page 4 V1.0 31-AUG-1993 14:26:31 X$SRC:[CSO]QI_BUILD.C;2 (1) 4444 2 printf("Building index for %s\n", file_spec); 4445 2 4446 2 while (fgets(dat_record, sizeof(dat_record), src)) { 4447 3 if ((ptr = strchr(dat_record, '\r')) || 4448 3 (ptr = strchr(dat_record, '\n'))) 4449 3 *ptr = '\0'; /* remove newline */ 4450 3 4451 3 if ((++count % 100) == 0) 4452 3 printf("%d\n", count); 4453 3 4454 3 /* if /DATA requested, write .data file record */ 4455 3 if (cli$present(descr("data")) & 1) { 4456 4 strncpy(dat_key, dat_record, DAT_KEY_SIZE); 4457 4 datrab.rab$w_rsz = strlen(dat_record); 4458 4 if ((status = sys$put(&datrab)) != RMS$_NORMAL) 4459 4 lib$stop(status); 4460 4 } 4461 3 4462 3 /* if this is an indexed field, write index record(s) */ 4463 3 strncpy(field, dat_record + ID_SIZE, FIELD_SIZE); 4464 3 field[FIELD_SIZE] = '\0'; 4465 3 if (indexed[atoi(field)] == TRUE) { 4466 4 for (ptr = dat_record; *ptr; ptr++) 4467 4 if (iscntrl(*ptr)) *ptr = ' '; /* convert tabs to spaces */ 4468 4 while ((strlen(dat_record) > 0) && 4469 4 (dat_record[strlen(dat_record)-1] == ' ')) 4470 4 dat_record[strlen(dat_record)-1] = '\0';/* remove trailing blanks */ 4471 4 for (ptr = dat_record; *ptr; ptr++) 4472 4 *ptr = _tolower(*ptr); /* force lowercase */ 4473 4 4474 4 index_words(dat_record, &idxrab); 4475 4 } 4476 3 } 4477 2 4478 2 fclose(src); 4479 2 status = lib$find_file(&file_dsc, &file_spec_dsc, &context, 0, 0, 0, 0); 4480 2 if ((status & 1) == 0) { 4481 3 lib$find_file_end(&context); 4482 3 break; 4483 3 } 4484 2 ptr = strchr(file_spec, ' '); 4485 2 if (ptr) *ptr = '\0'; /* chop off trailing spaces */ 4486 2 } 4487 1 if (cli$present(descr("data")) & 1) 4488 1 sys$close(&datfab); 4489 1 sys$close(&idxfab); 4490 1 } 4491 4492 4493 /* break data field into words and write them to index file */ 4494 4495 void index_words(char *line, struct RAB *idxptr) 4496 { 4497 1 char data[DATA_SIZE + 2], field[FIELD_SIZE + 1], id[ID_SIZE + 1]; 4498 1 char temp[SOUNDEX_SIZE + 1]; 4499 1 char *cp, *cp2; 4500 1 char *actual = "A"; /* Type field; actual data */ QI_BUILD 1-SEP-1993 10:03:31 VAX C V3.2-044 Page 5 V1.0 31-AUG-1993 14:26:31 X$SRC:[CSO]QI_BUILD.C;2 (1) 4501 1 char *sound = "S"; /* Type field; soundex data */ 4502 1 int status; 4503 1 4504 1 strncpy(id, line, ID_SIZE); 4505 1 id[ID_SIZE] = '\0'; 4506 1 strncpy(field, line + ID_SIZE, FIELD_SIZE); 4507 1 field[FIELD_SIZE] = '\0'; 4508 1 strncpy(data, line + ID_SIZE + FIELD_SIZE + SEQ_SIZE + ATTR_SIZE, DATA_SIZE); 4509 1 data[DATA_SIZE] = '\0'; 4510 1 4511 1 /* special hack to omit indexing the email domain */ 4512 1 if ((strcmp(field, EMAIL_FIELD) == 0) && (cp = strchr(data, '@'))) 4513 1 *cp = '\0'; 4514 1 #if NAME_HACK 4515 1 if (strcmp(field, NAME_FIELD) == 0) /* only edit name field */ 4516 1 for (cp = data; *cp; cp++) { /* apply any special editing to names */ 4517 2 if (*cp == '-') *cp = ' '; /* index both hyphenated names */ 4518 2 if (*cp == '\'') strcpy(cp, cp+1); /* squeeze out apostrophe */ 4519 2 } 4520 1 #endif 4521 1 4522 1 strcat(data, " "); /* line ends with a space */ 4523 1 cp = data; 4524 1 while(cp2 = strchr(cp, ' ')) { /* break at space boundary */ 4525 2 *cp2 = '\0'; 4526 2 if (strlen(cp) > KEYWORD_SIZE) 4527 2 printf("Truncating %d character word /%s/ to %d characters\n", 4528 2 strlen(cp), cp, KEYWORD_SIZE); 4529 2 if (strlen(cp) >= MIN_KEYWORD) { 4530 3 sprintf(idx_key, "%-*s%s%s", KEYWORD_SIZE, cp, field, actual); 4531 3 sprintf(idx_record, "%-*s%s%s%s", KEYWORD_SIZE, cp, 4532 3 field, actual, id); 4533 3 if (((status = sys$put(idxptr)) & 1) == 0) 4534 3 lib$stop(status); 4535 3 #if APPROX_MATCH 4536 3 if (strcmp(field, NAME_FIELD) == 0) { /* only soundex name field */ 4537 4 sprintf(idx_key, "%-*s%s%s", KEYWORD_SIZE, 4538 4 soundex(temp, cp, SOUNDEX_SIZE), field, sound); 4539 4 sprintf(idx_record, "%-*s%s%s%s", KEYWORD_SIZE, 4540 4 soundex(temp, cp, SOUNDEX_SIZE), field, sound, id); 4541 4 if (((status = sys$put(idxptr)) & 1) == 0) 4542 4 lib$stop(status); 4543 4 } 4544 3 #endif 4545 3 } 4546 2 cp = cp2 + 1; 4547 2 } 4548 1 } 4549 4550 char * get_field(char *ptr, char *field) 4551 { 4552 1 int ind; 4553 1 4554 1 for (ind= 0; *ptr != '\0' && *ptr != ':'; ptr++, ind++) 4555 1 field[ind] = _tolower(*ptr); 4556 1 field[ind] = '\0'; 4557 1 if (*ptr == ':') ptr++; /* skip over terminating ":" */ QI_BUILD 1-SEP-1993 10:03:31 VAX C V3.2-044 Page 6 V1.0 31-AUG-1993 14:26:31 X$SRC:[CSO]QI_BUILD.C;2 (1) 4558 1 return ptr; 4559 1 } 4560 4561 4562 void read_fields(char *file, int *array) 4563 { 4564 1 FILE *cnf; 4565 1 char *ptr, config[256], line[256], field[128]; 4566 1 int ind, field_num; 4567 1 4568 1 strcpy(config, file); 4569 1 ptr = strrchr(config, '.'); 4570 1 if (ptr) *ptr = '\0'; 4571 1 strcat(config,".cnf"); 4572 1 4573 1 for (ind = 0; ind < MAX_FIELD; ind++) 4574 1 array[ind] = FALSE; /* init array */ 4575 1 4576 1 if ((cnf = fopen(config, "r")) == NULL) { 4577 2 printf("Can't read config file %s\n", config); 4578 2 exit(7); 4579 2 } 4580 1 4581 1 while (fgets(line, sizeof(line), cnf)) { 4582 2 ptr = strchr(line, '\n'); 4583 2 if (ptr) *ptr = '\0'; /* remove newline */ 4584 2 4585 2 ptr = line; 4586 2 if ((*ptr == '#') || (*ptr == '\0')) /* comment or blank? */ 4587 2 continue; /* yes, skip line */ 4588 2 ptr = get_field(ptr, field); /* field number */ 4589 2 field_num = atoi(field); 4590 2 4591 2 ptr = get_field(ptr, field); /* field name */ 4592 2 ptr = get_field(ptr, field); /* field size */ 4593 2 ptr = get_field(ptr, field); /* field description */ 4594 2 ptr = get_field(ptr, field); /* field option */ 4595 2 4596 2 for (;;) { 4597 3 ptr = get_field(ptr, field); /* get attribute */ 4598 3 if (strlen(field) == 0) 4599 3 break; /* no more attributes */ 4600 3 4601 3 /* 'Indexed' is unique to one letter */ 4602 3 array[field_num] |= (field[0] == 'i'); 4603 3 } 4604 2 } 4605 1 4606 1 fclose(cnf); 4607 1 } 4608 4609 4610 /* descr() creates character descriptor and returns 4611 * the address of the descriptor to the caller. 4612 */ 4613 # define N_DESCR 10 4614 static struct dsc$descriptor_s str_desc[N_DESCR]; QI_BUILD 1-SEP-1993 10:03:31 VAX C V3.2-044 Page 7 V1.0 31-AUG-1993 14:26:31 X$SRC:[CSO]QI_BUILD.C;2 (1) 4615 static int cur_descr = -1; 4616 4617 struct dsc$descriptor_s *descr(char *string) 4618 { 4619 1 if(++cur_descr >= N_DESCR) cur_descr = 0; 4620 1 str_desc[cur_descr].dsc$w_length=(short)strlen(string); 4621 1 str_desc[cur_descr].dsc$b_dtype=DSC$K_DTYPE_T; 4622 1 str_desc[cur_descr].dsc$b_class=DSC$K_CLASS_S; 4623 1 str_desc[cur_descr].dsc$a_pointer=string; 4624 1 return (&str_desc[cur_descr]); 4625 1 } Command Line ------------ CC/DEBUG/NOOP/LIST QI_BUILD .