- 論壇徽章:
- 0
|
Hi,寫了個C的小程序,Sorry我沒做過專業C開發,代碼糟爛請見諒,共讀兩次文件,處理昨天那個11G的文件,先找到不輸出的,最後輸出,加一起9mins.
不知道你輸出時可不可以亂序,亂序的話多線程可能更快點。
- $ time ./a.exe ../tmp/test/test.txt
- i=0;14: @SQ
- i=1;24052116: HWI-ST507:189:D16A7ACXX:2:2105:18552:106051b
- i=2;3436017: HWI-ST507:189:D16A7ACXX:2:2105:17454:113152a
- i=3;3436016: HWI-ST507:189:D16A7ACXX:2:2105:4651:72095a
- i=4;3436017: HWI-ST507:189:D16A7ACXX:2:2105:5262:61025a
- i=5;3436017: HWI-ST507:189:D16A7ACXX:2:2105:7560:82822b
- i=6;3436016: HWI-ST507:189:D16A7ACXX:2:2105:1436:29632a
- i=7;3436016: HWI-ST507:189:D16A7ACXX:2:2105:2969:24612a
- i=8;3436017: HWI-ST507:189:D16A7ACXX:2:2105:14733:62299a
- i=9;3436017: HWI-ST507:189:D16A7ACXX:2:2105:3364:73136b
- i=10;3436016: HWI-ST507:189:D16A7ACXX:2:2105:3982:14275b
- i=11;3436017: HWI-ST507:189:D16A7ACXX:2:2105:19999:75440a
- i=12;1: HWI-ST507:189:D16A7ACXX:2:2105:14733:51733a
- @SQ
- @SQ
- @SQ
- @SQ
- @SQ
- @SQ
- @SQ
- @SQ
- @SQ
- @SQ
- @SQ
- @SQ
- @SQ
- @SQ
- HWI-ST507:189:D16A7ACXX:2:2105:14733:51733a
- real 9m59.714s
- user 5m48.443s
- sys 2m36.889s
複製代碼
- $ cat fileprocess.c
- #include <stdio.h>
- #include <stdlib.h>
- #include <string.h>
- #include <pthread.h>
- #define BASE 1000
- #define INC 1000
- #define LOADBUFFERSIZE 1000000
- typedef struct key /* One table slot: a distinct first-field string from the input plus its occurrence count. */
- {
- char* reads; /* heap copy of the string; NULL while the slot is unused */
- int num; /* occurrences counted so far; 0 for an unused slot */
- pthread_mutex_t mutex; /* held by search_reads() around the increment of num */
- } key;
- int arraylength; /* number of slots currently allocated in keyarray */
- pthread_mutex_t mutex_array; /* held by arrayinc() while it grows keyarray */
- key** keyarray; /* table of slot pointers; lookups scan from index 0 and stop at the first slot whose reads is NULL */
- char** loadbuffer; /* current batch of input lines shared with the worker threads; a NULL entry marks the end of the batch */
- int arrayinc()
- {
- int i;
- pthread_mutex_lock(&(mutex_array));
- keyarray=realloc(keyarray,(arraylength+INC)*sizeof(key*));
- arraylength+=INC;
- for(i=arraylength-INC;i<arraylength;i++)
- {
- keyarray[i]=calloc(1,sizeof(key));
- keyarray[i]->reads=NULL;
- keyarray[i]->num=0;
- pthread_mutex_init(&(keyarray[i]->mutex), NULL);
- }
- pthread_mutex_unlock(&(mutex_array));
- return 0;
- }
- int search_reads(char* str)
- {
- int i=0;
- while(keyarray[i]->reads!=NULL&&i<arraylength)
- {
- if(strcmp(keyarray[i]->reads,str)==0)
- {
- pthread_mutex_lock(&(keyarray[i]->mutex));
- keyarray[i]->num+=1;
- pthread_mutex_unlock(&(keyarray[i]->mutex));
- return 0;
- }
- i++;
- }
- if(i==arraylength)
- arrayinc;
- keyarray[i]->reads=calloc(1,sizeof(char)*strlen(str));
- strcpy(keyarray[i]->reads,str);
- keyarray[i]->num=+1;
- return 0;
- }
- int just_search_reads(char* str)
- {
- int i=0;
- while(keyarray[i]->reads!=NULL&&i<arraylength)
- {
- if(strcmp(keyarray[i]->reads,str)==0)
- {
- if(keyarray[i]->num>5)
- return 1;
- }
- i++;
- }
- return 0;
- }
- void* peer_process(void* inum)
- {
- int i,num;
- num=*((int*)inum);
- char* token;
- // printf("I am thread %d\n",num+1);
- for(i=LOADBUFFERSIZE/8*num;i<LOADBUFFERSIZE/8*(num+1);i++)
- {
- if(loadbuffer[i]==NULL)
- return;
- token=strtok(loadbuffer[i]," \t");
- if(token == NULL)
- continue;
- search_reads(token);
- }
- return;
- }
- int main(int argc, char** argv)
- {
- pthread_t id1;
- pthread_t id2;
- pthread_t id3;
- pthread_t id4;
- pthread_t id5;
- pthread_t id6;
- pthread_t id7;
- pthread_t id8;
- int i,j,ret;
- int* a[8];
- char buffer[1024];
- char* token;
- for(i=0;i<8;i++)
- {
- a[i]=calloc(1,sizeof(int));
- *(a[i])=i;
- }
- if(argc!=2)
- {
- fprintf(stderr,"Usage:xxx filepath\n");
- return 1;
- }
- arraylength=BASE;
- keyarray=calloc(BASE,sizeof(key*));
- pthread_mutex_init(&(mutex_array), NULL);
- for(i=0;i<arraylength;i++)
- {
- keyarray[i]=calloc(1,sizeof(key));
- keyarray[i]->reads=NULL;
- keyarray[i]->num=0;
- pthread_mutex_init(&(keyarray[i]->mutex), NULL);
- }
- FILE* fp=fopen(argv[1],"r");
- if(fp==NULL)
- exit(1);
- while(!feof(fp))
- {
- i=0;
- loadbuffer=calloc(LOADBUFFERSIZE,sizeof(char*));
- while(fgets(buffer,1000,fp)&&i<LOADBUFFERSIZE)
- {
- loadbuffer[i]=calloc(strlen(buffer),sizeof(char));
- strcpy(loadbuffer[i++],buffer);
- memset(buffer,0,1024);
- }
- ret=pthread_create(&id1,NULL, (void*)peer_process,(void*)a[0]);
- ret=pthread_create(&id2,NULL, (void*)peer_process,(void*)a[1]);
- ret=pthread_create(&id3,NULL, (void*)peer_process,(void*)a[2]);
- ret=pthread_create(&id4,NULL, (void*)peer_process,(void*)a[3]);
- ret=pthread_create(&id5,NULL, (void*)peer_process,(void*)a[4]);
- ret=pthread_create(&id6,NULL, (void*)peer_process,(void*)a[5]);
- ret=pthread_create(&id7,NULL, (void*)peer_process,(void*)a[6]);
- ret=pthread_create(&id8,NULL, (void*)peer_process,(void*)a[7]);
- if(ret!=0){
- printf ("Create pthread error!\n");
- return 1;
- }
- pthread_join(id1,NULL);
- pthread_join(id2,NULL);
- pthread_join(id3,NULL);
- pthread_join(id4,NULL);
- pthread_join(id5,NULL);
- pthread_join(id6,NULL);
- pthread_join(id7,NULL);
- pthread_join(id8,NULL);
- for(i=0;i<LOADBUFFERSIZE;i++)
- free(loadbuffer[i]);
- free(loadbuffer);
- }
- fclose(fp);
- for(i=0;i<arraylength;i++)
- {
- if(keyarray[i]->num==0)
- {
- break;
- }
- printf ("i=%d;%d:\t%s\n",i,keyarray[i]->num,keyarray[i]->reads);
- }
- fp=fopen(argv[1],"r");
- if(fp==NULL)
- exit(1);
- while(!feof(fp))
- {
- if(fgets(buffer,1000,fp))
- {
- token=strtok(buffer," \t");
- if(just_search_reads(token)==0||strcmp(token,"@SQ")==0)
- puts(buffer);
- memset(buffer,0,1024);
- }
- }
- fclose(fp);
- return 0;
- }
複製代碼 |
|