Commit a258d334 by Celine Mercier

Improved the FASTA parser for better memory handling and better parsing of the last parts of FASTA headers (definitions)
parent 52e94bbe
......@@ -69,15 +69,34 @@ char* fastaSeqPtr_header_add_field(fastaSeqPtr seq, char* name, char* value)
element_from_header* table_header_add_field(element_from_header* header, char* name, char* value)
{
int nbf;
int i, j;
element_from_header* new_header;
nbf = atoi(header[0].value);
nbf++;
header = (element_from_header*) realloc(header, (nbf+1)*sizeof(element_from_header));
header[nbf].name = (char*) malloc((1+strlen(name))*sizeof(char));
strcpy(header[nbf].name, name);
header[nbf].value = (char*) malloc((1+strlen(value))*sizeof(char));
strcpy(header[nbf].value, value);
sprintf(header[0].value, "%d", nbf);
return(header);
new_header = (element_from_header*) realloc(header, ((nbf+1)*sizeof(element_from_header)));
i=0;
while ((strcmp(new_header[i].name, "definition") != 0) && (i < nbf))
i++;
if (strcmp(new_header[i].name, "definition") == 0)
{
j = nbf-1;
while (strcmp(new_header[j].name, "definition") == 0)
{
new_header[j+1].name = new_header[j].name;
new_header[j+1].value = new_header[j].value;
j--;
}
}
new_header[i].name = (char*) malloc((1+strlen(name))*sizeof(char));
strcpy(new_header[i].name, name);
new_header[i].value = (char*) malloc((1+strlen(value))*sizeof(char));
strcpy(new_header[i].value, value);
sprintf(new_header[0].value, "%d", nbf+1);
return(new_header);
}
......@@ -86,7 +105,7 @@ void free_header_table(element_from_header* header)
int i;
int nbf = atoi(header[0].value);
for (i = 0; i <= nbf; i++)
for (i = 0; i < nbf; i++)
{
free((header[i]).name);
free((header[i]).value);
......@@ -101,7 +120,7 @@ char* getItemFromHeader(char* name, element_from_header* header)
int nbf;
int i;
nbf = atoi(header[0].value);
for (i = 1; i <= nbf; i++)
for (i = 1; i < nbf; i++)
{
if (strcmp(header[i].name,name)==0)
value = header[i].value;
......@@ -115,7 +134,7 @@ void changeValue(element_from_header* header, char* name, char* newValue)
int i;
int nbf = atoi(header[0].value);
for (i = 1; i <= nbf; i++)
for (i = 1; i < nbf; i++)
{
if (strcmp(header[i].name, name)==0)
{
......
......@@ -798,7 +798,7 @@ YY_RULE_SETUP
(*p_header)[*nbf].value = (char*) malloc(sizeof(char)*size_needed);
strcpy(((*p_header)[*nbf]).value,header_yytext);
(*nbf)++;
p_header = check_and_realloc_mem_in_header_table(p_header, nbf, memory_allocated);
}
YY_BREAK
case 3:
......@@ -820,7 +820,7 @@ YY_RULE_SETUP
case 5:
YY_RULE_SETUP
#line 69 "fasta_header_parser.l"
{
{ // TODO
/*fprintf(stderr,"\n<REGNAME>{SPACE} **%s**",header_yytext);*/
if (i != 0)
field = store_in_field(field,header_yytext,&free_size,&i);
......@@ -886,24 +886,21 @@ case YY_STATE_EOF(REGVAL):
#line 113 "fasta_header_parser.l"
{
field = store_in_header_table(field, &((*p_header)[*nbf].value), &free_size, &i);
p_header = check_and_realloc_mem_in_header_table(p_header, nbf, memory_allocated);
(*nbf)++;
end_header_table(p_header, *nbf);
free(field);
BEGIN(INITIAL);
return 0;
}
YY_BREAK
case YY_STATE_EOF(REGNAME):
#line 123 "fasta_header_parser.l"
#line 122 "fasta_header_parser.l"
{
/*(*p_header)[*nbf].name = (char*) malloc(sizeof(char)*19);
strcpy((*p_header)[*nbf].name,"other_informations");
(*p_header)[*nbf].name = (char*) malloc(sizeof(char)*19);
strcpy((*p_header)[*nbf].name,"definition");
field = store_in_header_table(field, &((*p_header)[*nbf].value), &free_size, &i);
p_header = check_and_realloc_mem_in_header_table(p_header, nbf, memory_allocated);
*/
end_header_table(p_header, *nbf);
(*nbf)++;
end_header_table(p_header, nbf);
free(field);
BEGIN(INITIAL);
return 0;
......@@ -911,10 +908,10 @@ case YY_STATE_EOF(REGNAME):
YY_BREAK
case 12:
YY_RULE_SETUP
#line 136 "fasta_header_parser.l"
#line 133 "fasta_header_parser.l"
ECHO;
YY_BREAK
#line 918 "<stdout>"
#line 915 "<stdout>"
case YY_STATE_EOF(INITIAL):
case YY_STATE_EOF(REGID):
yyterminate();
......@@ -1912,7 +1909,7 @@ void header_yyfree (void * ptr )
#define YYTABLES_NAME "yytables"
#line 136 "fasta_header_parser.l"
#line 133 "fasta_header_parser.l"
......@@ -1923,38 +1920,32 @@ int header_yywrap()
element_from_header* header_parser_main(char *h)
{
int nbfields,memory_allocated;
int nbfields, memory_allocated;
element_from_header* header;
char* nbfields_n;
char* nbfields_v;
nbfields_n = (char*) malloc(9*sizeof(char));
nbfields_v = (char*) malloc(5*sizeof(char));
YY_BUFFER_STATE state;
state=header_yy_scan_string(h);
memory_allocated=MEMALLOCATED;
nbfields=1;
header = (element_from_header*) malloc(memory_allocated * sizeof(element_from_header));
nbfields_n = (char*) malloc(9*sizeof(char));
strcpy(nbfields_n, "nbfields");
strcpy(nbfields_v, "1");
header = (element_from_header*) malloc(memory_allocated * sizeof(element_from_header));
header[0].name = nbfields_n;
header[0].value = nbfields_v;
YY_BUFFER_STATE state;
// Initialize memory to store the number of fields
header[0].value = (char*) malloc(10*sizeof(char));
state=header_yy_scan_string(h);
nbfields=1;
header_parser(&nbfields, &memory_allocated, &header);
header_yy_delete_buffer(state);
return header;
}
......@@ -53,7 +53,7 @@ EQUAL =
(*p_header)[*nbf].value = (char*) malloc(sizeof(char)*size_needed);
strcpy(((*p_header)[*nbf]).value,yytext);
(*nbf)++;
p_header = check_and_realloc_mem_in_header_table(p_header, nbf, memory_allocated);
}
......@@ -66,7 +66,7 @@ EQUAL =
field = store_in_field(field,yytext,&free_size,&i);
}
<REGNAME>{SPACE} {
<REGNAME>{SPACE} { // TODO
/*fprintf(stderr,"\n<REGNAME>{SPACE} **%s**",yytext);*/
if (i != 0)
field = store_in_field(field,yytext,&free_size,&i);
......@@ -112,22 +112,19 @@ EQUAL =
<REGVAL><<EOF>> {
field = store_in_header_table(field, &((*p_header)[*nbf].value), &free_size, &i);
p_header = check_and_realloc_mem_in_header_table(p_header, nbf, memory_allocated);
(*nbf)++;
end_header_table(p_header, *nbf);
free(field);
BEGIN(INITIAL);
return 0;
}
<REGNAME><<EOF>> {
/*(*p_header)[*nbf].name = (char*) malloc(sizeof(char)*19);
strcpy((*p_header)[*nbf].name,"other_informations");
(*p_header)[*nbf].name = (char*) malloc(sizeof(char)*19);
strcpy((*p_header)[*nbf].name,"definition");
field = store_in_header_table(field, &((*p_header)[*nbf].value), &free_size, &i);
p_header = check_and_realloc_mem_in_header_table(p_header, nbf, memory_allocated);
*/
end_header_table(p_header, *nbf);
(*nbf)++;
end_header_table(p_header, nbf);
free(field);
BEGIN(INITIAL);
return 0;
......@@ -142,37 +139,31 @@ int header_yywrap()
element_from_header* header_parser_main(char *h)
{
int nbfields,memory_allocated;
int nbfields, memory_allocated;
element_from_header* header;
char* nbfields_n;
char* nbfields_v;
nbfields_n = (char*) malloc(9*sizeof(char));
nbfields_v = (char*) malloc(5*sizeof(char));
YY_BUFFER_STATE state;
state=yy_scan_string(h);
memory_allocated=MEMALLOCATED;
nbfields=1;
header = (element_from_header*) malloc(memory_allocated * sizeof(element_from_header));
nbfields_n = (char*) malloc(9*sizeof(char));
strcpy(nbfields_n, "nbfields");
strcpy(nbfields_v, "1");
header = (element_from_header*) malloc(memory_allocated * sizeof(element_from_header));
header[0].name = nbfields_n;
header[0].value = nbfields_v;
YY_BUFFER_STATE state;
// Initialize memory to store the number of fields
header[0].value = (char*) malloc(10*sizeof(char));
state=yy_scan_string(h);
nbfields=1;
header_parser(&nbfields, &memory_allocated, &header);
yy_delete_buffer(state);
return header;
}
......@@ -52,7 +52,7 @@ void printOnlyHeaderFromTable(element_from_header* header, FILE* output)
fprintf(output,">%s ",header[1].value);
for (i = 2; i <= nbf; i++)
for (i = 2; i < nbf; i++)
{
if (strcmp(header[i].name, "definition") != 0)
{
......@@ -60,11 +60,10 @@ void printOnlyHeaderFromTable(element_from_header* header, FILE* output)
fprintf(output,"=");
fprintf(output,"%s; ",header[i].value);
}
else if (strcmp(header[i].name, "definition") == 0)
fprintf(output,"%s ", header[i].value);
}
if (strcmp(header[nbf].name, "definition") == 0)
fprintf(output,"%s; ",header[nbf].value);
fprintf(output,"\n");
}
......
......@@ -76,7 +76,7 @@ element_from_header** check_and_realloc_mem_in_header_table(element_from_header*
{
(*nbf)++;
if (*nbf == *memory_allocated)
if ((*nbf)+1 == *memory_allocated)
{
(*memory_allocated)++;
*p_header = (element_from_header*) realloc(*p_header, (*memory_allocated) * sizeof(element_from_header));
......@@ -87,7 +87,6 @@ element_from_header** check_and_realloc_mem_in_header_table(element_from_header*
/*
 * Finalise a header table: shrink it and record the entry count.
 *
 * p_header  address of the table pointer; realloc may move the table, in
 *           which case *p_header is updated to the new location.
 * nbf       one more than the number of entries to keep (1 is subtracted
 *           before the value is used).
 *
 * The table is resized to (nbf - 1) entries and that number is written as a
 * decimal string into the value of entry 0.
 * NOTE(review): the value buffer of entry 0 is assumed to be large enough
 * for the formatted count; its size is not visible here -- confirm at the
 * allocation site.
 */
void end_header_table(element_from_header** p_header, int nbf)
{
    element_from_header* shrunk;
    int kept = nbf - 1;

    /* Entry 0 stores the count, so the table is never shrunk below one
       entry: realloc(ptr, 0) may free the block and return NULL. */
    if (kept >= 1)
    {
        shrunk = (element_from_header*) realloc(*p_header, (size_t) kept * sizeof(element_from_header));
        /* A failed realloc leaves the original block valid; keep using it
           instead of overwriting the only pointer to it with NULL. */
        if (shrunk != NULL)
            *p_header = shrunk;
    }

    sprintf((*p_header)->value, "%d", kept);
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment