Страницы

Translate

суббота, 26 октября 2013 г.

Exercise 6.4. Write a program that prints the distinct words in its input sorted into decreasing order of frequency of occurrence



Exercise 6.4. Write a program that prints the distinct words in its input sorted into decreasing order of frequency of occurrence. Precede each word by its count.



#include <stdio.h>
#include <ctype.h>
#include <string.h>
#include <stdlib.h>

#define MAXWORD 100

struct tnode {  //the tree node
    char *word; //points to text
    int count; //number of occurrences
    struct tnode *left; //left child
    struct tnode *right; //right child
};
struct words {
    char *words;
    int counts;
};


struct tnode *addtree(struct tnode *, char *);
int getword(char *, int);
void sort(struct words *); //sort array
int countword;
int i = 0;


/* word frequency count */
int main()
{
    struct tnode *root;
    char word[MAXWORD];
    
    root = NULL;
    while(getword(word, MAXWORD) != EOF)
        if(isalpha(word[0]))
            root = addtree(root, word);
    struct words sortword[countword];
    void fillarray(struct tnode *, struct words *); //fill an array 
    fillarray(root, sortword);
    sort(sortword); 
    int j;
    for(j = 0; j < countword; j++)
        printf("%4d %s\n", sortword[j].counts, sortword[j].words);
    return 0;
}

/* fillarray: fill an array of structures */
void fillarray(struct tnode *p, struct words sortword[])
{
    if(p != NULL)
    {
        fillarray(p->left, sortword);
        sortword[i++] = (struct words) {p->word, p->count};
        fillarray(p->right, sortword);
    }
}

/* sort: sort an array by inserting */
void sort(struct words sortword[])
{
    struct words temp;
    int i, j;
    for(i = 1; i<countword; i++)
    {
        temp = sortword[i];
        for(j = i-1; j >=0 && sortword[j].counts > temp.counts; j--)
        {
            sortword[j+1] = sortword[j];
            sortword[j] = temp;
        }
    }
}

struct tnode *talloc(void);
char *s_dup(char *s);

/* adtree: add a node with w, at or below p */ 
struct tnode *addtree(struct tnode *p, char *w)
{
    int cond;
    
    if(p == NULL) //a new word has arrived
    {
        p = talloc(); //make a new node
        p->word = s_dup(w);
        countword++; //count words
        p->count = 1;
        p->left = p->right = NULL;
    }
    else if((cond = strcmp(w, p->word)) == 0)
        p->count++; //repeated word
    else if(cond < 0) //less than into left subtree
    {
        p->left = addtree(p->left, w);
    }
    else  //greater than into right subtree
    {
        p->right = addtree(p->right, w);
    }
    return p;
}

/* talloc: make a tnode */
struct tnode *talloc(void)
{
    return(struct tnode *) malloc(sizeof(struct tnode));
}

/* make a duplicate of s */
char *s_dup(char *s)
{
    char *p;
    
    p = (char *)malloc(strlen(s)+1); // +1 for '\0'
    if(p != NULL)
        strcpy(p, s);
    return p;
}

#define BUFSIZE 100

char buf[BUFSIZE];          //buffer for ungetch;
int bufp = 0;               //next free position in buf

int getch(void) // get a (possibly pushed-back) character  
{
   return (bufp > 0) ? buf[--bufp] : getchar();
}

void ungetch(int c) // push character back on input
{
    if(bufp >= BUFSIZE)
        printf("ungetch: too many characnters\n");
    else
        buf[bufp++] = c;
}

/* getword: get next word or character from input */
int getword(char *word, int lim)
{
    int c, d; 
    int getch(void);
    void ungetch(int);
    int in_comment(void);
    char *w = word;
    
    while(isspace(c = getch()))
        ;
    if(c != EOF)
        *w++ = c;
    if(isalpha(c) || c == ' ' || c == '#')
    {
        for(;--lim > 0; w++)
            if(!isalnum(*w= getch()) && *w != '_')
            {
                ungetch(*w);
                break;
            }
    }
    else if(c=='\'' || c == '"')
    {
        for(; --lim > 0; w++)
            if((*w = getch()) == '\\')
                *++w = getch();
            else if(*w == c)
            {
                w++;
                break;
            }
            else if(*w == EOF)
                break;
    }
    else if(c == '/')
    {
        if(((d = getch()) == '*'))
            c = in_comment();
        else
            ungetch(d);
    }
    *w = '\0';
    return c;
}

/* comment */
int in_comment(void)
{
    int c;
    
    while((c = getch()) != EOF)
        if(c == '*')
        {
            if((c = getch()) == '/')
                break;
            else
                ungetch(c);
        }
    return c;
}

Result:



среда, 23 октября 2013 г.

Exercise 6.3. Write a cross-referencer that prints a list of all words in a document, and for each word, a list of the line numbers on which it occurs.



Exercise 6.3. Write a cross-referencer that prints a list of all words in a document, and for each word, a list of the line numbers on which it occurs. Remove noise words like ``the,'' ``and,'' and so on.


#include <stdio.h>
#include <ctype.h>
#include <string.h>
#include <stdlib.h>

#define MAXWORD 100

struct linklist                     //a list of line numbers
{
    int lnum;
    struct  linklist *ptr;
};

struct tnode                      //the tree node
 { 
    char *word;                    //points to text
    struct linklist *lines; //numbers string
    struct tnode *left;   //left child
    struct tnode *right;  //right child
};

struct tnode *addtree(struct tnode *, char *, int);
void treeprint(struct tnode *);
int getword(char *, int);
int commonwords(char *);



/* printing program cross-references */
int main(int argc, char **argv)
{
    struct tnode *root;
    char word[MAXWORD];
    int linenum = 1;
    
    root = NULL;
    while(getword(word, MAXWORD) != EOF)
        if(isalpha(word[0]) && commonwords(word) == -1)
            root = addtree(root, word, linenum);
        else if(word[0] == '\n')
            linenum++;
    treeprint(root);
    return 0;
}

struct tnode *talloc(void);
struct linklist *lalloc(void);
char *s_dup(char *s);
void addline(struct tnode *, int);

/* addtree: add a node with w, at or below p */ 
struct tnode *addtree(struct tnode *p, char *w, int linenum)
{
    int cond;
    
    if(p == NULL) //a new word has arrived
    {
        p = talloc(); //make a new node
        p->word = s_dup(w);
        p->lines = lalloc();
        p->lines->lnum = linenum;
        p->lines->ptr = NULL;
        p->left = p->right = NULL;
    }
    else if((cond = strcmp(w, p->word)) == 0)
        addline(p, linenum);
    else if(cond < 0)
        p->left = addtree(p->left, w, linenum);
    else
        p->right = addtree(p->right, w, linenum);
    return p;
}

/* addline: add the line number to the linked list */
void addline(struct tnode *p, int linenum)
{
    struct linklist *temp;
    
    temp = p->lines;
    while(temp->ptr != NULL && temp->lnum != linenum)
        temp = temp->ptr;
    if(temp->lnum != linenum)
    {
        temp->ptr = lalloc();
        temp->ptr->lnum = linenum;
        temp->ptr->ptr = NULL;
    }
}   

/* treeprint: print a tree of order p */
void treeprint(struct tnode *p)
{
    struct linklist *temp;
    
    if(p != NULL)
    {
        treeprint(p->left);
        printf("%10s:", p->word);
        for(temp = p->lines; temp != NULL; temp = temp->ptr)
            printf("%4d", temp->lnum);
        printf("\n");
        treeprint(p->right);
    }
}

/* lalloc: create a node of a linked list */
struct linklist *lalloc(void)
{
    return(struct linklist *) malloc(sizeof(struct linklist));
}

/* talloc: make a tnode */
struct tnode *talloc(void)
{
    return(struct tnode *) malloc(sizeof(struct tnode));
}

/* make a duplicate of s */
char *s_dup(char *s)
{
    char *p;
    
    p = (char *)malloc(strlen(s)+1); // +1 for '\0'
    if(p != NULL)
        strcpy(p, s);
    return p;
}

/* commonwords: identify common words */
int commonwords(char *w)
{
    static char *cw[] = {"a", "an", "are", "in", "is", "of", 
                        "or", "that", "the", "this", "to"};
    int cond, mid;
    int low = 0;
    int high = sizeof(cw)/sizeof(char *) - 1;
    
    while(low <= high)
    {
        mid = (low + high)/2;
        if((cond = strcmp(w, cw[mid])) < 0)
            high = mid -1;
        else if(cond > 0)
            low = mid + 1;
        else
            return mid;
    }
        return -1;
}

#define BUFSIZE 100

char buf[BUFSIZE];          //buffer for ungetch;
int bufp = 0;               //next free position in buf

int getch(void) // get a (possibly pushed-back) character  
{
   return (bufp > 0) ? buf[--bufp] : getchar();
}

void ungetch(int c) // push character back on input
{
    if(bufp >= BUFSIZE)
        printf("ungetch: too many characnters\n");
    else
        buf[bufp++] = c;
}

/* getword: get next word or character from input */
int getword(char *word, int lim)
{
    int c, d; 
    int getch(void);
    void ungetch(int);
    int in_comment(void);
    char *w = word;
    
    while(isspace(c = getch()) && c != '\n')
        ;
    if(c != EOF)
        *w++ = c;
    if(isalpha(c) || c == ' ' || c == '#')
    {
        for(;--lim > 0; w++)
            if(!isalnum(*w= getch()) && *w != '_')
            {
                ungetch(*w);
                break;
            }
    }
    else if(c=='\'' || c == '"')
    {
        for(; --lim > 0; w++)
            if((*w = getch()) == '\\')
                *++w = getch();
            else if(*w == c)
            {
                w++;
                break;
            }
            else if(*w == EOF)
                break;
    }
    else if(c == '/')
    {
        if(((d = getch()) == '*'))
            c = in_comment();
        else
            ungetch(d);
    }
    *w = '\0';
    return c;
}

/* comment */
int in_comment(void)
{
    int c;
    
    while((c = getch()) != EOF)
        if(c == '*')
        {
            if((c = getch()) == '/')
                break;
            else
                ungetch(c);
        }
    return c;
}

суббота, 19 октября 2013 г.

Exercise 6.2. Write a program that reads a C program and prints in alphabetical order each group of variable names that are identical in the first 6 characters



Exercise 6.2. Write a program that reads a C program and prints in alphabetical order each group of variable names that are identical in the first 6 characters, but different somewhere thereafter. Don't count words within strings and comments. Make 6 a parameter that can be set from the command line.



#include <stdio.h>
#include <ctype.h>
#include <string.h>
#include <stdlib.h>

#define MAXWORD 100
#define YES      1
#define NO       0

struct tnode {  //the tree node
    char *word; //points to text
    int match; //sign matches
    struct tnode *left; //left child
    struct tnode *right; //right child
};

struct tnode *addtree(struct tnode *, char *, int, int *);
void treeprint(struct tnode *);
int getword(char *, int);



/* word frequency count */
int main(int argc, char **argv)
{
    struct tnode *root;
    char word[MAXWORD];
    int num;
    int find = NO;
    
    if(argc <= 1 || argc > 2 || atoi(*(argv+1)) > 0)
    {
        printf("Wrong parameters, use default - 6 -\n");
        num = 6;
        printf("num = %d\n", num);
    }
    else
        num = abs(atoi(*(argv+1)));
    root = NULL;
    while(getword(word, MAXWORD) != EOF)
    {
        if(isalpha(word[0]) && strlen(word) >= num)
            root = addtree(root, word, num, &find);
        find = NO;
    }
    treeprint(root);
    return 0;
}

struct tnode *talloc(void);
int compare(char *, struct tnode *, int, int *);
char *s_dup(char *s);

/* adtree: add a node with w, at or below p */ 
struct tnode *addtree(struct tnode *p, char *w, int num, int *find)
{
    int cond;
    
    if(p == NULL) //a new word has arrived
    {
        p = talloc(); //make a new node
        p->word = s_dup(w);
        p->match = *find;
        p->left = p->right = NULL;
    }
    else if((cond = compare(w, p, num, find)) < 0)
        p->left = addtree(p->left, w, num, find);
    else if(cond > 0)
        p->right = addtree(p->right, w, num, find);
    return p;
}

int compare(char *s, struct tnode *p, int num, int *find)
{
    int i;
    char *t = p->word;
    for(i = 0; *s == *t; i++, s++, t++)
        if(*s == '\0')
            return 0;
    if(i >= num)
    {
        *find = YES;
        p->match = YES;
    }
    return *s - *t;
}

/* treeprint */
void treeprint(struct tnode *p)
{
    if(p != NULL)
    {
        treeprint(p->left);
        if(p->match)
            printf("%s\n", p->word);
        treeprint(p->right);
    }
}

#include <stdlib.h>

/* talloc: make a tnode */
struct tnode *talloc(void)
{
    return(struct tnode *) malloc(sizeof(struct tnode));
}

/* make a duplicate of s */
char *s_dup(char *s)
{
    char *p;
    
    p = (char *)malloc(strlen(s)+1); // +1 for '\0'
    if(p != NULL)
        strcpy(p, s);
    return p;
}

#define BUFSIZE 100

char buf[BUFSIZE];          //buffer for ungetch;
int bufp = 0;               //next free position in buf

int getch(void) // get a (possibly pushed-back) character  
{
   return (bufp > 0) ? buf[--bufp] : getchar();
}

void ungetch(int c) // push character back on input
{
    if(bufp >= BUFSIZE)
        printf("ungetch: too many characnters\n");
    else
        buf[bufp++] = c;
}

/* getword: get next word or character from input */
int getword(char *word, int lim)
{
    int c, d; 
    int getch(void);
    void ungetch(int);
    int in_comment(void);
    char *w = word;
    
    while(isspace(c = getch()))
        ;
    if(c != EOF)
        *w++ = c;
    if(isalpha(c) || c == ' ' || c == '#')
    {
        for(;--lim > 0; w++)
            if(!isalnum(*w= getch()) && *w != '_')
            {
                ungetch(*w);
                break;
            }
    }
    else if(c=='\'' || c == '"')
    {
        for(; --lim > 0; w++)
            if((*w = getch()) == '\\')
                *++w = getch();
            else if(*w == c)
            {
                w++;
                break;
            }
            else if(*w == EOF)
                break;
    }
    else if(c == '/')
    {
        if(((d = getch()) == '*'))
            c = in_comment();
        else
            ungetch(d);
    }
    *w = '\0';
    return c;
}

/* comment */
int in_comment(void)
{
    int c;
    
    while((c = getch()) != EOF)
        if(c == '*')
        {
            if((c = getch()) == '/')
                break;
            else
                ungetch(c);
        }
    return c;
}

четверг, 10 октября 2013 г.

Exercise 6.1. Our version of getword does not properly handle underscores, string constants, comments, or preprocessor control lines. Write a better version.



Exercise 6.1. Our version of getword does not properly handle underscores, string constants, comments, or preprocessor control lines. Write a better version.


#include <stdio.h>
#include <ctype.h>
#include <string.h>

#define MAXWORD 100
#define NKEYS (sizeof(keytab) / sizeof(struct key))

struct key
{
    char *word;
    int count;
};
struct key keytab[] = {
    {"auto", 0},
    {"break", 0},
    {"case", 0},
    {"char", 0},
    {"const", 0},
    {"continue", 0},
    {"default", 0},
    {"define", 0},
    {"do", 0},
    {"double", 0},
    {"else", 0},
    {"enum", 0},
    {"extern", 0},
    {"float", 0},
    {"for", 0},
    {"goto", 0},
    {"if", 0},
    {"int", 0},
    {"long", 0},
    {"register", 0},
    {"return", 0},
    {"short", 0},
    {"signed", 0},
    {"static", 0},
    {"struct", 0},
    {"switch", 0},
    {"typdef", 0},
    {"unsigned", 0},
    {"void", 0},
    {"volatile", 0},  
    {"while", 0},
    };

int getword(char *, int);
int binsearch(char *, struct key *, int);


/* count C keywords */
int main()
{
    int n;
    char word[MAXWORD];
    
    while(getword(word, MAXWORD) != EOF)
        if(isalpha(word[0]))
            if((n = binsearch(word, keytab, NKEYS)) >= 0)
                keytab[n].count++;
    for(n = 0; n < NKEYS; n++)
        if(keytab[n].count > 0)
            printf("%d %s\n", keytab[n].count, keytab[n].word);
    return 0;
}

/* binsearch: find word in tab[0] ... tab[n - 1] */
int binsearch(char *word, struct key keytab[], int n)
{
    int cond;
    int low, high, mid;
    
    low = 0;
    high = n-1;
    while(low <= high)
    {
        mid = (low + high) / 2;
        if((cond = strcmp(word, keytab[mid].word)) < 0)
            high = mid -1;
        else if(cond > 0)
            low = mid + 1;
        else
            return mid;
    }
    return -1;
}

#define BUFSIZE 100

char buf[BUFSIZE];          //buffer for ungetch;
int bufp = 0;               //next free position in buf

int getch(void) // get a (possibly pushed-back) character  
{
   return (bufp > 0) ? buf[--bufp] : getchar();
}

void ungetch(int c) // push character back on input
{
    if(bufp >= BUFSIZE)
        printf("ungetch: too many characnters\n");
    else
        buf[bufp++] = c;
}

/* comment */
int in_comment(void)
{
    int c;
    
    while((c = getch()) != EOF)
        if(c == '*')
        {
            if((c = getch()) == '/')
                break;
            else
                ungetch(c);
        }
    return c;
}


/* getword: get next word or character from input */
int getword(char *word, int lim)
{
    int c, d; 
    int getch(void);
    void ungetch(int);
    int in_comment(void);
    char *w = word;
    
    while(isspace(c = getch()))
        ;
    if(c != EOF)
        *w++ = c;
    if(isalpha(c) || c == ' ' || c == '#')
    {
        for(;--lim > 0; w++)
            if(!isalnum(*w= getch()) && *w != '_')
            {
                ungetch(*w);
                break;
            }
    }
    else if(c=='\'' || c == '"')
    {
        for(; --lim > 0; w++)
            if((*w = getch()) == '\\')
                *++w = getch();
            else if(*w == c)
            {
                w++;
                break;
            }
            else if(*w == EOF)
                break;
    }
    else if(c == '/')
    {
        if(((d = getch()) == '*'))
            c = in_comment();
        else
            ungetch(d);
    }
    *w = '\0';
    return c;
}

Result:


пятница, 4 октября 2013 г.

Exercise 5.20. Expand dcl to handle declarations with function argument types, qualifiers like const, and so on.

Exercise 5.20. Expand dcl to handle declarations with function argument types, qualifiers like
const, and so on.


#include <stdio.h>
#include <string.h>
#include <ctype.h>

#define MAXTOKEN 100

enum {NAME, PARENS, BRACKETS};
enum {NO, YES};

void dcl(void);         //parse a declarator
void dirdcl(void);      //parse a direct declarator
void errmesg(char *);   //error message
int gettoken(void);

int tokentype;              //type of last token
char token[MAXTOKEN];       //last token string
char name[MAXTOKEN];        //identifier name
char datatype[MAXTOKEN];    //data type = char, int, etc.
char out[1000];
int errtoken = NO;
int errbrack = NO;

/* convert declaration to words */
int main()
{
    while(gettoken() != EOF)        //1st token on line
    {
        strcpy(datatype, token);    //is the datatype
        out[0] = '\0';
        dcl();      //parse rest of line
        if(tokentype != '\n')
            printf("syntax error\n");
        printf("%s: %s %s\n", name, out, datatype);
    }
    return 0;
}

/* dcl: parse a declarator */
void dcl(void)
{
    int ns;
    
    for(ns = 0; gettoken() == '*';) //count stars
        ns++;
    dirdcl();
    while(ns-- > 0)
        strcat(out, " pointer to");
}

/* dirdcl: parse a direct declarator */
void dirdcl(void)
{
    int type;
    void paramdcl(void);    //declarator analysis with parameters 
    
    if(tokentype == '(') //(dcl)
    {
        dcl();
        if(tokentype != ')')
            errmesg("error: missing )\n");
    }
    else if(tokentype == NAME) //variable name
    {
        if(name[0] == '\0')
            strcpy(name, token);
    }
    else
        errtoken = YES;
    while((type = gettoken()) == PARENS || type == BRACKETS || 
                                            type == '(')
        if(type == PARENS)
            strcat(out, " function returning");
        else if(type == '(')
        {
            strcat(out, " function taking");
            paramdcl();
            strcat(out, " and returning");
        }
        else
        {
            strcat(out, " array");
            strcat(out, token);
            strcat(out, " of");
        }
}

/* errmesg: error message */
void errmesg(char *msg)
{
    printf("%s", msg);
    errtoken = YES;
}

#include <stdlib.h>

void declspec(void);    //specification declaration
int typespec(void);     //type specifier
int typedesc(void);     //descriptor type

/* paramdcl: declarator analysis with parameters */
void paramdcl(void)
{
    do
    {
        declspec(); //specification declaration
    } while(tokentype == ',');
    if(tokentype != ')')
        errmesg("missing ) in declarator of parameters\n");
}

/* specification declaration */
void declspec(void)
{
    char temp[MAXTOKEN];
    
    temp[0] = '\0';
    gettoken();
    do
    {
        if(tokentype != NAME)
        {
            errtoken = YES;
            dcl();
        }
        else if(typespec() == YES) // type specifier
        {
            strcat(temp, " ");
            strcat(temp, token);
            gettoken();
        }
        else if(typedesc() == YES) //descriptor type
        {
            strcat(temp, " ");
            strcat(temp, token);
            gettoken();
        }
        else
            errmesg("error: unknown type in the parameter list\n");
    } while(tokentype != ',' && tokentype != ')');
    strcat(out, temp);
    if(tokentype == ',')
        strcat(out, ",");
}

/* type specifier */
int typespec(void)
{
    static char *types[] = {"char", "int", "void"};
    char *ptypes = token;
    int result, i;
   
    result = NO;
    for(i = 0; i < 3; i++)
        if(strcmp(ptypes, *(types + i)) == 0)
            return result = YES;
        else
            result = NO;
    return result;
        
}

/*  descriptor type */
int typedesc(void)
{
    static char *typed[] = {"const", "volatile"};
    char *ptd = token;
    int result, i;
    
    result = NO;
    for(i = 0; i < 2; i++)
        if(strcmp(ptd, *(typed + i)))
            return result = YES;
        else
            result = NO;
    return result;
}

int getch(void);        //get a (possibly pushed-back) character
void ungetch(int);    //push character back on input

int gettoken(void)
{
    int c;
    char *p = token;
    
    if(errtoken == YES)
    {
        errtoken = NO;
        return tokentype;
    }
    while((c = getch()) == ' ' || c == '\t')
        ;
    if(c == '(')
    {
        if((c = getch()) == ')')
        {
            strcpy(token, "()");
            return tokentype = PARENS;
        }
        else
        {
            ungetch(c);
            return tokentype = '(';
        }
    }
    else if(c == '[')
    {
        for(*p++ = c; *p != ']';)
        {
            *p = getch();
            if(*p != ']')
            {
                if(*p == '\n' || *p == ')' || *p == '(')
                {
                    printf("error: missing ]\n");
                    ungetch(*p);
                    *p = ']';
                }
                else
                    p++;
            }
        }
        *++p = '\0';
        return tokentype = BRACKETS;
    }
    else if(isalpha(c))
    {
        for(*p++ = c; isalnum(c = getch());)
            *p++ = c;
        *p = '\0';
        ungetch(c);
        return tokentype = NAME;
    }
    else
        return tokentype = c;
}

#define BUFSIZE 100

char buf[BUFSIZE];          //buffer for ungetch;
int bufp = 0;               //next free position in buf

int getch(void) // get a (possibly pushed-back) character  
{
   return (bufp > 0) ? buf[--bufp] : getchar();
}

void ungetch(int c) // push character back on input
{
    if(bufp >= BUFSIZE)
        printf("ungetch: too many characnters\n");
    else
        buf[bufp++] = c;
}

Result: