使用字典树实现对句子单词个数的统计

曾经看到过这样一道题:如何在不使用 C++ STL 的情况下,统计一个句子中每个单词出现的次数。

比如输入
Hello everybody! He is Steve.

输出
He 1
Hello 1
Steve 1
everybody 1
is 1

我这里考虑使用字典树的做法,定义一个特殊的字典树节点:

/*
 * One node of the word-counting trie.
 *
 *   x        - the character this node stands for ('\0' until assigned,
 *              e.g. for the root node)
 *   children - child nodes, indexed by character code (0..255)
 *   count    - how many times a word ending at this node has been seen
 *   isEnd    - true when at least one word ends at this node
 */
struct node{
    char x;
    struct node * children[256];
    int count;
    bool isEnd;
    // Initialise every member, including x, so that no field is ever read
    // while still holding an indeterminate value.
    node() : x('\0'), count(0), isEnd(false) {
        memset(children, 0, sizeof(children));
    }
};

每当一个单词结束,就把该单词最后一个字母所对应节点的 isEnd 标记为 true,并把该节点的 count 加 1。
所有单词处理完毕以后,只需要深度优先遍历这棵树,就可以按字符编码顺序输出所有单词及其出现次数。

完整代码如下(C++解法):

#include<iostream>
#include<stdlib.h>
#include<string.h>

using namespace std;

// Scratch buffer used by outputTree: keys[level] holds the character of the
// node currently visited at depth `level`, so the path to a node can be
// printed as a word.
char keys[10000];

/*
 * One node of the word-counting trie.
 *
 *   x        - the character this node stands for ('\0' until assigned,
 *              e.g. for the root node)
 *   children - child nodes, indexed by character code (0..255)
 *   count    - how many times a word ending at this node has been seen
 *   isEnd    - true when at least one word ends at this node
 */
struct node{
    char x;
    struct node * children[256];
    int count;
    bool isEnd;
    // Initialise every member, including x, so that no field is ever read
    // while still holding an indeterminate value.
    node() : x('\0'), count(0), isEnd(false) {
        memset(children, 0, sizeof(children));
    }
};

/*
 * Insert every word of `str` into the trie rooted at `root`.
 *
 * A word is a maximal run of ASCII letters (a-z, A-Z); every other character
 * acts as a separator. For each word, the node reached by its last letter
 * gets isEnd set to true and its count incremented. Matching is
 * case-sensitive ("He" and "he" are different words).
 *
 * Does nothing when `root` or `str` is NULL, or when `str` is empty.
 * New nodes are allocated with `new`; they belong to the trie.
 */
void buildTree( struct node * root , char * str ){
    if( root == NULL || str == NULL || str[0] == '\0' ){
        return;
    }
    struct node * current = root;
    for ( size_t position = 0 ; str[position] ; position ++ ) {
        // Index through unsigned char so the subscript can never be negative,
        // whatever the signedness of plain char on this platform.
        unsigned char c = (unsigned char) str[position];
        bool isLetter = ( c >= 'a' && c <= 'z' ) || ( c >= 'A' && c <= 'Z' );
        if( isLetter ){
            if( current -> children[c] == NULL ){
                current -> children[c] = new node;
                current -> children[c] -> x = (char) c;
            }
            current = current -> children[c];
        }else if( current != root ){
            // A separator right after at least one letter: the word is complete.
            current -> count ++;
            current -> isEnd = true;
            current = root;
        }
    }
    // A word that runs up to the terminating '\0' has no separator after it,
    // so it must be recorded here or it would be silently dropped.
    if( current != root ){
        current -> count ++;
        current -> isEnd = true;
    }
}

/*
 * Depth-first print of every word stored in the subtree rooted at `root`.
 *
 * `level` is the depth assigned to `root`; the node's character is recorded
 * in keys[level]. When a node is marked isEnd, the characters keys[1..level]
 * are printed, followed by a space, the node's count and a newline. Because
 * printing starts at index 1, whatever is stored in keys[0] is never printed.
 * Children are visited in increasing index order (0..255).
 *
 * A NULL `root` is ignored. A subtree whose depth would fall outside the
 * `keys` buffer is skipped instead of being written out of bounds.
 */
void outputTree( struct node * root , int level ){
    if( root == NULL ){
        return;
    }
    // Guard the write below: keys is a fixed-size buffer.
    if( level < 0 || (size_t) level >= sizeof(keys) / sizeof(keys[0]) ){
        return;
    }
    keys[level] = root -> x;
    if( root -> isEnd ){
        for ( int i = 1 ; i <= level ; i ++ ) {
            cout << keys[i];
        }
        cout << " " << root -> count << endl;
    }
    for( int i = 0 ; i < 256 ; i ++ ){
        if( root -> children[i] != NULL ){
            outputTree( root -> children[i] , level + 1 );
        }
    }
}

/*
 * Free the node `root` and, recursively, every node reachable through its
 * children array. A NULL `root` is accepted and ignored.
 */
void destroy( struct node * root ){
    if( root == NULL ){
        return;
    }
    // Release all descendants before the node that points to them.
    for( int slot = 0 ; slot < 256 ; ++slot ){
        struct node * child = root -> children[slot];
        if( child != NULL ){
            destroy( child );
        }
    }
    delete root;
}


/*
 * Demo driver: builds a trie from a fixed sentence, prints each distinct
 * word with its number of occurrences, then frees the trie.
 */
int main(){
    char str[] = " Hello everybody! He is Steve. He is a boy.";
    struct node * root = new node;
    buildTree( root , str );
    // The root carries no letter. Starting at level 0 places its placeholder
    // character in keys[0], which outputTree never prints (it prints
    // keys[1..level]); starting at 1 would emit it in front of every word.
    outputTree( root , 0 );
    destroy(root);
    // 0 reports success to the calling environment.
    return 0;
}
标签: none
返回文章列表 文章二维码
本页链接的二维码
打赏二维码