词频统计

作业要求:

1.读取文件;

2.记录出现的词汇及出现频率;

3.输出运行结果。

编码实现:

// pin.cpp : Defines the entry point for the console application.
//

#include "stdafx.h"
#include <iostream>
#include <fstream>
#include <string>
#define SOURCE_H

// One recorded occurrence of a word in the text. Occurrences of the same word
// are chained through `next`.
struct node
{
  int col;    // copied from calcute()'s `col` counter, which advances on every '\n'
  int row;    // copied from calcute()'s `row` counter, which restarts at 0 after every '\n'
  node* next; // following occurrence, or NULL at the end of the chain
};
// One word being searched for; an element of TLink's singly linked list.
struct Node
{
  char words[20]; // NUL-terminated text of the word
  node* ptr;      // first entry of the occurrence chain built by TLink::calcute(), NULL if none
  Node* next;     // following word in the list, or NULL at the end
  int num;        // number of characters in `words`, not counting the terminator

};

class TLink
{
  public:
  TLink() { head = NULL; }
  ~TLink()
  {
    while( head != NULL )
    {
      Node* temp;
      temp = head;
      head = head -> next;
      delete temp;
    }
  }
  void Insert( char* Item );
  void calcute(char *szFile,int size);
  Node* gethead();
  private:
  Node* head;
};

// Returns alp converted to lowercase when it is a capital letter 'A'..'Z',
// otherwise returns alp unchanged.
char A_to_a( char alp );

// Prints every word of the global list with its recorded positions and the
// number of occurrences.
void showwindow();

// Loads the input file into the global text buffer and echoes it to the console.
void show_text();

// Interactive loop: reads the words to look up, runs the search and prints the result.
void input();
#include<iostream>
#include<fstream>
#include<cstdlib>
//#include "source.h"
using namespace std;

TLink link;        // the single word list used by input(), showwindow() and the TLink members
int i=0;           // number of characters show_text() has stored in szFile
char szFile[2000]; // contents of the input file; show_text() terminates it with '\0'

// Program entry point: echo the input file, print a separating blank line,
// then hand control to the interactive query loop.
int main()
{
  show_text();      // load the text and display it
  cout << endl;     // visual gap between the text and the prompt
  input();          // ask for words until the user declines to continue
  return EXIT_SUCCESS;
}


/****************************************************************/

void TLink::Insert(char *Item)
{
  int flag = 0;
  Node* temp;
  temp = new Node;
  int i = 0;
  while( Item[i] != '\0' )
  {
    temp -> words[i] = Item[i];
    ++ i;
  }
  temp -> num = i;
  temp -> words[i] = '\0';


  Node* ptrr = NULL;
  ptrr = link.gethead();
  while( ptrr != NULL )
  {
    if( ptrr -> num == temp -> num )
    {
      int n;
      for( n = 0; n < i; ++ n )
      if( A_to_a( ptrr -> words[n] ) != A_to_a( Item[n] ) )
        break;
      if( n == i )
      {
        flag = 1;
        break;
      }
    }
    ptrr = ptrr -> next;
  }  

  if( flag != 1 )
  {

    temp -> ptr = NULL;
    temp -> next = NULL;
    Node* Temp = head;
    if( head == NULL )
    {
       head = temp;
    }
    else
    {
      while( Temp -> next != NULL )
      Temp = Temp -> next;
      Temp -> next = temp;
    }
  }
  else
  delete temp;

}

/*****************************************************************/

// Maps the capital letters 'A'..'Z' to 'a'..'z' (ASCII) and returns every
// other character unchanged.
char A_to_a( char alp )
{
  const bool is_capital = !( alp < 'A' || alp > 'Z' );
  return is_capital ? static_cast<char>( alp + ( 'a' - 'A' ) ) : alp;
}

/*****************************************************************/

void TLink::calcute(char *szFile, int size)
{
  //cout << "calcute is called!" << endl;
  int i = 0; //记录已搜索过的字符数-1
  int col = 1;//列标
  int row = 0;//行标
  int count;//记录空格数-1
  Node* ptrr = NULL;
  while( i < size )
  {
    ptrr = link.gethead();
    int j = 0;//对每个单词从开始计数
    while( ( szFile[i] >= 'a' && szFile[i] <= 'z' ) || ( szFile[i] >= 'A' && szFile[i] <= 'Z' ) )
    {
      ++ i;
      ++ j;
    }
    while( ptrr != NULL )
    {
      if( ptrr -> num == j )
      {
        int n;
        for( n = 0; n <= j; ++ n )
        if( A_to_a( ptrr -> words[n] ) != A_to_a( szFile[i - j + n] ) )
          break;
        if( n == j )
        {
          node* temp;
          temp = new node;
          temp -> col = col;
          temp -> row = row;
          temp -> next = NULL;
          node* Temp = ptrr -> ptr;
          if( ptrr -> ptr == NULL )
          {
            ptrr -> ptr = temp;
          }
          else
          {
            while( Temp -> next != NULL )
            Temp = Temp -> next;
            Temp -> next = temp;
          }
        }//插入行数
      }

    ptrr = ptrr -> next;
  }
  if( szFile[i] == ' ' || szFile[i] == '\n' )
  {
    count = -1;
    while( szFile[i] == ' ' )
    {
      ++ i; //设置列数
      ++ row;//行的单词个数加
      ++ count;//单词之间空格-1
    }
    row = row - count;
    if( szFile[i] == '\n' )
    {
      ++ col; //列遇到换行累加
      ++ i;
      row = 0;//单词的行个数清零
    }
  }
  else
    ++ i;
  }
  cout << endl;

}

/****************************************************************/


// Gives read access to the start of the word list; the result is NULL when
// no word is stored.
Node* TLink::gethead()
{
  return this -> head;
}

/********************************************************/

void showwindow()
{
  Node* curptr = link.gethead();
  while( curptr != NULL )
  {
    int word_num = 0;
    for( int k = 0; curptr -> words[k] != '\0'; ++ k )
    cout << curptr -> words[k];
    cout << endl;
    if( curptr -> ptr == NULL )
    cout << "没有该词,或输入不正确!" << endl;
    else
    while( curptr -> ptr != NULL )
    {
      cout << "(";
      cout << curptr -> ptr -> col ;
      cout << ",";
      cout << curptr -> ptr -> row ;
      cout << ")";
      cout << ' ';
      curptr -> ptr = curptr -> ptr -> next;
      word_num ++;
    }
    cout << endl;
    cout << "该单词共出现" << word_num << "次!" << endl;
    curptr = curptr -> next;

  }
}


/*************************************************************/
// Reads the input file into the global buffer szFile, advancing the global
// counter i by one per stored character, terminates the buffer with '\0',
// and echoes the text followed by the character count to cout.
//
// At most sizeof(szFile) - 1 characters are stored, leaving room for the
// terminator. A longer file is cut off and a notice is printed instead of
// writing past the end of the buffer. The program exits with status 1 when
// the file cannot be opened.
void show_text()
{
  ifstream fin;
  fin.open("F:\\pin\\Debug\\1.txt");
  if (fin.fail())
  {
    cout<<"Input file opening failed.\n";
    exit(1);
  }

  const int capacity = static_cast<int>( sizeof( szFile ) ) - 1;
  char next;

  // get() fails at end of file, which ends the loop without storing anything.
  while( i < capacity && fin.get( next ) )
  {
    szFile[i] = next;
    ++ i;
  }
  szFile[i] = '\0';

  // The buffer is full; if the stream is still good and has more data, say so.
  if( i == capacity && fin.good() && fin.peek() != ifstream::traits_type::eof() )
    cout << "Input file truncated to " << capacity << " characters.\n";

  for( int k = 0; k < i; ++ k )
    cout << szFile[k];
  cout << "*****Total number :" << i << endl;
  cout << "***************************************************************************" << endl;
}


/**********************************************************************/
// Interactive query loop.
//
// Each round empties the global word list, reads a comma-separated list of
// words terminated by '@' from cin (operator>> skips whitespace, so blanks
// inside the list are ignored), stores the words, runs the search over the
// loaded text and prints the result. The rounds repeat until the user answers
// 'n' or 'N'.
//
// Robustness:
//   - a word longer than the temporary buffer is cut off at the buffer size
//     instead of overflowing it; the rest of the word is consumed and dropped;
//   - empty items (",," or a leading ',') are skipped;
//   - when cin reaches end of input or fails, the list read so far is
//     processed as if '@' had been entered and the loop then ends, instead
//     of spinning forever on the last character read.
void input()
{
  char Item[40]; // temporary storage for the word being read
  const int item_capacity = static_cast<int>( sizeof( Item ) ) - 1;
  char in;       // character just read
  char ans;      // answer to the "continue?" question
  do{
    // The explicit destructor call is used to empty the list between rounds;
    // ~TLink() leaves head == NULL, so the list can be filled again afterwards.
    if( link.gethead() != NULL )
      link.~TLink();
    cout << "请输入要统计的单词,单词之间用逗号隔开(输入@键结束,本程序忽略空格):" << endl;

    // `more` stays true while another item may follow.
    bool more = ( cin >> in ) && in != '@';
    while( more )
    {
      int m = 0;
      while( in != ',' )
      {
        if( m < item_capacity )
        {
          Item[m] = in;
          ++ m;
        }
        if( !( cin >> in ) || in == '@' )
        {
          more = false; // end of list (or of input) reached inside a word
          break;
        }
      }
      Item[m] = '\0';
      if( m > 0 )
        link.Insert( Item );
      if( more )
        more = ( cin >> in ) && in != '@'; // first character of the next item
    }

    if( link.gethead() == NULL )
      cout << "没有插入任何单词!" << endl;
    else
    {
      link.calcute( szFile, i );
      showwindow();
    }
    cout << "是否继续?(Y/y or N/n):";
    if( !( cin >> ans ) )
      break; // no answer available: stop instead of testing an unset value
  }while( ( ans != 'n' ) && ( ans != 'N' ) );
}

运行结果:

 

posted @ 2016-09-06 23:40  刘ff  阅读(228)  评论(3)  编辑  收藏  举报