探究CRC32算法实现原理-why table-driven implemention探究CRC32算法实现原理-why table-driven implemention Author : Kevin Lynx Preface 基于不重造轮子的原则,本文尽量不涉及网络上遍地都是的资料。 What's CRC ? 简而言之,CRC是一个数值。该数值被用于校验数据的正确性。CRC数值简单地说就是通过让你需要做 那么,如何让你的数据除以一个常数?方法是对你的数据进行必要的编码处理,逐字节处理成数字。 以上内容你不必全部理解,因为你需要查阅其他资料来获取CRC完整的理论介绍。 The mathematics behind CRC ? 很多教科书会把CRC与多项式关联起来。这里的多项式指的是系数为0或1的式子,例如: 什么是生成多项式? 所谓的生成多项式,就是上面我所说的常数。注意,在这里,一个多项式就表示了一个比特流,也就是一堆 由此可以看出,CRC值也可以看成我们的数据除以一个生成多项式而得到的余数。 如何做这个除法? 套用大部分教科书给出的计算方法,因为任何数据都可以被处理成纯数字,因此,在某种程度上说,我们可以 希望进行到这里,你可以获取更多关于CRC的感性认识。而我们所要做的,也就是实现一个CRC的计算算法。 The simplest algorithm. 最简单的实现算法,是一种模拟算法。我们模拟上面的除法过程,遵从网上一份比较全面的资料,我们设定
/**////
/// The simplest CRC implement algorithm. /// /**//* Load the register with zero bits. Augment the message by appending W zero bits to the end of it. While (more message bits) Begin Shift the register left by one bit, reading the next bit of the augmented message into register bit position 0. If (a 1 bit popped out of the register during step 3) Register = Register XOR Poly. End The register now contains the remainder. */ #include <stdio.h> #define POLY 0x13 int main() { /**//// the data unsigned short data = 0x035b; /**//// load the register with zero bits unsigned short regi = 0x0000; /**//// augment the data by appending W(4) zero bits to the end of it. data <<= 4; /**//// we do it bit after bit for( int cur_bit = 15; cur_bit >= 0; -- cur_bit ) { /**//// test the highest bit which will be poped later. /// in fact, the 5th bit from right is the hightest bit here if( ( ( regi >> 4 ) & 0x0001 ) == 0x1 ) { regi = regi ^ POLY; } /**//// shift the register regi <<= 1; /**//// reading the next bit of the augmented data unsigned short tmp = ( data >> cur_bit ) & 0x0001; regi |= tmp; } /**//// and now, register contains the remainder which is also called CRC value. return 0; }
很多时候这种让人容易理解的算法都不会被实际用到。这种逐bit操作的算法实在很慢。你可能知道 一种改善这种bit after bit的方法就是将这个bit扩大,例如典型的做法就是换成byte。这里我要详细地叙述下 我们每次会先检查register的最高位是否为1,如果为1,则将生成多项式(所谓的Poly)与register进行异或操作。 也就是说,register中的某一位的值会决定后面几位的值。如果将register最高字节每一bit编码为: 那么,如果我们可以直接获取这个影响,我们就可以byte after byte地处理,而不是bit after bit。如何获取这个 但是,为什么我们逐bit进行计算的过程为什么可以简化为一步操作?事实上,我们没有简化这个操作。一种用于教学
/**////
/// The table-driven CRC implement algorithm part 1. /// /**//* While (augmented message is not exhausted) Begin Examine the top byte of the register Calculate the control byte from the top byte of the register Sum all the Polys at various offsets that are to be XORed into the register in accordance with the control byte Shift the register left by one byte, reading a new message byte into the rightmost byte of the register XOR the summed polys to the register End */ #include <stdio.h> #include <stdlib.h> #include <memory.h> #define POLY 0x04C11DB7L int main() { /**//// the data unsigned long data = 0x1011035b; /**//// load the register with the data unsigned long regi = 0; /**//// allocate memory to contain the AUGMENTED data (added some zeros) unsigned char p[8]; /**//// copy data memset( p, 0, 8 ); memcpy( p, &data, 4 ); /**//// because data contains 4 bytes for( int i = 0; i < 8; ++ i ) { /**//// get the top byte of the register unsigned char top_byte = (unsigned char)( ( regi >> 24 ) & 0xff ); /**//// sum all the polys at various offsets unsigned long sum_poly = top_byte << 24; for( int j = 0; j < 8; ++ j ) { /**//// check the top bit if( ( sum_poly >> 31 ) != 0 ) { /**//// TODO : understand why '<<' first sum_poly = ( sum_poly << 1 ) ^ POLY; } else { sum_poly <<= 1; } } /**//// shift the register left by on byte, reading a new regi = ( ( regi << 8 ) | p[i] ); /**//// xor the summed polys to the register regi ^= sum_poly; } /**//// and now, register contains the remainder which is also called CRC value. return 0; }
/**//// sum all the polys at various offsets
unsigned long sum_poly = top_byte << 24; for( int j = 0; j < 8; ++ j ) { /**//// check the top bit if( ( sum_poly >> 31 ) != 0 ) { /**//// TODO : understand why '<<' first sum_poly = ( sum_poly << 1 ) ^ POLY; } else { sum_poly <<= 1; } } show me where the table is : 如上所说,上面的算法很容易地就可以引进一个表:
上述算法一个典型特征是会在我们的数据后面添加若干0。这样做其他做了很多没用的计算。一种简化做法就是将这些 /**////
/// The table-driven CRC implement algorithm part 2. /// /**//* While (augmented message is not exhausted) Begin Examine the top byte of the register Calculate the control byte from the top byte of the register Sum all the Polys at various offsets that are to be XORed into the register in accordance with the control byte Shift the register left by one byte, reading a new message byte into the rightmost byte of the register XOR the summed polys to the register End */ #include <stdio.h> #include <stdlib.h> #include <memory.h> #define POLY 0x04C11DB7L unsigned long get_sum_poly( unsigned char top_byte ) { /**//// sum all the polys at various offsets unsigned long sum_poly = top_byte << 24; for( int j = 0; j < 8; ++ j ) { /**//// check the top bit if( ( sum_poly >> 31 ) != 0 ) { /**//// TODO : understand why '<<' first sum_poly = ( sum_poly << 1 ) ^ POLY; } else { sum_poly <<= 1; } } return sum_poly; } void create_table( unsigned long *table ) { for( int i = 0; i < 256; ++ i ) { table[i] = get_sum_poly( (unsigned char) i ); } } int main() { /**//// the data unsigned long data = 0x1011035b; /**//// load the register with the data unsigned long regi = 0; /**//// allocate memory to contain the AUGMENTED data (added some zeros) unsigned char p[8]; /**//// copy data memset( p, 0, 8 ); memcpy( p, &data, 4 ); /**//// the table unsigned long table[256]; /**//// create the table create_table( table ); /**//// because data contains 4 bytes for( int i = 0; i < 8; ++ i ) { /**//// get the top byte of the register unsigned char top_byte = (unsigned char)( ( regi >> 24 ) & 0xff ); /**//// shift the register left by on byte, reading a new regi = ( ( regi << 8 ) | p[i] ); /**//// xor the summed polys to the register regi ^= table[top_byte]; } /**//// and now, register contains the remainder which is also called CRC value. return 0; }
讨厌的附加0 以上算法有个很大的特征就是要为我们的数据附加很多0。附加0后其实也附加了很多无用的操作。我们要将这些
int main()
{ /**//// the data unsigned long data = 0x1011035b; /**//// load the register with the data unsigned long regi = 0; /**//// allocate memory to contain the data unsigned char p[4]; /**//// copy data memcpy( p, &data, 4 ); /**//// the table unsigned long table[256]; /**//// create the table create_table( table ); /**//// because data contains 4 bytes for( int i = 0; i < 4; ++ i ) { regi = ( regi << 8 ) ^ table[ ( regi >> 24 ) ^ p[i] ]; } /**//// and now, register contains the remainder which is also called CRC value. return 0; }
似乎一切被我说的很简单。我想只是因为我没说清楚。我尽量让你注意到事情的重点。我们进行到这里,似乎 在实际处理时,很多数据的bit会进行一种颠倒操作,例如1010会被颠倒为0101。出现这样的情况是因为某些硬件 另外,关于register的初始值问题,有些CRC算法会初始化为0xffffffff。以下给出一个会进行bit颠倒的算法,
/**////
/// The table-driven CRC implement algorithm part 4. /// /// Donot need augment W/8 zero bytes. /// #include <stdio.h> #include <stdlib.h> #include <memory.h> #define POLY 0x04C11DB7L #define BITMASK(X) (1L << (X)) unsigned long refelect( unsigned long v, int b ) { int i; unsigned long t = v; for( i = 0; i < b; ++ i ) { if( t & 1L ) v |= BITMASK( (b-1)-i ); else v &= ~BITMASK( (b-1)-i ); t >>= 1; } return v; } /**//// i'll try to write a correct algorithm unsigned long get_sum_poly( unsigned char byte ) { byte = (unsigned long) refelect( byte, 8 ); unsigned long sum_poly = byte << 24; for( int i = 0; i < 8; ++ i ) { /**//// check the top bit if( ( sum_poly >> 31 ) != 0 ) { /**//// TODO : understand why '<<' first sum_poly = ( sum_poly << 1 ) ^ POLY; } else { sum_poly <<= 1; } } sum_poly = refelect( sum_poly, 32 ); return sum_poly; } void create_table( unsigned long *table ) { for( int i = 0; i <= 255; ++ i ) { table[i] = get_sum_poly( (unsigned char) i ); } } void output_table( const unsigned long *table ) { FILE *fp = fopen( "table.txt", "w" ); for( int y = 0; y < 64; ++ y ) { fprintf( fp, "0x%08lXL,\t0x%08lXL,\t0x%08lXL,\t0x%08lXL, \n", table[ y * 4 + 0], table[ y * 4 + 1], table[ y * 4 + 2], table[ y * 4 + 3] ); } fclose( fp ); } int main() { /**//// the table unsigned long table[256]; /**//// the data unsigned long data = 0x1011035b; /**//// load the register with the data unsigned long regi = 0; /**//// allocate memory to contain the data unsigned char p[4]; /**//// copy data memcpy( p, &data, 4 ); /**//// create the table create_table( table ); /**//// output the table output_table( table ); /**//// because data contains 4 bytes for( int i = 0; i < 4; ++ i ) { regi = ( regi << 8 ) ^ table[ ( regi >> 24 ) ^ p[i] ]; } /**//// and now, register contains the remainder which is also called CRC value. return 0; }
我想我并没有将整个过程彻底地讲清楚。但是我希望你能明白大致的原理。关于table-driven中那个神奇的表的来历,
|
|
来自: neverdeady > 《编程思想C语言》