RC4 Decryption Module

This module provides RC4 encryption/decryption. Like other encryption algorithms, RC4 takes a human-readable message, called the plaintext, as well as a secret key, and produces an encrypted message of the same length, called the cyphertext. In the decryption process, you provide the cyphertext and the same key, and the original plaintext is produced.

You can read about RC4 encryption at https://en.wikipedia.org/wiki/RC4. RC4 is very simple and can be implemented in about 10-15 lines of software code, that you can find in the Key-scheduling algorithm (KSA) and Pseudo-random generation algorithm (PRGA) sections at the Wikipedia link. Essentially, RC4 takes a key of any length of bytes, and uses it to create an endless stream of pseudo-random bytes. These bytes are XOR’d with the user message to produce the cyphertext.

As you may be aware, if we XOR a value with another value twice, we get back the original value (A ^ B ^ B == A). Thus, if we use the same key to generate the same pseudo-random stream of bytes, and XOR them with the cyphertext, we will get back the original plaintext. Using the same key and algorithm for encryption and decryption makes RC4 a symmetric encryption algorithm. This means that the provided module can be used to perform either encryption or decryption.

Assuming one is using the provided module to perform RC4 decryption, the cyphertext is provided to the bytes_in input, and the key to the key input. The decryption process begins when the enable signal is raised, and when completed, the done output will be high for a single cycle. The resulting plaintext is available from the bytes_out output, which won’t change until you start a new encryption/decryption process (by lowering and then raising the enable signal).

Module Name = decrypt_rc4      
Parameter Default Value Description  
K_BYTES_LEN 3 The length, in bytes, of the key.  
M_BYTES_LEN 16 The length, in bytes, of the byte stream the encryption engine will process.  
Port Name Direction Width Description
clk Input 1 100 MHz Clock
reset Input 1 Active-high reset
enable Input 1 Set high to start running the decryption. Once started the decryption will continue until finished. You need to lower this signal and then raise it again to start a new decryption process.
key Input K_BYTES_LEN * 8 Encryption key (24 bits with the default K_BYTES_LEN of 3)
done Output 1 Active-high for one cycle when the encryption/decryption completes
bytes_in Input M_BYTES_LEN * 8 Input bytes (plaintext for encryption, cyphertext for decryption)
bytes_out Output M_BYTES_LEN * 8 Output bytes (cyphertext for encryption, plaintext for decryption)

Click the link below to download the file.

decrypt_rc4.sv

`default_nettype none

module decrypt_rc4 #(
    parameter K_BYTES_LEN =  3,
    parameter M_BYTES_LEN = 16
) (
    input wire logic                                        clk,        // Clock
    input wire logic                                        reset,      // Active-high reset
    input wire logic                                        enable,     // Start encryption/decryption
    input wire logic    [(K_BYTES_LEN * 8) - 1:0]           key,        // key
    input wire logic    [(M_BYTES_LEN * 8) - 1:0]           bytes_in,   // byte stream in
    output logic        [(M_BYTES_LEN * 8) - 1:0]           bytes_out,  // byte stream out
    output logic                                            done        // Active-high done
);

    // RC4 keystream engine. The same datapath encrypts and decrypts, because the
    // keystream is simply XORed onto bytes_in.
    //
    // Algorithm being sequenced (S[] is a 256 x 8 array held in a dual-port RAM):
    //
    //   FILL  :  for i in 0..255:  S[i] := i
    //   KSA   :  j := 0
    //            for i in 0..255:
    //                j := (j + S[i] + key[i mod keylength]) mod 256
    //                swap S[i], S[j]
    //   PRGA  :  i := 0; j := 0
    //            for every message byte:
    //                i := (i + 1) mod 256
    //                j := (j + S[i]) mod 256
    //                swap S[i], S[j]
    //                K := S[(S[i] + S[j]) mod 256]
    //                out := in ^ K
    //
    // The RAM returns read data one clock after the address is presented, so a
    // value requested in state N is consumed in state N+1. All "mod 256"
    // arithmetic falls out of the 8-bit register widths.

    // One state per RAM access slot of the schedule above.
    typedef enum {ST_IDLE, ST_FILL,
                  ST_KSA_RD_I, ST_KSA_RD_J, ST_KSA_SWAP,
                  ST_PRGA_SETUP, ST_PRGA_RD_I, ST_PRGA_RD_J, ST_PRGA_WR_I, ST_PRGA_WR_J, ST_PRGA_RD_K,
                  ST_EMIT,
                  ST_FINISHED} fsm_state_t;

    fsm_state_t state;                              // Current state (registered)
    fsm_state_t state_nxt;                          // Next state (combinational)
    logic       done_nxt;                           // Value loaded into 'done' on the next edge

    // Algorithm index registers
    logic   [7:0]   idx_i;
    logic   [7:0]   idx_j;

    // Values captured from the S[] RAM
    logic   [7:0]   s_at_i;                         // S[i] as read before the swap
    logic   [7:0]   s_at_j;                         // S[j] as read before the swap

    // Combinational helpers
    logic   [7:0]   ksa_j_nxt;                      // KSA:  j + S[i] + key byte
    logic   [7:0]   prga_i_nxt;                     // PRGA: i + 1
    logic   [7:0]   prga_j_nxt;                     // PRGA: j + S[i]
    logic   [7:0]   ks_addr;                        // PRGA: S[i] + S[j], address of the keystream byte

    // Byte counters
    logic   [$clog2(K_BYTES_LEN)-1:0]  key_pos;     // Key byte in use; 0 selects key[7:0]
    logic   [$clog2(M_BYTES_LEN)-1:0]  msg_pos;     // Message byte in use; counts down from the top byte

    // S[] RAM port A / port B wiring
    logic           pa_we,    pb_we;
    logic   [7:0]   pa_addr,  pb_addr;
    logic   [7:0]   pa_wdata, pb_wdata;
    logic   [7:0]   pa_rdata, pb_rdata;             // pb_rdata is connected but never consumed

    ////////////////////////////// Control FSM //////////////////////////////////

    // Next-state and 'done' decode.
    always_comb begin
        state_nxt = state;
        done_nxt  = 1'b0;
        case (state)
            ST_IDLE: begin
                if (enable)
                    state_nxt = ST_FILL;
            end
            ST_FILL: begin
                // Two entries are written per cycle, so the last pair starts at 254.
                if (idx_i == 254)
                    state_nxt = ST_KSA_RD_I;
            end
            ST_KSA_RD_I:   state_nxt = ST_KSA_RD_J;
            ST_KSA_RD_J:   state_nxt = ST_KSA_SWAP;
            ST_KSA_SWAP: begin
                if (idx_i == 255)
                    state_nxt = ST_PRGA_SETUP;
                else
                    state_nxt = ST_KSA_RD_I;
            end
            ST_PRGA_SETUP: state_nxt = ST_PRGA_RD_I;
            ST_PRGA_RD_I:  state_nxt = ST_PRGA_RD_J;
            ST_PRGA_RD_J:  state_nxt = ST_PRGA_WR_I;
            ST_PRGA_WR_I:  state_nxt = ST_PRGA_WR_J;
            ST_PRGA_WR_J:  state_nxt = ST_PRGA_RD_K;
            ST_PRGA_RD_K:  state_nxt = ST_EMIT;
            ST_EMIT: begin
                if (msg_pos == 0) begin
                    // Byte 0 is the last one processed.
                    state_nxt = ST_FINISHED;
                    done_nxt  = 1'b1;
                end else begin
                    state_nxt = ST_PRGA_RD_I;
                end
            end
            ST_FINISHED: begin
                // Stay parked until enable is dropped, so one run happens per enable pulse.
                if (!enable)
                    state_nxt = ST_IDLE;
            end
        endcase
    end

    // State and 'done' registers; synchronous, active-high reset.
    always_ff @(posedge clk) begin
        if (reset) begin
            state <= ST_IDLE;
            done  <= 1'b0;
        end else begin
            state <= state_nxt;
            done  <= done_nxt;
        end
    end

    ////////////////////////////// Datapath /////////////////////////////////////

    // Address / index arithmetic; everything wraps naturally at 8 bits.
    always_comb begin
        prga_i_nxt = idx_i + 1;
        ksa_j_nxt  = idx_j + pa_rdata + key[key_pos * 8 +: 8];
        prga_j_nxt = idx_j + pa_rdata;
        ks_addr    = s_at_i + s_at_j;
    end

    // All datapath registers, updated according to the current state.
    always_ff @(posedge clk) begin
        case (state)
            ST_IDLE: begin
                idx_i   <= 8'b0;
                idx_j   <= 8'b0;
                key_pos <= 0;
            end
            ST_FILL: begin
                idx_i <= idx_i + 2;             // wraps back to 0 after the pair at 254/255
            end
            ST_KSA_RD_J: begin
                // pa_rdata holds S[i], requested in ST_KSA_RD_I.
                idx_j  <= ksa_j_nxt;
                s_at_i <= pa_rdata;
            end
            ST_KSA_SWAP: begin
                idx_i <= idx_i + 1;
                if (key_pos == K_BYTES_LEN - 1)
                    key_pos <= 0;
                else
                    key_pos <= key_pos + 1;
            end
            ST_PRGA_SETUP: begin
                idx_i   <= 0;
                idx_j   <= 0;
                msg_pos <= (M_BYTES_LEN - 1);
            end
            ST_PRGA_RD_I: begin
                idx_i <= prga_i_nxt;
            end
            ST_PRGA_RD_J: begin
                // pa_rdata holds S[i], requested in ST_PRGA_RD_I.
                idx_j  <= prga_j_nxt;
                s_at_i <= pa_rdata;
            end
            ST_PRGA_WR_I: begin
                // pa_rdata holds S[j], requested in ST_PRGA_RD_J.
                s_at_j <= pa_rdata;
            end
            ST_EMIT: begin
                // pa_rdata holds the keystream byte requested in ST_PRGA_RD_K.
                bytes_out[msg_pos * 8 +: 8] <= bytes_in[msg_pos * 8 +: 8] ^ pa_rdata;
                msg_pos <= msg_pos - 1;
            end
        endcase
    end

    ////////////////////////////// S[] RAM control //////////////////////////////

    // Addresses and write data are don't-care in states that do not use a port.
    always_comb begin
        pa_we    = 1'b0;
        pb_we    = 1'b0;
        pa_addr  = 8'bxxxxxxxx;
        pb_addr  = 8'bxxxxxxxx;
        pa_wdata = 8'bxxxxxxxx;
        pb_wdata = 8'bxxxxxxxx;
        case (state)
            ST_FILL: begin
                // Write S[i] := i and S[i+1] := i+1 in the same cycle.
                pa_we    = 1'b1;
                pa_addr  = idx_i;
                pa_wdata = idx_i;
                pb_we    = 1'b1;
                pb_addr  = idx_i + 1;
                pb_wdata = idx_i + 1;
            end
            ST_KSA_RD_I: begin
                pa_addr = idx_i;
            end
            ST_KSA_RD_J: begin
                pa_addr = ksa_j_nxt;
            end
            ST_KSA_SWAP: begin
                // Both halves of the swap in one cycle: S[i] := S[j] on A, S[j] := S[i] on B.
                pa_we    = 1'b1;
                pa_addr  = idx_i;
                pa_wdata = pa_rdata;
                pb_we    = 1'b1;
                pb_addr  = idx_j;
                pb_wdata = s_at_i;
            end
            ST_PRGA_RD_I: begin
                pa_addr = prga_i_nxt;
            end
            ST_PRGA_RD_J: begin
                pa_addr = prga_j_nxt;
            end
            ST_PRGA_WR_I: begin
                // S[i] := S[j]
                pa_we    = 1'b1;
                pa_addr  = idx_i;
                pa_wdata = pa_rdata;
            end
            ST_PRGA_WR_J: begin
                // S[j] := old S[i]
                pa_we    = 1'b1;
                pa_addr  = idx_j;
                pa_wdata = s_at_i;
            end
            ST_PRGA_RD_K: begin
                pa_addr = ks_addr;
            end
        endcase
    end

    // 256 x 8 storage for S[]; both ports share clk and are permanently enabled.
    dual_port_ram #(.ADDR_WIDTH(8), .DATA_WIDTH(8)) ram_inst (
        .clk_a      (clk),
        .clk_b      (clk),
        .en_a       (1'b1),
        .en_b       (1'b1),
        .we_a       (pa_we),
        .we_b       (pb_we),
        .addr_a     (pa_addr),
        .addr_b     (pb_addr),
        .data_in_a  (pa_wdata),
        .data_in_b  (pb_wdata),
        .data_out_a (pa_rdata),
        .data_out_b (pb_rdata)
    );

endmodule


// True dual-port synchronous RAM.
//
// Parameters:
//   ADDR_WIDTH - number of address bits; the memory holds 2**ADDR_WIDTH words.
//   DATA_WIDTH - width of each word in bits.
//
// Each port (a / b) has its own clock, enable, write enable, address, write
// data and registered read data:
//   * When en_x is low the port does nothing and data_out_x holds its value.
//   * When en_x is high, data_out_x is loaded with the word at addr_x on the
//     rising clock edge (one-cycle read latency).
//   * If we_x is also high, data_in_x is stored at addr_x on that same edge.
//     Because non-blocking assignments are used, data_out_x receives the
//     word that was stored BEFORE the write (read-first behaviour).
//
// If both ports write the same address on the same edge, the stored value
// depends on process scheduling order and should not be relied upon.
module dual_port_ram #(
    parameter ADDR_WIDTH=10,
    parameter DATA_WIDTH=32
)(
    input wire logic clk_a,
    input wire logic clk_b,
    input wire logic en_a,
    input wire logic en_b,
    input wire logic we_a,
    input wire logic we_b,
    input wire logic    [ADDR_WIDTH-1:0]    addr_a,
    input wire logic    [ADDR_WIDTH-1:0]    addr_b,
    input wire logic    [DATA_WIDTH-1:0]    data_in_a,
    input wire logic    [DATA_WIDTH-1:0]    data_in_b,
    output logic        [DATA_WIDTH-1:0]    data_out_a,
    output logic        [DATA_WIDTH-1:0]    data_out_b
);

// Storage shared by both ports.
logic   [DATA_WIDTH-1:0]    ram [(2**ADDR_WIDTH)-1:0];

// NOTE: both port processes deliberately use plain 'always' rather than
// 'always_ff'. A variable assigned inside an always_ff may not be written by
// any other process, and 'ram' is necessarily written by both ports.

// Port A
always @(posedge clk_a) begin
    if (en_a) begin
        if (we_a)
            ram[addr_a] <= data_in_a;
        data_out_a <= ram[addr_a];      // old contents when writing (read-first)
    end
end

// Port B
always @(posedge clk_b) begin
    if (en_b) begin
        if (we_b)
            ram[addr_b] <= data_in_b;
        data_out_b <= ram[addr_b];      // old contents when writing (read-first)
    end
end

endmodule